Managed to get faster again by searching the HashMap using &str and only converting to String on insertion. Removed FxHashMap again.
This commit is contained in:
parent
53ea542f36
commit
608cbb59e5
@ -1,10 +1,11 @@
|
|||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::parse;
|
||||||
use memmap2::MmapOptions;
|
use memmap2::MmapOptions;
|
||||||
use rustc_hash::{FxBuildHasher, FxHashMap as HashMap};
|
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::{fs::File, thread};
|
use std::{fs::File, thread};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use crate::utils::parse::hashstr;
|
||||||
|
|
||||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
@ -14,9 +15,8 @@ pub fn run() {
|
|||||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||||
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
|
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
|
||||||
let file_length = mmap.len();
|
let file_length = mmap.len();
|
||||||
let hasher = FxBuildHasher::default();
|
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||||
let mut stations: HashMap<String, StationMeasurements> =
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
|
||||||
let (tx, rx) = mpsc::channel();
|
let (tx, rx) = mpsc::channel();
|
||||||
let cores = thread::available_parallelism().unwrap().into();
|
let cores = thread::available_parallelism().unwrap().into();
|
||||||
let chunk_length = file_length / cores;
|
let chunk_length = file_length / cores;
|
||||||
@ -41,17 +41,18 @@ pub fn run() {
|
|||||||
let (start, end) = *bounds.get(i).unwrap();
|
let (start, end) = *bounds.get(i).unwrap();
|
||||||
let mmap_slice = &mmap[start..end];
|
let mmap_slice = &mmap[start..end];
|
||||||
s.spawn(move || {
|
s.spawn(move || {
|
||||||
let mut t_stations: HashMap<String, StationMeasurements> =
|
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
for line in mmap_slice.split(|&byte| { byte == b'\n' }) {
|
for line in mmap_slice.split(|&byte| { byte == b'\n' }) {
|
||||||
if line.len() == 0 {
|
if line.len() == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let hash = hashstr(station);
|
||||||
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp);
|
let temp = parse::temp(temp);
|
||||||
let measurements_option = t_stations.get_mut(&station);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
if let Some(measurements) = measurements_option {
|
if let Some((_, measurements)) = measurements_option {
|
||||||
measurements.update(temp);
|
measurements.update(temp);
|
||||||
} else {
|
} else {
|
||||||
let measurements = StationMeasurements {
|
let measurements = StationMeasurements {
|
||||||
@ -60,7 +61,7 @@ pub fn run() {
|
|||||||
count: 1,
|
count: 1,
|
||||||
sum: temp,
|
sum: temp,
|
||||||
};
|
};
|
||||||
t_stations.insert(station, measurements);
|
t_stations.insert(hash, (station.to_string(), measurements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let _ = tx.send(t_stations);
|
let _ = tx.send(t_stations);
|
||||||
@ -68,18 +69,18 @@ pub fn run() {
|
|||||||
}
|
}
|
||||||
drop(tx);
|
drop(tx);
|
||||||
while let Ok(t_stations) = rx.recv() {
|
while let Ok(t_stations) = rx.recv() {
|
||||||
for (station, measurements) in t_stations.iter() {
|
for (hash, (station, measurements)) in t_stations.iter() {
|
||||||
let joined_measurements_options = stations.get_mut(station);
|
let joined_measurements_options = stations.get_mut(hash);
|
||||||
if let Some(joined_measurements) = joined_measurements_options {
|
if let Some((_, joined_measurements)) = joined_measurements_options {
|
||||||
joined_measurements.merge(measurements);
|
joined_measurements.merge(measurements);
|
||||||
} else {
|
} else {
|
||||||
stations.insert(station.to_owned(), *measurements);
|
stations.insert(*hash, (station.to_owned(), *measurements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut stations: Vec<String> = stations
|
let mut stations: Vec<String> = stations
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(station, measurements)| {
|
.map(|(_, (station, measurements))| {
|
||||||
let measurements = measurements.to_string();
|
let measurements = measurements.to_string();
|
||||||
#[cfg(feature = "json")]
|
#[cfg(feature = "json")]
|
||||||
{
|
{
|
||||||
|
@ -57,7 +57,7 @@ pub fn run() {
|
|||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hashstr(station);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = t_stations.get_mut(&hash);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
if let Some((_, measurements)) = measurements_option {
|
if let Some((_, measurements)) = measurements_option {
|
||||||
@ -69,7 +69,7 @@ pub fn run() {
|
|||||||
count: 1,
|
count: 1,
|
||||||
sum: temp,
|
sum: temp,
|
||||||
};
|
};
|
||||||
t_stations.insert(hash, (station, measurements));
|
t_stations.insert(hash, (station.to_string(), measurements));
|
||||||
}
|
}
|
||||||
currposition += line_len as u64;
|
currposition += line_len as u64;
|
||||||
if currposition >= end {
|
if currposition >= end {
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
use smol::fs::File;
|
use smol::fs::File;
|
||||||
use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom};
|
use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom};
|
||||||
use rustc_hash::{FxHashMap as HashMap, FxBuildHasher};
|
|
||||||
|
|
||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::parse;
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
use crate::utils::parse::hashstr;
|
||||||
|
|
||||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
@ -14,9 +15,8 @@ pub fn run() {
|
|||||||
const FILE_PATH: &str = "../../../measurements.txt";
|
const FILE_PATH: &str = "../../../measurements.txt";
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
thread::scope(|s| {
|
thread::scope(|s| {
|
||||||
let hasher = FxBuildHasher::default();
|
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||||
let mut stations: HashMap<String, StationMeasurements> =
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
|
||||||
let (tx, rx) = mpsc::channel();
|
let (tx, rx) = mpsc::channel();
|
||||||
let cores = thread::available_parallelism().unwrap().into();
|
let cores = thread::available_parallelism().unwrap().into();
|
||||||
let bounds = smol::block_on(async {
|
let bounds = smol::block_on(async {
|
||||||
@ -57,8 +57,8 @@ pub fn run() {
|
|||||||
.expect("File measurements.txt not found");
|
.expect("File measurements.txt not found");
|
||||||
let mut reader = BufReader::new(&mut file);
|
let mut reader = BufReader::new(&mut file);
|
||||||
reader.seek(SeekFrom::Start(currposition)).await.unwrap();
|
reader.seek(SeekFrom::Start(currposition)).await.unwrap();
|
||||||
let mut t_stations: HashMap<String, StationMeasurements> =
|
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
let mut line = Vec::with_capacity(108);
|
let mut line = Vec::with_capacity(108);
|
||||||
loop {
|
loop {
|
||||||
let line_len = reader
|
let line_len = reader
|
||||||
@ -69,10 +69,11 @@ pub fn run() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let hash = hashstr(station);
|
||||||
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = t_stations.get_mut(&station);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
if let Some(measurements) = measurements_option {
|
if let Some((_, measurements)) = measurements_option {
|
||||||
measurements.update(temp);
|
measurements.update(temp);
|
||||||
} else {
|
} else {
|
||||||
let measurements = StationMeasurements {
|
let measurements = StationMeasurements {
|
||||||
@ -81,7 +82,7 @@ pub fn run() {
|
|||||||
count: 1,
|
count: 1,
|
||||||
sum: temp,
|
sum: temp,
|
||||||
};
|
};
|
||||||
t_stations.insert(station, measurements);
|
t_stations.insert(hash, (station.to_string(), measurements));
|
||||||
}
|
}
|
||||||
currposition += line_len as u64;
|
currposition += line_len as u64;
|
||||||
if currposition >= end {
|
if currposition >= end {
|
||||||
@ -95,18 +96,18 @@ pub fn run() {
|
|||||||
}
|
}
|
||||||
drop(tx);
|
drop(tx);
|
||||||
while let Ok(t_stations) = rx.recv() {
|
while let Ok(t_stations) = rx.recv() {
|
||||||
for (station, measurements) in t_stations.iter() {
|
for (hash, (station, measurements)) in t_stations.iter() {
|
||||||
let joined_measurements_options = stations.get_mut(station);
|
let joined_measurements_options = stations.get_mut(hash);
|
||||||
if let Some(joined_measurements) = joined_measurements_options {
|
if let Some((_, joined_measurements)) = joined_measurements_options {
|
||||||
joined_measurements.merge(measurements);
|
joined_measurements.merge(measurements);
|
||||||
} else {
|
} else {
|
||||||
stations.insert(station.to_owned(), *measurements);
|
stations.insert(*hash, (station.to_owned(), *measurements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut stations: Vec<String> = stations
|
let mut stations: Vec<String> = stations
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(station, measurements)| {
|
.map(|(_, (station, measurements))| {
|
||||||
let measurements = measurements.to_string();
|
let measurements = measurements.to_string();
|
||||||
#[cfg(feature = "json")]
|
#[cfg(feature = "json")]
|
||||||
{
|
{
|
||||||
|
@ -25,7 +25,7 @@ pub fn run() {
|
|||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hashstr(station);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = stations.get_mut(&hash);
|
let measurements_option = stations.get_mut(&hash);
|
||||||
if let Some((_, measurements)) = measurements_option {
|
if let Some((_, measurements)) = measurements_option {
|
||||||
@ -37,7 +37,7 @@ pub fn run() {
|
|||||||
count: 1,
|
count: 1,
|
||||||
sum: temp,
|
sum: temp,
|
||||||
};
|
};
|
||||||
stations.insert(hash, (station, measurements));
|
stations.insert(hash, (station.to_string(), measurements));
|
||||||
}
|
}
|
||||||
line.clear();
|
line.clear();
|
||||||
}
|
}
|
||||||
@ -51,5 +51,5 @@ pub fn run() {
|
|||||||
stations.sort();
|
stations.sort();
|
||||||
let stations = stations.join(",");
|
let stations = stations.join(",");
|
||||||
println!("{{{stations}}}");
|
println!("{{{stations}}}");
|
||||||
println!("Time={} μs", now.elapsed().as_micros());
|
println!("Time={} ms", now.elapsed().as_millis());
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,7 @@ pub fn run() {
|
|||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hashstr(station);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = stations.get_mut(&hash);
|
let measurements_option = stations.get_mut(&hash);
|
||||||
if let Some((_, measurements)) = measurements_option {
|
if let Some((_, measurements)) = measurements_option {
|
||||||
@ -42,7 +42,7 @@ pub fn run() {
|
|||||||
count: 1,
|
count: 1,
|
||||||
sum: temp,
|
sum: temp,
|
||||||
};
|
};
|
||||||
stations.insert(hash, (station, measurements));
|
stations.insert(hash, (station.to_string(), measurements));
|
||||||
}
|
}
|
||||||
line.clear();
|
line.clear();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user