Managed to get faster again by searching the hashmap using &str and only converting to String on insertion. Removed FxHashMap again.

This commit is contained in:
Fabian Schmidt 2024-08-27 10:57:23 +02:00
parent 53ea542f36
commit 608cbb59e5
5 changed files with 39 additions and 37 deletions

View File

@ -1,10 +1,11 @@
use crate::models::station_measurements::StationMeasurements; use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse; use crate::utils::parse;
use memmap2::MmapOptions; use memmap2::MmapOptions;
use rustc_hash::{FxBuildHasher, FxHashMap as HashMap};
use std::sync::mpsc; use std::sync::mpsc;
use std::time::Instant; use std::time::Instant;
use std::{fs::File, thread}; use std::{fs::File, thread};
use std::collections::HashMap;
use crate::utils::parse::hashstr;
const DEFAULT_HASHMAP_LENGTH: usize = 10000; const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -14,9 +15,8 @@ pub fn run() {
let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let file = File::open(FILE_PATH).expect("File measurements.txt not found");
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
let file_length = mmap.len(); let file_length = mmap.len();
let hasher = FxBuildHasher::default(); let mut stations: HashMap<usize, (String, StationMeasurements)> =
let mut stations: HashMap<String, StationMeasurements> = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
let (tx, rx) = mpsc::channel(); let (tx, rx) = mpsc::channel();
let cores = thread::available_parallelism().unwrap().into(); let cores = thread::available_parallelism().unwrap().into();
let chunk_length = file_length / cores; let chunk_length = file_length / cores;
@ -41,17 +41,18 @@ pub fn run() {
let (start, end) = *bounds.get(i).unwrap(); let (start, end) = *bounds.get(i).unwrap();
let mmap_slice = &mmap[start..end]; let mmap_slice = &mmap[start..end];
s.spawn(move || { s.spawn(move || {
let mut t_stations: HashMap<String, StationMeasurements> = let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
for line in mmap_slice.split(|&byte| { byte == b'\n' }) { for line in mmap_slice.split(|&byte| { byte == b'\n' }) {
if line.len() == 0 { if line.len() == 0 {
break; break;
} }
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let hash = hashstr(station);
let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp); let temp = parse::temp(temp);
let measurements_option = t_stations.get_mut(&station); let measurements_option = t_stations.get_mut(&hash);
if let Some(measurements) = measurements_option { if let Some((_, measurements)) = measurements_option {
measurements.update(temp); measurements.update(temp);
} else { } else {
let measurements = StationMeasurements { let measurements = StationMeasurements {
@ -60,7 +61,7 @@ pub fn run() {
count: 1, count: 1,
sum: temp, sum: temp,
}; };
t_stations.insert(station, measurements); t_stations.insert(hash, (station.to_string(), measurements));
} }
} }
let _ = tx.send(t_stations); let _ = tx.send(t_stations);
@ -68,18 +69,18 @@ pub fn run() {
} }
drop(tx); drop(tx);
while let Ok(t_stations) = rx.recv() { while let Ok(t_stations) = rx.recv() {
for (station, measurements) in t_stations.iter() { for (hash, (station, measurements)) in t_stations.iter() {
let joined_measurements_options = stations.get_mut(station); let joined_measurements_options = stations.get_mut(hash);
if let Some(joined_measurements) = joined_measurements_options { if let Some((_, joined_measurements)) = joined_measurements_options {
joined_measurements.merge(measurements); joined_measurements.merge(measurements);
} else { } else {
stations.insert(station.to_owned(), *measurements); stations.insert(*hash, (station.to_owned(), *measurements));
} }
} }
} }
let mut stations: Vec<String> = stations let mut stations: Vec<String> = stations
.iter() .iter()
.map(|(station, measurements)| { .map(|(_, (station, measurements))| {
let measurements = measurements.to_string(); let measurements = measurements.to_string();
#[cfg(feature = "json")] #[cfg(feature = "json")]
{ {

View File

@ -57,7 +57,7 @@ pub fn run() {
} }
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station); let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1); let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&hash); let measurements_option = t_stations.get_mut(&hash);
if let Some((_, measurements)) = measurements_option { if let Some((_, measurements)) = measurements_option {
@ -69,7 +69,7 @@ pub fn run() {
count: 1, count: 1,
sum: temp, sum: temp,
}; };
t_stations.insert(hash, (station, measurements)); t_stations.insert(hash, (station.to_string(), measurements));
} }
currposition += line_len as u64; currposition += line_len as u64;
if currposition >= end { if currposition >= end {

View File

@ -1,12 +1,13 @@
use std::collections::HashMap;
use smol::fs::File; use smol::fs::File;
use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom}; use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom};
use rustc_hash::{FxHashMap as HashMap, FxBuildHasher};
use crate::models::station_measurements::StationMeasurements; use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse; use crate::utils::parse;
use std::sync::mpsc; use std::sync::mpsc;
use std::thread; use std::thread;
use std::time::Instant; use std::time::Instant;
use crate::utils::parse::hashstr;
const DEFAULT_HASHMAP_LENGTH: usize = 10000; const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -14,9 +15,8 @@ pub fn run() {
const FILE_PATH: &str = "../../../measurements.txt"; const FILE_PATH: &str = "../../../measurements.txt";
let now = Instant::now(); let now = Instant::now();
thread::scope(|s| { thread::scope(|s| {
let hasher = FxBuildHasher::default(); let mut stations: HashMap<usize, (String, StationMeasurements)> =
let mut stations: HashMap<String, StationMeasurements> = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
let (tx, rx) = mpsc::channel(); let (tx, rx) = mpsc::channel();
let cores = thread::available_parallelism().unwrap().into(); let cores = thread::available_parallelism().unwrap().into();
let bounds = smol::block_on(async { let bounds = smol::block_on(async {
@ -57,8 +57,8 @@ pub fn run() {
.expect("File measurements.txt not found"); .expect("File measurements.txt not found");
let mut reader = BufReader::new(&mut file); let mut reader = BufReader::new(&mut file);
reader.seek(SeekFrom::Start(currposition)).await.unwrap(); reader.seek(SeekFrom::Start(currposition)).await.unwrap();
let mut t_stations: HashMap<String, StationMeasurements> = let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut line = Vec::with_capacity(108); let mut line = Vec::with_capacity(108);
loop { loop {
let line_len = reader let line_len = reader
@ -69,10 +69,11 @@ pub fn run() {
break; break;
} }
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let hash = hashstr(station);
let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1); let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&station); let measurements_option = t_stations.get_mut(&hash);
if let Some(measurements) = measurements_option { if let Some((_, measurements)) = measurements_option {
measurements.update(temp); measurements.update(temp);
} else { } else {
let measurements = StationMeasurements { let measurements = StationMeasurements {
@ -81,7 +82,7 @@ pub fn run() {
count: 1, count: 1,
sum: temp, sum: temp,
}; };
t_stations.insert(station, measurements); t_stations.insert(hash, (station.to_string(), measurements));
} }
currposition += line_len as u64; currposition += line_len as u64;
if currposition >= end { if currposition >= end {
@ -95,18 +96,18 @@ pub fn run() {
} }
drop(tx); drop(tx);
while let Ok(t_stations) = rx.recv() { while let Ok(t_stations) = rx.recv() {
for (station, measurements) in t_stations.iter() { for (hash, (station, measurements)) in t_stations.iter() {
let joined_measurements_options = stations.get_mut(station); let joined_measurements_options = stations.get_mut(hash);
if let Some(joined_measurements) = joined_measurements_options { if let Some((_, joined_measurements)) = joined_measurements_options {
joined_measurements.merge(measurements); joined_measurements.merge(measurements);
} else { } else {
stations.insert(station.to_owned(), *measurements); stations.insert(*hash, (station.to_owned(), *measurements));
} }
} }
} }
let mut stations: Vec<String> = stations let mut stations: Vec<String> = stations
.iter() .iter()
.map(|(station, measurements)| { .map(|(_, (station, measurements))| {
let measurements = measurements.to_string(); let measurements = measurements.to_string();
#[cfg(feature = "json")] #[cfg(feature = "json")]
{ {

View File

@ -25,7 +25,7 @@ pub fn run() {
} }
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station); let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1); let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = stations.get_mut(&hash); let measurements_option = stations.get_mut(&hash);
if let Some((_, measurements)) = measurements_option { if let Some((_, measurements)) = measurements_option {
@ -37,7 +37,7 @@ pub fn run() {
count: 1, count: 1,
sum: temp, sum: temp,
}; };
stations.insert(hash, (station, measurements)); stations.insert(hash, (station.to_string(), measurements));
} }
line.clear(); line.clear();
} }
@ -51,5 +51,5 @@ pub fn run() {
stations.sort(); stations.sort();
let stations = stations.join(","); let stations = stations.join(",");
println!("{{{stations}}}"); println!("{{{stations}}}");
println!("Time={} μs", now.elapsed().as_micros()); println!("Time={} ms", now.elapsed().as_millis());
} }

View File

@ -30,7 +30,7 @@ pub fn run() {
} }
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station); let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1); let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = stations.get_mut(&hash); let measurements_option = stations.get_mut(&hash);
if let Some((_, measurements)) = measurements_option { if let Some((_, measurements)) = measurements_option {
@ -42,7 +42,7 @@ pub fn run() {
count: 1, count: 1,
sum: temp, sum: temp,
}; };
stations.insert(hash, (station, measurements)); stations.insert(hash, (station.to_string(), measurements));
} }
line.clear(); line.clear();
} }