Use easy_parallel from the smol project instead of std::thread. No performance improvement, but easier…

This commit is contained in:
Fabian Schmidt 2024-08-27 13:23:51 +02:00
parent e832475fc3
commit a45ddd2dc0

View File

@@ -1,134 +1,129 @@
use std::collections::HashMap;
use smol::fs::File; use smol::fs::File;
use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom}; use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom};
use std::collections::HashMap;
use crate::models::station_measurements::StationMeasurements; use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse; use crate::utils::parse;
use std::sync::mpsc; use crate::utils::parse::hashstr;
use easy_parallel::Parallel;
use std::thread; use std::thread;
use std::time::Instant; use std::time::Instant;
use crate::utils::parse::hashstr;
const DEFAULT_HASHMAP_LENGTH: usize = 10000; const DEFAULT_HASHMAP_LENGTH: usize = 10000;
pub fn run() { pub fn run() {
const FILE_PATH: &str = "../../../measurements.txt"; const FILE_PATH: &str = "../../../measurements.txt";
let now = Instant::now(); let now = Instant::now();
thread::scope(|s| { let mut stations: HashMap<usize, (String, StationMeasurements)> =
let mut stations: HashMap<usize, (String, StationMeasurements)> = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let cores = thread::available_parallelism().unwrap().into();
let (tx, rx) = mpsc::channel(); let bounds = smol::block_on(async {
let cores = thread::available_parallelism().unwrap().into(); let mut file = File::open(FILE_PATH)
let bounds = smol::block_on(async { .await
let mut file = File::open(FILE_PATH) .expect("File measurements.txt not found");
.await let mut reader = BufReader::new(&mut file);
.expect("File measurements.txt not found"); let file_length = reader.seek(SeekFrom::End(0)).await.unwrap();
let chunk_length = file_length as usize / cores;
let mut bounds = Vec::with_capacity(cores + 1);
bounds.push(0);
for i in 1..cores {
let mut reader = BufReader::new(&mut file); let mut reader = BufReader::new(&mut file);
let file_length = reader.seek(SeekFrom::End(0)).await.unwrap(); let mut byte_start = chunk_length * i;
let chunk_length = file_length as usize / cores; reader
let mut bounds = Vec::with_capacity(cores + 1); .seek(SeekFrom::Start(byte_start as u64))
bounds.push(0); .await
for i in 1..cores { .expect("could not seek");
let mut reader = BufReader::new(&mut file); let mut line = Vec::with_capacity(108);
let mut byte_start = chunk_length * i; let line_len = reader
reader .read_until(b'\n', &mut line)
.seek(SeekFrom::Start(byte_start as u64)) .await
.await .expect("could not read bytes");
.expect("could not seek"); byte_start += line_len;
let mut line = Vec::with_capacity(108); bounds.push(byte_start as u64);
let line_len = reader }
.read_until(b'\n', &mut line) bounds.push(file_length);
.await bounds
.expect("could not read bytes"); });
byte_start += line_len; let t_stations_vec = Parallel::new()
bounds.push(byte_start as u64); .each(0..cores, |i| {
}
bounds.push(file_length);
bounds
});
for i in 0..cores {
let tx = tx.clone();
let mut currposition = *bounds.get(i).unwrap(); let mut currposition = *bounds.get(i).unwrap();
let end = *bounds.get(i + 1).unwrap(); let end = *bounds.get(i + 1).unwrap();
s.spawn(move || { smol::block_on(async {
smol::block_on(async { let mut file = File::open(FILE_PATH)
let mut file = File::open(FILE_PATH) .await
.expect("File measurements.txt not found");
let mut reader = BufReader::new(&mut file);
reader.seek(SeekFrom::Start(currposition)).await.unwrap();
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut line = Vec::with_capacity(108);
loop {
let line_len = reader
.read_until(b'\n', &mut line)
.await .await
.expect("File measurements.txt not found"); .expect("could not read bytes");
let mut reader = BufReader::new(&mut file); if line_len == 0 {
reader.seek(SeekFrom::Start(currposition)).await.unwrap(); break;
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut line = Vec::with_capacity(108);
loop {
let line_len = reader
.read_until(b'\n', &mut line)
.await
.expect("could not read bytes");
if line_len == 0 {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station);
let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&hash);
if let Some((_, measurements)) = measurements_option {
measurements.update(temp);
} else {
let measurements = StationMeasurements {
min: temp,
max: temp,
count: 1,
sum: temp,
};
t_stations.insert(hash, (station.to_string(), measurements));
}
currposition += line_len as u64;
if currposition >= end {
break;
}
line.clear();
} }
let _ = tx.send(t_stations); let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
}) let hash = hashstr(station);
}); let station = unsafe { std::str::from_utf8_unchecked(station) };
} let temp = parse::temp(temp.split_last().unwrap().1);
drop(tx); let measurements_option = t_stations.get_mut(&hash);
while let Ok(t_stations) = rx.recv() { if let Some((_, measurements)) = measurements_option {
for (hash, (station, measurements)) in t_stations.iter() { measurements.update(temp);
let joined_measurements_options = stations.get_mut(hash); } else {
if let Some((_, joined_measurements)) = joined_measurements_options { let measurements = StationMeasurements {
joined_measurements.merge(measurements); min: temp,
} else { max: temp,
stations.insert(*hash, (station.to_owned(), *measurements)); count: 1,
sum: temp,
};
t_stations.insert(hash, (station.to_string(), measurements));
}
currposition += line_len as u64;
if currposition >= end {
break;
}
line.clear();
} }
t_stations
})
})
.run();
for t_stations in t_stations_vec {
for (hash, (station, measurements)) in t_stations.iter() {
let joined_measurements_options = stations.get_mut(hash);
if let Some((_, joined_measurements)) = joined_measurements_options {
joined_measurements.merge(measurements);
} else {
stations.insert(*hash, (station.to_owned(), *measurements));
} }
} }
let mut stations: Vec<String> = stations }
.iter() let mut stations: Vec<String> = stations
.map(|(_, (station, measurements))| { .iter()
let measurements = measurements.to_string(); .map(|(_, (station, measurements))| {
#[cfg(feature = "json")] let measurements = measurements.to_string();
{ #[cfg(feature = "json")]
format!("{{\"{station}\":\"{measurements}\"}}") {
} format!("{{\"{station}\":\"{measurements}\"}}")
#[cfg(not(feature = "json"))] }
{ #[cfg(not(feature = "json"))]
format!("{station}={measurements}") {
} format!("{station}={measurements}")
}) }
.collect(); })
stations.sort(); .collect();
let stations = stations.join(","); stations.sort();
#[cfg(feature = "json")] let stations = stations.join(",");
{ #[cfg(feature = "json")]
println!("\n\n[{stations}]"); {
} println!("\n\n[{stations}]");
#[cfg(not(feature = "json"))] }
{ #[cfg(not(feature = "json"))]
println!("\n\n{{{stations}}}"); {
} println!("\n\n{{{stations}}}");
println!("\n\nTime={} ms", now.elapsed().as_millis()); }
}); println!("\n\nTime={} ms", now.elapsed().as_millis());
} }