Use scoped threads so the mutex can be shared without Arc

Apply clippy suggestions to my implementations
This commit is contained in:
Fabian Schmidt 2024-07-22 15:12:09 +02:00
parent a3b6075b6a
commit 1193ada82d
2 changed files with 68 additions and 60 deletions

View File

@ -1,35 +1,49 @@
use std::{ use std::{
fs::File, fs::File,
io::{BufRead, BufReader}, io::{BufRead, BufReader},
sync::{Arc, Mutex}, sync::{Mutex},
thread, thread,
}; };
use std::collections::HashMap; use std::collections::HashMap;
use std::time::Instant; use std::time::Instant;
use onebrc::format_nums;
const DEFAULT_HASHMAP_LENGTH: usize = 10000; const DEFAULT_HASHMAP_LENGTH: usize = 10000;
fn main() { fn main() {
print!("\x1b[2J");
let stations:Mutex<HashMap<String, onebrc::StationMeasurements>> =
Mutex::new(HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH));
thread::scope(|s| {
// Shadowing `stations` with a shared reference means the `move` closures
// capture the reference rather than the Mutex itself, which removes the
// need for reference counting (Arc).
// No performance improvement, but less complex.
let stations = &stations;
let now = Instant::now(); let now = Instant::now();
let stations: Arc<Mutex<HashMap<String, onebrc::StationMeasurements>>> =
Arc::new(Mutex::new(HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH)));
let cores: usize = thread::available_parallelism().unwrap().into(); let cores: usize = thread::available_parallelism().unwrap().into();
let chunk_length = 1_000_000_000 / cores; let chunk_length = 1_000_000_000 / cores;
let mut handles = vec![];
for i in 0..cores { for i in 0..cores {
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
let reader = BufReader::new(file); let reader = BufReader::new(file);
let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length); let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length);
let stations_clone = stations.clone(); s.spawn(move || {
let handle = thread::spawn(move || {
let mut t_stations: HashMap<String, onebrc::StationMeasurements> = let mut t_stations: HashMap<String, onebrc::StationMeasurements> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let now_read_line = Instant::now(); let now_read_line = Instant::now();
println!("Start reading lines in thread {i}"); let print_line = i + 1;
let mut line_num = 0;
line_chunk.for_each(|line| { line_chunk.for_each(|line| {
if line_num == 0 {
print!("\x1b[{print_line};30HStart read line {}ms", now_read_line.elapsed().as_millis());
}
if line_num % 10000 == 0 {
let formatted = format_nums(line_num);
print!("\x1b[{print_line};0HThread #{i:0>2}: {formatted}");
}
line_num += 1;
let line = line.expect("could not read line"); let line = line.expect("could not read line");
let (station, temp) = line.split_once(';').expect("Error while splitting"); let (station, temp) = line.split_once(';').expect("Error while splitting");
let temp = onebrc::parse_temp(temp.as_bytes()); let temp = onebrc::parse_temp(temp.as_bytes());
@ -46,11 +60,9 @@ fn main() {
t_stations.insert(station.to_owned(), measurements); t_stations.insert(station.to_owned(), measurements);
} }
}); });
println!("Time reading lines in thread {i}={} μs", now_read_line.elapsed().as_micros()); print!("\x1b[{print_line};60HTime reading lines in thread {i}={} ms", now_read_line.elapsed().as_millis());
let now_insert_line = Instant::now();
println!("Start inserting lines in thread {i}");
for (station, measurements) in t_stations.iter() { for (station, measurements) in t_stations.iter() {
let mut stations_guard = stations_clone.lock().expect("Error while locking"); let mut stations_guard = stations.lock().expect("Error while locking");
let joined_measurements_options = stations_guard.get_mut(station.as_str()); let joined_measurements_options = stations_guard.get_mut(station.as_str());
if let Some(joined_measurements) = joined_measurements_options { if let Some(joined_measurements) = joined_measurements_options {
joined_measurements.merge(measurements); joined_measurements.merge(measurements);
@ -58,19 +70,15 @@ fn main() {
stations_guard.insert(station.to_owned(), *measurements); stations_guard.insert(station.to_owned(), *measurements);
} }
} }
println!("Time inserting lines in thread {i}={} μs", now_insert_line.elapsed().as_micros());
}); });
handles.push(handle);
} }
for handle in handles { let mut stations: Vec<String> = stations.lock().unwrap().iter().map(|(station, measurements)| {
handle.join().unwrap();
}
let mut stations: Vec<String> = stations.lock().unwrap().iter().map(|(&ref station, &ref measurements)| {
let measurements = measurements.to_string(); let measurements = measurements.to_string();
format!("{station}={measurements}") format!("{station}={measurements}")
}).collect(); }).collect();
stations.sort(); stations.sort();
let stations = stations.join(","); let _stations = stations.join(",");
println!("{{{stations}}}"); // println!("{{{stations}}}");
println!("Time={} μs", now.elapsed().as_micros()); println!("\n\nTime={} ms", now.elapsed().as_millis());
});
} }

View File

@ -32,7 +32,7 @@ fn main() {
stations.insert(station.to_owned(), measurements); stations.insert(station.to_owned(), measurements);
} }
} }
let mut stations: Vec<String> = stations.iter().map(|(&ref station, &ref measurements)| { let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
let measurements = measurements.to_string(); let measurements = measurements.to_string();
format!("{station}={measurements}") format!("{station}={measurements}")
}).collect(); }).collect();