Going back because compile times tripled

This commit is contained in:
Fabian Schmidt 2024-08-05 11:22:08 +02:00
parent 1c066ec113
commit 3b3801ba0d
7 changed files with 45 additions and 67 deletions

View File

@ -616,19 +616,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]] [[package]]
name = "memmap2" name = "memmap"
version = "0.7.1" version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
dependencies = [ dependencies = [
"libc", "libc",
"winapi",
] ]
[[package]] [[package]]
name = "memmap2" name = "memmap2"
version = "0.9.4" version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
dependencies = [ dependencies = [
"libc", "libc",
] ]
@ -698,7 +699,7 @@ dependencies = [
"fast-float", "fast-float",
"libc", "libc",
"memchr", "memchr",
"memmap2 0.9.4", "memmap",
"polars", "polars",
"rayon", "rayon",
"rustc-hash", "rustc-hash",
@ -900,7 +901,7 @@ dependencies = [
"home", "home",
"itoa", "itoa",
"memchr", "memchr",
"memmap2 0.7.1", "memmap2",
"num-traits", "num-traits",
"once_cell", "once_cell",
"percent-encoding", "percent-encoding",

View File

@ -9,7 +9,7 @@ edition = "2021"
bstr = "1.9.1" bstr = "1.9.1"
fast-float = "0.2.0" fast-float = "0.2.0"
memchr = "2.7.4" memchr = "2.7.4"
memmap2 = "0.9.4" memmap = "0.7.0"
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]} polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
rayon = "1.10.0" rayon = "1.10.0"
rustc-hash = "2.0.0" rustc-hash = "2.0.0"
@ -47,7 +47,6 @@ name = "phcs"
harness = false harness = false
[profile.release] [profile.release]
debug = true
lto = "fat" lto = "fat"
#strip = "symbols" strip = "symbols"
panic = "abort" panic = "abort"

View File

@ -1,11 +1,9 @@
use std::{fs::File, io::BufReader, thread}; use std::collections::HashMap;
use std::io::{BufRead, Seek, SeekFrom}; use std::io::{BufRead, Seek, SeekFrom};
use std::sync::mpsc; use std::sync::mpsc;
use std::time::Instant; use std::time::Instant;
use std::{fs::File, io::BufReader, thread};
use memmap2::MmapOptions; use memmap::MmapOptions;
use rustc_hash::{FxBuildHasher, FxHashMap as HashMap};
use crate::models::station_measurements::StationMeasurements; use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse; use crate::utils::parse;
use crate::utils::parse::hashstr; use crate::utils::parse::hashstr;
@ -16,9 +14,8 @@ pub fn run() {
const FILE_PATH: &str = "../../../measurements.txt"; const FILE_PATH: &str = "../../../measurements.txt";
let now = Instant::now(); let now = Instant::now();
thread::scope(|s| { thread::scope(|s| {
let hasher = FxBuildHasher::default();
let mut stations: HashMap<usize, (String, StationMeasurements)> = let mut stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let (tx, rx) = mpsc::channel(); let (tx, rx) = mpsc::channel();
let cores = thread::available_parallelism().unwrap().into(); let cores = thread::available_parallelism().unwrap().into();
let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let file = File::open(FILE_PATH).expect("File measurements.txt not found");
@ -43,19 +40,26 @@ pub fn run() {
bounds.push(file_length); bounds.push(file_length);
for i in 0..cores { for i in 0..cores {
let tx = tx.clone(); let tx = tx.clone();
let currposition = *bounds.get(i).unwrap(); let mut currposition = *bounds.get(i).unwrap();
let end = *bounds.get(i + 1).unwrap(); let end = *bounds.get(i + 1).unwrap();
s.spawn(move || { s.spawn(move || {
let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let file = File::open(FILE_PATH).expect("File measurements.txt not found");
let t_mmap = &unsafe { MmapOptions::new().map(&file).unwrap() }[currposition..end]; let mut reader = BufReader::new(&file);
reader.seek(SeekFrom::Start(currposition as u64)).unwrap();
let mut t_stations: HashMap<usize, (String, StationMeasurements)> = let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
for line in t_mmap.lines() { let mut line = Vec::with_capacity(108);
let line = line.expect("Could not read line"); loop {
let (station, temp) = line.rsplit_once(|char| char == ';').unwrap(); let line_len = reader
.read_until(b'\n', &mut line)
.expect("could not read bytes");
if line_len == 0 {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station); let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
let temp = parse::temp(temp.as_bytes()); let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&hash); let measurements_option = t_stations.get_mut(&hash);
if let Some((_, measurements)) = measurements_option { if let Some((_, measurements)) = measurements_option {
measurements.update(temp); measurements.update(temp);
@ -68,6 +72,11 @@ pub fn run() {
}; };
t_stations.insert(hash, (station, measurements)); t_stations.insert(hash, (station, measurements));
} }
currposition += line_len;
if currposition >= end {
break;
}
line.clear();
} }
let _ = tx.send(t_stations); let _ = tx.send(t_stations);
}); });

View File

@ -1,12 +1,11 @@
use std::{fs::File, io::BufReader, thread};
use std::collections::HashMap; use std::collections::HashMap;
use std::io::{BufRead, Seek, SeekFrom}; use std::io::{BufRead, Seek, SeekFrom};
use std::sync::mpsc; use std::sync::mpsc;
use std::time::Instant; use std::time::Instant;
use std::{fs::File, io::BufReader, thread};
use crate::models::station_measurements::StationMeasurements; use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse; use crate::utils::parse;
use crate::utils::parse::hashbytes; use crate::utils::parse::hashstr;
const DEFAULT_HASHMAP_LENGTH: usize = 10000; const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -57,7 +56,7 @@ pub fn run() {
break; break;
} }
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashbytes(station); let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
let temp = parse::temp(temp.split_last().unwrap().1); let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&hash); let measurements_option = t_stations.get_mut(&hash);

View File

@ -1,5 +1,5 @@
use bstr::{BStr, ByteSlice}; use bstr::{BStr, ByteSlice};
use memmap2::MmapOptions; use memmap::MmapOptions;
use rayon::prelude::*; use rayon::prelude::*;
use rustc_hash::FxHashMap as HashMap; use rustc_hash::FxHashMap as HashMap;
use std::time::Instant; use std::time::Instant;

View File

@ -2,10 +2,9 @@ use std::collections::HashMap;
use std::fs::File; use std::fs::File;
use std::io::{BufRead, BufReader}; use std::io::{BufRead, BufReader};
use std::time::Instant; use std::time::Instant;
use crate::models::station_measurements::StationMeasurements; use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse; use crate::utils::parse;
use crate::utils::parse::hashbytes; use crate::utils::parse::hashstr;
const DEFAULT_HASHMAP_LENGTH: usize = 10000; const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -25,7 +24,7 @@ pub fn run() {
break; break;
} }
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashbytes(station); let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
let temp = parse::temp(temp.split_last().unwrap().1); let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = stations.get_mut(&hash); let measurements_option = stations.get_mut(&hash);

View File

@ -67,7 +67,7 @@ pub fn temp_simd(bytes: &[u8]) -> isize {
} }
#[inline] #[inline]
pub fn hashbytes(bytes: &[u8]) -> usize { pub fn hashstr(bytes: &[u8]) -> usize {
let mut hash = 0; let mut hash = 0;
let (chunks, remainder) = bytes.as_chunks::<8>(); let (chunks, remainder) = bytes.as_chunks::<8>();
for &chunk in chunks { for &chunk in chunks {
@ -84,27 +84,9 @@ pub fn hashbytes(bytes: &[u8]) -> usize {
hash hash
} }
#[inline]
pub fn hashstr(s: &str) -> usize {
let mut hash = 0;
let (chunks, remainder) = s.as_bytes().as_chunks::<8>();
for &chunk in chunks {
hash += usize::from_be_bytes(chunk);
}
let mut r = [0_u8; 8];
r[0] = remainder.len() as u8;
let mut idx = 1;
for &byte in remainder {
r[idx] = byte;
idx += 1;
}
hash += usize::from_be_bytes(r);
hash
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::utils::parse::{hashbytes, hashstr, temp_new}; use crate::utils::parse::{hashstr, temp_new};
#[test] #[test]
fn test_temp_new_max() { fn test_temp_new_max() {
@ -136,23 +118,12 @@ mod tests {
assert_eq!(temp_neg_10, -99); assert_eq!(temp_neg_10, -99);
} }
#[test]
fn test_hashbytes() {
let hash_1 = hashbytes(b"abcdefghijk");
let hash_2 = hashbytes(b"kjihgfedcba");
let hash_3 = hashbytes(b"abba");
let hash_4 = hashbytes(b"baab");
assert_ne!(hash_1, hash_2);
assert_ne!(hash_3, hash_4);
}
#[test] #[test]
fn test_hashstr() { fn test_hashstr() {
let hash_1 = hashstr("abcdefghijk"); let hash_1 = hashstr(b"abcdefghijk");
let hash_2 = hashstr("kjihgfedcba"); let hash_2 = hashstr(b"kjihgfedcba");
let hash_3 = hashstr("abba"); let hash_3 = hashstr(b"abba");
let hash_4 = hashstr("baab"); let hash_4 = hashstr(b"baab");
assert_ne!(hash_1, hash_2); assert_ne!(hash_1, hash_2);
assert_ne!(hash_3, hash_4); assert_ne!(hash_3, hash_4);