Going back because compile times tripled

This commit is contained in:
Fabian Schmidt 2024-08-05 11:22:08 +02:00
parent 1c066ec113
commit 3b3801ba0d
7 changed files with 45 additions and 67 deletions

View File

@ -616,19 +616,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "memmap2"
version = "0.7.1"
name = "memmap"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "memmap2"
version = "0.9.4"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
dependencies = [
"libc",
]
@ -698,7 +699,7 @@ dependencies = [
"fast-float",
"libc",
"memchr",
"memmap2 0.9.4",
"memmap",
"polars",
"rayon",
"rustc-hash",
@ -900,7 +901,7 @@ dependencies = [
"home",
"itoa",
"memchr",
"memmap2 0.7.1",
"memmap2",
"num-traits",
"once_cell",
"percent-encoding",

View File

@ -9,7 +9,7 @@ edition = "2021"
bstr = "1.9.1"
fast-float = "0.2.0"
memchr = "2.7.4"
memmap2 = "0.9.4"
memmap = "0.7.0"
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
rayon = "1.10.0"
rustc-hash = "2.0.0"
@ -47,7 +47,6 @@ name = "phcs"
harness = false
[profile.release]
debug = true
lto = "fat"
#strip = "symbols"
strip = "symbols"
panic = "abort"

View File

@ -1,11 +1,9 @@
use std::{fs::File, io::BufReader, thread};
use std::collections::HashMap;
use std::io::{BufRead, Seek, SeekFrom};
use std::sync::mpsc;
use std::time::Instant;
use memmap2::MmapOptions;
use rustc_hash::{FxBuildHasher, FxHashMap as HashMap};
use std::{fs::File, io::BufReader, thread};
use memmap::MmapOptions;
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashstr;
@ -16,9 +14,8 @@ pub fn run() {
const FILE_PATH: &str = "../../../measurements.txt";
let now = Instant::now();
thread::scope(|s| {
let hasher = FxBuildHasher::default();
let mut stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let (tx, rx) = mpsc::channel();
let cores = thread::available_parallelism().unwrap().into();
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
@ -43,19 +40,26 @@ pub fn run() {
bounds.push(file_length);
for i in 0..cores {
let tx = tx.clone();
let currposition = *bounds.get(i).unwrap();
let mut currposition = *bounds.get(i).unwrap();
let end = *bounds.get(i + 1).unwrap();
s.spawn(move || {
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
let t_mmap = &unsafe { MmapOptions::new().map(&file).unwrap() }[currposition..end];
let mut reader = BufReader::new(&file);
reader.seek(SeekFrom::Start(currposition as u64)).unwrap();
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
for line in t_mmap.lines() {
let line = line.expect("Could not read line");
let (station, temp) = line.rsplit_once(|char| char == ';').unwrap();
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut line = Vec::with_capacity(108);
loop {
let line_len = reader
.read_until(b'\n', &mut line)
.expect("could not read bytes");
if line_len == 0 {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
let temp = parse::temp(temp.as_bytes());
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&hash);
if let Some((_, measurements)) = measurements_option {
measurements.update(temp);
@ -68,6 +72,11 @@ pub fn run() {
};
t_stations.insert(hash, (station, measurements));
}
currposition += line_len;
if currposition >= end {
break;
}
line.clear();
}
let _ = tx.send(t_stations);
});

View File

@ -1,12 +1,11 @@
use std::{fs::File, io::BufReader, thread};
use std::collections::HashMap;
use std::io::{BufRead, Seek, SeekFrom};
use std::sync::mpsc;
use std::time::Instant;
use std::{fs::File, io::BufReader, thread};
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashbytes;
use crate::utils::parse::hashstr;
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -57,7 +56,7 @@ pub fn run() {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashbytes(station);
let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&hash);

View File

@ -1,5 +1,5 @@
use bstr::{BStr, ByteSlice};
use memmap2::MmapOptions;
use memmap::MmapOptions;
use rayon::prelude::*;
use rustc_hash::FxHashMap as HashMap;
use std::time::Instant;

View File

@ -2,10 +2,9 @@ use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::time::Instant;
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashbytes;
use crate::utils::parse::hashstr;
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -25,7 +24,7 @@ pub fn run() {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashbytes(station);
let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = stations.get_mut(&hash);

View File

@ -67,7 +67,7 @@ pub fn temp_simd(bytes: &[u8]) -> isize {
}
#[inline]
pub fn hashbytes(bytes: &[u8]) -> usize {
pub fn hashstr(bytes: &[u8]) -> usize {
let mut hash = 0;
let (chunks, remainder) = bytes.as_chunks::<8>();
for &chunk in chunks {
@ -84,27 +84,9 @@ pub fn hashbytes(bytes: &[u8]) -> usize {
hash
}
#[inline]
pub fn hashstr(s: &str) -> usize {
let mut hash = 0;
let (chunks, remainder) = s.as_bytes().as_chunks::<8>();
for &chunk in chunks {
hash += usize::from_be_bytes(chunk);
}
let mut r = [0_u8; 8];
r[0] = remainder.len() as u8;
let mut idx = 1;
for &byte in remainder {
r[idx] = byte;
idx += 1;
}
hash += usize::from_be_bytes(r);
hash
}
#[cfg(test)]
mod tests {
use crate::utils::parse::{hashbytes, hashstr, temp_new};
use crate::utils::parse::{hashstr, temp_new};
#[test]
fn test_temp_new_max() {
@ -136,23 +118,12 @@ mod tests {
assert_eq!(temp_neg_10, -99);
}
#[test]
fn test_hashbytes() {
let hash_1 = hashbytes(b"abcdefghijk");
let hash_2 = hashbytes(b"kjihgfedcba");
let hash_3 = hashbytes(b"abba");
let hash_4 = hashbytes(b"baab");
assert_ne!(hash_1, hash_2);
assert_ne!(hash_3, hash_4);
}
#[test]
fn test_hashstr() {
let hash_1 = hashstr("abcdefghijk");
let hash_2 = hashstr("kjihgfedcba");
let hash_3 = hashstr("abba");
let hash_4 = hashstr("baab");
let hash_1 = hashstr(b"abcdefghijk");
let hash_2 = hashstr(b"kjihgfedcba");
let hash_3 = hashstr(b"abba");
let hash_4 = hashstr(b"baab");
assert_ne!(hash_1, hash_2);
assert_ne!(hash_3, hash_4);