Going back because compile times tripled
This commit is contained in:
parent
1c066ec113
commit
3b3801ba0d
15
src/main/rust/Cargo.lock
generated
15
src/main/rust/Cargo.lock
generated
@ -616,19 +616,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.7.1"
|
||||
name = "memmap"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
|
||||
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.4"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
|
||||
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
@ -698,7 +699,7 @@ dependencies = [
|
||||
"fast-float",
|
||||
"libc",
|
||||
"memchr",
|
||||
"memmap2 0.9.4",
|
||||
"memmap",
|
||||
"polars",
|
||||
"rayon",
|
||||
"rustc-hash",
|
||||
@ -900,7 +901,7 @@ dependencies = [
|
||||
"home",
|
||||
"itoa",
|
||||
"memchr",
|
||||
"memmap2 0.7.1",
|
||||
"memmap2",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
|
@ -9,7 +9,7 @@ edition = "2021"
|
||||
bstr = "1.9.1"
|
||||
fast-float = "0.2.0"
|
||||
memchr = "2.7.4"
|
||||
memmap2 = "0.9.4"
|
||||
memmap = "0.7.0"
|
||||
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
|
||||
rayon = "1.10.0"
|
||||
rustc-hash = "2.0.0"
|
||||
@ -47,7 +47,6 @@ name = "phcs"
|
||||
harness = false
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
lto = "fat"
|
||||
#strip = "symbols"
|
||||
strip = "symbols"
|
||||
panic = "abort"
|
||||
|
@ -1,11 +1,9 @@
|
||||
use std::{fs::File, io::BufReader, thread};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{BufRead, Seek, SeekFrom};
|
||||
use std::sync::mpsc;
|
||||
use std::time::Instant;
|
||||
|
||||
use memmap2::MmapOptions;
|
||||
use rustc_hash::{FxBuildHasher, FxHashMap as HashMap};
|
||||
|
||||
use std::{fs::File, io::BufReader, thread};
|
||||
use memmap::MmapOptions;
|
||||
use crate::models::station_measurements::StationMeasurements;
|
||||
use crate::utils::parse;
|
||||
use crate::utils::parse::hashstr;
|
||||
@ -16,9 +14,8 @@ pub fn run() {
|
||||
const FILE_PATH: &str = "../../../measurements.txt";
|
||||
let now = Instant::now();
|
||||
thread::scope(|s| {
|
||||
let hasher = FxBuildHasher::default();
|
||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
let (tx, rx) = mpsc::channel();
|
||||
let cores = thread::available_parallelism().unwrap().into();
|
||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||
@ -43,19 +40,26 @@ pub fn run() {
|
||||
bounds.push(file_length);
|
||||
for i in 0..cores {
|
||||
let tx = tx.clone();
|
||||
let currposition = *bounds.get(i).unwrap();
|
||||
let mut currposition = *bounds.get(i).unwrap();
|
||||
let end = *bounds.get(i + 1).unwrap();
|
||||
s.spawn(move || {
|
||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||
let t_mmap = &unsafe { MmapOptions::new().map(&file).unwrap() }[currposition..end];
|
||||
let mut reader = BufReader::new(&file);
|
||||
reader.seek(SeekFrom::Start(currposition as u64)).unwrap();
|
||||
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||
for line in t_mmap.lines() {
|
||||
let line = line.expect("Could not read line");
|
||||
let (station, temp) = line.rsplit_once(|char| char == ';').unwrap();
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
let mut line = Vec::with_capacity(108);
|
||||
loop {
|
||||
let line_len = reader
|
||||
.read_until(b'\n', &mut line)
|
||||
.expect("could not read bytes");
|
||||
if line_len == 0 {
|
||||
break;
|
||||
}
|
||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||
let hash = hashstr(station);
|
||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||
let temp = parse::temp(temp.as_bytes());
|
||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||
let measurements_option = t_stations.get_mut(&hash);
|
||||
if let Some((_, measurements)) = measurements_option {
|
||||
measurements.update(temp);
|
||||
@ -68,6 +72,11 @@ pub fn run() {
|
||||
};
|
||||
t_stations.insert(hash, (station, measurements));
|
||||
}
|
||||
currposition += line_len;
|
||||
if currposition >= end {
|
||||
break;
|
||||
}
|
||||
line.clear();
|
||||
}
|
||||
let _ = tx.send(t_stations);
|
||||
});
|
||||
|
@ -1,12 +1,11 @@
|
||||
use std::{fs::File, io::BufReader, thread};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{BufRead, Seek, SeekFrom};
|
||||
use std::sync::mpsc;
|
||||
use std::time::Instant;
|
||||
|
||||
use std::{fs::File, io::BufReader, thread};
|
||||
use crate::models::station_measurements::StationMeasurements;
|
||||
use crate::utils::parse;
|
||||
use crate::utils::parse::hashbytes;
|
||||
use crate::utils::parse::hashstr;
|
||||
|
||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
|
||||
@ -57,7 +56,7 @@ pub fn run() {
|
||||
break;
|
||||
}
|
||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||
let hash = hashbytes(station);
|
||||
let hash = hashstr(station);
|
||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||
let measurements_option = t_stations.get_mut(&hash);
|
||||
|
@ -1,5 +1,5 @@
|
||||
use bstr::{BStr, ByteSlice};
|
||||
use memmap2::MmapOptions;
|
||||
use memmap::MmapOptions;
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashMap as HashMap;
|
||||
use std::time::Instant;
|
||||
|
@ -2,10 +2,9 @@ use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::models::station_measurements::StationMeasurements;
|
||||
use crate::utils::parse;
|
||||
use crate::utils::parse::hashbytes;
|
||||
use crate::utils::parse::hashstr;
|
||||
|
||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
|
||||
@ -25,7 +24,7 @@ pub fn run() {
|
||||
break;
|
||||
}
|
||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||
let hash = hashbytes(station);
|
||||
let hash = hashstr(station);
|
||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||
let measurements_option = stations.get_mut(&hash);
|
||||
|
@ -67,7 +67,7 @@ pub fn temp_simd(bytes: &[u8]) -> isize {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn hashbytes(bytes: &[u8]) -> usize {
|
||||
pub fn hashstr(bytes: &[u8]) -> usize {
|
||||
let mut hash = 0;
|
||||
let (chunks, remainder) = bytes.as_chunks::<8>();
|
||||
for &chunk in chunks {
|
||||
@ -84,27 +84,9 @@ pub fn hashbytes(bytes: &[u8]) -> usize {
|
||||
hash
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn hashstr(s: &str) -> usize {
|
||||
let mut hash = 0;
|
||||
let (chunks, remainder) = s.as_bytes().as_chunks::<8>();
|
||||
for &chunk in chunks {
|
||||
hash += usize::from_be_bytes(chunk);
|
||||
}
|
||||
let mut r = [0_u8; 8];
|
||||
r[0] = remainder.len() as u8;
|
||||
let mut idx = 1;
|
||||
for &byte in remainder {
|
||||
r[idx] = byte;
|
||||
idx += 1;
|
||||
}
|
||||
hash += usize::from_be_bytes(r);
|
||||
hash
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::utils::parse::{hashbytes, hashstr, temp_new};
|
||||
use crate::utils::parse::{hashstr, temp_new};
|
||||
|
||||
#[test]
|
||||
fn test_temp_new_max() {
|
||||
@ -136,23 +118,12 @@ mod tests {
|
||||
assert_eq!(temp_neg_10, -99);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hashbytes() {
|
||||
let hash_1 = hashbytes(b"abcdefghijk");
|
||||
let hash_2 = hashbytes(b"kjihgfedcba");
|
||||
let hash_3 = hashbytes(b"abba");
|
||||
let hash_4 = hashbytes(b"baab");
|
||||
|
||||
assert_ne!(hash_1, hash_2);
|
||||
assert_ne!(hash_3, hash_4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hashstr() {
|
||||
let hash_1 = hashstr("abcdefghijk");
|
||||
let hash_2 = hashstr("kjihgfedcba");
|
||||
let hash_3 = hashstr("abba");
|
||||
let hash_4 = hashstr("baab");
|
||||
let hash_1 = hashstr(b"abcdefghijk");
|
||||
let hash_2 = hashstr(b"kjihgfedcba");
|
||||
let hash_3 = hashstr(b"abba");
|
||||
let hash_4 = hashstr(b"baab");
|
||||
|
||||
assert_ne!(hash_1, hash_2);
|
||||
assert_ne!(hash_3, hash_4);
|
||||
|
Loading…
Reference in New Issue
Block a user