Going back because compile times tripled
This commit is contained in:
parent
1c066ec113
commit
3b3801ba0d
15
src/main/rust/Cargo.lock
generated
15
src/main/rust/Cargo.lock
generated
@ -616,19 +616,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memmap2"
|
name = "memmap"
|
||||||
version = "0.7.1"
|
version = "0.7.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
|
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memmap2"
|
name = "memmap2"
|
||||||
version = "0.9.4"
|
version = "0.7.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
|
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
@ -698,7 +699,7 @@ dependencies = [
|
|||||||
"fast-float",
|
"fast-float",
|
||||||
"libc",
|
"libc",
|
||||||
"memchr",
|
"memchr",
|
||||||
"memmap2 0.9.4",
|
"memmap",
|
||||||
"polars",
|
"polars",
|
||||||
"rayon",
|
"rayon",
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
@ -900,7 +901,7 @@ dependencies = [
|
|||||||
"home",
|
"home",
|
||||||
"itoa",
|
"itoa",
|
||||||
"memchr",
|
"memchr",
|
||||||
"memmap2 0.7.1",
|
"memmap2",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
|
@ -9,7 +9,7 @@ edition = "2021"
|
|||||||
bstr = "1.9.1"
|
bstr = "1.9.1"
|
||||||
fast-float = "0.2.0"
|
fast-float = "0.2.0"
|
||||||
memchr = "2.7.4"
|
memchr = "2.7.4"
|
||||||
memmap2 = "0.9.4"
|
memmap = "0.7.0"
|
||||||
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
|
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
|
||||||
rayon = "1.10.0"
|
rayon = "1.10.0"
|
||||||
rustc-hash = "2.0.0"
|
rustc-hash = "2.0.0"
|
||||||
@ -47,7 +47,6 @@ name = "phcs"
|
|||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
debug = true
|
|
||||||
lto = "fat"
|
lto = "fat"
|
||||||
#strip = "symbols"
|
strip = "symbols"
|
||||||
panic = "abort"
|
panic = "abort"
|
||||||
|
@ -1,11 +1,9 @@
|
|||||||
use std::{fs::File, io::BufReader, thread};
|
use std::collections::HashMap;
|
||||||
use std::io::{BufRead, Seek, SeekFrom};
|
use std::io::{BufRead, Seek, SeekFrom};
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
use std::{fs::File, io::BufReader, thread};
|
||||||
use memmap2::MmapOptions;
|
use memmap::MmapOptions;
|
||||||
use rustc_hash::{FxBuildHasher, FxHashMap as HashMap};
|
|
||||||
|
|
||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::parse;
|
||||||
use crate::utils::parse::hashstr;
|
use crate::utils::parse::hashstr;
|
||||||
@ -16,9 +14,8 @@ pub fn run() {
|
|||||||
const FILE_PATH: &str = "../../../measurements.txt";
|
const FILE_PATH: &str = "../../../measurements.txt";
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
thread::scope(|s| {
|
thread::scope(|s| {
|
||||||
let hasher = FxBuildHasher::default();
|
|
||||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
let (tx, rx) = mpsc::channel();
|
let (tx, rx) = mpsc::channel();
|
||||||
let cores = thread::available_parallelism().unwrap().into();
|
let cores = thread::available_parallelism().unwrap().into();
|
||||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||||
@ -43,19 +40,26 @@ pub fn run() {
|
|||||||
bounds.push(file_length);
|
bounds.push(file_length);
|
||||||
for i in 0..cores {
|
for i in 0..cores {
|
||||||
let tx = tx.clone();
|
let tx = tx.clone();
|
||||||
let currposition = *bounds.get(i).unwrap();
|
let mut currposition = *bounds.get(i).unwrap();
|
||||||
let end = *bounds.get(i + 1).unwrap();
|
let end = *bounds.get(i + 1).unwrap();
|
||||||
s.spawn(move || {
|
s.spawn(move || {
|
||||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||||
let t_mmap = &unsafe { MmapOptions::new().map(&file).unwrap() }[currposition..end];
|
let mut reader = BufReader::new(&file);
|
||||||
|
reader.seek(SeekFrom::Start(currposition as u64)).unwrap();
|
||||||
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
for line in t_mmap.lines() {
|
let mut line = Vec::with_capacity(108);
|
||||||
let line = line.expect("Could not read line");
|
loop {
|
||||||
let (station, temp) = line.rsplit_once(|char| char == ';').unwrap();
|
let line_len = reader
|
||||||
|
.read_until(b'\n', &mut line)
|
||||||
|
.expect("could not read bytes");
|
||||||
|
if line_len == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hashstr(station);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||||
let temp = parse::temp(temp.as_bytes());
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = t_stations.get_mut(&hash);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
if let Some((_, measurements)) = measurements_option {
|
if let Some((_, measurements)) = measurements_option {
|
||||||
measurements.update(temp);
|
measurements.update(temp);
|
||||||
@ -68,6 +72,11 @@ pub fn run() {
|
|||||||
};
|
};
|
||||||
t_stations.insert(hash, (station, measurements));
|
t_stations.insert(hash, (station, measurements));
|
||||||
}
|
}
|
||||||
|
currposition += line_len;
|
||||||
|
if currposition >= end {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
line.clear();
|
||||||
}
|
}
|
||||||
let _ = tx.send(t_stations);
|
let _ = tx.send(t_stations);
|
||||||
});
|
});
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
use std::{fs::File, io::BufReader, thread};
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::{BufRead, Seek, SeekFrom};
|
use std::io::{BufRead, Seek, SeekFrom};
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
use std::{fs::File, io::BufReader, thread};
|
||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::parse;
|
||||||
use crate::utils::parse::hashbytes;
|
use crate::utils::parse::hashstr;
|
||||||
|
|
||||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
@ -57,7 +56,7 @@ pub fn run() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashbytes(station);
|
let hash = hashstr(station);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = t_stations.get_mut(&hash);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use bstr::{BStr, ByteSlice};
|
use bstr::{BStr, ByteSlice};
|
||||||
use memmap2::MmapOptions;
|
use memmap::MmapOptions;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use rustc_hash::FxHashMap as HashMap;
|
use rustc_hash::FxHashMap as HashMap;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
@ -2,10 +2,9 @@ use std::collections::HashMap;
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader};
|
use std::io::{BufRead, BufReader};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::parse;
|
||||||
use crate::utils::parse::hashbytes;
|
use crate::utils::parse::hashstr;
|
||||||
|
|
||||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
@ -25,7 +24,7 @@ pub fn run() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashbytes(station);
|
let hash = hashstr(station);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = stations.get_mut(&hash);
|
let measurements_option = stations.get_mut(&hash);
|
||||||
|
@ -67,7 +67,7 @@ pub fn temp_simd(bytes: &[u8]) -> isize {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn hashbytes(bytes: &[u8]) -> usize {
|
pub fn hashstr(bytes: &[u8]) -> usize {
|
||||||
let mut hash = 0;
|
let mut hash = 0;
|
||||||
let (chunks, remainder) = bytes.as_chunks::<8>();
|
let (chunks, remainder) = bytes.as_chunks::<8>();
|
||||||
for &chunk in chunks {
|
for &chunk in chunks {
|
||||||
@ -84,27 +84,9 @@ pub fn hashbytes(bytes: &[u8]) -> usize {
|
|||||||
hash
|
hash
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn hashstr(s: &str) -> usize {
|
|
||||||
let mut hash = 0;
|
|
||||||
let (chunks, remainder) = s.as_bytes().as_chunks::<8>();
|
|
||||||
for &chunk in chunks {
|
|
||||||
hash += usize::from_be_bytes(chunk);
|
|
||||||
}
|
|
||||||
let mut r = [0_u8; 8];
|
|
||||||
r[0] = remainder.len() as u8;
|
|
||||||
let mut idx = 1;
|
|
||||||
for &byte in remainder {
|
|
||||||
r[idx] = byte;
|
|
||||||
idx += 1;
|
|
||||||
}
|
|
||||||
hash += usize::from_be_bytes(r);
|
|
||||||
hash
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::utils::parse::{hashbytes, hashstr, temp_new};
|
use crate::utils::parse::{hashstr, temp_new};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_temp_new_max() {
|
fn test_temp_new_max() {
|
||||||
@ -136,23 +118,12 @@ mod tests {
|
|||||||
assert_eq!(temp_neg_10, -99);
|
assert_eq!(temp_neg_10, -99);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_hashbytes() {
|
|
||||||
let hash_1 = hashbytes(b"abcdefghijk");
|
|
||||||
let hash_2 = hashbytes(b"kjihgfedcba");
|
|
||||||
let hash_3 = hashbytes(b"abba");
|
|
||||||
let hash_4 = hashbytes(b"baab");
|
|
||||||
|
|
||||||
assert_ne!(hash_1, hash_2);
|
|
||||||
assert_ne!(hash_3, hash_4);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_hashstr() {
|
fn test_hashstr() {
|
||||||
let hash_1 = hashstr("abcdefghijk");
|
let hash_1 = hashstr(b"abcdefghijk");
|
||||||
let hash_2 = hashstr("kjihgfedcba");
|
let hash_2 = hashstr(b"kjihgfedcba");
|
||||||
let hash_3 = hashstr("abba");
|
let hash_3 = hashstr(b"abba");
|
||||||
let hash_4 = hashstr("baab");
|
let hash_4 = hashstr(b"baab");
|
||||||
|
|
||||||
assert_ne!(hash_1, hash_2);
|
assert_ne!(hash_1, hash_2);
|
||||||
assert_ne!(hash_3, hash_4);
|
assert_ne!(hash_3, hash_4);
|
||||||
|
Loading…
Reference in New Issue
Block a user