Going back because compile times tripled
This commit is contained in:
		
							
								
								
									
										15
									
								
								src/main/rust/Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										15
									
								
								src/main/rust/Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -616,19 +616,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" | ||||
|  | ||||
| [[package]] | ||||
| name = "memmap2" | ||||
| version = "0.7.1" | ||||
| name = "memmap" | ||||
| version = "0.7.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" | ||||
| checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" | ||||
| dependencies = [ | ||||
|  "libc", | ||||
|  "winapi", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "memmap2" | ||||
| version = "0.9.4" | ||||
| version = "0.7.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" | ||||
| checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" | ||||
| dependencies = [ | ||||
|  "libc", | ||||
| ] | ||||
| @@ -698,7 +699,7 @@ dependencies = [ | ||||
|  "fast-float", | ||||
|  "libc", | ||||
|  "memchr", | ||||
|  "memmap2 0.9.4", | ||||
|  "memmap", | ||||
|  "polars", | ||||
|  "rayon", | ||||
|  "rustc-hash", | ||||
| @@ -900,7 +901,7 @@ dependencies = [ | ||||
|  "home", | ||||
|  "itoa", | ||||
|  "memchr", | ||||
|  "memmap2 0.7.1", | ||||
|  "memmap2", | ||||
|  "num-traits", | ||||
|  "once_cell", | ||||
|  "percent-encoding", | ||||
|   | ||||
| @@ -9,7 +9,7 @@ edition = "2021" | ||||
| bstr = "1.9.1" | ||||
| fast-float = "0.2.0" | ||||
| memchr = "2.7.4" | ||||
| memmap2 = "0.9.4" | ||||
| memmap = "0.7.0" | ||||
| polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]} | ||||
| rayon = "1.10.0" | ||||
| rustc-hash = "2.0.0" | ||||
| @@ -47,7 +47,6 @@ name = "phcs" | ||||
| harness = false | ||||
|  | ||||
| [profile.release] | ||||
| debug = true | ||||
| lto = "fat" | ||||
| #strip = "symbols" | ||||
| strip = "symbols" | ||||
| panic = "abort" | ||||
|   | ||||
| @@ -1,11 +1,9 @@ | ||||
| use std::{fs::File, io::BufReader, thread}; | ||||
| use std::collections::HashMap; | ||||
| use std::io::{BufRead, Seek, SeekFrom}; | ||||
| use std::sync::mpsc; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use memmap2::MmapOptions; | ||||
| use rustc_hash::{FxBuildHasher, FxHashMap as HashMap}; | ||||
|  | ||||
| use std::{fs::File, io::BufReader, thread}; | ||||
| use memmap::MmapOptions; | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::parse; | ||||
| use crate::utils::parse::hashstr; | ||||
| @@ -16,9 +14,8 @@ pub fn run() { | ||||
|     const FILE_PATH: &str = "../../../measurements.txt"; | ||||
|     let now = Instant::now(); | ||||
|     thread::scope(|s| { | ||||
|         let hasher = FxBuildHasher::default(); | ||||
|         let mut stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|             HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||
|             HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|         let (tx, rx) = mpsc::channel(); | ||||
|         let cores = thread::available_parallelism().unwrap().into(); | ||||
|         let file = File::open(FILE_PATH).expect("File measurements.txt not found"); | ||||
| @@ -43,19 +40,26 @@ pub fn run() { | ||||
|         bounds.push(file_length); | ||||
|         for i in 0..cores { | ||||
|             let tx = tx.clone(); | ||||
|             let currposition = *bounds.get(i).unwrap(); | ||||
|             let mut currposition = *bounds.get(i).unwrap(); | ||||
|             let end = *bounds.get(i + 1).unwrap(); | ||||
|             s.spawn(move || { | ||||
|                 let file = File::open(FILE_PATH).expect("File measurements.txt not found"); | ||||
|                 let t_mmap = &unsafe { MmapOptions::new().map(&file).unwrap() }[currposition..end]; | ||||
|                 let mut reader = BufReader::new(&file); | ||||
|                 reader.seek(SeekFrom::Start(currposition as u64)).unwrap(); | ||||
|                 let mut t_stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|                     HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||
|                for line in t_mmap.lines() { | ||||
|                    let line = line.expect("Could not read line"); | ||||
|                     let (station, temp) = line.rsplit_once(|char| char == ';').unwrap(); | ||||
|                     HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|                 let mut line = Vec::with_capacity(108); | ||||
|                 loop { | ||||
|                     let line_len = reader | ||||
|                         .read_until(b'\n', &mut line) | ||||
|                         .expect("could not read bytes"); | ||||
|                     if line_len == 0 { | ||||
|                         break; | ||||
|                     } | ||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||
|                     let hash = hashstr(station); | ||||
|                     let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; | ||||
|                     let temp = parse::temp(temp.as_bytes()); | ||||
|                     let temp = parse::temp(temp.split_last().unwrap().1); | ||||
|                     let measurements_option = t_stations.get_mut(&hash); | ||||
|                     if let Some((_, measurements)) = measurements_option { | ||||
|                         measurements.update(temp); | ||||
| @@ -68,6 +72,11 @@ pub fn run() { | ||||
|                         }; | ||||
|                         t_stations.insert(hash, (station, measurements)); | ||||
|                     } | ||||
|                     currposition += line_len; | ||||
|                     if currposition >= end { | ||||
|                         break; | ||||
|                     } | ||||
|                     line.clear(); | ||||
|                 } | ||||
|                 let _ = tx.send(t_stations); | ||||
|             }); | ||||
|   | ||||
| @@ -1,12 +1,11 @@ | ||||
| use std::{fs::File, io::BufReader, thread}; | ||||
| use std::collections::HashMap; | ||||
| use std::io::{BufRead, Seek, SeekFrom}; | ||||
| use std::sync::mpsc; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use std::{fs::File, io::BufReader, thread}; | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::parse; | ||||
| use crate::utils::parse::hashbytes; | ||||
| use crate::utils::parse::hashstr; | ||||
|  | ||||
| const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||
|  | ||||
| @@ -57,7 +56,7 @@ pub fn run() { | ||||
|                         break; | ||||
|                     } | ||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||
|                     let hash = hashbytes(station); | ||||
|                     let hash = hashstr(station); | ||||
|                     let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; | ||||
|                     let temp = parse::temp(temp.split_last().unwrap().1); | ||||
|                     let measurements_option = t_stations.get_mut(&hash); | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| use bstr::{BStr, ByteSlice}; | ||||
| use memmap2::MmapOptions; | ||||
| use memmap::MmapOptions; | ||||
| use rayon::prelude::*; | ||||
| use rustc_hash::FxHashMap as HashMap; | ||||
| use std::time::Instant; | ||||
|   | ||||
| @@ -2,10 +2,9 @@ use std::collections::HashMap; | ||||
| use std::fs::File; | ||||
| use std::io::{BufRead, BufReader}; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::parse; | ||||
| use crate::utils::parse::hashbytes; | ||||
| use crate::utils::parse::hashstr; | ||||
|  | ||||
| const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||
|  | ||||
| @@ -25,7 +24,7 @@ pub fn run() { | ||||
|             break; | ||||
|         } | ||||
|         let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||
|         let hash = hashbytes(station); | ||||
|         let hash = hashstr(station); | ||||
|         let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; | ||||
|         let temp = parse::temp(temp.split_last().unwrap().1); | ||||
|         let measurements_option = stations.get_mut(&hash); | ||||
|   | ||||
| @@ -67,7 +67,7 @@ pub fn temp_simd(bytes: &[u8]) -> isize { | ||||
| } | ||||
|  | ||||
| #[inline] | ||||
| pub fn hashbytes(bytes: &[u8]) -> usize { | ||||
| pub fn hashstr(bytes: &[u8]) -> usize { | ||||
|     let mut hash = 0; | ||||
|     let (chunks, remainder) = bytes.as_chunks::<8>(); | ||||
|     for &chunk in chunks { | ||||
| @@ -84,27 +84,9 @@ pub fn hashbytes(bytes: &[u8]) -> usize { | ||||
|     hash | ||||
| } | ||||
|  | ||||
| #[inline] | ||||
| pub fn hashstr(s: &str) -> usize { | ||||
|     let mut hash = 0; | ||||
|     let (chunks, remainder) = s.as_bytes().as_chunks::<8>(); | ||||
|     for &chunk in chunks { | ||||
|         hash += usize::from_be_bytes(chunk); | ||||
|     } | ||||
|     let mut r = [0_u8; 8]; | ||||
|     r[0] = remainder.len() as u8; | ||||
|     let mut idx = 1; | ||||
|     for &byte in remainder { | ||||
|         r[idx] = byte; | ||||
|         idx += 1; | ||||
|     } | ||||
|     hash += usize::from_be_bytes(r); | ||||
|     hash | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use crate::utils::parse::{hashbytes, hashstr, temp_new}; | ||||
|     use crate::utils::parse::{hashstr, temp_new}; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_temp_new_max() { | ||||
| @@ -136,23 +118,12 @@ mod tests { | ||||
|         assert_eq!(temp_neg_10, -99); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_hashbytes() { | ||||
|         let hash_1 = hashbytes(b"abcdefghijk"); | ||||
|         let hash_2 = hashbytes(b"kjihgfedcba"); | ||||
|         let hash_3 = hashbytes(b"abba"); | ||||
|         let hash_4 = hashbytes(b"baab"); | ||||
|  | ||||
|         assert_ne!(hash_1, hash_2); | ||||
|         assert_ne!(hash_3, hash_4); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_hashstr() { | ||||
|         let hash_1 = hashstr("abcdefghijk"); | ||||
|         let hash_2 = hashstr("kjihgfedcba"); | ||||
|         let hash_3 = hashstr("abba"); | ||||
|         let hash_4 = hashstr("baab"); | ||||
|         let hash_1 = hashstr(b"abcdefghijk"); | ||||
|         let hash_2 = hashstr(b"kjihgfedcba"); | ||||
|         let hash_3 = hashstr(b"abba"); | ||||
|         let hash_4 = hashstr(b"baab"); | ||||
|  | ||||
|         assert_ne!(hash_1, hash_2); | ||||
|         assert_ne!(hash_3, hash_4); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user