Store the station name as a byte slice and convert it to a string only once at the end; keying the map by the u64 hash is still faster than using the byte slice itself as the key
This commit is contained in:
		| @@ -17,7 +17,8 @@ pub fn run() { | |||||||
|     let mmap_ptr = mmap.as_ptr(); |     let mmap_ptr = mmap.as_ptr(); | ||||||
|     let file_length = mmap.len(); |     let file_length = mmap.len(); | ||||||
|     let hasher = FxBuildHasher; |     let hasher = FxBuildHasher; | ||||||
|     let mut stations: HashMap<u64, (String, StationMeasurements)> = |     // Even if I could now just use the byte slice as a key, doing the hash is still faster | ||||||
|  |     let mut stations: HashMap<u64, (&[u8], StationMeasurements)> = | ||||||
|         HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); |         HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||||
|     let (tx, rx) = mpsc::channel(); |     let (tx, rx) = mpsc::channel(); | ||||||
|     let cores = thread::available_parallelism().unwrap().into(); |     let cores = thread::available_parallelism().unwrap().into(); | ||||||
| @@ -43,7 +44,7 @@ pub fn run() { | |||||||
|             let (start, end) = *bounds.get(i).unwrap(); |             let (start, end) = *bounds.get(i).unwrap(); | ||||||
|             let mmap_slice = unsafe { from_raw_parts(mmap_ptr.add(start), end - start) }; |             let mmap_slice = unsafe { from_raw_parts(mmap_ptr.add(start), end - start) }; | ||||||
|             s.spawn(move || { |             s.spawn(move || { | ||||||
|                 let mut t_stations: HashMap<u64, (String, StationMeasurements)> = |                 let mut t_stations: HashMap<u64, (&[u8], StationMeasurements)> = | ||||||
|                     HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); |                     HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||||
|                 for line in mmap_slice.split(|&byte| byte == b'\n') { |                 for line in mmap_slice.split(|&byte| byte == b'\n') { | ||||||
|                     if line.is_empty() { |                     if line.is_empty() { | ||||||
| @@ -51,7 +52,6 @@ pub fn run() { | |||||||
|                     } |                     } | ||||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|                     let hash = hash::bytes(station); |                     let hash = hash::bytes(station); | ||||||
|                     let station = unsafe { std::str::from_utf8_unchecked(station) }; |  | ||||||
|                     let temp = parse::temp(temp); |                     let temp = parse::temp(temp); | ||||||
|                     let measurements_option = t_stations.get_mut(&hash); |                     let measurements_option = t_stations.get_mut(&hash); | ||||||
|                     if let Some((_, measurements)) = measurements_option { |                     if let Some((_, measurements)) = measurements_option { | ||||||
| @@ -63,7 +63,7 @@ pub fn run() { | |||||||
|                             count: 1, |                             count: 1, | ||||||
|                             sum: temp, |                             sum: temp, | ||||||
|                         }; |                         }; | ||||||
|                         t_stations.insert(hash, (station.to_string(), measurements)); |                         t_stations.insert(hash, (station, measurements)); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 let _ = tx.send(t_stations); |                 let _ = tx.send(t_stations); | ||||||
| @@ -76,13 +76,14 @@ pub fn run() { | |||||||
|                 if let Some((_, joined_measurements)) = joined_measurements_options { |                 if let Some((_, joined_measurements)) = joined_measurements_options { | ||||||
|                     joined_measurements.merge(measurements); |                     joined_measurements.merge(measurements); | ||||||
|                 } else { |                 } else { | ||||||
|                     stations.insert(*hash, (station.to_owned(), *measurements)); |                     stations.insert(*hash, (station, *measurements)); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         let mut stations: Vec<String> = stations |         let mut stations: Vec<String> = stations | ||||||
|             .iter() |             .iter() | ||||||
|             .map(|(_, (station, measurements))| { |             .map(|(_, (station, measurements))| { | ||||||
|  |                 let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|                 let measurements = measurements.to_string(); |                 let measurements = measurements.to_string(); | ||||||
|                 #[cfg(feature = "json")] |                 #[cfg(feature = "json")] | ||||||
|                 { |                 { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user