extract hash into own module
This commit is contained in:
		| @@ -1,6 +1,5 @@ | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::parse; | ||||
| use crate::utils::parse::hashstr; | ||||
| use crate::utils::{hash, parse}; | ||||
| use memmap2::MmapOptions; | ||||
| use std::collections::HashMap; | ||||
| use std::sync::mpsc; | ||||
| @@ -48,7 +47,7 @@ pub fn run() { | ||||
|                         break; | ||||
|                     } | ||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||
|                     let hash = hashstr(station); | ||||
|                     let hash = hash::bytes(station); | ||||
|                     let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||
|                     let temp = parse::temp(temp); | ||||
|                     let measurements_option = t_stations.get_mut(&hash); | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::parse; | ||||
| use crate::utils::parse::hashstr; | ||||
| use crate::utils::{hash, parse}; | ||||
| use std::collections::HashMap; | ||||
| use std::io::{BufRead, Seek, SeekFrom}; | ||||
| use std::sync::mpsc; | ||||
| @@ -56,7 +55,7 @@ pub fn run() { | ||||
|                         break; | ||||
|                     } | ||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||
|                     let hash = hashstr(station); | ||||
|                     let hash = hash::bytes(station); | ||||
|                     let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||
|                     let temp = parse::temp(temp.split_last().unwrap().1); | ||||
|                     let measurements_option = t_stations.get_mut(&hash); | ||||
|   | ||||
| @@ -3,8 +3,7 @@ use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom}; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::parse; | ||||
| use crate::utils::parse::hashstr; | ||||
| use crate::utils::{hash, parse}; | ||||
| use easy_parallel::Parallel; | ||||
| use std::thread; | ||||
| use std::time::Instant; | ||||
| @@ -66,7 +65,7 @@ pub fn run() { | ||||
|                         break; | ||||
|                     } | ||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||
|                     let hash = hashstr(station); | ||||
|                     let hash = hash::bytes(station); | ||||
|                     let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||
|                     let temp = parse::temp(temp.split_last().unwrap().1); | ||||
|                     let measurements_option = t_stations.get_mut(&hash); | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::parse; | ||||
| use crate::utils::parse::hashstr; | ||||
| use crate::utils::{hash, parse}; | ||||
| use std::collections::HashMap; | ||||
| use std::fs::File; | ||||
| use std::io::{BufRead, BufReader}; | ||||
| @@ -24,7 +23,7 @@ pub fn run() { | ||||
|             break; | ||||
|         } | ||||
|         let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||
|         let hash = hashstr(station); | ||||
|         let hash = hash::bytes(station); | ||||
|         let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||
|         let temp = parse::temp(temp.split_last().unwrap().1); | ||||
|         let measurements_option = stations.get_mut(&hash); | ||||
|   | ||||
| @@ -2,8 +2,7 @@ use smol::fs::File; | ||||
| use smol::io::{AsyncBufReadExt, BufReader}; | ||||
|  | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::parse; | ||||
| use crate::utils::parse::hashstr; | ||||
| use crate::utils::{hash, parse}; | ||||
| use std::collections::HashMap; | ||||
| use std::time::Instant; | ||||
|  | ||||
| @@ -29,7 +28,7 @@ pub fn run() { | ||||
|                 break; | ||||
|             } | ||||
|             let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||
|             let hash = hashstr(station); | ||||
|             let hash = hash::bytes(station); | ||||
|             let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||
|             let temp = parse::temp(temp.split_last().unwrap().1); | ||||
|             let measurements_option = stations.get_mut(&hash); | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| pub mod byte_pos; | ||||
| pub mod hash; | ||||
| pub mod parse; | ||||
| pub mod write_structured_measurements; | ||||
|  | ||||
|   | ||||
							
								
								
									
										33
									
								
								src/main/rust/src/utils/hash.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								src/main/rust/src/utils/hash.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| #[inline] | ||||
| pub fn bytes(bytes: &[u8]) -> usize { | ||||
|     let mut hash: usize = 0; | ||||
|     let (chunks, remainder) = bytes.as_chunks::<8>(); | ||||
|     for &chunk in chunks { | ||||
|         hash = hash.wrapping_add(usize::from_be_bytes(chunk)); | ||||
|     } | ||||
|     let mut r = [0_u8; 8]; | ||||
|     r[0] = remainder.len() as u8; | ||||
|     let mut idx = 1; | ||||
|     for &byte in remainder { | ||||
|         r[idx] = byte; | ||||
|         idx += 1; | ||||
|     } | ||||
|     hash += usize::from_be_bytes(r); | ||||
|     hash | ||||
| } | ||||
|  | ||||
#[cfg(test)]
mod tests {
    use crate::utils::hash;

    /// Inputs that are permutations of the same bytes must produce different hashes.
    #[test]
    fn test_hashstr() {
        let pairs: [(&[u8], &[u8]); 2] = [
            (b"abcdefghijk", b"kjihgfedcba"),
            (b"abba", b"baab"),
        ];

        for (left, right) in pairs {
            assert_ne!(hash::bytes(left), hash::bytes(right));
        }
    }
}
| @@ -69,27 +69,9 @@ pub fn temp_simd(bytes: &[u8]) -> isize { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[inline] | ||||
| pub fn hashstr(bytes: &[u8]) -> usize { | ||||
|     let mut hash: usize = 0; | ||||
|     let (chunks, remainder) = bytes.as_chunks::<8>(); | ||||
|     for &chunk in chunks { | ||||
|         hash = hash.wrapping_add(usize::from_be_bytes(chunk)); | ||||
|     } | ||||
|     let mut r = [0_u8; 8]; | ||||
|     r[0] = remainder.len() as u8; | ||||
|     let mut idx = 1; | ||||
|     for &byte in remainder { | ||||
|         r[idx] = byte; | ||||
|         idx += 1; | ||||
|     } | ||||
|     hash += usize::from_be_bytes(r); | ||||
|     hash | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use crate::utils::parse::{hashstr, temp_new}; | ||||
|     use crate::utils::parse::temp_new; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_temp_new_max() { | ||||
| @@ -120,15 +102,4 @@ mod tests { | ||||
|         let temp_neg_10 = temp_new("-9.9".as_bytes()); | ||||
|         assert_eq!(temp_neg_10, -99); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_hashstr() { | ||||
|         let hash_1 = hashstr(b"abcdefghijk"); | ||||
|         let hash_2 = hashstr(b"kjihgfedcba"); | ||||
|         let hash_3 = hashstr(b"abba"); | ||||
|         let hash_4 = hashstr(b"baab"); | ||||
|  | ||||
|         assert_ne!(hash_1, hash_2); | ||||
|         assert_ne!(hash_3, hash_4); | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user