Extract hash into its own module
This commit is contained in:
		| @@ -1,6 +1,5 @@ | |||||||
| use crate::models::station_measurements::StationMeasurements; | use crate::models::station_measurements::StationMeasurements; | ||||||
| use crate::utils::parse; | use crate::utils::{hash, parse}; | ||||||
| use crate::utils::parse::hashstr; |  | ||||||
| use memmap2::MmapOptions; | use memmap2::MmapOptions; | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::sync::mpsc; | use std::sync::mpsc; | ||||||
| @@ -48,7 +47,7 @@ pub fn run() { | |||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|                     let hash = hashstr(station); |                     let hash = hash::bytes(station); | ||||||
|                     let station = unsafe { std::str::from_utf8_unchecked(station) }; |                     let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|                     let temp = parse::temp(temp); |                     let temp = parse::temp(temp); | ||||||
|                     let measurements_option = t_stations.get_mut(&hash); |                     let measurements_option = t_stations.get_mut(&hash); | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| use crate::models::station_measurements::StationMeasurements; | use crate::models::station_measurements::StationMeasurements; | ||||||
| use crate::utils::parse; | use crate::utils::{hash, parse}; | ||||||
| use crate::utils::parse::hashstr; |  | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::io::{BufRead, Seek, SeekFrom}; | use std::io::{BufRead, Seek, SeekFrom}; | ||||||
| use std::sync::mpsc; | use std::sync::mpsc; | ||||||
| @@ -56,7 +55,7 @@ pub fn run() { | |||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|                     let hash = hashstr(station); |                     let hash = hash::bytes(station); | ||||||
|                     let station = unsafe { std::str::from_utf8_unchecked(station) }; |                     let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|                     let temp = parse::temp(temp.split_last().unwrap().1); |                     let temp = parse::temp(temp.split_last().unwrap().1); | ||||||
|                     let measurements_option = t_stations.get_mut(&hash); |                     let measurements_option = t_stations.get_mut(&hash); | ||||||
|   | |||||||
| @@ -3,8 +3,7 @@ use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom}; | |||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
|  |  | ||||||
| use crate::models::station_measurements::StationMeasurements; | use crate::models::station_measurements::StationMeasurements; | ||||||
| use crate::utils::parse; | use crate::utils::{hash, parse}; | ||||||
| use crate::utils::parse::hashstr; |  | ||||||
| use easy_parallel::Parallel; | use easy_parallel::Parallel; | ||||||
| use std::thread; | use std::thread; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
| @@ -66,7 +65,7 @@ pub fn run() { | |||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|                     let hash = hashstr(station); |                     let hash = hash::bytes(station); | ||||||
|                     let station = unsafe { std::str::from_utf8_unchecked(station) }; |                     let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|                     let temp = parse::temp(temp.split_last().unwrap().1); |                     let temp = parse::temp(temp.split_last().unwrap().1); | ||||||
|                     let measurements_option = t_stations.get_mut(&hash); |                     let measurements_option = t_stations.get_mut(&hash); | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| use crate::models::station_measurements::StationMeasurements; | use crate::models::station_measurements::StationMeasurements; | ||||||
| use crate::utils::parse; | use crate::utils::{hash, parse}; | ||||||
| use crate::utils::parse::hashstr; |  | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::io::{BufRead, BufReader}; | use std::io::{BufRead, BufReader}; | ||||||
| @@ -24,7 +23,7 @@ pub fn run() { | |||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|         let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |         let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|         let hash = hashstr(station); |         let hash = hash::bytes(station); | ||||||
|         let station = unsafe { std::str::from_utf8_unchecked(station) }; |         let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|         let temp = parse::temp(temp.split_last().unwrap().1); |         let temp = parse::temp(temp.split_last().unwrap().1); | ||||||
|         let measurements_option = stations.get_mut(&hash); |         let measurements_option = stations.get_mut(&hash); | ||||||
|   | |||||||
| @@ -2,8 +2,7 @@ use smol::fs::File; | |||||||
| use smol::io::{AsyncBufReadExt, BufReader}; | use smol::io::{AsyncBufReadExt, BufReader}; | ||||||
|  |  | ||||||
| use crate::models::station_measurements::StationMeasurements; | use crate::models::station_measurements::StationMeasurements; | ||||||
| use crate::utils::parse; | use crate::utils::{hash, parse}; | ||||||
| use crate::utils::parse::hashstr; |  | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
|  |  | ||||||
| @@ -29,7 +28,7 @@ pub fn run() { | |||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|             let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |             let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|             let hash = hashstr(station); |             let hash = hash::bytes(station); | ||||||
|             let station = unsafe { std::str::from_utf8_unchecked(station) }; |             let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|             let temp = parse::temp(temp.split_last().unwrap().1); |             let temp = parse::temp(temp.split_last().unwrap().1); | ||||||
|             let measurements_option = stations.get_mut(&hash); |             let measurements_option = stations.get_mut(&hash); | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| pub mod byte_pos; | pub mod byte_pos; | ||||||
|  | pub mod hash; | ||||||
| pub mod parse; | pub mod parse; | ||||||
| pub mod write_structured_measurements; | pub mod write_structured_measurements; | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										33
									
								
								src/main/rust/src/utils/hash.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								src/main/rust/src/utils/hash.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | |||||||
/// Computes a cheap, non-cryptographic hash of `bytes`.
///
/// The input is consumed as big-endian 8-byte words that are summed with
/// wrapping arithmetic. The trailing 0..=7 bytes are packed into one final
/// word whose first byte holds the tail length, followed by the tail bytes
/// and zero padding.
///
/// Every addition wraps, so the function never panics on overflow, in debug
/// builds as well as release builds.
///
/// The hash is not collision-free: addition is commutative, so inputs that
/// differ only in the order of their full 8-byte words produce the same value.
///
/// NOTE(review): `usize::from_be_bytes` is fed an 8-byte array, so this only
/// compiles on targets where `usize` is 64 bits wide.
#[inline]
pub fn bytes(bytes: &[u8]) -> usize {
    let mut chunks = bytes.chunks_exact(8);
    let mut hash: usize = 0;
    for chunk in chunks.by_ref() {
        let mut word = [0_u8; 8];
        word.copy_from_slice(chunk);
        hash = hash.wrapping_add(usize::from_be_bytes(word));
    }

    let remainder = chunks.remainder();
    let mut tail = [0_u8; 8];
    // `remainder` holds at most 7 bytes, so the length always fits in a `u8`
    // and the bytes always fit behind the length prefix.
    tail[0] = remainder.len() as u8;
    tail[1..1 + remainder.len()].copy_from_slice(remainder);

    // Wrapping here matches the chunk loop above; a plain `+=` would panic in
    // builds with overflow checks enabled.
    hash.wrapping_add(usize::from_be_bytes(tail))
}
|  |  | ||||||
#[cfg(test)]
mod tests {
    use crate::utils::hash;

    /// Checks that each listed pair of inputs, which contain the same bytes
    /// in a different order, produces two different hash values.
    #[test]
    fn test_hashstr() {
        let reordered_pairs: [(&[u8], &[u8]); 2] = [
            (b"abcdefghijk", b"kjihgfedcba"),
            (b"abba", b"baab"),
        ];

        for (forward, backward) in reordered_pairs {
            assert_ne!(hash::bytes(forward), hash::bytes(backward));
        }
    }
}
| @@ -69,27 +69,9 @@ pub fn temp_simd(bytes: &[u8]) -> isize { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[inline] |  | ||||||
| pub fn hashstr(bytes: &[u8]) -> usize { |  | ||||||
|     let mut hash: usize = 0; |  | ||||||
|     let (chunks, remainder) = bytes.as_chunks::<8>(); |  | ||||||
|     for &chunk in chunks { |  | ||||||
|         hash = hash.wrapping_add(usize::from_be_bytes(chunk)); |  | ||||||
|     } |  | ||||||
|     let mut r = [0_u8; 8]; |  | ||||||
|     r[0] = remainder.len() as u8; |  | ||||||
|     let mut idx = 1; |  | ||||||
|     for &byte in remainder { |  | ||||||
|         r[idx] = byte; |  | ||||||
|         idx += 1; |  | ||||||
|     } |  | ||||||
|     hash += usize::from_be_bytes(r); |  | ||||||
|     hash |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod tests { | mod tests { | ||||||
|     use crate::utils::parse::{hashstr, temp_new}; |     use crate::utils::parse::temp_new; | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn test_temp_new_max() { |     fn test_temp_new_max() { | ||||||
| @@ -120,15 +102,4 @@ mod tests { | |||||||
|         let temp_neg_10 = temp_new("-9.9".as_bytes()); |         let temp_neg_10 = temp_new("-9.9".as_bytes()); | ||||||
|         assert_eq!(temp_neg_10, -99); |         assert_eq!(temp_neg_10, -99); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |  | ||||||
|     fn test_hashstr() { |  | ||||||
|         let hash_1 = hashstr(b"abcdefghijk"); |  | ||||||
|         let hash_2 = hashstr(b"kjihgfedcba"); |  | ||||||
|         let hash_3 = hashstr(b"abba"); |  | ||||||
|         let hash_4 = hashstr(b"baab"); |  | ||||||
|  |  | ||||||
|         assert_ne!(hash_1, hash_2); |  | ||||||
|         assert_ne!(hash_3, hash_4); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user