extract hash into own module
This commit is contained in:
parent
c306083192
commit
b8f589096f
@ -1,6 +1,5 @@
|
|||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::{hash, parse};
|
||||||
use crate::utils::parse::hashstr;
|
|
||||||
use memmap2::MmapOptions;
|
use memmap2::MmapOptions;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
@ -48,7 +47,7 @@ pub fn run() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hash::bytes(station);
|
||||||
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp);
|
let temp = parse::temp(temp);
|
||||||
let measurements_option = t_stations.get_mut(&hash);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::{hash, parse};
|
||||||
use crate::utils::parse::hashstr;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::{BufRead, Seek, SeekFrom};
|
use std::io::{BufRead, Seek, SeekFrom};
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
@ -56,7 +55,7 @@ pub fn run() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hash::bytes(station);
|
||||||
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = t_stations.get_mut(&hash);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
|
@ -3,8 +3,7 @@ use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom};
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::{hash, parse};
|
||||||
use crate::utils::parse::hashstr;
|
|
||||||
use easy_parallel::Parallel;
|
use easy_parallel::Parallel;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
@ -66,7 +65,7 @@ pub fn run() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hash::bytes(station);
|
||||||
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = t_stations.get_mut(&hash);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::{hash, parse};
|
||||||
use crate::utils::parse::hashstr;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader};
|
use std::io::{BufRead, BufReader};
|
||||||
@ -24,7 +23,7 @@ pub fn run() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hash::bytes(station);
|
||||||
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = stations.get_mut(&hash);
|
let measurements_option = stations.get_mut(&hash);
|
||||||
|
@ -2,8 +2,7 @@ use smol::fs::File;
|
|||||||
use smol::io::{AsyncBufReadExt, BufReader};
|
use smol::io::{AsyncBufReadExt, BufReader};
|
||||||
|
|
||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::parse;
|
use crate::utils::{hash, parse};
|
||||||
use crate::utils::parse::hashstr;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
@ -29,7 +28,7 @@ pub fn run() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hash::bytes(station);
|
||||||
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let temp = parse::temp(temp.split_last().unwrap().1);
|
let temp = parse::temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = stations.get_mut(&hash);
|
let measurements_option = stations.get_mut(&hash);
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
pub mod byte_pos;
|
pub mod byte_pos;
|
||||||
|
pub mod hash;
|
||||||
pub mod parse;
|
pub mod parse;
|
||||||
pub mod write_structured_measurements;
|
pub mod write_structured_measurements;
|
||||||
|
|
||||||
|
33
src/main/rust/src/utils/hash.rs
Normal file
33
src/main/rust/src/utils/hash.rs
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/// Computes a cheap, non-cryptographic hash of `bytes`.
///
/// The input is consumed in 8-byte chunks; each chunk is read as a big-endian
/// `usize` and summed. The trailing bytes (at most 7) are packed into one more
/// 8-byte word whose first byte is the remainder length, and that word is
/// added last.
///
/// Every addition wraps, so the function never panics on overflow and returns
/// the same value in debug and release builds.
///
/// Reading 8 bytes into a `usize` means this only compiles on 64-bit targets.
#[inline]
pub fn bytes(bytes: &[u8]) -> usize {
    let mut words = bytes.chunks_exact(8);
    let mut hash: usize = 0;
    for chunk in words.by_ref() {
        let mut word = [0_u8; 8];
        word.copy_from_slice(chunk);
        hash = hash.wrapping_add(usize::from_be_bytes(word));
    }

    let remainder = words.remainder();
    let mut tail = [0_u8; 8];
    // `remainder` holds fewer than 8 bytes, so the cast cannot truncate and
    // the length byte plus the remainder always fit in `tail`.
    tail[0] = remainder.len() as u8;
    tail[1..=remainder.len()].copy_from_slice(remainder);

    // Must wrap like the chunk sum above: a plain `+` panics when overflow
    // checks are enabled and the chunk sum is already close to `usize::MAX`.
    hash.wrapping_add(usize::from_be_bytes(tail))
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::bytes;

    /// Byte-reversed inputs of equal length must not hash to the same value:
    /// checked once for an input longer than one 8-byte chunk and once for an
    /// input shorter than a chunk.
    #[test]
    fn test_hashstr() {
        let reversed_pairs = [
            (&b"abcdefghijk"[..], &b"kjihgfedcba"[..]),
            (&b"abba"[..], &b"baab"[..]),
        ];

        for (forward, backward) in reversed_pairs {
            assert_ne!(bytes(forward), bytes(backward));
        }
    }
}
|
@ -69,27 +69,9 @@ pub fn temp_simd(bytes: &[u8]) -> isize {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Computes a cheap, non-cryptographic hash of `bytes`.
///
/// The input is consumed in 8-byte chunks; each chunk is read as a big-endian
/// `usize` and summed. The trailing bytes (at most 7) are packed into one more
/// 8-byte word whose first byte is the remainder length, and that word is
/// added last.
///
/// Every addition wraps, so the function never panics on overflow and returns
/// the same value in debug and release builds.
///
/// Reading 8 bytes into a `usize` means this only compiles on 64-bit targets.
#[inline]
pub fn hashstr(bytes: &[u8]) -> usize {
    let mut words = bytes.chunks_exact(8);
    let mut hash: usize = 0;
    for chunk in words.by_ref() {
        let mut word = [0_u8; 8];
        word.copy_from_slice(chunk);
        hash = hash.wrapping_add(usize::from_be_bytes(word));
    }

    let remainder = words.remainder();
    let mut tail = [0_u8; 8];
    // `remainder` holds fewer than 8 bytes, so the cast cannot truncate and
    // the length byte plus the remainder always fit in `tail`.
    tail[0] = remainder.len() as u8;
    tail[1..=remainder.len()].copy_from_slice(remainder);

    // Must wrap like the chunk sum above: a plain `+` panics when overflow
    // checks are enabled and the chunk sum is already close to `usize::MAX`.
    hash.wrapping_add(usize::from_be_bytes(tail))
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::utils::parse::{hashstr, temp_new};
|
use crate::utils::parse::temp_new;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_temp_new_max() {
|
fn test_temp_new_max() {
|
||||||
@ -120,15 +102,4 @@ mod tests {
|
|||||||
let temp_neg_10 = temp_new("-9.9".as_bytes());
|
let temp_neg_10 = temp_new("-9.9".as_bytes());
|
||||||
assert_eq!(temp_neg_10, -99);
|
assert_eq!(temp_neg_10, -99);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_hashstr() {
|
|
||||||
let hash_1 = hashstr(b"abcdefghijk");
|
|
||||||
let hash_2 = hashstr(b"kjihgfedcba");
|
|
||||||
let hash_3 = hashstr(b"abba");
|
|
||||||
let hash_4 = hashstr(b"baab");
|
|
||||||
|
|
||||||
assert_ne!(hash_1, hash_2);
|
|
||||||
assert_ne!(hash_3, hash_4);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user