extract hash into own module

This commit is contained in:
Fabian Schmidt 2024-08-27 13:54:23 +02:00
parent c306083192
commit b8f589096f
8 changed files with 45 additions and 45 deletions

View File

@ -1,6 +1,5 @@
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashstr;
use crate::utils::{hash, parse};
use memmap2::MmapOptions;
use std::collections::HashMap;
use std::sync::mpsc;
@ -48,7 +47,7 @@ pub fn run() {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station);
let hash = hash::bytes(station);
let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp);
let measurements_option = t_stations.get_mut(&hash);

View File

@ -1,6 +1,5 @@
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashstr;
use crate::utils::{hash, parse};
use std::collections::HashMap;
use std::io::{BufRead, Seek, SeekFrom};
use std::sync::mpsc;
@ -56,7 +55,7 @@ pub fn run() {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station);
let hash = hash::bytes(station);
let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&hash);

View File

@ -3,8 +3,7 @@ use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom};
use std::collections::HashMap;
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashstr;
use crate::utils::{hash, parse};
use easy_parallel::Parallel;
use std::thread;
use std::time::Instant;
@ -66,7 +65,7 @@ pub fn run() {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station);
let hash = hash::bytes(station);
let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = t_stations.get_mut(&hash);

View File

@ -1,6 +1,5 @@
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashstr;
use crate::utils::{hash, parse};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
@ -24,7 +23,7 @@ pub fn run() {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station);
let hash = hash::bytes(station);
let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = stations.get_mut(&hash);

View File

@ -2,8 +2,7 @@ use smol::fs::File;
use smol::io::{AsyncBufReadExt, BufReader};
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashstr;
use crate::utils::{hash, parse};
use std::collections::HashMap;
use std::time::Instant;
@ -29,7 +28,7 @@ pub fn run() {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station);
let hash = hash::bytes(station);
let station = unsafe { std::str::from_utf8_unchecked(station) };
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = stations.get_mut(&hash);

View File

@ -1,4 +1,5 @@
pub mod byte_pos;
pub mod hash;
pub mod parse;
pub mod write_structured_measurements;

View File

@ -0,0 +1,33 @@
/// Computes a cheap, non-cryptographic hash of `bytes`.
///
/// The input is split into 8-byte chunks. Every full chunk is read as a
/// big-endian word and the words are summed with wrapping arithmetic. The
/// remaining 0..=7 bytes are packed into one more word whose first byte is the
/// remainder length, followed by the remainder bytes and zero padding, and
/// that word is added to the sum as well.
///
/// Because the chunk words are combined by addition, two inputs whose full
/// 8-byte chunks are the same words in a different order hash to the same
/// value.
///
/// The 8-byte words are converted with `usize::from_be_bytes`, so this only
/// compiles on targets where `usize` is 8 bytes wide.
///
/// This function never panics: all additions wrap on overflow.
#[inline]
pub fn bytes(bytes: &[u8]) -> usize {
    let (chunks, remainder) = bytes.as_chunks::<8>();

    // Sum of all complete 8-byte words; overflow is expected and wraps.
    let head = chunks
        .iter()
        .fold(0_usize, |acc, &chunk| acc.wrapping_add(usize::from_be_bytes(chunk)));

    // Pack the tail as [len, b0, b1, ..., 0-padding]. `remainder.len()` is at
    // most 7, so it fits in a `u8` and `1..=len` stays inside the 8-byte buffer.
    let mut tail = [0_u8; 8];
    tail[0] = remainder.len() as u8;
    tail[1..=remainder.len()].copy_from_slice(remainder);

    // Must wrap as well: a plain `+` panics when overflow checks are enabled
    // (debug and test builds) once the chunk sum is large enough.
    head.wrapping_add(usize::from_be_bytes(tail))
}
#[cfg(test)]
mod tests {
    use crate::utils::hash;

    /// Reordering the bytes of an input must change its hash for these samples.
    #[test]
    fn test_hashstr() {
        // 11 bytes: one full 8-byte chunk followed by a 3-byte remainder.
        assert_ne!(hash::bytes(b"abcdefghijk"), hash::bytes(b"kjihgfedcba"));
        // 4 bytes: shorter than one chunk, so only the remainder word is used.
        assert_ne!(hash::bytes(b"abba"), hash::bytes(b"baab"));
    }
}

View File

@ -69,27 +69,9 @@ pub fn temp_simd(bytes: &[u8]) -> isize {
}
}
/// Computes a cheap, non-cryptographic hash of `bytes`.
///
/// Every full 8-byte chunk is read as a big-endian word and the words are
/// summed with wrapping arithmetic. The remaining 0..=7 bytes are packed into
/// one more word (remainder length first, then the bytes, then zero padding)
/// which is added to the sum as well.
///
/// The 8-byte words are converted with `usize::from_be_bytes`, so this only
/// compiles on targets where `usize` is 8 bytes wide.
///
/// This function never panics: all additions wrap on overflow.
#[inline]
pub fn hashstr(bytes: &[u8]) -> usize {
    let (chunks, remainder) = bytes.as_chunks::<8>();

    // Sum of all complete 8-byte words; overflow is expected and wraps.
    let head = chunks
        .iter()
        .fold(0_usize, |acc, &chunk| acc.wrapping_add(usize::from_be_bytes(chunk)));

    // Pack the tail as [len, b0, b1, ..., 0-padding]. `remainder.len()` is at
    // most 7, so it fits in a `u8` and `1..=len` stays inside the 8-byte buffer.
    let mut tail = [0_u8; 8];
    tail[0] = remainder.len() as u8;
    tail[1..=remainder.len()].copy_from_slice(remainder);

    // Must wrap as well: a plain `+` panics when overflow checks are enabled
    // (debug and test builds) once the chunk sum is large enough.
    head.wrapping_add(usize::from_be_bytes(tail))
}
#[cfg(test)]
mod tests {
use crate::utils::parse::{hashstr, temp_new};
use crate::utils::parse::temp_new;
#[test]
fn test_temp_new_max() {
@ -120,15 +102,4 @@ mod tests {
let temp_neg_10 = temp_new("-9.9".as_bytes());
assert_eq!(temp_neg_10, -99);
}
/// Reordering the bytes of an input must change its hash for these samples.
#[test]
fn test_hashstr() {
    // 11 bytes: one full 8-byte chunk followed by a 3-byte remainder.
    assert_ne!(hashstr(b"abcdefghijk"), hashstr(b"kjihgfedcba"));
    // 4 bytes: shorter than one chunk, so only the remainder word is used.
    assert_ne!(hashstr(b"abba"), hashstr(b"baab"));
}
}