Some improvements from reducing the number of casts; implemented a get_pos function that finds the position of a byte in a byte slice by bitmasking (slightly slower).
This commit is contained in:
parent
2c23e30fe0
commit
25d20169aa
@ -4,7 +4,8 @@ use std::collections::HashMap;
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader};
|
use std::io::{BufRead, BufReader};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use onebrc::{hashstr, StationMeasurements};
|
|
||||||
|
use onebrc::{hashstr, parse_temp, StationMeasurements};
|
||||||
|
|
||||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
@ -24,7 +25,7 @@ fn main() {
|
|||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hashstr(station);
|
let hash = hashstr(station);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||||
let temp = onebrc::parse_temp(temp.split_last().unwrap().1);
|
let temp = parse_temp(temp.split_last().unwrap().1);
|
||||||
let measurements_option = stations.get_mut(&hash);
|
let measurements_option = stations.get_mut(&hash);
|
||||||
if let Some((_, measurements)) = measurements_option {
|
if let Some((_, measurements)) = measurements_option {
|
||||||
measurements.update(temp);
|
measurements.update(temp);
|
||||||
|
@ -47,9 +47,9 @@ pub fn format_nums(num: usize) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub const fn get_digit(b: u8) -> u32 {
|
pub const fn get_digit(b: u8) -> isize {
|
||||||
// wrapping_sub('0' as u32) same as - 48 but less magical
|
// wrapping_sub('0' as isize) is the same as subtracting 48, but less magical
|
||||||
(b as u32).wrapping_sub('0' as u32)
|
(b as isize).wrapping_sub('0' as isize)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
@ -63,9 +63,9 @@ pub fn parse_temp(bytes: &[u8]) -> isize {
|
|||||||
_x => panic!("could not parse temp: is_negative = {is_negative}, length = {}", bytes.len()),
|
_x => panic!("could not parse temp: is_negative = {is_negative}, length = {}", bytes.len()),
|
||||||
};
|
};
|
||||||
if is_negative {
|
if is_negative {
|
||||||
-(as_decimal as isize)
|
-as_decimal
|
||||||
} else {
|
} else {
|
||||||
as_decimal as isize
|
as_decimal
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -123,32 +123,85 @@ pub fn new_parse_temp(bytes: &[u8]) -> isize {
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
/// Returns the index of the first occurrence of `find` in `bytes`, or `None`
/// if the byte does not occur.
///
/// The slice is scanned in non-overlapping 4-byte words using a bit-masking
/// zero-byte test (see [`get_pos_in_chunk`]). Trailing bytes that do not fill
/// a whole word are compared one by one, so slices shorter than four bytes
/// are handled as well.
///
/// Positions are reported as `u32`; slices longer than `u32::MAX` bytes are
/// not supported.
#[inline]
pub fn get_pos(bytes: &[u8], find: u8) -> Option<u32> {
    let words = bytes.chunks_exact(4);
    // Grab the leftover bytes (0..=3 of them) before the iterator is consumed.
    let tail = words.remainder();
    let mut pos = 0;
    for word in words {
        let inner_pos = get_pos_in_chunk(word, find);
        // `get_pos_in_chunk` returns 4 when the word holds no match.
        if inner_pos < 4 {
            return Some(pos + inner_pos);
        }
        pos += 4;
    }
    // `i` is at most 2 here, so the cast to `u32` is lossless.
    tail.iter()
        .position(|&byte| byte == find)
        .map(|i| pos + i as u32)
}

/// Returns the index (`0..=3`) of the first byte of the 4-byte `byte_chunk`
/// that equals `find`, or `4` if none of the bytes match.
///
/// # Panics
///
/// Panics if `byte_chunk` is not exactly four bytes long.
#[inline]
fn get_pos_in_chunk(byte_chunk: &[u8], find: u8) -> u32 {
    // Little-endian load: the byte at slice index 0 becomes the LOWEST byte of
    // the word, so the first match maps to the lowest flag bit below.
    let word = u32::from_le_bytes(byte_chunk.try_into().expect("chunk must be exactly 4 bytes"));
    // XOR turns every byte equal to `find` into 0x00.
    let x = word ^ u32::from_le_bytes([find; 4]);
    // Zero-byte test: sets the high bit of every byte of `x` that is zero.
    // The subtraction must wrap (a plain `-` panics in debug builds whenever
    // `x < 0x01010101`). Borrows can only produce spurious flags in bytes
    // ABOVE a genuine zero byte, so the lowest flag is always exact.
    let mask = x.wrapping_sub(0x0101_0101) & !x & 0x8080_8080;
    // No flag set => trailing_zeros() == 32 => result 4 ("not found").
    mask.trailing_zeros() >> 3
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::new_parse_temp;
|
use crate::{get_pos, hashstr, new_parse_temp};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_new_parse_temp() {
|
fn test_new_parse_temp_max() {
|
||||||
let temp_max = new_parse_temp("99.9".as_bytes());
|
let temp_max = new_parse_temp("99.9".as_bytes());
|
||||||
let temp_min = new_parse_temp("-99.9".as_bytes());
|
|
||||||
let temp_0 = new_parse_temp("0.0".as_bytes());
|
|
||||||
let temp_10 = new_parse_temp("10.0".as_bytes());
|
|
||||||
let temp_neg_10 = new_parse_temp("-10.0".as_bytes());
|
|
||||||
|
|
||||||
assert_eq!(temp_max, 999);
|
assert_eq!(temp_max, 999);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_new_parse_temp_min() {
|
||||||
|
let temp_min = new_parse_temp("-99.9".as_bytes());
|
||||||
assert_eq!(temp_min, -999);
|
assert_eq!(temp_min, -999);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_new_parse_temp_zero() {
|
||||||
|
let temp_0 = new_parse_temp("0.0".as_bytes());
|
||||||
assert_eq!(temp_0, 0);
|
assert_eq!(temp_0, 0);
|
||||||
assert_eq!(temp_10, 100);
|
}
|
||||||
assert_eq!(temp_neg_10, -100);
|
|
||||||
|
#[test]
|
||||||
|
fn test_new_parse_temp_pos() {
|
||||||
|
let temp_10 = new_parse_temp("9.9".as_bytes());
|
||||||
|
assert_eq!(temp_10, 99);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_new_parse_temp_neg() {
|
||||||
|
let temp_neg_10 = new_parse_temp("-9.9".as_bytes());
|
||||||
|
assert_eq!(temp_neg_10, -99);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_hashstr() {
|
||||||
|
let hash_1 = hashstr(b"abcdefghijk");
|
||||||
|
let hash_2 = hashstr(b"kjihgfedcba");
|
||||||
|
let hash_3 = hashstr(b"abba");
|
||||||
|
let hash_4 = hashstr(b"baab");
|
||||||
|
|
||||||
|
assert_ne!(hash_1, hash_2);
|
||||||
|
assert_ne!(hash_3, hash_4);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_getpos() {
|
||||||
|
let semi_bytes = vec![0_u8, 0_u8, 0_u8, 0_u8, 0_u8, 0_u8, 0_u8, 0_u8, b';', 0_u8, 0_u8];
|
||||||
|
let semi_bytes = semi_bytes.as_slice();
|
||||||
|
let pos = get_pos(semi_bytes, b';').unwrap();
|
||||||
|
assert_eq!(pos, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_getpos_empty() {
|
||||||
|
let semi_bytes = vec![0_u8, 0_u8, 0_u8, 0_u8, 0_u8, 0_u8, 0_u8, 0_u8, 0_u8, 0_u8];
|
||||||
|
let semi_bytes = semi_bytes.as_slice();
|
||||||
|
let pos = get_pos(semi_bytes, b';');
|
||||||
|
assert_eq!(pos, None);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user