From 16cf4ca2cae528d5bdc310f011017f1063c4ce7b Mon Sep 17 00:00:00 2001 From: Fabian Schmidt Date: Tue, 23 Jul 2024 16:30:50 +0200 Subject: [PATCH] moved from reading String to reading bytes. A little faster, still need to implement for multithreaded solution --- src/main/rust/src/bin/single_thread.rs | 20 +++++++-------- src/main/rust/src/lib.rs | 35 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/main/rust/src/bin/single_thread.rs b/src/main/rust/src/bin/single_thread.rs index 6037fec..26add47 100644 --- a/src/main/rust/src/bin/single_thread.rs +++ b/src/main/rust/src/bin/single_thread.rs @@ -1,10 +1,9 @@ -use std::{ - fs::File, - io::{BufRead, BufReader}, -}; use std::collections::HashMap; +use std::fs::File; +use std::io::{BufReader, Read}; use std::time::Instant; +use onebrc::{parse_line, read_bytes_until}; const DEFAULT_HASHMAP_LENGTH: usize = 10000; @@ -14,12 +13,13 @@ fn main() { HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); - let reader = BufReader::new(file); - for line_result in reader.lines() { + let mut bytes = BufReader::new(file).bytes(); + while let Some(line_result) = read_bytes_until(&mut bytes, b'\n') { let line = line_result.expect("could not read line"); - let (station, temp) = line.split_once(';').unwrap(); - let temp = onebrc::parse_temp(temp.as_bytes()); - let measurements_option = stations.get_mut(station); + let (station, temp) = parse_line(&line); + let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; + let temp = onebrc::parse_temp(temp); + let measurements_option = stations.get_mut(&station); if let Some(measurements) = measurements_option { measurements.update(temp); } else { @@ -29,7 +29,7 @@ fn main() { count: 1, sum: temp, }; - stations.insert(station.to_owned(), measurements); + stations.insert(station, measurements); } } let mut stations: Vec = stations.iter().map(|(station, measurements)| { diff --git a/src/main/rust/src/lib.rs b/src/main/rust/src/lib.rs index 694dbd5..9d44b10 100644 --- a/src/main/rust/src/lib.rs +++ b/src/main/rust/src/lib.rs @@ -1,4 +1,6 @@ use std::fmt::Display; +use std::fs::File; +use std::io::{BufReader, Bytes}; #[derive(Copy, Clone)] pub struct StationMeasurements { @@ -64,4 +66,37 @@ pub fn parse_temp(bytes: &[u8]) -> isize { } else { as_decimal as isize } +} + +#[inline] +pub fn read_bytes_until(bytes: &mut Bytes>, delimiter: u8) -> Option> { + // 108 max length of line in bytes + let mut buf: [u8; 108] = [b'#'; 108]; + let mut idx = 0; + while let Some(byte) = bytes.next() { + if byte.is_err() { + panic!("Could not read byte"); + } + let byte = byte.unwrap(); + if delimiter == byte { + return Some(Ok(buf)); + } + buf[idx] = byte; + idx += 1; + } + None +} + +#[inline] +pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) { + let mut idx = 0; + while idx < line.len() && line[idx] != b';' { + idx += 1; + } + let station = &line[0..idx]; + let midpoint = idx + 1; + while idx < line.len() && line[idx] != b'#' { + idx += 1; + } + (station, &line[midpoint..idx]) } \ No newline at end of file