diff --git a/src/main/rust/src/bin/multi_threaded.rs b/src/main/rust/src/bin/multi_threaded.rs index b4317fb..f132903 100644 --- a/src/main/rust/src/bin/multi_threaded.rs +++ b/src/main/rust/src/bin/multi_threaded.rs @@ -4,10 +4,11 @@ use std::{ thread, }; use std::collections::HashMap; -use std::io::{Read, Seek, SeekFrom}; +use std::io::{BufRead, Seek, SeekFrom}; use std::sync::mpsc; use std::time::Instant; -use onebrc::{parse_line, parse_temp, read_bytes_until}; + +use onebrc::{parse_line, parse_temp}; const DEFAULT_HASHMAP_LENGTH: usize = 10000; @@ -28,18 +29,9 @@ fn main() { let mut reader = BufReader::new(&file); let mut byte_start = chunk_length * i; reader.seek(SeekFrom::Start(byte_start as u64)).expect("could not seek"); - let bytes = reader.bytes(); - for byte in bytes { - match byte { - Ok(byte) => { - byte_start += 1; - if byte == b'\n' { - break; - } - } - Err(_) => { panic!("could not go to next") } - } - } + let mut line = Vec::with_capacity(108); + let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes"); + byte_start += line_len; bounds.push(byte_start as u64); } bounds.push(file_length); @@ -51,12 +43,14 @@ fn main() { let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let mut reader = BufReader::new(&file); reader.seek(SeekFrom::Start(currposition)).unwrap(); - let mut bytes = reader.bytes(); - let mut t_stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); - - while let Some(line) = read_bytes_until(&mut bytes, b'\n') { + let mut line = Vec::with_capacity(108); + loop { + let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes"); + if line_len == 0 { + break; + } let (station, temp) = parse_line(&line); let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let temp = parse_temp(temp); @@ -76,6 +70,7 @@ fn main() { if currposition >= end { break; } + line.clear(); } let _ = tx.send(t_stations); }); diff --git 
a/src/main/rust/src/bin/single_thread.rs b/src/main/rust/src/bin/single_thread.rs index 88512b6..82c97d4 100644 --- a/src/main/rust/src/bin/single_thread.rs +++ b/src/main/rust/src/bin/single_thread.rs @@ -1,9 +1,9 @@ use std::collections::HashMap; use std::fs::File; -use std::io::{BufReader, Read}; +use std::io::{BufRead, BufReader}; use std::time::Instant; -use onebrc::{parse_line, read_bytes_until}; +use onebrc::parse_line; const DEFAULT_HASHMAP_LENGTH: usize = 10000; @@ -13,8 +13,13 @@ fn main() { HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); - let mut bytes = BufReader::new(&file).bytes(); - while let Some(line) = read_bytes_until(&mut bytes, b'\n') { + let mut reader = BufReader::new(&file); + let mut line = Vec::with_capacity(108); + loop { + let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes"); + if line_len == 0 { + break; + } let (station, temp) = parse_line(&line); let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; let temp = onebrc::parse_temp(temp); @@ -30,6 +35,7 @@ fn main() { }; stations.insert(station, measurements); } + line.clear(); } let mut stations: Vec = stations.iter().map(|(station, measurements)| { let measurements = measurements.to_string(); diff --git a/src/main/rust/src/lib.rs b/src/main/rust/src/lib.rs index 101ecf6..a546884 100644 --- a/src/main/rust/src/lib.rs +++ b/src/main/rust/src/lib.rs @@ -1,6 +1,4 @@ use std::fmt::Display; -use std::fs::File; -use std::io::{BufReader, Bytes}; #[derive(Copy, Clone)] pub struct StationMeasurements { @@ -68,28 +66,30 @@ pub fn parse_temp(bytes: &[u8]) -> isize { } } -#[inline] -pub fn read_bytes_until(bytes: &mut Bytes>, delimiter: u8) -> Option> { - let mut buf: Vec = Vec::with_capacity(108); - for byte in bytes { - if byte.is_err() { - panic!("Could not read byte"); - } - let byte = byte.unwrap(); - if delimiter == byte { - return Some(buf); - } - 
// `read_bytes_until` was retired: walking a `Bytes` iterator one byte at a time
// was slower than `BufRead::read_until`, which appends into a `Vec` the callers
// clear and reuse between lines.

/// Splits one measurement record into its station and temperature byte slices.
///
/// The record is expected to look like `station;temperature`, optionally
/// terminated by a single `\n` (the form produced by `BufRead::read_until`,
/// which keeps the delimiter). The newline is removed only when it is actually
/// present, so a final record at end-of-file without a trailing newline keeps
/// its last temperature byte.
///
/// Returns `(station, temperature)`, both borrowed from `line`:
/// * `station` is everything before the first `;`.
/// * `temperature` is everything after the first `;`, without the newline.
///
/// If `line` contains no `;` (including the empty line), the whole record
/// (minus the newline) is returned as `station` and `temperature` is empty;
/// the function never panics.
#[inline]
pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) {
    // `read_until` leaves the delimiter in the buffer, except for a last record
    // that hits EOF first, so the strip has to be conditional.
    let line = line.strip_suffix(b"\n").unwrap_or(line);

    match line.iter().position(|&byte| byte == b';') {
        Some(separator) => (&line[..separator], &line[separator + 1..]),
        // No separator: hand back an empty temperature instead of slicing
        // past the end of the record.
        None => (line, &line[line.len()..]),
    }
}