Moved from reading lines as `String` to reading raw bytes. A little faster; this still needs to be implemented for the multithreaded solution.
This commit is contained in:
		| @@ -1,10 +1,9 @@ | ||||
| use std::{ | ||||
|     fs::File, | ||||
|     io::{BufRead, BufReader}, | ||||
| }; | ||||
| use std::collections::HashMap; | ||||
| use std::fs::File; | ||||
| use std::io::{BufReader, Read}; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use onebrc::{parse_line, read_bytes_until}; | ||||
|  | ||||
| const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||
|  | ||||
| @@ -14,12 +13,13 @@ fn main() { | ||||
|         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|  | ||||
|     let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); | ||||
|     let reader = BufReader::new(file); | ||||
|     for line_result in reader.lines() { | ||||
|     let mut bytes = BufReader::new(file).bytes(); | ||||
|     while let Some(line_result) = read_bytes_until(&mut bytes, b'\n') { | ||||
|         let line = line_result.expect("could not read line"); | ||||
|         let (station, temp) = line.split_once(';').unwrap(); | ||||
|         let temp = onebrc::parse_temp(temp.as_bytes()); | ||||
|         let measurements_option = stations.get_mut(station); | ||||
|         let (station, temp) = parse_line(&line); | ||||
|         let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; | ||||
|         let temp = onebrc::parse_temp(temp); | ||||
|         let measurements_option = stations.get_mut(&station); | ||||
|         if let Some(measurements) = measurements_option { | ||||
|             measurements.update(temp); | ||||
|         } else { | ||||
| @@ -29,7 +29,7 @@ fn main() { | ||||
|                 count: 1, | ||||
|                 sum: temp, | ||||
|             }; | ||||
|             stations.insert(station.to_owned(), measurements); | ||||
|             stations.insert(station, measurements); | ||||
|         } | ||||
|     } | ||||
|     let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| { | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| use std::fmt::Display; | ||||
| use std::fs::File; | ||||
| use std::io::{BufReader, Bytes}; | ||||
|  | ||||
| #[derive(Copy, Clone)] | ||||
| pub struct StationMeasurements { | ||||
| @@ -64,4 +66,37 @@ pub fn parse_temp(bytes: &[u8]) -> isize { | ||||
|     } else { | ||||
|         as_decimal as isize | ||||
|     } | ||||
| } | ||||
|  | ||||
/// Reads bytes from `bytes` up to (and consuming) the next `delimiter`.
///
/// The bytes read are copied to the front of a fixed 108-byte buffer; every
/// unused trailing position keeps the filler byte `b'#'`. The delimiter itself
/// is not stored.
///
/// # Returns
///
/// * `Some(Ok(buf))` when a delimiter was found, or when the input ended after
///   at least one byte had been read (a final line without a trailing
///   delimiter is still returned).
/// * `Some(Err(_))` when the underlying reader reports an error, or when more
///   than 108 bytes precede the delimiter (`ErrorKind::InvalidData`). In the
///   latter case the iterator is left positioned in the middle of that line.
/// * `None` when the input is exhausted and no bytes were read.
#[inline]
pub fn read_bytes_until(bytes: &mut Bytes<BufReader<File>>, delimiter: u8) -> Option<std::io::Result<[u8; 108]>> {
    // 108 max length of line in bytes
    let mut buf: [u8; 108] = [b'#'; 108];
    let mut idx = 0;
    for byte in bytes.by_ref() {
        // Surface I/O failures through the Result the signature already exposes.
        let byte = match byte {
            Ok(byte) => byte,
            Err(err) => return Some(Err(err)),
        };
        if byte == delimiter {
            return Some(Ok(buf));
        }
        // Guard the fixed-size buffer: report an overlong line instead of
        // panicking on an out-of-bounds index.
        if idx == buf.len() {
            return Some(Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "line exceeds 108 bytes",
            )));
        }
        buf[idx] = byte;
        idx += 1;
    }
    // End of input: hand back a trailing line that had no final delimiter.
    if idx > 0 {
        Some(Ok(buf))
    } else {
        None
    }
}
|  | ||||
/// Splits a line of the form `station;temperature` into its two byte slices.
///
/// The station is everything before the first `b';'`. The temperature starts
/// right after that separator and ends at the first `b'#'` byte or at the end
/// of `line`, whichever comes first.
///
/// If `line` contains no `b';'`, the whole input is returned as the station and
/// the temperature slice is empty (instead of panicking on an invalid range).
#[inline]
pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) {
    let separator = match line.iter().position(|&byte| byte == b';') {
        Some(separator) => separator,
        // No separator: there is no temperature field to slice out.
        None => return (line, &[]),
    };
    let after_separator = &line[separator + 1..];
    // The temperature runs until the first '#' byte, if there is one.
    let temp_len = after_separator
        .iter()
        .position(|&byte| byte == b'#')
        .unwrap_or(after_separator.len());
    (&line[..separator], &after_separator[..temp_len])
}
		Reference in New Issue
	
	Block a user