Moved from reading Strings to reading bytes. A little faster; this still needs to be implemented for the multithreaded solution.

This commit is contained in:
Fabian Schmidt 2024-07-23 16:30:50 +02:00
parent b4e3992c65
commit 16cf4ca2ca
2 changed files with 45 additions and 10 deletions

View File

@ -1,10 +1,9 @@
use std::{
fs::File,
io::{BufRead, BufReader},
};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, Read};
use std::time::Instant;
use onebrc::{parse_line, read_bytes_until};
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -14,12 +13,13 @@ fn main() {
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
let reader = BufReader::new(file);
for line_result in reader.lines() {
let mut bytes = BufReader::new(file).bytes();
while let Some(line_result) = read_bytes_until(&mut bytes, b'\n') {
let line = line_result.expect("could not read line");
let (station, temp) = line.split_once(';').unwrap();
let temp = onebrc::parse_temp(temp.as_bytes());
let measurements_option = stations.get_mut(station);
let (station, temp) = parse_line(&line);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
let temp = onebrc::parse_temp(temp);
let measurements_option = stations.get_mut(&station);
if let Some(measurements) = measurements_option {
measurements.update(temp);
} else {
@ -29,7 +29,7 @@ fn main() {
count: 1,
sum: temp,
};
stations.insert(station.to_owned(), measurements);
stations.insert(station, measurements);
}
}
let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {

View File

@ -1,4 +1,6 @@
use std::fmt::Display;
use std::fs::File;
use std::io::{BufReader, Bytes};
#[derive(Copy, Clone)]
pub struct StationMeasurements {
@ -64,4 +66,37 @@ pub fn parse_temp(bytes: &[u8]) -> isize {
} else {
as_decimal as isize
}
}
/// Reads bytes from `bytes` up to (and consuming) the next `delimiter`.
///
/// The bytes before the delimiter are copied into a fixed 108-byte buffer
/// that is pre-filled with `b'#'`, so the unused tail of the returned array
/// is `#` padding.
///
/// Returns:
/// * `Some(Ok(buf))` when a delimiter was found, or when the input ended
///   after at least one byte was read (a final line without a trailing
///   delimiter is still returned).
/// * `Some(Err(_))` when the underlying reader fails, or when more than 108
///   bytes are read without encountering the delimiter.
/// * `None` when the input is exhausted and nothing was read.
#[inline]
pub fn read_bytes_until(
    bytes: &mut Bytes<BufReader<File>>,
    delimiter: u8,
) -> Option<std::io::Result<[u8; 108]>> {
    // Maximum line length in bytes that this reader supports.
    const MAX_LINE_LEN: usize = 108;

    let mut buf = [b'#'; MAX_LINE_LEN];
    let mut idx = 0;

    for byte in bytes.by_ref() {
        // Surface I/O failures to the caller instead of panicking here.
        let byte = match byte {
            Ok(byte) => byte,
            Err(err) => return Some(Err(err)),
        };
        if byte == delimiter {
            return Some(Ok(buf));
        }
        // Checked write: an over-long line becomes an error, not an
        // out-of-bounds panic.
        match buf.get_mut(idx) {
            Some(slot) => *slot = byte,
            None => {
                return Some(Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    "line exceeds the maximum length of 108 bytes",
                )))
            }
        }
        idx += 1;
    }

    // End of input: hand back a trailing line that had no final delimiter.
    if idx > 0 {
        Some(Ok(buf))
    } else {
        None
    }
}
/// Splits a raw line into its `(station, temperature)` byte slices.
///
/// The station is everything before the first `b';'`. The temperature is
/// everything after that separator up to, but not including, the first
/// `b'#'` that follows it, or the end of `line` if there is none.
///
/// If `line` contains no `b';'`, the whole input is returned as the station
/// and the temperature slice is empty.
#[inline]
pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) {
    let sep = line
        .iter()
        .position(|&byte| byte == b';')
        .unwrap_or(line.len());
    let station = &line[..sep];

    // Skip the separator itself; `get` yields `None` when there was no
    // separator, in which case nothing follows the station.
    let rest = line.get(sep + 1..).unwrap_or(&[]);
    let end = rest
        .iter()
        .position(|&byte| byte == b'#')
        .unwrap_or(rest.len());

    (station, &rest[..end])
}