Moved from reading lines as `String` to reading raw bytes. A little faster; the same change still needs to be implemented for the multithreaded solution.
This commit is contained in:
parent
b4e3992c65
commit
16cf4ca2ca
@ -1,10 +1,9 @@
|
||||
use std::{
|
||||
fs::File,
|
||||
io::{BufRead, BufReader},
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Read};
|
||||
use std::time::Instant;
|
||||
|
||||
use onebrc::{parse_line, read_bytes_until};
|
||||
|
||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
|
||||
@ -14,12 +13,13 @@ fn main() {
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
|
||||
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
||||
let reader = BufReader::new(file);
|
||||
for line_result in reader.lines() {
|
||||
let mut bytes = BufReader::new(file).bytes();
|
||||
while let Some(line_result) = read_bytes_until(&mut bytes, b'\n') {
|
||||
let line = line_result.expect("could not read line");
|
||||
let (station, temp) = line.split_once(';').unwrap();
|
||||
let temp = onebrc::parse_temp(temp.as_bytes());
|
||||
let measurements_option = stations.get_mut(station);
|
||||
let (station, temp) = parse_line(&line);
|
||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||
let temp = onebrc::parse_temp(temp);
|
||||
let measurements_option = stations.get_mut(&station);
|
||||
if let Some(measurements) = measurements_option {
|
||||
measurements.update(temp);
|
||||
} else {
|
||||
@ -29,7 +29,7 @@ fn main() {
|
||||
count: 1,
|
||||
sum: temp,
|
||||
};
|
||||
stations.insert(station.to_owned(), measurements);
|
||||
stations.insert(station, measurements);
|
||||
}
|
||||
}
|
||||
let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
|
||||
|
@ -1,4 +1,6 @@
|
||||
use std::fmt::Display;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Bytes};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct StationMeasurements {
|
||||
@ -65,3 +67,36 @@ pub fn parse_temp(bytes: &[u8]) -> isize {
|
||||
as_decimal as isize
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads one `delimiter`-terminated record from `bytes` into a fixed 108-byte buffer.
///
/// The delimiter itself is consumed but not stored. Unused trailing positions of the
/// returned buffer keep the padding byte `b'#'`.
///
/// # Returns
///
/// * `Some(Ok(buf))` when a delimiter was found, or when the input ended after at
///   least one byte had been read (a final record without a trailing delimiter).
/// * `Some(Err(e))` when the underlying reader reported an error, or when a record
///   is longer than the 108-byte buffer (`ErrorKind::InvalidData`). In the overlong
///   case the remainder of that record is left unread in `bytes`.
/// * `None` when the input is exhausted and no bytes were read.
#[inline]
pub fn read_bytes_until(bytes: &mut Bytes<BufReader<File>>, delimiter: u8) -> Option<std::io::Result<[u8; 108]>> {
    // 108 = maximum length of one line in bytes; `#` marks the unused tail.
    let mut buf: [u8; 108] = [b'#'; 108];
    let mut idx = 0;
    for byte in bytes.by_ref() {
        // Surface I/O failures through the declared `io::Result` instead of panicking.
        let byte = match byte {
            Ok(byte) => byte,
            Err(err) => return Some(Err(err)),
        };
        if byte == delimiter {
            return Some(Ok(buf));
        }
        // Checked write: an overlong record becomes an error rather than an
        // out-of-bounds panic.
        match buf.get_mut(idx) {
            Some(slot) => *slot = byte,
            None => {
                return Some(Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    "line is longer than the 108-byte line buffer",
                )));
            }
        }
        idx += 1;
    }
    // End of input: hand back a trailing record that had no final delimiter
    // instead of silently dropping it.
    if idx > 0 {
        Some(Ok(buf))
    } else {
        None
    }
}
|
||||
|
||||
/// Splits a `station;value` record into its two byte slices.
///
/// The station is everything before the first `b';'`. The value is everything
/// after that `;` up to (not including) the first `b'#'` padding byte, or up to
/// the end of `line` when no padding is present.
///
/// If `line` contains no `b';'` at all (including an empty `line`), the whole
/// input is returned as the station and the value slice is empty, rather than
/// panicking on an out-of-range slice.
#[inline]
pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) {
    let split = match line.iter().position(|&byte| byte == b';') {
        Some(split) => split,
        // No separator: there is no value part to return.
        None => return (line, &line[line.len()..]),
    };
    // `split < line.len()`, so `split + 1 <= line.len()` and this slice is in range.
    let value = &line[split + 1..];
    // Cut the value at the first padding byte, if the record is padded.
    let end = value
        .iter()
        .position(|&byte| byte == b'#')
        .unwrap_or(value.len());
    (&line[..split], &value[..end])
}
|
Loading…
Reference in New Issue
Block a user