Moved from reading Strings to reading bytes. A little faster; this still needs to be implemented for the multithreaded solution.
This commit is contained in:
		@@ -1,10 +1,9 @@
 | 
			
		||||
use std::{
 | 
			
		||||
    fs::File,
 | 
			
		||||
    io::{BufRead, BufReader},
 | 
			
		||||
};
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
use std::fs::File;
 | 
			
		||||
use std::io::{BufReader, Read};
 | 
			
		||||
use std::time::Instant;
 | 
			
		||||
 | 
			
		||||
use onebrc::{parse_line, read_bytes_until};
 | 
			
		||||
 | 
			
		||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
 | 
			
		||||
 | 
			
		||||
@@ -14,12 +13,13 @@ fn main() {
 | 
			
		||||
        HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
 | 
			
		||||
 | 
			
		||||
    let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
 | 
			
		||||
    let reader = BufReader::new(file);
 | 
			
		||||
    for line_result in reader.lines() {
 | 
			
		||||
    let mut bytes = BufReader::new(file).bytes();
 | 
			
		||||
    while let Some(line_result) = read_bytes_until(&mut bytes, b'\n') {
 | 
			
		||||
        let line = line_result.expect("could not read line");
 | 
			
		||||
        let (station, temp) = line.split_once(';').unwrap();
 | 
			
		||||
        let temp = onebrc::parse_temp(temp.as_bytes());
 | 
			
		||||
        let measurements_option = stations.get_mut(station);
 | 
			
		||||
        let (station, temp) = parse_line(&line);
 | 
			
		||||
        let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
 | 
			
		||||
        let temp = onebrc::parse_temp(temp);
 | 
			
		||||
        let measurements_option = stations.get_mut(&station);
 | 
			
		||||
        if let Some(measurements) = measurements_option {
 | 
			
		||||
            measurements.update(temp);
 | 
			
		||||
        } else {
 | 
			
		||||
@@ -29,7 +29,7 @@ fn main() {
 | 
			
		||||
                count: 1,
 | 
			
		||||
                sum: temp,
 | 
			
		||||
            };
 | 
			
		||||
            stations.insert(station.to_owned(), measurements);
 | 
			
		||||
            stations.insert(station, measurements);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,6 @@
 | 
			
		||||
use std::fmt::Display;
 | 
			
		||||
use std::fs::File;
 | 
			
		||||
use std::io::{BufReader, Bytes};
 | 
			
		||||
 | 
			
		||||
#[derive(Copy, Clone)]
 | 
			
		||||
pub struct StationMeasurements {
 | 
			
		||||
@@ -65,3 +67,36 @@ pub fn parse_temp(bytes: &[u8]) -> isize {
 | 
			
		||||
        as_decimal as isize
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Reads bytes from `bytes` until `delimiter` is seen and returns them in a
/// fixed-size, `b'#'`-padded buffer.
///
/// The delimiter itself is consumed but not stored. Unused trailing slots of
/// the buffer keep their `b'#'` fill value, which marks the end of the data.
///
/// # Returns
///
/// * `Some(Ok(buf))` - a line was read. This includes an empty line (the
///   buffer is then all `b'#'`) and a final line that ends at end-of-input
///   without a trailing delimiter.
/// * `Some(Err(e))` - the underlying reader reported an error, or the line
///   did not fit into the 108-byte buffer (`ErrorKind::InvalidData`). In the
///   latter case the rest of the overlong line is left in `bytes`.
/// * `None` - end of input was reached and no bytes were pending.
#[inline]
pub fn read_bytes_until(
    bytes: &mut Bytes<BufReader<File>>,
    delimiter: u8,
) -> Option<std::io::Result<[u8; 108]>> {
    // 108 max length of line in bytes
    let mut buf: [u8; 108] = [b'#'; 108];
    let mut idx = 0;

    for byte in bytes.by_ref() {
        // Surface read failures through the declared Result instead of panicking here.
        let byte = match byte {
            Ok(byte) => byte,
            Err(err) => return Some(Err(err)),
        };

        if byte == delimiter {
            return Some(Ok(buf));
        }

        // Checked write: an overlong line becomes an error, not an out-of-bounds panic.
        match buf.get_mut(idx) {
            Some(slot) => *slot = byte,
            None => {
                return Some(Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    "line exceeds the 108-byte buffer",
                )));
            }
        }
        idx += 1;
    }

    // End of input: hand back a pending, unterminated last line rather than dropping it.
    if idx > 0 {
        Some(Ok(buf))
    } else {
        None
    }
}
 | 
			
		||||
 | 
			
		||||
/// Splits a raw measurement line into its station and temperature fields.
///
/// The station is everything before the first `b';'`. The temperature is
/// everything after that separator, up to (but not including) the first
/// `b'#'` or the end of `line`, whichever comes first. A `b'#'` that appears
/// before the separator is treated as part of the station.
///
/// If `line` contains no `b';'` at all, the whole input is returned as the
/// station together with an empty temperature slice, instead of panicking on
/// an inverted slice range.
#[inline]
pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) {
    let sep = match line.iter().position(|&b| b == b';') {
        Some(sep) => sep,
        // No separator: nothing can be attributed to the temperature field.
        None => return (line, &line[line.len()..]),
    };

    let (station, rest) = line.split_at(sep);
    // `rest` starts with the separator itself; skip it.
    let rest = &rest[1..];

    // The temperature ends at the first '#', if there is one.
    let end = rest
        .iter()
        .position(|&b| b == b'#')
        .unwrap_or(rest.len());

    (station, &rest[..end])
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user