Use the std `read_until` method instead of my custom function, because it is faster than my implementation based on the `Bytes` struct
This commit is contained in:
parent
c6b8273d65
commit
5aa94e67d1
@ -4,10 +4,11 @@ use std::{
|
||||
thread,
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use std::io::{BufRead, Seek, SeekFrom};
|
||||
use std::sync::mpsc;
|
||||
use std::time::Instant;
|
||||
use onebrc::{parse_line, parse_temp, read_bytes_until};
|
||||
|
||||
use onebrc::{parse_line, parse_temp};
|
||||
|
||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
|
||||
@ -28,18 +29,9 @@ fn main() {
|
||||
let mut reader = BufReader::new(&file);
|
||||
let mut byte_start = chunk_length * i;
|
||||
reader.seek(SeekFrom::Start(byte_start as u64)).expect("could not seek");
|
||||
let bytes = reader.bytes();
|
||||
for byte in bytes {
|
||||
match byte {
|
||||
Ok(byte) => {
|
||||
byte_start += 1;
|
||||
if byte == b'\n' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(_) => { panic!("could not go to next") }
|
||||
}
|
||||
}
|
||||
let mut line = Vec::with_capacity(108);
|
||||
let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes");
|
||||
byte_start += line_len;
|
||||
bounds.push(byte_start as u64);
|
||||
}
|
||||
bounds.push(file_length);
|
||||
@ -51,12 +43,14 @@ fn main() {
|
||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||
let mut reader = BufReader::new(&file);
|
||||
reader.seek(SeekFrom::Start(currposition)).unwrap();
|
||||
let mut bytes = reader.bytes();
|
||||
|
||||
let mut t_stations: HashMap<String, onebrc::StationMeasurements> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
|
||||
while let Some(line) = read_bytes_until(&mut bytes, b'\n') {
|
||||
let mut line = Vec::with_capacity(108);
|
||||
loop {
|
||||
let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes");
|
||||
if line_len == 0 {
|
||||
break;
|
||||
}
|
||||
let (station, temp) = parse_line(&line);
|
||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||
let temp = parse_temp(temp);
|
||||
@ -76,6 +70,7 @@ fn main() {
|
||||
if currposition >= end {
|
||||
break;
|
||||
}
|
||||
line.clear();
|
||||
}
|
||||
let _ = tx.send(t_stations);
|
||||
});
|
||||
|
@ -1,9 +1,9 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Read};
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::time::Instant;
|
||||
|
||||
use onebrc::{parse_line, read_bytes_until};
|
||||
use onebrc::parse_line;
|
||||
|
||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
|
||||
@ -13,8 +13,13 @@ fn main() {
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
|
||||
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
||||
let mut bytes = BufReader::new(&file).bytes();
|
||||
while let Some(line) = read_bytes_until(&mut bytes, b'\n') {
|
||||
let mut reader = BufReader::new(&file);
|
||||
let mut line = Vec::with_capacity(108);
|
||||
loop {
|
||||
let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes");
|
||||
if line_len == 0 {
|
||||
break;
|
||||
}
|
||||
let (station, temp) = parse_line(&line);
|
||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||
let temp = onebrc::parse_temp(temp);
|
||||
@ -30,6 +35,7 @@ fn main() {
|
||||
};
|
||||
stations.insert(station, measurements);
|
||||
}
|
||||
line.clear();
|
||||
}
|
||||
let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
|
||||
let measurements = measurements.to_string();
|
||||
|
@ -1,6 +1,4 @@
|
||||
use std::fmt::Display;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Bytes};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct StationMeasurements {
|
||||
@ -68,28 +66,30 @@ pub fn parse_temp(bytes: &[u8]) -> isize {
|
||||
}
|
||||
}
|
||||
|
||||
/// Collects bytes from `bytes` until `delimiter` is encountered.
///
/// The delimiter itself is consumed but not included in the returned buffer.
///
/// # Returns
///
/// * `Some(record)` when a delimiter was found (the record may be empty), or
///   when the input ended after at least one byte had been collected, so a
///   final record without a trailing delimiter is not lost.
/// * `None` when the input is exhausted and nothing was collected.
///
/// # Panics
///
/// Panics with a message starting with "Could not read byte" if the
/// underlying reader reports an I/O error.
#[inline]
pub fn read_bytes_until(bytes: &mut Bytes<BufReader<&File>>, delimiter: u8) -> Option<Vec<u8>> {
    let mut buf: Vec<u8> = Vec::with_capacity(108);
    for byte in bytes {
        let byte = byte.expect("Could not read byte");
        if byte == delimiter {
            return Some(buf);
        }
        buf.push(byte);
    }
    // The input ended without a closing delimiter: hand back whatever was
    // collected instead of dropping it; report exhaustion only when empty.
    if buf.is_empty() {
        None
    } else {
        Some(buf)
    }
}
|
||||
// Iterating byte by byte through the `Bytes` struct is slower than the std `read_until` method, which fills a `Vec` buffer instead of yielding single bytes
|
||||
// #[inline]
|
||||
// pub fn read_bytes_until(bytes: &mut Bytes<BufReader<&File>>, delimiter: u8) -> Option<Vec<u8>> {
|
||||
// let mut buf: Vec<u8> = Vec::with_capacity(108);
|
||||
// for byte in bytes {
|
||||
// if byte.is_err() {
|
||||
// panic!("Could not read byte");
|
||||
// }
|
||||
// let byte = byte.unwrap();
|
||||
// if delimiter == byte {
|
||||
// return Some(buf);
|
||||
// }
|
||||
// buf.push(byte);
|
||||
// }
|
||||
// None
|
||||
// }
|
||||
|
||||
/// Splits one measurement record into its station and temperature fields.
///
/// `line` is expected to look like `station;temperature`, optionally ending
/// with a single `\n` (as produced by `BufRead::read_until`). The trailing
/// newline, if present, is not part of either returned slice.
///
/// # Returns
///
/// A `(station, temperature)` pair of sub-slices of `line`, split at the
/// first `;`. If `line` contains no `;`, the whole record is returned as the
/// station and the temperature slice is empty. An empty `line` yields two
/// empty slices. This function never panics.
#[inline]
pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) {
    // Only drop the last byte when it really is the line terminator; the
    // final line of a file may come without one.
    let record = line.strip_suffix(b"\n").unwrap_or(line);
    match record.iter().position(|&byte| byte == b';') {
        Some(idx) => (&record[..idx], &record[idx + 1..]),
        // No separator: avoid slicing past the end and report an empty value.
        None => (record, &record[record.len()..]),
    }
}
|
Loading…
Reference in New Issue
Block a user