Use the std read_until method instead of my custom read_bytes_until function, because it is faster than my implementation built on the Bytes struct
This commit is contained in:
parent
c6b8273d65
commit
5aa94e67d1
@ -4,10 +4,11 @@ use std::{
|
|||||||
thread,
|
thread,
|
||||||
};
|
};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::{Read, Seek, SeekFrom};
|
use std::io::{BufRead, Seek, SeekFrom};
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use onebrc::{parse_line, parse_temp, read_bytes_until};
|
|
||||||
|
use onebrc::{parse_line, parse_temp};
|
||||||
|
|
||||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
@ -28,18 +29,9 @@ fn main() {
|
|||||||
let mut reader = BufReader::new(&file);
|
let mut reader = BufReader::new(&file);
|
||||||
let mut byte_start = chunk_length * i;
|
let mut byte_start = chunk_length * i;
|
||||||
reader.seek(SeekFrom::Start(byte_start as u64)).expect("could not seek");
|
reader.seek(SeekFrom::Start(byte_start as u64)).expect("could not seek");
|
||||||
let bytes = reader.bytes();
|
let mut line = Vec::with_capacity(108);
|
||||||
for byte in bytes {
|
let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes");
|
||||||
match byte {
|
byte_start += line_len;
|
||||||
Ok(byte) => {
|
|
||||||
byte_start += 1;
|
|
||||||
if byte == b'\n' {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(_) => { panic!("could not go to next") }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bounds.push(byte_start as u64);
|
bounds.push(byte_start as u64);
|
||||||
}
|
}
|
||||||
bounds.push(file_length);
|
bounds.push(file_length);
|
||||||
@ -51,12 +43,14 @@ fn main() {
|
|||||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||||
let mut reader = BufReader::new(&file);
|
let mut reader = BufReader::new(&file);
|
||||||
reader.seek(SeekFrom::Start(currposition)).unwrap();
|
reader.seek(SeekFrom::Start(currposition)).unwrap();
|
||||||
let mut bytes = reader.bytes();
|
|
||||||
|
|
||||||
let mut t_stations: HashMap<String, onebrc::StationMeasurements> =
|
let mut t_stations: HashMap<String, onebrc::StationMeasurements> =
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
|
let mut line = Vec::with_capacity(108);
|
||||||
while let Some(line) = read_bytes_until(&mut bytes, b'\n') {
|
loop {
|
||||||
|
let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes");
|
||||||
|
if line_len == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
let (station, temp) = parse_line(&line);
|
let (station, temp) = parse_line(&line);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||||
let temp = parse_temp(temp);
|
let temp = parse_temp(temp);
|
||||||
@ -76,6 +70,7 @@ fn main() {
|
|||||||
if currposition >= end {
|
if currposition >= end {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
line.clear();
|
||||||
}
|
}
|
||||||
let _ = tx.send(t_stations);
|
let _ = tx.send(t_stations);
|
||||||
});
|
});
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufReader, Read};
|
use std::io::{BufRead, BufReader};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use onebrc::{parse_line, read_bytes_until};
|
use onebrc::parse_line;
|
||||||
|
|
||||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
@ -13,8 +13,13 @@ fn main() {
|
|||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
|
|
||||||
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
||||||
let mut bytes = BufReader::new(&file).bytes();
|
let mut reader = BufReader::new(&file);
|
||||||
while let Some(line) = read_bytes_until(&mut bytes, b'\n') {
|
let mut line = Vec::with_capacity(108);
|
||||||
|
loop {
|
||||||
|
let line_len = reader.read_until(b'\n', &mut line).expect("could not read bytes");
|
||||||
|
if line_len == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
let (station, temp) = parse_line(&line);
|
let (station, temp) = parse_line(&line);
|
||||||
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
|
||||||
let temp = onebrc::parse_temp(temp);
|
let temp = onebrc::parse_temp(temp);
|
||||||
@ -30,6 +35,7 @@ fn main() {
|
|||||||
};
|
};
|
||||||
stations.insert(station, measurements);
|
stations.insert(station, measurements);
|
||||||
}
|
}
|
||||||
|
line.clear();
|
||||||
}
|
}
|
||||||
let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
|
let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
|
||||||
let measurements = measurements.to_string();
|
let measurements = measurements.to_string();
|
||||||
|
@ -1,6 +1,4 @@
|
|||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
use std::fs::File;
|
|
||||||
use std::io::{BufReader, Bytes};
|
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct StationMeasurements {
|
pub struct StationMeasurements {
|
||||||
@ -68,28 +66,30 @@ pub fn parse_temp(bytes: &[u8]) -> isize {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
// Using the Bytes struct costs more performance than the std read_until method, which uses a Vec instead of a slice
|
||||||
pub fn read_bytes_until(bytes: &mut Bytes<BufReader<&File>>, delimiter: u8) -> Option<Vec<u8>> {
|
// #[inline]
|
||||||
let mut buf: Vec<u8> = Vec::with_capacity(108);
|
// pub fn read_bytes_until(bytes: &mut Bytes<BufReader<&File>>, delimiter: u8) -> Option<Vec<u8>> {
|
||||||
for byte in bytes {
|
// let mut buf: Vec<u8> = Vec::with_capacity(108);
|
||||||
if byte.is_err() {
|
// for byte in bytes {
|
||||||
panic!("Could not read byte");
|
// if byte.is_err() {
|
||||||
}
|
// panic!("Could not read byte");
|
||||||
let byte = byte.unwrap();
|
// }
|
||||||
if delimiter == byte {
|
// let byte = byte.unwrap();
|
||||||
return Some(buf);
|
// if delimiter == byte {
|
||||||
}
|
// return Some(buf);
|
||||||
buf.push(byte);
|
// }
|
||||||
}
|
// buf.push(byte);
|
||||||
None
|
// }
|
||||||
}
|
// None
|
||||||
|
// }
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) {
|
pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) {
|
||||||
let mut idx = 0;
|
let mut idx = 0;
|
||||||
while idx < line.len() && line[idx] != b';' {
|
let line_len = line.len();
|
||||||
|
while idx < line_len && line[idx] != b';' {
|
||||||
idx += 1;
|
idx += 1;
|
||||||
}
|
}
|
||||||
let station = &line[0..idx];
|
let station = &line[0..idx];
|
||||||
(station, &line[(idx + 1)..])
|
(station, &line[(idx+1)..(line_len-1)])
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user