FxHashMap is faster after all...
This commit is contained in:
parent
b8f589096f
commit
ac5c45f8d5
@ -1,10 +1,10 @@
|
||||
use crate::models::station_measurements::StationMeasurements;
|
||||
use crate::utils::{hash, parse};
|
||||
use memmap2::MmapOptions;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::mpsc;
|
||||
use std::time::Instant;
|
||||
use std::{fs::File, thread};
|
||||
use rustc_hash::{FxHashMap as HashMap, FxBuildHasher};
|
||||
|
||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
|
||||
@ -14,8 +14,9 @@ pub fn run() {
|
||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
|
||||
let file_length = mmap.len();
|
||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
let hasher = FxBuildHasher::default();
|
||||
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||
let (tx, rx) = mpsc::channel();
|
||||
let cores = thread::available_parallelism().unwrap().into();
|
||||
let chunk_length = file_length / cores;
|
||||
@ -40,8 +41,8 @@ pub fn run() {
|
||||
let (start, end) = *bounds.get(i).unwrap();
|
||||
let mmap_slice = &mmap[start..end];
|
||||
s.spawn(move || {
|
||||
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||
for line in mmap_slice.split(|&byte| byte == b'\n') {
|
||||
if line.len() == 0 {
|
||||
break;
|
||||
|
@ -12,7 +12,7 @@ pub fn run() {
|
||||
const FILE_PATH: &str = "../../../measurements.txt";
|
||||
let now = Instant::now();
|
||||
thread::scope(|s| {
|
||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
let (tx, rx) = mpsc::channel();
|
||||
let cores = thread::available_parallelism().unwrap().into();
|
||||
@ -44,7 +44,7 @@ pub fn run() {
|
||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||
let mut reader = BufReader::new(&file);
|
||||
reader.seek(SeekFrom::Start(currposition)).unwrap();
|
||||
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
let mut line = Vec::with_capacity(108);
|
||||
loop {
|
||||
|
@ -13,7 +13,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
pub fn run() {
|
||||
const FILE_PATH: &str = "../../../measurements.txt";
|
||||
let now = Instant::now();
|
||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
let cores = thread::available_parallelism().unwrap().into();
|
||||
let bounds = smol::block_on(async {
|
||||
@ -53,7 +53,7 @@ pub fn run() {
|
||||
.expect("File measurements.txt not found");
|
||||
let mut reader = BufReader::new(&mut file);
|
||||
reader.seek(SeekFrom::Start(currposition)).await.unwrap();
|
||||
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
let mut line = Vec::with_capacity(108);
|
||||
loop {
|
||||
|
@ -9,7 +9,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
|
||||
pub fn run() {
|
||||
let now = Instant::now();
|
||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
|
||||
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
||||
|
@ -10,7 +10,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||
|
||||
pub fn run() {
|
||||
let now = Instant::now();
|
||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
||||
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||
|
||||
smol::block_on(async {
|
||||
|
@ -1,9 +1,15 @@
|
||||
#[inline]
|
||||
pub fn bytes(bytes: &[u8]) -> usize {
|
||||
let mut hash: usize = 0;
|
||||
pub fn bytes(bytes: &[u8]) -> u64 {
|
||||
// Hash from https://curiouscoding.nl/posts/1brc/ -- still gives wrong results for measurements3.txt (and is possibly slower?)
|
||||
//let mut key = [0u8; 8];
|
||||
//let l = bytes.len().min(8);
|
||||
//key[..l].copy_from_slice(&bytes[..l]);
|
||||
//key[0] ^= bytes.len() as u8;
|
||||
//u64::from_ne_bytes(key)
|
||||
let mut hash: u64 = 0;
|
||||
let (chunks, remainder) = bytes.as_chunks::<8>();
|
||||
for &chunk in chunks {
|
||||
hash = hash.wrapping_add(usize::from_be_bytes(chunk));
|
||||
hash = hash.wrapping_add(u64::from_be_bytes(chunk));
|
||||
}
|
||||
let mut r = [0_u8; 8];
|
||||
r[0] = remainder.len() as u8;
|
||||
@ -12,7 +18,7 @@ pub fn bytes(bytes: &[u8]) -> usize {
|
||||
r[idx] = byte;
|
||||
idx += 1;
|
||||
}
|
||||
hash += usize::from_be_bytes(r);
|
||||
hash += u64::from_be_bytes(r);
|
||||
hash
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user