FxHashMap is faster after all...
This commit is contained in:
parent
b8f589096f
commit
ac5c45f8d5
@ -1,10 +1,10 @@
|
|||||||
use crate::models::station_measurements::StationMeasurements;
|
use crate::models::station_measurements::StationMeasurements;
|
||||||
use crate::utils::{hash, parse};
|
use crate::utils::{hash, parse};
|
||||||
use memmap2::MmapOptions;
|
use memmap2::MmapOptions;
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::{fs::File, thread};
|
use std::{fs::File, thread};
|
||||||
|
use rustc_hash::{FxHashMap as HashMap, FxBuildHasher};
|
||||||
|
|
||||||
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
@ -14,8 +14,9 @@ pub fn run() {
|
|||||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||||
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
|
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
|
||||||
let file_length = mmap.len();
|
let file_length = mmap.len();
|
||||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
let hasher = FxBuildHasher::default();
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||||
|
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||||
let (tx, rx) = mpsc::channel();
|
let (tx, rx) = mpsc::channel();
|
||||||
let cores = thread::available_parallelism().unwrap().into();
|
let cores = thread::available_parallelism().unwrap().into();
|
||||||
let chunk_length = file_length / cores;
|
let chunk_length = file_length / cores;
|
||||||
@ -40,8 +41,8 @@ pub fn run() {
|
|||||||
let (start, end) = *bounds.get(i).unwrap();
|
let (start, end) = *bounds.get(i).unwrap();
|
||||||
let mmap_slice = &mmap[start..end];
|
let mmap_slice = &mmap[start..end];
|
||||||
s.spawn(move || {
|
s.spawn(move || {
|
||||||
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||||
for line in mmap_slice.split(|&byte| byte == b'\n') {
|
for line in mmap_slice.split(|&byte| byte == b'\n') {
|
||||||
if line.len() == 0 {
|
if line.len() == 0 {
|
||||||
break;
|
break;
|
||||||
|
@ -12,7 +12,7 @@ pub fn run() {
|
|||||||
const FILE_PATH: &str = "../../../measurements.txt";
|
const FILE_PATH: &str = "../../../measurements.txt";
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
thread::scope(|s| {
|
thread::scope(|s| {
|
||||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
let (tx, rx) = mpsc::channel();
|
let (tx, rx) = mpsc::channel();
|
||||||
let cores = thread::available_parallelism().unwrap().into();
|
let cores = thread::available_parallelism().unwrap().into();
|
||||||
@ -44,7 +44,7 @@ pub fn run() {
|
|||||||
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
|
||||||
let mut reader = BufReader::new(&file);
|
let mut reader = BufReader::new(&file);
|
||||||
reader.seek(SeekFrom::Start(currposition)).unwrap();
|
reader.seek(SeekFrom::Start(currposition)).unwrap();
|
||||||
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
let mut line = Vec::with_capacity(108);
|
let mut line = Vec::with_capacity(108);
|
||||||
loop {
|
loop {
|
||||||
|
@ -13,7 +13,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
|||||||
pub fn run() {
|
pub fn run() {
|
||||||
const FILE_PATH: &str = "../../../measurements.txt";
|
const FILE_PATH: &str = "../../../measurements.txt";
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
let cores = thread::available_parallelism().unwrap().into();
|
let cores = thread::available_parallelism().unwrap().into();
|
||||||
let bounds = smol::block_on(async {
|
let bounds = smol::block_on(async {
|
||||||
@ -53,7 +53,7 @@ pub fn run() {
|
|||||||
.expect("File measurements.txt not found");
|
.expect("File measurements.txt not found");
|
||||||
let mut reader = BufReader::new(&mut file);
|
let mut reader = BufReader::new(&mut file);
|
||||||
reader.seek(SeekFrom::Start(currposition)).await.unwrap();
|
reader.seek(SeekFrom::Start(currposition)).await.unwrap();
|
||||||
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
let mut line = Vec::with_capacity(108);
|
let mut line = Vec::with_capacity(108);
|
||||||
loop {
|
loop {
|
||||||
|
@ -9,7 +9,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
|||||||
|
|
||||||
pub fn run() {
|
pub fn run() {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
|
|
||||||
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
||||||
|
@ -10,7 +10,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
|||||||
|
|
||||||
pub fn run() {
|
pub fn run() {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
let mut stations: HashMap<usize, (String, StationMeasurements)> =
|
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||||
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
|
|
||||||
smol::block_on(async {
|
smol::block_on(async {
|
||||||
|
@ -1,9 +1,15 @@
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub fn bytes(bytes: &[u8]) -> usize {
|
pub fn bytes(bytes: &[u8]) -> u64 {
|
||||||
let mut hash: usize = 0;
|
// Hash from https://curiouscoding.nl/posts/1brc/; still wrong for measurements3.txt (and slower?)
|
||||||
|
//let mut key = [0u8; 8];
|
||||||
|
//let l = bytes.len().min(8);
|
||||||
|
//key[..l].copy_from_slice(&bytes[..l]);
|
||||||
|
//key[0] ^= bytes.len() as u8;
|
||||||
|
//u64::from_ne_bytes(key)
|
||||||
|
let mut hash: u64 = 0;
|
||||||
let (chunks, remainder) = bytes.as_chunks::<8>();
|
let (chunks, remainder) = bytes.as_chunks::<8>();
|
||||||
for &chunk in chunks {
|
for &chunk in chunks {
|
||||||
hash = hash.wrapping_add(usize::from_be_bytes(chunk));
|
hash = hash.wrapping_add(u64::from_be_bytes(chunk));
|
||||||
}
|
}
|
||||||
let mut r = [0_u8; 8];
|
let mut r = [0_u8; 8];
|
||||||
r[0] = remainder.len() as u8;
|
r[0] = remainder.len() as u8;
|
||||||
@ -12,7 +18,7 @@ pub fn bytes(bytes: &[u8]) -> usize {
|
|||||||
r[idx] = byte;
|
r[idx] = byte;
|
||||||
idx += 1;
|
idx += 1;
|
||||||
}
|
}
|
||||||
hash += usize::from_be_bytes(r);
|
hash += u64::from_be_bytes(r);
|
||||||
hash
|
hash
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user