Store the station name as a byte slice and convert it to a string only once at the end; keying the map by the u64 hash is still faster than keying by the byte slice
This commit is contained in:
parent
98cd6e930c
commit
45b3014cbb
@ -17,7 +17,8 @@ pub fn run() {
|
||||
let mmap_ptr = mmap.as_ptr();
|
||||
let file_length = mmap.len();
|
||||
let hasher = FxBuildHasher;
|
||||
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
// The byte slice could now serve as the map key directly, but keying by its u64 hash is still faster
|
||||
let mut stations: HashMap<u64, (&[u8], StationMeasurements)> =
|
||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||
let (tx, rx) = mpsc::channel();
|
||||
let cores = thread::available_parallelism().unwrap().into();
|
||||
@ -43,7 +44,7 @@ pub fn run() {
|
||||
let (start, end) = *bounds.get(i).unwrap();
|
||||
let mmap_slice = unsafe { from_raw_parts(mmap_ptr.add(start), end - start) };
|
||||
s.spawn(move || {
|
||||
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
|
||||
let mut t_stations: HashMap<u64, (&[u8], StationMeasurements)> =
|
||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||
for line in mmap_slice.split(|&byte| byte == b'\n') {
|
||||
if line.is_empty() {
|
||||
@ -51,7 +52,6 @@ pub fn run() {
|
||||
}
|
||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||
let hash = hash::bytes(station);
|
||||
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||
let temp = parse::temp(temp);
|
||||
let measurements_option = t_stations.get_mut(&hash);
|
||||
if let Some((_, measurements)) = measurements_option {
|
||||
@ -63,7 +63,7 @@ pub fn run() {
|
||||
count: 1,
|
||||
sum: temp,
|
||||
};
|
||||
t_stations.insert(hash, (station.to_string(), measurements));
|
||||
t_stations.insert(hash, (station, measurements));
|
||||
}
|
||||
}
|
||||
let _ = tx.send(t_stations);
|
||||
@ -76,13 +76,14 @@ pub fn run() {
|
||||
if let Some((_, joined_measurements)) = joined_measurements_options {
|
||||
joined_measurements.merge(measurements);
|
||||
} else {
|
||||
stations.insert(*hash, (station.to_owned(), *measurements));
|
||||
stations.insert(*hash, (station, *measurements));
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut stations: Vec<String> = stations
|
||||
.iter()
|
||||
.map(|(_, (station, measurements))| {
|
||||
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||
let measurements = measurements.to_string();
|
||||
#[cfg(feature = "json")]
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user