Store the station name as a byte slice and convert it to a string only once at the end; keying the map by the u64 hash is still faster than keying it by the byte slice
This commit is contained in:
parent
98cd6e930c
commit
45b3014cbb
@ -17,7 +17,8 @@ pub fn run() {
|
|||||||
let mmap_ptr = mmap.as_ptr();
|
let mmap_ptr = mmap.as_ptr();
|
||||||
let file_length = mmap.len();
|
let file_length = mmap.len();
|
||||||
let hasher = FxBuildHasher;
|
let hasher = FxBuildHasher;
|
||||||
let mut stations: HashMap<u64, (String, StationMeasurements)> =
|
// The byte slice could now serve as the map key directly, but keying by its precomputed u64 hash is still faster
|
||||||
|
let mut stations: HashMap<u64, (&[u8], StationMeasurements)> =
|
||||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||||
let (tx, rx) = mpsc::channel();
|
let (tx, rx) = mpsc::channel();
|
||||||
let cores = thread::available_parallelism().unwrap().into();
|
let cores = thread::available_parallelism().unwrap().into();
|
||||||
@ -43,7 +44,7 @@ pub fn run() {
|
|||||||
let (start, end) = *bounds.get(i).unwrap();
|
let (start, end) = *bounds.get(i).unwrap();
|
||||||
let mmap_slice = unsafe { from_raw_parts(mmap_ptr.add(start), end - start) };
|
let mmap_slice = unsafe { from_raw_parts(mmap_ptr.add(start), end - start) };
|
||||||
s.spawn(move || {
|
s.spawn(move || {
|
||||||
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
|
let mut t_stations: HashMap<u64, (&[u8], StationMeasurements)> =
|
||||||
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
|
||||||
for line in mmap_slice.split(|&byte| byte == b'\n') {
|
for line in mmap_slice.split(|&byte| byte == b'\n') {
|
||||||
if line.is_empty() {
|
if line.is_empty() {
|
||||||
@ -51,7 +52,6 @@ pub fn run() {
|
|||||||
}
|
}
|
||||||
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
|
||||||
let hash = hash::bytes(station);
|
let hash = hash::bytes(station);
|
||||||
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
|
||||||
let temp = parse::temp(temp);
|
let temp = parse::temp(temp);
|
||||||
let measurements_option = t_stations.get_mut(&hash);
|
let measurements_option = t_stations.get_mut(&hash);
|
||||||
if let Some((_, measurements)) = measurements_option {
|
if let Some((_, measurements)) = measurements_option {
|
||||||
@ -63,7 +63,7 @@ pub fn run() {
|
|||||||
count: 1,
|
count: 1,
|
||||||
sum: temp,
|
sum: temp,
|
||||||
};
|
};
|
||||||
t_stations.insert(hash, (station.to_string(), measurements));
|
t_stations.insert(hash, (station, measurements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let _ = tx.send(t_stations);
|
let _ = tx.send(t_stations);
|
||||||
@ -76,13 +76,14 @@ pub fn run() {
|
|||||||
if let Some((_, joined_measurements)) = joined_measurements_options {
|
if let Some((_, joined_measurements)) = joined_measurements_options {
|
||||||
joined_measurements.merge(measurements);
|
joined_measurements.merge(measurements);
|
||||||
} else {
|
} else {
|
||||||
stations.insert(*hash, (station.to_owned(), *measurements));
|
stations.insert(*hash, (station, *measurements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut stations: Vec<String> = stations
|
let mut stations: Vec<String> = stations
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(_, (station, measurements))| {
|
.map(|(_, (station, measurements))| {
|
||||||
|
let station = unsafe { std::str::from_utf8_unchecked(station) };
|
||||||
let measurements = measurements.to_string();
|
let measurements = measurements.to_string();
|
||||||
#[cfg(feature = "json")]
|
#[cfg(feature = "json")]
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user