Actually, I just needed to add target-cpu=native to [build] in Cargo.toml. I did that, created the HashMaps with an initial capacity, and also added a reference implementation (which, unlike my solution, uses libraries).
This commit is contained in:
parent
0adcf3dec5
commit
1f4aff1255
37
rust/Cargo.lock
generated
37
rust/Cargo.lock
generated
@ -97,6 +97,17 @@ version = "2.5.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
|
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bstr"
|
||||||
|
version = "1.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bumpalo"
|
name = "bumpalo"
|
||||||
version = "3.16.0"
|
version = "3.16.0"
|
||||||
@ -441,9 +452,19 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "memchr"
|
||||||
version = "2.7.2"
|
version = "2.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
|
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memmap"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memmap2"
|
name = "memmap2"
|
||||||
@ -975,10 +996,22 @@ checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
|
|||||||
name = "rust"
|
name = "rust"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"bstr",
|
||||||
|
"fast-float",
|
||||||
"hashbrown",
|
"hashbrown",
|
||||||
|
"memchr",
|
||||||
|
"memmap",
|
||||||
"polars",
|
"polars",
|
||||||
|
"rayon",
|
||||||
|
"rustc-hash",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "2.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustversion"
|
name = "rustversion"
|
||||||
version = "1.0.15"
|
version = "1.0.15"
|
||||||
|
@ -6,5 +6,14 @@ edition = "2021"
|
|||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
bstr = "1.9.1"
|
||||||
|
fast-float = "0.2.0"
|
||||||
hashbrown = "0.14.3"
|
hashbrown = "0.14.3"
|
||||||
|
memchr = "2.7.4"
|
||||||
|
memmap = "0.7.0"
|
||||||
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
|
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
|
||||||
|
rayon = "1.10.0"
|
||||||
|
rustc-hash = "2.0.0"
|
||||||
|
|
||||||
|
# NOTE(review): Cargo documents `[build]` / `rustflags` as a configuration key
# read from `.cargo/config.toml`, not as a Cargo.toml manifest key. Confirm this
# table actually takes effect here (Cargo may report it as an unused manifest key).
[build]
|
||||||
|
rustflags = ["-C target-cpu=native"]
|
||||||
|
@ -15,9 +15,11 @@ struct StationMeasurements {
|
|||||||
sum: f64,
|
sum: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let stations: Arc<Mutex<HashMap<String, StationMeasurements>>> =
|
let stations: Arc<Mutex<HashMap<String, StationMeasurements>>> =
|
||||||
Arc::new(Mutex::new(HashMap::new()));
|
Arc::new(Mutex::new(HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH)));
|
||||||
|
|
||||||
let cores: usize = std::thread::available_parallelism().unwrap().into();
|
let cores: usize = std::thread::available_parallelism().unwrap().into();
|
||||||
|
|
||||||
@ -29,7 +31,8 @@ fn main() {
|
|||||||
let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length);
|
let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length);
|
||||||
let stations_clone = stations.clone();
|
let stations_clone = stations.clone();
|
||||||
let handle = thread::spawn(move || {
|
let handle = thread::spawn(move || {
|
||||||
let mut t_stations: HashMap<String, StationMeasurements> = HashMap::new();
|
let mut t_stations: HashMap<String, StationMeasurements> =
|
||||||
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
let mut line_num = 0;
|
let mut line_num = 0;
|
||||||
for line in line_chunk {
|
for line in line_chunk {
|
||||||
line_num += 1;
|
line_num += 1;
|
||||||
|
116
rust/src/bin/referenceImpl.rs
Normal file
116
rust/src/bin/referenceImpl.rs
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
use bstr::{BStr, ByteSlice};
|
||||||
|
use memmap::MmapOptions;
|
||||||
|
use rustc_hash::FxHashMap as HashMap;
|
||||||
|
use std::{fmt::Display, fs::File};
|
||||||
|
|
||||||
|
use rayon::prelude::*;
|
||||||
|
|
||||||
|
/// Running statistics for a single station: extremes, sample count and sum.
#[derive(Debug)]
struct State {
    /// Smallest value folded in so far.
    min: f64,
    /// Largest value folded in so far.
    max: f64,
    /// Number of values folded in so far.
    count: u64,
    /// Sum of all values folded in so far.
    sum: f64,
}

impl Default for State {
    /// Builds an empty accumulator.
    ///
    /// The extremes start at the opposite ends of the finite `f64` range so
    /// that the first finite value folded in replaces both of them.
    fn default() -> Self {
        let (min, max) = (f64::MAX, f64::MIN);
        Self {
            min,
            max,
            count: 0,
            sum: 0.0,
        }
    }
}

impl Display for State {
    /// Renders the state as `min/mean/max`, each with one fractional digit.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mean = self.sum / (self.count as f64);
        write!(f, "{:.1}/{:.1}/{:.1}", self.min, mean, self.max)
    }
}

impl State {
    /// Folds one value `v` into the statistics.
    fn update(&mut self, v: f64) {
        self.count += 1;
        self.sum += v;
        self.min = f64::min(self.min, v);
        self.max = f64::max(self.max, v);
    }

    /// Folds the statistics accumulated in `other` into `self`.
    fn merge(&mut self, other: &Self) {
        self.count += other.count;
        self.sum += other.sum;
        self.min = f64::min(self.min, other.min);
        self.max = f64::max(self.max, other.max);
    }
}
|
||||||
|
|
||||||
|
fn make_map<'a>(i: impl Iterator<Item = &'a [u8]>) -> HashMap<&'a BStr, State> {
|
||||||
|
let mut state: HashMap<&'a BStr, State> = Default::default();
|
||||||
|
for line in i {
|
||||||
|
let (name, value) = line.split_once_str(&[b';']).unwrap();
|
||||||
|
let value = fast_float::parse(value).unwrap();
|
||||||
|
state.entry(name.into()).or_default().update(value);
|
||||||
|
}
|
||||||
|
state
|
||||||
|
}
|
||||||
|
|
||||||
|
fn solve_for_part((start, end): (usize, usize), mem: &[u8]) -> HashMap<&BStr, State> {
|
||||||
|
make_map((&mem[start..end]).lines())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Folds every station of `b` into `a`, creating missing entries on demand.
fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) {
    b.iter()
        .for_each(|(&station, partial)| a.entry(station).or_default().merge(partial));
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let cores: usize = std::thread::available_parallelism().unwrap().into();
|
||||||
|
let path = match std::env::args().skip(1).next() {
|
||||||
|
Some(path) => path,
|
||||||
|
None => "measurements.txt".to_owned(),
|
||||||
|
};
|
||||||
|
let file = File::open(path).unwrap();
|
||||||
|
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
|
||||||
|
|
||||||
|
let chunk_size = mmap.len() / cores;
|
||||||
|
let mut chunks: Vec<(usize, usize)> = vec![];
|
||||||
|
let mut start = 0;
|
||||||
|
for _ in 0..cores {
|
||||||
|
let end = (start + chunk_size).min(mmap.len());
|
||||||
|
let next_new_line = match memchr::memchr(b'\n', &mmap[end..]) {
|
||||||
|
Some(v) => v,
|
||||||
|
None => {
|
||||||
|
assert_eq!(end, mmap.len());
|
||||||
|
0
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let end = end + next_new_line;
|
||||||
|
chunks.push((start, end));
|
||||||
|
start = end + 1;
|
||||||
|
}
|
||||||
|
let parts: Vec<_> = chunks
|
||||||
|
.par_iter()
|
||||||
|
.map(|r| solve_for_part(*r, &mmap))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let state: HashMap<&BStr, State> = parts.into_iter().fold(Default::default(), |mut a, b| {
|
||||||
|
merge(&mut a, &b);
|
||||||
|
a
|
||||||
|
});
|
||||||
|
|
||||||
|
let mut all: Vec<_> = state.into_iter().collect();
|
||||||
|
all.sort_unstable_by(|a, b| a.0.cmp(&b.0));
|
||||||
|
print!("{{");
|
||||||
|
for (i, (name, state)) in all.into_iter().enumerate() {
|
||||||
|
if i == 0 {
|
||||||
|
print!("{name}={state}");
|
||||||
|
} else {
|
||||||
|
print!(", {name}={state}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!("}}");
|
||||||
|
}
|
@ -11,8 +11,11 @@ struct StationMeasurements {
|
|||||||
temps: Vec<f64>,
|
temps: Vec<f64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let mut stations: HashMap<String, StationMeasurements> = HashMap::new();
|
let mut stations: HashMap<String, StationMeasurements> =
|
||||||
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
|
|
||||||
let file = File::open("../measurements.txt").expect("File measurements.txt not found");
|
let file = File::open("../measurements.txt").expect("File measurements.txt not found");
|
||||||
let reader = BufReader::new(file);
|
let reader = BufReader::new(file);
|
||||||
|
Loading…
Reference in New Issue
Block a user