Compare commits
14 Commits
520df062d3
...
b4e3992c65
Author | SHA1 | Date | |
---|---|---|---|
b4e3992c65 | |||
393f802741 | |||
327fe8564e | |||
14d608b209 | |||
2f82e8788d | |||
91adbf4c1c | |||
00096647ee | |||
fe1053b74a | |||
3ffed9099c | |||
65df621cf6 | |||
bd83b9bc2c | |||
15525282d6 | |||
6cc29fb645 | |||
6f548678f2 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -9,7 +9,7 @@ release.properties
|
|||||||
.project
|
.project
|
||||||
.classpath
|
.classpath
|
||||||
.settings/
|
.settings/
|
||||||
bin/
|
./bin/
|
||||||
|
|
||||||
# IntelliJ
|
# IntelliJ
|
||||||
.idea
|
.idea
|
||||||
|
194
src/main/rust/Cargo.lock
generated
Normal file
194
src/main/rust/Cargo.lock
generated
Normal file
@ -0,0 +1,194 @@
|
|||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bstr"
|
||||||
|
version = "1.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.8.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.20"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fast-float"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.155"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.7.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memmap"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "onebrc"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"bstr",
|
||||||
|
"fast-float",
|
||||||
|
"memchr",
|
||||||
|
"memmap",
|
||||||
|
"rayon",
|
||||||
|
"rustc-hash",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.86"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.36"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon"
|
||||||
|
version = "1.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"rayon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-core"
|
||||||
|
version = "1.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.4.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "2.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.204"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
|
||||||
|
dependencies = [
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_derive"
|
||||||
|
version = "1.0.204"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.72"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
20
src/main/rust/Cargo.toml
Normal file
20
src/main/rust/Cargo.toml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
[package]
|
||||||
|
name = "onebrc"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
bstr = "1.9.1"
|
||||||
|
fast-float = "0.2.0"
|
||||||
|
memchr = "2.7.4"
|
||||||
|
memmap = "0.7.0"
|
||||||
|
#polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
|
||||||
|
rayon = "1.10.0"
|
||||||
|
rustc-hash = "2.0.0"
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
lto = "fat"
|
||||||
|
strip = "symbols"
|
||||||
|
panic = "abort"
|
88
src/main/rust/src/bin/multi_threaded.rs
Normal file
88
src/main/rust/src/bin/multi_threaded.rs
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
use std::{
|
||||||
|
fs::File,
|
||||||
|
io::{BufRead, BufReader},
|
||||||
|
thread,
|
||||||
|
};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::mpsc;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
use onebrc::parse_temp;
|
||||||
|
|
||||||
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
//print!("\x1b[2J");
|
||||||
|
print!("\x1b[s");
|
||||||
|
thread::scope(|s| {
|
||||||
|
let mut stations: HashMap<String, onebrc::StationMeasurements> = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
|
let (tx, rx) = mpsc::channel();
|
||||||
|
let now = Instant::now();
|
||||||
|
let cores: usize = thread::available_parallelism().unwrap().into();
|
||||||
|
let chunk_length = 1_000_000_000 / cores;
|
||||||
|
for i in 0..cores {
|
||||||
|
let print_line = i + 1;
|
||||||
|
print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: 0%");
|
||||||
|
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
||||||
|
let reader = BufReader::new(file);
|
||||||
|
let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length);
|
||||||
|
let tx = tx.clone();
|
||||||
|
s.spawn(move || {
|
||||||
|
let mut t_stations: HashMap<String, onebrc::StationMeasurements> =
|
||||||
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
|
|
||||||
|
let now_read_line = Instant::now();
|
||||||
|
let mut line_num = 0;
|
||||||
|
line_chunk.for_each(|line| {
|
||||||
|
if line_num == 0 {
|
||||||
|
print!("\x1b[u\x1b[{print_line}B\x1b[30CStart read line {}ms", now_read_line.elapsed().as_millis());
|
||||||
|
}
|
||||||
|
if line_num % 10000 == 0 {
|
||||||
|
//let formatted = format_nums(line_num);
|
||||||
|
//print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: {formatted}");
|
||||||
|
let percent = (line_num as f64 / chunk_length as f64) * 100.0;
|
||||||
|
print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: {percent:.2}%");
|
||||||
|
}
|
||||||
|
line_num += 1;
|
||||||
|
let line = line.expect("could not read line");
|
||||||
|
let (station, temp) = line.split_once(';').expect("Error while splitting");
|
||||||
|
let temp = parse_temp(temp.as_bytes());
|
||||||
|
let measurements_option = t_stations.get_mut(station);
|
||||||
|
if let Some(measurements) = measurements_option {
|
||||||
|
measurements.update(temp);
|
||||||
|
} else {
|
||||||
|
let measurements = onebrc::StationMeasurements {
|
||||||
|
min: temp,
|
||||||
|
max: temp,
|
||||||
|
count: 1,
|
||||||
|
sum: temp,
|
||||||
|
};
|
||||||
|
t_stations.insert(station.to_owned(), measurements);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
print!("\x1b[u\x1b[{print_line}B\x1b[60CTime reading lines in thread {i}={} ms", now_read_line.elapsed().as_millis());
|
||||||
|
let _ = tx.send(t_stations);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
print!("\x1b[{cores}B");
|
||||||
|
drop(tx);
|
||||||
|
while let Ok(t_stations) = rx.recv() {
|
||||||
|
for (station, measurements) in t_stations.iter() {
|
||||||
|
let joined_measurements_options = stations.get_mut(station.as_str());
|
||||||
|
if let Some(joined_measurements) = joined_measurements_options {
|
||||||
|
joined_measurements.merge(measurements);
|
||||||
|
} else {
|
||||||
|
stations.insert(station.to_owned(), *measurements);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
|
||||||
|
let measurements = measurements.to_string();
|
||||||
|
format!("{station}={measurements}")
|
||||||
|
}).collect();
|
||||||
|
stations.sort();
|
||||||
|
let _stations = stations.join(",");
|
||||||
|
// println!("{{{stations}}}");
|
||||||
|
println!("\n\nTime={} ms", now.elapsed().as_millis());
|
||||||
|
});
|
||||||
|
}
|
35
src/main/rust/src/bin/polars.rs
Normal file
35
src/main/rust/src/bin/polars.rs
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
use polars::prelude::*;
|
||||||
|
use std::time::Instant;
|
||||||
|
use std::vec;
|
||||||
|
|
||||||
|
fn run_polars() -> Result<DataFrame, PolarsError> {
|
||||||
|
let now = Instant::now();
|
||||||
|
|
||||||
|
let f1: Field = Field::new("station", DataType::String);
|
||||||
|
let f2: Field = Field::new("measure", DataType::Float64);
|
||||||
|
let sc: Schema = Schema::from_iter(vec![f1, f2]);
|
||||||
|
|
||||||
|
let q = LazyCsvReader::new("../../../measurements.txt")
|
||||||
|
.has_header(false)
|
||||||
|
.with_schema(Some(Arc::new(sc)))
|
||||||
|
.with_separator(b';')
|
||||||
|
.finish()?
|
||||||
|
.group_by(vec![col("station")])
|
||||||
|
.agg(vec![
|
||||||
|
col("measure").alias("min").min(),
|
||||||
|
col("measure").alias("mean").mean(),
|
||||||
|
col("measure").alias("max").max(),
|
||||||
|
])
|
||||||
|
.sort("station", Default::default())
|
||||||
|
.with_streaming(true);
|
||||||
|
|
||||||
|
let df = q.collect()?;
|
||||||
|
|
||||||
|
println!("Time={} μs", now.elapsed().as_micros());
|
||||||
|
|
||||||
|
Ok(df)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
run_polars();
|
||||||
|
}
|
116
src/main/rust/src/bin/referenceImpl.rs
Normal file
116
src/main/rust/src/bin/referenceImpl.rs
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
use bstr::{BStr, ByteSlice};
|
||||||
|
use memmap::MmapOptions;
|
||||||
|
use rustc_hash::FxHashMap as HashMap;
|
||||||
|
use std::{fmt::Display, fs::File};
|
||||||
|
|
||||||
|
use rayon::prelude::*;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct State {
|
||||||
|
min: f64,
|
||||||
|
max: f64,
|
||||||
|
count: u64,
|
||||||
|
sum: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for State {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
min: f64::MAX,
|
||||||
|
max: f64::MIN,
|
||||||
|
count: 0,
|
||||||
|
sum: 0.0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for State {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let avg = self.sum / (self.count as f64);
|
||||||
|
write!(f, "{:.1}/{avg:.1}/{:.1}", self.min, self.max)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl State {
|
||||||
|
fn update(&mut self, v: f64) {
|
||||||
|
self.min = self.min.min(v);
|
||||||
|
self.max = self.max.max(v);
|
||||||
|
self.count += 1;
|
||||||
|
self.sum += v;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn merge(&mut self, other: &Self) {
|
||||||
|
self.min = self.min.min(other.min);
|
||||||
|
self.max = self.max.max(other.max);
|
||||||
|
self.count += other.count;
|
||||||
|
self.sum += other.sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_map<'a>(i: impl Iterator<Item = &'a [u8]>) -> HashMap<&'a BStr, State> {
|
||||||
|
let mut state: HashMap<&'a BStr, State> = Default::default();
|
||||||
|
for line in i {
|
||||||
|
let (name, value) = line.split_once_str(&[b';']).unwrap();
|
||||||
|
let value = fast_float::parse(value).unwrap();
|
||||||
|
state.entry(name.into()).or_default().update(value);
|
||||||
|
}
|
||||||
|
state
|
||||||
|
}
|
||||||
|
|
||||||
|
fn solve_for_part((start, end): (usize, usize), mem: &[u8]) -> HashMap<&BStr, State> {
|
||||||
|
make_map((&mem[start..end]).lines())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) {
|
||||||
|
for (k, v) in b {
|
||||||
|
a.entry(k).or_default().merge(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let cores: usize = std::thread::available_parallelism().unwrap().into();
|
||||||
|
let path = match std::env::args().skip(1).next() {
|
||||||
|
Some(path) => path,
|
||||||
|
None => "measurements.txt".to_owned(),
|
||||||
|
};
|
||||||
|
let file = File::open(path).unwrap();
|
||||||
|
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
|
||||||
|
|
||||||
|
let chunk_size = mmap.len() / cores;
|
||||||
|
let mut chunks: Vec<(usize, usize)> = vec![];
|
||||||
|
let mut start = 0;
|
||||||
|
for _ in 0..cores {
|
||||||
|
let end = (start + chunk_size).min(mmap.len());
|
||||||
|
let next_new_line = match memchr::memchr(b'\n', &mmap[end..]) {
|
||||||
|
Some(v) => v,
|
||||||
|
None => {
|
||||||
|
assert_eq!(end, mmap.len());
|
||||||
|
0
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let end = end + next_new_line;
|
||||||
|
chunks.push((start, end));
|
||||||
|
start = end + 1;
|
||||||
|
}
|
||||||
|
let parts: Vec<_> = chunks
|
||||||
|
.par_iter()
|
||||||
|
.map(|r| solve_for_part(*r, &mmap))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let state: HashMap<&BStr, State> = parts.into_iter().fold(Default::default(), |mut a, b| {
|
||||||
|
merge(&mut a, &b);
|
||||||
|
a
|
||||||
|
});
|
||||||
|
|
||||||
|
let mut all: Vec<_> = state.into_iter().collect();
|
||||||
|
all.sort_unstable_by(|a, b| a.0.cmp(&b.0));
|
||||||
|
print!("{{");
|
||||||
|
for (i, (name, state)) in all.into_iter().enumerate() {
|
||||||
|
if i == 0 {
|
||||||
|
print!("{name}={state}");
|
||||||
|
} else {
|
||||||
|
print!(", {name}={state}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!("}}");
|
||||||
|
}
|
44
src/main/rust/src/bin/single_thread.rs
Normal file
44
src/main/rust/src/bin/single_thread.rs
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
use std::{
|
||||||
|
fs::File,
|
||||||
|
io::{BufRead, BufReader},
|
||||||
|
};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
|
||||||
|
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let now = Instant::now();
|
||||||
|
let mut stations: HashMap<String, onebrc::StationMeasurements> =
|
||||||
|
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
|
||||||
|
|
||||||
|
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
||||||
|
let reader = BufReader::new(file);
|
||||||
|
for line_result in reader.lines() {
|
||||||
|
let line = line_result.expect("could not read line");
|
||||||
|
let (station, temp) = line.split_once(';').unwrap();
|
||||||
|
let temp = onebrc::parse_temp(temp.as_bytes());
|
||||||
|
let measurements_option = stations.get_mut(station);
|
||||||
|
if let Some(measurements) = measurements_option {
|
||||||
|
measurements.update(temp);
|
||||||
|
} else {
|
||||||
|
let measurements = onebrc::StationMeasurements {
|
||||||
|
min: temp,
|
||||||
|
max: temp,
|
||||||
|
count: 1,
|
||||||
|
sum: temp,
|
||||||
|
};
|
||||||
|
stations.insert(station.to_owned(), measurements);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
|
||||||
|
let measurements = measurements.to_string();
|
||||||
|
format!("{station}={measurements}")
|
||||||
|
}).collect();
|
||||||
|
stations.sort();
|
||||||
|
let stations = stations.join(",");
|
||||||
|
println!("{{{stations}}}");
|
||||||
|
println!("Time={} μs", now.elapsed().as_micros());
|
||||||
|
}
|
||||||
|
|
67
src/main/rust/src/lib.rs
Normal file
67
src/main/rust/src/lib.rs
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub struct StationMeasurements {
|
||||||
|
pub min: isize,
|
||||||
|
pub max: isize,
|
||||||
|
pub count: isize,
|
||||||
|
pub sum: isize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StationMeasurements {
|
||||||
|
pub fn update(&mut self, v: isize) {
|
||||||
|
self.min = self.min.min(v);
|
||||||
|
self.max = self.max.max(v);
|
||||||
|
self.count += 1;
|
||||||
|
self.sum += v;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn merge(&mut self, other: &Self) {
|
||||||
|
self.min = self.min.min(other.min);
|
||||||
|
self.max = self.max.max(other.max);
|
||||||
|
self.count += other.count;
|
||||||
|
self.sum += other.sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for StationMeasurements {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let min = self.min as f64 / 10.0;
|
||||||
|
let max = self.max as f64 / 10.0;
|
||||||
|
let avg = (self.sum as f64 / self.count as f64) / 10.0;
|
||||||
|
write!(f, "{min}/{avg:.1}/{max}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn format_nums(num: usize) -> String {
|
||||||
|
num.to_string()
|
||||||
|
.as_bytes()
|
||||||
|
.rchunks(3)
|
||||||
|
.rev()
|
||||||
|
.map(std::str::from_utf8)
|
||||||
|
.collect::<Result<Vec<&str>, _>>()
|
||||||
|
.unwrap()
|
||||||
|
.join("_")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub const fn get_digit(b: u8) -> u32 {
|
||||||
|
(b as u32).wrapping_sub('0' as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn parse_temp(bytes: &[u8]) -> isize {
|
||||||
|
let is_negative = bytes[0] == b'-';
|
||||||
|
let as_decimal = match (is_negative, bytes.len()) {
|
||||||
|
(true, 4) => get_digit(bytes[1]) * 10 + get_digit(bytes[3]),
|
||||||
|
(true, 5) => get_digit(bytes[1]) * 100 + get_digit(bytes[2]) * 10 + get_digit(bytes[4]),
|
||||||
|
(false, 3) => get_digit(bytes[0]) * 10 + get_digit(bytes[2]),
|
||||||
|
(false, 4) => get_digit(bytes[0]) * 100 + get_digit(bytes[1]) * 10 + get_digit(bytes[3]),
|
||||||
|
_x => panic!(),
|
||||||
|
};
|
||||||
|
if is_negative {
|
||||||
|
-(as_decimal as isize)
|
||||||
|
} else {
|
||||||
|
as_decimal as isize
|
||||||
|
}
|
||||||
|
}
|
8
src/main/rust/src/main.rs
Normal file
8
src/main/rust/src/main.rs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
fn main() {
|
||||||
|
// let now = Instant::now();
|
||||||
|
// let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
|
||||||
|
// let reader = BufReader::new(file);
|
||||||
|
// reader.lines().for_each(|_x| {()});
|
||||||
|
// println!("Time={} μs", now.elapsed().as_micros());
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user