Create single threaded version using smol for async reading of the file 46 -> 40 seconds

This commit is contained in:
Fabian Schmidt 2024-08-13 13:50:49 +02:00
parent b53212103b
commit bbc89aa2b3
5 changed files with 413 additions and 10 deletions

343
src/main/rust/Cargo.lock generated
View File

@ -82,6 +82,127 @@ dependencies = [
"serde",
]
[[package]]
name = "async-channel"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89b47800b0be77592da0afd425cc03468052844aff33b84e33cc696f64e77b6a"
dependencies = [
"concurrent-queue",
"event-listener-strategy",
"futures-core",
"pin-project-lite",
]
[[package]]
name = "async-executor"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7ebdfa2ebdab6b1760375fa7d6f382b9f486eac35fc994625a00e89280bdbb7"
dependencies = [
"async-task",
"concurrent-queue",
"fastrand",
"futures-lite",
"slab",
]
[[package]]
name = "async-fs"
version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebcd09b382f40fcd159c2d695175b2ae620ffa5f3bd6f664131efff4e8b9e04a"
dependencies = [
"async-lock",
"blocking",
"futures-lite",
]
[[package]]
name = "async-io"
version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "444b0228950ee6501b3568d3c93bf1176a1fdbc3b758dcd9475046d30f4dc7e8"
dependencies = [
"async-lock",
"cfg-if",
"concurrent-queue",
"futures-io",
"futures-lite",
"parking",
"polling",
"rustix",
"slab",
"tracing",
"windows-sys 0.59.0",
]
[[package]]
name = "async-lock"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18"
dependencies = [
"event-listener",
"event-listener-strategy",
"pin-project-lite",
]
[[package]]
name = "async-net"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b948000fad4873c1c9339d60f2623323a0cfd3816e5181033c6a5cb68b2accf7"
dependencies = [
"async-io",
"blocking",
"futures-lite",
]
[[package]]
name = "async-process"
version = "2.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8a07789659a4d385b79b18b9127fc27e1a59e1e89117c78c5ea3b806f016374"
dependencies = [
"async-channel",
"async-io",
"async-lock",
"async-signal",
"async-task",
"blocking",
"cfg-if",
"event-listener",
"futures-lite",
"rustix",
"tracing",
"windows-sys 0.59.0",
]
[[package]]
name = "async-signal"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "637e00349800c0bdf8bfc21ebbc0b6524abea702b0da4168ac00d070d0c0b9f3"
dependencies = [
"async-io",
"async-lock",
"atomic-waker",
"cfg-if",
"futures-core",
"futures-io",
"rustix",
"signal-hook-registry",
"slab",
"windows-sys 0.59.0",
]
[[package]]
name = "async-task"
version = "4.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de"
[[package]]
name = "atoi"
version = "2.0.0"
@ -97,6 +218,12 @@ version = "0.15.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9"
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "autocfg"
version = "1.3.0"
@ -109,6 +236,19 @@ version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
[[package]]
name = "blocking"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "703f41c54fc768e63e091340b424302bb1c29ef4aa0c7f10fe849dfb114d29ea"
dependencies = [
"async-channel",
"async-task",
"futures-io",
"futures-lite",
"piper",
]
[[package]]
name = "bstr"
version = "1.10.0"
@ -256,6 +396,15 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "concurrent-queue"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.6"
@ -399,24 +548,86 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "errno"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "ethnum"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c"
[[package]]
name = "event-listener"
version = "5.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba"
dependencies = [
"concurrent-queue",
"parking",
"pin-project-lite",
]
[[package]]
name = "event-listener-strategy"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1"
dependencies = [
"event-listener",
"pin-project-lite",
]
[[package]]
name = "fast-float"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
[[package]]
name = "fastrand"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
[[package]]
name = "foreign_vec"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
[[package]]
name = "futures-core"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
[[package]]
name = "futures-io"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
[[package]]
name = "futures-lite"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52527eb5074e35e9339c6b4e8d12600c7128b68fb25dcb9fa9dec18f7c25f3a5"
dependencies = [
"fastrand",
"futures-core",
"futures-io",
"parking",
"pin-project-lite",
]
[[package]]
name = "getrandom"
version = "0.2.15"
@ -475,13 +686,19 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
[[package]]
name = "hermit-abi"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc"
[[package]]
name = "home"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [
"windows-sys",
"windows-sys 0.52.0",
]
[[package]]
@ -523,9 +740,9 @@ version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
dependencies = [
"hermit-abi",
"hermit-abi 0.3.9",
"libc",
"windows-sys",
"windows-sys 0.52.0",
]
[[package]]
@ -573,6 +790,12 @@ version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "linux-raw-sys"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "lock_api"
version = "0.4.12"
@ -703,6 +926,7 @@ dependencies = [
"polars",
"rayon",
"rustc-hash",
"smol",
]
[[package]]
@ -711,6 +935,12 @@ version = "11.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9"
[[package]]
name = "parking"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae"
[[package]]
name = "parking_lot"
version = "0.12.3"
@ -740,6 +970,23 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pin-project-lite"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02"
[[package]]
name = "piper"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066"
dependencies = [
"atomic-waker",
"fastrand",
"futures-io",
]
[[package]]
name = "pkg-config"
version = "0.3.30"
@ -1078,6 +1325,21 @@ dependencies = [
"version_check",
]
[[package]]
name = "polling"
version = "3.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc2790cd301dec6cd3b7a025e4815cf825724a51c98dccfe6a3e55f05ffb6511"
dependencies = [
"cfg-if",
"concurrent-queue",
"hermit-abi 0.4.0",
"pin-project-lite",
"rustix",
"tracing",
"windows-sys 0.59.0",
]
[[package]]
name = "ppv-lite86"
version = "0.2.18"
@ -1209,6 +1471,19 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
[[package]]
name = "rustix"
version = "0.38.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.52.0",
]
[[package]]
name = "rustversion"
version = "1.0.17"
@ -1268,12 +1543,30 @@ dependencies = [
"serde",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
dependencies = [
"libc",
]
[[package]]
name = "simdutf8"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]]
name = "slab"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg",
]
[[package]]
name = "smallvec"
version = "1.13.2"
@ -1291,6 +1584,23 @@ dependencies = [
"version_check",
]
[[package]]
name = "smol"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e635339259e51ef85ac7aa29a1cd991b957047507288697a690e80ab97d07cad"
dependencies = [
"async-channel",
"async-executor",
"async-fs",
"async-io",
"async-lock",
"async-net",
"async-process",
"blocking",
"futures-lite",
]
[[package]]
name = "sqlparser"
version = "0.39.0"
@ -1422,6 +1732,22 @@ dependencies = [
"serde_json",
]
[[package]]
name = "tracing"
version = "0.1.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
dependencies = [
"pin-project-lite",
"tracing-core",
]
[[package]]
name = "tracing-core"
version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
[[package]]
name = "unicode-ident"
version = "1.0.12"
@ -1542,7 +1868,7 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
dependencies = [
"windows-sys",
"windows-sys 0.52.0",
]
[[package]]
@ -1579,6 +1905,15 @@ dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"

View File

@ -14,6 +14,7 @@ polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"
rayon = "1.10.0"
rustc-hash = "2.0.0"
libc = "0.2.155"
smol = "2.0.0"
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }

View File

@ -0,0 +1,5 @@
use onebrc::implementations::smol::run;
fn main() {
run();
}

View File

@ -1,8 +1,9 @@
pub mod flare_flo;
pub mod libraries;
pub mod multi_threaded;
pub mod multi_threaded_structured;
pub mod phcs;
pub mod polars;
pub mod reference_impl;
pub mod single_thread;
pub mod multi_threaded;
pub mod polars;
pub mod flare_flo;
pub mod phcs;
pub mod libraries;
pub mod multi_threaded_structured;
pub mod smol;

View File

@ -0,0 +1,61 @@
use smol::fs::File;
use smol::io::{AsyncBufReadExt, BufReader};
use crate::models::station_measurements::StationMeasurements;
use crate::utils::parse;
use crate::utils::parse::hashstr;
use std::collections::HashMap;
use std::time::Instant;
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
pub fn run() {
let now = Instant::now();
let mut stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
smol::block_on(async {
let mut file = File::open("../../../measurements.txt")
.await
.expect("Could not read file");
let mut reader = BufReader::new(&mut file);
let mut line = Vec::with_capacity(108);
loop {
let line_len = reader
.read_until(b'\n', &mut line)
.await
.expect("could not read bytes");
if line_len == 0 {
break;
}
let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap();
let hash = hashstr(station);
let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) };
let temp = parse::temp(temp.split_last().unwrap().1);
let measurements_option = stations.get_mut(&hash);
if let Some((_, measurements)) = measurements_option {
measurements.update(temp);
} else {
let measurements = StationMeasurements {
min: temp,
max: temp,
count: 1,
sum: temp,
};
stations.insert(hash, (station, measurements));
}
line.clear();
}
let mut stations: Vec<String> = stations
.iter()
.map(|(_, (station, measurements))| {
let measurements = measurements.to_string();
format!("{station}={measurements}")
})
.collect();
stations.sort();
let stations = stations.join(",");
println!("{{{stations}}}");
println!("Time={} ms", now.elapsed().as_millis());
})
}