From e230a5ce2c4419bb9caf2ff72867840725d93ad9 Mon Sep 17 00:00:00 2001 From: Fabian Schmidt Date: Thu, 25 Jul 2024 10:03:37 +0200 Subject: [PATCH] My multi-treaded version is now faster than polars and takes less time to compile. It's a little more complex though --- src/main/rust/Cargo.lock | 1264 ++++++++++++++++++++++- src/main/rust/Cargo.toml | 2 +- src/main/rust/src/bin/fast_but_wrong.rs | 106 -- src/main/rust/src/bin/multi_threaded.rs | 104 +- src/main/rust/src/bin/referenceImpl.rs | 6 +- src/main/rust/src/lib.rs | 15 +- 6 files changed, 1339 insertions(+), 158 deletions(-) delete mode 100644 src/main/rust/src/bin/fast_but_wrong.rs diff --git a/src/main/rust/Cargo.lock b/src/main/rust/Cargo.lock index be2c2c0..a40635b 100644 --- a/src/main/rust/Cargo.lock +++ b/src/main/rust/Cargo.lock @@ -2,6 +2,101 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "argminmax" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" +dependencies = [ + "num-traits", +] + +[[package]] +name = "array-init-cursor" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" + +[[package]] +name = "arrow-format" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07884ea216994cdc32a2d5f8274a8bee979cfe90274b83f86f440866ee3132c7" +dependencies = [ + "planus", + "serde", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atoi_simd" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + [[package]] name = "bstr" version = "1.9.1" @@ -13,6 +108,93 @@ dependencies = [ "serde", ] +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytemuck" +version = "1.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + +[[package]] +name = "bytes" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" + +[[package]] +name = "cc" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" +dependencies = [ + "jobserver", + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", +] + +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "crossterm", + "strum", + "strum_macros 0.26.4", + "unicode-width", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -32,30 +214,247 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags", + "crossterm_winapi", + "libc", + "parking_lot", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + +[[package]] +name = "dyn-clone" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + [[package]] name = "either" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "enum_dispatch" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" +dependencies = [ + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.72", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "ethnum" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" + [[package]] name = "fast-float" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" +[[package]] +name = "foreign_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", + "rayon", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "libc" version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lz4" +version = "1.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958b4caa893816eea05507c20cfe47574a43d9a697138a7872990bba8a0ece68" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109de74d5d2353660401699a4174a4ff23fcc649caf553df71933c7fb45ad868" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "memchr" version = "2.7.4" @@ -72,6 +471,71 @@ dependencies = [ "winapi", ] +[[package]] +name = "memmap2" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +dependencies = [ + "libc", +] + +[[package]] +name = "multiversion" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4851161a11d3ad0bf9402d90ffc3967bf231768bfd7aeb61755ad06dbf1a142" +dependencies = [ + "multiversion-macros", + "target-features", +] + +[[package]] +name = "multiversion-macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79a74ddee9e0c27d2578323c13905793e91622148f138ba29738f9dddb835e90" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "target-features", +] + +[[package]] +name = "now" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89e9874397a1f0a52fc1f197a8effd9735223cb2390e9dcc83ac6cd02923d0" +dependencies = [ + "chrono", +] + +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + [[package]] name = "onebrc" version = "0.1.0" @@ -80,10 +544,356 @@ dependencies = [ "fast-float", "memchr", "memmap", + "polars", "rayon", "rustc-hash", ] +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "planus" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" +dependencies = [ + "array-init-cursor", +] + +[[package]] +name = "polars" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938048fcda6a8e2ace6eb168bee1b415a92423ce51e418b853bf08fc40349b6b" +dependencies = [ + "getrandom", + "polars-core", + "polars-io", + "polars-lazy", + "polars-ops", + "polars-sql", + "polars-time", + "version_check", +] + +[[package]] +name = "polars-arrow" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce68a02f698ff7787c261aea1b4c040a8fe183a8fb200e2436d7f35d95a1b86f" +dependencies = [ + "ahash", + "arrow-format", + "atoi_simd", + "bytemuck", + "chrono", + "dyn-clone", + "either", + "ethnum", + "fast-float", + "foreign_vec", + "getrandom", + "hashbrown", + "itoa", + "lz4", + "multiversion", + "num-traits", + "polars-error", + "polars-utils", + "ryu", + "simdutf8", + "streaming-iterator", + "strength_reduce", + "version_check", + "zstd", +] + +[[package]] +name = "polars-compute" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14fbc5f141b29b656a4cec4802632e5bff10bf801c6809c6bbfbd4078a044dd" +dependencies = [ + "bytemuck", + "num-traits", + "polars-arrow", + "polars-utils", + "version_check", +] + +[[package]] +name = "polars-core" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f5efe734b6cbe5f97ea769be8360df5324fade396f1f3f5ad7fe9360ca4a23" +dependencies = [ + "ahash", + "bitflags", + "bytemuck", + "chrono", + "comfy-table", + "either", + "hashbrown", + "indexmap", + "num-traits", + "once_cell", + "polars-arrow", + "polars-compute", + "polars-error", + "polars-row", + "polars-utils", + "rand", + "rand_distr", + "rayon", + "regex", + "smartstring", + "thiserror", + "version_check", + "xxhash-rust", +] + +[[package]] +name = "polars-error" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6396de788f99ebfc9968e7b6f523e23000506cde4ba6dfc62ae4ce949002a886" +dependencies = [ + "arrow-format", + "regex", + "simdutf8", + "thiserror", +] + +[[package]] +name = "polars-io" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d0458efe8946f4718fd352f230c0db5a37926bd0d2bd25af79dc24746abaaea" +dependencies = [ + "ahash", + "atoi_simd", + "bytes", + "chrono", + "fast-float", + "home", + "itoa", + "memchr", + "memmap2", + "num-traits", + "once_cell", + "percent-encoding", + "polars-arrow", + "polars-core", + "polars-error", + "polars-time", + "polars-utils", + "rayon", + "regex", + "ryu", + "simdutf8", + "smartstring", +] + +[[package]] +name = "polars-lazy" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d7105b40905bb38e8fc4a7fd736594b7491baa12fad3ac492969ca221a1b5d5" +dependencies = [ + "ahash", + "bitflags", + "glob", + "once_cell", + "polars-arrow", + "polars-core", + "polars-io", + "polars-ops", + "polars-pipe", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "smartstring", + "version_check", +] + +[[package]] +name = "polars-ops" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e09afc456ab11e75e5dcb43e00a01c71f3a46a2781e450054acb6bb096ca78e" +dependencies = [ + "ahash", + "argminmax", + "bytemuck", + "either", + "hashbrown", + "indexmap", + "memchr", + "num-traits", + "polars-arrow", + "polars-compute", + "polars-core", + "polars-error", + "polars-utils", + "rayon", + "regex", + "smartstring", + "version_check", +] + +[[package]] +name = "polars-pipe" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b7ead073cc3917027d77b59861a9f071db47125de9314f8907db1a0a3e4100" +dependencies = [ + "crossbeam-channel", + "crossbeam-queue", + "enum_dispatch", + "hashbrown", + "num-traits", + "polars-arrow", + "polars-compute", + "polars-core", + "polars-io", + "polars-ops", + "polars-plan", + "polars-row", + "polars-utils", + "rayon", + "smartstring", + "version_check", +] + +[[package]] +name = "polars-plan" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384a175624d050c31c473ee11df9d7af5d729ae626375e522158cfb3d150acd0" +dependencies = [ + "ahash", + "bytemuck", + "once_cell", + "percent-encoding", + "polars-arrow", + "polars-core", + "polars-io", + "polars-ops", + "polars-time", + "polars-utils", + "rayon", + "regex", + "smartstring", + "strum_macros 0.25.3", + "version_check", +] + +[[package]] +name = "polars-row" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32322f7acbb83db3e9c7697dc821be73d06238da89c817dcc8bc1549a5e9c72f" +dependencies = [ + "polars-arrow", + "polars-error", + "polars-utils", +] + +[[package]] +name = "polars-sql" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f0b4c6ddffdfd0453e84bc3918572c633014d661d166654399cf93752aa95b5" +dependencies = [ + "polars-arrow", + "polars-core", + "polars-error", + "polars-lazy", + "polars-plan", + "rand", + "serde", + "serde_json", + "sqlparser", +] + +[[package]] +name = "polars-time" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee2649fc96bd1b6584e0e4a4b3ca7d22ed3d117a990e63ad438ecb26f7544d0" +dependencies = [ + "atoi", + "chrono", + "now", + "once_cell", + "polars-arrow", + "polars-core", + "polars-error", + "polars-ops", + "polars-utils", + "regex", + "smartstring", +] + +[[package]] +name = "polars-utils" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b174ca4a77ad47d7b91a0460aaae65bbf874c8bfbaaa5308675dadef3976bbda" +dependencies = [ + "ahash", + "bytemuck", + "hashbrown", + "indexmap", + "num-traits", + "once_cell", + "polars-error", + "rayon", + "smartstring", + "sysinfo", + "version_check", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + [[package]] name = "proc-macro2" version = "1.0.86" @@ -102,6 +912,46 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + [[package]] name = "rayon" version = "1.10.0" @@ -122,11 +972,43 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rustc-hash" @@ -134,6 +1016,24 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.204" @@ -151,7 +1051,111 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", +] + +[[package]] +name = "serde_json" +version = "1.0.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + +[[package]] +name = "sqlparser" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" +dependencies = [ + "log", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "strength_reduce" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.72", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.72", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] @@ -165,12 +1169,124 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sysinfo" +version = "0.30.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "windows", +] + +[[package]] +name = "target-features" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" + +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-width" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.72", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + [[package]] name = "winapi" version = "0.3.9" @@ -192,3 +1308,149 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xxhash-rust" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63658493314859b4dfdf3fb8c1defd61587839def09582db50b8a4e93afca6bb" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.12+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/src/main/rust/Cargo.toml b/src/main/rust/Cargo.toml index 653aea3..69a6e2b 100644 --- a/src/main/rust/Cargo.toml +++ b/src/main/rust/Cargo.toml @@ -10,7 +10,7 @@ bstr = "1.9.1" fast-float = "0.2.0" memchr = "2.7.4" memmap = "0.7.0" -#polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]} +polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]} rayon = "1.10.0" rustc-hash = "2.0.0" diff --git a/src/main/rust/src/bin/fast_but_wrong.rs b/src/main/rust/src/bin/fast_but_wrong.rs deleted file mode 100644 index a0dd387..0000000 --- a/src/main/rust/src/bin/fast_but_wrong.rs +++ /dev/null @@ -1,106 +0,0 @@ -use std::{ - fs::File, - io::BufReader, - thread, -}; -use std::collections::HashMap; -use std::io::{Read, Seek, SeekFrom}; -use std::sync::mpsc; -use onebrc::{parse_line, parse_temp, read_bytes_until}; - -const DEFAULT_HASHMAP_LENGTH: usize = 10000; - -fn main() { - const FILE_PATH: &str = "../../../measurements.txt"; - let file = File::open(FILE_PATH).expect("File measurements.txt not found"); - thread::scope(|s| { - let mut stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); - let (tx, rx) = mpsc::channel(); - let cores = thread::available_parallelism().unwrap().into(); - let mut reader = BufReader::new(&file); - let file_length = reader.seek(SeekFrom::End(0)).unwrap(); - let chunk_length = file_length as usize / cores; - reader.seek(SeekFrom::Start(0)).unwrap(); - for i in 0..cores { - let tx = tx.clone(); - s.spawn(move || { - let file = File::open(FILE_PATH).expect("File measurements.txt not found"); - let mut reader = BufReader::new(&file); - let mut currposition = (chunk_length * i) as u64; - let end = currposition + chunk_length as u64; - reader.seek(SeekFrom::Start(currposition)).unwrap(); - let mut bytes = reader.bytes(); - - while let Some(byte) = bytes.next() { - match byte { - Ok(byte) => { - if byte == b'\n' { - break; - } - } - Err(_) => { panic!("could not go to next") } - } - } - - let mut t_stations: HashMap = - HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); - - while let Some(line) = read_bytes_until(&mut bytes, b'\n') { - let (station, temp) = parse_line(&line); - let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; - let temp = parse_temp(temp); - let measurements_option = t_stations.get_mut(&station); - if let Some(measurements) = measurements_option { - measurements.update(temp); - } else { - let measurements = onebrc::StationMeasurements { - min: temp, - max: temp, - count: 1, - sum: temp, - }; - t_stations.insert(station, measurements); - } - currposition += line.len() as u64; - if currposition > end { - break; - } - } - let _ = tx.send(t_stations); - }); - } - drop(tx); - while let Ok(t_stations) = rx.recv() { - for (station, measurements) in t_stations.iter() { - let joined_measurements_options = stations.get_mut(station.as_str()); - if let Some(joined_measurements) = joined_measurements_options { - joined_measurements.merge(measurements); - } else { - stations.insert(station.to_owned(), *measurements); - } - } - } - let mut stations: Vec = stations.iter().map(|(station, measurements)| { - let measurements = measurements.to_string(); - #[cfg(feature = "json")] - { - format!("{{\"{station}\":\"{measurements}\"}}") - } - #[cfg(not(feature = "json"))] - { - format!("{station}={measurements}") - } - }).collect(); - stations.sort(); - let stations = stations.join(","); - #[cfg(feature = "json")] - { - println!("\n\n[{stations}]"); - } - #[cfg(not(feature = "json"))] - { - println!("\n\n{{{stations}}}"); - } - // println!("\n\nTime={} ms", now.elapsed().as_millis()); - }); -} diff --git a/src/main/rust/src/bin/multi_threaded.rs b/src/main/rust/src/bin/multi_threaded.rs index 0504ef0..c549af1 100644 --- a/src/main/rust/src/bin/multi_threaded.rs +++ b/src/main/rust/src/bin/multi_threaded.rs @@ -1,53 +1,67 @@ use std::{ fs::File, - io::{BufRead, BufReader}, + io::BufReader, thread, }; use std::collections::HashMap; +use std::io::{Read, Seek, SeekFrom}; use std::sync::mpsc; use std::time::Instant; - -use onebrc::parse_temp; +use onebrc::{parse_line, parse_temp, read_bytes_until}; const DEFAULT_HASHMAP_LENGTH: usize = 10000; fn main() { - //print!("\x1b[2J"); - print!("\x1b[s"); + const FILE_PATH: &str = "../../../measurements.txt"; + let now = Instant::now(); thread::scope(|s| { let mut stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let (tx, rx) = mpsc::channel(); - let now = Instant::now(); - let cores: usize = thread::available_parallelism().unwrap().into(); - let chunk_length = 1_000_000_000 / cores; + let cores = thread::available_parallelism().unwrap().into(); + let file = File::open(FILE_PATH).expect("File measurements.txt not found"); + let mut reader = BufReader::new(&file); + let file_length = reader.seek(SeekFrom::End(0)).unwrap(); + println!("file_length = {file_length}"); + let chunk_length = file_length as usize / cores; + let mut bounds = Vec::with_capacity(cores + 1); + bounds.push(0); + for i in 1..cores { + let mut reader = BufReader::new(&file); + let mut byte_start = chunk_length * i; + reader.seek(SeekFrom::Start(byte_start as u64)).expect("could not seek"); + let mut bytes = reader.bytes(); + while let Some(byte) = bytes.next() { + match byte { + Ok(byte) => { + byte_start += 1; + if byte == b'\n' { + break; + } + } + Err(_) => { panic!("could not go to next") } + } + } + bounds.push(byte_start as u64); + } + bounds.push(file_length); for i in 0..cores { - let print_line = i + 1; - print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: 0%"); - let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); - let reader = BufReader::new(file); - let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length); let tx = tx.clone(); + let mut currposition = *bounds.get(i).unwrap(); + let end = *bounds.get(i+1).unwrap(); s.spawn(move || { + let file = File::open(FILE_PATH).expect("File measurements.txt not found"); + let mut reader = BufReader::new(&file); + reader.seek(SeekFrom::Start(currposition)).unwrap(); + let mut bytes = reader.bytes(); + let mut t_stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); - let now_read_line = Instant::now(); - let mut line_num = 0; - line_chunk.for_each(|line| { - if line_num == 0 { - print!("\x1b[u\x1b[{print_line}B\x1b[30CStart read line {}ms", now_read_line.elapsed().as_millis()); - } - if line_num % 10000 == 0 { - //let formatted = format_nums(line_num); - //print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: {formatted}"); - let percent = (line_num as f64 / chunk_length as f64) * 100.0; - print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: {percent:.2}%"); - } - line_num += 1; - let line = line.expect("could not read line"); - let (station, temp) = line.split_once(';').expect("Error while splitting"); - let temp = parse_temp(temp.as_bytes()); - let measurements_option = t_stations.get_mut(station); + while let Some(line) = read_bytes_until(&mut bytes, b'\n') { + let (station, temp) = parse_line(&line); + let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; + let temp = parse_temp(temp); + let measurements_option = t_stations.get_mut(&station); if let Some(measurements) = measurements_option { measurements.update(temp); } else { @@ -57,14 +71,16 @@ fn main() { count: 1, sum: temp, }; - t_stations.insert(station.to_owned(), measurements); + t_stations.insert(station, measurements); } - }); - print!("\x1b[u\x1b[{print_line}B\x1b[60CTime reading lines in thread {i}={} ms", now_read_line.elapsed().as_millis()); + currposition += line.len() as u64; + if currposition >= end { + break; + } + } let _ = tx.send(t_stations); }); } - print!("\x1b[{cores}B"); drop(tx); while let Ok(t_stations) = rx.recv() { for (station, measurements) in t_stations.iter() { @@ -78,11 +94,25 @@ fn main() { } let mut stations: Vec = stations.iter().map(|(station, measurements)| { let measurements = measurements.to_string(); - format!("{station}={measurements}") + #[cfg(feature = "json")] + { + format!("{{\"{station}\":\"{measurements}\"}}") + } + #[cfg(not(feature = "json"))] + { + format!("{station}={measurements}") + } }).collect(); stations.sort(); - let _stations = stations.join(","); - // println!("{{{stations}}}"); + let stations = stations.join(","); + #[cfg(feature = "json")] + { + println!("\n\n[{stations}]"); + } + #[cfg(not(feature = "json"))] + { + println!("\n\n{{{stations}}}"); + } println!("\n\nTime={} ms", now.elapsed().as_millis()); }); } diff --git a/src/main/rust/src/bin/referenceImpl.rs b/src/main/rust/src/bin/referenceImpl.rs index 9c43a8c..66c25b0 100644 --- a/src/main/rust/src/bin/referenceImpl.rs +++ b/src/main/rust/src/bin/referenceImpl.rs @@ -2,7 +2,7 @@ use bstr::{BStr, ByteSlice}; use memmap::MmapOptions; use rustc_hash::FxHashMap as HashMap; use std::{fmt::Display, fs::File}; - +use std::time::Instant; use rayon::prelude::*; #[derive(Debug)] @@ -68,10 +68,11 @@ fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) { } fn main() { + let now = Instant::now(); let cores: usize = std::thread::available_parallelism().unwrap().into(); let path = match std::env::args().skip(1).next() { Some(path) => path, - None => "measurements.txt".to_owned(), + None => "../../../measurements.txt".to_owned(), }; let file = File::open(path).unwrap(); let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; @@ -128,4 +129,5 @@ fn main() { } println!("}}"); } + println!("\n\nTime={} ms", now.elapsed().as_millis()); } diff --git a/src/main/rust/src/lib.rs b/src/main/rust/src/lib.rs index 10d3484..87987bb 100644 --- a/src/main/rust/src/lib.rs +++ b/src/main/rust/src/lib.rs @@ -69,10 +69,8 @@ pub fn parse_temp(bytes: &[u8]) -> isize { } #[inline] -pub fn read_bytes_until(bytes: &mut Bytes>, delimiter: u8) -> Option<[u8; 108]> { - // 108 max length of line in bytes - let mut buf: [u8; 108] = [b'#'; 108]; - let mut idx = 0; +pub fn read_bytes_until(bytes: &mut Bytes>, delimiter: u8) -> Option> { + let mut buf: Vec = Vec::with_capacity(108); while let Some(byte) = bytes.next() { if byte.is_err() { panic!("Could not read byte"); @@ -81,8 +79,7 @@ pub fn read_bytes_until(bytes: &mut Bytes>, delimiter: u8) -> O if delimiter == byte { return Some(buf); } - buf[idx] = byte; - idx += 1; + buf.push(byte); } None } @@ -94,9 +91,5 @@ pub fn parse_line(line: &[u8]) -> (&[u8], &[u8]) { idx += 1; } let station = &line[0..idx]; - let midpoint = idx + 1; - while idx < line.len() && line[idx] != b'#' { - idx += 1; - } - (station, &line[midpoint..idx]) + (station, &line[(idx + 1)..]) } \ No newline at end of file