From a3b6075b6ac444ba10e9bcd237a2e2b35a9e2c64 Mon Sep 17 00:00:00 2001 From: Fabian Schmidt Date: Mon, 22 Jul 2024 13:57:03 +0200 Subject: [PATCH] Fastest yet with scaled integers instead of floats --- src/main/rust/Cargo.lock | 1306 +---------------------- src/main/rust/Cargo.toml | 7 +- src/main/rust/src/bin/multi_threaded.rs | 90 +- src/main/rust/src/bin/polars.rs | 2 +- src/main/rust/src/bin/referenceImpl.rs | 3 +- src/main/rust/src/bin/single_thread.rs | 64 +- src/main/rust/src/lib.rs | 67 ++ src/main/rust/src/main.rs | 110 +- 8 files changed, 149 insertions(+), 1500 deletions(-) create mode 100644 src/main/rust/src/lib.rs diff --git a/src/main/rust/Cargo.lock b/src/main/rust/Cargo.lock index 585c3ba..be2c2c0 100644 --- a/src/main/rust/Cargo.lock +++ b/src/main/rust/Cargo.lock @@ -2,101 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "getrandom", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "argminmax" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" -dependencies = [ - "num-traits", -] - -[[package]] -name = "array-init-cursor" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" - -[[package]] -name = "arrow-format" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07884ea216994cdc32a2d5f8274a8bee979cfe90274b83f86f440866ee3132c7" -dependencies = [ - "planus", - "serde", -] - -[[package]] -name = "atoi" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - "num-traits", -] - -[[package]] -name = "atoi_simd" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" - -[[package]] -name = "autocfg" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" - -[[package]] -name = "bitflags" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" - [[package]] name = "bstr" version = "1.9.1" @@ -108,94 +13,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bumpalo" -version = "3.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" - -[[package]] -name = "bytemuck" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" -dependencies = [ - "bytemuck_derive", -] - -[[package]] -name = "bytemuck_derive" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.58", -] - -[[package]] -name = "bytes" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" - -[[package]] -name = "cc" -version = "1.0.96" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "065a29261d53ba54260972629f9ca6bffa69bac13cd1fed61420f7fa68b9f8bd" -dependencies = [ - "jobserver", - "libc", - "once_cell", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "num-traits", - "windows-targets", -] - -[[package]] -name = "comfy-table" -version = "7.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" -dependencies = [ - "crossterm", - "strum", - "strum_macros 0.26.2", - "unicode-width", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" - -[[package]] -name = "crossbeam-channel" -version = "0.5.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -215,78 +32,17 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-queue" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" -version = "0.8.19" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" - -[[package]] -name = "crossterm" -version = "0.27.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" -dependencies = [ - "bitflags", - "crossterm_winapi", - "libc", - "parking_lot", - "winapi", -] - -[[package]] -name = "crossterm_winapi" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" -dependencies = [ - "winapi", -] - -[[package]] -name = "dyn-clone" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "either" -version = "1.11.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" - -[[package]] -name = "enum_dispatch" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" -dependencies = [ - "once_cell", - "proc-macro2", - "quote", - "syn 2.0.58", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "ethnum" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "fast-float" @@ -294,161 +50,11 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" -[[package]] -name = "foreign_vec" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" - -[[package]] -name = "getrandom" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" -dependencies = [ - "cfg-if", - "js-sys", - "libc", - "wasi", - "wasm-bindgen", -] - -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "hashbrown" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" -dependencies = [ - "ahash", - "allocator-api2", - "rayon", -] - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "iana-time-zone" -version = "0.1.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "indexmap" -version = "2.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "itoa" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" - -[[package]] -name = "jobserver" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" -dependencies = [ - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" -dependencies = [ - "wasm-bindgen", -] - [[package]] name = "libc" -version = "0.2.154" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" - -[[package]] -name = "libm" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" - -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" - -[[package]] -name = "lz4" -version = "1.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" -dependencies = [ - "cc", - "libc", -] +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "memchr" @@ -467,473 +73,35 @@ dependencies = [ ] [[package]] -name = "memmap2" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +name = "onebrc" +version = "0.1.0" dependencies = [ - "libc", -] - -[[package]] -name = "multiversion" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4851161a11d3ad0bf9402d90ffc3967bf231768bfd7aeb61755ad06dbf1a142" -dependencies = [ - "multiversion-macros", - "target-features", -] - -[[package]] -name = "multiversion-macros" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a74ddee9e0c27d2578323c13905793e91622148f138ba29738f9dddb835e90" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "target-features", -] - -[[package]] -name = "now" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d89e9874397a1f0a52fc1f197a8effd9735223cb2390e9dcc83ac6cd02923d0" -dependencies = [ - "chrono", -] - -[[package]] -name = "ntapi" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" -dependencies = [ - "winapi", -] - -[[package]] -name = "num-traits" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "parking_lot" -version = "0.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets", -] - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "pkg-config" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" - -[[package]] -name = "planus" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" -dependencies = [ - "array-init-cursor", -] - -[[package]] -name = "polars" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938048fcda6a8e2ace6eb168bee1b415a92423ce51e418b853bf08fc40349b6b" -dependencies = [ - "getrandom", - "polars-core", - "polars-io", - "polars-lazy", - "polars-ops", - "polars-sql", - "polars-time", - "version_check", -] - -[[package]] -name = "polars-arrow" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce68a02f698ff7787c261aea1b4c040a8fe183a8fb200e2436d7f35d95a1b86f" -dependencies = [ - "ahash", - "arrow-format", - "atoi_simd", - "bytemuck", - "chrono", - "dyn-clone", - "either", - "ethnum", + "bstr", "fast-float", - "foreign_vec", - "getrandom", - "hashbrown", - "itoa", - "lz4", - "multiversion", - "num-traits", - "polars-error", - "polars-utils", - "ryu", - "simdutf8", - "streaming-iterator", - "strength_reduce", - "version_check", - "zstd", -] - -[[package]] -name = "polars-compute" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14fbc5f141b29b656a4cec4802632e5bff10bf801c6809c6bbfbd4078a044dd" -dependencies = [ - "bytemuck", - "num-traits", - "polars-arrow", - "polars-utils", - "version_check", -] - -[[package]] -name = "polars-core" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f5efe734b6cbe5f97ea769be8360df5324fade396f1f3f5ad7fe9360ca4a23" -dependencies = [ - "ahash", - "bitflags", - "bytemuck", - "chrono", - "comfy-table", - "either", - "hashbrown", - "indexmap", - "num-traits", - "once_cell", - "polars-arrow", - "polars-compute", - "polars-error", - "polars-row", - "polars-utils", - "rand", - "rand_distr", - "rayon", - "regex", - "smartstring", - "thiserror", - "version_check", - "xxhash-rust", -] - -[[package]] -name = "polars-error" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6396de788f99ebfc9968e7b6f523e23000506cde4ba6dfc62ae4ce949002a886" -dependencies = [ - "arrow-format", - "regex", - "simdutf8", - "thiserror", -] - -[[package]] -name = "polars-io" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d0458efe8946f4718fd352f230c0db5a37926bd0d2bd25af79dc24746abaaea" -dependencies = [ - "ahash", - "atoi_simd", - "bytes", - "chrono", - "fast-float", - "home", - "itoa", "memchr", - "memmap2", - "num-traits", - "once_cell", - "percent-encoding", - "polars-arrow", - "polars-core", - "polars-error", - "polars-time", - "polars-utils", + "memmap", "rayon", - "regex", - "ryu", - "simdutf8", - "smartstring", + "rustc-hash", ] -[[package]] -name = "polars-lazy" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7105b40905bb38e8fc4a7fd736594b7491baa12fad3ac492969ca221a1b5d5" -dependencies = [ - "ahash", - "bitflags", - "glob", - "once_cell", - "polars-arrow", - "polars-core", - "polars-io", - "polars-ops", - "polars-pipe", - "polars-plan", - "polars-time", - "polars-utils", - "rayon", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-ops" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e09afc456ab11e75e5dcb43e00a01c71f3a46a2781e450054acb6bb096ca78e" -dependencies = [ - "ahash", - "argminmax", - "bytemuck", - "either", - "hashbrown", - "indexmap", - "memchr", - "num-traits", - "polars-arrow", - "polars-compute", - "polars-core", - "polars-error", - "polars-utils", - "rayon", - "regex", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-pipe" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b7ead073cc3917027d77b59861a9f071db47125de9314f8907db1a0a3e4100" -dependencies = [ - "crossbeam-channel", - "crossbeam-queue", - "enum_dispatch", - "hashbrown", - "num-traits", - "polars-arrow", - "polars-compute", - "polars-core", - "polars-io", - "polars-ops", - "polars-plan", - "polars-row", - "polars-utils", - "rayon", - "smartstring", - "version_check", -] - -[[package]] -name = "polars-plan" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384a175624d050c31c473ee11df9d7af5d729ae626375e522158cfb3d150acd0" -dependencies = [ - "ahash", - "bytemuck", - "once_cell", - "percent-encoding", - "polars-arrow", - "polars-core", - "polars-io", - "polars-ops", - "polars-time", - "polars-utils", - "rayon", - "regex", - "smartstring", - "strum_macros 0.25.3", - "version_check", -] - -[[package]] -name = "polars-row" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32322f7acbb83db3e9c7697dc821be73d06238da89c817dcc8bc1549a5e9c72f" -dependencies = [ - "polars-arrow", - "polars-error", - "polars-utils", -] - -[[package]] -name = "polars-sql" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f0b4c6ddffdfd0453e84bc3918572c633014d661d166654399cf93752aa95b5" -dependencies = [ - "polars-arrow", - "polars-core", - "polars-error", - "polars-lazy", - "polars-plan", - "rand", - "serde", - "serde_json", - "sqlparser", -] - -[[package]] -name = "polars-time" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee2649fc96bd1b6584e0e4a4b3ca7d22ed3d117a990e63ad438ecb26f7544d0" -dependencies = [ - "atoi", - "chrono", - "now", - "once_cell", - "polars-arrow", - "polars-core", - "polars-error", - "polars-ops", - "polars-utils", - "regex", - "smartstring", -] - -[[package]] -name = "polars-utils" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b174ca4a77ad47d7b91a0460aaae65bbf874c8bfbaaa5308675dadef3976bbda" -dependencies = [ - "ahash", - "bytemuck", - "hashbrown", - "indexmap", - "num-traits", - "once_cell", - "polars-error", - "rayon", - "smartstring", - "sysinfo", - "version_check", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand", -] - [[package]] name = "rayon" version = "1.10.0" @@ -954,57 +122,11 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "redox_syscall" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" - -[[package]] -name = "rust" -version = "0.1.0" -dependencies = [ - "bstr", - "fast-float", - "hashbrown", - "memchr", - "memmap", - "polars", - "rayon", - "rustc-hash", -] +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" [[package]] name = "rustc-hash" @@ -1012,277 +134,43 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" -[[package]] -name = "rustversion" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" - -[[package]] -name = "ryu" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - [[package]] name = "serde" -version = "1.0.199" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c9f6e76df036c77cd94996771fb40db98187f096dd0b9af39c6c6e452ba966a" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.199" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11bd257a6541e141e42ca6d24ae26f7714887b47e89aa739099104c7e4d3b7fc" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", -] - -[[package]] -name = "serde_json" -version = "1.0.116" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "simdutf8" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" - -[[package]] -name = "smallvec" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" - -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "static_assertions", - "version_check", -] - -[[package]] -name = "sqlparser" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" -dependencies = [ - "log", -] - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" - -[[package]] -name = "strength_reduce" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" - -[[package]] -name = "strum" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" - -[[package]] -name = "strum_macros" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.58", -] - -[[package]] -name = "strum_macros" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.58", + "syn", ] [[package]] name = "syn" -version = "1.0.109" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "syn" -version = "2.0.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "sysinfo" -version = "0.30.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87341a165d73787554941cd5ef55ad728011566fe714e987d1b976c15dbc3a83" -dependencies = [ - "cfg-if", - "core-foundation-sys", - "libc", - "ntapi", - "once_cell", - "windows", -] - -[[package]] -name = "target-features" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" - -[[package]] -name = "thiserror" -version = "1.0.59" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.59" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.58", -] - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-width" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wasm-bindgen" -version = "0.2.92" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.92" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn 2.0.58", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.92" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.92" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.58", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.92" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" - [[package]] name = "winapi" version = "0.3.9" @@ -1304,149 +192,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" -dependencies = [ - "windows-core", - "windows-targets", -] - -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" - -[[package]] -name = "xxhash-rust" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927da81e25be1e1a2901d59b81b37dd2efd1fc9c9345a55007f09bf5a2d3ee03" - -[[package]] -name = "zerocopy" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.58", -] - -[[package]] -name = "zstd" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/src/main/rust/Cargo.toml b/src/main/rust/Cargo.toml index af4ec78..8fe6710 100644 --- a/src/main/rust/Cargo.toml +++ b/src/main/rust/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "rust" +name = "onebrc" version = "0.1.0" edition = "2021" @@ -8,12 +8,9 @@ edition = "2021" [dependencies] bstr = "1.9.1" fast-float = "0.2.0" -hashbrown = "0.14.3" memchr = "2.7.4" memmap = "0.7.0" -polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]} +#polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]} rayon = "1.10.0" rustc-hash = "2.0.0" -[build] -rustflags = ["-C target-cpu=native"] diff --git a/src/main/rust/src/bin/multi_threaded.rs b/src/main/rust/src/bin/multi_threaded.rs index 519b99e..b5be52c 100644 --- a/src/main/rust/src/bin/multi_threaded.rs +++ b/src/main/rust/src/bin/multi_threaded.rs @@ -4,61 +4,40 @@ use std::{ sync::{Arc, Mutex}, thread, }; - -use hashbrown::HashMap; - -#[derive(Clone, Copy)] -struct StationMeasurements { - min: f64, - max: f64, - count: usize, - sum: f64, -} +use std::collections::HashMap; +use std::time::Instant; const DEFAULT_HASHMAP_LENGTH: usize = 10000; fn main() { - let stations: Arc>> = + let now = Instant::now(); + let stations: Arc>> = Arc::new(Mutex::new(HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH))); - let cores: usize = std::thread::available_parallelism().unwrap().into(); + let cores: usize = thread::available_parallelism().unwrap().into(); let chunk_length = 1_000_000_000 / cores; let mut handles = vec![]; for i in 0..cores { - let file = File::open("../measurements.txt").expect("File measurements.txt not found"); + let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); let reader = BufReader::new(file); let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length); let stations_clone = stations.clone(); let handle = thread::spawn(move || { - let mut t_stations: HashMap = + let mut t_stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); - let mut line_num = 0; - for line in line_chunk { - line_num += 1; - let lineprint = chunk_length / 500; - if line_num % lineprint == 0 { - println!("Thread #{i}"); - let formatted_line_num = format_nums(line_num); - println!("Calculated {formatted_line_num} stations"); - } + + let now_read_line = Instant::now(); + println!("Start reading lines in thread {i}"); + line_chunk.for_each(|line| { let line = line.expect("could not read line"); - if line_num < 10 { - println!("{}", line); - } let (station, temp) = line.split_once(';').expect("Error while splitting"); - let temp = temp.parse().expect("Error while parsing temperature"); + let temp = onebrc::parse_temp(temp.as_bytes()); let measurements_option = t_stations.get_mut(station); if let Some(measurements) = measurements_option { - if temp < measurements.min { - measurements.min = temp; - } else if temp > measurements.max { - measurements.max = temp; - } - measurements.count += 1; - measurements.sum += temp; + measurements.update(temp); } else { - let measurements = StationMeasurements { + let measurements = onebrc::StationMeasurements { min: temp, max: temp, count: 1, @@ -66,43 +45,32 @@ fn main() { }; t_stations.insert(station.to_owned(), measurements); } - } - let mut stations_guard = stations_clone.lock().expect("Error while locking"); + }); + println!("Time reading lines in thread {i}={} μs", now_read_line.elapsed().as_micros()); + let now_insert_line = Instant::now(); + println!("Start inserting lines in thread {i}"); for (station, measurements) in t_stations.iter() { + let mut stations_guard = stations_clone.lock().expect("Error while locking"); let joined_measurements_options = stations_guard.get_mut(station.as_str()); if let Some(joined_measurements) = joined_measurements_options { - if measurements.min < joined_measurements.min { - joined_measurements.min = measurements.min; - } else if measurements.max > joined_measurements.max { - joined_measurements.max = measurements.max; - } - joined_measurements.count += measurements.count; - joined_measurements.sum += measurements.sum; + joined_measurements.merge(measurements); } else { stations_guard.insert(station.to_owned(), *measurements); } } + println!("Time inserting lines in thread {i}={} μs", now_insert_line.elapsed().as_micros()); }); handles.push(handle); } for handle in handles { handle.join().unwrap(); } - for (station, measurments) in stations.lock().unwrap().iter() { - let min = measurments.min; - let max = measurments.max; - let avg = measurments.sum / measurments.count as f64; - println!("{station}={min}/{max}/{avg:.2}"); - } -} - -fn format_nums(num: usize) -> String { - num.to_string() - .as_bytes() - .rchunks(3) - .rev() - .map(std::str::from_utf8) - .collect::, _>>() - .unwrap() - .join("_") + let mut stations: Vec = stations.lock().unwrap().iter().map(|(&ref station, &ref measurements)| { + let measurements = measurements.to_string(); + format!("{station}={measurements}") + }).collect(); + stations.sort(); + let stations = stations.join(","); + println!("{{{stations}}}"); + println!("Time={} μs", now.elapsed().as_micros()); } diff --git a/src/main/rust/src/bin/polars.rs b/src/main/rust/src/bin/polars.rs index c1772cd..9d81b89 100644 --- a/src/main/rust/src/bin/polars.rs +++ b/src/main/rust/src/bin/polars.rs @@ -9,7 +9,7 @@ fn run_polars() -> Result { let f2: Field = Field::new("measure", DataType::Float64); let sc: Schema = Schema::from_iter(vec![f1, f2]); - let q = LazyCsvReader::new("../measurements.txt") + let q = LazyCsvReader::new("../../../measurements.txt") .has_header(false) .with_schema(Some(Arc::new(sc))) .with_separator(b';') diff --git a/src/main/rust/src/bin/referenceImpl.rs b/src/main/rust/src/bin/referenceImpl.rs index 5569fc9..f84bf6a 100644 --- a/src/main/rust/src/bin/referenceImpl.rs +++ b/src/main/rust/src/bin/referenceImpl.rs @@ -68,7 +68,8 @@ fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) { } fn main() { - let cores: usize = std::thread::available_parallelism().unwrap().into(); + //let cores: usize = std::thread::available_parallelism().unwrap().into(); + let cores: usize = 1; let path = match std::env::args().skip(1).next() { Some(path) => path, None => "measurements.txt".to_owned(), diff --git a/src/main/rust/src/bin/single_thread.rs b/src/main/rust/src/bin/single_thread.rs index d29546b..fe4c371 100644 --- a/src/main/rust/src/bin/single_thread.rs +++ b/src/main/rust/src/bin/single_thread.rs @@ -2,71 +2,43 @@ use std::{ fs::File, io::{BufRead, BufReader}, }; +use std::collections::HashMap; +use std::time::Instant; -use hashbrown::HashMap; - -struct StationMeasurements { - min: f64, - max: f64, - temps: Vec, -} const DEFAULT_HASHMAP_LENGTH: usize = 10000; fn main() { - let mut stations: HashMap = + let now = Instant::now(); + let mut stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); - let file = File::open("../measurements.txt").expect("File measurements.txt not found"); + let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); let reader = BufReader::new(file); - let mut line_num = 0; for line_result in reader.lines() { - line_num += 1; - if line_num % 250000 == 0 { - let formatted_line_num = format_nums(line_num); - println!("Calculated {formatted_line_num} stations"); - } let line = line_result.expect("could not read line"); let (station, temp) = line.split_once(';').unwrap(); - let temp = temp.parse().unwrap(); + let temp = onebrc::parse_temp(temp.as_bytes()); let measurements_option = stations.get_mut(station); if let Some(measurements) = measurements_option { - if temp < measurements.min { - measurements.min = temp; - } else if temp > measurements.max { - measurements.max = temp; - } - measurements.temps.push(temp); + measurements.update(temp); } else { - let measurements = StationMeasurements { + let measurements = onebrc::StationMeasurements { min: temp, max: temp, - temps: vec![temp], + count: 1, + sum: temp, }; stations.insert(station.to_owned(), measurements); } } - for (station, measurments) in stations { - let min = measurments.min; - let max = measurments.max; - let avg = avg(measurments.temps); - println!("{station}={min}/{max}/{avg}"); - } + let mut stations: Vec = stations.iter().map(|(&ref station, &ref measurements)| { + let measurements = measurements.to_string(); + format!("{station}={measurements}") + }).collect(); + stations.sort(); + let stations = stations.join(","); + println!("{{{stations}}}"); + println!("Time={} μs", now.elapsed().as_micros()); } -fn avg(temps: Vec) -> f64 { - let num_temps = temps.len() as f64; - let sum_temps: f64 = temps.iter().sum(); - sum_temps / num_temps -} - -fn format_nums(num: usize) -> String { - num.to_string() - .as_bytes() - .rchunks(3) - .rev() - .map(std::str::from_utf8) - .collect::, _>>() - .unwrap() - .join("_") -} diff --git a/src/main/rust/src/lib.rs b/src/main/rust/src/lib.rs new file mode 100644 index 0000000..694dbd5 --- /dev/null +++ b/src/main/rust/src/lib.rs @@ -0,0 +1,67 @@ +use std::fmt::Display; + +#[derive(Copy, Clone)] +pub struct StationMeasurements { + pub min: isize, + pub max: isize, + pub count: isize, + pub sum: isize, +} + +impl StationMeasurements { + pub fn update(&mut self, v: isize) { + self.min = self.min.min(v); + self.max = self.max.max(v); + self.count += 1; + self.sum += v; + } + + pub fn merge(&mut self, other: &Self) { + self.min = self.min.min(other.min); + self.max = self.max.max(other.max); + self.count += other.count; + self.sum += other.sum; + } +} + +impl Display for StationMeasurements { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let min = self.min as f64 / 10.0; + let max = self.max as f64 / 10.0; + let avg = (self.sum as f64 / self.count as f64) / 10.0; + write!(f, "{min}/{avg:.1}/{max}") + } +} + +pub fn format_nums(num: usize) -> String { + num.to_string() + .as_bytes() + .rchunks(3) + .rev() + .map(std::str::from_utf8) + .collect::, _>>() + .unwrap() + .join("_") +} + +#[inline] +pub const fn get_digit(b: u8) -> u32 { + (b as u32).wrapping_sub('0' as u32) +} + +#[inline] +pub fn parse_temp(bytes: &[u8]) -> isize { + let is_negative = bytes[0] == b'-'; + let as_decimal = match (is_negative, bytes.len()) { + (true, 4) => get_digit(bytes[1]) * 10 + get_digit(bytes[3]), + (true, 5) => get_digit(bytes[1]) * 100 + get_digit(bytes[2]) * 10 + get_digit(bytes[4]), + (false, 3) => get_digit(bytes[0]) * 10 + get_digit(bytes[2]), + (false, 4) => get_digit(bytes[0]) * 100 + get_digit(bytes[1]) * 10 + get_digit(bytes[3]), + _x => panic!(), + }; + if is_negative { + -(as_decimal as isize) + } else { + as_decimal as isize + } +} \ No newline at end of file diff --git a/src/main/rust/src/main.rs b/src/main/rust/src/main.rs index e7d54e3..bf63228 100644 --- a/src/main/rust/src/main.rs +++ b/src/main/rust/src/main.rs @@ -1,106 +1,8 @@ -use std::{ - fs::File, - io::{BufRead, BufReader}, - sync::{Arc, Mutex}, - thread, -}; - -use hashbrown::HashMap; - -#[derive(Clone, Copy)] -struct StationMeasurements { - min: f64, - max: f64, - count: usize, - sum: f64, -} fn main() { - let stations: Arc>> = - Arc::new(Mutex::new(HashMap::new())); - - //let cores: usize = std::thread::available_parallelism().unwrap().into(); - let cores: usize = 4; - - let chunk_length = 1_000_000_000 / cores; - let mut handles = vec![]; - for i in 0..cores { - let file = File::open("../measurements.txt").expect("File measurements.txt not found"); - let reader = BufReader::new(file); - let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length); - let stations_clone = stations.clone(); - let handle = thread::spawn(move || { - let mut t_stations: HashMap = HashMap::new(); - let mut line_num = 0; - for line in line_chunk { - line_num += 1; - let lineprint = chunk_length / 500; - if line_num % lineprint == 0 { - println!("Thread #{i}"); - let formatted_line_num = format_nums(line_num); - println!("Calculated {formatted_line_num} stations"); - } - let line = line.expect("could not read line"); - if line_num < 10 { - println!("{}", line); - } - let (station, temp) = line.split_once(';').expect("Error while splitting"); - let temp = temp.parse().expect("Error while parsing temperature"); - let measurements_option = t_stations.get_mut(station); - if let Some(measurements) = measurements_option { - if temp < measurements.min { - measurements.min = temp; - } else if temp > measurements.max { - measurements.max = temp; - } - measurements.count += 1; - measurements.sum += temp; - } else { - let measurements = StationMeasurements { - min: temp, - max: temp, - count: 1, - sum: temp, - }; - t_stations.insert(station.to_owned(), measurements); - } - } - let mut stations_guard = stations_clone.lock().expect("Error while locking"); - for (station, measurements) in t_stations.iter() { - let joined_measurements_options = stations_guard.get_mut(station.as_str()); - if let Some(joined_measurements) = joined_measurements_options { - if measurements.min < joined_measurements.min { - joined_measurements.min = measurements.min; - } else if measurements.max > joined_measurements.max { - joined_measurements.max = measurements.max; - } - joined_measurements.count += measurements.count; - joined_measurements.sum += measurements.sum; - } else { - stations_guard.insert(station.to_owned(), *measurements); - } - } - }); - handles.push(handle); - } - for handle in handles { - handle.join().unwrap(); - } - for (station, measurments) in stations.lock().unwrap().iter() { - let min = measurments.min; - let max = measurments.max; - let avg = measurments.sum / measurments.count as f64; - println!("{station}={min}/{max}/{avg:.2}"); - } -} - -fn format_nums(num: usize) -> String { - num.to_string() - .as_bytes() - .rchunks(3) - .rev() - .map(std::str::from_utf8) - .collect::, _>>() - .unwrap() - .join("_") -} + // let now = Instant::now(); + // let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); + // let reader = BufReader::new(file); + // reader.lines().for_each(|_x| {()}); + // println!("Time={} μs", now.elapsed().as_micros()); +} \ No newline at end of file