Add a solution I found which helped me get faster; unfortunately, the solution itself is incorrect (?)
This commit is contained in:
parent
0aa9d8be86
commit
07a8e7fc69
578
src/main/rust/Cargo.lock
generated
578
src/main/rust/Cargo.lock
generated
@ -17,12 +17,67 @@ version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
|
||||
dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
|
||||
|
||||
[[package]]
|
||||
name = "arbitrary-chunks"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ad8689a486416c401ea15715a4694de30054248ec627edbf31f49cb64ee4086"
|
||||
|
||||
[[package]]
|
||||
name = "async-channel"
|
||||
version = "2.3.1"
|
||||
@ -162,6 +217,28 @@ version = "2.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
||||
|
||||
[[package]]
|
||||
name = "bitvec"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
|
||||
dependencies = [
|
||||
"funty",
|
||||
"radium",
|
||||
"tap",
|
||||
"wyz",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-pseudorand"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2097358495d244a0643746f4d13eedba4608137008cf9dec54e53a3b700115a6"
|
||||
dependencies = [
|
||||
"chiapos-chacha8",
|
||||
"nanorand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blocking"
|
||||
version = "1.6.1"
|
||||
@ -192,18 +269,42 @@ version = "3.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6"
|
||||
dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chiapos-chacha8"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33f8be573a85f6c2bc1b8e43834c07e32f95e489b914bf856c0549c3c269cd0a"
|
||||
dependencies = [
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ciborium"
|
||||
version = "0.2.2"
|
||||
@ -231,6 +332,12 @@ dependencies = [
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cityhash-102-rs"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c61dc391dedb78a7117507d8efd692268859f279b97c04c7e4aab1235ef8301"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.13"
|
||||
@ -238,6 +345,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -246,8 +354,22 @@ version = "4.5.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.5.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -256,6 +378,32 @@ version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
|
||||
|
||||
[[package]]
|
||||
name = "colored"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common_traits"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6963264945d9ccb66c17ba1cc1af34d06812f45bc14c250dda5a1566905b0af0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "concurrent-queue"
|
||||
version = "2.5.0"
|
||||
@ -277,7 +425,7 @@ dependencies = [
|
||||
"clap",
|
||||
"criterion-plot",
|
||||
"is-terminal",
|
||||
"itertools",
|
||||
"itertools 0.10.5",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"oorandom",
|
||||
@ -298,7 +446,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
|
||||
dependencies = [
|
||||
"cast",
|
||||
"itertools",
|
||||
"itertools 0.10.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -381,12 +529,24 @@ version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
|
||||
|
||||
[[package]]
|
||||
name = "fastmurmur3"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d7e9bc68be4cdabbb8938140b01a8b5bc1191937f2c7e7ecc2fcebbe2d749df"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
|
||||
|
||||
[[package]]
|
||||
name = "funty"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
|
||||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.30"
|
||||
@ -412,6 +572,26 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "2.4.1"
|
||||
@ -422,6 +602,21 @@ dependencies = [
|
||||
"crunchy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashers"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2bca93b15ea5a746f220e56587f71e73c6165eab783df9e26590069953e3c30"
|
||||
dependencies = [
|
||||
"fxhash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.9"
|
||||
@ -434,6 +629,12 @@ version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc"
|
||||
|
||||
[[package]]
|
||||
name = "highway"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c706f1711006204c2ba8fb1a7bd55f689bbf7feca9ff40325206b5e140cff6df"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.12"
|
||||
@ -445,6 +646,12 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.5"
|
||||
@ -454,6 +661,15 @@ dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.11"
|
||||
@ -469,6 +685,12 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.158"
|
||||
@ -502,6 +724,30 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrohash"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3ba553cb19e2acbc54baa16faef215126243fe45e53357a3b2e9f4ebc7b0506c"
|
||||
|
||||
[[package]]
|
||||
name = "murmur2"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb585ade2549a017db2e35978b77c319214fa4b37cede841e27954dd6e8f3ca8"
|
||||
|
||||
[[package]]
|
||||
name = "murmur3"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b"
|
||||
|
||||
[[package]]
|
||||
name = "nanorand"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "729eb334247daa1803e0a094d0a5c55711b85571179f5ec6e53eccfdf7008958"
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
@ -522,14 +768,17 @@ name = "onebrc"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"clap",
|
||||
"colored",
|
||||
"criterion",
|
||||
"easy-parallel",
|
||||
"fast-float",
|
||||
"libc",
|
||||
"memchr",
|
||||
"memmap2",
|
||||
"ptr_hash",
|
||||
"rayon",
|
||||
"rustc-hash",
|
||||
"rustc-hash 2.0.0",
|
||||
"smol",
|
||||
]
|
||||
|
||||
@ -545,6 +794,12 @@ version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae"
|
||||
|
||||
[[package]]
|
||||
name = "partition"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "947f833aaa585cf12b8ec7c0476c98784c49f33b861376ffc84ed92adebf2aba"
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.14"
|
||||
@ -605,6 +860,15 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
|
||||
dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.86"
|
||||
@ -614,6 +878,40 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ptr_hash"
|
||||
version = "0.1.1"
|
||||
source = "git+https://github.com/ragnargrootkoerkamp/ptrhash#727afbe0afa6939c756f89eb782de8c683e583fa"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitvec",
|
||||
"cityhash-102-rs",
|
||||
"clap",
|
||||
"colored",
|
||||
"common_traits",
|
||||
"either",
|
||||
"fastmurmur3",
|
||||
"fastrand",
|
||||
"fxhash",
|
||||
"hashers",
|
||||
"highway",
|
||||
"itertools 0.11.0",
|
||||
"lazy_static",
|
||||
"metrohash",
|
||||
"murmur2",
|
||||
"murmur3",
|
||||
"radsort",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"rayon",
|
||||
"rdst",
|
||||
"rustc-hash 1.1.0",
|
||||
"sucds",
|
||||
"tempfile",
|
||||
"wyhash",
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.36"
|
||||
@ -623,6 +921,48 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "radium"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
|
||||
|
||||
[[package]]
|
||||
name = "radsort"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "019b4b213425016d7d84a153c4c73afb0946fbb4840e4eece7ba8848b9d6da22"
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.10.0"
|
||||
@ -643,6 +983,21 @@ dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rdst"
|
||||
version = "0.20.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e7970b4e577b76a96d5e56b5f6662b66d1a4e1f5bb026ee118fc31b373c2752"
|
||||
dependencies = [
|
||||
"arbitrary-chunks",
|
||||
"block-pseudorand",
|
||||
"criterion",
|
||||
"partition",
|
||||
"rayon",
|
||||
"tikv-jemallocator",
|
||||
"voracious_radix_sort",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.10.5"
|
||||
@ -672,6 +1027,12 @@ version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "2.0.0"
|
||||
@ -738,6 +1099,12 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.2"
|
||||
@ -773,6 +1140,22 @@ dependencies = [
|
||||
"futures-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "sucds"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d53d46182afe6ed822a94c54a532dc0d59691a8f49226bdc4596529ca864cdd6"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.72"
|
||||
@ -784,6 +1167,45 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tap"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"fastrand",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemalloc-sys"
|
||||
version = "0.5.4+5.3.0-patched"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemallocator"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"tikv-jemalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinytemplate"
|
||||
version = "1.2.1"
|
||||
@ -816,6 +1238,21 @@ version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "voracious_radix_sort"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "446e7ffcb6c27a71d05af7e51ef2ee5b71c48424b122a832f2439651e1914899"
|
||||
dependencies = [
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
@ -826,6 +1263,12 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.92"
|
||||
@ -899,13 +1342,22 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -914,7 +1366,22 @@ version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.48.5",
|
||||
"windows_aarch64_msvc 0.48.5",
|
||||
"windows_i686_gnu 0.48.5",
|
||||
"windows_i686_msvc 0.48.5",
|
||||
"windows_x86_64_gnu 0.48.5",
|
||||
"windows_x86_64_gnullvm 0.48.5",
|
||||
"windows_x86_64_msvc 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -923,28 +1390,46 @@ version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_aarch64_gnullvm 0.52.6",
|
||||
"windows_aarch64_msvc 0.52.6",
|
||||
"windows_i686_gnu 0.52.6",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
"windows_i686_msvc 0.52.6",
|
||||
"windows_x86_64_gnu 0.52.6",
|
||||
"windows_x86_64_gnullvm 0.52.6",
|
||||
"windows_x86_64_msvc 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.6"
|
||||
@ -957,26 +1442,95 @@ version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "wyhash"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wyz"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
|
||||
dependencies = [
|
||||
"tap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xxhash-rust"
|
||||
version = "0.8.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984"
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.7.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
@ -15,6 +15,9 @@ rustc-hash = "2.0.0"
|
||||
libc = "0.2.158"
|
||||
smol = "2.0.1"
|
||||
easy-parallel = "3.3.1"
|
||||
clap = { version = "4.5.13", features = ["derive"] }
|
||||
colored = "2.1.0"
|
||||
# `default-features` is the supported key; the underscore spelling `default_features` is deprecated by Cargo.
ptr_hash = { git = "https://github.com/ragnargrootkoerkamp/ptrhash", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
@ -22,6 +25,7 @@ criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
[features]
|
||||
json = []
|
||||
unsafe = []
|
||||
no_pdep = []
|
||||
|
||||
[[bench]]
|
||||
name = "reference_impl"
|
||||
|
87
src/main/rust/src/bin/rgk.rs
Normal file
87
src/main/rust/src/bin/rgk.rs
Normal file
@ -0,0 +1,87 @@
|
||||
use std::thread::available_parallelism;
|
||||
use clap::Parser;
|
||||
use colored::Colorize;
|
||||
use memmap2::Mmap;
|
||||
use onebrc::implementations::rgk::{find_city_names, format, run_parallel, to_str, Args, Record, S};
|
||||
|
||||
fn main() {
|
||||
let args = Args::parse();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let filename = args.input.unwrap_or("../../../measurements.txt".to_string());
|
||||
let mmap: Mmap;
|
||||
let data;
|
||||
{
|
||||
let file = std::fs::File::open(filename).unwrap();
|
||||
mmap = unsafe { Mmap::map(&file).unwrap() };
|
||||
data = &*mmap;
|
||||
}
|
||||
|
||||
// Guaranteed to be aligned for SIMD.
|
||||
let offset = unsafe { data.align_to::<S>().0.len() };
|
||||
let data = &data[offset..];
|
||||
|
||||
// Build a perfect hash function on the cities found in the first 100k characters.
|
||||
let names = find_city_names(&data[..4000000]);
|
||||
|
||||
if args.stats {
|
||||
eprintln!("Num cities: {}", names.len());
|
||||
let mut lens = vec![0; 102];
|
||||
for n in &names {
|
||||
if *n.last().unwrap() == b';' {
|
||||
continue;
|
||||
}
|
||||
lens[n.len()] += 1;
|
||||
}
|
||||
for (len, count) in lens.iter().enumerate() {
|
||||
if *count != 0 {
|
||||
eprintln!("{}: {}", len, count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let phf = run_parallel(
|
||||
data,
|
||||
&names,
|
||||
args.threads
|
||||
.unwrap_or(available_parallelism().unwrap().into()),
|
||||
);
|
||||
|
||||
if args.print {
|
||||
print!("{{");
|
||||
let mut first = true;
|
||||
|
||||
let mut keys = phf.keys.clone();
|
||||
keys.sort_by(|kl, kr| to_str(kl).cmp(to_str(kr)));
|
||||
|
||||
for name in &keys {
|
||||
if *name.last().unwrap() != b';' {
|
||||
continue;
|
||||
}
|
||||
let namepos = &name[..name.len() - 1];
|
||||
|
||||
let rpos = phf.index(namepos);
|
||||
let rneg = phf.index(name);
|
||||
let (min, avg, max) = Record::merge_pos_neg(rpos, rneg);
|
||||
|
||||
if !first {
|
||||
print!(", ");
|
||||
}
|
||||
first = false;
|
||||
|
||||
print!(
|
||||
"{}={}/{}/{}",
|
||||
to_str(namepos),
|
||||
format(min),
|
||||
format(avg),
|
||||
format(max)
|
||||
);
|
||||
}
|
||||
println!("}}");
|
||||
}
|
||||
|
||||
eprintln!(
|
||||
"total: {}",
|
||||
format!("{:>5.2?}", start.elapsed()).bold().green()
|
||||
);
|
||||
}
|
@ -7,3 +7,4 @@ pub mod phcs;
|
||||
pub mod reference_impl;
|
||||
pub mod single_thread;
|
||||
pub mod smol;
|
||||
pub mod rgk;
|
||||
|
443
src/main/rust/src/implementations/rgk.rs
Normal file
443
src/main/rust/src/implementations/rgk.rs
Normal file
@ -0,0 +1,443 @@
|
||||
use ptr_hash::PtrHashParams;
|
||||
use std::{
|
||||
simd::{cmp::SimdPartialEq, Simd},
|
||||
vec::Vec,
|
||||
};
|
||||
use rustc_hash::FxHashSet;
|
||||
|
||||
type V = i32;
|
||||
|
||||
type PtrHash = ptr_hash::DefaultPtrHash<ptr_hash::hash::FxHash, u64>;
|
||||
|
||||
pub struct Phf {
|
||||
pub ptr_hash: PtrHash,
|
||||
pub keys: Vec<Vec<u8>>,
|
||||
pub slots: Vec<Record>,
|
||||
}
|
||||
|
||||
impl Phf {
|
||||
fn new(mut keys: Vec<Vec<u8>>) -> Self {
|
||||
keys.sort();
|
||||
|
||||
let num_slots = keys.len() * 5 / 2;
|
||||
let params = ptr_hash::PtrHashParams {
|
||||
alpha: 0.9,
|
||||
c: 1.5,
|
||||
slots_per_part: num_slots,
|
||||
..PtrHashParams::default()
|
||||
};
|
||||
|
||||
let mut hashes: Vec<u64> = keys.iter().map(|key| hash_name(key)).collect();
|
||||
hashes.sort();
|
||||
for (x, y) in hashes.iter().zip(hashes.iter().skip(1)) {
|
||||
assert!(*x != *y, "DUPLICATE HASH");
|
||||
}
|
||||
|
||||
let ptr_hash = PtrHash::new(&hashes, params);
|
||||
|
||||
let slots = vec![Record::default(); num_slots];
|
||||
|
||||
Self {
|
||||
ptr_hash,
|
||||
keys,
|
||||
slots,
|
||||
}
|
||||
}
|
||||
fn compute_index(&self, hash: u64) -> usize {
|
||||
self.ptr_hash.index_single_part(&hash)
|
||||
}
|
||||
fn get_index_mut(&mut self, idx: usize) -> &mut Record {
|
||||
&mut self.slots[idx]
|
||||
}
|
||||
fn index_hash_mut(&mut self, hash: u64) -> &mut Record {
|
||||
&mut self.slots[self.ptr_hash.index_single_part(&hash)]
|
||||
}
|
||||
pub fn index<'b>(&'b self, key: &[u8]) -> &'b Record {
|
||||
let hash = hash_name(key);
|
||||
&self.slots[self.compute_index(hash)]
|
||||
}
|
||||
fn index_mut<'b>(&'b mut self, key: &[u8]) -> &'b mut Record {
|
||||
self.index_hash_mut(hash_name(key))
|
||||
}
|
||||
fn merge(&mut self, r: Self) {
|
||||
// TODO: If key sets are equal or one is a subset of the other, merge
|
||||
// smaller into larger.
|
||||
let mut new_keys = vec![];
|
||||
let mut i1 = 0;
|
||||
let mut i2 = 0;
|
||||
while i1 < self.keys.len() && i2 < r.keys.len() {
|
||||
if self.keys[i1] == r.keys[i2] {
|
||||
new_keys.push(self.keys[i1].clone());
|
||||
i1 += 1;
|
||||
i2 += 1;
|
||||
continue;
|
||||
}
|
||||
if self.keys[i1] < r.keys[i2] {
|
||||
new_keys.push(self.keys[i1].clone());
|
||||
i1 += 1;
|
||||
continue;
|
||||
}
|
||||
if self.keys[i1] > r.keys[i2] {
|
||||
new_keys.push(r.keys[i2].clone());
|
||||
i2 += 1;
|
||||
continue;
|
||||
}
|
||||
panic!();
|
||||
}
|
||||
while i1 < self.keys.len() {
|
||||
new_keys.push(self.keys[i1].clone());
|
||||
i1 += 1;
|
||||
}
|
||||
while i2 < r.keys.len() {
|
||||
new_keys.push(r.keys[i2].clone());
|
||||
i2 += 1;
|
||||
}
|
||||
let mut new_phf = Self::new(new_keys);
|
||||
for key in &self.keys {
|
||||
new_phf.index_mut(key).merge(self.index(key));
|
||||
}
|
||||
for key in &r.keys {
|
||||
new_phf.index_mut(key).merge(r.index(key));
|
||||
}
|
||||
*self = new_phf;
|
||||
}
|
||||
}
|
||||
|
||||
/// Accumulated statistics for one name.
///
/// The fields occupy 24 bytes; the 32-byte alignment rounds the size up to 32.
#[derive(Clone, Debug)]
#[repr(align(32))]
pub struct Record {
    /// Number of values added.
    pub count: u64,
    // `min` and `max` sit next to each other as two u32 so that they can be
    // read together as a single u64.
    /// Smallest value seen, kept in its raw byte form (the ASCII bytes of
    /// `"bc.d"` or `"\0c.d"` as a big-endian integer).
    pub min: u32,
    /// Largest value seen, in the same raw byte form as `min`.
    pub max: u32,
    /// Running sum of whatever was passed as `value` to `add`.
    pub sum: u64,
}
|
||||
|
||||
impl Record {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
count: 0,
|
||||
min: 0,
|
||||
max: 0,
|
||||
sum: 0,
|
||||
}
|
||||
}
|
||||
fn add(&mut self, raw_value: u32, value: u64) {
|
||||
// assert2::debug_assert!(value < 1000);
|
||||
self.count += 1;
|
||||
self.sum += value;
|
||||
// See https://en.algorithmica.org/hpc/algorithms/argmin/
|
||||
if raw_value < self.min {
|
||||
self.min = raw_value;
|
||||
}
|
||||
if raw_value > self.max {
|
||||
self.max = raw_value;
|
||||
}
|
||||
}
|
||||
fn merge(&mut self, other: &Self) {
|
||||
self.count += other.count;
|
||||
self.sum += other.sum_to_val() as u64;
|
||||
self.min = self.min.min(other.min);
|
||||
self.max = self.max.max(other.max);
|
||||
}
|
||||
fn sum_to_val(&self) -> V {
|
||||
let m = (1 << 21) - 1;
|
||||
((self.sum & m) + 10 * ((self.sum >> 21) & m) + 100 * ((self.sum >> 42) & m)) as _
|
||||
}
|
||||
/// Return (min, avg, max)
|
||||
pub fn merge_pos_neg(pos: &Record, neg: &Record) -> (V, V, V) {
|
||||
let pos_sum = pos.sum as V;
|
||||
let neg_sum = neg.sum as V;
|
||||
let sum = pos_sum - neg_sum;
|
||||
let count = (pos.count + neg.count) as V;
|
||||
// round to nearest
|
||||
let avg = (sum + count / 2).div_floor(count);
|
||||
|
||||
let pos_max = raw_to_value(pos.max);
|
||||
let neg_max = -raw_to_value(neg.min);
|
||||
let max = pos_max.max(neg_max);
|
||||
|
||||
let pos_min = raw_to_value(pos.min);
|
||||
let neg_min = -raw_to_value(neg.max);
|
||||
let min = pos_min.min(neg_min);
|
||||
|
||||
(min, avg, max)
|
||||
}
|
||||
}
|
||||
|
||||
/// Loads the text `data[start..end]` as a right-aligned big-endian integer.
///
/// Four bytes are always loaded starting at `start`; the bytes beyond `end`
/// are shifted out. For `"bc.d"` the result is the four ASCII bytes, for
/// `"c.d"` the same with a zero top byte. No masking happens here.
///
/// The caller must make sure that four bytes are readable at `start` and
/// that `end - start` is between 1 and 4.
fn parse_to_raw(data: &[u8], start: usize, end: usize) -> u32 {
    // SAFETY: not checked here; relies on the caller's guarantee above.
    let word: [u8; 4] = unsafe { data.get_unchecked(start..).as_ptr().cast::<[u8; 4]>().read() };
    let unused_bytes = 4 - (end - start);
    u32::from_be_bytes(word) >> (8 * unused_bytes)
}
|
||||
|
||||
/// Spreads the three digit nibbles of a raw value into 21-bit wide fields.
///
/// For raw bytes `bc.d` the result has `d` at bit 0, `c` at bit 21 and `b`
/// at bit 42, so that many values can be summed with a single `u64` add.
///
/// The `pdep` instruction is used on x86_64 unless the `no_pdep` feature is
/// enabled. Every other target uses the equivalent shift-and-mask formula,
/// because `core::arch::x86_64` does not exist there.
fn raw_to_pdep(raw: u32) -> u64 {
    #[cfg(any(feature = "no_pdep", not(target_arch = "x86_64")))]
    {
        let raw = raw as u64;
        (raw & 15) | ((raw & (15 << 16)) << (21 - 16)) | ((raw & (15 << 24)) << (42 - 24))
    }
    #[cfg(all(not(feature = "no_pdep"), target_arch = "x86_64"))]
    {
        let mask = 0x0f0f000f;
        let raw = raw & mask;
        // input                                     0011bbbb0011cccc........0011dddd
        //         0b                  bbbb             xxxxcccc     yyyyyyyyyyyydddd // Deposit here
        //         0b                  1111                 1111                 1111 // Mask out trash using &
        let pdep = 0b0000000000000000001111000000000000011111111000001111111111111111u64;
        // SAFETY: requires a CPU with BMI2; this is not checked at run time.
        unsafe { core::arch::x86_64::_pdep_u64(raw as u64, pdep) }
    }
}
|
||||
|
||||
fn raw_to_value(v: u32) -> V {
|
||||
let mask = 0x0f0f000f;
|
||||
let bytes = (v & mask).to_be_bytes();
|
||||
// s = bc.d
|
||||
let b = bytes[0] as V;
|
||||
let c = bytes[1] as V;
|
||||
let d = bytes[3] as V;
|
||||
b as V * 100 * (bytes[0] != 0) as V + c as V * 10 + d as V
|
||||
}
|
||||
|
||||
pub fn format(v: V) -> String {
|
||||
format!("{:.1}", v as f64 / 10.0)
|
||||
}
|
||||
|
||||
/// Hashes a name from its first and its last 8 bytes.
///
/// Both 8-byte windows are loaded without bounds checks. For names shorter
/// than 8 bytes the windows reach past the name, and the surplus bytes are
/// shifted out before the two halves are added.
// TODO: More robust hash that actually uses all characters.
#[allow(unused)]
fn hash_name(name: &[u8]) -> u64 {
    let len = name.len();
    // SAFETY: not checked here. The caller must guarantee that 8 bytes are
    // readable both from the start of `name` and up to its end.
    let (head, tail): ([u8; 8], [u8; 8]) = unsafe {
        (
            name.get_unchecked(..8).as_ptr().cast::<[u8; 8]>().read(),
            name.get_unchecked(len.wrapping_sub(8)..)
                .as_ptr()
                .cast::<[u8; 8]>()
                .read(),
        )
    };
    // Number of bits in each window that do not belong to the name.
    let shift = 64usize.saturating_sub(8 * len);
    let front = u64::from_ne_bytes(head) << shift;
    let back = u64::from_ne_bytes(tail) >> shift;
    front.wrapping_add(back)
}
|
||||
|
||||
/// Number of SIMD lanes. AVX2 has 256 bits, so 32 lanes.
|
||||
const L: usize = 32;
|
||||
/// The Simd type.
|
||||
pub type S = Simd<u8, L>;
|
||||
|
||||
/// Position of one line as tracked by a cursor in `iter_lines`.
#[derive(Copy, Clone)]
struct State {
    /// Index of the first byte of the line.
    start: usize,
    /// Index found by the search for `;` (the name/value separator).
    sep: usize,
    /// Index found by the search for `\n` (the end of the line).
    end: usize,
}
|
||||
|
||||
/// Find the regions between \n and ; (names) and between ; and \n (values),
|
||||
/// and calls `callback` for each line.
|
||||
#[inline(always)]
|
||||
fn iter_lines<'a>(
|
||||
mut data: &'a [u8],
|
||||
mut callback: impl FnMut(&'a [u8], State, State, State, State),
|
||||
) {
|
||||
// Make sure that the out-of-bounds reads we do are OK.
|
||||
data = &data[..data.len() - 32];
|
||||
|
||||
let sep = S::splat(b';');
|
||||
let end = S::splat(b'\n');
|
||||
|
||||
let find = |last: usize, sep: S| {
|
||||
let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
|
||||
let eq = sep.simd_eq(simd).to_bitmask() as u32;
|
||||
let offset = eq.trailing_zeros() as usize;
|
||||
last + offset
|
||||
};
|
||||
// Modified to be able to search regions longer than 32.
|
||||
let find_long = |mut last: usize, sep: S| {
|
||||
let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
|
||||
let mut eq = sep.simd_eq(simd).to_bitmask() as u32;
|
||||
if eq == 0 {
|
||||
while eq == 0 {
|
||||
last += 32;
|
||||
let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
|
||||
eq = sep.simd_eq(simd).to_bitmask() as u32;
|
||||
}
|
||||
}
|
||||
let offset = eq.trailing_zeros() as usize;
|
||||
last + offset
|
||||
};
|
||||
|
||||
let init_state = |idx: usize| {
|
||||
let first_end = find_long(idx, end);
|
||||
State {
|
||||
start: first_end + 1,
|
||||
sep: first_end + 1,
|
||||
end: 0,
|
||||
}
|
||||
};
|
||||
|
||||
let mut state0 = init_state(0);
|
||||
let mut state1 = init_state(data.len() / 4);
|
||||
let mut state2 = init_state(2 * data.len() / 4);
|
||||
let mut state3 = init_state(3 * data.len() / 4);
|
||||
|
||||
// Duplicate each line for each input state.
|
||||
macro_rules! step {
|
||||
[$($s:expr),*] => {
|
||||
$($s.sep = find_long($s.sep + 1, sep) ;)*
|
||||
$($s.end = find($s.sep + 1, end) ;)*
|
||||
callback(data, $($s, )*);
|
||||
$($s.start = $s.end + 1;)*
|
||||
}
|
||||
}
|
||||
|
||||
while state3.start < data.len() {
|
||||
step!(state0, state1, state2, state3);
|
||||
}
|
||||
}
|
||||
|
||||
fn run(data: &[u8], keys: &[Vec<u8>]) -> Phf {
|
||||
// Each thread has its own accumulator.
|
||||
let mut h = Phf::new(keys.to_vec());
|
||||
iter_lines(
|
||||
data,
|
||||
|data, mut s0: State, mut s1: State, mut s2: State, mut s3: State| {
|
||||
unsafe {
|
||||
// If value is negative, extend name by one character.
|
||||
s0.sep += (data.get_unchecked(s0.sep + 1) == &b'-') as usize;
|
||||
let name0 = data.get_unchecked(s0.start..s0.sep);
|
||||
|
||||
s1.sep += (data.get_unchecked(s1.sep + 1) == &b'-') as usize;
|
||||
let name1 = data.get_unchecked(s1.start..s1.sep);
|
||||
|
||||
s2.sep += (data.get_unchecked(s2.sep + 1) == &b'-') as usize;
|
||||
let name2 = data.get_unchecked(s2.start..s2.sep);
|
||||
|
||||
s3.sep += (data.get_unchecked(s3.sep + 1) == &b'-') as usize;
|
||||
let name3 = data.get_unchecked(s3.start..s3.sep);
|
||||
|
||||
let raw0 = parse_to_raw(data, s0.sep + 1, s0.end);
|
||||
let raw1 = parse_to_raw(data, s1.sep + 1, s1.end);
|
||||
let raw2 = parse_to_raw(data, s2.sep + 1, s2.end);
|
||||
let raw3 = parse_to_raw(data, s3.sep + 1, s3.end);
|
||||
|
||||
let h0 = hash_name(name0);
|
||||
let h1 = hash_name(name1);
|
||||
let h2 = hash_name(name2);
|
||||
let h3 = hash_name(name3);
|
||||
|
||||
let idx0 = h.compute_index(h0);
|
||||
let idx1 = h.compute_index(h1);
|
||||
let idx2 = h.compute_index(h2);
|
||||
let idx3 = h.compute_index(h3);
|
||||
|
||||
h.get_index_mut(idx0).add(raw0, raw_to_pdep(raw0));
|
||||
h.get_index_mut(idx1).add(raw1, raw_to_pdep(raw1));
|
||||
h.get_index_mut(idx2).add(raw2, raw_to_pdep(raw2));
|
||||
h.get_index_mut(idx3).add(raw3, raw_to_pdep(raw3));
|
||||
}
|
||||
},
|
||||
);
|
||||
h
|
||||
}
|
||||
|
||||
pub fn run_parallel(data: &[u8], keys: &[Vec<u8>], num_threads: usize) -> Phf {
|
||||
if num_threads == 0 {
|
||||
return run(data, keys);
|
||||
}
|
||||
|
||||
let phf = std::sync::Mutex::new(Phf::new(keys.to_vec()));
|
||||
|
||||
// Spawn one thread per core.
|
||||
std::thread::scope(|s| {
|
||||
let chunks = data.chunks(data.len() / num_threads + 1);
|
||||
for chunk in chunks {
|
||||
s.spawn(|| {
|
||||
// Each thread has its own accumulator.
|
||||
let thread_phf = run(chunk, keys);
|
||||
|
||||
// Merge results.
|
||||
phf.lock().unwrap().merge(thread_phf);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
phf.into_inner().unwrap()
|
||||
}
|
||||
|
||||
/// Views `name` as UTF-8 text.
///
/// # Panics
/// Panics when `name` is not valid UTF-8.
pub fn to_str(name: &[u8]) -> &str {
    let decoded = std::str::from_utf8(name);
    decoded.unwrap()
}
|
||||
|
||||
/// Returns a list of city names found in data.
|
||||
/// Each city is returned twice, once as `<city>` and once as `<city>;`,
|
||||
/// with the latter being used to accumulate negative temperatures.
|
||||
#[inline(never)]
|
||||
pub fn find_city_names(data: &[u8]) -> Vec<Vec<u8>> {
|
||||
let mut cities = FxHashSet::default();
|
||||
|
||||
let mut callback = |data: &[u8], state: State| {
|
||||
let State { start, sep, .. } = state;
|
||||
let name = unsafe { data.get_unchecked(start..sep) };
|
||||
cities.insert(name.to_vec());
|
||||
|
||||
// Do the same for the name with ; appended.
|
||||
let name = unsafe { data.get_unchecked(start..sep + 1) };
|
||||
cities.insert(name.to_vec());
|
||||
};
|
||||
iter_lines(data, |d, s0, s1, s2, s3| {
|
||||
flatten_callback(d, s0, s1, s2, s3, &mut callback)
|
||||
});
|
||||
|
||||
let mut cities: Vec<_> = cities.into_iter().collect();
|
||||
cities.sort();
|
||||
cities
|
||||
}
|
||||
|
||||
fn flatten_callback<'a>(
|
||||
data: &'a [u8],
|
||||
s0: State,
|
||||
s1: State,
|
||||
s2: State,
|
||||
s3: State,
|
||||
callback: &mut impl FnMut(&'a [u8], State),
|
||||
) {
|
||||
callback(data, s0);
|
||||
callback(data, s1);
|
||||
callback(data, s2);
|
||||
callback(data, s3);
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
pub struct Args {
|
||||
pub input: Option<String>,
|
||||
|
||||
#[clap(short = 'j', long)]
|
||||
pub threads: Option<usize>,
|
||||
|
||||
#[clap(long)]
|
||||
pub print: bool,
|
||||
|
||||
#[clap(long)]
|
||||
pub stats: bool,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Round-trips a four-character and a three-character value through
    /// `parse_to_raw` and `raw_to_value`.
    #[test]
    fn parse_raw() {
        // `parse_to_raw` always loads 4 bytes from `start`. The trailing
        // newline (which real input lines have as well) keeps the load that
        // starts at offset 1 inside the buffer.
        let d = b"12.3\n";

        let raw = parse_to_raw(d, 0, 4);
        let v = raw_to_value(raw);
        assert_eq!(v, 123);

        let raw = parse_to_raw(d, 1, 4);
        let v = raw_to_value(raw);
        assert_eq!(v, 23);
    }
}
|
@ -2,6 +2,7 @@
|
||||
#![feature(portable_simd)]
|
||||
#![feature(slice_split_once)]
|
||||
#![feature(hash_raw_entry)]
|
||||
#![feature(int_roundings)]
|
||||
|
||||
pub mod implementations;
|
||||
pub mod models;
|
||||
|
Loading…
Reference in New Issue
Block a user