Add a solution I found that helped me get faster; unfortunately, the solution itself is incorrect (?)

This commit is contained in:
Fabian Schmidt 2024-08-28 13:09:47 +02:00
parent 0aa9d8be86
commit 07a8e7fc69
6 changed files with 1102 additions and 12 deletions

578
src/main/rust/Cargo.lock generated
View File

@ -17,12 +17,67 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstream"
version = "0.6.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]] [[package]]
name = "anstyle" name = "anstyle"
version = "1.0.8" version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
[[package]]
name = "anstyle-parse"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
dependencies = [
"anstyle",
"windows-sys 0.52.0",
]
[[package]]
name = "anyhow"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
[[package]]
name = "arbitrary-chunks"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ad8689a486416c401ea15715a4694de30054248ec627edbf31f49cb64ee4086"
[[package]] [[package]]
name = "async-channel" name = "async-channel"
version = "2.3.1" version = "2.3.1"
@ -162,6 +217,28 @@ version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
[[package]]
name = "bitvec"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]
[[package]]
name = "block-pseudorand"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2097358495d244a0643746f4d13eedba4608137008cf9dec54e53a3b700115a6"
dependencies = [
"chiapos-chacha8",
"nanorand",
]
[[package]] [[package]]
name = "blocking" name = "blocking"
version = "1.6.1" version = "1.6.1"
@ -192,18 +269,42 @@ version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "cast" name = "cast"
version = "0.3.0" version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6"
dependencies = [
"shlex",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chiapos-chacha8"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33f8be573a85f6c2bc1b8e43834c07e32f95e489b914bf856c0549c3c269cd0a"
dependencies = [
"rayon",
]
[[package]] [[package]]
name = "ciborium" name = "ciborium"
version = "0.2.2" version = "0.2.2"
@ -231,6 +332,12 @@ dependencies = [
"half", "half",
] ]
[[package]]
name = "cityhash-102-rs"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c61dc391dedb78a7117507d8efd692268859f279b97c04c7e4aab1235ef8301"
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.5.13" version = "4.5.13"
@ -238,6 +345,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc"
dependencies = [ dependencies = [
"clap_builder", "clap_builder",
"clap_derive",
] ]
[[package]] [[package]]
@ -246,8 +354,22 @@ version = "4.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99"
dependencies = [ dependencies = [
"anstream",
"anstyle", "anstyle",
"clap_lex", "clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
] ]
[[package]] [[package]]
@ -256,6 +378,32 @@ version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "colorchoice"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
[[package]]
name = "colored"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8"
dependencies = [
"lazy_static",
"windows-sys 0.48.0",
]
[[package]]
name = "common_traits"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6963264945d9ccb66c17ba1cc1af34d06812f45bc14c250dda5a1566905b0af0"
dependencies = [
"anyhow",
"half",
]
[[package]] [[package]]
name = "concurrent-queue" name = "concurrent-queue"
version = "2.5.0" version = "2.5.0"
@ -277,7 +425,7 @@ dependencies = [
"clap", "clap",
"criterion-plot", "criterion-plot",
"is-terminal", "is-terminal",
"itertools", "itertools 0.10.5",
"num-traits", "num-traits",
"once_cell", "once_cell",
"oorandom", "oorandom",
@ -298,7 +446,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [ dependencies = [
"cast", "cast",
"itertools", "itertools 0.10.5",
] ]
[[package]] [[package]]
@ -381,12 +529,24 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
[[package]]
name = "fastmurmur3"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d7e9bc68be4cdabbb8938140b01a8b5bc1191937f2c7e7ecc2fcebbe2d749df"
[[package]] [[package]]
name = "fastrand" name = "fastrand"
version = "2.1.0" version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]] [[package]]
name = "futures-core" name = "futures-core"
version = "0.3.30" version = "0.3.30"
@ -412,6 +572,26 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
] ]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]] [[package]]
name = "half" name = "half"
version = "2.4.1" version = "2.4.1"
@ -422,6 +602,21 @@ dependencies = [
"crunchy", "crunchy",
] ]
[[package]]
name = "hashers"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2bca93b15ea5a746f220e56587f71e73c6165eab783df9e26590069953e3c30"
dependencies = [
"fxhash",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]] [[package]]
name = "hermit-abi" name = "hermit-abi"
version = "0.3.9" version = "0.3.9"
@ -434,6 +629,12 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc"
[[package]]
name = "highway"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c706f1711006204c2ba8fb1a7bd55f689bbf7feca9ff40325206b5e140cff6df"
[[package]] [[package]]
name = "is-terminal" name = "is-terminal"
version = "0.4.12" version = "0.4.12"
@ -445,6 +646,12 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]] [[package]]
name = "itertools" name = "itertools"
version = "0.10.5" version = "0.10.5"
@ -454,6 +661,15 @@ dependencies = [
"either", "either",
] ]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.11" version = "1.0.11"
@ -469,6 +685,12 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.158" version = "0.2.158"
@ -502,6 +724,30 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "metrohash"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ba553cb19e2acbc54baa16faef215126243fe45e53357a3b2e9f4ebc7b0506c"
[[package]]
name = "murmur2"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb585ade2549a017db2e35978b77c319214fa4b37cede841e27954dd6e8f3ca8"
[[package]]
name = "murmur3"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b"
[[package]]
name = "nanorand"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "729eb334247daa1803e0a094d0a5c55711b85571179f5ec6e53eccfdf7008958"
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.19" version = "0.2.19"
@ -522,14 +768,17 @@ name = "onebrc"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"bstr", "bstr",
"clap",
"colored",
"criterion", "criterion",
"easy-parallel", "easy-parallel",
"fast-float", "fast-float",
"libc", "libc",
"memchr", "memchr",
"memmap2", "memmap2",
"ptr_hash",
"rayon", "rayon",
"rustc-hash", "rustc-hash 2.0.0",
"smol", "smol",
] ]
@ -545,6 +794,12 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae"
[[package]]
name = "partition"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "947f833aaa585cf12b8ec7c0476c98784c49f33b861376ffc84ed92adebf2aba"
[[package]] [[package]]
name = "pin-project-lite" name = "pin-project-lite"
version = "0.2.14" version = "0.2.14"
@ -605,6 +860,15 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "ppv-lite86"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
dependencies = [
"zerocopy",
]
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.86" version = "1.0.86"
@ -614,6 +878,40 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "ptr_hash"
version = "0.1.1"
source = "git+https://github.com/ragnargrootkoerkamp/ptrhash#727afbe0afa6939c756f89eb782de8c683e583fa"
dependencies = [
"anyhow",
"bitvec",
"cityhash-102-rs",
"clap",
"colored",
"common_traits",
"either",
"fastmurmur3",
"fastrand",
"fxhash",
"hashers",
"highway",
"itertools 0.11.0",
"lazy_static",
"metrohash",
"murmur2",
"murmur3",
"radsort",
"rand",
"rand_chacha",
"rayon",
"rdst",
"rustc-hash 1.1.0",
"sucds",
"tempfile",
"wyhash",
"xxhash-rust",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.36" version = "1.0.36"
@ -623,6 +921,48 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "radium"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
[[package]]
name = "radsort"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "019b4b213425016d7d84a153c4c73afb0946fbb4840e4eece7ba8848b9d6da22"
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.10.0" version = "1.10.0"
@ -643,6 +983,21 @@ dependencies = [
"crossbeam-utils", "crossbeam-utils",
] ]
[[package]]
name = "rdst"
version = "0.20.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e7970b4e577b76a96d5e56b5f6662b66d1a4e1f5bb026ee118fc31b373c2752"
dependencies = [
"arbitrary-chunks",
"block-pseudorand",
"criterion",
"partition",
"rayon",
"tikv-jemallocator",
"voracious_radix_sort",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.10.5" version = "1.10.5"
@ -672,6 +1027,12 @@ version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]] [[package]]
name = "rustc-hash" name = "rustc-hash"
version = "2.0.0" version = "2.0.0"
@ -738,6 +1099,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]] [[package]]
name = "signal-hook-registry" name = "signal-hook-registry"
version = "1.4.2" version = "1.4.2"
@ -773,6 +1140,22 @@ dependencies = [
"futures-lite", "futures-lite",
] ]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "sucds"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d53d46182afe6ed822a94c54a532dc0d59691a8f49226bdc4596529ca864cdd6"
dependencies = [
"anyhow",
"num-traits",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.72" version = "2.0.72"
@ -784,6 +1167,45 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tempfile"
version = "3.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64"
dependencies = [
"cfg-if",
"fastrand",
"once_cell",
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.4+5.3.0-patched"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]] [[package]]
name = "tinytemplate" name = "tinytemplate"
version = "1.2.1" version = "1.2.1"
@ -816,6 +1238,21 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "voracious_radix_sort"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "446e7ffcb6c27a71d05af7e51ef2ee5b71c48424b122a832f2439651e1914899"
dependencies = [
"rayon",
]
[[package]] [[package]]
name = "walkdir" name = "walkdir"
version = "2.5.0" version = "2.5.0"
@ -826,6 +1263,12 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]] [[package]]
name = "wasm-bindgen" name = "wasm-bindgen"
version = "0.2.92" version = "0.2.92"
@ -899,13 +1342,22 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.52.0" version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [ dependencies = [
"windows-targets", "windows-targets 0.52.6",
] ]
[[package]] [[package]]
@ -914,7 +1366,22 @@ version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [ dependencies = [
"windows-targets", "windows-targets 0.52.6",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
] ]
[[package]] [[package]]
@ -923,28 +1390,46 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [ dependencies = [
"windows_aarch64_gnullvm", "windows_aarch64_gnullvm 0.52.6",
"windows_aarch64_msvc", "windows_aarch64_msvc 0.52.6",
"windows_i686_gnu", "windows_i686_gnu 0.52.6",
"windows_i686_gnullvm", "windows_i686_gnullvm",
"windows_i686_msvc", "windows_i686_msvc 0.52.6",
"windows_x86_64_gnu", "windows_x86_64_gnu 0.52.6",
"windows_x86_64_gnullvm", "windows_x86_64_gnullvm 0.52.6",
"windows_x86_64_msvc", "windows_x86_64_msvc 0.52.6",
] ]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]] [[package]]
name = "windows_aarch64_gnullvm" name = "windows_aarch64_gnullvm"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.52.6" version = "0.52.6"
@ -957,26 +1442,95 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]] [[package]]
name = "windows_x86_64_gnullvm" name = "windows_x86_64_gnullvm"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "wyhash"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295"
dependencies = [
"rand_core",
]
[[package]]
name = "wyz"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
dependencies = [
"tap",
]
[[package]]
name = "xxhash-rust"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"byteorder",
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@ -15,6 +15,9 @@ rustc-hash = "2.0.0"
libc = "0.2.158" libc = "0.2.158"
smol = "2.0.1" smol = "2.0.1"
easy-parallel = "3.3.1" easy-parallel = "3.3.1"
clap = { version = "4.5.13", features = ["derive"] }
colored = "2.1.0"
# Perfect-hash crate pulled straight from git, with its default feature set disabled.
# `default-features` (hyphen) is the key Cargo documents; the underscore spelling is deprecated.
ptr_hash = { git = "https://github.com/ragnargrootkoerkamp/ptrhash", default-features = false }
[dev-dependencies] [dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] } criterion = { version = "0.5.1", features = ["html_reports"] }
@ -22,6 +25,7 @@ criterion = { version = "0.5.1", features = ["html_reports"] }
[features] [features]
json = [] json = []
unsafe = [] unsafe = []
no_pdep = []
[[bench]] [[bench]]
name = "reference_impl" name = "reference_impl"

View File

@ -0,0 +1,87 @@
use std::thread::available_parallelism;
use clap::Parser;
use colored::Colorize;
use memmap2::Mmap;
use onebrc::implementations::rgk::{find_city_names, format, run_parallel, to_str, Args, Record, S};
/// Entry point of the `rgk` solution.
///
/// Memory-maps the input file, collects city names from a prefix of the data,
/// hands the data and the names to `run_parallel`, and optionally prints name
/// statistics (`args.stats`) and the final `{name=min/avg/max, ...}` summary
/// (`args.print`). The total wall-clock time is always written to stderr.
///
/// # Panics
///
/// Panics if the input file cannot be opened or mapped, or if the available
/// parallelism cannot be determined while no thread count was supplied.
fn main() {
    // Upper bound on the number of leading bytes scanned to discover city names.
    const NAME_SCAN_BYTES: usize = 4_000_000;

    let args = Args::parse();
    let start = std::time::Instant::now();
    // Build the default path lazily so nothing is allocated when a path is given.
    let filename = args
        .input
        .unwrap_or_else(|| "../../../measurements.txt".to_string());

    // The file handle is only needed to create the mapping; it is dropped at
    // the end of this block while the mapping itself stays alive.
    let mmap: Mmap = {
        let file = std::fs::File::open(filename).unwrap();
        // SAFETY: the mapping is only ever read. This assumes the input file is
        // not modified or truncated by another process while the program runs.
        unsafe { Mmap::map(&file).unwrap() }
    };
    let data: &[u8] = &mmap;

    // Skip the unaligned prefix so the remaining data starts at an address
    // aligned for `S` (presumably the SIMD vector type used by the parser).
    // SAFETY: only the length of the `u8` prefix is used; the reinterpreted
    // middle and suffix parts returned by `align_to` are never accessed.
    let offset = unsafe { data.align_to::<S>().0.len() };
    let data = &data[offset..];

    // Build a perfect hash function on the cities found in the first
    // `NAME_SCAN_BYTES` bytes. The prefix is clamped to the data length so
    // that inputs smaller than the scan window do not panic on slicing.
    let names = find_city_names(&data[..data.len().min(NAME_SCAN_BYTES)]);

    if args.stats {
        eprintln!("Num cities: {}", names.len());
        // Histogram of name lengths, indexed by length in bytes.
        let mut lens = vec![0; 102];
        for n in &names {
            // Names ending in ';' are skipped here; the print loop below treats
            // them as the second key of a (name, name + ';') pair.
            if n.last() == Some(&b';') {
                continue;
            }
            // Grow the histogram instead of panicking on an unexpectedly long name.
            if n.len() >= lens.len() {
                lens.resize(n.len() + 1, 0);
            }
            lens[n.len()] += 1;
        }
        for (len, count) in lens.iter().enumerate() {
            if *count != 0 {
                eprintln!("{}: {}", len, count);
            }
        }
    }

    let phf = run_parallel(
        data,
        &names,
        // Only query the system when no explicit thread count was given.
        args.threads
            .unwrap_or_else(|| available_parallelism().unwrap().into()),
    );

    if args.print {
        print!("{{");
        let mut first = true;
        let mut keys = phf.keys.clone();
        keys.sort_by(|kl, kr| to_str(kl).cmp(to_str(kr)));
        for name in &keys {
            // Only keys ending in ';' drive the output; each one is paired with
            // the same key minus the trailing ';'.
            if name.last() != Some(&b';') {
                continue;
            }
            let namepos = &name[..name.len() - 1];
            let rpos = phf.index(namepos);
            let rneg = phf.index(name);
            let (min, avg, max) = Record::merge_pos_neg(rpos, rneg);
            if !first {
                print!(", ");
            }
            first = false;
            print!(
                "{}={}/{}/{}",
                to_str(namepos),
                format(min),
                format(avg),
                format(max)
            );
        }
        println!("}}");
    }

    eprintln!(
        "total: {}",
        format!("{:>5.2?}", start.elapsed()).bold().green()
    );
}

View File

@ -7,3 +7,4 @@ pub mod phcs;
pub mod reference_impl; pub mod reference_impl;
pub mod single_thread; pub mod single_thread;
pub mod smol; pub mod smol;
pub mod rgk;

View File

@ -0,0 +1,443 @@
use ptr_hash::PtrHashParams;
use std::{
simd::{cmp::SimdPartialEq, Simd},
vec::Vec,
};
use rustc_hash::FxHashSet;
// Integer type alias; its uses are outside this view.
// NOTE(review): presumably the type of a parsed measurement value — confirm.
type V = i32;
// The concrete perfect-hash type used by `Phf`: the crate's `DefaultPtrHash`
// instantiated with its `FxHash` hasher and `u64`. `Phf::new` builds it from a
// `Vec<u64>` of name hashes, so the keys it maps are those hashes.
type PtrHash = ptr_hash::DefaultPtrHash<ptr_hash::hash::FxHash, u64>;
/// A table of `Record`s addressed through a perfect hash function over a
/// fixed set of byte-string keys.
pub struct Phf {
/// Perfect hash function built in `Phf::new` from the sorted `hash_name`
/// values of `keys`.
pub ptr_hash: PtrHash,
/// The keys this table was built for, kept in sorted order by `Phf::new`.
pub keys: Vec<Vec<u8>>,
/// Record storage; `Phf::new` allocates `keys.len() * 5 / 2` default
/// records, and a key's slot is the index `ptr_hash` returns for its hash.
pub slots: Vec<Record>,
}
impl Phf {
    /// Builds a perfect hash over `keys` and allocates one default `Record`
    /// per slot (`keys.len() * 5 / 2` slots).
    ///
    /// # Panics
    /// Panics with "DUPLICATE HASH" when two keys share a `hash_name` value.
    fn new(mut keys: Vec<Vec<u8>>) -> Self {
        keys.sort();
        let num_slots = keys.len() * 5 / 2;
        let params = ptr_hash::PtrHashParams {
            alpha: 0.9,
            c: 1.5,
            slots_per_part: num_slots,
            ..PtrHashParams::default()
        };
        let mut hashes: Vec<u64> = keys.iter().map(|key| hash_name(key)).collect();
        hashes.sort_unstable();
        // After sorting, equal hashes are adjacent.
        for pair in hashes.windows(2) {
            assert!(pair[0] != pair[1], "DUPLICATE HASH");
        }
        let ptr_hash = PtrHash::new(&hashes, params);
        let slots = vec![Record::default(); num_slots];
        Self {
            ptr_hash,
            keys,
            slots,
        }
    }

    /// Slot number for an already computed name hash.
    fn compute_index(&self, hash: u64) -> usize {
        self.ptr_hash.index_single_part(&hash)
    }

    /// Mutable access to the record stored in slot `idx`.
    fn get_index_mut(&mut self, idx: usize) -> &mut Record {
        &mut self.slots[idx]
    }

    /// Mutable access to the record belonging to the name hash `hash`.
    fn index_hash_mut(&mut self, hash: u64) -> &mut Record {
        let idx = self.compute_index(hash);
        &mut self.slots[idx]
    }

    /// Record accumulated for `key`.
    pub fn index<'b>(&'b self, key: &[u8]) -> &'b Record {
        let hash = hash_name(key);
        &self.slots[self.compute_index(hash)]
    }

    /// Mutable record for `key`.
    fn index_mut<'b>(&'b mut self, key: &[u8]) -> &'b mut Record {
        self.index_hash_mut(hash_name(key))
    }

    /// Folds the per-thread table `r` into `self`, rebuilding the hash over
    /// the union of both key sets.
    ///
    /// `self` is the accumulator: its sums are already plain totals, because
    /// `Record::merge` unpacks the sum of the record it is given. `r` must
    /// come straight from `run`, i.e. still carry packed sums. The
    /// accumulator's records are therefore copied verbatim into the rebuilt
    /// table; passing them through `Record::merge` again would unpack an
    /// already unpacked sum and corrupt every total of 2^21 or more.
    fn merge(&mut self, r: Self) {
        // TODO: If key sets are equal or one is a subset of the other, merge
        // smaller into larger.
        // Both key lists are sorted, so a linear merge yields the sorted union.
        let mut new_keys = Vec::with_capacity(self.keys.len().max(r.keys.len()));
        let mut i1 = 0;
        let mut i2 = 0;
        while i1 < self.keys.len() && i2 < r.keys.len() {
            match self.keys[i1].cmp(&r.keys[i2]) {
                std::cmp::Ordering::Equal => {
                    new_keys.push(self.keys[i1].clone());
                    i1 += 1;
                    i2 += 1;
                }
                std::cmp::Ordering::Less => {
                    new_keys.push(self.keys[i1].clone());
                    i1 += 1;
                }
                std::cmp::Ordering::Greater => {
                    new_keys.push(r.keys[i2].clone());
                    i2 += 1;
                }
            }
        }
        new_keys.extend_from_slice(&self.keys[i1..]);
        new_keys.extend_from_slice(&r.keys[i2..]);

        let mut new_phf = Self::new(new_keys);
        // Each key owns a distinct, still-default slot in the new table, so
        // the accumulator's state can be carried over as-is.
        for key in &self.keys {
            *new_phf.index_mut(key) = self.index(key).clone();
        }
        // Per-thread records are packed; `Record::merge` unpacks them.
        for key in &r.keys {
            new_phf.index_mut(key).merge(r.index(key));
        }
        *self = new_phf;
    }
}
#[derive(Clone, Debug)]
#[repr(align(32))]
pub struct Record {
pub count: u64,
// Storing these as two u32 is nice, because they are read as a single u64.
/// Byte representation of string ~b"bc.d" or ~b"\0c.d".
pub min: u32,
/// Byte representation of string b"bc.d" or b"\0c.d".
pub max: u32,
pub sum: u64,
}
impl Record {
fn default() -> Self {
Self {
count: 0,
min: 0,
max: 0,
sum: 0,
}
}
fn add(&mut self, raw_value: u32, value: u64) {
// assert2::debug_assert!(value < 1000);
self.count += 1;
self.sum += value;
// See https://en.algorithmica.org/hpc/algorithms/argmin/
if raw_value < self.min {
self.min = raw_value;
}
if raw_value > self.max {
self.max = raw_value;
}
}
fn merge(&mut self, other: &Self) {
self.count += other.count;
self.sum += other.sum_to_val() as u64;
self.min = self.min.min(other.min);
self.max = self.max.max(other.max);
}
fn sum_to_val(&self) -> V {
let m = (1 << 21) - 1;
((self.sum & m) + 10 * ((self.sum >> 21) & m) + 100 * ((self.sum >> 42) & m)) as _
}
/// Return (min, avg, max)
pub fn merge_pos_neg(pos: &Record, neg: &Record) -> (V, V, V) {
let pos_sum = pos.sum as V;
let neg_sum = neg.sum as V;
let sum = pos_sum - neg_sum;
let count = (pos.count + neg.count) as V;
// round to nearest
let avg = (sum + count / 2).div_floor(count);
let pos_max = raw_to_value(pos.max);
let neg_max = -raw_to_value(neg.min);
let max = pos_max.max(neg_max);
let pos_min = raw_to_value(pos.min);
let neg_min = -raw_to_value(neg.max);
let min = pos_min.min(neg_min);
(min, avg, max)
}
}
/// Loads the value text `data[start..end]` (3 or 4 bytes, `c.d` or `bc.d`) as
/// a big-endian integer, right-aligned so that a 3-byte value gets a leading
/// zero byte.
/// Returns something of the form 0x3b3c2e3d or 0x003c2e3d (ASCII bytes; no
/// masking is applied here).
///
/// Always reads 4 bytes starting at `start` without bounds checking, so the
/// caller must make sure `data` extends at least that far.
fn parse_to_raw(data: &[u8], start: usize, end: usize) -> u32 {
    let width = end - start;
    // SAFETY: relies on the caller guaranteeing 4 readable bytes at `start`.
    let window: [u8; 4] = unsafe { data.as_ptr().add(start).cast::<[u8; 4]>().read_unaligned() };
    // Shift out the bytes that lie past `end`.
    u32::from_be_bytes(window) >> (8 * (4 - width))
}
/// Spreads the three decimal digits of a raw value (as returned by
/// `parse_to_raw`) over separate lanes of a `u64`, so that many values can be
/// summed with a single addition and split apart later (`Record::sum_to_val`
/// reads lanes at bits 0, 21 and 42).
///
/// Two implementations are selected at compile time through the `no_pdep`
/// feature: a shift-and-mask fallback and one using the x86-64 `pdep`
/// instruction.
fn raw_to_pdep(raw: u32) -> u64 {
#[cfg(feature = "no_pdep")]
{
// Portable path: take the low nibble of bytes 0, 2 and 3 (digits d, c, b)
// and move them to bits 0, 21 and 42 respectively.
let raw = raw as u64;
(raw & 15) | ((raw & (15 << 16)) << (21 - 16)) | ((raw & (15 << 24)) << (42 - 24))
}
#[cfg(not(feature = "no_pdep"))]
{
// Keep only the digit nibbles; this also clears the '.' byte entirely.
let mask = 0x0f0f000f;
let raw = raw & mask;
// Input layout (ASCII digits are 0x3_):
// input 0011bbbb0011cccc........0011dddd
// Output layout sketch from the original author; x/y mark positions that are
// zero after masking:
// 0b bbbb xxxxcccc yyyyyyyyyyyydddd // Deposit here
// 0b 1111 1111 1111 // Mask out trash using &
// NOTE(review): the deposit mask is presumably chosen so the digits land at
// the same bit offsets as in the fallback above — confirm by counting bits.
let pdep = 0b0000000000000000001111000000000000011111111000001111111111111111u64;
// NOTE(review): `_pdep_u64` needs BMI2 support on the executing CPU — confirm
// the build enables it.
unsafe { core::arch::x86_64::_pdep_u64(raw as u64, pdep) }
}
}
fn raw_to_value(v: u32) -> V {
let mask = 0x0f0f000f;
let bytes = (v & mask).to_be_bytes();
// s = bc.d
let b = bytes[0] as V;
let c = bytes[1] as V;
let d = bytes[3] as V;
b as V * 100 * (bytes[0] != 0) as V + c as V * 10 + d as V
}
/// Renders a value given in tenths of a degree with exactly one decimal place.
pub fn format(v: V) -> String {
    let degrees = v as f64 / 10.0;
    format!("{degrees:.1}")
}
/// Hashes a name by combining its first 8 and last 8 bytes.
///
/// Both 8-byte loads are unchecked. For names shorter than 8 bytes they reach
/// outside `name` (past its end for the head, before its start for the tail),
/// so the surrounding memory must be readable. The bytes read from outside
/// the name are then shifted out again.
/// NOTE(review): the shift directions presumably assume a little-endian
/// target (`from_ne_bytes`) — confirm before building for anything else.
/// NOTE(review): an empty `name` makes `shift` 64, which is not a valid shift
/// amount for `u64` — callers should never pass one.
#[allow(unused)]
fn hash_name(name: &[u8]) -> u64 {
// Hash the first and last 8 bytes.
// TODO: More robust hash that actually uses all characters.
// 8 bytes starting at the first byte of the name.
let head: [u8; 8] = unsafe { *name.get_unchecked(..8).split_first_chunk().unwrap().0 };
// 8 bytes ending at the last byte of the name; `wrapping_sub` avoids an
// overflow panic when the name is shorter than 8 bytes.
let tail: [u8; 8] = unsafe {
*name
.get_unchecked(name.len().wrapping_sub(8)..)
.split_first_chunk()
.unwrap()
.0
};
// Number of bits that do not belong to the name when it is shorter than
// 8 bytes; zero for longer names.
let shift = 64usize.saturating_sub(8 * name.len());
let khead = u64::from_ne_bytes(head) << shift;
let ktail = u64::from_ne_bytes(tail) >> shift;
khead.wrapping_add(ktail)
}
/// Number of SIMD lanes. AVX2 has 256 bits, so 32 lanes of one byte each.
const L: usize = 32;
/// The Simd type: a vector of `L` bytes, compared against a splatted
/// separator byte when scanning for `;` and `\n`.
pub type S = Simd<u8, L>;
/// Cursor describing one line of input, as handed to the `iter_lines` callback.
/// All fields are byte offsets into the data slice passed alongside it.
#[derive(Copy, Clone)]
struct State {
// Offset of the first byte of the line (the start of the name).
start: usize,
// Offset of the `;` separating name and value.
sep: usize,
// Offset of the `\n` terminating the line.
end: usize,
}
/// Find the regions between \n and ; (names) and between ; and \n (values),
/// and calls `callback` for each line.
///
/// The input is walked from four starting offsets (0, 1/4, 2/4 and 3/4 of its
/// length) in lockstep, so every `callback` invocation receives the data slice
/// (shortened by 32 bytes) plus four `State`s, one per starting offset.
///
/// Requires `data.len() >= 32`.
///
/// NOTE(review): each cursor first skips to just past the next `\n`, so the
/// line containing its starting offset is not reported by that cursor — this
/// includes the very first line of `data`. The loop condition only looks at
/// the fourth cursor; the other three take exactly as many steps as it does,
/// wherever their quarter ends. Confirm whether exact per-line coverage is
/// expected by callers.
#[inline(always)]
fn iter_lines<'a>(
mut data: &'a [u8],
mut callback: impl FnMut(&'a [u8], State, State, State, State),
) {
// Make sure that the out-of-bounds reads we do are OK.
// Every search below loads 32 bytes at once, so the last 32 bytes are kept
// as padding. The subtraction underflows for inputs shorter than 32 bytes.
data = &data[..data.len() - 32];
let sep = S::splat(b';');
let end = S::splat(b'\n');
// Offset of the first byte equal to the splatted byte within the 32 bytes
// starting at `last`. Returns `last + 32` when there is no match, because
// `trailing_zeros` of an all-zero mask is 32.
let find = |last: usize, sep: S| {
let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
let eq = sep.simd_eq(simd).to_bitmask() as u32;
let offset = eq.trailing_zeros() as usize;
last + offset
};
// Modified to be able to search regions longer than 32.
// Keeps advancing in 32-byte steps until a match is found; there is no upper
// bound on how far it reads.
let find_long = |mut last: usize, sep: S| {
let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
let mut eq = sep.simd_eq(simd).to_bitmask() as u32;
if eq == 0 {
while eq == 0 {
last += 32;
let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
eq = sep.simd_eq(simd).to_bitmask() as u32;
}
}
let offset = eq.trailing_zeros() as usize;
last + offset
};
// Positions a cursor on the first line that begins after `idx`: just past
// the first `\n` found at or after `idx`.
let init_state = |idx: usize| {
let first_end = find_long(idx, end);
State {
start: first_end + 1,
sep: first_end + 1,
end: 0,
}
};
let mut state0 = init_state(0);
let mut state1 = init_state(data.len() / 4);
let mut state2 = init_state(2 * data.len() / 4);
let mut state3 = init_state(3 * data.len() / 4);
// Duplicate each line for each input state.
// One step: find the next `;` for every cursor, then the `\n` after it, hand
// all four cursors to the callback, and move each cursor to the next line.
macro_rules! step {
[$($s:expr),*] => {
$($s.sep = find_long($s.sep + 1, sep) ;)*
$($s.end = find($s.sep + 1, end) ;)*
callback(data, $($s, )*);
$($s.start = $s.end + 1;)*
}
}
// Runs until the last cursor has moved past the (shortened) data.
while state3.start < data.len() {
step!(state0, state1, state2, state3);
}
}
/// Aggregates the lines of `data` into a fresh `Phf` built over `keys`.
///
/// Lines are consumed four at a time through `iter_lines`. A negative value is
/// accounted under the key `<city>;` (the name slice is extended over the
/// separator and the `-` is skipped), a non-negative one under `<city>`.
/// Sums are accumulated in the packed form produced by `raw_to_pdep`, so the
/// returned records are not yet plain totals.
///
/// NOTE(review): every name seen is hashed and indexed without checking that
/// it is one of `keys`; what happens for an unknown name is not visible here.
fn run(data: &[u8], keys: &[Vec<u8>]) -> Phf {
// Each thread has its own accumulator.
let mut h = Phf::new(keys.to_vec());
iter_lines(
data,
|data, mut s0: State, mut s1: State, mut s2: State, mut s3: State| {
// The four lines are processed stage by stage (names, values, hashes,
// slot lookups, updates) rather than one line after another.
unsafe {
// If value is negative, extend name by one character.
// After the bump `sep` points at the `-`, so `sep + 1` is the first digit.
s0.sep += (data.get_unchecked(s0.sep + 1) == &b'-') as usize;
let name0 = data.get_unchecked(s0.start..s0.sep);
s1.sep += (data.get_unchecked(s1.sep + 1) == &b'-') as usize;
let name1 = data.get_unchecked(s1.start..s1.sep);
s2.sep += (data.get_unchecked(s2.sep + 1) == &b'-') as usize;
let name2 = data.get_unchecked(s2.start..s2.sep);
s3.sep += (data.get_unchecked(s3.sep + 1) == &b'-') as usize;
let name3 = data.get_unchecked(s3.start..s3.sep);
// Raw byte form of the (unsigned) value text.
let raw0 = parse_to_raw(data, s0.sep + 1, s0.end);
let raw1 = parse_to_raw(data, s1.sep + 1, s1.end);
let raw2 = parse_to_raw(data, s2.sep + 1, s2.end);
let raw3 = parse_to_raw(data, s3.sep + 1, s3.end);
let h0 = hash_name(name0);
let h1 = hash_name(name1);
let h2 = hash_name(name2);
let h3 = hash_name(name3);
let idx0 = h.compute_index(h0);
let idx1 = h.compute_index(h1);
let idx2 = h.compute_index(h2);
let idx3 = h.compute_index(h3);
// Min/max are tracked on the raw form, the sum on the packed digits.
h.get_index_mut(idx0).add(raw0, raw_to_pdep(raw0));
h.get_index_mut(idx1).add(raw1, raw_to_pdep(raw1));
h.get_index_mut(idx2).add(raw2, raw_to_pdep(raw2));
h.get_index_mut(idx3).add(raw3, raw_to_pdep(raw3));
}
},
);
h
}
/// Aggregates `data` using `num_threads` worker threads and returns the
/// combined table.
///
/// The input is cut into `num_threads` byte ranges of (almost) equal size;
/// each range is processed by `run` on its own thread and merged into a shared
/// accumulator. With `num_threads == 0` the whole input is processed on the
/// calling thread.
///
/// In every case the per-thread result goes through `Phf::merge`, so the
/// returned records always hold plain totals. Returning the output of `run`
/// directly would hand out packed sums, which `Record::merge_pos_neg` cannot
/// interpret.
///
/// NOTE(review): ranges are cut at arbitrary byte offsets, not at line
/// boundaries; how lines straddling a cut are treated depends on `iter_lines`.
pub fn run_parallel(data: &[u8], keys: &[Vec<u8>], num_threads: usize) -> Phf {
    let mut accumulator = Phf::new(keys.to_vec());
    if num_threads == 0 {
        accumulator.merge(run(data, keys));
        return accumulator;
    }
    let phf = std::sync::Mutex::new(accumulator);
    // Spawn one thread per chunk.
    std::thread::scope(|s| {
        let chunks = data.chunks(data.len() / num_threads + 1);
        for chunk in chunks {
            s.spawn(|| {
                // Each thread has its own accumulator.
                let thread_phf = run(chunk, keys);
                // Merge results.
                phf.lock().unwrap().merge(thread_phf);
            });
        }
    });
    phf.into_inner().unwrap()
}
/// Views `name` as UTF-8 text.
///
/// # Panics
/// Panics if `name` is not valid UTF-8.
pub fn to_str(name: &[u8]) -> &str {
    let decoded = std::str::from_utf8(name);
    decoded.unwrap()
}
/// Collects the distinct city names that occur in `data`, sorted.
/// Every city appears twice in the result: as `<city>` and as `<city>;`.
/// The second form is the key under which negative temperatures are
/// accumulated.
#[inline(never)]
pub fn find_city_names(data: &[u8]) -> Vec<Vec<u8>> {
    let mut seen = FxHashSet::default();
    let mut remember = |bytes: &[u8], line: State| {
        // Unchecked slicing: the offsets are the ones `iter_lines` reported
        // for `bytes`.
        let (bare, with_sep) = unsafe {
            (
                bytes.get_unchecked(line.start..line.sep),
                // Same name, extended over the `;` separator.
                bytes.get_unchecked(line.start..line.sep + 1),
            )
        };
        seen.insert(bare.to_vec());
        seen.insert(with_sep.to_vec());
    };
    iter_lines(data, |bytes, a, b, c, d| {
        flatten_callback(bytes, a, b, c, d, &mut remember)
    });
    let mut names: Vec<Vec<u8>> = seen.into_iter().collect();
    // The set holds no duplicates, so an unstable sort gives the same order.
    names.sort_unstable();
    names
}
/// Adapts a one-line callback to the four-lines-at-a-time interface of
/// `iter_lines`: invokes `callback` once per state, in argument order.
fn flatten_callback<'a>(
    data: &'a [u8],
    s0: State,
    s1: State,
    s2: State,
    s3: State,
    callback: &mut impl FnMut(&'a [u8], State),
) {
    for line in [s0, s1, s2, s3] {
        callback(data, line);
    }
}
// Command-line arguments. Plain `//` comments are used on purpose: clap would
// turn `///` doc comments into help text.
#[derive(clap::Parser)]
pub struct Args {
// Optional positional path of the measurements file; `main` falls back to
// "../../../measurements.txt" when it is absent.
pub input: Option<String>,
// `-j` / `--threads`: number of worker threads; `main` uses the machine's
// available parallelism when it is absent.
#[clap(short = 'j', long)]
pub threads: Option<usize>,
// `--print`: write the aggregated results to stdout.
#[clap(long)]
pub print: bool,
// `--stats`: write the number of cities and a name-length histogram to stderr.
#[clap(long)]
pub stats: bool,
}
#[cfg(test)]
mod test {
    /// Round-trips value text through `parse_to_raw` and `raw_to_value`, for
    /// both the 4-byte (`bc.d`) and the 3-byte (`c.d`) form.
    #[test]
    fn parse_raw() {
        use super::*;
        // `parse_to_raw` always loads 4 bytes from `start`. The trailing
        // newline (present in real input as well) keeps the load at offset 1
        // inside the buffer; without it the second case reads out of bounds.
        let d = b"12.3\n";
        let raw = parse_to_raw(d, 0, 4);
        let v = raw_to_value(raw);
        assert_eq!(v, 123);
        let raw = parse_to_raw(d, 1, 4);
        let v = raw_to_value(raw);
        assert_eq!(v, 23);
    }
}

View File

@ -2,6 +2,7 @@
#![feature(portable_simd)]
#![feature(slice_split_once)]
#![feature(hash_raw_entry)]
#![feature(int_roundings)]
pub mod implementations;
pub mod models;