Compare commits
	
		
			23 Commits
		
	
	
		
			53ea542f36
			...
			main
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| fdd92dd5f7 | |||
| 45b3014cbb | |||
| 98cd6e930c | |||
| 40a8d6d929 | |||
| 0ea10a3c1b | |||
| 3dbc9c32d1 | |||
| 4d586c809e | |||
| 5bb2363eee | |||
| eb2ed15e33 | |||
| dfcc8562e6 | |||
| 212e595a7e | |||
| 7b8943976f | |||
| aaa11c7b94 | |||
| 07a8e7fc69 | |||
| 0aa9d8be86 | |||
| b1c064a92f | |||
| ea06a600ce | |||
| ac5c45f8d5 | |||
| b8f589096f | |||
| c306083192 | |||
| a45ddd2dc0 | |||
| e832475fc3 | |||
| 608cbb59e5 | 
							
								
								
									
										91
									
								
								src/main/julia/main.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								src/main/julia/main.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | |||||||
|  | using Mmap | ||||||
|  |  | ||||||
"""
    StationMeasurements(min, max, sum, count)

Mutable running aggregate of the temperature readings recorded for one
station: the extremes, the total, and how many readings were folded in.
"""
mutable struct StationMeasurements
    min::Float64   # smallest reading folded in so far
    max::Float64   # largest reading folded in so far
    sum::Float64   # total of all readings (divided by `count` for the mean)
    count::Int64   # number of readings folded in
end
|  |  | ||||||
"""
    update(sm, temp::Float64)

Fold a single temperature reading `temp` into the running aggregate `sm`.

`sm` is any mutable object with `min`, `max`, `sum` and `count` fields
(for example a `StationMeasurements`). The extremes are widened if needed,
`temp` is added to `sm.sum`, and `sm.count` is incremented.

Returns the updated `sm.count`.
"""
function update(sm, temp::Float64)
    # Compare against the fields of `sm`: a bare `min`/`max` refers to the Base
    # functions, and `temp < min` raises a MethodError.
    # The two extremes are updated independently so an aggregate whose `min`
    # starts above its `max` (e.g. Inf / -Inf) is handled on the first reading.
    sm.min = min(sm.min, temp)
    sm.max = max(sm.max, temp)
    sm.sum += temp
    sm.count += 1
end
|  |  | ||||||
"""
    print_measurements(stations::Dict{String,StationMeasurements})

Write every station to standard output as a single brace-delimited list,
ordered by station name. Each entry has the form `name=min/mean/max`, with
all three values rounded to one decimal digit and entries separated by `", "`.
No trailing newline is written.
"""
function print_measurements(stations::Dict{String,StationMeasurements})
    names = sort!(collect(keys(stations)))
    print("{")
    entries = map(names) do name
        stats = stations[name]
        lowest = round(stats.min; digits=1)
        highest = round(stats.max; digits=1)
        mean_temp = round(stats.sum / stats.count; digits=1)
        "$name=$lowest/$mean_temp/$highest"
    end
    print(join(entries, ", "))
    print("}")
end
|  |  | ||||||
"""
    merge(stations_vec::Vector{Dict{String,StationMeasurements}})

Combine several per-chunk station tables into one.

For a station that appears in more than one table, the result holds the
smallest `min`, the largest `max`, and the summed `sum` and `count` across
all of them. The input tables and their `StationMeasurements` objects are
left unmodified.

Returns a new `Dict{String,StationMeasurements}`.
"""
function merge(stations_vec::Vector{Dict{String,StationMeasurements}})
    merged = Dict{String,StationMeasurements}()
    for stations in stations_vec
        for (city, sm) in stations
            acc = get(merged, city, nothing)
            if acc === nothing
                # Store a copy so later accumulation never mutates a caller's object.
                merged[city] = StationMeasurements(sm.min, sm.max, sm.sum, sm.count)
            else
                # Accumulate into the entry held by `merged`; writing the combined
                # values into `sm` instead would discard them.
                acc.min = min(acc.min, sm.min)
                acc.max = max(acc.max, sm.max)
                acc.sum += sm.sum
                acc.count += sm.count
            end
        end
    end
    merged
end
|  |  | ||||||
"""
    process_chunk(data, chunk)

Aggregate the `station;temperature` records that end inside `chunk`.

# Arguments
- `data`: the raw file contents as a byte vector.
- `chunk`: ascending indices into `data` of newline bytes (`0x0a`); each index
  marks the end of one record.

Every record terminated by a newline listed in `chunk` is processed exactly
once. The record ending at the first newline of `chunk` begins right after
the closest newline preceding it in `data`, or at the start of `data` when
there is none. Empty lines are skipped.

Returns a `Dict{String,StationMeasurements}` keyed by station name.
"""
function process_chunk(data, chunk)
    stations = Dict{String,StationMeasurements}()
    isempty(chunk) && return stations

    # Locate where the first record of this chunk starts. `findprev` returns
    # `nothing` when the search position lies before the first index of `data`.
    prev_nl = findprev(isequal(0x0a), data, first(chunk) - 1)
    line_start = prev_nl === nothing ? firstindex(data) : prev_nl + 1

    for nl in chunk
        # The record spans the bytes strictly between two newlines.
        line = String(data[line_start:nl-1])
        line_start = nl + 1
        isempty(line) && continue

        # Split on the last ';' only: the temperature is the final field.
        station, temp_str = rsplit(line, ";"; limit=2)
        # Parse straight to Float64, the type of the aggregate's fields.
        temp = parse(Float64, temp_str)

        sm = get(stations, station, nothing)
        if sm === nothing
            stations[station] = StationMeasurements(temp, temp, temp, 1)
        else
            sm.min = min(sm.min, temp)
            sm.max = max(sm.max, temp)
            sm.sum += temp
            sm.count += 1
        end
    end
    stations
end
|  |  | ||||||
"""
    main()

Memory-map `../../../measurements.txt`, split its newline positions into
roughly one chunk per thread, aggregate each chunk on a spawned task, merge
the per-chunk tables and print the result.
"""
function main()
    open("../../../measurements.txt", "r") do f
        sz = Base.stat(f).size
        data = mmap(f, Vector{UInt8}, sz)
        idxs = findall(isequal(0x0a), data)
        # `cld` rounds up so there are at most `nthreads()` chunks, and
        # `max(1, …)` keeps the partition length valid when the file has fewer
        # lines than threads (a partition length of 0 is rejected).
        chunk_len = max(1, cld(length(idxs), Threads.nthreads()))
        idxs_chunks = collect(Iterators.partition(idxs, chunk_len))
        tasks = map(idxs_chunks) do chunk
            Threads.@spawn process_chunk(data, chunk)
        end
        # Typed comprehension: the element type matches `merge`'s signature even
        # when there are no tasks at all.
        stations_vec = Dict{String,StationMeasurements}[fetch(t) for t in tasks]
        stations = merge(stations_vec)
        print_measurements(stations)
    end
end

main()
							
								
								
									
										57
									
								
								src/main/lua/main.lua
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								src/main/lua/main.lua
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | |||||||
-- Reads "station;temperature" lines from the measurements file, tracks
-- min / sum / max / count per station, and prints the stations sorted by name
-- as {name=min/mean/max, ...} with one decimal digit per value.
local file_path = "../../../measurements.txt"

local file = io.open(file_path, "rb")
if not file then
	print("Unable to open file")
	return
end

-- Splits `inputstr` on ';' and returns the non-empty pieces in order.
local function split_semi(inputstr)
	local t = {}
	for str in string.gmatch(inputstr, "([^;]+)") do
		t[#t + 1] = str
	end
	return t
end

local stations = {}

local iterations = 1

for line in file:lines("*l") do
	if iterations % 1000000 == 0 then
		-- Progress goes to stderr so that stdout carries only the final result.
		io.stderr:write("\x1b[J\x1b[H")
		io.stderr:write(iterations / 10000000)
		io.stderr:write("\n")
	end
	local split_line = split_semi(line)
	local station = split_line[1]
	local temp_field = split_line[2]
	-- The extra parentheses drop gsub's second result (the substitution count),
	-- which tonumber would otherwise take as a numeric base.
	local temp = temp_field and tonumber((string.gsub(temp_field, "[ ]", "")))
	-- Lines without a parsable "station;temperature" pair are skipped.
	if station and temp then
		local entry = stations[station]
		if entry == nil then
			stations[station] = { min = temp, max = temp, sum = temp, count = 1 }
		else
			if temp < entry.min then
				entry.min = temp
			elseif temp > entry.max then
				entry.max = temp
			end
			entry.sum = entry.sum + temp
			entry.count = entry.count + 1
		end
	end
	iterations = iterations + 1
end

file:close()

local keys = {}
for k in pairs(stations) do
	keys[#keys + 1] = k
end
table.sort(keys)

local fstations = {}
for _, station in ipairs(keys) do
	local entry = stations[station]
	-- Temperatures are stored as parsed (not scaled), so the mean is sum / count.
	local avg = entry.sum / entry.count
	fstations[#fstations + 1] = string.format("%s=%.1f/%.1f/%.1f", station, entry.min, avg, entry.max)
end
io.write("{")
io.write(table.concat(fstations, ", "))
print("}")
							
								
								
									
										585
									
								
								src/main/rust/Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										585
									
								
								src/main/rust/Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -17,12 +17,67 @@ version = "0.1.6" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "anstream" | ||||||
|  | version = "0.6.15" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" | ||||||
|  | dependencies = [ | ||||||
|  |  "anstyle", | ||||||
|  |  "anstyle-parse", | ||||||
|  |  "anstyle-query", | ||||||
|  |  "anstyle-wincon", | ||||||
|  |  "colorchoice", | ||||||
|  |  "is_terminal_polyfill", | ||||||
|  |  "utf8parse", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "anstyle" | name = "anstyle" | ||||||
| version = "1.0.8" | version = "1.0.8" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" | checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "anstyle-parse" | ||||||
|  | version = "0.2.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" | ||||||
|  | dependencies = [ | ||||||
|  |  "utf8parse", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "anstyle-query" | ||||||
|  | version = "1.1.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" | ||||||
|  | dependencies = [ | ||||||
|  |  "windows-sys 0.52.0", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "anstyle-wincon" | ||||||
|  | version = "3.0.4" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" | ||||||
|  | dependencies = [ | ||||||
|  |  "anstyle", | ||||||
|  |  "windows-sys 0.52.0", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "anyhow" | ||||||
|  | version = "1.0.86" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "arbitrary-chunks" | ||||||
|  | version = "0.4.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "2ad8689a486416c401ea15715a4694de30054248ec627edbf31f49cb64ee4086" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "async-channel" | name = "async-channel" | ||||||
| version = "2.3.1" | version = "2.3.1" | ||||||
| @@ -162,6 +217,28 @@ version = "2.6.0" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" | checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "bitvec" | ||||||
|  | version = "1.0.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" | ||||||
|  | dependencies = [ | ||||||
|  |  "funty", | ||||||
|  |  "radium", | ||||||
|  |  "tap", | ||||||
|  |  "wyz", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "block-pseudorand" | ||||||
|  | version = "0.1.2" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "2097358495d244a0643746f4d13eedba4608137008cf9dec54e53a3b700115a6" | ||||||
|  | dependencies = [ | ||||||
|  |  "chiapos-chacha8", | ||||||
|  |  "nanorand", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "blocking" | name = "blocking" | ||||||
| version = "1.6.1" | version = "1.6.1" | ||||||
| @@ -192,18 +269,42 @@ version = "3.16.0" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" | checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "byteorder" | ||||||
|  | version = "1.5.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "cast" | name = "cast" | ||||||
| version = "0.3.0" | version = "0.3.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "cc" | ||||||
|  | version = "1.1.15" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" | ||||||
|  | dependencies = [ | ||||||
|  |  "shlex", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "cfg-if" | name = "cfg-if" | ||||||
| version = "1.0.0" | version = "1.0.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "chiapos-chacha8" | ||||||
|  | version = "0.1.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "33f8be573a85f6c2bc1b8e43834c07e32f95e489b914bf856c0549c3c269cd0a" | ||||||
|  | dependencies = [ | ||||||
|  |  "rayon", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "ciborium" | name = "ciborium" | ||||||
| version = "0.2.2" | version = "0.2.2" | ||||||
| @@ -231,6 +332,12 @@ dependencies = [ | |||||||
|  "half", |  "half", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "cityhash-102-rs" | ||||||
|  | version = "0.1.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "3c61dc391dedb78a7117507d8efd692268859f279b97c04c7e4aab1235ef8301" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "clap" | name = "clap" | ||||||
| version = "4.5.13" | version = "4.5.13" | ||||||
| @@ -238,6 +345,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||||||
| checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" | checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "clap_builder", |  "clap_builder", | ||||||
|  |  "clap_derive", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -246,8 +354,22 @@ version = "4.5.13" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" | checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  |  "anstream", | ||||||
|  "anstyle", |  "anstyle", | ||||||
|  "clap_lex", |  "clap_lex", | ||||||
|  |  "strsim", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "clap_derive" | ||||||
|  | version = "4.5.13" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" | ||||||
|  | dependencies = [ | ||||||
|  |  "heck", | ||||||
|  |  "proc-macro2", | ||||||
|  |  "quote", | ||||||
|  |  "syn", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -256,6 +378,32 @@ version = "0.7.2" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" | checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "colorchoice" | ||||||
|  | version = "1.0.2" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "colored" | ||||||
|  | version = "2.1.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" | ||||||
|  | dependencies = [ | ||||||
|  |  "lazy_static", | ||||||
|  |  "windows-sys 0.48.0", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "common_traits" | ||||||
|  | version = "0.10.2" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "6963264945d9ccb66c17ba1cc1af34d06812f45bc14c250dda5a1566905b0af0" | ||||||
|  | dependencies = [ | ||||||
|  |  "anyhow", | ||||||
|  |  "half", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "concurrent-queue" | name = "concurrent-queue" | ||||||
| version = "2.5.0" | version = "2.5.0" | ||||||
| @@ -277,7 +425,7 @@ dependencies = [ | |||||||
|  "clap", |  "clap", | ||||||
|  "criterion-plot", |  "criterion-plot", | ||||||
|  "is-terminal", |  "is-terminal", | ||||||
|  "itertools", |  "itertools 0.10.5", | ||||||
|  "num-traits", |  "num-traits", | ||||||
|  "once_cell", |  "once_cell", | ||||||
|  "oorandom", |  "oorandom", | ||||||
| @@ -298,7 +446,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||||||
| checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "cast", |  "cast", | ||||||
|  "itertools", |  "itertools 0.10.5", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -332,6 +480,12 @@ version = "0.2.2" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "easy-parallel" | ||||||
|  | version = "3.3.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "2afbb9b0aef60e4f0d2b18129b6c0dff035a6f7dbbd17c2f38c1432102ee223c" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "either" | name = "either" | ||||||
| version = "1.13.0" | version = "1.13.0" | ||||||
| @@ -375,12 +529,24 @@ version = "0.2.0" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" | checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "fastmurmur3" | ||||||
|  | version = "0.2.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "2d7e9bc68be4cdabbb8938140b01a8b5bc1191937f2c7e7ecc2fcebbe2d749df" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "fastrand" | name = "fastrand" | ||||||
| version = "2.1.0" | version = "2.1.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" | checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "funty" | ||||||
|  | version = "2.0.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "futures-core" | name = "futures-core" | ||||||
| version = "0.3.30" | version = "0.3.30" | ||||||
| @@ -406,6 +572,26 @@ dependencies = [ | |||||||
|  "pin-project-lite", |  "pin-project-lite", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "fxhash" | ||||||
|  | version = "0.2.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" | ||||||
|  | dependencies = [ | ||||||
|  |  "byteorder", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "getrandom" | ||||||
|  | version = "0.2.15" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" | ||||||
|  | dependencies = [ | ||||||
|  |  "cfg-if", | ||||||
|  |  "libc", | ||||||
|  |  "wasi", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "half" | name = "half" | ||||||
| version = "2.4.1" | version = "2.4.1" | ||||||
| @@ -416,6 +602,21 @@ dependencies = [ | |||||||
|  "crunchy", |  "crunchy", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "hashers" | ||||||
|  | version = "1.0.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "b2bca93b15ea5a746f220e56587f71e73c6165eab783df9e26590069953e3c30" | ||||||
|  | dependencies = [ | ||||||
|  |  "fxhash", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "heck" | ||||||
|  | version = "0.5.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "hermit-abi" | name = "hermit-abi" | ||||||
| version = "0.3.9" | version = "0.3.9" | ||||||
| @@ -428,6 +629,12 @@ version = "0.4.0" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" | checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "highway" | ||||||
|  | version = "1.2.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "c706f1711006204c2ba8fb1a7bd55f689bbf7feca9ff40325206b5e140cff6df" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "is-terminal" | name = "is-terminal" | ||||||
| version = "0.4.12" | version = "0.4.12" | ||||||
| @@ -439,6 +646,12 @@ dependencies = [ | |||||||
|  "windows-sys 0.52.0", |  "windows-sys 0.52.0", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "is_terminal_polyfill" | ||||||
|  | version = "1.70.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "itertools" | name = "itertools" | ||||||
| version = "0.10.5" | version = "0.10.5" | ||||||
| @@ -448,6 +661,15 @@ dependencies = [ | |||||||
|  "either", |  "either", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "itertools" | ||||||
|  | version = "0.11.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" | ||||||
|  | dependencies = [ | ||||||
|  |  "either", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "itoa" | name = "itoa" | ||||||
| version = "1.0.11" | version = "1.0.11" | ||||||
| @@ -463,6 +685,12 @@ dependencies = [ | |||||||
|  "wasm-bindgen", |  "wasm-bindgen", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "lazy_static" | ||||||
|  | version = "1.5.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "libc" | name = "libc" | ||||||
| version = "0.2.158" | version = "0.2.158" | ||||||
| @@ -496,6 +724,30 @@ dependencies = [ | |||||||
|  "libc", |  "libc", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "metrohash" | ||||||
|  | version = "1.0.6" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "3ba553cb19e2acbc54baa16faef215126243fe45e53357a3b2e9f4ebc7b0506c" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "murmur2" | ||||||
|  | version = "0.1.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "fb585ade2549a017db2e35978b77c319214fa4b37cede841e27954dd6e8f3ca8" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "murmur3" | ||||||
|  | version = "0.5.2" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "nanorand" | ||||||
|  | version = "0.6.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "729eb334247daa1803e0a094d0a5c55711b85571179f5ec6e53eccfdf7008958" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "num-traits" | name = "num-traits" | ||||||
| version = "0.2.19" | version = "0.2.19" | ||||||
| @@ -516,13 +768,17 @@ name = "onebrc" | |||||||
| version = "0.1.0" | version = "0.1.0" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bstr", |  "bstr", | ||||||
|  |  "clap", | ||||||
|  |  "colored", | ||||||
|  "criterion", |  "criterion", | ||||||
|  |  "easy-parallel", | ||||||
|  "fast-float", |  "fast-float", | ||||||
|  "libc", |  "libc", | ||||||
|  "memchr", |  "memchr", | ||||||
|  "memmap2", |  "memmap2", | ||||||
|  |  "ptr_hash", | ||||||
|  "rayon", |  "rayon", | ||||||
|  "rustc-hash", |  "rustc-hash 2.0.0", | ||||||
|  "smol", |  "smol", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| @@ -538,6 +794,12 @@ version = "2.2.0" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" | checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "partition" | ||||||
|  | version = "0.1.2" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "947f833aaa585cf12b8ec7c0476c98784c49f33b861376ffc84ed92adebf2aba" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "pin-project-lite" | name = "pin-project-lite" | ||||||
| version = "0.2.14" | version = "0.2.14" | ||||||
| @@ -598,6 +860,15 @@ dependencies = [ | |||||||
|  "windows-sys 0.59.0", |  "windows-sys 0.59.0", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "ppv-lite86" | ||||||
|  | version = "0.2.20" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" | ||||||
|  | dependencies = [ | ||||||
|  |  "zerocopy", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "proc-macro2" | name = "proc-macro2" | ||||||
| version = "1.0.86" | version = "1.0.86" | ||||||
| @@ -607,6 +878,40 @@ dependencies = [ | |||||||
|  "unicode-ident", |  "unicode-ident", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "ptr_hash" | ||||||
|  | version = "0.1.1" | ||||||
|  | source = "git+https://github.com/ragnargrootkoerkamp/ptrhash#727afbe0afa6939c756f89eb782de8c683e583fa" | ||||||
|  | dependencies = [ | ||||||
|  |  "anyhow", | ||||||
|  |  "bitvec", | ||||||
|  |  "cityhash-102-rs", | ||||||
|  |  "clap", | ||||||
|  |  "colored", | ||||||
|  |  "common_traits", | ||||||
|  |  "either", | ||||||
|  |  "fastmurmur3", | ||||||
|  |  "fastrand", | ||||||
|  |  "fxhash", | ||||||
|  |  "hashers", | ||||||
|  |  "highway", | ||||||
|  |  "itertools 0.11.0", | ||||||
|  |  "lazy_static", | ||||||
|  |  "metrohash", | ||||||
|  |  "murmur2", | ||||||
|  |  "murmur3", | ||||||
|  |  "radsort", | ||||||
|  |  "rand", | ||||||
|  |  "rand_chacha", | ||||||
|  |  "rayon", | ||||||
|  |  "rdst", | ||||||
|  |  "rustc-hash 1.1.0", | ||||||
|  |  "sucds", | ||||||
|  |  "tempfile", | ||||||
|  |  "wyhash", | ||||||
|  |  "xxhash-rust", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "quote" | name = "quote" | ||||||
| version = "1.0.36" | version = "1.0.36" | ||||||
| @@ -616,6 +921,48 @@ dependencies = [ | |||||||
|  "proc-macro2", |  "proc-macro2", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "radium" | ||||||
|  | version = "0.7.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "radsort" | ||||||
|  | version = "0.1.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "019b4b213425016d7d84a153c4c73afb0946fbb4840e4eece7ba8848b9d6da22" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "rand" | ||||||
|  | version = "0.8.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" | ||||||
|  | dependencies = [ | ||||||
|  |  "libc", | ||||||
|  |  "rand_chacha", | ||||||
|  |  "rand_core", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "rand_chacha" | ||||||
|  | version = "0.3.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" | ||||||
|  | dependencies = [ | ||||||
|  |  "ppv-lite86", | ||||||
|  |  "rand_core", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "rand_core" | ||||||
|  | version = "0.6.4" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" | ||||||
|  | dependencies = [ | ||||||
|  |  "getrandom", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "rayon" | name = "rayon" | ||||||
| version = "1.10.0" | version = "1.10.0" | ||||||
| @@ -636,6 +983,21 @@ dependencies = [ | |||||||
|  "crossbeam-utils", |  "crossbeam-utils", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "rdst" | ||||||
|  | version = "0.20.14" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "6e7970b4e577b76a96d5e56b5f6662b66d1a4e1f5bb026ee118fc31b373c2752" | ||||||
|  | dependencies = [ | ||||||
|  |  "arbitrary-chunks", | ||||||
|  |  "block-pseudorand", | ||||||
|  |  "criterion", | ||||||
|  |  "partition", | ||||||
|  |  "rayon", | ||||||
|  |  "tikv-jemallocator", | ||||||
|  |  "voracious_radix_sort", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "regex" | name = "regex" | ||||||
| version = "1.10.5" | version = "1.10.5" | ||||||
| @@ -665,6 +1027,12 @@ version = "0.8.4" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "rustc-hash" | ||||||
|  | version = "1.1.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "rustc-hash" | name = "rustc-hash" | ||||||
| version = "2.0.0" | version = "2.0.0" | ||||||
| @@ -731,6 +1099,12 @@ dependencies = [ | |||||||
|  "serde", |  "serde", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "shlex" | ||||||
|  | version = "1.3.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "signal-hook-registry" | name = "signal-hook-registry" | ||||||
| version = "1.4.2" | version = "1.4.2" | ||||||
| @@ -766,6 +1140,22 @@ dependencies = [ | |||||||
|  "futures-lite", |  "futures-lite", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "strsim" | ||||||
|  | version = "0.11.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "sucds" | ||||||
|  | version = "0.8.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "d53d46182afe6ed822a94c54a532dc0d59691a8f49226bdc4596529ca864cdd6" | ||||||
|  | dependencies = [ | ||||||
|  |  "anyhow", | ||||||
|  |  "num-traits", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "syn" | name = "syn" | ||||||
| version = "2.0.72" | version = "2.0.72" | ||||||
| @@ -777,6 +1167,45 @@ dependencies = [ | |||||||
|  "unicode-ident", |  "unicode-ident", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "tap" | ||||||
|  | version = "1.0.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "tempfile" | ||||||
|  | version = "3.12.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" | ||||||
|  | dependencies = [ | ||||||
|  |  "cfg-if", | ||||||
|  |  "fastrand", | ||||||
|  |  "once_cell", | ||||||
|  |  "rustix", | ||||||
|  |  "windows-sys 0.59.0", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "tikv-jemalloc-sys" | ||||||
|  | version = "0.5.4+5.3.0-patched" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1" | ||||||
|  | dependencies = [ | ||||||
|  |  "cc", | ||||||
|  |  "libc", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "tikv-jemallocator" | ||||||
|  | version = "0.5.4" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca" | ||||||
|  | dependencies = [ | ||||||
|  |  "libc", | ||||||
|  |  "tikv-jemalloc-sys", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "tinytemplate" | name = "tinytemplate" | ||||||
| version = "1.2.1" | version = "1.2.1" | ||||||
| @@ -809,6 +1238,21 @@ version = "1.0.12" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "utf8parse" | ||||||
|  | version = "0.2.2" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "voracious_radix_sort" | ||||||
|  | version = "1.2.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "446e7ffcb6c27a71d05af7e51ef2ee5b71c48424b122a832f2439651e1914899" | ||||||
|  | dependencies = [ | ||||||
|  |  "rayon", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "walkdir" | name = "walkdir" | ||||||
| version = "2.5.0" | version = "2.5.0" | ||||||
| @@ -819,6 +1263,12 @@ dependencies = [ | |||||||
|  "winapi-util", |  "winapi-util", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "wasi" | ||||||
|  | version = "0.11.0+wasi-snapshot-preview1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "wasm-bindgen" | name = "wasm-bindgen" | ||||||
| version = "0.2.92" | version = "0.2.92" | ||||||
| @@ -892,13 +1342,22 @@ dependencies = [ | |||||||
|  "windows-sys 0.52.0", |  "windows-sys 0.52.0", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows-sys" | ||||||
|  | version = "0.48.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" | ||||||
|  | dependencies = [ | ||||||
|  |  "windows-targets 0.48.5", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "windows-sys" | name = "windows-sys" | ||||||
| version = "0.52.0" | version = "0.52.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "windows-targets", |  "windows-targets 0.52.6", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -907,7 +1366,22 @@ version = "0.59.0" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "windows-targets", |  "windows-targets 0.52.6", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows-targets" | ||||||
|  | version = "0.48.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" | ||||||
|  | dependencies = [ | ||||||
|  |  "windows_aarch64_gnullvm 0.48.5", | ||||||
|  |  "windows_aarch64_msvc 0.48.5", | ||||||
|  |  "windows_i686_gnu 0.48.5", | ||||||
|  |  "windows_i686_msvc 0.48.5", | ||||||
|  |  "windows_x86_64_gnu 0.48.5", | ||||||
|  |  "windows_x86_64_gnullvm 0.48.5", | ||||||
|  |  "windows_x86_64_msvc 0.48.5", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -916,28 +1390,46 @@ version = "0.52.6" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "windows_aarch64_gnullvm", |  "windows_aarch64_gnullvm 0.52.6", | ||||||
|  "windows_aarch64_msvc", |  "windows_aarch64_msvc 0.52.6", | ||||||
|  "windows_i686_gnu", |  "windows_i686_gnu 0.52.6", | ||||||
|  "windows_i686_gnullvm", |  "windows_i686_gnullvm", | ||||||
|  "windows_i686_msvc", |  "windows_i686_msvc 0.52.6", | ||||||
|  "windows_x86_64_gnu", |  "windows_x86_64_gnu 0.52.6", | ||||||
|  "windows_x86_64_gnullvm", |  "windows_x86_64_gnullvm 0.52.6", | ||||||
|  "windows_x86_64_msvc", |  "windows_x86_64_msvc 0.52.6", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows_aarch64_gnullvm" | ||||||
|  | version = "0.48.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "windows_aarch64_gnullvm" | name = "windows_aarch64_gnullvm" | ||||||
| version = "0.52.6" | version = "0.52.6" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows_aarch64_msvc" | ||||||
|  | version = "0.48.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "windows_aarch64_msvc" | name = "windows_aarch64_msvc" | ||||||
| version = "0.52.6" | version = "0.52.6" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows_i686_gnu" | ||||||
|  | version = "0.48.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "windows_i686_gnu" | name = "windows_i686_gnu" | ||||||
| version = "0.52.6" | version = "0.52.6" | ||||||
| @@ -950,26 +1442,95 @@ version = "0.52.6" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows_i686_msvc" | ||||||
|  | version = "0.48.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "windows_i686_msvc" | name = "windows_i686_msvc" | ||||||
| version = "0.52.6" | version = "0.52.6" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows_x86_64_gnu" | ||||||
|  | version = "0.48.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "windows_x86_64_gnu" | name = "windows_x86_64_gnu" | ||||||
| version = "0.52.6" | version = "0.52.6" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows_x86_64_gnullvm" | ||||||
|  | version = "0.48.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "windows_x86_64_gnullvm" | name = "windows_x86_64_gnullvm" | ||||||
| version = "0.52.6" | version = "0.52.6" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "windows_x86_64_msvc" | ||||||
|  | version = "0.48.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "windows_x86_64_msvc" | name = "windows_x86_64_msvc" | ||||||
| version = "0.52.6" | version = "0.52.6" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "wyhash" | ||||||
|  | version = "0.5.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295" | ||||||
|  | dependencies = [ | ||||||
|  |  "rand_core", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "wyz" | ||||||
|  | version = "0.5.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" | ||||||
|  | dependencies = [ | ||||||
|  |  "tap", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "xxhash-rust" | ||||||
|  | version = "0.8.12" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "zerocopy" | ||||||
|  | version = "0.7.35" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" | ||||||
|  | dependencies = [ | ||||||
|  |  "byteorder", | ||||||
|  |  "zerocopy-derive", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "zerocopy-derive" | ||||||
|  | version = "0.7.35" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" | ||||||
|  | dependencies = [ | ||||||
|  |  "proc-macro2", | ||||||
|  |  "quote", | ||||||
|  |  "syn", | ||||||
|  | ] | ||||||
|   | |||||||
| @@ -14,6 +14,10 @@ rayon = "1.10.0" | |||||||
| rustc-hash = "2.0.0" | rustc-hash = "2.0.0" | ||||||
| libc = "0.2.158" | libc = "0.2.158" | ||||||
| smol = "2.0.1" | smol = "2.0.1" | ||||||
|  | easy-parallel = "3.3.1" | ||||||
|  | clap = { version = "4.5.13", features = ["derive"] } | ||||||
|  | colored = "2.1.0" | ||||||
|  | ptr_hash = { git = "https://github.com/ragnargrootkoerkamp/ptrhash", default-features = false } | ||||||
|  |  | ||||||
| [dev-dependencies] | [dev-dependencies] | ||||||
| criterion = { version = "0.5.1", features = ["html_reports"] } | criterion = { version = "0.5.1", features = ["html_reports"] } | ||||||
| @@ -21,6 +25,7 @@ criterion = { version = "0.5.1", features = ["html_reports"] } | |||||||
| [features] | [features] | ||||||
| json = [] | json = [] | ||||||
| unsafe = [] | unsafe = [] | ||||||
|  | no_pdep = [] | ||||||
|  |  | ||||||
| [[bench]] | [[bench]] | ||||||
| name = "reference_impl" | name = "reference_impl" | ||||||
| @@ -50,3 +55,8 @@ harness = false | |||||||
| lto = "fat" | lto = "fat" | ||||||
| strip = "symbols" | strip = "symbols" | ||||||
| panic = "abort" | panic = "abort" | ||||||
|  |  | ||||||
|  | [profile.flamegraph] | ||||||
|  | inherits = "release" | ||||||
|  | debug = true | ||||||
|  | strip = "none" | ||||||
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| use criterion::{Criterion, criterion_group, criterion_main}; | use criterion::{criterion_group, criterion_main, Criterion}; | ||||||
| use onebrc::implementations::flare_flo::run; | use onebrc::implementations::flare_flo::run; | ||||||
|  |  | ||||||
| pub fn criterion_benchmark(c: &mut Criterion) { | pub fn criterion_benchmark(c: &mut Criterion) { | ||||||
|     c.bench_function("flareflo", |b| {b.iter(|| run())}); |     c.bench_function("flareflo", |b| b.iter(run)); | ||||||
| } | } | ||||||
|  |  | ||||||
| criterion_group!(benches, criterion_benchmark); | criterion_group!(benches, criterion_benchmark); | ||||||
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| use criterion::{Criterion, criterion_group, criterion_main}; | use criterion::{criterion_group, criterion_main, Criterion}; | ||||||
| use onebrc::implementations::libraries::run; | use onebrc::implementations::libraries::run; | ||||||
|  |  | ||||||
| pub fn criterion_benchmark(c: &mut Criterion) { | pub fn criterion_benchmark(c: &mut Criterion) { | ||||||
|     c.bench_function("libraries", |b| {b.iter(|| run())}); |     c.bench_function("libraries", |b| b.iter(run)); | ||||||
| } | } | ||||||
|  |  | ||||||
| criterion_group!(benches, criterion_benchmark); | criterion_group!(benches, criterion_benchmark); | ||||||
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| use criterion::{Criterion, criterion_group, criterion_main}; | use criterion::{criterion_group, criterion_main, Criterion}; | ||||||
| use onebrc::implementations::multi_threaded::run; | use onebrc::implementations::multi_threaded::run; | ||||||
|  |  | ||||||
| pub fn criterion_benchmark(c: &mut Criterion) { | pub fn criterion_benchmark(c: &mut Criterion) { | ||||||
|     c.bench_function("multithreaded", |b| {b.iter(|| run())}); |     c.bench_function("multithreaded", |b| b.iter(run)); | ||||||
| } | } | ||||||
|  |  | ||||||
| criterion_group!(benches, criterion_benchmark); | criterion_group!(benches, criterion_benchmark); | ||||||
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| use criterion::{Criterion, criterion_group, criterion_main}; | use criterion::{criterion_group, criterion_main, Criterion}; | ||||||
| use onebrc::implementations::multi_threaded_smol::run; | use onebrc::implementations::multi_threaded_smol::run; | ||||||
|  |  | ||||||
| pub fn criterion_benchmark(c: &mut Criterion) { | pub fn criterion_benchmark(c: &mut Criterion) { | ||||||
|     c.bench_function("multithreadedsmol", |b| {b.iter(|| run())}); |     c.bench_function("multithreadedsmol", |b| b.iter(run)); | ||||||
| } | } | ||||||
|  |  | ||||||
| criterion_group!(benches, criterion_benchmark); | criterion_group!(benches, criterion_benchmark); | ||||||
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| use criterion::{Criterion, criterion_group, criterion_main}; | use criterion::{criterion_group, criterion_main, Criterion}; | ||||||
| use onebrc::implementations::phcs::run; | use onebrc::implementations::phcs::run; | ||||||
|  |  | ||||||
| pub fn criterion_benchmark(c: &mut Criterion) { | pub fn criterion_benchmark(c: &mut Criterion) { | ||||||
|     c.bench_function("phcs", |b| {b.iter(|| run())}); |     c.bench_function("phcs", |b| b.iter(run)); | ||||||
| } | } | ||||||
|  |  | ||||||
| criterion_group!(benches, criterion_benchmark); | criterion_group!(benches, criterion_benchmark); | ||||||
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| use criterion::{Criterion, criterion_group, criterion_main}; | use criterion::{criterion_group, criterion_main, Criterion}; | ||||||
| use onebrc::implementations::reference_impl::run; | use onebrc::implementations::reference_impl::run; | ||||||
|  |  | ||||||
| pub fn criterion_benchmark(c: &mut Criterion) { | pub fn criterion_benchmark(c: &mut Criterion) { | ||||||
|     c.bench_function("reference", |b| {b.iter(|| run())}); |     c.bench_function("reference", |b| b.iter(run)); | ||||||
| } | } | ||||||
|  |  | ||||||
| criterion_group!(benches, criterion_benchmark); | criterion_group!(benches, criterion_benchmark); | ||||||
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| use criterion::{Criterion, criterion_group, criterion_main}; | use criterion::{criterion_group, criterion_main, Criterion}; | ||||||
| use onebrc::implementations::single_thread::run; | use onebrc::implementations::single_thread::run; | ||||||
|  |  | ||||||
| pub fn criterion_benchmark(c: &mut Criterion) { | pub fn criterion_benchmark(c: &mut Criterion) { | ||||||
|     c.bench_function("singlethread", |b| {b.iter(|| run())}); |     c.bench_function("singlethread", |b| b.iter(run)); | ||||||
| } | } | ||||||
|  |  | ||||||
| criterion_group!(benches, criterion_benchmark); | criterion_group!(benches, criterion_benchmark); | ||||||
|   | |||||||
							
								
								
									
										91
									
								
								src/main/rust/src/bin/rgk.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								src/main/rust/src/bin/rgk.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | |||||||
|  | use clap::Parser; | ||||||
|  | use colored::Colorize; | ||||||
|  | use memmap2::Mmap; | ||||||
|  | use onebrc::implementations::rgk::{ | ||||||
|  |     find_city_names, format, run_parallel, to_str, Args, Record, S, | ||||||
|  | }; | ||||||
|  | use std::thread::available_parallelism; | ||||||
|  |  | ||||||
|  | fn main() { | ||||||
|  |     let args = Args::parse(); | ||||||
|  |  | ||||||
|  |     let start = std::time::Instant::now(); | ||||||
|  |     let filename = args | ||||||
|  |         .input | ||||||
|  |         .unwrap_or("../../../measurements.txt".to_string()); | ||||||
|  |     let mmap: Mmap; | ||||||
|  |     let data; | ||||||
|  |     { | ||||||
|  |         let file = std::fs::File::open(filename).unwrap(); | ||||||
|  |         mmap = unsafe { Mmap::map(&file).unwrap() }; | ||||||
|  |         data = &*mmap; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Guaranteed to be aligned for SIMD. | ||||||
|  |     let offset = unsafe { data.align_to::<S>().0.len() }; | ||||||
|  |     let data = &data[offset..]; | ||||||
|  |  | ||||||
|  |     // Build a perfect hash function on the cities found in the first 4,000,000 bytes (~4 MB) of the input. | ||||||
|  |     let names = find_city_names(&data[..4000000]); | ||||||
|  |  | ||||||
|  |     if args.stats { | ||||||
|  |         eprintln!("Num cities: {}", names.len()); | ||||||
|  |         let mut lens = vec![0; 102]; | ||||||
|  |         for n in &names { | ||||||
|  |             if *n.last().unwrap() == b';' { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             lens[n.len()] += 1; | ||||||
|  |         } | ||||||
|  |         for (len, count) in lens.iter().enumerate() { | ||||||
|  |             if *count != 0 { | ||||||
|  |                 eprintln!("{}: {}", len, count); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let phf = run_parallel( | ||||||
|  |         data, | ||||||
|  |         &names, | ||||||
|  |         args.threads | ||||||
|  |             .unwrap_or(available_parallelism().unwrap().into()), | ||||||
|  |     ); | ||||||
|  |  | ||||||
|  |     if args.print { | ||||||
|  |         print!("{{"); | ||||||
|  |         let mut first = true; | ||||||
|  |  | ||||||
|  |         let mut keys = phf.keys.clone(); | ||||||
|  |         keys.sort_by(|kl, kr| to_str(kl).cmp(to_str(kr))); | ||||||
|  |  | ||||||
|  |         for name in &keys { | ||||||
|  |             if *name.last().unwrap() != b';' { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             let namepos = &name[..name.len() - 1]; | ||||||
|  |  | ||||||
|  |             let rpos = phf.index(namepos); | ||||||
|  |             let rneg = phf.index(name); | ||||||
|  |             let (min, avg, max) = Record::merge_pos_neg(rpos, rneg); | ||||||
|  |  | ||||||
|  |             if !first { | ||||||
|  |                 print!(", "); | ||||||
|  |             } | ||||||
|  |             first = false; | ||||||
|  |  | ||||||
|  |             print!( | ||||||
|  |                 "{}={}/{}/{}", | ||||||
|  |                 to_str(namepos), | ||||||
|  |                 format(min), | ||||||
|  |                 format(avg), | ||||||
|  |                 format(max) | ||||||
|  |             ); | ||||||
|  |         } | ||||||
|  |         println!("}}"); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     eprintln!( | ||||||
|  |         "total: {}", | ||||||
|  |         format!("{:>5.2?}", start.elapsed()).bold().green() | ||||||
|  |     ); | ||||||
|  | } | ||||||
| @@ -5,5 +5,6 @@ pub mod multi_threaded_smol; | |||||||
| pub mod multi_threaded_structured; | pub mod multi_threaded_structured; | ||||||
| pub mod phcs; | pub mod phcs; | ||||||
| pub mod reference_impl; | pub mod reference_impl; | ||||||
|  | pub mod rgk; | ||||||
| pub mod single_thread; | pub mod single_thread; | ||||||
| pub mod smol; | pub mod smol; | ||||||
|   | |||||||
| @@ -105,7 +105,7 @@ impl Citymap { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     pub fn into_key_values(self) -> Vec<(String, City)> { |     pub fn into_key_values(self) -> Vec<(String, City)> { | ||||||
|         self.map.into_iter().map(|(_, s)| s).collect() |         self.map.into_values().collect() | ||||||
|     } |     } | ||||||
|     pub fn merge_with(&mut self, rhs: Self) { |     pub fn merge_with(&mut self, rhs: Self) { | ||||||
|         for (k, v) in rhs.map.into_iter() { |         for (k, v) in rhs.map.into_iter() { | ||||||
| @@ -125,7 +125,7 @@ pub fn run() { | |||||||
|     let start = Instant::now(); |     let start = Instant::now(); | ||||||
|     let input = "../../../measurements.txt"; |     let input = "../../../measurements.txt"; | ||||||
|  |  | ||||||
|     let results = if args.find(|e| e == "st").is_some() { |     let results = if args.any(|e| e == "st") { | ||||||
|         citymap_single_thread(input) |         citymap_single_thread(input) | ||||||
|     } else { |     } else { | ||||||
|         citymap_multi_threaded(input) |         citymap_multi_threaded(input) | ||||||
| @@ -159,7 +159,6 @@ fn citymap_multi_threaded(path: &str) -> Citymap { | |||||||
|         threads.push(citymap_thread(path.to_owned(), range, i, sender.clone())); |         threads.push(citymap_thread(path.to_owned(), range, i, sender.clone())); | ||||||
|     } |     } | ||||||
|     let mut ranges = (0..cpus) |     let mut ranges = (0..cpus) | ||||||
|         .into_iter() |  | ||||||
|         .map(|_| receiver.recv().unwrap()) |         .map(|_| receiver.recv().unwrap()) | ||||||
|         .collect::<Vec<_>>(); |         .collect::<Vec<_>>(); | ||||||
|     ranges.sort_unstable_by_key(|e| e.start); |     ranges.sort_unstable_by_key(|e| e.start); | ||||||
| @@ -171,7 +170,7 @@ fn citymap_multi_threaded(path: &str) -> Citymap { | |||||||
|         }), |         }), | ||||||
|         "Ranges overlap or have gaps: {ranges:?}" |         "Ranges overlap or have gaps: {ranges:?}" | ||||||
|     ); |     ); | ||||||
|     let results = threads |     threads | ||||||
|         .into_iter() |         .into_iter() | ||||||
|         .map(|e| e.join().unwrap()) |         .map(|e| e.join().unwrap()) | ||||||
|         //.map(|e|dbg!(e)) |         //.map(|e|dbg!(e)) | ||||||
| @@ -179,8 +178,7 @@ fn citymap_multi_threaded(path: &str) -> Citymap { | |||||||
|             left.merge_with(right); |             left.merge_with(right); | ||||||
|             left |             left | ||||||
|         }) |         }) | ||||||
|         .unwrap(); |         .unwrap() | ||||||
|     results |  | ||||||
| } | } | ||||||
|  |  | ||||||
| fn citymap_thread( | fn citymap_thread( | ||||||
| @@ -204,7 +202,7 @@ fn citymap_thread( | |||||||
|                     head.truncate(len); |                     head.truncate(len); | ||||||
|  |  | ||||||
|                     for (i, &pos) in head.iter().enumerate() { |                     for (i, &pos) in head.iter().enumerate() { | ||||||
|                         if pos == '\n' as u8 { |                         if pos == b'\n' { | ||||||
|                             range.start += i as u64; |                             range.start += i as u64; | ||||||
|                             break; |                             break; | ||||||
|                         } |                         } | ||||||
| @@ -218,7 +216,7 @@ fn citymap_thread( | |||||||
|                     head.truncate(len); |                     head.truncate(len); | ||||||
|  |  | ||||||
|                     for (i, &pos) in head.iter().enumerate() { |                     for (i, &pos) in head.iter().enumerate() { | ||||||
|                         if pos == '\n' as u8 { |                         if pos == b'\n' { | ||||||
|                             range.end += i as u64; |                             range.end += i as u64; | ||||||
|                             break; |                             break; | ||||||
|                         } |                         } | ||||||
| @@ -249,7 +247,7 @@ fn citymap_naive(input: &mut impl BufRead) -> Citymap { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Skip over just newline strings that get created by the alignment process |         // Skip over just newline strings that get created by the alignment process | ||||||
|         if buf == &[b'\n'] { |         if buf == b"\n" { | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,7 +1,8 @@ | |||||||
| use crate::models::station_measurements::StationMeasurements; | use crate::models::station_measurements::StationMeasurements; | ||||||
| use crate::utils::parse; | use crate::utils::{hash, parse}; | ||||||
| use memmap2::MmapOptions; | use memmap2::MmapOptions; | ||||||
| use rustc_hash::{FxBuildHasher, FxHashMap as HashMap}; | use rustc_hash::{FxBuildHasher, FxHashMap as HashMap}; | ||||||
|  | use std::slice::from_raw_parts; | ||||||
| use std::sync::mpsc; | use std::sync::mpsc; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
| use std::{fs::File, thread}; | use std::{fs::File, thread}; | ||||||
| @@ -13,9 +14,11 @@ pub fn run() { | |||||||
|     const FILE_PATH: &str = "../../../measurements.txt"; |     const FILE_PATH: &str = "../../../measurements.txt"; | ||||||
|     let file = File::open(FILE_PATH).expect("File measurements.txt not found"); |     let file = File::open(FILE_PATH).expect("File measurements.txt not found"); | ||||||
|     let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; |     let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; | ||||||
|  |     let mmap_ptr = mmap.as_ptr(); | ||||||
|     let file_length = mmap.len(); |     let file_length = mmap.len(); | ||||||
|     let hasher = FxBuildHasher::default(); |     let hasher = FxBuildHasher; | ||||||
|     let mut stations: HashMap<String, StationMeasurements> = |     // Even if I could now just use the byte slice as a key, doing the hash is still faster | ||||||
|  |     let mut stations: HashMap<u64, (&[u8], StationMeasurements)> = | ||||||
|         HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); |         HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||||
|     let (tx, rx) = mpsc::channel(); |     let (tx, rx) = mpsc::channel(); | ||||||
|     let cores = thread::available_parallelism().unwrap().into(); |     let cores = thread::available_parallelism().unwrap().into(); | ||||||
| @@ -39,19 +42,19 @@ pub fn run() { | |||||||
|         for i in 0..cores { |         for i in 0..cores { | ||||||
|             let tx = tx.clone(); |             let tx = tx.clone(); | ||||||
|             let (start, end) = *bounds.get(i).unwrap(); |             let (start, end) = *bounds.get(i).unwrap(); | ||||||
|             let mmap_slice = &mmap[start..end]; |             let mmap_slice = unsafe { from_raw_parts(mmap_ptr.add(start), end - start) }; | ||||||
|             s.spawn(move || { |             s.spawn(move || { | ||||||
|                 let mut t_stations: HashMap<String, StationMeasurements> = |                 let mut t_stations: HashMap<u64, (&[u8], StationMeasurements)> = | ||||||
|                     HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); |                     HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||||
|                 for line in mmap_slice.split(|&byte| { byte == b'\n' }) { |                 for line in mmap_slice.split(|&byte| byte == b'\n') { | ||||||
|                     if line.len() == 0 { |                     if line.is_empty() { | ||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|                     let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; |                     let hash = hash::bytes(station); | ||||||
|                     let temp = parse::temp(temp); |                     let temp = parse::temp(temp); | ||||||
|                     let measurements_option = t_stations.get_mut(&station); |                     let measurements_option = t_stations.get_mut(&hash); | ||||||
|                     if let Some(measurements) = measurements_option { |                     if let Some((_, measurements)) = measurements_option { | ||||||
|                         measurements.update(temp); |                         measurements.update(temp); | ||||||
|                     } else { |                     } else { | ||||||
|                         let measurements = StationMeasurements { |                         let measurements = StationMeasurements { | ||||||
| @@ -60,7 +63,7 @@ pub fn run() { | |||||||
|                             count: 1, |                             count: 1, | ||||||
|                             sum: temp, |                             sum: temp, | ||||||
|                         }; |                         }; | ||||||
|                         t_stations.insert(station, measurements); |                         t_stations.insert(hash, (station, measurements)); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 let _ = tx.send(t_stations); |                 let _ = tx.send(t_stations); | ||||||
| @@ -68,18 +71,19 @@ pub fn run() { | |||||||
|         } |         } | ||||||
|         drop(tx); |         drop(tx); | ||||||
|         while let Ok(t_stations) = rx.recv() { |         while let Ok(t_stations) = rx.recv() { | ||||||
|             for (station, measurements) in t_stations.iter() { |             for (hash, (station, measurements)) in t_stations.iter() { | ||||||
|                 let joined_measurements_options = stations.get_mut(station); |                 let joined_measurements_options = stations.get_mut(hash); | ||||||
|                 if let Some(joined_measurements) = joined_measurements_options { |                 if let Some((_, joined_measurements)) = joined_measurements_options { | ||||||
|                     joined_measurements.merge(measurements); |                     joined_measurements.merge(measurements); | ||||||
|                 } else { |                 } else { | ||||||
|                     stations.insert(station.to_owned(), *measurements); |                     stations.insert(*hash, (station, *measurements)); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         let mut stations: Vec<String> = stations |         let mut stations: Vec<String> = stations | ||||||
|             .iter() |             .iter() | ||||||
|             .map(|(station, measurements)| { |             .map(|(_, (station, measurements))| { | ||||||
|  |                 let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|                 let measurements = measurements.to_string(); |                 let measurements = measurements.to_string(); | ||||||
|                 #[cfg(feature = "json")] |                 #[cfg(feature = "json")] | ||||||
|                 { |                 { | ||||||
|   | |||||||
| @@ -1,11 +1,10 @@ | |||||||
|  | use crate::models::station_measurements::StationMeasurements; | ||||||
|  | use crate::utils::{hash, parse}; | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::io::{BufRead, Seek, SeekFrom}; | use std::io::{BufRead, Seek, SeekFrom}; | ||||||
| use std::sync::mpsc; | use std::sync::mpsc; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
| use std::{fs::File, io::BufReader, thread}; | use std::{fs::File, io::BufReader, thread}; | ||||||
| use crate::models::station_measurements::StationMeasurements; |  | ||||||
| use crate::utils::parse; |  | ||||||
| use crate::utils::parse::hashstr; |  | ||||||
|  |  | ||||||
| const DEFAULT_HASHMAP_LENGTH: usize = 10000; | const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||||
|  |  | ||||||
| @@ -13,7 +12,7 @@ pub fn run() { | |||||||
|     const FILE_PATH: &str = "../../../measurements.txt"; |     const FILE_PATH: &str = "../../../measurements.txt"; | ||||||
|     let now = Instant::now(); |     let now = Instant::now(); | ||||||
|     thread::scope(|s| { |     thread::scope(|s| { | ||||||
|         let mut stations: HashMap<usize, (String, StationMeasurements)> = |         let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||||
|             HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); |             HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||||
|         let (tx, rx) = mpsc::channel(); |         let (tx, rx) = mpsc::channel(); | ||||||
|         let cores = thread::available_parallelism().unwrap().into(); |         let cores = thread::available_parallelism().unwrap().into(); | ||||||
| @@ -22,30 +21,29 @@ pub fn run() { | |||||||
|         let file_length = reader.seek(SeekFrom::End(0)).unwrap(); |         let file_length = reader.seek(SeekFrom::End(0)).unwrap(); | ||||||
|         let chunk_length = file_length as usize / cores; |         let chunk_length = file_length as usize / cores; | ||||||
|         let mut bounds = Vec::with_capacity(cores + 1); |         let mut bounds = Vec::with_capacity(cores + 1); | ||||||
|         bounds.push(0); |         let mut start = 0; | ||||||
|         for i in 1..cores { |         for i in 0..cores { | ||||||
|             let mut reader = BufReader::new(&file); |             let mut reader = BufReader::new(&file); | ||||||
|             let mut byte_start = chunk_length * i; |             let mut end = chunk_length * i; | ||||||
|             reader |             reader | ||||||
|                 .seek(SeekFrom::Start(byte_start as u64)) |                 .seek(SeekFrom::Start(end as u64)) | ||||||
|                 .expect("could not seek"); |                 .expect("could not seek"); | ||||||
|             let mut line = Vec::with_capacity(108); |             let mut line = Vec::with_capacity(108); | ||||||
|             let line_len = reader |             let line_len = reader | ||||||
|                 .read_until(b'\n', &mut line) |                 .read_until(b'\n', &mut line) | ||||||
|                 .expect("could not read bytes"); |                 .expect("could not read bytes"); | ||||||
|             byte_start += line_len; |             end += line_len; | ||||||
|             bounds.push(byte_start as u64); |             bounds.push((start, end)); | ||||||
|  |             start = end + 1; | ||||||
|         } |         } | ||||||
|         bounds.push(file_length); |  | ||||||
|         for i in 0..cores { |         for i in 0..cores { | ||||||
|             let tx = tx.clone(); |             let tx = tx.clone(); | ||||||
|             let mut currposition = *bounds.get(i).unwrap(); |             let (mut currposition, end) = *bounds.get(i).unwrap(); | ||||||
|             let end = *bounds.get(i + 1).unwrap(); |  | ||||||
|             s.spawn(move || { |             s.spawn(move || { | ||||||
|                 let file = File::open(FILE_PATH).expect("File measurements.txt not found"); |                 let file = File::open(FILE_PATH).expect("File measurements.txt not found"); | ||||||
|                 let mut reader = BufReader::new(&file); |                 let mut reader = BufReader::new(&file); | ||||||
|                 reader.seek(SeekFrom::Start(currposition)).unwrap(); |                 reader.seek(SeekFrom::Start(currposition as u64)).unwrap(); | ||||||
|                 let mut t_stations: HashMap<usize, (String, StationMeasurements)> = |                 let mut t_stations: HashMap<u64, (String, StationMeasurements)> = | ||||||
|                     HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); |                     HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||||
|                 let mut line = Vec::with_capacity(108); |                 let mut line = Vec::with_capacity(108); | ||||||
|                 loop { |                 loop { | ||||||
| @@ -56,8 +54,8 @@ pub fn run() { | |||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|                     let hash = hashstr(station); |                     let hash = hash::bytes(station); | ||||||
|                     let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; |                     let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|                     let temp = parse::temp(temp.split_last().unwrap().1); |                     let temp = parse::temp(temp.split_last().unwrap().1); | ||||||
|                     let measurements_option = t_stations.get_mut(&hash); |                     let measurements_option = t_stations.get_mut(&hash); | ||||||
|                     if let Some((_, measurements)) = measurements_option { |                     if let Some((_, measurements)) = measurements_option { | ||||||
| @@ -69,9 +67,9 @@ pub fn run() { | |||||||
|                             count: 1, |                             count: 1, | ||||||
|                             sum: temp, |                             sum: temp, | ||||||
|                         }; |                         }; | ||||||
|                         t_stations.insert(hash, (station, measurements)); |                         t_stations.insert(hash, (station.to_string(), measurements)); | ||||||
|                     } |                     } | ||||||
|                     currposition += line_len as u64; |                     currposition += line_len; | ||||||
|                     if currposition >= end { |                     if currposition >= end { | ||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|   | |||||||
| @@ -1,10 +1,10 @@ | |||||||
| use smol::fs::File; | use smol::fs::File; | ||||||
| use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom}; | use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom}; | ||||||
| use rustc_hash::{FxHashMap as HashMap, FxBuildHasher}; | use std::collections::HashMap; | ||||||
|  |  | ||||||
| use crate::models::station_measurements::StationMeasurements; | use crate::models::station_measurements::StationMeasurements; | ||||||
| use crate::utils::parse; | use crate::utils::{hash, parse}; | ||||||
| use std::sync::mpsc; | use easy_parallel::Parallel; | ||||||
| use std::thread; | use std::thread; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
|  |  | ||||||
| @@ -13,11 +13,8 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000; | |||||||
| pub fn run() { | pub fn run() { | ||||||
|     const FILE_PATH: &str = "../../../measurements.txt"; |     const FILE_PATH: &str = "../../../measurements.txt"; | ||||||
|     let now = Instant::now(); |     let now = Instant::now(); | ||||||
|     thread::scope(|s| { |     let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||||
|         let hasher = FxBuildHasher::default(); |         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||||
|         let mut stations: HashMap<String, StationMeasurements> = |  | ||||||
|             HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); |  | ||||||
|         let (tx, rx) = mpsc::channel(); |  | ||||||
|     let cores = thread::available_parallelism().unwrap().into(); |     let cores = thread::available_parallelism().unwrap().into(); | ||||||
|     let bounds = smol::block_on(async { |     let bounds = smol::block_on(async { | ||||||
|         let mut file = File::open(FILE_PATH) |         let mut file = File::open(FILE_PATH) | ||||||
| @@ -46,19 +43,18 @@ pub fn run() { | |||||||
|         bounds.push(file_length); |         bounds.push(file_length); | ||||||
|         bounds |         bounds | ||||||
|     }); |     }); | ||||||
|         for i in 0..cores { |     let t_stations_vec = Parallel::new() | ||||||
|             let tx = tx.clone(); |         .each(0..cores, |i| { | ||||||
|             let mut currposition = *bounds.get(i).unwrap(); |             let mut currposition = *bounds.get(i).unwrap(); | ||||||
|             let end = *bounds.get(i + 1).unwrap(); |             let end = *bounds.get(i + 1).unwrap(); | ||||||
|             s.spawn(move || { |  | ||||||
|             smol::block_on(async { |             smol::block_on(async { | ||||||
|                 let mut file = File::open(FILE_PATH) |                 let mut file = File::open(FILE_PATH) | ||||||
|                     .await |                     .await | ||||||
|                     .expect("File measurements.txt not found"); |                     .expect("File measurements.txt not found"); | ||||||
|                 let mut reader = BufReader::new(&mut file); |                 let mut reader = BufReader::new(&mut file); | ||||||
|                 reader.seek(SeekFrom::Start(currposition)).await.unwrap(); |                 reader.seek(SeekFrom::Start(currposition)).await.unwrap(); | ||||||
|                     let mut t_stations: HashMap<String, StationMeasurements> = |                 let mut t_stations: HashMap<u64, (String, StationMeasurements)> = | ||||||
|                         HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); |                     HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||||
|                 let mut line = Vec::with_capacity(108); |                 let mut line = Vec::with_capacity(108); | ||||||
|                 loop { |                 loop { | ||||||
|                     let line_len = reader |                     let line_len = reader | ||||||
| @@ -69,10 +65,11 @@ pub fn run() { | |||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |                     let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|                         let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; |                     let hash = hash::bytes(station); | ||||||
|  |                     let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|                     let temp = parse::temp(temp.split_last().unwrap().1); |                     let temp = parse::temp(temp.split_last().unwrap().1); | ||||||
|                         let measurements_option = t_stations.get_mut(&station); |                     let measurements_option = t_stations.get_mut(&hash); | ||||||
|                         if let Some(measurements) = measurements_option { |                     if let Some((_, measurements)) = measurements_option { | ||||||
|                         measurements.update(temp); |                         measurements.update(temp); | ||||||
|                     } else { |                     } else { | ||||||
|                         let measurements = StationMeasurements { |                         let measurements = StationMeasurements { | ||||||
| @@ -81,7 +78,7 @@ pub fn run() { | |||||||
|                             count: 1, |                             count: 1, | ||||||
|                             sum: temp, |                             sum: temp, | ||||||
|                         }; |                         }; | ||||||
|                             t_stations.insert(station, measurements); |                         t_stations.insert(hash, (station.to_string(), measurements)); | ||||||
|                     } |                     } | ||||||
|                     currposition += line_len as u64; |                     currposition += line_len as u64; | ||||||
|                     if currposition >= end { |                     if currposition >= end { | ||||||
| @@ -89,24 +86,23 @@ pub fn run() { | |||||||
|                     } |                     } | ||||||
|                     line.clear(); |                     line.clear(); | ||||||
|                 } |                 } | ||||||
|                     let _ = tx.send(t_stations); |                 t_stations | ||||||
|             }) |             }) | ||||||
|             }); |         }) | ||||||
|         } |         .run(); | ||||||
|         drop(tx); |     for t_stations in t_stations_vec { | ||||||
|         while let Ok(t_stations) = rx.recv() { |         for (hash, (station, measurements)) in t_stations.iter() { | ||||||
|             for (station, measurements) in t_stations.iter() { |             let joined_measurements_options = stations.get_mut(hash); | ||||||
|                 let joined_measurements_options = stations.get_mut(station); |             if let Some((_, joined_measurements)) = joined_measurements_options { | ||||||
|                 if let Some(joined_measurements) = joined_measurements_options { |  | ||||||
|                 joined_measurements.merge(measurements); |                 joined_measurements.merge(measurements); | ||||||
|             } else { |             } else { | ||||||
|                     stations.insert(station.to_owned(), *measurements); |                 stations.insert(*hash, (station.to_owned(), *measurements)); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     let mut stations: Vec<String> = stations |     let mut stations: Vec<String> = stations | ||||||
|         .iter() |         .iter() | ||||||
|             .map(|(station, measurements)| { |         .map(|(_, (station, measurements))| { | ||||||
|             let measurements = measurements.to_string(); |             let measurements = measurements.to_string(); | ||||||
|             #[cfg(feature = "json")] |             #[cfg(feature = "json")] | ||||||
|             { |             { | ||||||
| @@ -129,5 +125,4 @@ pub fn run() { | |||||||
|         println!("\n\n{{{stations}}}"); |         println!("\n\n{{{stations}}}"); | ||||||
|     } |     } | ||||||
|     println!("\n\nTime={} ms", now.elapsed().as_millis()); |     println!("\n\nTime={} ms", now.elapsed().as_millis()); | ||||||
|     }); |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,11 +1,11 @@ | |||||||
|  | use crate::models::station_measurements::StationMeasurements; | ||||||
|  | use crate::utils::parse; | ||||||
|  | use memmap2::MmapOptions; | ||||||
|  | use rustc_hash::{FxBuildHasher, FxHashMap}; | ||||||
|  | use std::ffi::CStr; | ||||||
| use std::sync::mpsc; | use std::sync::mpsc; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
| use std::{fs::File, thread}; | use std::{fs::File, thread}; | ||||||
| use std::ffi::CStr; |  | ||||||
| use memmap2::MmapOptions; |  | ||||||
| use rustc_hash::{FxBuildHasher, FxHashMap}; |  | ||||||
| use crate::models::station_measurements::StationMeasurements; |  | ||||||
| use crate::utils::parse; |  | ||||||
|  |  | ||||||
| const DEFAULT_HASHMAP_LENGTH: usize = 10000; | const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||||
|  |  | ||||||
| @@ -16,7 +16,7 @@ pub fn run() { | |||||||
|     let file = File::open(FILE_PATH).expect("File structured_measurements.txt not found"); |     let file = File::open(FILE_PATH).expect("File structured_measurements.txt not found"); | ||||||
|     let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; |     let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; | ||||||
|     let file_length = mmap.len(); |     let file_length = mmap.len(); | ||||||
|     let hasher = FxBuildHasher::default(); |     let hasher = FxBuildHasher; | ||||||
|     let mut stations: FxHashMap<String, StationMeasurements> = |     let mut stations: FxHashMap<String, StationMeasurements> = | ||||||
|         FxHashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); |         FxHashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||||
|     let (tx, rx) = mpsc::channel(); |     let (tx, rx) = mpsc::channel(); | ||||||
| @@ -53,11 +53,16 @@ pub fn run() { | |||||||
|                     let (station, temp) = unsafe { line.split_at_unchecked(100) }; |                     let (station, temp) = unsafe { line.split_at_unchecked(100) }; | ||||||
|                     let station = { |                     let station = { | ||||||
|                         if station[station.len() - 1] == 0u8 { |                         if station[station.len() - 1] == 0u8 { | ||||||
|                             unsafe { std::str::from_utf8_unchecked(CStr::from_bytes_until_nul(station).unwrap().to_bytes()) } |                             unsafe { | ||||||
|  |                                 std::str::from_utf8_unchecked( | ||||||
|  |                                     CStr::from_bytes_until_nul(station).unwrap().to_bytes(), | ||||||
|  |                                 ) | ||||||
|  |                             } | ||||||
|                         } else { |                         } else { | ||||||
|                             unsafe { std::str::from_utf8_unchecked(station) } |                             unsafe { std::str::from_utf8_unchecked(station) } | ||||||
|                         } |                         } | ||||||
|                     }.to_owned(); |                     } | ||||||
|  |                     .to_owned(); | ||||||
|                     let temp = parse::temp_new(&temp[1..6]); |                     let temp = parse::temp_new(&temp[1..6]); | ||||||
|                     let measurements_option = t_stations.get_mut(&station); |                     let measurements_option = t_stations.get_mut(&station); | ||||||
|                     if let Some(measurements) = measurements_option { |                     if let Some(measurements) = measurements_option { | ||||||
|   | |||||||
| @@ -115,7 +115,7 @@ fn merge_hashmaps<'a>( | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Parses a chunk of the input as StationData values. | /// Parses a chunk of the input as StationData values. | ||||||
| fn process_chunk<'a>(current_chunk_slice: &'a [u8]) -> HashMap<&'a [u8], StationData> { | fn process_chunk(current_chunk_slice: &[u8]) -> HashMap<&[u8], StationData> { | ||||||
|     let mut station_map: HashMap<&[u8], StationData> = HashMap::with_capacity(MAX_STATIONS); |     let mut station_map: HashMap<&[u8], StationData> = HashMap::with_capacity(MAX_STATIONS); | ||||||
|     let mut start = 0; |     let mut start = 0; | ||||||
|     while let Some(end) = current_chunk_slice[start..] |     while let Some(end) = current_chunk_slice[start..] | ||||||
| @@ -187,7 +187,7 @@ fn write_output_to_stdout(station_map: HashMap<&[u8], StationData>) -> io::Resul | |||||||
| pub fn run() -> io::Result<()> { | pub fn run() -> io::Result<()> { | ||||||
|     // won't accept non-utf-8 args |     // won't accept non-utf-8 args | ||||||
|     let args: Vec<String> = env::args().collect(); |     let args: Vec<String> = env::args().collect(); | ||||||
|     let file_name = match args.get(2).clone() { |     let file_name = match args.get(2) { | ||||||
|         Some(fname) => fname, |         Some(fname) => fname, | ||||||
|         None => "../../../measurements.txt", |         None => "../../../measurements.txt", | ||||||
|     }; |     }; | ||||||
|   | |||||||
| @@ -50,7 +50,7 @@ impl State { | |||||||
| fn make_map<'a>(i: impl Iterator<Item = &'a [u8]>) -> HashMap<&'a BStr, State> { | fn make_map<'a>(i: impl Iterator<Item = &'a [u8]>) -> HashMap<&'a BStr, State> { | ||||||
|     let mut state: HashMap<&'a BStr, State> = Default::default(); |     let mut state: HashMap<&'a BStr, State> = Default::default(); | ||||||
|     for line in i { |     for line in i { | ||||||
|         let (name, value) = line.split_once_str(&[b';']).unwrap(); |         let (name, value) = line.split_once_str(b";").unwrap(); | ||||||
|         let value = fast_float::parse(value).unwrap(); |         let value = fast_float::parse(value).unwrap(); | ||||||
|         state.entry(name.into()).or_default().update(value); |         state.entry(name.into()).or_default().update(value); | ||||||
|     } |     } | ||||||
| @@ -58,7 +58,7 @@ fn make_map<'a>(i: impl Iterator<Item = &'a [u8]>) -> HashMap<&'a BStr, State> { | |||||||
| } | } | ||||||
|  |  | ||||||
| fn solve_for_part((start, end): (usize, usize), mem: &[u8]) -> HashMap<&BStr, State> { | fn solve_for_part((start, end): (usize, usize), mem: &[u8]) -> HashMap<&BStr, State> { | ||||||
|     make_map((&mem[start..end]).lines()) |     make_map((mem[start..end]).lines()) | ||||||
| } | } | ||||||
|  |  | ||||||
| fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) { | fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) { | ||||||
| @@ -70,7 +70,7 @@ fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) { | |||||||
| pub fn run() { | pub fn run() { | ||||||
|     let now = Instant::now(); |     let now = Instant::now(); | ||||||
|     let cores: usize = std::thread::available_parallelism().unwrap().into(); |     let cores: usize = std::thread::available_parallelism().unwrap().into(); | ||||||
|     let path = match std::env::args().skip(1).next() { |     let path = match std::env::args().nth(1) { | ||||||
|         Some(path) => path, |         Some(path) => path, | ||||||
|         None => "../../../measurements.txt".to_owned(), |         None => "../../../measurements.txt".to_owned(), | ||||||
|     }; |     }; | ||||||
| @@ -104,7 +104,7 @@ pub fn run() { | |||||||
|     }); |     }); | ||||||
|  |  | ||||||
|     let mut all: Vec<_> = state.into_iter().collect(); |     let mut all: Vec<_> = state.into_iter().collect(); | ||||||
|     all.sort_unstable_by(|a, b| a.0.cmp(&b.0)); |     all.sort_unstable_by(|a, b| a.0.cmp(b.0)); | ||||||
|     #[cfg(feature = "json")] |     #[cfg(feature = "json")] | ||||||
|     { |     { | ||||||
|         print!("["); |         print!("["); | ||||||
|   | |||||||
							
								
								
									
										443
									
								
								src/main/rust/src/implementations/rgk.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										443
									
								
								src/main/rust/src/implementations/rgk.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,443 @@ | |||||||
|  | use ptr_hash::PtrHashParams; | ||||||
|  | use rustc_hash::FxHashSet; | ||||||
|  | use std::{ | ||||||
|  |     simd::{cmp::SimdPartialEq, Simd}, | ||||||
|  |     vec::Vec, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | type V = i32; | ||||||
|  |  | ||||||
|  | type PtrHash = ptr_hash::DefaultPtrHash<ptr_hash::hash::FxHash, u64>; | ||||||
|  |  | ||||||
|  | pub struct Phf { | ||||||
|  |     pub ptr_hash: PtrHash, | ||||||
|  |     pub keys: Vec<Vec<u8>>, | ||||||
|  |     pub slots: Vec<Record>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Phf { | ||||||
|  |     fn new(mut keys: Vec<Vec<u8>>) -> Self { | ||||||
|  |         keys.sort(); | ||||||
|  |  | ||||||
|  |         let num_slots = keys.len() * 5 / 2; | ||||||
|  |         let params = ptr_hash::PtrHashParams { | ||||||
|  |             alpha: 0.9, | ||||||
|  |             c: 1.5, | ||||||
|  |             slots_per_part: num_slots, | ||||||
|  |             ..PtrHashParams::default() | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let mut hashes: Vec<u64> = keys.iter().map(|key| hash_name(key)).collect(); | ||||||
|  |         hashes.sort(); | ||||||
|  |         for (x, y) in hashes.iter().zip(hashes.iter().skip(1)) { | ||||||
|  |             assert!(*x != *y, "DUPLICATE HASH"); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let ptr_hash = PtrHash::new(&hashes, params); | ||||||
|  |  | ||||||
|  |         let slots = vec![Record::default(); num_slots]; | ||||||
|  |  | ||||||
|  |         Self { | ||||||
|  |             ptr_hash, | ||||||
|  |             keys, | ||||||
|  |             slots, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     fn compute_index(&self, hash: u64) -> usize { | ||||||
|  |         self.ptr_hash.index_single_part(&hash) | ||||||
|  |     } | ||||||
|  |     fn get_index_mut(&mut self, idx: usize) -> &mut Record { | ||||||
|  |         &mut self.slots[idx] | ||||||
|  |     } | ||||||
|  |     fn index_hash_mut(&mut self, hash: u64) -> &mut Record { | ||||||
|  |         &mut self.slots[self.ptr_hash.index_single_part(&hash)] | ||||||
|  |     } | ||||||
|  |     pub fn index<'b>(&'b self, key: &[u8]) -> &'b Record { | ||||||
|  |         let hash = hash_name(key); | ||||||
|  |         &self.slots[self.compute_index(hash)] | ||||||
|  |     } | ||||||
|  |     fn index_mut<'b>(&'b mut self, key: &[u8]) -> &'b mut Record { | ||||||
|  |         self.index_hash_mut(hash_name(key)) | ||||||
|  |     } | ||||||
|  |     fn merge(&mut self, r: Self) { | ||||||
|  |         // TODO: If key sets are equal or one is a subset of the other, merge | ||||||
|  |         // smaller into larger. | ||||||
|  |         let mut new_keys = vec![]; | ||||||
|  |         let mut i1 = 0; | ||||||
|  |         let mut i2 = 0; | ||||||
|  |         while i1 < self.keys.len() && i2 < r.keys.len() { | ||||||
|  |             if self.keys[i1] == r.keys[i2] { | ||||||
|  |                 new_keys.push(self.keys[i1].clone()); | ||||||
|  |                 i1 += 1; | ||||||
|  |                 i2 += 1; | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             if self.keys[i1] < r.keys[i2] { | ||||||
|  |                 new_keys.push(self.keys[i1].clone()); | ||||||
|  |                 i1 += 1; | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             if self.keys[i1] > r.keys[i2] { | ||||||
|  |                 new_keys.push(r.keys[i2].clone()); | ||||||
|  |                 i2 += 1; | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             panic!(); | ||||||
|  |         } | ||||||
|  |         while i1 < self.keys.len() { | ||||||
|  |             new_keys.push(self.keys[i1].clone()); | ||||||
|  |             i1 += 1; | ||||||
|  |         } | ||||||
|  |         while i2 < r.keys.len() { | ||||||
|  |             new_keys.push(r.keys[i2].clone()); | ||||||
|  |             i2 += 1; | ||||||
|  |         } | ||||||
|  |         let mut new_phf = Self::new(new_keys); | ||||||
|  |         for key in &self.keys { | ||||||
|  |             new_phf.index_mut(key).merge(self.index(key)); | ||||||
|  |         } | ||||||
|  |         for key in &r.keys { | ||||||
|  |             new_phf.index_mut(key).merge(r.index(key)); | ||||||
|  |         } | ||||||
|  |         *self = new_phf; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Clone, Debug)] | ||||||
|  | #[repr(align(32))] | ||||||
|  | pub struct Record { | ||||||
|  |     pub count: u64, | ||||||
|  |     // Storing these as two u32 is nice, because they are read as a single u64. | ||||||
|  |     /// Byte representation of string ~b"bc.d" or ~b"\0c.d". | ||||||
|  |     pub min: u32, | ||||||
|  |     /// Byte representation of string b"bc.d" or b"\0c.d". | ||||||
|  |     pub max: u32, | ||||||
|  |     pub sum: u64, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Record { | ||||||
|  |     fn default() -> Self { | ||||||
|  |         Self { | ||||||
|  |             count: 0, | ||||||
|  |             min: 0, | ||||||
|  |             max: 0, | ||||||
|  |             sum: 0, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     fn add(&mut self, raw_value: u32, value: u64) { | ||||||
|  |         // assert2::debug_assert!(value < 1000); | ||||||
|  |         self.count += 1; | ||||||
|  |         self.sum += value; | ||||||
|  |         // See https://en.algorithmica.org/hpc/algorithms/argmin/ | ||||||
|  |         if raw_value < self.min { | ||||||
|  |             self.min = raw_value; | ||||||
|  |         } | ||||||
|  |         if raw_value > self.max { | ||||||
|  |             self.max = raw_value; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     fn merge(&mut self, other: &Self) { | ||||||
|  |         self.count += other.count; | ||||||
|  |         self.sum += other.sum_to_val() as u64; | ||||||
|  |         self.min = self.min.min(other.min); | ||||||
|  |         self.max = self.max.max(other.max); | ||||||
|  |     } | ||||||
|  |     fn sum_to_val(&self) -> V { | ||||||
|  |         let m = (1 << 21) - 1; | ||||||
|  |         ((self.sum & m) + 10 * ((self.sum >> 21) & m) + 100 * ((self.sum >> 42) & m)) as _ | ||||||
|  |     } | ||||||
|  |     /// Return (min, avg, max) | ||||||
|  |     pub fn merge_pos_neg(pos: &Record, neg: &Record) -> (V, V, V) { | ||||||
|  |         let pos_sum = pos.sum as V; | ||||||
|  |         let neg_sum = neg.sum as V; | ||||||
|  |         let sum = pos_sum - neg_sum; | ||||||
|  |         let count = (pos.count + neg.count) as V; | ||||||
|  |         // round to nearest | ||||||
|  |         let avg = (sum + count / 2).div_floor(count); | ||||||
|  |  | ||||||
|  |         let pos_max = raw_to_value(pos.max); | ||||||
|  |         let neg_max = -raw_to_value(neg.min); | ||||||
|  |         let max = pos_max.max(neg_max); | ||||||
|  |  | ||||||
|  |         let pos_min = raw_to_value(pos.min); | ||||||
|  |         let neg_min = -raw_to_value(neg.max); | ||||||
|  |         let min = pos_min.min(neg_min); | ||||||
|  |  | ||||||
|  |         (min, avg, max) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
/// Packs the ASCII bytes `data[start..end]` of a reading ("bc.d" or "c.d")
/// into a big-endian integer, right-aligned.
///
/// A 4-byte reading yields `0x_bb_cc_2e_dd`, a 3-byte one `0x_00_cc_2e_dd`.
/// The digits are still ASCII; `raw_to_value` and `raw_to_pdep` do the
/// masking. `end - start` is expected to be between 1 and 4.
fn parse_to_raw(data: &[u8], start: usize, end: usize) -> u32 {
    let width = end - start;
    match data.get(start..).and_then(|rest| rest.first_chunk::<4>()) {
        // Usual case: one 4-byte load, then shift out whatever follows `end`.
        Some(window) => u32::from_be_bytes(*window) >> (8 * (4 - width)),
        // Fewer than four bytes are left in `data`: assemble byte by byte
        // instead of reading past the end of the slice.
        None => data[start..end]
            .iter()
            .fold(0, |acc, &byte| (acc << 8) | u32::from(byte)),
    }
}
|  |  | ||||||
/// Spreads the three digit nibbles of a raw reading (see `parse_to_raw`) into
/// separate fields of a u64: the last digit at bit 0, the middle digit at
/// bit 21 and the leading digit at bit 42. Adding such values accumulates the
/// three digits independently.
///
/// With the `no_pdep` feature the fields are placed with shifts and masks;
/// otherwise a single `pdep` instruction does the same.
fn raw_to_pdep(raw: u32) -> u64 {
    #[cfg(feature = "no_pdep")]
    {
        let raw = raw as u64;
        // Low nibble of byte 0 stays, byte 2 moves from bit 16 to 21, byte 3
        // from bit 24 to 42.
        (raw & 15) | ((raw & (15 << 16)) << (21 - 16)) | ((raw & (15 << 24)) << (42 - 24))
    }
    #[cfg(not(feature = "no_pdep"))]
    {
        // Keep only the low nibble of each digit byte (drops the ASCII 0x30
        // and the '.' byte).
        let mask = 0x0f0f000f;
        let raw = raw & mask;
        // input                                     0011bbbb0011cccc........0011dddd
        //         0b                  bbbb             xxxxcccc     yyyyyyyyyyyydddd // Deposit here
        //         0b                  1111                 1111                 1111 // Mask out trash using &
        let pdep = 0b0000000000000000001111000000000000011111111000001111111111111111u64;
        // NOTE(review): `_pdep_u64` presumably requires BMI2 on the running
        // CPU — confirm the build's target features.
        unsafe { core::arch::x86_64::_pdep_u64(raw as u64, pdep) }
    }
}
|  |  | ||||||
|  | fn raw_to_value(v: u32) -> V { | ||||||
|  |     let mask = 0x0f0f000f; | ||||||
|  |     let bytes = (v & mask).to_be_bytes(); | ||||||
|  |     // s = bc.d | ||||||
|  |     let b = bytes[0] as V; | ||||||
|  |     let c = bytes[1] as V; | ||||||
|  |     let d = bytes[3] as V; | ||||||
|  |     b as V * 100 * (bytes[0] != 0) as V + c as V * 10 + d as V | ||||||
|  | } | ||||||
|  |  | ||||||
/// Renders `v`, a value in tenths, as a decimal string with exactly one
/// fractional digit (`v / 10`).
pub fn format(v: V) -> String {
    format!("{:.1}", v as f64 / 10.0)
}
|  |  | ||||||
/// Hashes a station name from its first and last 8 bytes.
///
/// For names shorter than 8 bytes both words are zero-padded, so the result
/// depends on the name's own bytes only and no memory outside `name` is read.
/// The words are read little-endian, which is what the native-endian reads
/// produced on the little-endian targets this code runs on.
#[allow(unused)]
fn hash_name(name: &[u8]) -> u64 {
    // Hash the first and last 8 bytes.
    // TODO: More robust hash that actually uses all characters.
    let len = name.len();
    let (head, tail) = match (name.first_chunk::<8>(), name.last_chunk::<8>()) {
        (Some(head), Some(tail)) => (*head, *tail),
        // Shorter than 8 bytes: left-align the name in `head`, right-align it
        // in `tail`, leaving the rest zero.
        _ => {
            let mut head = [0u8; 8];
            let mut tail = [0u8; 8];
            head[..len].copy_from_slice(name);
            tail[8 - len..].copy_from_slice(name);
            (head, tail)
        }
    };
    let shift = 64usize.saturating_sub(8 * len) as u32;
    // `shift` reaches 64 only for an empty name, where both words are zero,
    // so the wrapped shift amount cannot change the result.
    let khead = u64::from_le_bytes(head).wrapping_shl(shift);
    let ktail = u64::from_le_bytes(tail).wrapping_shr(shift);
    khead.wrapping_add(ktail)
}
|  |  | ||||||
|  | /// Number of SIMD lanes. AVX2 has 256 bits, so 32 lanes. | ||||||
|  | const L: usize = 32; | ||||||
|  | /// The Simd type. | ||||||
|  | pub type S = Simd<u8, L>; | ||||||
|  |  | ||||||
/// Byte positions of one line, as tracked by `iter_lines`.
#[derive(Copy, Clone)]
struct State {
    /// Index of the first byte of the line (one past the previous '\n').
    start: usize,
    /// Index of the ';' found for this line.
    sep: usize,
    /// Index reported by the '\n' search that starts after `sep`.
    end: usize,
}
|  |  | ||||||
/// Find the regions between \n and ; (names) and between ; and \n (values),
/// and calls `callback` for each line.
///
/// Four cursors walk the data at once, starting near 0, 1/4, 2/4 and 3/4 of
/// its length; `callback` receives the (shortened) data slice and one `State`
/// per cursor on every step.
///
/// Panics if `data` is shorter than 32 bytes.
#[inline(always)]
fn iter_lines<'a>(
    mut data: &'a [u8],
    mut callback: impl FnMut(&'a [u8], State, State, State, State),
) {
    // Make sure that the out-of-bounds reads we do are OK.
    // The searches below load 32 bytes at a time without bounds checks, so the
    // slice is cut 32 bytes short of the real buffer.
    data = &data[..data.len() - 32];

    let sep = S::splat(b';');
    let end = S::splat(b'\n');

    // Position of the first `sep` byte within the 32 bytes starting at `last`.
    // When there is none, the mask is 0 and the result is `last + 32`.
    let find = |last: usize, sep: S| {
        let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
        let eq = sep.simd_eq(simd).to_bitmask() as u32;
        let offset = eq.trailing_zeros() as usize;
        last + offset
    };
    // Modified to be able to search regions longer than 32.
    // Keeps advancing 32 bytes at a time until a match shows up.
    // NOTE(review): nothing bounds this loop by the buffer length — it relies
    // on a match existing; confirm for input without a trailing match.
    let find_long = |mut last: usize, sep: S| {
        let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
        let mut eq = sep.simd_eq(simd).to_bitmask() as u32;
        if eq == 0 {
            while eq == 0 {
                last += 32;
                let simd = S::from_array(unsafe { *data.get_unchecked(last..).as_ptr().cast() });
                eq = sep.simd_eq(simd).to_bitmask() as u32;
            }
        }
        let offset = eq.trailing_zeros() as usize;
        last + offset
    };

    // A cursor starts on the byte after the first '\n' at or after `idx`, so
    // whatever precedes that newline (including the very first line of `data`
    // for the cursor at 0) is not reported by this cursor.
    let init_state = |idx: usize| {
        let first_end = find_long(idx, end);
        State {
            start: first_end + 1,
            sep: first_end + 1,
            end: 0,
        }
    };

    let mut state0 = init_state(0);
    let mut state1 = init_state(data.len() / 4);
    let mut state2 = init_state(2 * data.len() / 4);
    let mut state3 = init_state(3 * data.len() / 4);

    // Duplicate each line for each input state.
    // One step: find the next ';' for every cursor, then the '\n' after it,
    // hand all four lines to `callback`, then move each cursor to the next line.
    macro_rules! step {
        [$($s:expr),*] => {
            $($s.sep = find_long($s.sep + 1, sep) ;)*
                $($s.end = find($s.sep + 1, end) ;)*
                callback(data, $($s, )*);
                $($s.start = $s.end + 1;)*
        }
    }

    // NOTE(review): only the last cursor is checked against the end of the
    // data; cursors 0..2 are not stopped at the next cursor's starting point,
    // so the set of lines visited may not match the input exactly — confirm
    // this approximation is intended.
    while state3.start < data.len() {
        step!(state0, state1, state2, state3);
    }
}
|  |  | ||||||
/// Accumulates every line of `data` into a fresh table built for `keys`.
///
/// The sums in the returned records are still in the packed per-digit layout
/// produced by `raw_to_pdep`.
fn run(data: &[u8], keys: &[Vec<u8>]) -> Phf {
    // Each thread has its own accumulator.
    let mut h = Phf::new(keys.to_vec());
    iter_lines(
        data,
        |data, mut s0: State, mut s1: State, mut s2: State, mut s3: State| {
            // NOTE(review): all accesses below are unchecked and rely on the
            // positions handed out by `iter_lines` staying inside the
            // underlying buffer — confirm.
            unsafe {
                // If value is negative, extend name by one character.
                // The name then ends in ';' (a separate table key) and the
                // value parsed below starts after the '-'.
                s0.sep += (data.get_unchecked(s0.sep + 1) == &b'-') as usize;
                let name0 = data.get_unchecked(s0.start..s0.sep);

                s1.sep += (data.get_unchecked(s1.sep + 1) == &b'-') as usize;
                let name1 = data.get_unchecked(s1.start..s1.sep);

                s2.sep += (data.get_unchecked(s2.sep + 1) == &b'-') as usize;
                let name2 = data.get_unchecked(s2.start..s2.sep);

                s3.sep += (data.get_unchecked(s3.sep + 1) == &b'-') as usize;
                let name3 = data.get_unchecked(s3.start..s3.sep);

                // Raw bytes of the four readings.
                let raw0 = parse_to_raw(data, s0.sep + 1, s0.end);
                let raw1 = parse_to_raw(data, s1.sep + 1, s1.end);
                let raw2 = parse_to_raw(data, s2.sep + 1, s2.end);
                let raw3 = parse_to_raw(data, s3.sep + 1, s3.end);

                let h0 = hash_name(name0);
                let h1 = hash_name(name1);
                let h2 = hash_name(name2);
                let h3 = hash_name(name3);

                // All four slot lookups are computed before any record is
                // touched.
                let idx0 = h.compute_index(h0);
                let idx1 = h.compute_index(h1);
                let idx2 = h.compute_index(h2);
                let idx3 = h.compute_index(h3);

                h.get_index_mut(idx0).add(raw0, raw_to_pdep(raw0));
                h.get_index_mut(idx1).add(raw1, raw_to_pdep(raw1));
                h.get_index_mut(idx2).add(raw2, raw_to_pdep(raw2));
                h.get_index_mut(idx3).add(raw3, raw_to_pdep(raw3));
            }
        },
    );
    h
}
|  |  | ||||||
|  | pub fn run_parallel(data: &[u8], keys: &[Vec<u8>], num_threads: usize) -> Phf { | ||||||
|  |     if num_threads == 0 { | ||||||
|  |         return run(data, keys); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let phf = std::sync::Mutex::new(Phf::new(keys.to_vec())); | ||||||
|  |  | ||||||
|  |     // Spawn one thread per core. | ||||||
|  |     std::thread::scope(|s| { | ||||||
|  |         let chunks = data.chunks(data.len() / num_threads + 1); | ||||||
|  |         for chunk in chunks { | ||||||
|  |             s.spawn(|| { | ||||||
|  |                 // Each thread has its own accumulator. | ||||||
|  |                 let thread_phf = run(chunk, keys); | ||||||
|  |  | ||||||
|  |                 // Merge results. | ||||||
|  |                 phf.lock().unwrap().merge(thread_phf); | ||||||
|  |             }); | ||||||
|  |         } | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     phf.into_inner().unwrap() | ||||||
|  | } | ||||||
|  |  | ||||||
/// Views station-name bytes as text.
///
/// # Panics
/// Panics when `name` is not valid UTF-8.
pub fn to_str(name: &[u8]) -> &str {
    std::str::from_utf8(name).expect("station name is not valid UTF-8")
}
|  |  | ||||||
|  | /// Returns a list of city names found in data. | ||||||
|  | /// Each city is returned twice, once as `<city>` and once as `<city>;`, | ||||||
|  | /// with the latter being used to accumulate negative temperatures. | ||||||
|  | #[inline(never)] | ||||||
|  | pub fn find_city_names(data: &[u8]) -> Vec<Vec<u8>> { | ||||||
|  |     let mut cities = FxHashSet::default(); | ||||||
|  |  | ||||||
|  |     let mut callback = |data: &[u8], state: State| { | ||||||
|  |         let State { start, sep, .. } = state; | ||||||
|  |         let name = unsafe { data.get_unchecked(start..sep) }; | ||||||
|  |         cities.insert(name.to_vec()); | ||||||
|  |  | ||||||
|  |         // Do the same for the name with ; appended. | ||||||
|  |         let name = unsafe { data.get_unchecked(start..sep + 1) }; | ||||||
|  |         cities.insert(name.to_vec()); | ||||||
|  |     }; | ||||||
|  |     iter_lines(data, |d, s0, s1, s2, s3| { | ||||||
|  |         flatten_callback(d, s0, s1, s2, s3, &mut callback) | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     let mut cities: Vec<_> = cities.into_iter().collect(); | ||||||
|  |     cities.sort(); | ||||||
|  |     cities | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn flatten_callback<'a>( | ||||||
|  |     data: &'a [u8], | ||||||
|  |     s0: State, | ||||||
|  |     s1: State, | ||||||
|  |     s2: State, | ||||||
|  |     s3: State, | ||||||
|  |     callback: &mut impl FnMut(&'a [u8], State), | ||||||
|  | ) { | ||||||
|  |     callback(data, s0); | ||||||
|  |     callback(data, s1); | ||||||
|  |     callback(data, s2); | ||||||
|  |     callback(data, s3); | ||||||
|  | } | ||||||
|  |  | ||||||
// Command-line arguments.
// Plain `//` comments are used here on purpose: clap's derive turns `///`
// doc comments into help text, which would change the program's output.
#[derive(clap::Parser)]
pub struct Args {
    // Optional positional argument.
    // Presumably the path of the measurements file — confirm against the caller.
    pub input: Option<String>,

    // `-j` / `--threads`.
    // Presumably forwarded to `run_parallel` as `num_threads` — confirm.
    #[clap(short = 'j', long)]
    pub threads: Option<usize>,

    // `--print` flag; its effect is decided by the caller.
    #[clap(long)]
    pub print: bool,

    // `--stats` flag; its effect is decided by the caller.
    #[clap(long)]
    pub stats: bool,
}
|  |  | ||||||
#[cfg(test)]
mod test {
    use super::*;

    /// Round-trips readings through `parse_to_raw` and `raw_to_value`.
    #[test]
    fn parse_raw() {
        // `parse_to_raw` loads four bytes starting at `start`, so the buffer
        // carries bytes after the reading (as a real line does) to keep the
        // load for `start == 1` inside it.
        let d = b"12.3\n...";

        let raw = parse_to_raw(d, 0, 4);
        let v = raw_to_value(raw);
        assert_eq!(v, 123);

        let raw = parse_to_raw(d, 1, 4);
        let v = raw_to_value(raw);
        assert_eq!(v, 23);
    }
}
| @@ -1,16 +1,15 @@ | |||||||
|  | use crate::models::station_measurements::StationMeasurements; | ||||||
|  | use crate::utils::{hash, parse}; | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::io::{BufRead, BufReader}; | use std::io::{BufRead, BufReader}; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
| use crate::models::station_measurements::StationMeasurements; |  | ||||||
| use crate::utils::parse; |  | ||||||
| use crate::utils::parse::hashstr; |  | ||||||
|  |  | ||||||
| const DEFAULT_HASHMAP_LENGTH: usize = 10000; | const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||||
|  |  | ||||||
| pub fn run() { | pub fn run() { | ||||||
|     let now = Instant::now(); |     let now = Instant::now(); | ||||||
|     let mut stations: HashMap<usize, (String, StationMeasurements)> = |     let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||||
|         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); |         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||||
|  |  | ||||||
|     let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); |     let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); | ||||||
| @@ -24,8 +23,8 @@ pub fn run() { | |||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|         let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |         let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|         let hash = hashstr(station); |         let hash = hash::bytes(station); | ||||||
|         let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; |         let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|         let temp = parse::temp(temp.split_last().unwrap().1); |         let temp = parse::temp(temp.split_last().unwrap().1); | ||||||
|         let measurements_option = stations.get_mut(&hash); |         let measurements_option = stations.get_mut(&hash); | ||||||
|         if let Some((_, measurements)) = measurements_option { |         if let Some((_, measurements)) = measurements_option { | ||||||
| @@ -37,7 +36,7 @@ pub fn run() { | |||||||
|                 count: 1, |                 count: 1, | ||||||
|                 sum: temp, |                 sum: temp, | ||||||
|             }; |             }; | ||||||
|             stations.insert(hash, (station, measurements)); |             stations.insert(hash, (station.to_string(), measurements)); | ||||||
|         } |         } | ||||||
|         line.clear(); |         line.clear(); | ||||||
|     } |     } | ||||||
| @@ -51,5 +50,5 @@ pub fn run() { | |||||||
|     stations.sort(); |     stations.sort(); | ||||||
|     let stations = stations.join(","); |     let stations = stations.join(","); | ||||||
|     println!("{{{stations}}}"); |     println!("{{{stations}}}"); | ||||||
|     println!("Time={} μs", now.elapsed().as_micros()); |     println!("Time={} ms", now.elapsed().as_millis()); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -2,8 +2,7 @@ use smol::fs::File; | |||||||
| use smol::io::{AsyncBufReadExt, BufReader}; | use smol::io::{AsyncBufReadExt, BufReader}; | ||||||
|  |  | ||||||
| use crate::models::station_measurements::StationMeasurements; | use crate::models::station_measurements::StationMeasurements; | ||||||
| use crate::utils::parse; | use crate::utils::{hash, parse}; | ||||||
| use crate::utils::parse::hashstr; |  | ||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
|  |  | ||||||
| @@ -11,7 +10,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000; | |||||||
|  |  | ||||||
| pub fn run() { | pub fn run() { | ||||||
|     let now = Instant::now(); |     let now = Instant::now(); | ||||||
|     let mut stations: HashMap<usize, (String, StationMeasurements)> = |     let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||||
|         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); |         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||||
|  |  | ||||||
|     smol::block_on(async { |     smol::block_on(async { | ||||||
| @@ -29,8 +28,8 @@ pub fn run() { | |||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|             let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); |             let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); | ||||||
|             let hash = hashstr(station); |             let hash = hash::bytes(station); | ||||||
|             let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; |             let station = unsafe { std::str::from_utf8_unchecked(station) }; | ||||||
|             let temp = parse::temp(temp.split_last().unwrap().1); |             let temp = parse::temp(temp.split_last().unwrap().1); | ||||||
|             let measurements_option = stations.get_mut(&hash); |             let measurements_option = stations.get_mut(&hash); | ||||||
|             if let Some((_, measurements)) = measurements_option { |             if let Some((_, measurements)) = measurements_option { | ||||||
| @@ -42,7 +41,7 @@ pub fn run() { | |||||||
|                     count: 1, |                     count: 1, | ||||||
|                     sum: temp, |                     sum: temp, | ||||||
|                 }; |                 }; | ||||||
|                 stations.insert(hash, (station, measurements)); |                 stations.insert(hash, (station.to_string(), measurements)); | ||||||
|             } |             } | ||||||
|             line.clear(); |             line.clear(); | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -2,7 +2,8 @@ | |||||||
| #![feature(portable_simd)] | #![feature(portable_simd)] | ||||||
| #![feature(slice_split_once)] | #![feature(slice_split_once)] | ||||||
| #![feature(hash_raw_entry)] | #![feature(hash_raw_entry)] | ||||||
|  | #![feature(int_roundings)] | ||||||
|  |  | ||||||
|  | pub mod implementations; | ||||||
| pub mod models; | pub mod models; | ||||||
| pub mod utils; | pub mod utils; | ||||||
| pub mod implementations; |  | ||||||
|   | |||||||
| @@ -1,4 +1,3 @@ | |||||||
|  |  | ||||||
| fn main() { | fn main() { | ||||||
|     // let now = Instant::now(); |     // let now = Instant::now(); | ||||||
|     // let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); |     // let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); | ||||||
|   | |||||||
| @@ -1,2 +1,2 @@ | |||||||
| pub mod station_measurements; |  | ||||||
| pub mod mmap; | pub mod mmap; | ||||||
|  | pub mod station_measurements; | ||||||
|   | |||||||
| @@ -12,7 +12,7 @@ pub struct Mmap<'a> { | |||||||
|  |  | ||||||
| /// To properly dispose of the mmap we have to manually call munmap. | /// To properly dispose of the mmap we have to manually call munmap. | ||||||
| /// So implementing drop for this smart-pointer type is necessary. | /// So implementing drop for this smart-pointer type is necessary. | ||||||
| impl<'a> Drop for Mmap<'a> { | impl Drop for Mmap<'_> { | ||||||
|     fn drop(&mut self) { |     fn drop(&mut self) { | ||||||
|         unsafe { |         unsafe { | ||||||
|             munmap( |             munmap( | ||||||
| @@ -25,7 +25,7 @@ impl<'a> Drop for Mmap<'a> { | |||||||
|  |  | ||||||
| // anti-pattern for non-smart pointer types. | // anti-pattern for non-smart pointer types. | ||||||
| // ref: https://rust-unofficial.github.io/patterns/anti_patterns/deref.html | // ref: https://rust-unofficial.github.io/patterns/anti_patterns/deref.html | ||||||
| impl<'a> Deref for Mmap<'a> { | impl Deref for Mmap<'_> { | ||||||
|     type Target = [u8]; |     type Target = [u8]; | ||||||
|  |  | ||||||
|     fn deref(&self) -> &Self::Target { |     fn deref(&self) -> &Self::Target { | ||||||
| @@ -50,7 +50,7 @@ impl<'a> Mmap<'a> { | |||||||
|             // We can advise the kernel on how we intend to use the mmap. |             // We can advise the kernel on how we intend to use the mmap. | ||||||
|             // But this did not improve my read performance in a meaningful way |             // But this did not improve my read performance in a meaningful way | ||||||
|             madvise(m, size, MADV_WILLNEED); |             madvise(m, size, MADV_WILLNEED); | ||||||
|             return Self::new(std::slice::from_raw_parts(m as *const u8, size)); |             Self::new(std::slice::from_raw_parts(m as *const u8, size)) | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| pub mod byte_pos; | pub mod byte_pos; | ||||||
|  | pub mod hash; | ||||||
| pub mod parse; | pub mod parse; | ||||||
| pub mod write_structured_measurements; | pub mod write_structured_measurements; | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										40
									
								
								src/main/rust/src/utils/hash.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								src/main/rust/src/utils/hash.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | |||||||
/// Hashes a byte string from its first and last 8 bytes.
///
/// Inspired by https://curiouscoding.nl/posts/1brc/
///
/// Debug and release builds return the same value. Inputs shorter than
/// 8 bytes are zero-padded (left-aligned in the head word, right-aligned in
/// the tail word), so nothing outside `bytes` is read and the result depends
/// on the input alone. The words are read little-endian, which matches the
/// native-endian reads on little-endian targets. Empty input hashes to 0.
#[inline]
pub fn bytes(bytes: &[u8]) -> u64 {
    let len = bytes.len();
    let (head, tail) = match (bytes.first_chunk::<8>(), bytes.last_chunk::<8>()) {
        (Some(head), Some(tail)) => (*head, *tail),
        // Fewer than 8 bytes: build padded copies.
        _ => {
            let mut head = [0u8; 8];
            let mut tail = [0u8; 8];
            head[..len].copy_from_slice(bytes);
            tail[8 - len..].copy_from_slice(bytes);
            (head, tail)
        }
    };
    let shift = 64usize.saturating_sub(8 * len) as u32;
    // `shift` reaches 64 only for empty input, where both words are zero, so
    // the wrapped shift amount cannot change the result.
    let khead = u64::from_le_bytes(head).wrapping_shl(shift);
    let ktail = u64::from_le_bytes(tail).wrapping_shr(shift);
    khead.wrapping_add(ktail)
}
|  |  | ||||||
#[cfg(test)]
mod tests {
    use crate::utils::hash;

    /// A byte string and its reverse must not collide, for inputs both longer
    /// and shorter than 8 bytes.
    #[test]
    fn test_hashstr() {
        let reversed_pairs: [(&[u8], &[u8]); 2] =
            [(b"abcdefghijk", b"kjihgfedcba"), (b"abba", b"baab")];

        for (forward, backward) in reversed_pairs {
            assert_ne!(hash::bytes(forward), hash::bytes(backward));
        }
    }
}
| @@ -69,27 +69,9 @@ pub fn temp_simd(bytes: &[u8]) -> isize { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[inline] |  | ||||||
| pub fn hashstr(bytes: &[u8]) -> usize { |  | ||||||
|     let mut hash = 0; |  | ||||||
|     let (chunks, remainder) = bytes.as_chunks::<8>(); |  | ||||||
|     for &chunk in chunks { |  | ||||||
|         hash += usize::from_be_bytes(chunk); |  | ||||||
|     } |  | ||||||
|     let mut r = [0_u8; 8]; |  | ||||||
|     r[0] = remainder.len() as u8; |  | ||||||
|     let mut idx = 1; |  | ||||||
|     for &byte in remainder { |  | ||||||
|         r[idx] = byte; |  | ||||||
|         idx += 1; |  | ||||||
|     } |  | ||||||
|     hash += usize::from_be_bytes(r); |  | ||||||
|     hash |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod tests { | mod tests { | ||||||
|     use crate::utils::parse::{hashstr, temp_new}; |     use crate::utils::parse::temp_new; | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn test_temp_new_max() { |     fn test_temp_new_max() { | ||||||
| @@ -120,15 +102,4 @@ mod tests { | |||||||
|         let temp_neg_10 = temp_new("-9.9".as_bytes()); |         let temp_neg_10 = temp_new("-9.9".as_bytes()); | ||||||
|         assert_eq!(temp_neg_10, -99); |         assert_eq!(temp_neg_10, -99); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |  | ||||||
|     fn test_hashstr() { |  | ||||||
|         let hash_1 = hashstr(b"abcdefghijk"); |  | ||||||
|         let hash_2 = hashstr(b"kjihgfedcba"); |  | ||||||
|         let hash_3 = hashstr(b"abba"); |  | ||||||
|         let hash_4 = hashstr(b"baab"); |  | ||||||
|  |  | ||||||
|         assert_ne!(hash_1, hash_2); |  | ||||||
|         assert_ne!(hash_3, hash_4); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -3,7 +3,8 @@ use std::io::{BufRead, BufReader, BufWriter, Write}; | |||||||
|  |  | ||||||
| pub fn write_structured_measurements() { | pub fn write_structured_measurements() { | ||||||
|     let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); |     let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); | ||||||
|     let structured_file = File::create_new("structured_measurements.txt").expect("Could not create file"); |     let structured_file = | ||||||
|  |         File::create_new("structured_measurements.txt").expect("Could not create file"); | ||||||
|     let mut reader = BufReader::new(&file); |     let mut reader = BufReader::new(&file); | ||||||
|     let mut writer = BufWriter::new(&structured_file); |     let mut writer = BufWriter::new(&structured_file); | ||||||
|     let mut line = Vec::with_capacity(107); |     let mut line = Vec::with_capacity(107); | ||||||
| @@ -27,7 +28,9 @@ pub fn write_structured_measurements() { | |||||||
|         write_line[100] = b';'; |         write_line[100] = b';'; | ||||||
|         write_line[temp_val_start..temp_val_start + temp.len()].clone_from_slice(temp); |         write_line[temp_val_start..temp_val_start + temp.len()].clone_from_slice(temp); | ||||||
|         write_line[106] = b'\n'; |         write_line[106] = b'\n'; | ||||||
|         writer.write_all(write_line.as_slice()).expect("Could not write"); |         writer | ||||||
|  |             .write_all(write_line.as_slice()) | ||||||
|  |             .expect("Could not write"); | ||||||
|         line.clear(); |         line.clear(); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user