Compare commits
	
		
			14 Commits
		
	
	
		
			520df062d3
			...
			b4e3992c65
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| b4e3992c65 | |||
| 393f802741 | |||
| 327fe8564e | |||
| 14d608b209 | |||
| 2f82e8788d | |||
| 91adbf4c1c | |||
| 00096647ee | |||
| fe1053b74a | |||
| 3ffed9099c | |||
| 65df621cf6 | |||
| bd83b9bc2c | |||
| 15525282d6 | |||
| 6cc29fb645 | |||
| 6f548678f2 | 
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -9,7 +9,7 @@ release.properties
 | 
				
			|||||||
.project
 | 
					.project
 | 
				
			||||||
.classpath
 | 
					.classpath
 | 
				
			||||||
.settings/
 | 
					.settings/
 | 
				
			||||||
bin/
 | 
					./bin/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# IntelliJ
 | 
					# IntelliJ
 | 
				
			||||||
.idea
 | 
					.idea
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										194
									
								
								src/main/rust/Cargo.lock
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										194
									
								
								src/main/rust/Cargo.lock
									
									
									
										generated
									
									
									
										Normal file
									
								
							@@ -0,0 +1,194 @@
 | 
				
			|||||||
 | 
					# This file is automatically @generated by Cargo.
 | 
				
			||||||
 | 
					# It is not intended for manual editing.
 | 
				
			||||||
 | 
					version = 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "bstr"
 | 
				
			||||||
 | 
					version = "1.9.1"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "memchr",
 | 
				
			||||||
 | 
					 "regex-automata",
 | 
				
			||||||
 | 
					 "serde",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "crossbeam-deque"
 | 
				
			||||||
 | 
					version = "0.8.5"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "crossbeam-epoch",
 | 
				
			||||||
 | 
					 "crossbeam-utils",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "crossbeam-epoch"
 | 
				
			||||||
 | 
					version = "0.9.18"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "crossbeam-utils",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "crossbeam-utils"
 | 
				
			||||||
 | 
					version = "0.8.20"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "either"
 | 
				
			||||||
 | 
					version = "1.13.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "fast-float"
 | 
				
			||||||
 | 
					version = "0.2.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "libc"
 | 
				
			||||||
 | 
					version = "0.2.155"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "memchr"
 | 
				
			||||||
 | 
					version = "2.7.4"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "memmap"
 | 
				
			||||||
 | 
					version = "0.7.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "libc",
 | 
				
			||||||
 | 
					 "winapi",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "onebrc"
 | 
				
			||||||
 | 
					version = "0.1.0"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "bstr",
 | 
				
			||||||
 | 
					 "fast-float",
 | 
				
			||||||
 | 
					 "memchr",
 | 
				
			||||||
 | 
					 "memmap",
 | 
				
			||||||
 | 
					 "rayon",
 | 
				
			||||||
 | 
					 "rustc-hash",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "proc-macro2"
 | 
				
			||||||
 | 
					version = "1.0.86"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "unicode-ident",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "quote"
 | 
				
			||||||
 | 
					version = "1.0.36"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "proc-macro2",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "rayon"
 | 
				
			||||||
 | 
					version = "1.10.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "either",
 | 
				
			||||||
 | 
					 "rayon-core",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "rayon-core"
 | 
				
			||||||
 | 
					version = "1.12.1"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "crossbeam-deque",
 | 
				
			||||||
 | 
					 "crossbeam-utils",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "regex-automata"
 | 
				
			||||||
 | 
					version = "0.4.7"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "rustc-hash"
 | 
				
			||||||
 | 
					version = "2.0.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "serde"
 | 
				
			||||||
 | 
					version = "1.0.204"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "serde_derive",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "serde_derive"
 | 
				
			||||||
 | 
					version = "1.0.204"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "proc-macro2",
 | 
				
			||||||
 | 
					 "quote",
 | 
				
			||||||
 | 
					 "syn",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "syn"
 | 
				
			||||||
 | 
					version = "2.0.72"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "proc-macro2",
 | 
				
			||||||
 | 
					 "quote",
 | 
				
			||||||
 | 
					 "unicode-ident",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "unicode-ident"
 | 
				
			||||||
 | 
					version = "1.0.12"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "winapi"
 | 
				
			||||||
 | 
					version = "0.3.9"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "winapi-i686-pc-windows-gnu",
 | 
				
			||||||
 | 
					 "winapi-x86_64-pc-windows-gnu",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "winapi-i686-pc-windows-gnu"
 | 
				
			||||||
 | 
					version = "0.4.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "winapi-x86_64-pc-windows-gnu"
 | 
				
			||||||
 | 
					version = "0.4.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 | 
				
			||||||
							
								
								
									
										20
									
								
								src/main/rust/Cargo.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								src/main/rust/Cargo.toml
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,20 @@
 | 
				
			|||||||
 | 
					[package]
 | 
				
			||||||
 | 
					name = "onebrc"
 | 
				
			||||||
 | 
					version = "0.1.0"
 | 
				
			||||||
 | 
					edition = "2021"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[dependencies]
 | 
				
			||||||
 | 
					bstr = "1.9.1"
 | 
				
			||||||
 | 
					fast-float = "0.2.0"
 | 
				
			||||||
 | 
					memchr = "2.7.4"
 | 
				
			||||||
 | 
					memmap = "0.7.0"
 | 
				
			||||||
 | 
					#polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
 | 
				
			||||||
 | 
					rayon = "1.10.0"
 | 
				
			||||||
 | 
					rustc-hash = "2.0.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[profile.release]
 | 
				
			||||||
 | 
					lto = "fat"
 | 
				
			||||||
 | 
					strip = "symbols"
 | 
				
			||||||
 | 
					panic = "abort"
 | 
				
			||||||
							
								
								
									
										88
									
								
								src/main/rust/src/bin/multi_threaded.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								src/main/rust/src/bin/multi_threaded.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,88 @@
 | 
				
			|||||||
 | 
					use std::{
 | 
				
			||||||
 | 
					    fs::File,
 | 
				
			||||||
 | 
					    io::{BufRead, BufReader},
 | 
				
			||||||
 | 
					    thread,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					use std::collections::HashMap;
 | 
				
			||||||
 | 
					use std::sync::mpsc;
 | 
				
			||||||
 | 
					use std::time::Instant;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use onebrc::parse_temp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const DEFAULT_HASHMAP_LENGTH: usize = 10000;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn main() {
 | 
				
			||||||
 | 
					    //print!("\x1b[2J");
 | 
				
			||||||
 | 
					    print!("\x1b[s");
 | 
				
			||||||
 | 
					    thread::scope(|s| {
 | 
				
			||||||
 | 
					        let mut stations: HashMap<String, onebrc::StationMeasurements> = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
 | 
				
			||||||
 | 
					        let (tx, rx) = mpsc::channel();
 | 
				
			||||||
 | 
					        let now = Instant::now();
 | 
				
			||||||
 | 
					        let cores: usize = thread::available_parallelism().unwrap().into();
 | 
				
			||||||
 | 
					        let chunk_length = 1_000_000_000 / cores;
 | 
				
			||||||
 | 
					        for i in 0..cores {
 | 
				
			||||||
 | 
					            let print_line = i + 1;
 | 
				
			||||||
 | 
					            print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: 0%");
 | 
				
			||||||
 | 
					            let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
 | 
				
			||||||
 | 
					            let reader = BufReader::new(file);
 | 
				
			||||||
 | 
					            let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length);
 | 
				
			||||||
 | 
					            let tx = tx.clone();
 | 
				
			||||||
 | 
					            s.spawn(move || {
 | 
				
			||||||
 | 
					                let mut t_stations: HashMap<String, onebrc::StationMeasurements> =
 | 
				
			||||||
 | 
					                    HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let now_read_line = Instant::now();
 | 
				
			||||||
 | 
					                let mut line_num = 0;
 | 
				
			||||||
 | 
					                line_chunk.for_each(|line| {
 | 
				
			||||||
 | 
					                    if line_num == 0 {
 | 
				
			||||||
 | 
					                        print!("\x1b[u\x1b[{print_line}B\x1b[30CStart read line {}ms", now_read_line.elapsed().as_millis());
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    if line_num % 10000 == 0 {
 | 
				
			||||||
 | 
					                        //let formatted = format_nums(line_num);
 | 
				
			||||||
 | 
					                        //print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: {formatted}");
 | 
				
			||||||
 | 
					                        let percent = (line_num as f64 / chunk_length as f64) * 100.0;
 | 
				
			||||||
 | 
					                        print!("\x1b[u\x1b[{print_line}B\x1b[0CThread #{i:0>2}: {percent:.2}%");
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    line_num += 1;
 | 
				
			||||||
 | 
					                    let line = line.expect("could not read line");
 | 
				
			||||||
 | 
					                    let (station, temp) = line.split_once(';').expect("Error while splitting");
 | 
				
			||||||
 | 
					                    let temp = parse_temp(temp.as_bytes());
 | 
				
			||||||
 | 
					                    let measurements_option = t_stations.get_mut(station);
 | 
				
			||||||
 | 
					                    if let Some(measurements) = measurements_option {
 | 
				
			||||||
 | 
					                        measurements.update(temp);
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        let measurements = onebrc::StationMeasurements {
 | 
				
			||||||
 | 
					                            min: temp,
 | 
				
			||||||
 | 
					                            max: temp,
 | 
				
			||||||
 | 
					                            count: 1,
 | 
				
			||||||
 | 
					                            sum: temp,
 | 
				
			||||||
 | 
					                        };
 | 
				
			||||||
 | 
					                        t_stations.insert(station.to_owned(), measurements);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                });
 | 
				
			||||||
 | 
					                print!("\x1b[u\x1b[{print_line}B\x1b[60CTime reading lines in thread {i}={} ms", now_read_line.elapsed().as_millis());
 | 
				
			||||||
 | 
					                let _ = tx.send(t_stations);
 | 
				
			||||||
 | 
					            });
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        print!("\x1b[{cores}B");
 | 
				
			||||||
 | 
					        drop(tx);
 | 
				
			||||||
 | 
					        while let Ok(t_stations) = rx.recv() {
 | 
				
			||||||
 | 
					            for (station, measurements) in t_stations.iter() {
 | 
				
			||||||
 | 
					                let joined_measurements_options = stations.get_mut(station.as_str());
 | 
				
			||||||
 | 
					                if let Some(joined_measurements) = joined_measurements_options {
 | 
				
			||||||
 | 
					                    joined_measurements.merge(measurements);
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    stations.insert(station.to_owned(), *measurements);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
 | 
				
			||||||
 | 
					            let measurements = measurements.to_string();
 | 
				
			||||||
 | 
					            format!("{station}={measurements}")
 | 
				
			||||||
 | 
					        }).collect();
 | 
				
			||||||
 | 
					        stations.sort();
 | 
				
			||||||
 | 
					        let _stations = stations.join(",");
 | 
				
			||||||
 | 
					        // println!("{{{stations}}}");
 | 
				
			||||||
 | 
					        println!("\n\nTime={} ms", now.elapsed().as_millis());
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										35
									
								
								src/main/rust/src/bin/polars.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								src/main/rust/src/bin/polars.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
				
			|||||||
 | 
					use polars::prelude::*;
 | 
				
			||||||
 | 
					use std::time::Instant;
 | 
				
			||||||
 | 
					use std::vec;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn run_polars() -> Result<DataFrame, PolarsError> {
 | 
				
			||||||
 | 
					    let now = Instant::now();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let f1: Field = Field::new("station", DataType::String);
 | 
				
			||||||
 | 
					    let f2: Field = Field::new("measure", DataType::Float64);
 | 
				
			||||||
 | 
					    let sc: Schema = Schema::from_iter(vec![f1, f2]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let q = LazyCsvReader::new("../../../measurements.txt")
 | 
				
			||||||
 | 
					        .has_header(false)
 | 
				
			||||||
 | 
					        .with_schema(Some(Arc::new(sc)))
 | 
				
			||||||
 | 
					        .with_separator(b';')
 | 
				
			||||||
 | 
					        .finish()?
 | 
				
			||||||
 | 
					        .group_by(vec![col("station")])
 | 
				
			||||||
 | 
					        .agg(vec![
 | 
				
			||||||
 | 
					            col("measure").alias("min").min(),
 | 
				
			||||||
 | 
					            col("measure").alias("mean").mean(),
 | 
				
			||||||
 | 
					            col("measure").alias("max").max(),
 | 
				
			||||||
 | 
					        ])
 | 
				
			||||||
 | 
					        .sort("station", Default::default())
 | 
				
			||||||
 | 
					        .with_streaming(true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let df = q.collect()?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    println!("Time={} μs", now.elapsed().as_micros());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Ok(df)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn main() {
 | 
				
			||||||
 | 
					    run_polars();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										116
									
								
								src/main/rust/src/bin/referenceImpl.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								src/main/rust/src/bin/referenceImpl.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,116 @@
 | 
				
			|||||||
 | 
					use bstr::{BStr, ByteSlice};
 | 
				
			||||||
 | 
					use memmap::MmapOptions;
 | 
				
			||||||
 | 
					use rustc_hash::FxHashMap as HashMap;
 | 
				
			||||||
 | 
					use std::{fmt::Display, fs::File};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use rayon::prelude::*;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug)]
 | 
				
			||||||
 | 
					struct State {
 | 
				
			||||||
 | 
					    min: f64,
 | 
				
			||||||
 | 
					    max: f64,
 | 
				
			||||||
 | 
					    count: u64,
 | 
				
			||||||
 | 
					    sum: f64,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Default for State {
 | 
				
			||||||
 | 
					    fn default() -> Self {
 | 
				
			||||||
 | 
					        Self {
 | 
				
			||||||
 | 
					            min: f64::MAX,
 | 
				
			||||||
 | 
					            max: f64::MIN,
 | 
				
			||||||
 | 
					            count: 0,
 | 
				
			||||||
 | 
					            sum: 0.0,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Display for State {
 | 
				
			||||||
 | 
					    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 | 
				
			||||||
 | 
					        let avg = self.sum / (self.count as f64);
 | 
				
			||||||
 | 
					        write!(f, "{:.1}/{avg:.1}/{:.1}", self.min, self.max)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl State {
 | 
				
			||||||
 | 
					    fn update(&mut self, v: f64) {
 | 
				
			||||||
 | 
					        self.min = self.min.min(v);
 | 
				
			||||||
 | 
					        self.max = self.max.max(v);
 | 
				
			||||||
 | 
					        self.count += 1;
 | 
				
			||||||
 | 
					        self.sum += v;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn merge(&mut self, other: &Self) {
 | 
				
			||||||
 | 
					        self.min = self.min.min(other.min);
 | 
				
			||||||
 | 
					        self.max = self.max.max(other.max);
 | 
				
			||||||
 | 
					        self.count += other.count;
 | 
				
			||||||
 | 
					        self.sum += other.sum;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn make_map<'a>(i: impl Iterator<Item = &'a [u8]>) -> HashMap<&'a BStr, State> {
 | 
				
			||||||
 | 
					    let mut state: HashMap<&'a BStr, State> = Default::default();
 | 
				
			||||||
 | 
					    for line in i {
 | 
				
			||||||
 | 
					        let (name, value) = line.split_once_str(&[b';']).unwrap();
 | 
				
			||||||
 | 
					        let value = fast_float::parse(value).unwrap();
 | 
				
			||||||
 | 
					        state.entry(name.into()).or_default().update(value);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    state
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn solve_for_part((start, end): (usize, usize), mem: &[u8]) -> HashMap<&BStr, State> {
 | 
				
			||||||
 | 
					    make_map((&mem[start..end]).lines())
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) {
 | 
				
			||||||
 | 
					    for (k, v) in b {
 | 
				
			||||||
 | 
					        a.entry(k).or_default().merge(v);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn main() {
 | 
				
			||||||
 | 
					    let cores: usize = std::thread::available_parallelism().unwrap().into();
 | 
				
			||||||
 | 
					    let path = match std::env::args().skip(1).next() {
 | 
				
			||||||
 | 
					        Some(path) => path,
 | 
				
			||||||
 | 
					        None => "measurements.txt".to_owned(),
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    let file = File::open(path).unwrap();
 | 
				
			||||||
 | 
					    let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let chunk_size = mmap.len() / cores;
 | 
				
			||||||
 | 
					    let mut chunks: Vec<(usize, usize)> = vec![];
 | 
				
			||||||
 | 
					    let mut start = 0;
 | 
				
			||||||
 | 
					    for _ in 0..cores {
 | 
				
			||||||
 | 
					        let end = (start + chunk_size).min(mmap.len());
 | 
				
			||||||
 | 
					        let next_new_line = match memchr::memchr(b'\n', &mmap[end..]) {
 | 
				
			||||||
 | 
					            Some(v) => v,
 | 
				
			||||||
 | 
					            None => {
 | 
				
			||||||
 | 
					                assert_eq!(end, mmap.len());
 | 
				
			||||||
 | 
					                0
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					        let end = end + next_new_line;
 | 
				
			||||||
 | 
					        chunks.push((start, end));
 | 
				
			||||||
 | 
					        start = end + 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let parts: Vec<_> = chunks
 | 
				
			||||||
 | 
					        .par_iter()
 | 
				
			||||||
 | 
					        .map(|r| solve_for_part(*r, &mmap))
 | 
				
			||||||
 | 
					        .collect();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let state: HashMap<&BStr, State> = parts.into_iter().fold(Default::default(), |mut a, b| {
 | 
				
			||||||
 | 
					        merge(&mut a, &b);
 | 
				
			||||||
 | 
					        a
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut all: Vec<_> = state.into_iter().collect();
 | 
				
			||||||
 | 
					    all.sort_unstable_by(|a, b| a.0.cmp(&b.0));
 | 
				
			||||||
 | 
					    print!("{{");
 | 
				
			||||||
 | 
					    for (i, (name, state)) in all.into_iter().enumerate() {
 | 
				
			||||||
 | 
					        if i == 0 {
 | 
				
			||||||
 | 
					            print!("{name}={state}");
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            print!(", {name}={state}");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    println!("}}");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										44
									
								
								src/main/rust/src/bin/single_thread.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								src/main/rust/src/bin/single_thread.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,44 @@
 | 
				
			|||||||
 | 
					use std::{
 | 
				
			||||||
 | 
					    fs::File,
 | 
				
			||||||
 | 
					    io::{BufRead, BufReader},
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					use std::collections::HashMap;
 | 
				
			||||||
 | 
					use std::time::Instant;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const DEFAULT_HASHMAP_LENGTH: usize = 10000;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn main() {
 | 
				
			||||||
 | 
					    let now = Instant::now();
 | 
				
			||||||
 | 
					    let mut stations: HashMap<String, onebrc::StationMeasurements> =
 | 
				
			||||||
 | 
					        HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
 | 
				
			||||||
 | 
					    let reader = BufReader::new(file);
 | 
				
			||||||
 | 
					    for line_result in reader.lines() {
 | 
				
			||||||
 | 
					        let line = line_result.expect("could not read line");
 | 
				
			||||||
 | 
					        let (station, temp) = line.split_once(';').unwrap();
 | 
				
			||||||
 | 
					        let temp = onebrc::parse_temp(temp.as_bytes());
 | 
				
			||||||
 | 
					        let measurements_option = stations.get_mut(station);
 | 
				
			||||||
 | 
					        if let Some(measurements) = measurements_option {
 | 
				
			||||||
 | 
					            measurements.update(temp);
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            let measurements = onebrc::StationMeasurements {
 | 
				
			||||||
 | 
					                min: temp,
 | 
				
			||||||
 | 
					                max: temp,
 | 
				
			||||||
 | 
					                count: 1,
 | 
				
			||||||
 | 
					                sum: temp,
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					            stations.insert(station.to_owned(), measurements);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let mut stations: Vec<String> = stations.iter().map(|(station, measurements)| {
 | 
				
			||||||
 | 
					        let measurements = measurements.to_string();
 | 
				
			||||||
 | 
					        format!("{station}={measurements}")
 | 
				
			||||||
 | 
					    }).collect();
 | 
				
			||||||
 | 
					    stations.sort();
 | 
				
			||||||
 | 
					    let stations = stations.join(",");
 | 
				
			||||||
 | 
					    println!("{{{stations}}}");
 | 
				
			||||||
 | 
					    println!("Time={} μs", now.elapsed().as_micros());
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										67
									
								
								src/main/rust/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								src/main/rust/src/lib.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,67 @@
 | 
				
			|||||||
 | 
					use std::fmt::Display;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Copy, Clone)]
 | 
				
			||||||
 | 
					pub struct StationMeasurements {
 | 
				
			||||||
 | 
					    pub min: isize,
 | 
				
			||||||
 | 
					    pub max: isize,
 | 
				
			||||||
 | 
					    pub count: isize,
 | 
				
			||||||
 | 
					    pub sum: isize,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl StationMeasurements {
 | 
				
			||||||
 | 
					    pub fn update(&mut self, v: isize) {
 | 
				
			||||||
 | 
					        self.min = self.min.min(v);
 | 
				
			||||||
 | 
					        self.max = self.max.max(v);
 | 
				
			||||||
 | 
					        self.count += 1;
 | 
				
			||||||
 | 
					        self.sum += v;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn merge(&mut self, other: &Self) {
 | 
				
			||||||
 | 
					        self.min = self.min.min(other.min);
 | 
				
			||||||
 | 
					        self.max = self.max.max(other.max);
 | 
				
			||||||
 | 
					        self.count += other.count;
 | 
				
			||||||
 | 
					        self.sum += other.sum;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Display for StationMeasurements {
 | 
				
			||||||
 | 
					    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 | 
				
			||||||
 | 
					        let min = self.min as f64 / 10.0;
 | 
				
			||||||
 | 
					        let max = self.max as f64 / 10.0;
 | 
				
			||||||
 | 
					        let avg = (self.sum as f64 / self.count as f64) / 10.0;
 | 
				
			||||||
 | 
					        write!(f, "{min}/{avg:.1}/{max}")
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn format_nums(num: usize) -> String {
 | 
				
			||||||
 | 
					    num.to_string()
 | 
				
			||||||
 | 
					        .as_bytes()
 | 
				
			||||||
 | 
					        .rchunks(3)
 | 
				
			||||||
 | 
					        .rev()
 | 
				
			||||||
 | 
					        .map(std::str::from_utf8)
 | 
				
			||||||
 | 
					        .collect::<Result<Vec<&str>, _>>()
 | 
				
			||||||
 | 
					        .unwrap()
 | 
				
			||||||
 | 
					        .join("_")
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[inline]
 | 
				
			||||||
 | 
					pub const fn get_digit(b: u8) -> u32 {
 | 
				
			||||||
 | 
					    (b as u32).wrapping_sub('0' as u32)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[inline]
 | 
				
			||||||
 | 
					pub fn parse_temp(bytes: &[u8]) -> isize {
 | 
				
			||||||
 | 
					    let is_negative = bytes[0] == b'-';
 | 
				
			||||||
 | 
					    let as_decimal = match (is_negative, bytes.len()) {
 | 
				
			||||||
 | 
					        (true, 4) => get_digit(bytes[1]) * 10 + get_digit(bytes[3]),
 | 
				
			||||||
 | 
					        (true, 5) => get_digit(bytes[1]) * 100 + get_digit(bytes[2]) * 10 + get_digit(bytes[4]),
 | 
				
			||||||
 | 
					        (false, 3) => get_digit(bytes[0]) * 10 + get_digit(bytes[2]),
 | 
				
			||||||
 | 
					        (false, 4) => get_digit(bytes[0]) * 100 + get_digit(bytes[1]) * 10 + get_digit(bytes[3]),
 | 
				
			||||||
 | 
					        _x => panic!(),
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    if is_negative {
 | 
				
			||||||
 | 
					        -(as_decimal as isize)
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        as_decimal as isize
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										8
									
								
								src/main/rust/src/main.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								src/main/rust/src/main.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					fn main() {
 | 
				
			||||||
 | 
					    // let now = Instant::now();
 | 
				
			||||||
 | 
					    // let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");
 | 
				
			||||||
 | 
					    // let reader = BufReader::new(file);
 | 
				
			||||||
 | 
					    // reader.lines().for_each(|_x| {()});
 | 
				
			||||||
 | 
					    // println!("Time={} μs", now.elapsed().as_micros());
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user