The polars solution I saw on Reddit a few months ago is also super slow. I'm thinking it might be my MacBook.
This commit is contained in:
parent
b6e8b41bb1
commit
0adcf3dec5
1310
rust/Cargo.lock
generated
1310
rust/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -7,3 +7,4 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
hashbrown = "0.14.3"
|
hashbrown = "0.14.3"
|
||||||
|
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
|
||||||
|
35
rust/src/bin/polars.rs
Normal file
35
rust/src/bin/polars.rs
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
use polars::prelude::*;
|
||||||
|
use std::time::Instant;
|
||||||
|
use std::vec;
|
||||||
|
|
||||||
|
fn run_polars() -> Result<DataFrame, PolarsError> {
|
||||||
|
let now = Instant::now();
|
||||||
|
|
||||||
|
let f1: Field = Field::new("station", DataType::String);
|
||||||
|
let f2: Field = Field::new("measure", DataType::Float64);
|
||||||
|
let sc: Schema = Schema::from_iter(vec![f1, f2]);
|
||||||
|
|
||||||
|
let q = LazyCsvReader::new("../measurements.txt")
|
||||||
|
.has_header(false)
|
||||||
|
.with_schema(Some(Arc::new(sc)))
|
||||||
|
.with_separator(b';')
|
||||||
|
.finish()?
|
||||||
|
.group_by(vec![col("station")])
|
||||||
|
.agg(vec![
|
||||||
|
col("measure").alias("min").min(),
|
||||||
|
col("measure").alias("mean").mean(),
|
||||||
|
col("measure").alias("max").max(),
|
||||||
|
])
|
||||||
|
.sort("station", Default::default())
|
||||||
|
.with_streaming(true);
|
||||||
|
|
||||||
|
let df = q.collect()?;
|
||||||
|
|
||||||
|
println!("Time={} μs", now.elapsed().as_micros());
|
||||||
|
|
||||||
|
Ok(df)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
run_polars();
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user