- implementation by padreati
This commit is contained in:
parent
d20e71d483
commit
1721848570
20	calculate_average_padreati.sh	Executable file
@@ -0,0 +1,20 @@
#!/bin/sh
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

JAVA_OPTS="--enable-preview --add-modules jdk.incubator.vector"
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_padreati
5	pom.xml
@@ -103,6 +103,11 @@
                    <version>3.8.1</version>
                    <configuration>
                        <parameters>true</parameters>
                        <compilerArgs>
                            <compilerArg>--enable-preview</compilerArg>
                            <compilerArg>--add-modules</compilerArg>
                            <compilerArg>java.base,jdk.incubator.vector</compilerArg>
                        </compilerArgs>
                    </configuration>
                </plugin>
                <plugin>
197	src/main/java/dev/morling/onebrc/CalculateAverage_padreati.java	Normal file
@@ -0,0 +1,197 @@
/*
 * Copyright 2023 The original authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package dev.morling.onebrc;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.StructuredTaskScope;

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class CalculateAverage_padreati {

    private static final VectorSpecies<Byte> species = ByteVector.SPECIES_PREFERRED;
    private static final String FILE = "./measurements.txt";
    private static final int CHUNK_SIZE = 1024 * 1024;

    private record ResultRow(double min, double mean, double max) {
        public String toString() {
            return round(min) + "/" + round(mean) + "/" + round(max);
        }

        private double round(double value) {
            return Math.round(value * 10.0) / 10.0;
        }
    }

    private record MeasurementAggregator(double min, double max, double sum, long count) {

        public MeasurementAggregator(double seed) {
            this(seed, seed, seed, 1);
        }

        public MeasurementAggregator merge(MeasurementAggregator b) {
            return new MeasurementAggregator(
                    Math.min(min, b.min),
                    Math.max(max, b.max),
                    sum + b.sum,
                    count + b.count);
        }

        public ResultRow toResultRow() {
            return new ResultRow(min, sum / count, max);
        }
    }

    public static void main(String[] args) throws IOException {
        new CalculateAverage_padreati().run();
    }

    private void run() throws IOException {
        File file = new File(FILE);
        var splits = findFileSplits();
        List<StructuredTaskScope.Subtask<Map<String, MeasurementAggregator>>> subtasks = new ArrayList<>();
        try (var scope = new StructuredTaskScope.ShutdownOnFailure()) {
            for (int i = 0; i < splits.size(); i++) {
                long splitStart = splits.get(i);
                long splitEnd = i < splits.size() - 1 ? splits.get(i + 1) : file.length() + 1;
                subtasks.add(scope.fork(() -> chunkProcessor(file, splitStart, splitEnd)));
            }
            scope.join();
            scope.throwIfFailed();

            var resultList = subtasks.stream().map(StructuredTaskScope.Subtask::get).toList();
            TreeMap<String, ResultRow> measurements = collapseResults(resultList);
            System.out.println(measurements);
        }
        catch (InterruptedException | ExecutionException e) {
            throw new RuntimeException(e);
        }
    }

    private List<Long> findFileSplits() throws IOException {
        var splits = new ArrayList<Long>();
        splits.add(0L);

        File file = new File(FILE);
        long next = CHUNK_SIZE;
        while (true) {
            if (next >= file.length()) {
                break;
            }
            try (FileInputStream fis = new FileInputStream(file)) {
                long skip = fis.skip(next);
                if (skip != next) {
                    throw new RuntimeException();
                }
                // find first new line
                while (true) {
                    int ch = fis.read();
                    if (ch != '\n') {
                        next++;
                        continue;
                    }
                    break;
                }
                // skip eventual \r
                if (fis.read() == '\r') {
                    next++;
                }
                splits.add(next + 1);
                next += CHUNK_SIZE;
            }
        }
        return splits;
    }

    public Map<String, MeasurementAggregator> chunkProcessor(File source, long start, long end) throws IOException {
        var map = new HashMap<String, MeasurementAggregator>();
        byte[] buffer = new byte[(int) (end - start)];
        int len;
        try (FileInputStream bis = new FileInputStream(source)) {
            bis.skip(start);
            len = bis.read(buffer, 0, buffer.length);
        }

        List<Integer> nlIndexes = new ArrayList<>();
        List<Integer> commaIndexes = new ArrayList<>();

        int loopBound = species.loopBound(len);
        int i = 0;

        for (; i < loopBound; i += species.length()) {
            ByteVector v = ByteVector.fromArray(species, buffer, i);
            var mask = v.compare(VectorOperators.EQ, '\n');
            for (int j = 0; j < species.length(); j++) {
                if (mask.laneIsSet(j)) {
                    nlIndexes.add(i + j);
                }
            }
            mask = v.compare(VectorOperators.EQ, ';');
            for (int j = 0; j < species.length(); j++) {
                if (mask.laneIsSet(j)) {
                    commaIndexes.add(i + j);
                }
            }
        }
        for (; i < len; i++) {
            if (buffer[i] == '\n') {
                nlIndexes.add(i);
            }
            if (buffer[i] == ';') {
                commaIndexes.add(i);
            }
        }

        int startLine = 0;
        for (int j = 0; j < nlIndexes.size(); j++) {
            int endLine = nlIndexes.get(j);
            int commaIndex = commaIndexes.get(j);
            String key = new String(buffer, startLine, commaIndex - startLine);
            double value = Double.parseDouble(new String(buffer, commaIndex + 1, endLine - commaIndex - 1));
            map.merge(key, new MeasurementAggregator(value), MeasurementAggregator::merge);
            startLine = endLine + 1;
        }
        return map;
    }

    private TreeMap<String, ResultRow> collapseResults(List<Map<String, MeasurementAggregator>> resultList) {
        HashMap<String, MeasurementAggregator> aggregate = new HashMap<>();
        for (var map : resultList) {
            for (var entry : map.entrySet()) {
                aggregate.merge(entry.getKey(), entry.getValue(), MeasurementAggregator::merge);
            }
        }
        TreeMap<String, ResultRow> measurements = new TreeMap<>();
        for (var entry : aggregate.entrySet()) {
            measurements.put(entry.getKey(), entry.getValue().toResultRow());
        }
        return measurements;
    }

}
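For reference, a minimal standalone sketch of the jdk.incubator.vector separator scan that chunkProcessor uses above: compare a lane-width of bytes against a target character, then collect the indexes of the set mask lanes, with a scalar tail loop for the remainder. The class and method names (SeparatorScanSketch, indexesOf) are hypothetical and not part of this commit; it assumes JDK 21+ run with --add-modules jdk.incubator.vector.

// Hypothetical sketch, not part of the commit: locate every occurrence of a
// separator byte in buffer[0..len) using the incubator Vector API.
import java.util.ArrayList;
import java.util.List;

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class SeparatorScanSketch {

    private static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_PREFERRED;

    static List<Integer> indexesOf(byte[] buffer, int len, byte target) {
        List<Integer> indexes = new ArrayList<>();
        int loopBound = SPECIES.loopBound(len);
        int i = 0;
        // Vectorized part: one comparison per lane-width of bytes.
        for (; i < loopBound; i += SPECIES.length()) {
            ByteVector v = ByteVector.fromArray(SPECIES, buffer, i);
            VectorMask<Byte> mask = v.compare(VectorOperators.EQ, target);
            for (int j = 0; j < SPECIES.length(); j++) {
                if (mask.laneIsSet(j)) {
                    indexes.add(i + j);
                }
            }
        }
        // Scalar tail for the bytes that do not fill a full vector.
        for (; i < len; i++) {
            if (buffer[i] == target) {
                indexes.add(i);
            }
        }
        return indexes;
    }

    public static void main(String[] args) {
        byte[] line = "Hamburg;12.0\nBulawayo;8.9\n".getBytes();
        System.out.println(indexesOf(line, line.length, (byte) ';'));   // [7, 21]
        System.out.println(indexesOf(line, line.length, (byte) '\n'));  // [12, 25]
    }
}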