Add davecom Entry

This commit is contained in:
David Kopec 2024-01-05 10:35:05 -05:00 committed by GitHub
parent 8703674765
commit 8a282ab71b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 228 additions and 0 deletions

20
calculate_average_davecom.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/sh
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
JAVA_OPTS=""
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_davecom

View File

@ -0,0 +1,208 @@
/*
* Copyright 2023 The original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.morling.onebrc;
import java.io.IOException;
import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.text.DecimalFormat;
import java.util.concurrent.ConcurrentHashMap;
import java.util.HashMap;
import java.util.IntSummaryStatistics;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class CalculateAverage_davecom {
/*
* Original Header Could Not Be Changed to Match Checks so...
* Copyright 2024 David Kopec
* Licensed under the Apache License, Version 2.0 (the "License");
* Created by David Kopec with some inspiration from seijikun's solution
* and assistance from GitHub Copilot.
*/
private static final String FILE = "./measurements.txt";
private static final ConcurrentHashMap<String, Integer> mins = new ConcurrentHashMap<>();
private static final ConcurrentHashMap<String, Integer> maxs = new ConcurrentHashMap<>();
private static final ConcurrentHashMap<String, Integer> sums = new ConcurrentHashMap<>();
private static final ConcurrentHashMap<String, Integer> counts = new ConcurrentHashMap<>();
public static void processChunk(MappedByteBuffer chunk, long chunkSize) {
// setup
chunk.load();
HashMap<String, IntSummaryStatistics> values = new HashMap<>();
// do the actual processing
long end = chunk.position() + chunkSize;
byte[] name = new byte[128];
int value = 0;
byte b = 0;
boolean negate = false;
long nameStart = 0;
long nameEnd = 0;
int nameLength = 0;
while (chunk.position() < end) {
// read name up to semicolon
nameStart = chunk.position();
b = chunk.get();
while (b != ';') {
b = chunk.get();
}
nameEnd = chunk.position() - 1;
nameLength = (int) (nameEnd - nameStart);
// generate byte array for name
chunk.get(chunk.position() - nameLength - 1, name, 0, nameLength);
// convert name to string
String nameString = new String(name, 0, nameLength);
// read value
value = 0;
b = chunk.get();
negate = false;
while (b != '\n') {
if (b == '.') {
b = chunk.get();
continue;
}
else if (b == '-') {
negate = true;
b = chunk.get();
continue;
}
value = value * 10 + (b - '0');
b = chunk.get();
}
if (negate) {
value = -value;
}
if (values.containsKey(nameString)) {
values.get(nameString).accept(value);
}
else {
IntSummaryStatistics stats = new IntSummaryStatistics();
stats.accept(value);
values.put(nameString, stats);
}
}
for (String nameStr : values.keySet()) {
IntSummaryStatistics stats = values.get(nameStr);
mins.compute(nameStr, (k, v) -> v == null ? stats.getMin() : Math.min(v, stats.getMin()));
maxs.compute(nameStr, (k, v) -> v == null ? stats.getMax() : Math.max(v, stats.getMax()));
sums.compute(nameStr, (k, v) -> v == null ? (int) stats.getSum() : (v + (int) stats.getSum()));
counts.compute(nameStr, (k, v) -> v == null ? (int) stats.getCount() : (v + (int) stats.getCount()));
}
}
public static void outputResults() {
// output results sorted by name with format {name}={min}/{mean}/{max} in one giant string
// fast string concatenation starting with { and ending with } with a comma between each (no newlines)
StringBuilder sb = new StringBuilder();
sb.append('{');
var sortedNames = mins.keySet().stream().sorted().toArray(String[]::new);
DecimalFormat df = new DecimalFormat("0.0");
df.setRoundingMode(RoundingMode.HALF_UP);
for (String name : sortedNames) {
double min = ((double) mins.get(name)) / 10;
double max = ((double) maxs.get(name)) / 10;
double average = ((double) sums.get(name)) / ((double) counts.get(name)) / 10;
sb.append(name);
sb.append('=');
sb.append(df.format(min));
sb.append('/');
sb.append(df.format(average));
sb.append('/');
sb.append(df.format(max));
sb.append(',');
sb.append(' ');
}
if (sb.length() > 1) {
sb.deleteCharAt(sb.length() - 2);
sb.deleteCharAt(sb.length() - 1);
}
sb.append('}');
System.out.println(sb.toString());
}
public static void main(String[] args) throws IOException {
// create thread pool
ExecutorService es = Executors.newVirtualThreadPerTaskExecutor();
// load file
FileChannel fc = FileChannel.open(Path.of(FILE));
// configuration information
long fileSize = fc.size();
int numProcessors = Runtime.getRuntime().availableProcessors();
int numChunks = numProcessors * 2000;
// System.out.println("numProcessors: " + numProcessors);
// System.out.println("numChunks: " + numChunks);
// create check buffer
ByteBuffer bb = ByteBuffer.allocateDirect(128);
// find appropriate chunks
// System.out.println("fileSize: " + fileSize);
long chunkLimit = fileSize / numChunks;
long chunkStart = 0;
long chunkEnd = chunkLimit;
// int chunkNum = 0;
while (chunkEnd < fileSize) {
// System.out.println("initiated chunkNum: " + chunkNum);
// find the next newline
fc.position(chunkEnd);
bb.clear();
fc.read(bb);
bb.flip();
while (bb.get() != '\n' && bb.position() < bb.limit()) {
}
chunkEnd = chunkEnd + bb.position();
if (chunkEnd > fileSize) {
chunkEnd = fileSize - 1;
}
// process chunk
long chunkSize = chunkEnd - chunkStart;
if (chunkSize < 1) {
break;
}
// System.out.println("chunkStart: " + chunkStart);
// System.out.println("chunkEnd: " + chunkEnd);
// System.out.println("chunkSize: " + chunkSize);
MappedByteBuffer chunk = fc.map(FileChannel.MapMode.READ_ONLY, chunkStart, chunkSize);
// final int chunkNumFinal = chunkNum;
es.submit(() -> {
// System.out.println("started chunkNum: " + chunkNumFinal);
processChunk(chunk, chunkSize);
// System.out.println("finished chunkNum: " + chunkNumFinal);
});
chunkStart = chunkEnd;
chunkEnd = chunkEnd + chunkLimit;
if (chunkEnd > fileSize) {
chunkEnd = fileSize - 1;
}
// chunkNum++;
}
es.close();
outputResults();
}
}