diff --git a/calculate_average_davecom.sh b/calculate_average_davecom.sh new file mode 100755 index 0000000..1231137 --- /dev/null +++ b/calculate_average_davecom.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_davecom diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_davecom.java b/src/main/java/dev/morling/onebrc/CalculateAverage_davecom.java new file mode 100644 index 0000000..a4f7619 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_davecom.java @@ -0,0 +1,208 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.math.RoundingMode; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.text.DecimalFormat; +import java.util.concurrent.ConcurrentHashMap; +import java.util.HashMap; +import java.util.IntSummaryStatistics; +import java.util.ArrayList; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class CalculateAverage_davecom { + + /* + * Original Header Could Not Be Changed to Match Checks so... + * Copyright 2024 David Kopec + * Licensed under the Apache License, Version 2.0 (the "License"); + * Created by David Kopec with some inspiration from seijikun's solution + * and assistance from GitHub Copilot. + */ + + private static final String FILE = "./measurements.txt"; + + private static final ConcurrentHashMap mins = new ConcurrentHashMap<>(); + private static final ConcurrentHashMap maxs = new ConcurrentHashMap<>(); + private static final ConcurrentHashMap sums = new ConcurrentHashMap<>(); + private static final ConcurrentHashMap counts = new ConcurrentHashMap<>(); + + public static void processChunk(MappedByteBuffer chunk, long chunkSize) { + // setup + chunk.load(); + HashMap values = new HashMap<>(); + + // do the actual processing + long end = chunk.position() + chunkSize; + byte[] name = new byte[128]; + int value = 0; + byte b = 0; + boolean negate = false; + long nameStart = 0; + long nameEnd = 0; + int nameLength = 0; + while (chunk.position() < end) { + // read name up to semicolon + nameStart = chunk.position(); + b = chunk.get(); + while (b != ';') { + b = chunk.get(); + } + nameEnd = chunk.position() - 1; + nameLength = (int) (nameEnd - nameStart); + // generate byte array for name + chunk.get(chunk.position() - nameLength - 1, name, 0, nameLength); + // convert name to string + String nameString = new String(name, 0, nameLength); + // read value + value = 0; + b = chunk.get(); + negate = false; + while (b != '\n') { + if (b == '.') { + b = chunk.get(); + continue; + } + else if (b == '-') { + negate = true; + b = chunk.get(); + continue; + } + value = value * 10 + (b - '0'); + b = chunk.get(); + } + if (negate) { + value = -value; + } + + if (values.containsKey(nameString)) { + values.get(nameString).accept(value); + } + else { + IntSummaryStatistics stats = new IntSummaryStatistics(); + stats.accept(value); + values.put(nameString, stats); + } + } + + for (String nameStr : values.keySet()) { + IntSummaryStatistics stats = values.get(nameStr); + mins.compute(nameStr, (k, v) -> v == null ? stats.getMin() : Math.min(v, stats.getMin())); + maxs.compute(nameStr, (k, v) -> v == null ? stats.getMax() : Math.max(v, stats.getMax())); + sums.compute(nameStr, (k, v) -> v == null ? (int) stats.getSum() : (v + (int) stats.getSum())); + counts.compute(nameStr, (k, v) -> v == null ? (int) stats.getCount() : (v + (int) stats.getCount())); + } + } + + public static void outputResults() { + // output results sorted by name with format {name}={min}/{mean}/{max} in one giant string + // fast string concatenation starting with { and ending with } with a comma between each (no newlines) + StringBuilder sb = new StringBuilder(); + sb.append('{'); + var sortedNames = mins.keySet().stream().sorted().toArray(String[]::new); + DecimalFormat df = new DecimalFormat("0.0"); + df.setRoundingMode(RoundingMode.HALF_UP); + for (String name : sortedNames) { + double min = ((double) mins.get(name)) / 10; + double max = ((double) maxs.get(name)) / 10; + double average = ((double) sums.get(name)) / ((double) counts.get(name)) / 10; + sb.append(name); + sb.append('='); + sb.append(df.format(min)); + sb.append('/'); + sb.append(df.format(average)); + sb.append('/'); + sb.append(df.format(max)); + sb.append(','); + sb.append(' '); + } + if (sb.length() > 1) { + sb.deleteCharAt(sb.length() - 2); + sb.deleteCharAt(sb.length() - 1); + } + sb.append('}'); + System.out.println(sb.toString()); + } + + public static void main(String[] args) throws IOException { + // create thread pool + ExecutorService es = Executors.newVirtualThreadPerTaskExecutor(); + + // load file + FileChannel fc = FileChannel.open(Path.of(FILE)); + + // configuration information + long fileSize = fc.size(); + int numProcessors = Runtime.getRuntime().availableProcessors(); + int numChunks = numProcessors * 2000; + // System.out.println("numProcessors: " + numProcessors); + // System.out.println("numChunks: " + numChunks); + + // create check buffer + ByteBuffer bb = ByteBuffer.allocateDirect(128); + + // find appropriate chunks + // System.out.println("fileSize: " + fileSize); + long chunkLimit = fileSize / numChunks; + long chunkStart = 0; + long chunkEnd = chunkLimit; + // int chunkNum = 0; + while (chunkEnd < fileSize) { + // System.out.println("initiated chunkNum: " + chunkNum); + // find the next newline + fc.position(chunkEnd); + bb.clear(); + fc.read(bb); + bb.flip(); + while (bb.get() != '\n' && bb.position() < bb.limit()) { + } + chunkEnd = chunkEnd + bb.position(); + if (chunkEnd > fileSize) { + chunkEnd = fileSize - 1; + } + // process chunk + long chunkSize = chunkEnd - chunkStart; + if (chunkSize < 1) { + break; + } + // System.out.println("chunkStart: " + chunkStart); + // System.out.println("chunkEnd: " + chunkEnd); + // System.out.println("chunkSize: " + chunkSize); + MappedByteBuffer chunk = fc.map(FileChannel.MapMode.READ_ONLY, chunkStart, chunkSize); + // final int chunkNumFinal = chunkNum; + es.submit(() -> { + // System.out.println("started chunkNum: " + chunkNumFinal); + processChunk(chunk, chunkSize); + // System.out.println("finished chunkNum: " + chunkNumFinal); + }); + chunkStart = chunkEnd; + chunkEnd = chunkEnd + chunkLimit; + if (chunkEnd > fileSize) { + chunkEnd = fileSize - 1; + } + // chunkNum++; + } + es.close(); + + outputResults(); + } +}