/* * Copyright 2023 The original authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package dev.morling.onebrc; import java.io.IOException; import java.math.RoundingMode; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Path; import java.text.DecimalFormat; import java.util.concurrent.ConcurrentHashMap; import java.util.HashMap; import java.util.IntSummaryStatistics; import java.util.ArrayList; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; public class CalculateAverage_davecom { /* * Original Header Could Not Be Changed to Match Checks so... * Copyright 2024 David Kopec * Licensed under the Apache License, Version 2.0 (the "License"); * Created by David Kopec with some inspiration from seijikun's solution * and assistance from GitHub Copilot. */ private static final String FILE = "./measurements.txt"; private static final ConcurrentHashMap mins = new ConcurrentHashMap<>(); private static final ConcurrentHashMap maxs = new ConcurrentHashMap<>(); private static final ConcurrentHashMap sums = new ConcurrentHashMap<>(); private static final ConcurrentHashMap counts = new ConcurrentHashMap<>(); public static void processChunk(MappedByteBuffer chunk, long chunkSize) { // setup chunk.load(); HashMap values = new HashMap<>(); // do the actual processing long end = chunk.position() + chunkSize; byte[] name = new byte[128]; int value = 0; byte b = 0; boolean negate = false; long nameStart = 0; long nameEnd = 0; int nameLength = 0; while (chunk.position() < end) { // read name up to semicolon nameStart = chunk.position(); b = chunk.get(); while (b != ';') { b = chunk.get(); } nameEnd = chunk.position() - 1; nameLength = (int) (nameEnd - nameStart); // generate byte array for name chunk.get(chunk.position() - nameLength - 1, name, 0, nameLength); // convert name to string String nameString = new String(name, 0, nameLength); // read value value = 0; b = chunk.get(); negate = false; while (b != '\n') { if (b == '.') { b = chunk.get(); continue; } else if (b == '-') { negate = true; b = chunk.get(); continue; } value = value * 10 + (b - '0'); b = chunk.get(); } if (negate) { value = -value; } if (values.containsKey(nameString)) { values.get(nameString).accept(value); } else { IntSummaryStatistics stats = new IntSummaryStatistics(); stats.accept(value); values.put(nameString, stats); } } for (String nameStr : values.keySet()) { IntSummaryStatistics stats = values.get(nameStr); mins.compute(nameStr, (k, v) -> v == null ? stats.getMin() : Math.min(v, stats.getMin())); maxs.compute(nameStr, (k, v) -> v == null ? stats.getMax() : Math.max(v, stats.getMax())); sums.compute(nameStr, (k, v) -> v == null ? (int) stats.getSum() : (v + (int) stats.getSum())); counts.compute(nameStr, (k, v) -> v == null ? (int) stats.getCount() : (v + (int) stats.getCount())); } } public static void outputResults() { // output results sorted by name with format {name}={min}/{mean}/{max} in one giant string // fast string concatenation starting with { and ending with } with a comma between each (no newlines) StringBuilder sb = new StringBuilder(); sb.append('{'); var sortedNames = mins.keySet().stream().sorted().toArray(String[]::new); DecimalFormat df = new DecimalFormat("0.0"); df.setRoundingMode(RoundingMode.HALF_UP); for (String name : sortedNames) { double min = ((double) mins.get(name)) / 10; double max = ((double) maxs.get(name)) / 10; double average = ((double) sums.get(name)) / ((double) counts.get(name)) / 10; sb.append(name); sb.append('='); sb.append(df.format(min)); sb.append('/'); sb.append(df.format(average)); sb.append('/'); sb.append(df.format(max)); sb.append(','); sb.append(' '); } if (sb.length() > 1) { sb.deleteCharAt(sb.length() - 2); sb.deleteCharAt(sb.length() - 1); } sb.append('}'); System.out.println(sb.toString()); } public static void main(String[] args) throws IOException { // create thread pool ExecutorService es = Executors.newVirtualThreadPerTaskExecutor(); // load file FileChannel fc = FileChannel.open(Path.of(FILE)); // configuration information long fileSize = fc.size(); int numProcessors = Runtime.getRuntime().availableProcessors(); int numChunks = numProcessors * 2000; // System.out.println("numProcessors: " + numProcessors); // System.out.println("numChunks: " + numChunks); // create check buffer ByteBuffer bb = ByteBuffer.allocateDirect(128); // find appropriate chunks // System.out.println("fileSize: " + fileSize); long chunkLimit = fileSize / numChunks; long chunkStart = 0; long chunkEnd = chunkLimit; // int chunkNum = 0; while (chunkEnd < fileSize) { // System.out.println("initiated chunkNum: " + chunkNum); // find the next newline fc.position(chunkEnd); bb.clear(); fc.read(bb); bb.flip(); while (bb.get() != '\n' && bb.position() < bb.limit()) { } chunkEnd = chunkEnd + bb.position(); if (chunkEnd > fileSize) { chunkEnd = fileSize - 1; } // process chunk long chunkSize = chunkEnd - chunkStart; if (chunkSize < 1) { break; } // System.out.println("chunkStart: " + chunkStart); // System.out.println("chunkEnd: " + chunkEnd); // System.out.println("chunkSize: " + chunkSize); MappedByteBuffer chunk = fc.map(FileChannel.MapMode.READ_ONLY, chunkStart, chunkSize); // final int chunkNumFinal = chunkNum; es.submit(() -> { // System.out.println("started chunkNum: " + chunkNumFinal); processChunk(chunk, chunkSize); // System.out.println("finished chunkNum: " + chunkNumFinal); }); chunkStart = chunkEnd; chunkEnd = chunkEnd + chunkLimit; if (chunkEnd > fileSize) { chunkEnd = fileSize - 1; } // chunkNum++; } es.close(); outputResults(); } }