Go implementation by AlexanderYastrebov (#298)
* Go implementation by AlexanderYastrebov This is a proof-of-concept to demonstrate non-java submission. It requires Docker with BuildKit plugin to build and export binary. Updates * #67 * #253 * Use collision-free id lookup * Use number of buckets greater than max number of keys
This commit is contained in:
parent
464ba6209b
commit
4daeb94b04
1
.gitignore
vendored
1
.gitignore
vendored
@ -46,3 +46,4 @@ out/
|
||||
/measurements*.txt
|
||||
/*.out
|
||||
out_expected.txt
|
||||
/*-timing.json
|
||||
|
20
calculate_average_AlexanderYastrebov.sh
Executable file
20
calculate_average_AlexanderYastrebov.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
INPUT=${1:-"measurements.txt"}
|
||||
|
||||
target/AlexanderYastrebov/1brc "$INPUT"
|
@ -53,3 +53,4 @@ vemana;Subrahmanyam
|
||||
jincongho;Jin Cong Ho
|
||||
yonatang;Yonatan Graber
|
||||
adriacabeza;Adrià Cabeza
|
||||
AlexanderYastrebov;Alexander Yastrebov
|
||||
|
18
prepare_AlexanderYastrebov.sh
Executable file
18
prepare_AlexanderYastrebov.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
DOCKER_BUILDKIT=1 docker build -o target/AlexanderYastrebov src/main/go/AlexanderYastrebov
|
22
src/main/go/AlexanderYastrebov/Dockerfile
Normal file
22
src/main/go/AlexanderYastrebov/Dockerfile
Normal file
@ -0,0 +1,22 @@
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
FROM golang AS build-stage
|
||||
COPY . src/
|
||||
RUN cd src && go build .
|
||||
|
||||
FROM scratch AS export-stage
|
||||
COPY --from=build-stage /go/src/1brc /
|
58
src/main/go/AlexanderYastrebov/README.md
Normal file
58
src/main/go/AlexanderYastrebov/README.md
Normal file
@ -0,0 +1,58 @@
|
||||
# 1brc in go
|
||||
|
||||
It uses Docker with BuildKit plugin to build and [export binary](https://docs.docker.com/engine/reference/commandline/build/#output) binary,
|
||||
see [prepare_AlexanderYastrebov.sh](../../../../prepare_AlexanderYastrebov.sh)
|
||||
and [calculate_average_AlexanderYastrebov.sh](../../../../calculate_average_AlexanderYastrebov.sh).
|
||||
|
||||
Demo:
|
||||
```sh
|
||||
$ ./test.sh AlexanderYastrebov
|
||||
[+] Building 0.2s (9/9) FINISHED
|
||||
=> [internal] load .dockerignore 0.0s
|
||||
=> => transferring context: 2B 0.0s
|
||||
=> [internal] load build definition from Dockerfile 0.0s
|
||||
=> => transferring dockerfile: 172B 0.0s
|
||||
=> [internal] load metadata for docker.io/library/golang:latest 0.0s
|
||||
=> [internal] load build context 0.0s
|
||||
=> => transferring context: 145B 0.0s
|
||||
=> [build-stage 1/3] FROM docker.io/library/golang 0.0s
|
||||
=> CACHED [build-stage 2/3] COPY . src/ 0.0s
|
||||
=> CACHED [build-stage 3/3] RUN cd src && go build . 0.0s
|
||||
=> CACHED [export-stage 1/1] COPY --from=build-stage /go/src/1brc / 0.0s
|
||||
=> exporting to client directory 0.1s
|
||||
=> => copying files 2.03MB 0.0s
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-10000-unique-keys.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-10.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-1.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-20.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-2.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-3.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-boundaries.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-complex-utf8.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-dot.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-shortest.txt
|
||||
Validating calculate_average_AlexanderYastrebov.sh -- src/test/resources/samples/measurements-short.txt
|
||||
|
||||
# Run once to setup the benchmark
|
||||
# ./create_measurements.sh 1000000000
|
||||
# mv measurements.txt measurements_1B.txt
|
||||
# ln -s measurements_1B.txt measurements.txt
|
||||
# ./calculate_average_baseline.sh > out_expected.txt
|
||||
|
||||
$ wc -l measurements_1B.txt
|
||||
1000000000 measurements_1B.txt
|
||||
|
||||
$ ./evaluate2.sh AlexanderYastrebov royvanrijn
|
||||
... 0.0s
|
||||
Benchmark 1: ./calculate_average_AlexanderYastrebov.sh 2>&1
|
||||
Time (mean ± σ): 16.786 s ± 0.545 s [User: 56.030 s, System: 10.068 s]
|
||||
Range (min … max): 15.918 s … 17.309 s 5 runs
|
||||
...
|
||||
Benchmark 1: ./calculate_average_royvanrijn.sh 2>&1
|
||||
Time (mean ± σ): 16.731 s ± 0.190 s [User: 56.485 s, System: 10.279 s]
|
||||
Range (min … max): 16.490 s … 16.951 s 5 runs
|
||||
|
||||
Summary
|
||||
AlexanderYastrebov: trimmed mean 16.901712789513336, raw times 16.69836470718,17.30911065018,16.83413600418,15.91787706218,17.17263765718
|
||||
royvanrijn: trimmed mean 16.738037123633333, raw times 16.4900939703,16.9513459953,16.5794539913,16.8297746273,16.8048827523
|
||||
```
|
283
src/main/go/AlexanderYastrebov/calc.go
Normal file
283
src/main/go/AlexanderYastrebov/calc.go
Normal file
@ -0,0 +1,283 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"runtime"
|
||||
"sort"
|
||||
"sync"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
type measurement struct {
|
||||
min, max, sum, count int64
|
||||
}
|
||||
|
||||
func main() {
|
||||
if len(os.Args) != 2 {
|
||||
log.Fatalf("Missing measurements filename")
|
||||
}
|
||||
|
||||
measurements := processFile(os.Args[1])
|
||||
|
||||
ids := make([]string, 0, len(measurements))
|
||||
for id := range measurements {
|
||||
ids = append(ids, id)
|
||||
}
|
||||
sort.Strings(ids)
|
||||
|
||||
fmt.Print("{")
|
||||
for i, id := range ids {
|
||||
if i > 0 {
|
||||
fmt.Print(", ")
|
||||
}
|
||||
m := measurements[id]
|
||||
fmt.Printf("%s=%.1f/%.1f/%.1f", id, round(float64(m.min)/10.0), round(float64(m.sum)/10.0/float64(m.count)), round(float64(m.max)/10.0))
|
||||
}
|
||||
fmt.Println("}")
|
||||
}
|
||||
|
||||
func processFile(filename string) map[string]*measurement {
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
log.Fatalf("Open: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
log.Fatalf("Stat: %v", err)
|
||||
}
|
||||
|
||||
size := fi.Size()
|
||||
if size <= 0 || size != int64(int(size)) {
|
||||
log.Fatalf("Invalid file size: %d", size)
|
||||
}
|
||||
|
||||
data, err := syscall.Mmap(int(f.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_SHARED)
|
||||
if err != nil {
|
||||
log.Fatalf("Mmap: %v", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err := syscall.Munmap(data); err != nil {
|
||||
log.Fatalf("Munmap: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
return process(data)
|
||||
}
|
||||
|
||||
func process(data []byte) map[string]*measurement {
|
||||
nChunks := runtime.NumCPU()
|
||||
|
||||
chunkSize := len(data) / nChunks
|
||||
if chunkSize == 0 {
|
||||
chunkSize = len(data)
|
||||
}
|
||||
|
||||
chunks := make([]int, 0, nChunks)
|
||||
offset := 0
|
||||
for offset < len(data) {
|
||||
offset += chunkSize
|
||||
if offset >= len(data) {
|
||||
chunks = append(chunks, len(data))
|
||||
break
|
||||
}
|
||||
|
||||
nlPos := bytes.IndexByte(data[offset:], '\n')
|
||||
if nlPos == -1 {
|
||||
chunks = append(chunks, len(data))
|
||||
break
|
||||
} else {
|
||||
offset += nlPos + 1
|
||||
chunks = append(chunks, offset)
|
||||
}
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(chunks))
|
||||
|
||||
results := make([]map[string]*measurement, len(chunks))
|
||||
start := 0
|
||||
for i, chunk := range chunks {
|
||||
go func(data []byte, i int) {
|
||||
results[i] = processChunk(data)
|
||||
wg.Done()
|
||||
}(data[start:chunk], i)
|
||||
start = chunk
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
measurements := make(map[string]*measurement)
|
||||
for _, r := range results {
|
||||
for id, rm := range r {
|
||||
m := measurements[id]
|
||||
if m == nil {
|
||||
measurements[id] = rm
|
||||
} else {
|
||||
m.min = min(m.min, rm.min)
|
||||
m.max = max(m.max, rm.max)
|
||||
m.sum += rm.sum
|
||||
m.count += rm.count
|
||||
}
|
||||
}
|
||||
}
|
||||
return measurements
|
||||
}
|
||||
|
||||
func processChunk(data []byte) map[string]*measurement {
|
||||
// Use fixed size linear probe lookup table
|
||||
const (
|
||||
// use power of 2 for fast modulo calculation,
|
||||
// should be larger than max number of keys which is 10_000
|
||||
entriesSize = 1 << 14
|
||||
|
||||
// use FNV-1a hash
|
||||
fnv1aOffset64 = 14695981039346656037
|
||||
fnv1aPrime64 = 1099511628211
|
||||
)
|
||||
|
||||
type entry struct {
|
||||
m measurement
|
||||
hash uint64
|
||||
vlen int
|
||||
value [128]byte // use power of 2 > 100 for alignment
|
||||
}
|
||||
entries := make([]entry, entriesSize)
|
||||
entriesCount := 0
|
||||
|
||||
// keep short and inlinable
|
||||
getMeasurement := func(hash uint64, value []byte) *measurement {
|
||||
i := hash & uint64(entriesSize-1)
|
||||
entry := &entries[i]
|
||||
|
||||
// bytes.Equal could be commented to speedup assuming no hash collisions
|
||||
for entry.vlen > 0 && !(entry.hash == hash && bytes.Equal(entry.value[:entry.vlen], value)) {
|
||||
i = (i + 1) & uint64(entriesSize-1)
|
||||
entry = &entries[i]
|
||||
}
|
||||
|
||||
if entry.vlen == 0 {
|
||||
entry.hash = hash
|
||||
entry.vlen = copy(entry.value[:], value)
|
||||
entriesCount++
|
||||
}
|
||||
return &entry.m
|
||||
}
|
||||
|
||||
// assume valid input
|
||||
for len(data) > 0 {
|
||||
|
||||
idHash := uint64(fnv1aOffset64)
|
||||
semiPos := 0
|
||||
for i, b := range data {
|
||||
if b == ';' {
|
||||
semiPos = i
|
||||
break
|
||||
}
|
||||
|
||||
// calculate FNV-1a hash
|
||||
idHash ^= uint64(b)
|
||||
idHash *= fnv1aPrime64
|
||||
}
|
||||
|
||||
idData := data[:semiPos]
|
||||
|
||||
data = data[semiPos+1:]
|
||||
|
||||
var temp int64
|
||||
// parseNumber
|
||||
{
|
||||
negative := data[0] == '-'
|
||||
if negative {
|
||||
data = data[1:]
|
||||
}
|
||||
|
||||
_ = data[3]
|
||||
if data[1] == '.' {
|
||||
// 1.2\n
|
||||
temp = int64(data[0])*10 + int64(data[2]) - '0'*(10+1)
|
||||
data = data[4:]
|
||||
// 12.3\n
|
||||
} else {
|
||||
_ = data[4]
|
||||
temp = int64(data[0])*100 + int64(data[1])*10 + int64(data[3]) - '0'*(100+10+1)
|
||||
data = data[5:]
|
||||
}
|
||||
|
||||
if negative {
|
||||
temp = -temp
|
||||
}
|
||||
}
|
||||
|
||||
m := getMeasurement(idHash, idData)
|
||||
if m.count == 0 {
|
||||
m.min = temp
|
||||
m.max = temp
|
||||
m.sum = temp
|
||||
m.count = 1
|
||||
} else {
|
||||
m.min = min(m.min, temp)
|
||||
m.max = max(m.max, temp)
|
||||
m.sum += temp
|
||||
m.count++
|
||||
}
|
||||
}
|
||||
|
||||
result := make(map[string]*measurement, entriesCount)
|
||||
for i := range entries {
|
||||
entry := &entries[i]
|
||||
if entry.m.count > 0 {
|
||||
result[string(entry.value[:entry.vlen])] = &entry.m
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func round(x float64) float64 {
|
||||
return roundJava(x*10.0) / 10.0
|
||||
}
|
||||
|
||||
// roundJava returns the closest integer to the argument, with ties
|
||||
// rounding to positive infinity, see java's Math.round
|
||||
func roundJava(x float64) float64 {
|
||||
t := math.Trunc(x)
|
||||
if x < 0.0 && t-x == 0.5 {
|
||||
//return t
|
||||
} else if math.Abs(x-t) >= 0.5 {
|
||||
t += math.Copysign(1, x)
|
||||
}
|
||||
|
||||
if t == 0 { // check -0
|
||||
return 0.0
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// parseNumber reads decimal number that matches "^-?[0-9]{1,2}[.][0-9]" pattern,
|
||||
// e.g.: -12.3, -3.4, 5.6, 78.9 and return the value*10, i.e. -123, -34, 56, 789.
|
||||
func parseNumber(data []byte) int64 {
|
||||
negative := data[0] == '-'
|
||||
if negative {
|
||||
data = data[1:]
|
||||
}
|
||||
|
||||
var result int64
|
||||
switch len(data) {
|
||||
// 1.2
|
||||
case 3:
|
||||
result = int64(data[0])*10 + int64(data[2]) - '0'*(10+1)
|
||||
// 12.3
|
||||
case 4:
|
||||
result = int64(data[0])*100 + int64(data[1])*10 + int64(data[3]) - '0'*(100+10+1)
|
||||
}
|
||||
|
||||
if negative {
|
||||
return -result
|
||||
}
|
||||
return result
|
||||
}
|
86
src/main/go/AlexanderYastrebov/calc_test.go
Normal file
86
src/main/go/AlexanderYastrebov/calc_test.go
Normal file
@ -0,0 +1,86 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRoundJava(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
value float64
|
||||
expected string
|
||||
}{
|
||||
{value: -1.5, expected: "-1.0"},
|
||||
{value: -1.0, expected: "-1.0"},
|
||||
{value: -0.7, expected: "-1.0"},
|
||||
{value: -0.5, expected: "0.0"},
|
||||
{value: -0.3, expected: "0.0"},
|
||||
{value: 0.0, expected: "0.0"},
|
||||
{value: 0.3, expected: "0.0"},
|
||||
{value: 0.5, expected: "1.0"},
|
||||
{value: 0.7, expected: "1.0"},
|
||||
{value: 1.0, expected: "1.0"},
|
||||
{value: 1.5, expected: "2.0"},
|
||||
} {
|
||||
if rounded := roundJava(tc.value); fmt.Sprintf("%.1f", rounded) != tc.expected {
|
||||
t.Errorf("Wrong rounding of %v, expected: %s, got: %.1f", tc.value, tc.expected, rounded)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNumber(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
value string
|
||||
expected string
|
||||
}{
|
||||
{value: "-99.9", expected: "-999"},
|
||||
{value: "-12.3", expected: "-123"},
|
||||
{value: "-1.5", expected: "-15"},
|
||||
{value: "-1.0", expected: "-10"},
|
||||
{value: "0.0", expected: "0"},
|
||||
{value: "0.3", expected: "3"},
|
||||
{value: "12.3", expected: "123"},
|
||||
{value: "99.9", expected: "999"},
|
||||
} {
|
||||
if number := parseNumber([]byte(tc.value)); fmt.Sprintf("%d", number) != tc.expected {
|
||||
t.Errorf("Wrong parsing of %v, expected: %s, got: %d", tc.value, tc.expected, number)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var parseNumberSink int64
|
||||
|
||||
func BenchmarkParseNumber(b *testing.B) {
|
||||
data1 := []byte("1.2")
|
||||
data2 := []byte("-12.3")
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
parseNumberSink = parseNumber(data1) + parseNumber(data2)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkProcess(b *testing.B) {
|
||||
// $ ./create_measurements.sh 1000000 && mv measurements.txt measurements-1e6.txt
|
||||
// Created file with 1,000,000 measurements in 514 ms
|
||||
const filename = "../../../../measurements-1e6.txt"
|
||||
|
||||
data, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
measurements := process(data)
|
||||
rows := int64(0)
|
||||
for _, m := range measurements {
|
||||
rows += m.count
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
b.ReportMetric(float64(rows), "rows/op")
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
process(data)
|
||||
}
|
||||
}
|
3
src/main/go/AlexanderYastrebov/go.mod
Normal file
3
src/main/go/AlexanderYastrebov/go.mod
Normal file
@ -0,0 +1,3 @@
|
||||
module github.com/AlexanderYastrebov/1brc
|
||||
|
||||
go 1.21.5
|
Loading…
Reference in New Issue
Block a user