Faster version of the data generator
This commit is contained in:
parent
7d485d0e8b
commit
04bd2d69b6
19
create_measurements2.sh
Executable file
19
create_measurements2.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
java --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CreateMeasurements2 $1
|
623
src/main/java/dev/morling/onebrc/CreateMeasurements2.java
Normal file
623
src/main/java/dev/morling/onebrc/CreateMeasurements2.java
Normal file
@ -0,0 +1,623 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
import org.rschwietzke.CheaperCharBuffer;
|
||||
import org.rschwietzke.FastRandom;
|
||||
|
||||
/**
|
||||
* Faster version with some data faking instead of a real Gaussian distribution
|
||||
* Good enough for our purppose I guess.
|
||||
*/
|
||||
public class CreateMeasurements2 {
|
||||
|
||||
private static final String FILE = "./measurements2.txt";
|
||||
|
||||
static class WeatherStation {
|
||||
final static char[] NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
|
||||
|
||||
final String id;
|
||||
final int meanTemperature;
|
||||
|
||||
final char[] firstPart;
|
||||
final FastRandom r = new FastRandom(ThreadLocalRandom.current().nextLong());
|
||||
|
||||
WeatherStation(String id, double meanTemperature) {
|
||||
this.id = id;
|
||||
this.meanTemperature = (int) meanTemperature;
|
||||
// make it directly copyable
|
||||
this.firstPart = (id + ";").toCharArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* We write out data into the buffer to avoid string conversion
|
||||
* We also no longer use double and gaussian, because for our
|
||||
* purpose, the fake numbers here will do it. Less
|
||||
*
|
||||
* @param buffer the buffer to append to
|
||||
*/
|
||||
void measurement(final CheaperCharBuffer buffer) {
|
||||
// fake -10.9 to +10.9 variance without double operations and rounding
|
||||
// gives us -10 to +10
|
||||
int m = meanTemperature + (r.nextInt(21) - 10);
|
||||
// gives us a decimal digit 0 to 9 as char
|
||||
char d = NUMBERS[r.nextInt(10)];
|
||||
|
||||
// just append, only one number has to be converted and we can do
|
||||
// better... if we watn
|
||||
buffer.append(firstPart, 0, firstPart.length)
|
||||
.append(String.valueOf(m)).append('.').append(d)
|
||||
.append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
if (args.length != 1) {
|
||||
System.out.println("Usage: create_measurements.sh <number of records to create>");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
int size = 0;
|
||||
try {
|
||||
size = Integer.parseInt(args[0]);
|
||||
}
|
||||
catch (NumberFormatException e) {
|
||||
System.out.println("Invalid value for <number of records to create>");
|
||||
System.out.println("Usage: CreateMeasurements <number of records to create>");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
// @formatter:off
|
||||
// data from https://en.wikipedia.org/wiki/List_of_cities_by_average_temperature;
|
||||
// converted using https://wikitable2csv.ggor.de/
|
||||
// brought to form using DuckDB:
|
||||
// D copy (
|
||||
// select City, regexp_extract(Year,'(.*)\n.*', 1) as AverageTemp
|
||||
// from (
|
||||
// select City,Year
|
||||
// from read_csv_auto('List_of_cities_by_average_temperature_1.csv', header = true)
|
||||
// union
|
||||
// select City,Year
|
||||
// from read_csv_auto('List_of_cities_by_average_temperature_2.csv', header = true)
|
||||
// union
|
||||
// select City,Year
|
||||
// from read_csv_auto('List_of_cities_by_average_temperature_3.csv', header = true)
|
||||
// union
|
||||
// select City,Year
|
||||
// from read_csv_auto('List_of_cities_by_average_temperature_4.csv', header = true)
|
||||
// union
|
||||
// select City,Year
|
||||
// from read_csv_auto('List_of_cities_by_average_temperature_5.csv', header = true)
|
||||
// )
|
||||
// ) TO 'output.csv' (HEADER, DELIMITER ',');
|
||||
// @formatter:on
|
||||
final List<WeatherStation> stations = Arrays.asList(
|
||||
new WeatherStation("Abha", 18.0),
|
||||
new WeatherStation("Abidjan", 26.0),
|
||||
new WeatherStation("Abéché", 29.4),
|
||||
new WeatherStation("Accra", 26.4),
|
||||
new WeatherStation("Addis Ababa", 16.0),
|
||||
new WeatherStation("Adelaide", 17.3),
|
||||
new WeatherStation("Aden", 29.1),
|
||||
new WeatherStation("Ahvaz", 25.4),
|
||||
new WeatherStation("Albuquerque", 14.0),
|
||||
new WeatherStation("Alexandra", 11.0),
|
||||
new WeatherStation("Alexandria", 20.0),
|
||||
new WeatherStation("Algiers", 18.2),
|
||||
new WeatherStation("Alice Springs", 21.0),
|
||||
new WeatherStation("Almaty", 10.0),
|
||||
new WeatherStation("Amsterdam", 10.2),
|
||||
new WeatherStation("Anadyr", -6.9),
|
||||
new WeatherStation("Anchorage", 2.8),
|
||||
new WeatherStation("Andorra la Vella", 9.8),
|
||||
new WeatherStation("Ankara", 12.0),
|
||||
new WeatherStation("Antananarivo", 17.9),
|
||||
new WeatherStation("Antsiranana", 25.2),
|
||||
new WeatherStation("Arkhangelsk", 1.3),
|
||||
new WeatherStation("Ashgabat", 17.1),
|
||||
new WeatherStation("Asmara", 15.6),
|
||||
new WeatherStation("Assab", 30.5),
|
||||
new WeatherStation("Astana", 3.5),
|
||||
new WeatherStation("Athens", 19.2),
|
||||
new WeatherStation("Atlanta", 17.0),
|
||||
new WeatherStation("Auckland", 15.2),
|
||||
new WeatherStation("Austin", 20.7),
|
||||
new WeatherStation("Baghdad", 22.77),
|
||||
new WeatherStation("Baguio", 19.5),
|
||||
new WeatherStation("Baku", 15.1),
|
||||
new WeatherStation("Baltimore", 13.1),
|
||||
new WeatherStation("Bamako", 27.8),
|
||||
new WeatherStation("Bangkok", 28.6),
|
||||
new WeatherStation("Bangui", 26.0),
|
||||
new WeatherStation("Banjul", 26.0),
|
||||
new WeatherStation("Barcelona", 18.2),
|
||||
new WeatherStation("Bata", 25.1),
|
||||
new WeatherStation("Batumi", 14.0),
|
||||
new WeatherStation("Beijing", 12.9),
|
||||
new WeatherStation("Beirut", 20.9),
|
||||
new WeatherStation("Belgrade", 12.5),
|
||||
new WeatherStation("Belize City", 26.7),
|
||||
new WeatherStation("Benghazi", 19.9),
|
||||
new WeatherStation("Bergen", 7.7),
|
||||
new WeatherStation("Berlin", 10.3),
|
||||
new WeatherStation("Bilbao", 14.7),
|
||||
new WeatherStation("Birao", 26.5),
|
||||
new WeatherStation("Bishkek", 11.3),
|
||||
new WeatherStation("Bissau", 27.0),
|
||||
new WeatherStation("Blantyre", 22.2),
|
||||
new WeatherStation("Bloemfontein", 15.6),
|
||||
new WeatherStation("Boise", 11.4),
|
||||
new WeatherStation("Bordeaux", 14.2),
|
||||
new WeatherStation("Bosaso", 30.0),
|
||||
new WeatherStation("Boston", 10.9),
|
||||
new WeatherStation("Bouaké", 26.0),
|
||||
new WeatherStation("Bratislava", 10.5),
|
||||
new WeatherStation("Brazzaville", 25.0),
|
||||
new WeatherStation("Bridgetown", 27.0),
|
||||
new WeatherStation("Brisbane", 21.4),
|
||||
new WeatherStation("Brussels", 10.5),
|
||||
new WeatherStation("Bucharest", 10.8),
|
||||
new WeatherStation("Budapest", 11.3),
|
||||
new WeatherStation("Bujumbura", 23.8),
|
||||
new WeatherStation("Bulawayo", 18.9),
|
||||
new WeatherStation("Burnie", 13.1),
|
||||
new WeatherStation("Busan", 15.0),
|
||||
new WeatherStation("Cabo San Lucas", 23.9),
|
||||
new WeatherStation("Cairns", 25.0),
|
||||
new WeatherStation("Cairo", 21.4),
|
||||
new WeatherStation("Calgary", 4.4),
|
||||
new WeatherStation("Canberra", 13.1),
|
||||
new WeatherStation("Cape Town", 16.2),
|
||||
new WeatherStation("Changsha", 17.4),
|
||||
new WeatherStation("Charlotte", 16.1),
|
||||
new WeatherStation("Chiang Mai", 25.8),
|
||||
new WeatherStation("Chicago", 9.8),
|
||||
new WeatherStation("Chihuahua", 18.6),
|
||||
new WeatherStation("Chișinău", 10.2),
|
||||
new WeatherStation("Chittagong", 25.9),
|
||||
new WeatherStation("Chongqing", 18.6),
|
||||
new WeatherStation("Christchurch", 12.2),
|
||||
new WeatherStation("City of San Marino", 11.8),
|
||||
new WeatherStation("Colombo", 27.4),
|
||||
new WeatherStation("Columbus", 11.7),
|
||||
new WeatherStation("Conakry", 26.4),
|
||||
new WeatherStation("Copenhagen", 9.1),
|
||||
new WeatherStation("Cotonou", 27.2),
|
||||
new WeatherStation("Cracow", 9.3),
|
||||
new WeatherStation("Da Lat", 17.9),
|
||||
new WeatherStation("Da Nang", 25.8),
|
||||
new WeatherStation("Dakar", 24.0),
|
||||
new WeatherStation("Dallas", 19.0),
|
||||
new WeatherStation("Damascus", 17.0),
|
||||
new WeatherStation("Dampier", 26.4),
|
||||
new WeatherStation("Dar es Salaam", 25.8),
|
||||
new WeatherStation("Darwin", 27.6),
|
||||
new WeatherStation("Denpasar", 23.7),
|
||||
new WeatherStation("Denver", 10.4),
|
||||
new WeatherStation("Detroit", 10.0),
|
||||
new WeatherStation("Dhaka", 25.9),
|
||||
new WeatherStation("Dikson", -11.1),
|
||||
new WeatherStation("Dili", 26.6),
|
||||
new WeatherStation("Djibouti", 29.9),
|
||||
new WeatherStation("Dodoma", 22.7),
|
||||
new WeatherStation("Dolisie", 24.0),
|
||||
new WeatherStation("Douala", 26.7),
|
||||
new WeatherStation("Dubai", 26.9),
|
||||
new WeatherStation("Dublin", 9.8),
|
||||
new WeatherStation("Dunedin", 11.1),
|
||||
new WeatherStation("Durban", 20.6),
|
||||
new WeatherStation("Dushanbe", 14.7),
|
||||
new WeatherStation("Edinburgh", 9.3),
|
||||
new WeatherStation("Edmonton", 4.2),
|
||||
new WeatherStation("El Paso", 18.1),
|
||||
new WeatherStation("Entebbe", 21.0),
|
||||
new WeatherStation("Erbil", 19.5),
|
||||
new WeatherStation("Erzurum", 5.1),
|
||||
new WeatherStation("Fairbanks", -2.3),
|
||||
new WeatherStation("Fianarantsoa", 17.9),
|
||||
new WeatherStation("Flores, Petén", 26.4),
|
||||
new WeatherStation("Frankfurt", 10.6),
|
||||
new WeatherStation("Fresno", 17.9),
|
||||
new WeatherStation("Fukuoka", 17.0),
|
||||
new WeatherStation("Gabès", 19.5),
|
||||
new WeatherStation("Gaborone", 21.0),
|
||||
new WeatherStation("Gagnoa", 26.0),
|
||||
new WeatherStation("Gangtok", 15.2),
|
||||
new WeatherStation("Garissa", 29.3),
|
||||
new WeatherStation("Garoua", 28.3),
|
||||
new WeatherStation("George Town", 27.9),
|
||||
new WeatherStation("Ghanzi", 21.4),
|
||||
new WeatherStation("Gjoa Haven", -14.4),
|
||||
new WeatherStation("Guadalajara", 20.9),
|
||||
new WeatherStation("Guangzhou", 22.4),
|
||||
new WeatherStation("Guatemala City", 20.4),
|
||||
new WeatherStation("Halifax", 7.5),
|
||||
new WeatherStation("Hamburg", 9.7),
|
||||
new WeatherStation("Hamilton", 13.8),
|
||||
new WeatherStation("Hanga Roa", 20.5),
|
||||
new WeatherStation("Hanoi", 23.6),
|
||||
new WeatherStation("Harare", 18.4),
|
||||
new WeatherStation("Harbin", 5.0),
|
||||
new WeatherStation("Hargeisa", 21.7),
|
||||
new WeatherStation("Hat Yai", 27.0),
|
||||
new WeatherStation("Havana", 25.2),
|
||||
new WeatherStation("Helsinki", 5.9),
|
||||
new WeatherStation("Heraklion", 18.9),
|
||||
new WeatherStation("Hiroshima", 16.3),
|
||||
new WeatherStation("Ho Chi Minh City", 27.4),
|
||||
new WeatherStation("Hobart", 12.7),
|
||||
new WeatherStation("Hong Kong", 23.3),
|
||||
new WeatherStation("Honiara", 26.5),
|
||||
new WeatherStation("Honolulu", 25.4),
|
||||
new WeatherStation("Houston", 20.8),
|
||||
new WeatherStation("Ifrane", 11.4),
|
||||
new WeatherStation("Indianapolis", 11.8),
|
||||
new WeatherStation("Iqaluit", -9.3),
|
||||
new WeatherStation("Irkutsk", 1.0),
|
||||
new WeatherStation("Istanbul", 13.9),
|
||||
new WeatherStation("İzmir", 17.9),
|
||||
new WeatherStation("Jacksonville", 20.3),
|
||||
new WeatherStation("Jakarta", 26.7),
|
||||
new WeatherStation("Jayapura", 27.0),
|
||||
new WeatherStation("Jerusalem", 18.3),
|
||||
new WeatherStation("Johannesburg", 15.5),
|
||||
new WeatherStation("Jos", 22.8),
|
||||
new WeatherStation("Juba", 27.8),
|
||||
new WeatherStation("Kabul", 12.1),
|
||||
new WeatherStation("Kampala", 20.0),
|
||||
new WeatherStation("Kandi", 27.7),
|
||||
new WeatherStation("Kankan", 26.5),
|
||||
new WeatherStation("Kano", 26.4),
|
||||
new WeatherStation("Kansas City", 12.5),
|
||||
new WeatherStation("Karachi", 26.0),
|
||||
new WeatherStation("Karonga", 24.4),
|
||||
new WeatherStation("Kathmandu", 18.3),
|
||||
new WeatherStation("Khartoum", 29.9),
|
||||
new WeatherStation("Kingston", 27.4),
|
||||
new WeatherStation("Kinshasa", 25.3),
|
||||
new WeatherStation("Kolkata", 26.7),
|
||||
new WeatherStation("Kuala Lumpur", 27.3),
|
||||
new WeatherStation("Kumasi", 26.0),
|
||||
new WeatherStation("Kunming", 15.7),
|
||||
new WeatherStation("Kuopio", 3.4),
|
||||
new WeatherStation("Kuwait City", 25.7),
|
||||
new WeatherStation("Kyiv", 8.4),
|
||||
new WeatherStation("Kyoto", 15.8),
|
||||
new WeatherStation("La Ceiba", 26.2),
|
||||
new WeatherStation("La Paz", 23.7),
|
||||
new WeatherStation("Lagos", 26.8),
|
||||
new WeatherStation("Lahore", 24.3),
|
||||
new WeatherStation("Lake Havasu City", 23.7),
|
||||
new WeatherStation("Lake Tekapo", 8.7),
|
||||
new WeatherStation("Las Palmas de Gran Canaria", 21.2),
|
||||
new WeatherStation("Las Vegas", 20.3),
|
||||
new WeatherStation("Launceston", 13.1),
|
||||
new WeatherStation("Lhasa", 7.6),
|
||||
new WeatherStation("Libreville", 25.9),
|
||||
new WeatherStation("Lisbon", 17.5),
|
||||
new WeatherStation("Livingstone", 21.8),
|
||||
new WeatherStation("Ljubljana", 10.9),
|
||||
new WeatherStation("Lodwar", 29.3),
|
||||
new WeatherStation("Lomé", 26.9),
|
||||
new WeatherStation("London", 11.3),
|
||||
new WeatherStation("Los Angeles", 18.6),
|
||||
new WeatherStation("Louisville", 13.9),
|
||||
new WeatherStation("Luanda", 25.8),
|
||||
new WeatherStation("Lubumbashi", 20.8),
|
||||
new WeatherStation("Lusaka", 19.9),
|
||||
new WeatherStation("Luxembourg City", 9.3),
|
||||
new WeatherStation("Lviv", 7.8),
|
||||
new WeatherStation("Lyon", 12.5),
|
||||
new WeatherStation("Madrid", 15.0),
|
||||
new WeatherStation("Mahajanga", 26.3),
|
||||
new WeatherStation("Makassar", 26.7),
|
||||
new WeatherStation("Makurdi", 26.0),
|
||||
new WeatherStation("Malabo", 26.3),
|
||||
new WeatherStation("Malé", 28.0),
|
||||
new WeatherStation("Managua", 27.3),
|
||||
new WeatherStation("Manama", 26.5),
|
||||
new WeatherStation("Mandalay", 28.0),
|
||||
new WeatherStation("Mango", 28.1),
|
||||
new WeatherStation("Manila", 28.4),
|
||||
new WeatherStation("Maputo", 22.8),
|
||||
new WeatherStation("Marrakesh", 19.6),
|
||||
new WeatherStation("Marseille", 15.8),
|
||||
new WeatherStation("Maun", 22.4),
|
||||
new WeatherStation("Medan", 26.5),
|
||||
new WeatherStation("Mek'ele", 22.7),
|
||||
new WeatherStation("Melbourne", 15.1),
|
||||
new WeatherStation("Memphis", 17.2),
|
||||
new WeatherStation("Mexicali", 23.1),
|
||||
new WeatherStation("Mexico City", 17.5),
|
||||
new WeatherStation("Miami", 24.9),
|
||||
new WeatherStation("Milan", 13.0),
|
||||
new WeatherStation("Milwaukee", 8.9),
|
||||
new WeatherStation("Minneapolis", 7.8),
|
||||
new WeatherStation("Minsk", 6.7),
|
||||
new WeatherStation("Mogadishu", 27.1),
|
||||
new WeatherStation("Mombasa", 26.3),
|
||||
new WeatherStation("Monaco", 16.4),
|
||||
new WeatherStation("Moncton", 6.1),
|
||||
new WeatherStation("Monterrey", 22.3),
|
||||
new WeatherStation("Montreal", 6.8),
|
||||
new WeatherStation("Moscow", 5.8),
|
||||
new WeatherStation("Mumbai", 27.1),
|
||||
new WeatherStation("Murmansk", 0.6),
|
||||
new WeatherStation("Muscat", 28.0),
|
||||
new WeatherStation("Mzuzu", 17.7),
|
||||
new WeatherStation("N'Djamena", 28.3),
|
||||
new WeatherStation("Naha", 23.1),
|
||||
new WeatherStation("Nairobi", 17.8),
|
||||
new WeatherStation("Nakhon Ratchasima", 27.3),
|
||||
new WeatherStation("Napier", 14.6),
|
||||
new WeatherStation("Napoli", 15.9),
|
||||
new WeatherStation("Nashville", 15.4),
|
||||
new WeatherStation("Nassau", 24.6),
|
||||
new WeatherStation("Ndola", 20.3),
|
||||
new WeatherStation("New Delhi", 25.0),
|
||||
new WeatherStation("New Orleans", 20.7),
|
||||
new WeatherStation("New York City", 12.9),
|
||||
new WeatherStation("Ngaoundéré", 22.0),
|
||||
new WeatherStation("Niamey", 29.3),
|
||||
new WeatherStation("Nicosia", 19.7),
|
||||
new WeatherStation("Niigata", 13.9),
|
||||
new WeatherStation("Nouadhibou", 21.3),
|
||||
new WeatherStation("Nouakchott", 25.7),
|
||||
new WeatherStation("Novosibirsk", 1.7),
|
||||
new WeatherStation("Nuuk", -1.4),
|
||||
new WeatherStation("Odesa", 10.7),
|
||||
new WeatherStation("Odienné", 26.0),
|
||||
new WeatherStation("Oklahoma City", 15.9),
|
||||
new WeatherStation("Omaha", 10.6),
|
||||
new WeatherStation("Oranjestad", 28.1),
|
||||
new WeatherStation("Oslo", 5.7),
|
||||
new WeatherStation("Ottawa", 6.6),
|
||||
new WeatherStation("Ouagadougou", 28.3),
|
||||
new WeatherStation("Ouahigouya", 28.6),
|
||||
new WeatherStation("Ouarzazate", 18.9),
|
||||
new WeatherStation("Oulu", 2.7),
|
||||
new WeatherStation("Palembang", 27.3),
|
||||
new WeatherStation("Palermo", 18.5),
|
||||
new WeatherStation("Palm Springs", 24.5),
|
||||
new WeatherStation("Palmerston North", 13.2),
|
||||
new WeatherStation("Panama City", 28.0),
|
||||
new WeatherStation("Parakou", 26.8),
|
||||
new WeatherStation("Paris", 12.3),
|
||||
new WeatherStation("Perth", 18.7),
|
||||
new WeatherStation("Petropavlovsk-Kamchatsky", 1.9),
|
||||
new WeatherStation("Philadelphia", 13.2),
|
||||
new WeatherStation("Phnom Penh", 28.3),
|
||||
new WeatherStation("Phoenix", 23.9),
|
||||
new WeatherStation("Pittsburgh", 10.8),
|
||||
new WeatherStation("Podgorica", 15.3),
|
||||
new WeatherStation("Pointe-Noire", 26.1),
|
||||
new WeatherStation("Pontianak", 27.7),
|
||||
new WeatherStation("Port Moresby", 26.9),
|
||||
new WeatherStation("Port Sudan", 28.4),
|
||||
new WeatherStation("Port Vila", 24.3),
|
||||
new WeatherStation("Port-Gentil", 26.0),
|
||||
new WeatherStation("Portland (OR)", 12.4),
|
||||
new WeatherStation("Porto", 15.7),
|
||||
new WeatherStation("Prague", 8.4),
|
||||
new WeatherStation("Praia", 24.4),
|
||||
new WeatherStation("Pretoria", 18.2),
|
||||
new WeatherStation("Pyongyang", 10.8),
|
||||
new WeatherStation("Rabat", 17.2),
|
||||
new WeatherStation("Rangpur", 24.4),
|
||||
new WeatherStation("Reggane", 28.3),
|
||||
new WeatherStation("Reykjavík", 4.3),
|
||||
new WeatherStation("Riga", 6.2),
|
||||
new WeatherStation("Riyadh", 26.0),
|
||||
new WeatherStation("Rome", 15.2),
|
||||
new WeatherStation("Roseau", 26.2),
|
||||
new WeatherStation("Rostov-on-Don", 9.9),
|
||||
new WeatherStation("Sacramento", 16.3),
|
||||
new WeatherStation("Saint Petersburg", 5.8),
|
||||
new WeatherStation("Saint-Pierre", 5.7),
|
||||
new WeatherStation("Salt Lake City", 11.6),
|
||||
new WeatherStation("San Antonio", 20.8),
|
||||
new WeatherStation("San Diego", 17.8),
|
||||
new WeatherStation("San Francisco", 14.6),
|
||||
new WeatherStation("San Jose", 16.4),
|
||||
new WeatherStation("San José", 22.6),
|
||||
new WeatherStation("San Juan", 27.2),
|
||||
new WeatherStation("San Salvador", 23.1),
|
||||
new WeatherStation("Sana'a", 20.0),
|
||||
new WeatherStation("Santo Domingo", 25.9),
|
||||
new WeatherStation("Sapporo", 8.9),
|
||||
new WeatherStation("Sarajevo", 10.1),
|
||||
new WeatherStation("Saskatoon", 3.3),
|
||||
new WeatherStation("Seattle", 11.3),
|
||||
new WeatherStation("Ségou", 28.0),
|
||||
new WeatherStation("Seoul", 12.5),
|
||||
new WeatherStation("Seville", 19.2),
|
||||
new WeatherStation("Shanghai", 16.7),
|
||||
new WeatherStation("Singapore", 27.0),
|
||||
new WeatherStation("Skopje", 12.4),
|
||||
new WeatherStation("Sochi", 14.2),
|
||||
new WeatherStation("Sofia", 10.6),
|
||||
new WeatherStation("Sokoto", 28.0),
|
||||
new WeatherStation("Split", 16.1),
|
||||
new WeatherStation("St. John's", 5.0),
|
||||
new WeatherStation("St. Louis", 13.9),
|
||||
new WeatherStation("Stockholm", 6.6),
|
||||
new WeatherStation("Surabaya", 27.1),
|
||||
new WeatherStation("Suva", 25.6),
|
||||
new WeatherStation("Suwałki", 7.2),
|
||||
new WeatherStation("Sydney", 17.7),
|
||||
new WeatherStation("Tabora", 23.0),
|
||||
new WeatherStation("Tabriz", 12.6),
|
||||
new WeatherStation("Taipei", 23.0),
|
||||
new WeatherStation("Tallinn", 6.4),
|
||||
new WeatherStation("Tamale", 27.9),
|
||||
new WeatherStation("Tamanrasset", 21.7),
|
||||
new WeatherStation("Tampa", 22.9),
|
||||
new WeatherStation("Tashkent", 14.8),
|
||||
new WeatherStation("Tauranga", 14.8),
|
||||
new WeatherStation("Tbilisi", 12.9),
|
||||
new WeatherStation("Tegucigalpa", 21.7),
|
||||
new WeatherStation("Tehran", 17.0),
|
||||
new WeatherStation("Tel Aviv", 20.0),
|
||||
new WeatherStation("Thessaloniki", 16.0),
|
||||
new WeatherStation("Thiès", 24.0),
|
||||
new WeatherStation("Tijuana", 17.8),
|
||||
new WeatherStation("Timbuktu", 28.0),
|
||||
new WeatherStation("Tirana", 15.2),
|
||||
new WeatherStation("Toamasina", 23.4),
|
||||
new WeatherStation("Tokyo", 15.4),
|
||||
new WeatherStation("Toliara", 24.1),
|
||||
new WeatherStation("Toluca", 12.4),
|
||||
new WeatherStation("Toronto", 9.4),
|
||||
new WeatherStation("Tripoli", 20.0),
|
||||
new WeatherStation("Tromsø", 2.9),
|
||||
new WeatherStation("Tucson", 20.9),
|
||||
new WeatherStation("Tunis", 18.4),
|
||||
new WeatherStation("Ulaanbaatar", -0.4),
|
||||
new WeatherStation("Upington", 20.4),
|
||||
new WeatherStation("Ürümqi", 7.4),
|
||||
new WeatherStation("Vaduz", 10.1),
|
||||
new WeatherStation("Valencia", 18.3),
|
||||
new WeatherStation("Valletta", 18.8),
|
||||
new WeatherStation("Vancouver", 10.4),
|
||||
new WeatherStation("Veracruz", 25.4),
|
||||
new WeatherStation("Vienna", 10.4),
|
||||
new WeatherStation("Vientiane", 25.9),
|
||||
new WeatherStation("Villahermosa", 27.1),
|
||||
new WeatherStation("Vilnius", 6.0),
|
||||
new WeatherStation("Virginia Beach", 15.8),
|
||||
new WeatherStation("Vladivostok", 4.9),
|
||||
new WeatherStation("Warsaw", 8.5),
|
||||
new WeatherStation("Washington, D.C.", 14.6),
|
||||
new WeatherStation("Wau", 27.8),
|
||||
new WeatherStation("Wellington", 12.9),
|
||||
new WeatherStation("Whitehorse", -0.1),
|
||||
new WeatherStation("Wichita", 13.9),
|
||||
new WeatherStation("Willemstad", 28.0),
|
||||
new WeatherStation("Winnipeg", 3.0),
|
||||
new WeatherStation("Wrocław", 9.6),
|
||||
new WeatherStation("Xi'an", 14.1),
|
||||
new WeatherStation("Yakutsk", -8.8),
|
||||
new WeatherStation("Yangon", 27.5),
|
||||
new WeatherStation("Yaoundé", 23.8),
|
||||
new WeatherStation("Yellowknife", -4.3),
|
||||
new WeatherStation("Yerevan", 12.4),
|
||||
new WeatherStation("Yinchuan", 9.0),
|
||||
new WeatherStation("Zagreb", 10.7),
|
||||
new WeatherStation("Zanzibar City", 26.0),
|
||||
new WeatherStation("Zürich", 9.3));
|
||||
|
||||
File file = new File(FILE);
|
||||
|
||||
// break the loop and unroll it manually
|
||||
int strideSize = 50_000_000;
|
||||
int outer = size / strideSize;
|
||||
int remainder = size - (outer * strideSize);
|
||||
|
||||
try (final BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
|
||||
for (int i = 0; i < outer; i++) {
|
||||
produce(bw, stations, strideSize);
|
||||
|
||||
// we avoid a modulo if here and use the stride size to print and update
|
||||
System.out.println("Wrote %,d measurements in %s ms".formatted((i + 1) * strideSize, System.currentTimeMillis() - start));
|
||||
}
|
||||
// there might be a rest
|
||||
produce(bw, stations, remainder);
|
||||
|
||||
// write fully before taking measurements
|
||||
bw.flush();
|
||||
System.out.println("Created file with %,d measurements in %s ms".formatted(size, System.currentTimeMillis() - start));
|
||||
}
|
||||
}
|
||||
|
||||
private static void produce(BufferedWriter bw, List<WeatherStation> stations, int count) throws IOException {
|
||||
final int stationCount = stations.size();
|
||||
final int rest = count % 8;
|
||||
|
||||
// use a fast ranodm impl without atomics to be able to utilize the cpu better
|
||||
// and avoid sideeffects, FastRandom is very fake random and does not have a state
|
||||
final FastRandom r1 = new FastRandom(ThreadLocalRandom.current().nextLong());
|
||||
final FastRandom r2 = new FastRandom(ThreadLocalRandom.current().nextLong());
|
||||
final FastRandom r3 = new FastRandom(ThreadLocalRandom.current().nextLong());
|
||||
final FastRandom r4 = new FastRandom(ThreadLocalRandom.current().nextLong());
|
||||
|
||||
// write to a fix buffer first, don't create strings ever
|
||||
// reuse buffer
|
||||
final CheaperCharBuffer sb = new CheaperCharBuffer(200);
|
||||
|
||||
// manual loop unroll for less jumps
|
||||
for (int i = 0; i < count; i = i + 8) {
|
||||
{
|
||||
// try to fill teh cpu pipeline as much as possible with
|
||||
// independent operations
|
||||
int s1 = r1.nextInt(stationCount);
|
||||
int s2 = r2.nextInt(stationCount);
|
||||
int s3 = r3.nextInt(stationCount);
|
||||
int s4 = r4.nextInt(stationCount);
|
||||
// get us the ojects one after the other to have the array
|
||||
// in our L1 cache and not push it out with other data
|
||||
var w1 = stations.get(s1);
|
||||
var w2 = stations.get(s2);
|
||||
var w3 = stations.get(s3);
|
||||
var w4 = stations.get(s4);
|
||||
// write our data to our buffer
|
||||
w1.measurement(sb);
|
||||
w2.measurement(sb);
|
||||
w3.measurement(sb);
|
||||
w4.measurement(sb);
|
||||
}
|
||||
{
|
||||
int s1 = r1.nextInt(stationCount);
|
||||
int s2 = r2.nextInt(stationCount);
|
||||
int s3 = r3.nextInt(stationCount);
|
||||
int s4 = r4.nextInt(stationCount);
|
||||
var w1 = stations.get(s1);
|
||||
var w2 = stations.get(s2);
|
||||
var w3 = stations.get(s3);
|
||||
var w4 = stations.get(s4);
|
||||
w1.measurement(sb);
|
||||
w2.measurement(sb);
|
||||
w3.measurement(sb);
|
||||
w4.measurement(sb);
|
||||
}
|
||||
// write the buffer directly, no intermediate string copy
|
||||
bw.write(sb.data_, 0, sb.length_);
|
||||
|
||||
// reuse buffer, reset only, no cleaning
|
||||
sb.clear();
|
||||
}
|
||||
|
||||
// there might be a rest to write
|
||||
for (int i = 0; i < rest; i++) {
|
||||
sb.clear();
|
||||
|
||||
int s = r1.nextInt(stationCount);
|
||||
var w = stations.get(s);
|
||||
w.measurement(sb);
|
||||
|
||||
bw.write(sb.data_, 0, sb.length_);
|
||||
}
|
||||
}
|
||||
}
|
715
src/main/java/org/rschwietzke/CheaperCharBuffer.java
Normal file
715
src/main/java/org/rschwietzke/CheaperCharBuffer.java
Normal file
@ -0,0 +1,715 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.rschwietzke;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* <p>This class is meant to replaces the old {@link CheaperCharBuffer} in all areas
|
||||
* where performance and memory-efficency is key. XMLString compatibility
|
||||
* remains in place in case one has used that in their own code.
|
||||
*
|
||||
* <p>This buffer is mutable and when you use it, make sure you work with
|
||||
* it responsibly. In many cases, we will reuse the buffer to avoid fresh
|
||||
* memory allocations, hence you have to pay attention to its usage pattern.
|
||||
* It is not meant to be a general String replacement.
|
||||
*
|
||||
* <p>This class avoids many of the standard runtime checks that will result
|
||||
* in a runtime or array exception anyway. Why check twice and raise the
|
||||
* same exception?
|
||||
*
|
||||
* @author René Schwietzke
|
||||
* @since 3.10.0
|
||||
*/
|
||||
public class CheaperCharBuffer implements CharSequence {
|
||||
// our data, can grow - that is not safe and has be altered from the original code
|
||||
// to allow speed
|
||||
public char[] data_;
|
||||
|
||||
// the current size of the string data
|
||||
public int length_;
|
||||
|
||||
// the current size of the string data
|
||||
private final int growBy_;
|
||||
|
||||
// how much do we grow if needed, half a cache line
|
||||
public static final int CAPACITY_GROWTH = 64 / 2;
|
||||
|
||||
// what is our start size?
|
||||
// a cache line is 64 byte mostly, the overhead is mostly 24 bytes
|
||||
// a char is two bytes, let's use one cache lines
|
||||
public static final int INITIAL_CAPACITY = (64 - 24) / 2;
|
||||
|
||||
// static empty version; DON'T MODIFY IT
|
||||
public static final CheaperCharBuffer EMPTY = new CheaperCharBuffer(0);
|
||||
|
||||
// the <EFBFBD> character
|
||||
private static final char REPLACEMENT_CHARACTER = '\uFFFD';
|
||||
|
||||
/**
|
||||
* Constructs an XMLCharBuffer with a default size.
|
||||
*/
|
||||
public CheaperCharBuffer() {
|
||||
this.data_ = new char[INITIAL_CAPACITY];
|
||||
this.length_ = 0;
|
||||
this.growBy_ = CAPACITY_GROWTH;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an XMLCharBuffer with a desired size.
|
||||
*
|
||||
* @param startSize the size of the buffer to start with
|
||||
*/
|
||||
public CheaperCharBuffer(final int startSize) {
|
||||
this(startSize, CAPACITY_GROWTH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an XMLCharBuffer with a desired size.
|
||||
*
|
||||
* @param startSize the size of the buffer to start with
|
||||
* @param growBy by how much do we want to grow when needed
|
||||
*/
|
||||
public CheaperCharBuffer(final int startSize, final int growBy) {
|
||||
this.data_ = new char[startSize];
|
||||
this.length_ = 0;
|
||||
this.growBy_ = Math.max(1, growBy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an XMLCharBuffer from another buffer. Copies the data
|
||||
* over. The new buffer capacity matches the length of the source.
|
||||
*
|
||||
* @param src the source buffer to copy from
|
||||
*/
|
||||
public CheaperCharBuffer(final CheaperCharBuffer src) {
|
||||
this(src, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an XMLCharBuffer from another buffer. Copies the data
|
||||
* over. You can add more capacity on top of the source length. If
|
||||
* you specify 0, the capacity will match the src length.
|
||||
*
|
||||
* @param src the source buffer to copy from
|
||||
* @param addCapacity how much capacity to add to origin length
|
||||
*/
|
||||
public CheaperCharBuffer(final CheaperCharBuffer src, final int addCapacity) {
|
||||
this.data_ = Arrays.copyOf(src.data_, src.length_ + Math.max(0, addCapacity));
|
||||
this.length_ = src.length();
|
||||
this.growBy_ = Math.max(1, CAPACITY_GROWTH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an XMLCharBuffer from a string. To avoid
|
||||
* too much allocation, we just take the string array as is and
|
||||
* don't allocate extra space in the first place.
|
||||
*
|
||||
* @param src the string to copy from
|
||||
*/
|
||||
public CheaperCharBuffer(final String src) {
|
||||
this.data_ = src.toCharArray();
|
||||
this.length_ = src.length();
|
||||
this.growBy_ = CAPACITY_GROWTH;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an XMLString structure preset with the specified values.
|
||||
* There will not be any room to grow, if you need that, construct an
|
||||
* empty one and append.
|
||||
*
|
||||
* <p>There are not range checks performed. Make sure your data is correct.
|
||||
*
|
||||
* @param ch The character array, must not be null
|
||||
* @param offset The offset into the character array.
|
||||
* @param length The length of characters from the offset.
|
||||
*/
|
||||
public CheaperCharBuffer(final char[] ch, final int offset, final int length) {
|
||||
// just as big as we need it
|
||||
this(length);
|
||||
append(ch, offset, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check capacity and grow if needed automatically
|
||||
*
|
||||
* @param minimumCapacity how much space do we need at least
|
||||
*/
|
||||
private void ensureCapacity(final int minimumCapacity) {
|
||||
if (minimumCapacity > this.data_.length) {
|
||||
final int newSize = Math.max(minimumCapacity + this.growBy_, (this.data_.length << 1) + 2);
|
||||
this.data_ = Arrays.copyOf(this.data_, newSize);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current max capacity without growth. Does not
|
||||
* indicate how much capacity is already in use. Use {@link #length()}
|
||||
* for that.
|
||||
*
|
||||
* @return the current capacity, not taken any usage into account
|
||||
*/
|
||||
public int capacity() {
|
||||
return this.data_.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends a single character to the buffer.
|
||||
*
|
||||
* @param c the character to append
|
||||
* @return this instance
|
||||
*/
|
||||
public CheaperCharBuffer append(final char c) {
|
||||
final int oldLength = this.length_++;
|
||||
|
||||
// ensureCapacity is not inlined by the compiler, so put that here for the most
|
||||
// called method of all appends. Duplicate code, but for a reason.
|
||||
if (oldLength == this.data_.length) {
|
||||
final int newSize = Math.max(oldLength + this.growBy_, (this.data_.length << 1) + 2);
|
||||
this.data_ = Arrays.copyOf(this.data_, newSize);
|
||||
}
|
||||
|
||||
this.data_[oldLength] = c;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a string to this buffer without copying the string first.
|
||||
*
|
||||
* @param src the string to append
|
||||
* @return this instance
|
||||
*/
|
||||
public CheaperCharBuffer append(final String src) {
|
||||
final int start = this.length_;
|
||||
this.length_ = this.length_ + src.length();
|
||||
ensureCapacity(this.length_);
|
||||
|
||||
// copy char by char because we don't get a copy for free
|
||||
// from a string yet, this might change when immutable arrays
|
||||
// make it into Java, but that will not be very soon
|
||||
for (int i = 0; i < src.length(); i++) {
|
||||
this.data_[start + i] = src.charAt(i);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add another buffer to this one.
|
||||
*
|
||||
* @param src the buffer to append
|
||||
* @return this instance
|
||||
*/
|
||||
public CheaperCharBuffer append(final CheaperCharBuffer src) {
|
||||
final int start = this.length_;
|
||||
this.length_ = this.length_ + src.length();
|
||||
ensureCapacity(this.length_);
|
||||
|
||||
System.arraycopy(src.data_, 0, this.data_, start, src.length_);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add data from a char array to this buffer with the ability to specify
|
||||
* a range to copy from
|
||||
*
|
||||
* @param src the source char array
|
||||
* @param offset the pos to start to copy from
|
||||
* @param length the length of the data to copy
|
||||
*
|
||||
* @return this instance
|
||||
*/
|
||||
public CheaperCharBuffer append(final char[] src, final int offset, final int length) {
|
||||
final int start = this.length_;
|
||||
this.length_ = start + length;
|
||||
|
||||
ensureCapacity(this.length_);
|
||||
|
||||
System.arraycopy(src, offset, this.data_, start, length);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current length
|
||||
*
|
||||
* @return the length of the charbuffer data
|
||||
*/
|
||||
public int length() {
|
||||
return length_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tell us how much the capacity grows if needed
|
||||
*
|
||||
* @return the value that determines how much we grow the backing
|
||||
* array in case we have to
|
||||
*/
|
||||
public int getGrowBy() {
|
||||
return this.growBy_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the buffer to 0 length. It won't resize it to avoid memory
|
||||
* churn.
|
||||
*
|
||||
* @return this instance for fluid programming
|
||||
*/
|
||||
public CheaperCharBuffer clear() {
|
||||
this.length_ = 0;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the buffer to 0 length and sets the new data. This
|
||||
* is a little cheaper than clear().append(c) depending on
|
||||
* the where and the inlining decisions.
|
||||
*
|
||||
* @param c the char to set
|
||||
* @return this instance for fluid programming
|
||||
*/
|
||||
public CheaperCharBuffer clearAndAppend(final char c) {
|
||||
this.length_ = 0;
|
||||
|
||||
if (this.data_.length > 0) {
|
||||
this.data_[this.length_] = c;
|
||||
this.length_++;
|
||||
}
|
||||
else {
|
||||
// the rare case when we don't have any buffer at hand
|
||||
append(c);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this buffer end with this string? If we check for
|
||||
* the empty string, we get true. If we would support JDK 11, we could
|
||||
* use Arrays.mismatch and be way faster.
|
||||
*
|
||||
* @param s the string to check the end against
|
||||
* @return true of the end matches the buffer, false otherwise
|
||||
*/
|
||||
public boolean endsWith(final String s) {
|
||||
// length does not match, cannot be the end
|
||||
if (this.length_ < s.length()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// check the string by each char, avoids a copy of the string
|
||||
final int start = this.length_ - s.length();
|
||||
|
||||
// change this to Arrays.mismatch when going JDK 11 or higher
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
if (this.data_[i + start] != s.charAt(i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces the buffer to the content between start and end marker when
|
||||
* only whitespaces are found before the startMarker as well as after the end marker.
|
||||
* If both strings overlap due to identical characters such as "foo" and "oof"
|
||||
* and the buffer is " foof ", we don't do anything.
|
||||
*
|
||||
* <p>If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side.
|
||||
*
|
||||
* @param startMarker the start string to find, must not be null
|
||||
* @param endMarker the end string to find, must not be null
|
||||
* @return this instance
|
||||
*
|
||||
* @deprecated Use the new method {@link #trimToContent(String, String)} instead.
|
||||
*/
|
||||
public CheaperCharBuffer reduceToContent(final String startMarker, final String endMarker) {
|
||||
return trimToContent(startMarker, endMarker);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces the buffer to the content between start and end marker when
|
||||
* only whitespaces are found before the startMarker as well as after the end marker.
|
||||
* If both strings overlap due to identical characters such as "foo" and "oof"
|
||||
* and the buffer is " foof ", we don't do anything.
|
||||
*
|
||||
* <p>If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side.
|
||||
*
|
||||
* @param startMarker the start string to find, must not be null
|
||||
* @param endMarker the end string to find, must not be null
|
||||
* @return this instance
|
||||
*/
|
||||
public CheaperCharBuffer trimToContent(final String startMarker, final String endMarker) {
|
||||
// if both are longer or same length than content, don't do anything
|
||||
final int markerLength = startMarker.length() + endMarker.length();
|
||||
if (markerLength >= this.length_) {
|
||||
return this;
|
||||
}
|
||||
|
||||
// run over starting whitespaces
|
||||
int sPos = 0;
|
||||
for (; sPos < this.length_ - markerLength; sPos++) {
|
||||
if (!Character.isWhitespace(this.data_[sPos])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// run over ending whitespaces
|
||||
int ePos = this.length_ - 1;
|
||||
for (; ePos > sPos - markerLength; ePos--) {
|
||||
if (!Character.isWhitespace(this.data_[ePos])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if we have less content than marker length, give up
|
||||
// this also helps when markers overlap such as
|
||||
// <!-- and --> and the string is " <!---> "
|
||||
if (ePos - sPos + 1 < markerLength) {
|
||||
return this;
|
||||
}
|
||||
|
||||
// check the start
|
||||
for (int i = 0; i < startMarker.length(); i++) {
|
||||
if (startMarker.charAt(i) != this.data_[i + sPos]) {
|
||||
// no start match, stop and don't do anything
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
// check the end, ePos is when the first good char
|
||||
// occurred
|
||||
final int endStartCheckPos = ePos - endMarker.length() + 1;
|
||||
for (int i = 0; i < endMarker.length(); i++) {
|
||||
if (endMarker.charAt(i) != this.data_[endStartCheckPos + i]) {
|
||||
// no start match, stop and don't do anything
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
// shift left and cut length
|
||||
final int newLength = ePos - sPos + 1 - markerLength;
|
||||
System.arraycopy(this.data_,
|
||||
sPos + startMarker.length(),
|
||||
this.data_,
|
||||
0, newLength);
|
||||
this.length_ = newLength;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we have only whitespaces
|
||||
*
|
||||
* @return true if we have only whitespace, false otherwise
|
||||
*/
|
||||
public boolean isWhitespace() {
|
||||
for (int i = 0; i < this.length_; i++) {
|
||||
if (!Character.isWhitespace(this.data_[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Trims the string similar to {@link java.lang.String#trim()}
|
||||
*
|
||||
* @return a string with removed whitespace at the beginning and the end
|
||||
*/
|
||||
public CheaperCharBuffer trim() {
|
||||
// clean the end first, because it is cheap
|
||||
return trimTrailing().trimLeading();
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all whitespace before the first non-whitespace char.
|
||||
* If all are whitespaces, we get an empty buffer
|
||||
*
|
||||
* @return this instance
|
||||
*/
|
||||
public CheaperCharBuffer trimLeading() {
|
||||
// run over starting whitespace
|
||||
int sPos = 0;
|
||||
for (; sPos < this.length_; sPos++) {
|
||||
if (!Character.isWhitespace(this.data_[sPos])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (sPos == 0) {
|
||||
// nothing to do
|
||||
return this;
|
||||
}
|
||||
else if (sPos == this.length_) {
|
||||
// only whitespace
|
||||
this.length_ = 0;
|
||||
return this;
|
||||
}
|
||||
|
||||
// shift left
|
||||
final int newLength = this.length_ - sPos;
|
||||
System.arraycopy(this.data_,
|
||||
sPos,
|
||||
this.data_,
|
||||
0, newLength);
|
||||
this.length_ = newLength;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all whitespace at the end.
|
||||
* If all are whitespace, we get an empty buffer
|
||||
*
|
||||
* @return this instance
|
||||
*
|
||||
* @deprecated Use {@link #trimTrailing()} instead.
|
||||
*/
|
||||
public CheaperCharBuffer trimWhitespaceAtEnd() {
|
||||
return trimTrailing();
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all whitespace at the end.
|
||||
* If all are whitespace, we get an empty buffer
|
||||
*
|
||||
* @return this instance
|
||||
*/
|
||||
public CheaperCharBuffer trimTrailing() {
|
||||
// run over ending whitespaces
|
||||
int ePos = this.length_ - 1;
|
||||
for (; ePos >= 0; ePos--) {
|
||||
if (!Character.isWhitespace(this.data_[ePos])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
this.length_ = ePos + 1;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shortens the buffer by that many positions. If the count is
|
||||
* larger than the length, we get just an empty buffer. If you pass in negative
|
||||
* values, we are failing, likely often silently. It is all about performance and
|
||||
* not a general all-purpose API.
|
||||
*
|
||||
* @param count a positive number, no runtime checks, if count is larger than
|
||||
* length, we get length = 0
|
||||
* @return this instance
|
||||
*/
|
||||
public CheaperCharBuffer shortenBy(final int count) {
|
||||
final int newLength = this.length_ - count;
|
||||
this.length_ = newLength < 0 ? 0 : newLength;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the characters as char array, this will be a copy!
|
||||
*
|
||||
* @return a copy of the underlying char darta
|
||||
*/
|
||||
public char[] getChars() {
|
||||
return Arrays.copyOf(this.data_, this.length_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string representation of this buffer. This will be a copy
|
||||
* operation. If the buffer is emoty, we get a constant empty String back
|
||||
* to avoid any overhead.
|
||||
*
|
||||
* @return a string of the content of this buffer
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
if (this.length_ > 0) {
|
||||
return new String(this.data_, 0, this.length_);
|
||||
}
|
||||
else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the char a the given position. Will complain if
|
||||
* we try to read outside the range. We do a range check here
|
||||
* because we might not notice when we are within the buffer
|
||||
* but outside the current length.
|
||||
*
|
||||
* @param index the position to read from
|
||||
* @return the char at the position
|
||||
* @throws IndexOutOfBoundsException
|
||||
* in case one tries to read outside of valid buffer range
|
||||
*/
|
||||
@Override
|
||||
public char charAt(final int index) {
|
||||
if (index > this.length_ - 1 || index < 0) {
|
||||
throw new IndexOutOfBoundsException(
|
||||
"Tried to read outside of the valid buffer data");
|
||||
}
|
||||
|
||||
return this.data_[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the char at the given position. No checks are
|
||||
* performed. It is up to the caller to make sure we
|
||||
* read correctly. Reading outside of the array will
|
||||
* cause an {@link IndexOutOfBoundsException} but using an
|
||||
* incorrect position in the array (such as beyond length)
|
||||
* might stay unnoticed! This is a performance method,
|
||||
* use at your own risk.
|
||||
*
|
||||
* @param index the position to read from
|
||||
* @return the char at the position
|
||||
*/
|
||||
public char unsafeCharAt(final int index) {
|
||||
return this.data_[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a content copy of this buffer
|
||||
*
|
||||
* @return a copy of this buffer, the capacity might differ
|
||||
*/
|
||||
@Override
|
||||
public CheaperCharBuffer clone() {
|
||||
return new CheaperCharBuffer(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a <code>CharSequence</code> that is a subsequence of this sequence.
|
||||
* The subsequence starts with the <code>char</code> value at the specified index and
|
||||
* ends with the <code>char</code> value at index <tt>end - 1</tt>. The length
|
||||
* (in <code>char</code>s) of the
|
||||
* returned sequence is <tt>end - start</tt>, so if <tt>start == end</tt>
|
||||
* then an empty sequence is returned.
|
||||
*
|
||||
* @param start the start index, inclusive
|
||||
* @param end the end index, exclusive
|
||||
*
|
||||
* @return the specified subsequence
|
||||
*
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if <tt>start</tt> or <tt>end</tt> are negative,
|
||||
* if <tt>end</tt> is greater than <tt>length()</tt>,
|
||||
* or if <tt>start</tt> is greater than <tt>end</tt>
|
||||
*
|
||||
* @return a charsequence of this buffer
|
||||
*/
|
||||
@Override
|
||||
public CharSequence subSequence(final int start, final int end) {
|
||||
if (start < 0) {
|
||||
throw new StringIndexOutOfBoundsException(start);
|
||||
}
|
||||
if (end > this.length_) {
|
||||
throw new StringIndexOutOfBoundsException(end);
|
||||
}
|
||||
|
||||
final int l = end - start;
|
||||
if (l < 0) {
|
||||
throw new StringIndexOutOfBoundsException(l);
|
||||
}
|
||||
|
||||
return new String(this.data_, start, l);
|
||||
}
|
||||
|
||||
/**
|
||||
* Two buffers are identical when the length and
|
||||
* the content of the backing array (only for the
|
||||
* data in view) are identical.
|
||||
*
|
||||
* @param o the object to compare with
|
||||
* @return true if length and array content match, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (o instanceof CharSequence) {
|
||||
final CharSequence ob = (CharSequence) o;
|
||||
|
||||
if (ob.length() != this.length_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// ok, in JDK 11 or up, we could use an
|
||||
// Arrays.mismatch, but we cannot do that
|
||||
// due to JDK 8 compatibility
|
||||
for (int i = 0; i < this.length_; i++) {
|
||||
if (ob.charAt(i) != this.data_[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// length and content match, be happy
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* We don't cache the hashcode because we mutate often. Don't use this in
|
||||
* hashmaps as key. But you can use that to look up in a hashmap against
|
||||
* a string using the CharSequence interface.
|
||||
*
|
||||
* @return the hashcode, similar to what a normal string would deliver
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = 0;
|
||||
|
||||
for (int i = 0; i < this.length_; i++) {
|
||||
h = ((h << 5) - h) + this.data_[i];
|
||||
}
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a character to an XMLCharBuffer. The character is an int value, and
|
||||
* can either be a single UTF-16 character or a supplementary character
|
||||
* represented by two UTF-16 code points.
|
||||
*
|
||||
* @param value The character value.
|
||||
* @return this instance for fluid programming
|
||||
*
|
||||
* @throws IllegalArgumentException if the specified
|
||||
* {@code codePoint} is not a valid Unicode code point.
|
||||
*/
|
||||
public CheaperCharBuffer appendCodePoint(final int value) {
|
||||
if (value <= Character.MAX_VALUE) {
|
||||
return this.append((char) value);
|
||||
}
|
||||
else {
|
||||
try {
|
||||
final char[] chars = Character.toChars(value);
|
||||
return this.append(chars, 0, chars.length);
|
||||
}
|
||||
catch (final IllegalArgumentException e) {
|
||||
// when value is not valid as UTF-16
|
||||
this.append(REPLACEMENT_CHARACTER);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
76
src/main/java/org/rschwietzke/FastRandom.java
Normal file
76
src/main/java/org/rschwietzke/FastRandom.java
Normal file
@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.rschwietzke;
|
||||
|
||||
/**
|
||||
* Ultra-fast pseudo random generator that is not synchronized!
|
||||
* Don't use anything from Random by inheritance, this will inherit
|
||||
* a volatile! Not my idea, copyied in parts some demo random
|
||||
* generator lessons.
|
||||
*
|
||||
* @author rschwietzke
|
||||
*
|
||||
*/
|
||||
public class FastRandom {
|
||||
private long seed;
|
||||
|
||||
public FastRandom() {
|
||||
this.seed = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public FastRandom(long seed) {
|
||||
this.seed = seed;
|
||||
}
|
||||
|
||||
protected int next(int nbits) {
|
||||
// N.B. Not thread-safe!
|
||||
long x = this.seed;
|
||||
x ^= (x << 21);
|
||||
x ^= (x >>> 35);
|
||||
x ^= (x << 4);
|
||||
this.seed = x;
|
||||
|
||||
x &= ((1L << nbits) - 1);
|
||||
|
||||
return (int) x;
|
||||
}
|
||||
|
||||
/**
|
||||
* Borrowed from the JDK
|
||||
*
|
||||
* @param bound
|
||||
* @return
|
||||
*/
|
||||
public int nextInt(int bound) {
|
||||
int r = next(31);
|
||||
int m = bound - 1;
|
||||
if ((bound & m) == 0) // i.e., bound is a power of 2
|
||||
r = (int) ((bound * (long) r) >> 31);
|
||||
else {
|
||||
for (int u = r; u - (r = u % bound) + m < 0; u = next(31))
|
||||
;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* Borrowed from the JDK
|
||||
* @return
|
||||
*/
|
||||
public int nextInt() {
|
||||
return next(32);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user