Faster version of the data generator
This commit is contained in:
		
				
					committed by
					
						 Gunnar Morling
						Gunnar Morling
					
				
			
			
				
	
			
			
			
						parent
						
							7d485d0e8b
						
					
				
				
					commit
					04bd2d69b6
				
			
							
								
								
									
										19
									
								
								create_measurements2.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										19
									
								
								create_measurements2.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| #!/bin/sh | ||||
| # | ||||
| #  Copyright 2023 The original authors | ||||
| # | ||||
| #  Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| #  you may not use this file except in compliance with the License. | ||||
| #  You may obtain a copy of the License at | ||||
| # | ||||
| #      http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| #  Unless required by applicable law or agreed to in writing, software | ||||
| #  distributed under the License is distributed on an "AS IS" BASIS, | ||||
| #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| #  See the License for the specific language governing permissions and | ||||
| #  limitations under the License. | ||||
| # | ||||
|  | ||||
|  | ||||
| # Launch the fast measurement generator; the single argument is the number of records to create. | ||||
| # The argument is quoted so it reaches the JVM verbatim (no word splitting or glob expansion). | ||||
| java --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CreateMeasurements2 "$1" | ||||
| @@ -52,7 +52,7 @@ public class CalculateAverage_royvanrijn { | ||||
|  | ||||
|     public static void main(String[] args) throws IOException { | ||||
|  | ||||
| //        long before = System.currentTimeMillis(); | ||||
|         // long before = System.currentTimeMillis(); | ||||
|  | ||||
|         Map<String, Measurement> resultMap = Files.lines(Path.of(FILE)).parallel() | ||||
|                 .map(record -> { | ||||
| @@ -73,7 +73,7 @@ public class CalculateAverage_royvanrijn { | ||||
|                 resultMap.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(Object::toString).collect(Collectors.joining(", "))); | ||||
|         System.out.println("}"); | ||||
|  | ||||
| //        System.out.println("Took: " + (System.currentTimeMillis() - before)); | ||||
|         // System.out.println("Took: " + (System.currentTimeMillis() - before)); | ||||
|  | ||||
|     } | ||||
| } | ||||
|   | ||||
							
								
								
									
										623
									
								
								src/main/java/dev/morling/onebrc/CreateMeasurements2.java
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										623
									
								
								src/main/java/dev/morling/onebrc/CreateMeasurements2.java
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,623 @@ | ||||
| /* | ||||
|  *  Copyright 2023 The original authors | ||||
|  * | ||||
|  *  Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  *  you may not use this file except in compliance with the License. | ||||
|  *  You may obtain a copy of the License at | ||||
|  * | ||||
|  *      http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  *  Unless required by applicable law or agreed to in writing, software | ||||
|  *  distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  *  See the License for the specific language governing permissions and | ||||
|  *  limitations under the License. | ||||
|  */ | ||||
| package dev.morling.onebrc; | ||||
|  | ||||
| import java.io.BufferedWriter; | ||||
| import java.io.File; | ||||
| import java.io.FileWriter; | ||||
| import java.io.IOException; | ||||
| import java.util.Arrays; | ||||
| import java.util.List; | ||||
| import java.util.concurrent.ThreadLocalRandom; | ||||
|  | ||||
| import org.rschwietzke.CheaperCharBuffer; | ||||
| import org.rschwietzke.FastRandom; | ||||
|  | ||||
| /** | ||||
|  * Faster version with some data faking instead of a real Gaussian distribution. | ||||
|  * Good enough for our purpose, I guess. | ||||
|  */ | ||||
| public class CreateMeasurements2 { | ||||
|  | ||||
|     private static final String FILE = "./measurements2.txt"; | ||||
|  | ||||
|     static class WeatherStation { | ||||
|         final static char[] NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; | ||||
|  | ||||
|         final String id; | ||||
|         final int meanTemperature; | ||||
|  | ||||
|         final char[] firstPart; | ||||
|         final FastRandom r = new FastRandom(ThreadLocalRandom.current().nextLong()); | ||||
|  | ||||
|         WeatherStation(String id, double meanTemperature) { | ||||
|             this.id = id; | ||||
|             this.meanTemperature = (int) meanTemperature; | ||||
|             // cache the id followed by the semicolon separator as a char[] for direct appending | ||||
|             this.firstPart = (id + ";").toCharArray(); | ||||
|         } | ||||
|  | ||||
|         /** | ||||
|          * Appends one measurement record for this station to the buffer, in the | ||||
|          * form {@code <id>;<integer part>.<one digit>} followed by a newline. | ||||
|          * | ||||
|          * The record is assembled from the cached id prefix and single chars so | ||||
|          * that only the integer part needs a string conversion. No double | ||||
|          * arithmetic or Gaussian distribution is used any more; for our purpose | ||||
|          * the fake numbers produced here are good enough. | ||||
|          * | ||||
|          * The integer part is the mean temperature (its fraction was cut off by | ||||
|          * the int cast in the constructor) plus a random offset. The fractional | ||||
|          * digit is drawn independently. Since the sign comes only from the | ||||
|          * integer part, an integer part of 0 is always written without a minus | ||||
|          * sign. | ||||
|          * | ||||
|          * @param buffer the buffer to append to | ||||
|          */ | ||||
|         void measurement(final CheaperCharBuffer buffer) { | ||||
|         	// fake a -10.9 to +10.9 deviation from the mean without double operations and rounding; | ||||
|         	// the integer offset is -10 to +10, assuming nextInt(21) yields 0 to 20 (TODO confirm) | ||||
|             int m = meanTemperature + (r.nextInt(21) - 10); | ||||
|             // pick the fractional digit as a char, assuming nextInt(10) yields 0 to 9 (TODO confirm) | ||||
|             char d = NUMBERS[r.nextInt(10)]; | ||||
|  | ||||
|             // just append; only the integer part has to be converted (via String.valueOf), | ||||
|             // and even that could be done better if we want to | ||||
|             buffer.append(firstPart, 0, firstPart.length) | ||||
|                     .append(String.valueOf(m)).append('.').append(d) | ||||
|                     .append('\n'); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     public static void main(String[] args) throws Exception { | ||||
|         long start = System.currentTimeMillis(); | ||||
|  | ||||
|         if (args.length != 1) { | ||||
|             System.out.println("Usage: create_measurements.sh <number of records to create>"); | ||||
|             System.exit(1); | ||||
|         } | ||||
|  | ||||
|         int size = 0; | ||||
|         try { | ||||
|             size = Integer.parseInt(args[0]); | ||||
|         } | ||||
|         catch (NumberFormatException e) { | ||||
|             System.out.println("Invalid value for <number of records to create>"); | ||||
|             System.out.println("Usage: CreateMeasurements <number of records to create>"); | ||||
|             System.exit(1); | ||||
|         } | ||||
|  | ||||
|         // @formatter:off | ||||
|         // data from https://en.wikipedia.org/wiki/List_of_cities_by_average_temperature; | ||||
|         // converted using https://wikitable2csv.ggor.de/ | ||||
|         // brought to form using DuckDB: | ||||
|         // D copy ( | ||||
|         //     select City, regexp_extract(Year,'(.*)\n.*', 1) as AverageTemp | ||||
|         //     from ( | ||||
|         //         select City,Year | ||||
|         //         from read_csv_auto('List_of_cities_by_average_temperature_1.csv', header = true) | ||||
|         //         union | ||||
|         //         select City,Year | ||||
|         //         from read_csv_auto('List_of_cities_by_average_temperature_2.csv', header = true) | ||||
|         //         union | ||||
|         //         select City,Year | ||||
|         //         from read_csv_auto('List_of_cities_by_average_temperature_3.csv', header = true) | ||||
|         //         union | ||||
|         //         select City,Year | ||||
|         //         from read_csv_auto('List_of_cities_by_average_temperature_4.csv', header = true) | ||||
|         //         union | ||||
|         //         select City,Year | ||||
|         //         from read_csv_auto('List_of_cities_by_average_temperature_5.csv', header = true) | ||||
|         //         ) | ||||
|         // ) TO 'output.csv' (HEADER, DELIMITER ','); | ||||
|         // @formatter:on | ||||
|         final List<WeatherStation> stations = Arrays.asList( | ||||
|                 new WeatherStation("Abha", 18.0), | ||||
|                 new WeatherStation("Abidjan", 26.0), | ||||
|                 new WeatherStation("Abéché", 29.4), | ||||
|                 new WeatherStation("Accra", 26.4), | ||||
|                 new WeatherStation("Addis Ababa", 16.0), | ||||
|                 new WeatherStation("Adelaide", 17.3), | ||||
|                 new WeatherStation("Aden", 29.1), | ||||
|                 new WeatherStation("Ahvaz", 25.4), | ||||
|                 new WeatherStation("Albuquerque", 14.0), | ||||
|                 new WeatherStation("Alexandra", 11.0), | ||||
|                 new WeatherStation("Alexandria", 20.0), | ||||
|                 new WeatherStation("Algiers", 18.2), | ||||
|                 new WeatherStation("Alice Springs", 21.0), | ||||
|                 new WeatherStation("Almaty", 10.0), | ||||
|                 new WeatherStation("Amsterdam", 10.2), | ||||
|                 new WeatherStation("Anadyr", -6.9), | ||||
|                 new WeatherStation("Anchorage", 2.8), | ||||
|                 new WeatherStation("Andorra la Vella", 9.8), | ||||
|                 new WeatherStation("Ankara", 12.0), | ||||
|                 new WeatherStation("Antananarivo", 17.9), | ||||
|                 new WeatherStation("Antsiranana", 25.2), | ||||
|                 new WeatherStation("Arkhangelsk", 1.3), | ||||
|                 new WeatherStation("Ashgabat", 17.1), | ||||
|                 new WeatherStation("Asmara", 15.6), | ||||
|                 new WeatherStation("Assab", 30.5), | ||||
|                 new WeatherStation("Astana", 3.5), | ||||
|                 new WeatherStation("Athens", 19.2), | ||||
|                 new WeatherStation("Atlanta", 17.0), | ||||
|                 new WeatherStation("Auckland", 15.2), | ||||
|                 new WeatherStation("Austin", 20.7), | ||||
|                 new WeatherStation("Baghdad", 22.77), | ||||
|                 new WeatherStation("Baguio", 19.5), | ||||
|                 new WeatherStation("Baku", 15.1), | ||||
|                 new WeatherStation("Baltimore", 13.1), | ||||
|                 new WeatherStation("Bamako", 27.8), | ||||
|                 new WeatherStation("Bangkok", 28.6), | ||||
|                 new WeatherStation("Bangui", 26.0), | ||||
|                 new WeatherStation("Banjul", 26.0), | ||||
|                 new WeatherStation("Barcelona", 18.2), | ||||
|                 new WeatherStation("Bata", 25.1), | ||||
|                 new WeatherStation("Batumi", 14.0), | ||||
|                 new WeatherStation("Beijing", 12.9), | ||||
|                 new WeatherStation("Beirut", 20.9), | ||||
|                 new WeatherStation("Belgrade", 12.5), | ||||
|                 new WeatherStation("Belize City", 26.7), | ||||
|                 new WeatherStation("Benghazi", 19.9), | ||||
|                 new WeatherStation("Bergen", 7.7), | ||||
|                 new WeatherStation("Berlin", 10.3), | ||||
|                 new WeatherStation("Bilbao", 14.7), | ||||
|                 new WeatherStation("Birao", 26.5), | ||||
|                 new WeatherStation("Bishkek", 11.3), | ||||
|                 new WeatherStation("Bissau", 27.0), | ||||
|                 new WeatherStation("Blantyre", 22.2), | ||||
|                 new WeatherStation("Bloemfontein", 15.6), | ||||
|                 new WeatherStation("Boise", 11.4), | ||||
|                 new WeatherStation("Bordeaux", 14.2), | ||||
|                 new WeatherStation("Bosaso", 30.0), | ||||
|                 new WeatherStation("Boston", 10.9), | ||||
|                 new WeatherStation("Bouaké", 26.0), | ||||
|                 new WeatherStation("Bratislava", 10.5), | ||||
|                 new WeatherStation("Brazzaville", 25.0), | ||||
|                 new WeatherStation("Bridgetown", 27.0), | ||||
|                 new WeatherStation("Brisbane", 21.4), | ||||
|                 new WeatherStation("Brussels", 10.5), | ||||
|                 new WeatherStation("Bucharest", 10.8), | ||||
|                 new WeatherStation("Budapest", 11.3), | ||||
|                 new WeatherStation("Bujumbura", 23.8), | ||||
|                 new WeatherStation("Bulawayo", 18.9), | ||||
|                 new WeatherStation("Burnie", 13.1), | ||||
|                 new WeatherStation("Busan", 15.0), | ||||
|                 new WeatherStation("Cabo San Lucas", 23.9), | ||||
|                 new WeatherStation("Cairns", 25.0), | ||||
|                 new WeatherStation("Cairo", 21.4), | ||||
|                 new WeatherStation("Calgary", 4.4), | ||||
|                 new WeatherStation("Canberra", 13.1), | ||||
|                 new WeatherStation("Cape Town", 16.2), | ||||
|                 new WeatherStation("Changsha", 17.4), | ||||
|                 new WeatherStation("Charlotte", 16.1), | ||||
|                 new WeatherStation("Chiang Mai", 25.8), | ||||
|                 new WeatherStation("Chicago", 9.8), | ||||
|                 new WeatherStation("Chihuahua", 18.6), | ||||
|                 new WeatherStation("Chișinău", 10.2), | ||||
|                 new WeatherStation("Chittagong", 25.9), | ||||
|                 new WeatherStation("Chongqing", 18.6), | ||||
|                 new WeatherStation("Christchurch", 12.2), | ||||
|                 new WeatherStation("City of San Marino", 11.8), | ||||
|                 new WeatherStation("Colombo", 27.4), | ||||
|                 new WeatherStation("Columbus", 11.7), | ||||
|                 new WeatherStation("Conakry", 26.4), | ||||
|                 new WeatherStation("Copenhagen", 9.1), | ||||
|                 new WeatherStation("Cotonou", 27.2), | ||||
|                 new WeatherStation("Cracow", 9.3), | ||||
|                 new WeatherStation("Da Lat", 17.9), | ||||
|                 new WeatherStation("Da Nang", 25.8), | ||||
|                 new WeatherStation("Dakar", 24.0), | ||||
|                 new WeatherStation("Dallas", 19.0), | ||||
|                 new WeatherStation("Damascus", 17.0), | ||||
|                 new WeatherStation("Dampier", 26.4), | ||||
|                 new WeatherStation("Dar es Salaam", 25.8), | ||||
|                 new WeatherStation("Darwin", 27.6), | ||||
|                 new WeatherStation("Denpasar", 23.7), | ||||
|                 new WeatherStation("Denver", 10.4), | ||||
|                 new WeatherStation("Detroit", 10.0), | ||||
|                 new WeatherStation("Dhaka", 25.9), | ||||
|                 new WeatherStation("Dikson", -11.1), | ||||
|                 new WeatherStation("Dili", 26.6), | ||||
|                 new WeatherStation("Djibouti", 29.9), | ||||
|                 new WeatherStation("Dodoma", 22.7), | ||||
|                 new WeatherStation("Dolisie", 24.0), | ||||
|                 new WeatherStation("Douala", 26.7), | ||||
|                 new WeatherStation("Dubai", 26.9), | ||||
|                 new WeatherStation("Dublin", 9.8), | ||||
|                 new WeatherStation("Dunedin", 11.1), | ||||
|                 new WeatherStation("Durban", 20.6), | ||||
|                 new WeatherStation("Dushanbe", 14.7), | ||||
|                 new WeatherStation("Edinburgh", 9.3), | ||||
|                 new WeatherStation("Edmonton", 4.2), | ||||
|                 new WeatherStation("El Paso", 18.1), | ||||
|                 new WeatherStation("Entebbe", 21.0), | ||||
|                 new WeatherStation("Erbil", 19.5), | ||||
|                 new WeatherStation("Erzurum", 5.1), | ||||
|                 new WeatherStation("Fairbanks", -2.3), | ||||
|                 new WeatherStation("Fianarantsoa", 17.9), | ||||
|                 new WeatherStation("Flores,  Petén", 26.4), | ||||
|                 new WeatherStation("Frankfurt", 10.6), | ||||
|                 new WeatherStation("Fresno", 17.9), | ||||
|                 new WeatherStation("Fukuoka", 17.0), | ||||
|                 new WeatherStation("Gabès", 19.5), | ||||
|                 new WeatherStation("Gaborone", 21.0), | ||||
|                 new WeatherStation("Gagnoa", 26.0), | ||||
|                 new WeatherStation("Gangtok", 15.2), | ||||
|                 new WeatherStation("Garissa", 29.3), | ||||
|                 new WeatherStation("Garoua", 28.3), | ||||
|                 new WeatherStation("George Town", 27.9), | ||||
|                 new WeatherStation("Ghanzi", 21.4), | ||||
|                 new WeatherStation("Gjoa Haven", -14.4), | ||||
|                 new WeatherStation("Guadalajara", 20.9), | ||||
|                 new WeatherStation("Guangzhou", 22.4), | ||||
|                 new WeatherStation("Guatemala City", 20.4), | ||||
|                 new WeatherStation("Halifax", 7.5), | ||||
|                 new WeatherStation("Hamburg", 9.7), | ||||
|                 new WeatherStation("Hamilton", 13.8), | ||||
|                 new WeatherStation("Hanga Roa", 20.5), | ||||
|                 new WeatherStation("Hanoi", 23.6), | ||||
|                 new WeatherStation("Harare", 18.4), | ||||
|                 new WeatherStation("Harbin", 5.0), | ||||
|                 new WeatherStation("Hargeisa", 21.7), | ||||
|                 new WeatherStation("Hat Yai", 27.0), | ||||
|                 new WeatherStation("Havana", 25.2), | ||||
|                 new WeatherStation("Helsinki", 5.9), | ||||
|                 new WeatherStation("Heraklion", 18.9), | ||||
|                 new WeatherStation("Hiroshima", 16.3), | ||||
|                 new WeatherStation("Ho Chi Minh City", 27.4), | ||||
|                 new WeatherStation("Hobart", 12.7), | ||||
|                 new WeatherStation("Hong Kong", 23.3), | ||||
|                 new WeatherStation("Honiara", 26.5), | ||||
|                 new WeatherStation("Honolulu", 25.4), | ||||
|                 new WeatherStation("Houston", 20.8), | ||||
|                 new WeatherStation("Ifrane", 11.4), | ||||
|                 new WeatherStation("Indianapolis", 11.8), | ||||
|                 new WeatherStation("Iqaluit", -9.3), | ||||
|                 new WeatherStation("Irkutsk", 1.0), | ||||
|                 new WeatherStation("Istanbul", 13.9), | ||||
|                 new WeatherStation("İzmir", 17.9), | ||||
|                 new WeatherStation("Jacksonville", 20.3), | ||||
|                 new WeatherStation("Jakarta", 26.7), | ||||
|                 new WeatherStation("Jayapura", 27.0), | ||||
|                 new WeatherStation("Jerusalem", 18.3), | ||||
|                 new WeatherStation("Johannesburg", 15.5), | ||||
|                 new WeatherStation("Jos", 22.8), | ||||
|                 new WeatherStation("Juba", 27.8), | ||||
|                 new WeatherStation("Kabul", 12.1), | ||||
|                 new WeatherStation("Kampala", 20.0), | ||||
|                 new WeatherStation("Kandi", 27.7), | ||||
|                 new WeatherStation("Kankan", 26.5), | ||||
|                 new WeatherStation("Kano", 26.4), | ||||
|                 new WeatherStation("Kansas City", 12.5), | ||||
|                 new WeatherStation("Karachi", 26.0), | ||||
|                 new WeatherStation("Karonga", 24.4), | ||||
|                 new WeatherStation("Kathmandu", 18.3), | ||||
|                 new WeatherStation("Khartoum", 29.9), | ||||
|                 new WeatherStation("Kingston", 27.4), | ||||
|                 new WeatherStation("Kinshasa", 25.3), | ||||
|                 new WeatherStation("Kolkata", 26.7), | ||||
|                 new WeatherStation("Kuala Lumpur", 27.3), | ||||
|                 new WeatherStation("Kumasi", 26.0), | ||||
|                 new WeatherStation("Kunming", 15.7), | ||||
|                 new WeatherStation("Kuopio", 3.4), | ||||
|                 new WeatherStation("Kuwait City", 25.7), | ||||
|                 new WeatherStation("Kyiv", 8.4), | ||||
|                 new WeatherStation("Kyoto", 15.8), | ||||
|                 new WeatherStation("La Ceiba", 26.2), | ||||
|                 new WeatherStation("La Paz", 23.7), | ||||
|                 new WeatherStation("Lagos", 26.8), | ||||
|                 new WeatherStation("Lahore", 24.3), | ||||
|                 new WeatherStation("Lake Havasu City", 23.7), | ||||
|                 new WeatherStation("Lake Tekapo", 8.7), | ||||
|                 new WeatherStation("Las Palmas de Gran Canaria", 21.2), | ||||
|                 new WeatherStation("Las Vegas", 20.3), | ||||
|                 new WeatherStation("Launceston", 13.1), | ||||
|                 new WeatherStation("Lhasa", 7.6), | ||||
|                 new WeatherStation("Libreville", 25.9), | ||||
|                 new WeatherStation("Lisbon", 17.5), | ||||
|                 new WeatherStation("Livingstone", 21.8), | ||||
|                 new WeatherStation("Ljubljana", 10.9), | ||||
|                 new WeatherStation("Lodwar", 29.3), | ||||
|                 new WeatherStation("Lomé", 26.9), | ||||
|                 new WeatherStation("London", 11.3), | ||||
|                 new WeatherStation("Los Angeles", 18.6), | ||||
|                 new WeatherStation("Louisville", 13.9), | ||||
|                 new WeatherStation("Luanda", 25.8), | ||||
|                 new WeatherStation("Lubumbashi", 20.8), | ||||
|                 new WeatherStation("Lusaka", 19.9), | ||||
|                 new WeatherStation("Luxembourg City", 9.3), | ||||
|                 new WeatherStation("Lviv", 7.8), | ||||
|                 new WeatherStation("Lyon", 12.5), | ||||
|                 new WeatherStation("Madrid", 15.0), | ||||
|                 new WeatherStation("Mahajanga", 26.3), | ||||
|                 new WeatherStation("Makassar", 26.7), | ||||
|                 new WeatherStation("Makurdi", 26.0), | ||||
|                 new WeatherStation("Malabo", 26.3), | ||||
|                 new WeatherStation("Malé", 28.0), | ||||
|                 new WeatherStation("Managua", 27.3), | ||||
|                 new WeatherStation("Manama", 26.5), | ||||
|                 new WeatherStation("Mandalay", 28.0), | ||||
|                 new WeatherStation("Mango", 28.1), | ||||
|                 new WeatherStation("Manila", 28.4), | ||||
|                 new WeatherStation("Maputo", 22.8), | ||||
|                 new WeatherStation("Marrakesh", 19.6), | ||||
|                 new WeatherStation("Marseille", 15.8), | ||||
|                 new WeatherStation("Maun", 22.4), | ||||
|                 new WeatherStation("Medan", 26.5), | ||||
|                 new WeatherStation("Mek'ele", 22.7), | ||||
|                 new WeatherStation("Melbourne", 15.1), | ||||
|                 new WeatherStation("Memphis", 17.2), | ||||
|                 new WeatherStation("Mexicali", 23.1), | ||||
|                 new WeatherStation("Mexico City", 17.5), | ||||
|                 new WeatherStation("Miami", 24.9), | ||||
|                 new WeatherStation("Milan", 13.0), | ||||
|                 new WeatherStation("Milwaukee", 8.9), | ||||
|                 new WeatherStation("Minneapolis", 7.8), | ||||
|                 new WeatherStation("Minsk", 6.7), | ||||
|                 new WeatherStation("Mogadishu", 27.1), | ||||
|                 new WeatherStation("Mombasa", 26.3), | ||||
|                 new WeatherStation("Monaco", 16.4), | ||||
|                 new WeatherStation("Moncton", 6.1), | ||||
|                 new WeatherStation("Monterrey", 22.3), | ||||
|                 new WeatherStation("Montreal", 6.8), | ||||
|                 new WeatherStation("Moscow", 5.8), | ||||
|                 new WeatherStation("Mumbai", 27.1), | ||||
|                 new WeatherStation("Murmansk", 0.6), | ||||
|                 new WeatherStation("Muscat", 28.0), | ||||
|                 new WeatherStation("Mzuzu", 17.7), | ||||
|                 new WeatherStation("N'Djamena", 28.3), | ||||
|                 new WeatherStation("Naha", 23.1), | ||||
|                 new WeatherStation("Nairobi", 17.8), | ||||
|                 new WeatherStation("Nakhon Ratchasima", 27.3), | ||||
|                 new WeatherStation("Napier", 14.6), | ||||
|                 new WeatherStation("Napoli", 15.9), | ||||
|                 new WeatherStation("Nashville", 15.4), | ||||
|                 new WeatherStation("Nassau", 24.6), | ||||
|                 new WeatherStation("Ndola", 20.3), | ||||
|                 new WeatherStation("New Delhi", 25.0), | ||||
|                 new WeatherStation("New Orleans", 20.7), | ||||
|                 new WeatherStation("New York City", 12.9), | ||||
|                 new WeatherStation("Ngaoundéré", 22.0), | ||||
|                 new WeatherStation("Niamey", 29.3), | ||||
|                 new WeatherStation("Nicosia", 19.7), | ||||
|                 new WeatherStation("Niigata", 13.9), | ||||
|                 new WeatherStation("Nouadhibou", 21.3), | ||||
|                 new WeatherStation("Nouakchott", 25.7), | ||||
|                 new WeatherStation("Novosibirsk", 1.7), | ||||
|                 new WeatherStation("Nuuk", -1.4), | ||||
|                 new WeatherStation("Odesa", 10.7), | ||||
|                 new WeatherStation("Odienné", 26.0), | ||||
|                 new WeatherStation("Oklahoma City", 15.9), | ||||
|                 new WeatherStation("Omaha", 10.6), | ||||
|                 new WeatherStation("Oranjestad", 28.1), | ||||
|                 new WeatherStation("Oslo", 5.7), | ||||
|                 new WeatherStation("Ottawa", 6.6), | ||||
|                 new WeatherStation("Ouagadougou", 28.3), | ||||
|                 new WeatherStation("Ouahigouya", 28.6), | ||||
|                 new WeatherStation("Ouarzazate", 18.9), | ||||
|                 new WeatherStation("Oulu", 2.7), | ||||
|                 new WeatherStation("Palembang", 27.3), | ||||
|                 new WeatherStation("Palermo", 18.5), | ||||
|                 new WeatherStation("Palm Springs", 24.5), | ||||
|                 new WeatherStation("Palmerston North", 13.2), | ||||
|                 new WeatherStation("Panama City", 28.0), | ||||
|                 new WeatherStation("Parakou", 26.8), | ||||
|                 new WeatherStation("Paris", 12.3), | ||||
|                 new WeatherStation("Perth", 18.7), | ||||
|                 new WeatherStation("Petropavlovsk-Kamchatsky", 1.9), | ||||
|                 new WeatherStation("Philadelphia", 13.2), | ||||
|                 new WeatherStation("Phnom Penh", 28.3), | ||||
|                 new WeatherStation("Phoenix", 23.9), | ||||
|                 new WeatherStation("Pittsburgh", 10.8), | ||||
|                 new WeatherStation("Podgorica", 15.3), | ||||
|                 new WeatherStation("Pointe-Noire", 26.1), | ||||
|                 new WeatherStation("Pontianak", 27.7), | ||||
|                 new WeatherStation("Port Moresby", 26.9), | ||||
|                 new WeatherStation("Port Sudan", 28.4), | ||||
|                 new WeatherStation("Port Vila", 24.3), | ||||
|                 new WeatherStation("Port-Gentil", 26.0), | ||||
|                 new WeatherStation("Portland (OR)", 12.4), | ||||
|                 new WeatherStation("Porto", 15.7), | ||||
|                 new WeatherStation("Prague", 8.4), | ||||
|                 new WeatherStation("Praia", 24.4), | ||||
|                 new WeatherStation("Pretoria", 18.2), | ||||
|                 new WeatherStation("Pyongyang", 10.8), | ||||
|                 new WeatherStation("Rabat", 17.2), | ||||
|                 new WeatherStation("Rangpur", 24.4), | ||||
|                 new WeatherStation("Reggane", 28.3), | ||||
|                 new WeatherStation("Reykjavík", 4.3), | ||||
|                 new WeatherStation("Riga", 6.2), | ||||
|                 new WeatherStation("Riyadh", 26.0), | ||||
|                 new WeatherStation("Rome", 15.2), | ||||
|                 new WeatherStation("Roseau", 26.2), | ||||
|                 new WeatherStation("Rostov-on-Don", 9.9), | ||||
|                 new WeatherStation("Sacramento", 16.3), | ||||
|                 new WeatherStation("Saint Petersburg", 5.8), | ||||
|                 new WeatherStation("Saint-Pierre", 5.7), | ||||
|                 new WeatherStation("Salt Lake City", 11.6), | ||||
|                 new WeatherStation("San Antonio", 20.8), | ||||
|                 new WeatherStation("San Diego", 17.8), | ||||
|                 new WeatherStation("San Francisco", 14.6), | ||||
|                 new WeatherStation("San Jose", 16.4), | ||||
|                 new WeatherStation("San José", 22.6), | ||||
|                 new WeatherStation("San Juan", 27.2), | ||||
|                 new WeatherStation("San Salvador", 23.1), | ||||
|                 new WeatherStation("Sana'a", 20.0), | ||||
|                 new WeatherStation("Santo Domingo", 25.9), | ||||
|                 new WeatherStation("Sapporo", 8.9), | ||||
|                 new WeatherStation("Sarajevo", 10.1), | ||||
|                 new WeatherStation("Saskatoon", 3.3), | ||||
|                 new WeatherStation("Seattle", 11.3), | ||||
|                 new WeatherStation("Ségou", 28.0), | ||||
|                 new WeatherStation("Seoul", 12.5), | ||||
|                 new WeatherStation("Seville", 19.2), | ||||
|                 new WeatherStation("Shanghai", 16.7), | ||||
|                 new WeatherStation("Singapore", 27.0), | ||||
|                 new WeatherStation("Skopje", 12.4), | ||||
|                 new WeatherStation("Sochi", 14.2), | ||||
|                 new WeatherStation("Sofia", 10.6), | ||||
|                 new WeatherStation("Sokoto", 28.0), | ||||
|                 new WeatherStation("Split", 16.1), | ||||
|                 new WeatherStation("St. John's", 5.0), | ||||
|                 new WeatherStation("St. Louis", 13.9), | ||||
|                 new WeatherStation("Stockholm", 6.6), | ||||
|                 new WeatherStation("Surabaya", 27.1), | ||||
|                 new WeatherStation("Suva", 25.6), | ||||
|                 new WeatherStation("Suwałki", 7.2), | ||||
|                 new WeatherStation("Sydney", 17.7), | ||||
|                 new WeatherStation("Tabora", 23.0), | ||||
|                 new WeatherStation("Tabriz", 12.6), | ||||
|                 new WeatherStation("Taipei", 23.0), | ||||
|                 new WeatherStation("Tallinn", 6.4), | ||||
|                 new WeatherStation("Tamale", 27.9), | ||||
|                 new WeatherStation("Tamanrasset", 21.7), | ||||
|                 new WeatherStation("Tampa", 22.9), | ||||
|                 new WeatherStation("Tashkent", 14.8), | ||||
|                 new WeatherStation("Tauranga", 14.8), | ||||
|                 new WeatherStation("Tbilisi", 12.9), | ||||
|                 new WeatherStation("Tegucigalpa", 21.7), | ||||
|                 new WeatherStation("Tehran", 17.0), | ||||
|                 new WeatherStation("Tel Aviv", 20.0), | ||||
|                 new WeatherStation("Thessaloniki", 16.0), | ||||
|                 new WeatherStation("Thiès", 24.0), | ||||
|                 new WeatherStation("Tijuana", 17.8), | ||||
|                 new WeatherStation("Timbuktu", 28.0), | ||||
|                 new WeatherStation("Tirana", 15.2), | ||||
|                 new WeatherStation("Toamasina", 23.4), | ||||
|                 new WeatherStation("Tokyo", 15.4), | ||||
|                 new WeatherStation("Toliara", 24.1), | ||||
|                 new WeatherStation("Toluca", 12.4), | ||||
|                 new WeatherStation("Toronto", 9.4), | ||||
|                 new WeatherStation("Tripoli", 20.0), | ||||
|                 new WeatherStation("Tromsø", 2.9), | ||||
|                 new WeatherStation("Tucson", 20.9), | ||||
|                 new WeatherStation("Tunis", 18.4), | ||||
|                 new WeatherStation("Ulaanbaatar", -0.4), | ||||
|                 new WeatherStation("Upington", 20.4), | ||||
|                 new WeatherStation("Ürümqi", 7.4), | ||||
|                 new WeatherStation("Vaduz", 10.1), | ||||
|                 new WeatherStation("Valencia", 18.3), | ||||
|                 new WeatherStation("Valletta", 18.8), | ||||
|                 new WeatherStation("Vancouver", 10.4), | ||||
|                 new WeatherStation("Veracruz", 25.4), | ||||
|                 new WeatherStation("Vienna", 10.4), | ||||
|                 new WeatherStation("Vientiane", 25.9), | ||||
|                 new WeatherStation("Villahermosa", 27.1), | ||||
|                 new WeatherStation("Vilnius", 6.0), | ||||
|                 new WeatherStation("Virginia Beach", 15.8), | ||||
|                 new WeatherStation("Vladivostok", 4.9), | ||||
|                 new WeatherStation("Warsaw", 8.5), | ||||
|                 new WeatherStation("Washington, D.C.", 14.6), | ||||
|                 new WeatherStation("Wau", 27.8), | ||||
|                 new WeatherStation("Wellington", 12.9), | ||||
|                 new WeatherStation("Whitehorse", -0.1), | ||||
|                 new WeatherStation("Wichita", 13.9), | ||||
|                 new WeatherStation("Willemstad", 28.0), | ||||
|                 new WeatherStation("Winnipeg", 3.0), | ||||
|                 new WeatherStation("Wrocław", 9.6), | ||||
|                 new WeatherStation("Xi'an", 14.1), | ||||
|                 new WeatherStation("Yakutsk", -8.8), | ||||
|                 new WeatherStation("Yangon", 27.5), | ||||
|                 new WeatherStation("Yaoundé", 23.8), | ||||
|                 new WeatherStation("Yellowknife", -4.3), | ||||
|                 new WeatherStation("Yerevan", 12.4), | ||||
|                 new WeatherStation("Yinchuan", 9.0), | ||||
|                 new WeatherStation("Zagreb", 10.7), | ||||
|                 new WeatherStation("Zanzibar City", 26.0), | ||||
|                 new WeatherStation("Zürich", 9.3)); | ||||
|  | ||||
|         File file = new File(FILE); | ||||
|  | ||||
|         // break the loop and unroll it manually | ||||
|         int strideSize = 50_000_000; | ||||
|         int outer = size / strideSize; | ||||
|         int remainder = size - (outer * strideSize); | ||||
|  | ||||
|         try (final BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { | ||||
|             for (int i = 0; i < outer; i++) { | ||||
|                 produce(bw, stations, strideSize); | ||||
|  | ||||
|                 // we avoid a modulo if here and use the stride size to print and update | ||||
|                 System.out.println("Wrote %,d measurements in %s ms".formatted((i + 1) * strideSize, System.currentTimeMillis() - start)); | ||||
|             } | ||||
|             // there might be a rest | ||||
|             produce(bw, stations, remainder); | ||||
|  | ||||
|             // write fully before taking measurements | ||||
|             bw.flush(); | ||||
|             System.out.println("Created file with %,d measurements in %s ms".formatted(size, System.currentTimeMillis() - start)); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Writes {@code count} measurements of randomly picked stations to the writer. | ||||
|      * The bulk is produced in batches of eight that pass through one reused buffer, | ||||
|      * whatever does not fill a complete batch is written one by one afterwards. | ||||
|      * | ||||
|      * @param bw the writer to send the data to | ||||
|      * @param stations the stations to pick from | ||||
|      * @param count the total number of measurements to write | ||||
|      * @throws IOException when writing to {@code bw} fails | ||||
|      */ | ||||
|     private static void produce(BufferedWriter bw, List<WeatherStation> stations, int count) throws IOException { | ||||
|         final int stationCount = stations.size(); | ||||
|         final int rest = count % 8; | ||||
|         // only this many measurements fit into complete batches of eight; looping up | ||||
|         // to count itself would emit a full extra batch whenever count % 8 != 0 | ||||
|         final int batched = count - rest; | ||||
|  | ||||
|         // use a fast random impl without atomics to be able to utilize the cpu better | ||||
|         // and avoid side effects, each generator gets its own seed | ||||
|         final FastRandom r1 = new FastRandom(ThreadLocalRandom.current().nextLong()); | ||||
|         final FastRandom r2 = new FastRandom(ThreadLocalRandom.current().nextLong()); | ||||
|         final FastRandom r3 = new FastRandom(ThreadLocalRandom.current().nextLong()); | ||||
|         final FastRandom r4 = new FastRandom(ThreadLocalRandom.current().nextLong()); | ||||
|  | ||||
|         // write to a fixed buffer first, don't create strings ever | ||||
|         // reuse buffer | ||||
|         final CheaperCharBuffer sb = new CheaperCharBuffer(200); | ||||
|  | ||||
|         // manual loop unroll for fewer jumps, every pass writes eight measurements | ||||
|         for (int i = 0; i < batched; i = i + 8) { | ||||
|             { | ||||
|                 // try to fill the cpu pipeline as much as possible with | ||||
|                 // independent operations | ||||
|                 int s1 = r1.nextInt(stationCount); | ||||
|                 int s2 = r2.nextInt(stationCount); | ||||
|                 int s3 = r3.nextInt(stationCount); | ||||
|                 int s4 = r4.nextInt(stationCount); | ||||
|                 // get us the objects one after the other to have the array | ||||
|                 // in our L1 cache and not push it out with other data | ||||
|                 var w1 = stations.get(s1); | ||||
|                 var w2 = stations.get(s2); | ||||
|                 var w3 = stations.get(s3); | ||||
|                 var w4 = stations.get(s4); | ||||
|                 // write our data to our buffer | ||||
|                 w1.measurement(sb); | ||||
|                 w2.measurement(sb); | ||||
|                 w3.measurement(sb); | ||||
|                 w4.measurement(sb); | ||||
|             } | ||||
|             { | ||||
|                 int s1 = r1.nextInt(stationCount); | ||||
|                 int s2 = r2.nextInt(stationCount); | ||||
|                 int s3 = r3.nextInt(stationCount); | ||||
|                 int s4 = r4.nextInt(stationCount); | ||||
|                 var w1 = stations.get(s1); | ||||
|                 var w2 = stations.get(s2); | ||||
|                 var w3 = stations.get(s3); | ||||
|                 var w4 = stations.get(s4); | ||||
|                 w1.measurement(sb); | ||||
|                 w2.measurement(sb); | ||||
|                 w3.measurement(sb); | ||||
|                 w4.measurement(sb); | ||||
|             } | ||||
|             // write the buffer directly, no intermediate string copy | ||||
|             bw.write(sb.data_, 0, sb.length_); | ||||
|  | ||||
|             // reuse buffer, reset only, no cleaning | ||||
|             sb.clear(); | ||||
|         } | ||||
|  | ||||
|         // write what did not fit into a complete batch, one measurement at a time | ||||
|         for (int i = 0; i < rest; i++) { | ||||
|             sb.clear(); | ||||
|  | ||||
|             int s = r1.nextInt(stationCount); | ||||
|             var w = stations.get(s); | ||||
|             w.measurement(sb); | ||||
|  | ||||
|             bw.write(sb.data_, 0, sb.length_); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| } | ||||
							
								
								
									
										715
									
								
								src/main/java/org/rschwietzke/CheaperCharBuffer.java
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										715
									
								
								src/main/java/org/rschwietzke/CheaperCharBuffer.java
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,715 @@ | ||||
| /* | ||||
|  *  Copyright 2023 The original authors | ||||
|  * | ||||
|  *  Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  *  you may not use this file except in compliance with the License. | ||||
|  *  You may obtain a copy of the License at | ||||
|  * | ||||
|  *      http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  *  Unless required by applicable law or agreed to in writing, software | ||||
|  *  distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  *  See the License for the specific language governing permissions and | ||||
|  *  limitations under the License. | ||||
|  */ | ||||
| package org.rschwietzke; | ||||
|  | ||||
| import java.util.Arrays; | ||||
|  | ||||
| /** | ||||
|  * <p>This class is meant to replace the old {@link CheaperCharBuffer} in all areas | ||||
|  * where performance and memory efficiency are key. XMLString compatibility | ||||
|  * remains in place in case one has used that in their own code. | ||||
|  * | ||||
|  * <p>This buffer is mutable and when you use it, make sure you work with | ||||
|  * it responsibly. In many cases, we will reuse the buffer to avoid fresh | ||||
|  * memory allocations, hence you have to pay attention to its usage pattern. | ||||
|  * It is not meant to be a general String replacement. | ||||
|  * | ||||
|  * <p>This class avoids many of the standard runtime checks that will result | ||||
|  * in a runtime or array exception anyway. Why check twice and raise the | ||||
|  * same exception? | ||||
|  * | ||||
|  * @author René Schwietzke | ||||
|  * @since 3.10.0 | ||||
|  */ | ||||
| public class CheaperCharBuffer implements CharSequence { | ||||
|     // our data, can grow - that is not safe and has been altered from the original code | ||||
|     // to allow speed | ||||
|     public char[] data_; | ||||
|  | ||||
|     // the current size of the string data | ||||
|     public int length_; | ||||
|  | ||||
|     // by how much we want to grow the backing array when needed | ||||
|     private final int growBy_; | ||||
|  | ||||
|     // how much do we grow if needed, half a cache line | ||||
|     public static final int CAPACITY_GROWTH = 64 / 2; | ||||
|  | ||||
|     // what is our start size? | ||||
|     // a cache line is 64 byte mostly, the overhead is mostly 24 bytes | ||||
|     // a char is two bytes, let's use one cache line | ||||
|     public static final int INITIAL_CAPACITY = (64 - 24) / 2; | ||||
|  | ||||
|     // static empty version; DON'T MODIFY IT | ||||
|     public static final CheaperCharBuffer EMPTY = new CheaperCharBuffer(0); | ||||
|  | ||||
|     // the Unicode replacement character (U+FFFD) | ||||
|     private static final char REPLACEMENT_CHARACTER = '\uFFFD'; | ||||
|  | ||||
|     /** | ||||
|      * Creates an empty buffer with {@link #INITIAL_CAPACITY} chars of room | ||||
|      * that uses {@link #CAPACITY_GROWTH} as its growth step. | ||||
|      */ | ||||
|     public CheaperCharBuffer() { | ||||
|         this(INITIAL_CAPACITY, CAPACITY_GROWTH); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Creates an empty buffer with the requested initial capacity and | ||||
|      * {@link #CAPACITY_GROWTH} as its growth step. | ||||
|      * | ||||
|      * @param startSize the capacity of the backing array to start with | ||||
|      */ | ||||
|     public CheaperCharBuffer(final int startSize) { | ||||
|         this.data_ = new char[startSize]; | ||||
|         this.length_ = 0; | ||||
|         this.growBy_ = CAPACITY_GROWTH; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Creates an empty buffer with the requested initial capacity and growth step. | ||||
|      * A growth step below 1 is raised to 1. | ||||
|      * | ||||
|      * @param startSize the capacity of the backing array to start with | ||||
|      * @param growBy by how much we want to grow when more room is needed | ||||
|      */ | ||||
|     public CheaperCharBuffer(final int startSize, final int growBy) { | ||||
|         this.data_ = new char[startSize]; | ||||
|         this.growBy_ = growBy < 1 ? 1 : growBy; | ||||
|         this.length_ = 0; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Creates a copy of another buffer. Delegates to the copy constructor | ||||
|      * with an additional capacity of 0, so the capacity of the new buffer | ||||
|      * matches the length of the source. | ||||
|      * | ||||
|      * @param src the source buffer to copy from, must not be null | ||||
|      */ | ||||
|     public CheaperCharBuffer(final CheaperCharBuffer src) { | ||||
|         this(src, 0); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Constructs an XMLCharBuffer from another buffer. Copies the data | ||||
|      * over. You can add more capacity on top of the source length. If | ||||
|      * you specify 0, the capacity will match the src length. | ||||
|      * | ||||
|      * @param src the source buffer to copy from | ||||
|      * @param addCapacity how much capacity to add to origin length | ||||
|      */ | ||||
|     public CheaperCharBuffer(final CheaperCharBuffer src, final int addCapacity) { | ||||
|         this.data_ = Arrays.copyOf(src.data_, src.length_ + Math.max(0, addCapacity)); | ||||
|         this.length_ = src.length(); | ||||
|         this.growBy_ = Math.max(1, CAPACITY_GROWTH); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Constructs an XMLCharBuffer from a string. To avoid | ||||
|      * too much allocation, we just take the string array as is and | ||||
|      * don't allocate extra space in the first place. | ||||
|      * | ||||
|      * @param src the string to copy from | ||||
|      */ | ||||
|     public CheaperCharBuffer(final String src) { | ||||
|         this.data_ = src.toCharArray(); | ||||
|         this.length_ = src.length(); | ||||
|         this.growBy_ = CAPACITY_GROWTH; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Creates a buffer preset with a range of a char array. The initial | ||||
|      * capacity equals {@code length}, so there is no room to grow without | ||||
|      * a reallocation; if you need that, construct an empty buffer with | ||||
|      * more capacity and append. | ||||
|      * | ||||
|      * <p>There are no range checks performed here. Make sure your data is correct. | ||||
|      * | ||||
|      * @param ch     The character array, must not be null | ||||
|      * @param offset The offset into the character array. | ||||
|      * @param length The number of characters to take from the offset. | ||||
|      */ | ||||
|     public CheaperCharBuffer(final char[] ch, final int offset, final int length) { | ||||
|         // allocate just as much as we need, then copy the range over | ||||
|         this(length); | ||||
|         append(ch, offset, length); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Makes sure the backing array can hold at least the given number of chars. | ||||
|      * When it is too small, it is replaced by a larger copy whose size is the | ||||
|      * bigger one of "requested size plus growth step" and "twice the old size plus 2". | ||||
|      * | ||||
|      * @param minimumCapacity how much space do we need at least | ||||
|      */ | ||||
|     private void ensureCapacity(final int minimumCapacity) { | ||||
|         final int current = this.data_.length; | ||||
|         if (minimumCapacity <= current) { | ||||
|             // enough room already | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         final int stepped = minimumCapacity + this.growBy_; | ||||
|         final int doubled = (current << 1) + 2; | ||||
|         this.data_ = Arrays.copyOf(this.data_, stepped > doubled ? stepped : doubled); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns the size of the backing array, hence how many chars fit without | ||||
|      * growing. This says nothing about how much is in use, see {@link #length()} | ||||
|      * for that. | ||||
|      * | ||||
|      * @return the current capacity, usage not taken into account | ||||
|      */ | ||||
|     public int capacity() { | ||||
|         final char[] backing = this.data_; | ||||
|         return backing.length; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Adds one character to the end of the buffer and grows the backing | ||||
|      * array first when it is full. | ||||
|      * | ||||
|      * @param c the character to append | ||||
|      * @return this instance | ||||
|      */ | ||||
|     public CheaperCharBuffer append(final char c) { | ||||
|         final int pos = this.length_; | ||||
|         this.length_ = pos + 1; | ||||
|  | ||||
|         // the growth logic of ensureCapacity is repeated here on purpose, this is | ||||
|         // the most called append and the call was not inlined by the compiler | ||||
|         if (pos == this.data_.length) { | ||||
|             final int stepped = pos + this.growBy_; | ||||
|             final int doubled = (this.data_.length << 1) + 2; | ||||
|             this.data_ = Arrays.copyOf(this.data_, Math.max(stepped, doubled)); | ||||
|         } | ||||
|  | ||||
|         this.data_[pos] = c; | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Append a string to this buffer without copying the string first. | ||||
|      * | ||||
|      * @param src the string to append | ||||
|      * @return this instance | ||||
|      */ | ||||
|     public CheaperCharBuffer append(final String src) { | ||||
|         final int start = this.length_; | ||||
|         this.length_ = this.length_ + src.length(); | ||||
|         ensureCapacity(this.length_); | ||||
|  | ||||
|         // copy char by char because we don't get a copy for free | ||||
|         // from a string yet, this might change when immutable arrays | ||||
|         // make it into Java, but that will not be very soon | ||||
|         for (int i = 0; i < src.length(); i++) { | ||||
|             this.data_[start + i] = src.charAt(i); | ||||
|         } | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Adds the content of another buffer to the end of this one. Appending a | ||||
|      * buffer to itself is supported and doubles its content. | ||||
|      * | ||||
|      * @param src the buffer to append, must not be null | ||||
|      * @return this instance | ||||
|      */ | ||||
|     public CheaperCharBuffer append(final CheaperCharBuffer src) { | ||||
|         // read the source length before we touch our own length, src might be | ||||
|         // this very buffer and we would copy too much otherwise | ||||
|         final int srcLength = src.length_; | ||||
|         final int start = this.length_; | ||||
|         this.length_ = start + srcLength; | ||||
|         ensureCapacity(this.length_); | ||||
|  | ||||
|         // src.data_ is read after a possible growth, so a self-append copies | ||||
|         // within the new array; arraycopy handles the shared array correctly | ||||
|         System.arraycopy(src.data_, 0, this.data_, start, srcLength); | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Adds a range of a char array to the end of this buffer. | ||||
|      * | ||||
|      * @param src the source char array | ||||
|      * @param offset the position in {@code src} to start copying from | ||||
|      * @param length the number of chars to copy | ||||
|      * | ||||
|      * @return this instance | ||||
|      */ | ||||
|     public CheaperCharBuffer append(final char[] src, final int offset, final int length) { | ||||
|         final int writePos = this.length_; | ||||
|         final int grown = writePos + length; | ||||
|  | ||||
|         this.length_ = grown; | ||||
|         ensureCapacity(grown); | ||||
|  | ||||
|         System.arraycopy(src, offset, this.data_, writePos, length); | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns how many chars of the backing array are currently in use. | ||||
|      * | ||||
|      * @return the length of the data held by this buffer | ||||
|      */ | ||||
|     public int length() { | ||||
|         return this.length_; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns the growth step of this buffer. | ||||
|      * | ||||
|      * @return the value that determines by how much we grow the backing | ||||
|      *      array in case we have to | ||||
|      */ | ||||
|     public int getGrowBy() { | ||||
|         return growBy_; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Empties the buffer by setting its length to 0. The backing array is | ||||
|      * kept as it is, neither resized nor wiped, to avoid memory churn. | ||||
|      * | ||||
|      * @return this instance for fluid programming | ||||
|      */ | ||||
|     public CheaperCharBuffer clear() { | ||||
|         // old content stays in the array, it is just no longer in view | ||||
|         length_ = 0; | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Resets the buffer to 0 length and sets the new data. This | ||||
|      * is a little cheaper than clear().append(c) depending on | ||||
|      * the where  and the inlining decisions. | ||||
|      * | ||||
|      * @param c the char to set | ||||
|      * @return this instance for fluid programming | ||||
|      */ | ||||
|     public CheaperCharBuffer clearAndAppend(final char c) { | ||||
|         this.length_ = 0; | ||||
|  | ||||
|         if (this.data_.length > 0) { | ||||
|             this.data_[this.length_] = c; | ||||
|             this.length_++; | ||||
|         } | ||||
|         else { | ||||
|             // the rare case when we don't have any buffer at hand | ||||
|             append(c); | ||||
|         } | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Does this buffer end with this string? If we check for | ||||
|      * the empty string, we get true. If we would support JDK 11, we could | ||||
|      * use Arrays.mismatch and be way faster. | ||||
|      * | ||||
|      * @param s the string to check the end against | ||||
|      * @return true of the end matches the buffer, false otherwise | ||||
|      */ | ||||
|     public boolean endsWith(final String s) { | ||||
|         // length does not match, cannot be the end | ||||
|         if (this.length_ < s.length()) { | ||||
|             return false; | ||||
|         } | ||||
|  | ||||
|         // check the string by each char, avoids a copy of the string | ||||
|         final int start = this.length_ - s.length(); | ||||
|  | ||||
|         // change this to Arrays.mismatch when going JDK 11 or higher | ||||
|         for (int i = 0; i < s.length(); i++) { | ||||
|             if (this.data_[i + start] != s.charAt(i)) { | ||||
|                 return false; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Reduces the buffer to the content between start and end marker when | ||||
|      * only whitespaces are found before the startMarker as well as after the end marker. | ||||
|      * If both strings overlap due to identical characters such as "foo" and "oof" | ||||
|      * and the buffer is " foof ", we don't do anything. | ||||
|      * | ||||
|      * <p>If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side. | ||||
|      * | ||||
|      * <p>This method only forwards to {@link #trimToContent(String, String)}. | ||||
|      * | ||||
|      * @param startMarker the start string to find, must not be null | ||||
|      * @param endMarker the end string to find, must not be null | ||||
|      * @return this instance | ||||
|      * | ||||
|      * @deprecated Use the new method {@link #trimToContent(String, String)} instead. | ||||
|      */ | ||||
|     @Deprecated | ||||
|     public CheaperCharBuffer reduceToContent(final String startMarker, final String endMarker) { | ||||
|         return trimToContent(startMarker, endMarker); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Reduces the buffer to the content between start and end marker when | ||||
|      * only whitespaces are found before the startMarker as well as after the end marker. | ||||
|      * If both strings overlap due to identical characters such as "foo" and "oof" | ||||
|      * and the buffer is " foof ", we don't do anything. | ||||
|      * | ||||
|      * <p>If a marker is empty, it behaves like {@link java.lang.String#trim()} on that side. | ||||
|      * | ||||
|      * <p>A buffer that holds nothing but whitespace is left untouched when at | ||||
|      * least one marker is not empty. | ||||
|      * | ||||
|      * @param startMarker the start string to find, must not be null | ||||
|      * @param endMarker the end string to find, must not be null | ||||
|      * @return this instance | ||||
|      */ | ||||
|     public CheaperCharBuffer trimToContent(final String startMarker, final String endMarker) { | ||||
|         // if both are longer or same length than content, don't do anything | ||||
|         final int markerLength = startMarker.length() + endMarker.length(); | ||||
|         if (markerLength >= this.length_) { | ||||
|             return this; | ||||
|         } | ||||
|  | ||||
|         // run over starting whitespaces | ||||
|         int sPos = 0; | ||||
|         for (; sPos < this.length_ - markerLength; sPos++) { | ||||
|             if (!Character.isWhitespace(this.data_[sPos])) { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // run over ending whitespaces; the lower limit can be negative for a short | ||||
|         // whitespace-only buffer, so never step in front of the array | ||||
|         int ePos = this.length_ - 1; | ||||
|         for (; ePos >= 0 && ePos > sPos - markerLength; ePos--) { | ||||
|             if (!Character.isWhitespace(this.data_[ePos])) { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // if we have less content than marker length, give up | ||||
|         // this also helps when markers overlap such as | ||||
|         // <!-- and --> and the string is " <!---> " | ||||
|         if (ePos - sPos + 1 < markerLength) { | ||||
|             return this; | ||||
|         } | ||||
|  | ||||
|         // check the start | ||||
|         for (int i = 0; i < startMarker.length(); i++) { | ||||
|             if (startMarker.charAt(i) != this.data_[i + sPos]) { | ||||
|                 // no start match, stop and don't do anything | ||||
|                 return this; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // check the end, ePos is where the last non-whitespace char | ||||
|         // occurred | ||||
|         final int endStartCheckPos = ePos - endMarker.length() + 1; | ||||
|         for (int i = 0; i < endMarker.length(); i++) { | ||||
|             if (endMarker.charAt(i) != this.data_[endStartCheckPos + i]) { | ||||
|                 // no end match, stop and don't do anything | ||||
|                 return this; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // shift left and cut length | ||||
|         final int newLength = ePos - sPos + 1 - markerLength; | ||||
|         System.arraycopy(this.data_, | ||||
|                 sPos + startMarker.length(), | ||||
|                 this.data_, | ||||
|                 0, newLength); | ||||
|         this.length_ = newLength; | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Check if we have only whitespaces | ||||
|      * | ||||
|      * @return true if we have only whitespace, false otherwise | ||||
|      */ | ||||
|     public boolean isWhitespace() { | ||||
|         for (int i = 0; i < this.length_; i++) { | ||||
|             if (!Character.isWhitespace(this.data_[i])) { | ||||
|                 return false; | ||||
|             } | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Trims the buffer in place similar to {@link java.lang.String#trim()}, | ||||
|      * whitespace is determined by {@link Character#isWhitespace(char)}. | ||||
|      * | ||||
|      * @return the result of trimming the end and then the start of this buffer | ||||
|      */ | ||||
|     public CheaperCharBuffer trim() { | ||||
|         // clean the end first, because it is cheap: it only lowers the length | ||||
|         return trimTrailing().trimLeading(); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Removes all whitespace before the first non-whitespace char. | ||||
|      * If all are whitespaces, we get an empty buffer | ||||
|      * | ||||
|      * @return this instance | ||||
|      */ | ||||
|     public CheaperCharBuffer trimLeading() { | ||||
|         // run over starting whitespace | ||||
|         int sPos = 0; | ||||
|         for (; sPos < this.length_; sPos++) { | ||||
|             if (!Character.isWhitespace(this.data_[sPos])) { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (sPos == 0) { | ||||
|             // nothing to do | ||||
|             return this; | ||||
|         } | ||||
|         else if (sPos == this.length_) { | ||||
|             // only whitespace | ||||
|             this.length_ = 0; | ||||
|             return this; | ||||
|         } | ||||
|  | ||||
|         // shift left | ||||
|         final int newLength = this.length_ - sPos; | ||||
|         System.arraycopy(this.data_, | ||||
|                 sPos, | ||||
|                 this.data_, | ||||
|                 0, newLength); | ||||
|         this.length_ = newLength; | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Removes all whitespace at the end. | ||||
|      * If all are whitespace, we get an empty buffer. | ||||
|      * | ||||
|      * <p>This method only forwards to {@link #trimTrailing()}. | ||||
|      * | ||||
|      * @return this instance | ||||
|      * | ||||
|      * @deprecated Use {@link #trimTrailing()} instead. | ||||
|      */ | ||||
|     @Deprecated | ||||
|     public CheaperCharBuffer trimWhitespaceAtEnd() { | ||||
|         return trimTrailing(); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Drops all whitespace at the end by lowering the length, no data is moved. | ||||
|      * If there is only whitespace, we end up with an empty buffer. | ||||
|      * | ||||
|      * @return this instance | ||||
|      */ | ||||
|     public CheaperCharBuffer trimTrailing() { | ||||
|         // walk backwards as long as the last char in view is whitespace | ||||
|         int newLength = this.length_; | ||||
|         while (newLength > 0 && Character.isWhitespace(this.data_[newLength - 1])) { | ||||
|             newLength--; | ||||
|         } | ||||
|  | ||||
|         this.length_ = newLength; | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Cuts the given number of positions off the end of the buffer. If the | ||||
|      * count is larger than the length, the buffer is simply empty afterwards. | ||||
|      * Negative values are not checked and will lead to failures, likely often | ||||
|      * silent ones. It is all about performance and not a general all-purpose API. | ||||
|      * | ||||
|      * @param count a positive number, no runtime checks, if count is larger than | ||||
|      *      length, we get length = 0 | ||||
|      * @return this instance | ||||
|      */ | ||||
|     public CheaperCharBuffer shortenBy(final int count) { | ||||
|         this.length_ = Math.max(0, this.length_ - count); | ||||
|  | ||||
|         return this; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns the content in view as a new char array, this is always a copy! | ||||
|      * | ||||
|      * @return a copy of the underlying char data | ||||
|      */ | ||||
|     public char[] getChars() { | ||||
|         final char[] copy = Arrays.copyOf(this.data_, this.length_); | ||||
|         return copy; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns a string representation of this buffer. This will be a copy | ||||
|      * operation. If the buffer is emoty, we get a constant empty String back | ||||
|      * to avoid any overhead. | ||||
|      * | ||||
|      * @return a string of the content of this buffer | ||||
|      */ | ||||
|     @Override | ||||
|     public String toString() { | ||||
|         if (this.length_ > 0) { | ||||
|             return new String(this.data_, 0, this.length_); | ||||
|         } | ||||
|         else { | ||||
|             return ""; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns the char at the given position. The index is checked against | ||||
|      * the current length, because a read inside the backing array but beyond | ||||
|      * the length would otherwise stay unnoticed. | ||||
|      * | ||||
|      * @param index the position to read from | ||||
|      * @return the char at the position | ||||
|      * @throws IndexOutOfBoundsException | ||||
|      *      in case one tries to read outside of the valid buffer range | ||||
|      */ | ||||
|     @Override | ||||
|     public char charAt(final int index) { | ||||
|         if (index < 0 || index >= this.length_) { | ||||
|             throw new IndexOutOfBoundsException( | ||||
|                     "Tried to read outside of the valid buffer data"); | ||||
|         } | ||||
|  | ||||
|         return this.data_[index]; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns the char at the given position without checking the index | ||||
|      * against the current length. The caller has to make sure the position | ||||
|      * is valid. Reading outside of the backing array still raises an | ||||
|      * {@link IndexOutOfBoundsException}, but reading inside the array and | ||||
|      * beyond the length stays unnoticed and delivers stale data! This is a | ||||
|      * performance method, use at your own risk. | ||||
|      * | ||||
|      * @param index the position to read from | ||||
|      * @return the char at the position | ||||
|      */ | ||||
|     public char unsafeCharAt(final int index) { | ||||
|         final char[] backing = this.data_; | ||||
|         return backing[index]; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns a content copy of this buffer, created with the copy | ||||
|      * constructor and not with {@link Object#clone()}. | ||||
|      * | ||||
|      * @return a copy of this buffer, the capacity might differ | ||||
|      */ | ||||
|     @Override | ||||
|     public CheaperCharBuffer clone() { | ||||
|         return new CheaperCharBuffer(this); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Returns a <code>CharSequence</code> that is a subsequence of this sequence. | ||||
|      * The subsequence starts with the <code>char</code> value at the specified index and | ||||
|      * ends with the <code>char</code> value at index {@code end - 1}. The length | ||||
|      * (in <code>char</code>s) of the returned sequence is {@code end - start}, | ||||
|      * so if {@code start == end} then an empty sequence is returned. | ||||
|      * The result is a new string, hence a copy of the data. | ||||
|      * | ||||
|      * @param   start   the start index, inclusive | ||||
|      * @param   end     the end index, exclusive | ||||
|      * | ||||
|      * @return  the specified subsequence | ||||
|      * | ||||
|      * @throws  IndexOutOfBoundsException | ||||
|      *          if {@code start} is negative, | ||||
|      *          if {@code end} is greater than {@code length()}, | ||||
|      *          or if {@code start} is greater than {@code end} | ||||
|      */ | ||||
|     @Override | ||||
|     public CharSequence subSequence(final int start, final int end) { | ||||
|         final int count = end - start; | ||||
|  | ||||
|         if (start < 0) { | ||||
|             throw new StringIndexOutOfBoundsException(start); | ||||
|         } | ||||
|         else if (end > this.length_) { | ||||
|             throw new StringIndexOutOfBoundsException(end); | ||||
|         } | ||||
|         else if (count < 0) { | ||||
|             throw new StringIndexOutOfBoundsException(count); | ||||
|         } | ||||
|  | ||||
|         return new String(this.data_, start, count); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Two buffers are identical when the length and | ||||
|      * the content of the backing array (only for the | ||||
|      * data in view) are identical. | ||||
|      * | ||||
|      * @param o the object to compare with | ||||
|      * @return true if length and array content match, false otherwise | ||||
|      */ | ||||
|     @Override | ||||
|     public boolean equals(final Object o) { | ||||
|         if (o instanceof CharSequence) { | ||||
|             final CharSequence ob = (CharSequence) o; | ||||
|  | ||||
|             if (ob.length() != this.length_) { | ||||
|                 return false; | ||||
|             } | ||||
|  | ||||
|             // ok, in JDK 11 or up, we could use an | ||||
|             // Arrays.mismatch, but we cannot do that | ||||
|             // due to JDK 8 compatibility | ||||
|             for (int i = 0; i < this.length_; i++) { | ||||
|                 if (ob.charAt(i) != this.data_[i]) { | ||||
|                     return false; | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             // length and content match, be happy | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * We don't cache the hashcode because we mutate often. Don't use this in | ||||
|      * hashmaps as key. But you can use that to look up in a hashmap against | ||||
|      * a string using the CharSequence interface. | ||||
|      * | ||||
|      * @return the hashcode, similar to what a normal string would deliver | ||||
|      */ | ||||
|     @Override | ||||
|     public int hashCode() { | ||||
|         int h = 0; | ||||
|  | ||||
|         for (int i = 0; i < this.length_; i++) { | ||||
|             h = ((h << 5) - h) + this.data_[i]; | ||||
|         } | ||||
|  | ||||
|         return h; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Append a character to an XMLCharBuffer. The character is an int value, and | ||||
|      * can either be a single UTF-16 character or a supplementary character | ||||
|      * represented by two UTF-16 code points. | ||||
|      * | ||||
|      * @param value The character value. | ||||
|      * @return this instance for fluid programming | ||||
|      * | ||||
|      * @throws IllegalArgumentException if the specified | ||||
|      *          {@code codePoint} is not a valid Unicode code point. | ||||
|      */ | ||||
|     public CheaperCharBuffer appendCodePoint(final int value) { | ||||
|         if (value <= Character.MAX_VALUE) { | ||||
|             return this.append((char) value); | ||||
|         } | ||||
|         else { | ||||
|             try { | ||||
|                 final char[] chars = Character.toChars(value); | ||||
|                 return this.append(chars, 0, chars.length); | ||||
|             } | ||||
|             catch (final IllegalArgumentException e) { | ||||
|                 // when value is not valid as UTF-16 | ||||
|                 this.append(REPLACEMENT_CHARACTER); | ||||
|                 throw e; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										76
									
								
								src/main/java/org/rschwietzke/FastRandom.java
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								src/main/java/org/rschwietzke/FastRandom.java
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
| /* | ||||
|  *  Copyright 2023 The original authors | ||||
|  * | ||||
|  *  Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  *  you may not use this file except in compliance with the License. | ||||
|  *  You may obtain a copy of the License at | ||||
|  * | ||||
|  *      http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  *  Unless required by applicable law or agreed to in writing, software | ||||
|  *  distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  *  See the License for the specific language governing permissions and | ||||
|  *  limitations under the License. | ||||
|  */ | ||||
| package org.rschwietzke; | ||||
|  | ||||
| /** | ||||
|  * Ultra-fast pseudo random generator that is not synchronized! | ||||
|  * Don't use anything from Random by inheritance, this will inherit | ||||
|  * a volatile! Not my idea, copyied in parts some demo random | ||||
|  * generator lessons. | ||||
|  * | ||||
|  * @author rschwietzke | ||||
|  * | ||||
|  */ | ||||
| public class FastRandom { | ||||
|     private long seed; | ||||
|  | ||||
|     public FastRandom() { | ||||
|         this.seed = System.currentTimeMillis(); | ||||
|     } | ||||
|  | ||||
|     public FastRandom(long seed) { | ||||
|         this.seed = seed; | ||||
|     } | ||||
|  | ||||
|     protected int next(int nbits) { | ||||
|         // N.B. Not thread-safe! | ||||
|         long x = this.seed; | ||||
|         x ^= (x << 21); | ||||
|         x ^= (x >>> 35); | ||||
|         x ^= (x << 4); | ||||
|         this.seed = x; | ||||
|  | ||||
|         x &= ((1L << nbits) - 1); | ||||
|  | ||||
|         return (int) x; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Borrowed from the JDK | ||||
|      * | ||||
|      * @param bound | ||||
|      * @return | ||||
|      */ | ||||
|     public int nextInt(int bound) { | ||||
|         int r = next(31); | ||||
|         int m = bound - 1; | ||||
|         if ((bound & m) == 0) // i.e., bound is a power of 2 | ||||
|             r = (int) ((bound * (long) r) >> 31); | ||||
|         else { | ||||
|             for (int u = r; u - (r = u % bound) + m < 0; u = next(31)) | ||||
|                 ; | ||||
|         } | ||||
|         return r; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Borrowed from the JDK | ||||
|      * @return | ||||
|      */ | ||||
|     public int nextInt() { | ||||
|         return next(32); | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user