More accurate file size estimate
This commit is contained in:
		
				
					committed by
					
						 Gunnar Morling
						Gunnar Morling
					
				
			
			
				
	
			
			
			
						parent
						
							c92346790e
						
					
				
				
					commit
					6daa93cca1
				
			| @@ -84,22 +84,18 @@ def estimate_file_size(weather_station_names, num_rows_to_create): | ||||
|     """ | ||||
|     Tries to estimate how large a file the test data will be | ||||
|     """ | ||||
|     max_string = float('-inf') | ||||
|     min_string = float('inf') | ||||
|     per_record_size = 0 | ||||
|     record_size_unit = "bytes" | ||||
|     total_name_bytes = sum(len(s.encode("utf-8")) for s in weather_station_names) | ||||
|     avg_name_bytes = total_name_bytes / float(len(weather_station_names)) | ||||
|  | ||||
|     for station in weather_station_names: | ||||
|         if len(station) > max_string: | ||||
|             max_string = len(station) | ||||
|         if len(station) < min_string: | ||||
|             min_string = len(station) | ||||
|         per_record_size = ((max_string + min_string * 2) + len(",-123.4")) / 2 | ||||
|     # avg_temp_bytes = sum(len(str(n / 10)) for n in range(-999, 1000)) / 1999 | ||||
|     avg_temp_bytes = 4.400200100050025 | ||||
|  | ||||
|     total_file_size = num_rows_to_create * per_record_size | ||||
|     human_file_size = convert_bytes(total_file_size) | ||||
|     # add 2 for separator and newline | ||||
|     avg_line_length = avg_name_bytes + avg_temp_bytes + 2 | ||||
|  | ||||
|     return f"Estimated max file size is:  {human_file_size}.\nTrue size is probably much smaller (around half)." | ||||
|     human_file_size = convert_bytes(num_rows_to_create * avg_line_length) | ||||
|  | ||||
|     return f"Estimated max file size is:  {human_file_size}." | ||||
|  | ||||
|  | ||||
| def build_test_data(weather_station_names, num_rows_to_create): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user