batched writing to disk (#659)
Instead of writing the result line by line, use random.choices to randomly pick a whole batch of stations at once and write large batches to the disk. Also, instead of "round", just use the :.1f format specifier, which is probably quicker on a large scale because it's not a mathematical function.
This commit is contained in:
parent
0c5c22882b
commit
f55317973c
@ -110,15 +110,18 @@ def build_test_data(weather_station_names, num_rows_to_create):
|
|||||||
coldest_temp = -99.9
|
coldest_temp = -99.9
|
||||||
hottest_temp = 99.9
|
hottest_temp = 99.9
|
||||||
station_names_10k_max = random.choices(weather_station_names, k=10_000)
|
station_names_10k_max = random.choices(weather_station_names, k=10_000)
|
||||||
progress_step = max(1, int(num_rows_to_create / 100))
|
batch_size = 10000 # instead of writing line by line to file, process a batch of stations and put it to disk
|
||||||
|
progress_step = max(1, (num_rows_to_create // batch_size) // 100)
|
||||||
print('Building test data...')
|
print('Building test data...')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open("../../../data/measurements.txt", 'w') as file:
|
with open("../../../data/measurements.txt", 'w') as file:
|
||||||
for s in range(0,num_rows_to_create):
|
for s in range(0,num_rows_to_create // batch_size):
|
||||||
random_station = random.choice(station_names_10k_max)
|
|
||||||
random_temp = round(random.uniform(coldest_temp, hottest_temp), 1)
|
batch = random.choices(station_names_10k_max, k=batch_size)
|
||||||
file.write(f"{random_station};{random_temp}\n")
|
prepped_deviated_batch = '\n'.join([f"{station};{random.uniform(coldest_temp, hottest_temp):.1f}" for station in batch]) # :.1f should quicker than round on a large scale, because round utilizes mathematical operation
|
||||||
|
file.write(prepped_deviated_batch + '\n')
|
||||||
|
|
||||||
# Update progress bar every 1%
|
# Update progress bar every 1%
|
||||||
if s % progress_step == 0 or s == num_rows_to_create - 1:
|
if s % progress_step == 0 or s == num_rows_to_create - 1:
|
||||||
sys.stdout.write('\r')
|
sys.stdout.write('\r')
|
||||||
|
Loading…
Reference in New Issue
Block a user