import numpy as np
import pandas as pd
import math
j = 0
k = 0
time_array = []
average_pa = []
for i in range(3600):
time_array.append(j)
average_pa.append(k)
j += 0.1
time_array1 = tuple(time_array)
pa1 = 0.0
time1 = 0.0
chunk_size = 1000
for chunk in pd.read_csv("data_fiftydyne.txt", header=1, delimiter='\t',
chunksize=chunk_size,
skip_blank_lines=True, error_bad_lines=False, keep_default_na=False):
file2 = chunk.to_numpy()
for jj in range(file2.shape[0]):
time1 = file2[jj][9]
if type(time1) == str:
time1 = 362
pa1 = file2[jj][11]
ff = open("aver_pa_time_fiftydyne.csv", "a")
ff.truncate(0)
ff.seek(0)
for ii in range(3600):
if math.floor(10 * time_array1[ii]) == math.floor(float(10 * time1)):
average_pa[ii] += pas1
print(pa1)
print(time1)
ff.write(str(average_pa[ii]) + ',' + str(time_array1[ii]) + '\n')
ff.close()
This code works serially. How can it be parallelized? The input file is 67 GB, and the serial run takes about 5 days on a single CPU at 4.2 GHz with 8 GB of RAM.