import concurrent.futures
import multiprocessing
import time

import pandas as pd

# Load the CSV in chunks and assemble one DataFrame
df = pd.DataFrame()
for chunk in pd.read_csv('rows.csv', skipinitialspace=True, encoding='utf8',
                         engine='python', chunksize=1000):
    df = pd.concat([df, chunk], ignore_index=True)

pools = []  # collects error messages from failed comparisons


class ParallelMultiProcess:
    @staticmethod
    def createdf():
        df.dropna(inplace=True)


class Compare:
    @staticmethod
    def read_files(i):
        # Compare row i against every later row
        for t in range(i + 1, 2000):
            try:
                print(str(df["Product"].iloc[i]) + " " + str(df["Product"].iloc[t]))
            except Exception:
                pools.append("Something went wrong")


class ParallelExtractor:
    def __init__(self):
        with concurrent.futures.ThreadPoolExecutor() as executor:
            executor.submit(ParallelMultiProcess.createdf)  # pass the callable, don't call it

    def runprocess(self):
        start_time = time.time()
        with multiprocessing.Pool(processes=20) as pool:  # computer freezes here
            pool.map(Compare.read_files, range(1, 2000))
        print(time.time() - start_time)
I'm trying to organize a large dataset and compare the rows of each column with each other. This process takes a long time, so I want to use multiprocessing to reduce the runtime, but when I run this code the program stops or the computer freezes. I tried doing it with threads, but that did not reduce the program's runtime, and my aim is to process this data faster. How do I process 1M rows with multiprocessing? What should I do so that the computer does not freeze? What is the maximum number of processes I can use?
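For reference, here is a minimal, self-contained sketch of the direction I am considering: loading the DataFrame once per worker through the Pool initializer instead of sharing a module-level global, and returning results instead of printing inside the workers. The names init_worker and compare_row are placeholders I made up for this sketch, and the cpu_count() cap and chunksize value are guesses on my part, not something I have verified:

import multiprocessing

import pandas as pd

CSV_PATH = 'rows.csv'  # placeholder; stands in for my real file

_df = None  # per-worker copy of the data


def init_worker(path):
    # Runs once in each worker process: load and clean the data there,
    # so the large DataFrame never has to be pickled for every task
    global _df
    _df = pd.read_csv(path, skipinitialspace=True, encoding='utf8')
    _df.dropna(inplace=True)


def compare_row(i):
    # Compare row i against every later row and return the pairs,
    # letting the parent process decide what to do with them
    out = []
    for t in range(i + 1, len(_df)):
        out.append((_df["Product"].iloc[i], _df["Product"].iloc[t]))
    return out


if __name__ == '__main__':  # required when the start method is spawn (Windows/macOS)
    with multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                              initializer=init_worker,
                              initargs=(CSV_PATH,)) as pool:
        for result in pool.imap_unordered(compare_row, range(2000), chunksize=50):
            pass  # consume each worker's results here

I am not sure whether this pattern scales to 1M rows, since the number of pairwise comparisons grows quadratically, which is part of what I am asking about.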