I dont understand why empty cells and rows of the attached image are not removed with the following code. output file I want "Speaker 1 Text" column to align with "Speaker 2 Text" column
import os
path = os.path.abspath('/Users/daneweickert/Library/CloudStorage/GoogleDrive-weickertdane99@gmail.com/My Drive/Business/gpt3/audio/MMA/sean_omalley/interviews/interview_transcript/tester.csv')
if os.path.isfile(path):
print("File exist")
else:
print("File does not exist")
import pandas as pd
df = pd.read_csv(path)
# Create empty columns called "Speaker 1 Text" and "Speaker 2 Text"
df["Speaker 1 Text"] = ""
df["Speaker 2 Text"] = ""
# Iterate through each row of the data
for i in range(len(df)):
# Check if the current row contains the string "SPEAKER 1"
if "SPEAKER 1" in df.iloc[i][0]:
# If it does, check if the next row is empty
while pd.isna(df.iloc[i+1][0]):
i+=1
# If the next row is not empty, copy the text from the row after that to the "Speaker 1 Text" column
df.at[i+1, "Speaker 1 Text"] = df.iloc[i+1][0]
# Check if the current row contains the string "SPEAKER 2"
elif "SPEAKER 2" in df.iloc[i][0]:
# If it does, check if the next row is empty
while pd.isna(df.iloc[i+1][0]):
i+=1
# If the next row is not empty, copy the text from the row after that to the "Speaker 2 Text" column
df.at[i+1, "Speaker 2 Text"] = df.iloc[i+1][0]
# Remove the first column
df = df.drop(columns=df.columns[0])
# Remove rows with missing values in either column
df.dropna(subset=["Speaker 1 Text", "Speaker 2 Text"], thresh=1, axis=0, inplace=True)
# Save the modified data to a new CSV file
destination = os.path.join('/Users/daneweickert/Library/CloudStorage/GoogleDrive-weickertdane99@gmail.com/My Drive/Business/gpt3/test')
df.to_csv(os.path.join(destination, "modified_data.csv"),index=False)