import os
import sys
valid_lines = []
corrupt_lines = []
'''
The validate_data function will check the students.csv line by line for corrupt data.
- Valid lines should be added to the valid_lines list.
- Invalid lines should be added to the corrupt_lines list.
Example input: 0896801,Kari,Wilmore,1970-06-18,INF
This data is valid and the line should be added to the valid_lines list unchanged.
Example input: 0773226,Junette,Gur_ry,1995-12-05,
This data is invalid and the line should be added to the corrupt_lines list in the following format:
0773226,Junette,Gur_ry,1995-12-05, => INVALID DATA: ['0773226', 'Gur_ry', '']
In the above example the studentnumber does not start with '08' or '09',
the last name contains a special character and the student program is empty.
Don't forget to put the students.csv file in the same location as this file!
'''
def validate_data(line):
# TYPE YOUR SOLUTION CODE HERE
#CSV STUDENT NUMBER
student_number_valid = True
index = 0
studentnumber, firstname, lastname, birthdate, studyprogram = line.split(",")
for element in line.split(","):
if index == 0:
student_number = element
if len(studentnumber) > 0:
csv_s_n = studentnumber[0]
csv_s_n2 = studentnumber[1]
if csv_s_n == '0' and csv_s_n2 == '8' or '9':
valid_lines.append(studentnumber)
else:
corrupt_lines.append(studentnumber)
else:
corrupt_lines.append(studentnumber)
#CSV NAME
if len(firstname) > 0:
if firstname.isalpha() == True:
valid_lines.append(firstname)
else:
corrupt_lines.append(firstname)
else:
corrupt_lines.append(firstname)
if len(lastname) > 0:
if lastname.isalpha() == True:
valid_lines.append(lastname)
else:
corrupt_lines.append(lastname)
else:
corrupt_lines.append(lastname)
#CSV BIRTHDAY
if len(birthdate.split()) == 3:
year1, month1, day1 = birthdate.split("-")
year1=int(year1)
valid_months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
valid_days = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10","11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"]
thirty_day_months = ["04", "06", "09", "11"]
if not year1 in range(1960, 2004 + 1):
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
if not month1 in valid_months:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
if month1 == "02":
valid_days.remove("31")
valid_days.remove("30")
valid_days.remove("29")
if not day1 in valid_days:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
# validate day for february
if month1 in thirty_day_months:
valid_days.remove("31")
if not day1 in valid_days:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
# validate day for 30-day months
else:
birthdate = (f"{year1}-{month1}-{day1}")
valid_lines.append(birthdate)
else:
corrupt_lines.append(birthdate)
#CSV STUDYPROGRAM
if len(studyprogram) > 1:
if studyprogram == "INF" or "TINF" or "CMD" or "AI":
valid_lines.append(studyprogram)
else:
corrupt_lines.append(studyprogram)
else:
corrupt_lines.append(studyprogram)
def main(csv_file):
with open(os.path.join(sys.path[0], csv_file), newline='') as csv_file:
# skip header line
next(csv_file)
for line in csv_file:
validate_data(line.strip())
print('### VALID LINES ###')
print("\n".join(valid_lines))
print('### CORRUPT LINES ###')
print("\n".join(corrupt_lines))
if __name__ == "__main__":
main('students.csv')
As you can read, the function validate_data should check the imported file for corrupt and valid lines, then append them to the correct list, and print them. It works, except that, as you can probably see, the lines will not print in a single line.
I'm sure I have to make two other lists to append the correct data into a single line, doing the same with the corrupt data, but when I try it fails.