My goal is to run my data de-identification script from a Flask app and then download the de-identified data. So far, I have created a page that uploads my file to the web app. I now want to run the de-identification script when the Execute button on my HTML page is clicked, and then download the resulting file.
My HTML:
{% block title %}Upload{% endblock %}
{% block main %}
{# Upload page. The first form POSTs the selected file to /upload-file;
   the second form sends a plain GET request to the same URL. #}
<div class="container">
  <div class="row">
    <div class="col">
      <h1>Upload the file</h1>
      <hr>
      {# multipart/form-data is required so the browser sends the file body. #}
      <form action="/upload-file" method="POST" enctype="multipart/form-data">
        <div class="form-group">
          <label>Select file</label>
          <div class="custom-file">
            {# The name "Dataset" is the key the server reads from the uploaded files. #}
            <input type="file" class="custom-file-input" name="Dataset" id="Dataset">
            <label class="custom-file-label" for="Dataset">Select file...</label>
          </div>
        </div>
        <button type="submit" class="btn btn-primary">De-identify</button>
      </form>
      {# A GET form has no request body, so no enctype is needed here. #}
      <form action="/upload-file" method="GET">
        <button type="submit" class="btn btn-primary">Execute</button>
      </form>
    </div>
  </div>
</div>
{% endblock %}
My Flask app route:
# Upload settings: the folder uploaded files are written to and the list of
# accepted file extensions.
app.config.update(
    FILE_UPLOADS="app/app/static/csv/uploads",
    ALLOWED_FILE_EXTENSIONS=["csv"],
)
def allowed_file(filename):
    """Return True when *filename* has an extension allowed by the app config.

    The comparison is case-insensitive, so ``data.csv`` and ``DATA.CSV`` are
    both accepted when ``"csv"`` is listed in ``ALLOWED_FILE_EXTENSIONS``.
    """
    # A name without a dot has no extension to validate.
    if "." not in filename:
        return False
    # Everything after the last dot is the extension.
    ext = filename.rsplit(".", 1)[1]
    # Normalise both sides to lower case; comparing an upper-cased extension
    # against a lower-case list can never match.
    allowed = {item.lower() for item in app.config["ALLOWED_FILE_EXTENSIONS"]}
    return ext.lower() in allowed


@app.route("/upload-file", methods=["GET", "POST"])
def upload_file():
    """Show the upload page and store a file submitted through it.

    GET renders the upload template. POST with a file in the ``Dataset`` form
    field validates the name and extension, saves the file into the
    ``FILE_UPLOADS`` folder and redirects back to this URL. A POST without
    any files falls through to rendering the template.
    """
    if request.method == "POST" and request.files:
        uploaded = request.files["Dataset"]

        # An empty filename is rejected here before any validation or saving.
        if uploaded.filename == "":
            print("File must have a filename ")
            return redirect(request.url)

        # Reject anything whose extension is NOT in the allowed list.
        if not allowed_file(uploaded.filename):
            print("That file extension is not allowed")
            return redirect(request.url)

        # Sanitise the client-supplied name before using it in a path.
        filename = secure_filename(uploaded.filename)
        uploaded.save(os.path.join(app.config["FILE_UPLOADS"], filename))
        print("Dataset saved")
        return redirect(request.url)

    return render_template("public/upload_file.html")
The file that I have uploaded:
Housing,Houseprice,Name,Neighbourhood,State
123556,100000,John,Bloomingdale,Washington
111777,250000,Ian,Bloomingdale,Washington
998273,250000,Tom,Spring Valley,California
My de-identifying script:
import pandas as pd
import uuid as u
# Generate a sequence of pseudo-identifiers using the standard-library uuid module.
def uudi_generator(length):
    """Return a list of *length* freshly generated random UUIDs.

    Args:
        length: Number of identifiers to produce; zero or a negative value
            yields an empty list.

    Returns:
        A list of ``uuid.UUID`` objects created with ``uuid4()``.
    """
    return [u.uuid4() for _ in range(length)]
# Load the dataset that is to be de-identified.
dataset = pd.read_csv('C:\\mylocation\\housing.csv', index_col=False)

# Prepend a pseudo-identifier column holding one freshly generated UUID per row.
sLength = len(dataset['Housing'])
dataset.insert(0, 'uuid', pd.Series(uudi_generator(sLength), index=dataset.index))

# Delete the identifiable columns from the dataset. The key must match the
# CSV header exactly ('Neighbourhood'); a misspelled name raises KeyError.
del dataset['Name']
del dataset['Neighbourhood']