I am trying to write an AES script that takes the name of a few columns and encrypts then and generates a new csv file with encrypted columns.
The script can also take a password and encrypted csv file to decrypt it.
The decryption part of the code isn't working.
I have tried playing around with encode and decode. And checking for logic errors.
import pandas as pd
import sys
import base64
from Crypto.Cipher import AES
from Crypto import Random
from Crypto.Protocol.KDF import PBKDF2
from Crypto.Util.Padding import pad, unpad
import os
import time
import getpass
'''
# AES 256 encryption/decryption using pycrypto library
Credits: https://www.quickprogrammingtips.com/python/aes-256-encryption-and-decryption-in-python.html
https://stackoverflow.com/questions/12524994/encrypt-decrypt-using-pycrypto-aes-256
'''
BLOCK_SIZE = 16
def get_private_key_salt(password, salt=None):
if salt == None:
salt = os.urandom(16)
kdf = PBKDF2(password, salt, 64, 100000)
key = kdf[:32]
return [key,salt]
def encrypt(raw, private_key):
iv = os.urandom(AES.block_size)
cipher = AES.new(private_key, AES.MODE_CBC, iv)
return base64.b64encode(iv + cipher.encrypt(pad(raw.encode('utf-8'),BLOCK_SIZE)))
def decrypt(enc, private_key):
enc = base64.b64decode(enc)
iv = enc[:16]
cipher = AES.new(private_key, AES.MODE_CBC, iv)
#line56
return unpad(cipher.decrypt(enc[16:]),BLOCK_SIZE)
def encrypt_cols(encryption_dict,password):
encrypted_dict = {}
encrypted_dict['salt'] = []
for dict_key in encryption_dict.keys():
encrypted_dict[dict_key] = []
for index in range(0,len(encryption_dict[dict_key])):
try:
key_salt_pair = get_private_key_salt(password,encrypted_dict['salt'][index])
except IndexError:
key_salt_pair = get_private_key_salt(password)
encrypted_dict['salt'].append(key_salt_pair[1])
cipher_text = encrypt(str(encryption_dict[dict_key][index]), key_salt_pair[0])
encrypted_dict[dict_key].append(cipher_text.decode("utf-8"))
return encrypted_dict
def decrypt_cols(encrypted_dict,password):
decrypted_dict = {}
decrypted_dict['salt'] = encrypted_dict['salt']
for dict_key in encrypted_dict.keys():
decrypted_dict[dict_key] = []
if dict_key != 'salt':
for index in range(0,len(encrypted_dict[dict_key])):
key_salt_pair = get_private_key_salt(password,encrypted_dict['salt'][index])
cipher_text = encrypted_dict[dict_key][index].encode('utf-8')
#line88
plain_text = decrypt(cipher_text, key_salt_pair[0]).decode('utf-8')
decrypted_dict[dict_key].append(plain_text)
return decrypted_dict
def prep_encryption_cols(encrypt_list):
encryption_dict = {}
for col_name in encrypt_list:
try:
encryption_dict[col_name]=df[col_name]
except KeyError:
print('No column with name \''+col_name+'\' Found. De-identification Failed!')
sys.exit()
return encryption_dict
def encryption(df, encrypt_list,password):
encryption_dict = prep_encryption_cols(encrypt_list)
encrypted_dict = encrypt_cols(encryption_dict,password)
for key in encrypted_dict.keys():
df[key] = encrypted_dict[key]
def decryption(df, decrypt_list, password):
encrypted_dict = {}
for col in decrypt_list:
encrypted_dict[col] = df[col]
encrypted_dict['salt'] = df['salt']
decrypted_dict = decrypt_cols(encrypted_dict,password)
del df['salt']
for key in decrypted_dict.keys():
if key != 'salt':
df[key] = decrypted_dict[key]
password = getpass.getpass(prompt='Type Password Here:')
'''
ENCRYPTION
'''
start = time.time()
#read csv file
data = pd.read_csv('test.csv')
#convert it into dataframe
df = pd.DataFrame(data)
encrypt_list = ['FIELD2', 'FIELD3']
del_list = ['FIELD4','FIELD5']
encryption(df, encrypt_list, password)
df.to_csv('encrypted_test.csv')
end = time.time()
print('It took '+str(end - start)+' seconds to encrypt')
'''
DECRYPTION
'''
start = time.time()
data = pd.read_csv('encrypted_test.csv')
df = pd.DataFrame(data)
decrypt_list = ['FIELD2', 'FIELD3']
decryption(df, decrypt_list, password)
df.to_csv('decrypted_test.csv')
end = time.time()
print('It took '+str(end - start)+' seconds to decrypt')
I get this error when I store the encrypted data in csv file and then read it for decryption
C:\Users\user\Desktop\>python3 clean.py
It took 0.06599974632263184 seconds to encrypt
Traceback (most recent call last):
File "clean.py", line 167, in <module>
decryption(df, decrypt_list, password)
File "clean.py", line 128, in decryption
decrypted_dict = decrypt_cols(encrypted_dict,password)
File "clean.py", line 88, in decrypt_cols
plain_text = decrypt(cipher_text, key_salt_pair[0]).decode('utf-8')
File "clean.py", line 56, in decrypt
return unpad(cipher.decrypt(enc[16:]),BLOCK_SIZE)
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\Crypto\Util\Padding.py", line 90, in unpad
raise ValueError("Padding is incorrect.")
ValueError: Padding is incorrect.
The program performs fine if I directly use the dataframe to perform decryption without storing it in csv file.
Thank you for your time.