I have a python script that I am using using to make sql queries. The problem is that my VM only has 512mbs of RAM and some of the sql queries are too RAM intensive and therefore the kernel automatically kills the script. How can I make this code more RAM efficient? One idea is to more aggressively write the data out to disk instead of accumulating it in RAM. Does anyone know an easy implementation of that? I would greatly appreciate your help!
Code
from __future__ import print_function
try:
import psycopg2
except ImportError:
raise ImportError('\n\033[33mpsycopg2 library missing. pip install psycopg2\033[1;m\n')
sys.exit(1)
import re
import sys
import json
import pprint
import time
outfilepath = "crtsh_output/crtsh_flat_file"
DB_HOST = 'crt.sh'
DB_NAME = 'certwatch'
DB_USER = 'guest'
# DELAY = 0
def connect_to_db():
filepath = 'forager.txt'
with open(filepath) as fp:
unique_domains = ''
try:
conn = psycopg2.connect("dbname={0} user={1} host={2}".format(DB_NAME, DB_USER, DB_HOST))
cursor = conn.cursor()
for cnt, domain_name in enumerate(fp):
print("Line {}: {}".format(cnt, domain_name))
print(domain_name)
domain_name = domain_name.rstrip()
cursor.execute('''SELECT c.id, x509_commonName(c.certificate), x509_issuerName(c.certificate), x509_notBefore(c.certificate), x509_notAfter(c.certificate), x509_issuerName(c.certificate), x509_keyAlgorithm(c.certificate), x509_keySize(c.certificate), x509_publicKeyMD5(c.certificate), x509_publicKey(c.certificate), x509_rsaModulus(c.certificate), x509_serialNumber(c.certificate), x509_signatureHashAlgorithm(c.certificate), x509_signatureKeyAlgorithm(c.certificate), x509_subjectName(c.certificate), x509_name(c.certificate), x509_name_print(c.certificate), x509_commonName(c.certificate), x509_subjectKeyIdentifier(c.certificate), x509_extKeyUsages(c.certificate), x509_certPolicies(c.certificate), x509_canIssueCerts(c.certificate), x509_getPathLenConstraint(c.certificate), x509_altNames(c.certificate), x509_altNames_raw(c.certificate), x509_cRLDistributionPoints(c.certificate), x509_authorityInfoAccess(c.certificate), x509_print(c.certificate), x509_anyNamesWithNULs(c.certificate), x509_extensions(c.certificate), x509_tbscert_strip_ct_ext(c.certificate), x509_hasROCAFingerprint(c.certificate)
FROM certificate c, certificate_identity ci WHERE
c.id= ci.certificate_id AND ci.name_type = 'dNSName' AND lower(ci.name_value) =
lower(%s) AND x509_notAfter(c.certificate) > statement_timestamp()''', (domain_name,))
unique_domains = cursor.fetchall()
pprint.pprint(unique_domains)
outfilepath = "crtsh2" + ".json"
with open(outfilepath, 'a') as outfile:
outfile.write(json.dumps(unique_domains, sort_keys=True, indent=4, default=str, ensure_ascii = False))
# time.sleep(DELAY)
# conn.rollback()
except Exception as error:
# print("\n\033[1;31m[!] Unable to connect to the database\n\033[1;m")
# if tries < 3:
time.sleep(1) # give the DB a bit to recover if you want
# connect_to_db(tries+1)
# else:
raise error
if __name__ == "__main__":
connect_to_db()