-1

I have a python script that performs an sql query and writes the output of the query to a .json file. However, every time it writes to the json file for me it overwrites the previously written text. I want each sql query to be written to a new and separate .json. Below is my code that is not working. Any help would be greatly appreciated!

from __future__ import print_function

try:
    import psycopg2
except ImportError:
    raise ImportError('\n\033[33mpsycopg2 library missing. pip install psycopg2\033[1;m\n')
    sys.exit(1)

import re
import sys
import json

DB_HOST = 'crt.sh'
DB_NAME = 'certwatch'
DB_USER = 'guest'
OUTPUT_DIR="output/"

def connect_to_db(domain_name):
    try:
        conn = psycopg2.connect("dbname={0} user={1} host={2}".format(DB_NAME, DB_USER, DB_HOST))
        cursor = conn.cursor()
        cursor.execute("SELECT ci.NAME_VALUE NAME_VALUE FROM certificate_identity ci WHERE ci.NAME_TYPE = 'dNSName' AND reverse(lower(ci.NAME_VALUE)) LIKE reverse(lower('%{}'));".format(domain_name))
    except:
        print("\n\033[1;31m[!] Unable to connect to the database\n\033[1;m")
    return cursor


def get_unique_emails(cursor, domain_name):
    unique_emails = []
    for result in cursor.fetchall():
        matches=re.findall(r"\'(.+?)\'",str(result))
        for email in matches:
            if email not in unique_emails:
                if "{}".format(domain_name) in email:
                    unique_emails.append(email)
    return unique_emails


def print_unique_emails(unique_emails):
    print("\033[1;32m[+] Total unique emails found: {}\033[1;m".format(len(unique_emails)))
    for unique_email in sorted(unique_emails):
        print(unique_email)


if __name__ == '__main__':
    filepath = 'test.txt'
    with open(filepath) as fp:
        for cnt, domain_name in enumerate(fp):
            print("Line {}: {}".format(cnt, domain_name))
            print(domain_name)

        domain_name = domain_name.rstrip()
        cursor = connect_to_db(domain_name)
        unique_emails = get_unique_emails(cursor, domain_name)
        print_unique_emails(unique_emails)
        outfilepath = OUTPUT_DIR + unique_emails + ".json"
        with open(outfilepath, 'w') as outfile:
            outfile.write(json.dumps(unique_emails, sort_keys=True, indent=4))
bedford
  • 181
  • 1
  • 13
  • 2
    Use append instead of write. i.e 'a' instead of 'w' in your file writer. – sjaymj62 Jul 20 '18 at 14:00
  • @sjaymj62 Thanks for your help! How do I configure my code to write each query to a separate .json file? For example, if I query google.com and apple.com I want to have google.com.json and apple.com.json files. Your assistance is greatly appreciated! – bedford Jul 20 '18 at 17:10

2 Answers2

2
with open(outfilepath, 'w') as outfile:
    outfile.write(json.dumps(unique_emails, sort_keys=True, indent=4))

You are currently opening the file to write. You want to append to the file. You can do this by changing w to a

with open(outfilepath, 'a') as outfile:
    outfile.write(json.dumps(unique_emails, sort_keys=True, indent=4))

You can read the documentation on open() here.

ltd9938
  • 1,444
  • 1
  • 15
  • 29
  • Thanks for your help! How do I configure my code to write each query to a separate .json file? For example, if I query google.com and apple.com I want to have google.com.json and apple.com.json files. Your assistance is greatly appreciated! – bedford Jul 20 '18 at 17:11
  • @bedford Just change the the value of `outputfilepath` to whatever you want the file to be called. – ltd9938 Jul 20 '18 at 17:14
1

I think it's because your not looping when your writing the json file, you have a single write, so it just writes to the one file. So you need to do something like what you did when you ... enumerate(fp):. Make another for loop, looping over each domain, and change your OUTPUT_DIR + unique_emails + ".json" to be OUTPUT_DIR + domain_name + ".json".

DanB
  • 59
  • 3