0

I wrote the following script to anonymize e-mail addresses in a txt file:

import io, os, sys
import re

def main(path='emails.txt'):
    """Anonymize e-mail addresses in a text file, rewriting it in place.

    The file is read in full, every regex match is replaced with the
    literal ``xxxx``, and the result is written back to the same path.

    Args:
        path: File to process. Defaults to ``emails.txt`` in the current
            working directory.

    Errors are reported on stdout rather than raised: an ``IOError`` from
    opening, reading or writing prints one message, any other exception
    prints a generic one.
    """
    try:
        # The context manager closes the read handle before the same path
        # is reopened (and truncated) for writing.
        with open(path, 'r') as myfile:
            content = myfile.read()

        # NOTE(review): `.+` is greedy, so a match runs from the start of
        # the line up to the last '@' that is followed later on the line
        # by '.com' or '.edu' -- any text preceding the address on that
        # line is replaced as well, not only the local part.
        content = re.sub(r'.+(?=@.+\.(com|edu))', "xxxx", content)

        with open(path, 'w') as myfile:
            myfile.write(content)

    except IOError:
        print('An error occured trying to read the file.')

    # `Exception` rather than a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate.
    except Exception:
        print('An error occured.')


if __name__ == '__main__':
    main()

I was wondering how I could make this work for all files in a directory and its subdirectories.

user2063763
  • 167
  • 1
  • 1
  • 4

2 Answers

1

os.walk() is what you want. I made changes to your snippet to demonstrate:

#!/usr/bin/env python

import re
from os import walk
from os.path import join

def main(root='/path/to/root'):
    """Anonymize e-mail addresses in every file under a directory tree.

    Walks ``root`` with ``os.walk`` and, for each file found in it or in
    any of its subdirectories, reads the file, replaces every regex match
    with the literal ``xxxx`` and writes the result back to the same path.

    Args:
        root: Directory to start from. Defaults to ``/path/to/root``.

    Exceptions raised while opening, reading or writing a file are not
    caught here and propagate to the caller.
    """
    for dirpath, _, filenames in walk(root):
        for filename in filenames:
            # os.walk yields bare file names; they must be joined with the
            # directory currently being visited to get a usable path.
            path_to_file = join(dirpath, filename)

            with open(path_to_file, 'r') as myfile:
                # Parenthesized call form works on Python 2 and Python 3.
                print('Reading {0}'.format(path_to_file))
                content = myfile.read()

            # NOTE(review): `.+` is greedy, so a match runs from the start
            # of the line up to the last '@' that is followed later on the
            # line by '.com' or '.edu' -- text preceding the address on
            # that line is replaced as well.
            content = re.sub(r'.+(?=@.+\.(com|edu))', "xxxx", content)

            with open(path_to_file, 'w') as myfile:
                myfile.write(content)


if __name__ == '__main__':
    main()
Chris
  • 3,438
  • 5
  • 25
  • 27
  • Thank you, I was missing the "path_to_file = join(dirpath, filename)" line, so it was working only for the current directory but not for the nested ones. – user2063763 Mar 06 '13 at 01:12
0

Using glob.iglob (note that this only covers files directly inside the root directory, not its subdirectories):

import io, os, sys
import re
import glob

def main(root='/path/to/root'):
    """Write anonymized copies of the files directly inside a directory.

    Every regular file matched by ``<root>/*`` is read, each regex match
    is replaced with the literal ``xxxx``, and the result is written to
    the path obtained by replacing ``.txt`` with ``.new.txt``.
    Subdirectories are skipped, not descended into.

    Args:
        root: Directory whose immediate files are processed. Defaults to
            ``/path/to/root``.

    Errors are reported on stdout rather than raised. The ``try`` wraps
    the whole loop, so the first failure stops processing of the
    remaining files.
    """
    try:
        for f in glob.iglob(os.path.join(root, '*')):
            if not os.path.isfile(f):
                continue

            # Context managers close both handles even when an error
            # interrupts the read or the write.
            with open(f, 'r') as myfile:
                content = myfile.read()

            # NOTE(review): `.+` is greedy, so a match runs from the start
            # of the line up to the last '@' that is followed later on the
            # line by '.com' or '.edu' -- text preceding the address on
            # that line is replaced as well.
            content = re.sub(r'.+(?=@.+\.(com|edu))', "xxxx", content)

            # str.replace substitutes every '.txt' occurrence in the path;
            # a path containing no '.txt' is left unchanged, so such a
            # file is overwritten in place.
            with open(f.replace('.txt', '.new.txt'), 'w') as myfile:
                myfile.write(content)

    except IOError:
        print('An error occured trying to read the file.')
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate.
    except Exception:
        print('An error occured.')


if __name__ == '__main__':
    main()
shantanoo
  • 3,617
  • 1
  • 24
  • 37