You should figure out a correct specification for the files you're trying to match before coding it up. The pseudo-regexps you gave for the filenames you are trying to match ("`[number][a-z]` or `[a-z][number]`") don't even include the examples you gave, such as `0-file`.
Simple version
However, taking your stated specification at face value, and assuming you wish to include uppercase Latin letters as well, here's a simple function that will match `[number][a-z]` or `[a-z][number]`, and return the appropriate prefix, suffix, and number of numeric digits.
import re
def find_number_in_filename(fn):
    """Locate the numeric field in a name shaped like digits+letters or letters+digits.

    Args:
        fn: The bare file name to inspect (no directory, no extension).

    Returns:
        A ``(prefix, suffix, num_length)`` tuple, where ``prefix`` and
        ``suffix`` are the text before and after the digits and
        ``num_length`` is the number of digits.  When ``fn`` fits neither
        shape the result is ``(fn, "", 0)``.
    """
    # Anchor with \Z rather than $: "$" also matches just before a trailing
    # newline, which would accept "bar14\n" and silently drop the newline.
    m = re.match(r"(\d+)([A-Za-z]+)\Z", fn)
    if m:
        # digits first, letters after: nothing precedes the number
        return "", m.group(2), len(m.group(1))

    m = re.match(r"([A-Za-z]+)(\d+)\Z", fn)
    if m:
        # letters first, digits after: nothing follows the number
        return m.group(1), "", len(m.group(2))

    # A zero digit count tells the caller that nothing matched.
    return fn, "", 0
# Demo: for each sample name, report where the numeric field sits and the
# first and last names that field can enumerate.
example_fn = ("000foo", "bar14", "baz0", "file10name")
for fn in example_fn:
    prefix, suffix, num_length = find_number_in_filename(fn)
    if num_length == 0:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s: does not match" % fn)
        continue

    print("%s -> %s[%d-digits]%s" % (fn, prefix, num_length, suffix))

    # "%0<width>d" zero-pads the counter to the original digit count.
    template = "%s%0" + str(num_length) + "d%s"

    # Only the two endpoints are shown, so format just those instead of
    # building all 10**num_length names.  Every name can still be produced
    # lazily with: (template % (prefix, ii, suffix)
    #               for ii in range(10 ** num_length))
    first_version = template % (prefix, 0, suffix)
    last_version = template % (prefix, 10 ** num_length - 1, suffix)
    print("\t%s through %s" % (first_version, last_version))
The output will be:
000foo -> [3-digits]foo
000foo through 999foo
bar14 -> bar[2-digits]
bar00 through bar99
baz0 -> baz[1-digits]
baz0 through baz9
file10name: does not match
Notice that I'm using a standard `printf`-style string format to convert numbers to 0-padded strings, e.g. `%03d` for 3-digit numbers with 0-padding. Using the newer `str.format` may be preferable for future-proofing.
Handle full paths and extensions gracefully
If your input includes full paths and filenames with extensions (e.g. `/home/someone/project/foo000.txt`) and you want to match based on the last piece of the path only, then use `os.path.split` and `os.path.splitext` to do the trick.
UPDATE: fixed missing path separator
import re
import os.path
def find_number_in_filename(path):
    """Locate the numeric field in the last component of ``path``.

    The directory part and the extension are set aside, and the remaining
    base name is matched against digits+letters or letters+digits.

    Args:
        path: A file name or full path, optionally with an extension.

    Returns:
        A ``(prefix, suffix, num_length)`` tuple.  ``prefix`` is the
        directory (with a trailing separator) plus any letters before the
        digits, ``suffix`` is any letters after the digits plus the
        extension, and ``num_length`` is the number of digits.  When the
        base name fits neither shape the result is ``(path, "", 0)``.
    """
    # remove the path and the extension
    head, tail = os.path.split(path)
    head = os.path.join(head, "")  # include / or \ on the end of head if it's missing
    fn, ext = os.path.splitext(tail)

    # Anchor with \Z rather than $: "$" also matches just before a trailing
    # newline, which would accept "bar14\n" and silently drop the newline.
    m = re.match(r"(\d+)([A-Za-z]+)\Z", fn)
    if m:
        # digits first: only the directory precedes the number
        return head, m.group(2) + ext, len(m.group(1))

    m = re.match(r"([A-Za-z]+)(\d+)\Z", fn)
    if m:
        # digits last: only the extension follows the number
        return head + m.group(1), ext, len(m.group(2))

    # A zero digit count tells the caller that nothing matched.
    return path, "", 0
# Demo: for each sample path, report where the numeric field sits and the
# first and last paths that field can enumerate.
example_paths = ("/tmp/bar14.so", "/home/someone/0000baz.txt", "/home/someone/baz00bar.zip")
for path in example_paths:
    prefix, suffix, num_length = find_number_in_filename(path)
    if num_length == 0:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s: does not match" % path)
        continue

    print("%s -> %s[%d-digits]%s" % (path, prefix, num_length, suffix))

    # "%0<width>d" zero-pads the counter to the original digit count.
    template = "%s%0" + str(num_length) + "d%s"

    # Only the two endpoints are shown, so format just those instead of
    # building all 10**num_length paths.  Every path can still be produced
    # lazily with: (template % (prefix, ii, suffix)
    #               for ii in range(10 ** num_length))
    first_version = template % (prefix, 0, suffix)
    last_version = template % (prefix, 10 ** num_length - 1, suffix)
    print("\t%s through %s" % (first_version, last_version))