I'm trying to compare two sets of string for partial or complete overlapping substrings and then finding how much the overlap is between them. This is what I have so far but I have a few unit tests that aren't passing. Any help would be appreciated.
# The function findLargestOverlap receives two parameters target and candidate, both strings.
# If either strand is empty or the strands are not the same length, return -1.
# It should find the largest overlap and return the size of the overlap.
def findLargestOverlap(target, candidate):
if (len(target) == 0 or len(candidate) == 0) or (len(target) != len(candidate)):
return -1
elif len(target) == len(candidate):
return 0
else:
match = SequenceMatcher(None, target, candidate).find_longest_match(0, len(target), 0, len(candidate))
return len(match)
Below are my Unittests
"""
Do Not Edit this file. You may and are encouraged to look at it for reference.
"""
import unittest
import dnaSequencing
class TestFindLargestOverlap(unittest.TestCase):
def test001_findLargestOverlapExists(self):
self.assertTrue('findLargestOverlap' in dir(dnaSequencing),
'Function "findLargestOverlap" was not defined, check your spelling')
def test002_targetStrandEmpty(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('', 'bbb'), -1,
'The target strand is blank so the function should return a value of -1')
def test003_candidateStrandEmpty(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('aaa', ''), -1,
'The candidate strand is blank so the function should return a value of -1')
def test004_bothStrandsEmpty(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('', ''), -1,
'Both strands are blank so the function should return a value of -1')
def test005_strandsAreNotEqualLengths1(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('aa', 'bbb'), -1,
'The target and candidate strings are different lengths so the function should return a value of -1')
def test006_strandsAreNotEqualLengths2(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('aaa', 'bb'), -1,
'The target and candidate strings are different lengths so the function should return a value of -1')
def test007_zeroOverlap(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('aaa', 'bbb'), 0, 'There is no overlap between the strings "aaa" and "bbb"')
def test008_zeroOverlap(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('abccccba', 'bbccccbb'), 0, 'There is no overlap between the strings "abccccba" and "bbccccbb"')
def test009_partialOverlap(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('abcdefg', 'defgabc'), 4, 'There is an overlap of "defg" (4 characters)')
def test010_completeOverlap(self):
from dnaSequencing import findLargestOverlap
self.assertEqual(findLargestOverlap('aaaaaaaa', 'aaaaaaaa'), 8, 'There is an overlap of "aaaaaaaa" (8 characters)')
if __name__ == '__main__':
unittest.main()