0

I'm trying to compare two sets of string for partial or complete overlapping substrings and then finding how much the overlap is between them. This is what I have so far but I have a few unit tests that aren't passing. Any help would be appreciated.

# The function findLargestOverlap receives two parameters target and candidate, both strings.
# If either strand is empty or the strands are not the same length, return -1.
# It should find the largest overlap and return the size of the overlap.

def findLargestOverlap(target, candidate):
    if (len(target) == 0 or len(candidate) == 0) or (len(target) != len(candidate)):
        return -1
    elif len(target) == len(candidate):
        return 0
    else:
        match = SequenceMatcher(None, target, candidate).find_longest_match(0, len(target), 0, len(candidate))
        return len(match)

Below are my Unittests

"""
Do Not Edit this file. You may and are encouraged to look at it for reference.
"""

import unittest
import dnaSequencing

class TestFindLargestOverlap(unittest.TestCase):
    def test001_findLargestOverlapExists(self):
        self.assertTrue('findLargestOverlap' in dir(dnaSequencing),
                    'Function "findLargestOverlap" was not defined, check your spelling')

def test002_targetStrandEmpty(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('', 'bbb'), -1,
                     'The target strand is blank so the function should return a value of -1')

def test003_candidateStrandEmpty(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('aaa', ''), -1,
                     'The candidate strand is blank so the function should return a value of -1')

def test004_bothStrandsEmpty(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('', ''), -1,
                     'Both strands are blank so the function should return a value of -1')

def test005_strandsAreNotEqualLengths1(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('aa', 'bbb'), -1,
                     'The target and candidate strings are different lengths so the function should return a value of -1')

def test006_strandsAreNotEqualLengths2(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('aaa', 'bb'), -1,
                     'The target and candidate strings are different lengths so the function should return a value of -1')

def test007_zeroOverlap(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('aaa', 'bbb'), 0, 'There is no overlap between the strings "aaa" and "bbb"')

def test008_zeroOverlap(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('abccccba', 'bbccccbb'), 0, 'There is no overlap between the strings "abccccba" and "bbccccbb"')

def test009_partialOverlap(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('abcdefg', 'defgabc'), 4, 'There is an overlap of "defg" (4 characters)')

def test010_completeOverlap(self):
    from dnaSequencing import findLargestOverlap
    self.assertEqual(findLargestOverlap('aaaaaaaa', 'aaaaaaaa'), 8, 'There is an overlap of "aaaaaaaa" (8 characters)')

if __name__ == '__main__':
    unittest.main()
Lynds.
  • 127
  • 12
  • Just a note: Your comparison can simplify `(len(target) == 0 or len(candidate) == 0)` to `not (target and candidate)` which will evaluate `True` if they are both non-empty. Also, any time `target` and `candidate` are the same length, your code will go to the `elif` and return `0` (but that may be your intention). – Zach Gates Aug 30 '17 at 04:36
  • Can you provide some example data and describe what test-cases are not passing. – AChampion Aug 30 '17 at 04:47
  • I've added my tests. – Lynds. Aug 30 '17 at 05:05
  • I think the elif check for the same length returning 0 will fail test_010 – brennan Aug 30 '17 at 05:34
  • Possible duplicate of [Find the longest substring in two genomic sequence](https://stackoverflow.com/questions/45919830/find-the-longest-substring-in-two-genomic-sequence) – Joe Aug 30 '17 at 07:13

0 Answers0