It seems to me that you (OP) are not very concerned with performance of the code but more interested in a potential approch to solve it without readymade functions. So here is an example I came up with to compute the longest common substring. I have to note that this only returns the first largest common substring found even when there can be several of the same length. This is something you could modify to fit your needs. And please don't expect this to be super fast - it won't.
foo <- function(str1, str2, ignore.case = FALSE, verbose = FALSE) {
if(ignore.case) {
str1 <- tolower(str1)
str2 <- tolower(str2)
}
if(nchar(str1) < nchar(str2)) {
x <- str2
str2 <- str1
str1 <- x
}
x <- strsplit(str2, "")[[1L]]
n <- length(x)
s <- sequence(seq_len(n))
s <- split(s, cumsum(s == 1L))
s <- rep(list(s), n)
for(i in seq_along(s)) {
s[[i]] <- lapply(s[[i]], function(x) {
x <- x + (i-1L)
x[x <= n]
})
s[[i]] <- unique(s[[i]])
}
s <- unlist(s, recursive = FALSE)
s <- unique(s[order(-lengths(s))])
i <- 1L
len_s <- length(s)
while(i < len_s) {
lcs <- paste(x[s[[i]]], collapse = "")
if(verbose) cat("now checking:", lcs, "\n")
check <- grepl(lcs, str1, fixed = TRUE)
if(check) {
cat("the (first) longest common substring is:", lcs, "of length", nchar(lcs), "\n")
break
} else {
i <- i + 1L
}
}
}
str1 <- 'ABCDE'
str2 <- 'CDEFG'
foo(str1, str2)
# the (first) longest common substring is: CDE of length 3
str1 <- 'ATTAGACCTG'
str2 <- 'CCTGCCGGAA'
foo(str1, str2)
# the (first) longest common substring is: CCTG of length 4
str1 <- 'foobarandfoo'
str2 <- 'barand'
foo(str1, str2)
# the (first) longest common substring is: barand of length 6
str1 <- 'EFGABCDE'
str2 <- 'ABCDECDE'
foo(str1, str2)
# the (first) longest common substring is: ABCDE of length 5
set.seed(2018)
str1 <- paste(sample(c(LETTERS, letters), 500, TRUE), collapse = "")
str2 <- paste(sample(c(LETTERS, letters), 250, TRUE), collapse = "")
foo(str1, str2, ignore.case = TRUE)
# the (first) longest common substring is: oba of length 3
foo(str1, str2, ignore.case = FALSE)
# the (first) longest common substring is: Vh of length 2