Use a tree structure to hold the substrings per codepoint. This eliminates the need to
Note that this is efficient only if the needle set is almost constant. It is not inefficient if there are individual additions or removals of substrings though, but a different initialization each time to arrange a lot of strings into a tree structure would definitely slower it.
StringSearcher
:
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
class StringSearcher{
private NeedleTree needles = new NeedleTree(-1);
private boolean caseSensitive;
private List<Integer> lengths = new ArrayList<>();
private int maxLength;
public StringSearcher(List<String> inputs, boolean caseSensitive){
this.caseSensitive = caseSensitive;
for(String input : inputs){
if(!lengths.contains(input.length())){
lengths.add(input.length());
}
NeedleTree tree = needles;
for(int i = 0; i < input.length(); i++){
tree = tree.child(caseSensitive ? input.codePointat(i) : Character.toLowerCase(input.codePointAt(i)));
}
tree.markSelfSet();
}
maxLength = Collections.max(legnths);
}
public boolean matches(String haystack){
if(!caseSensitive){
haystack = haystack.toLowerCase();
}
for(int i = 0; i < haystack.length(); i++){
String substring = haystack.substring(i, i + maxLength); // maybe we can even skip this and use from haystack directly?
NeedleTree tree = needles;
for(int j = 0; j < substring.maxLength; j++){
tree = tree.childOrNull(substring.codePointAt(j));
if(tree == null){
break;
}
if(tree.isSelfSet()){
return true;
}
}
}
return false;
}
}
NeedleTree.java
:
import java.util.HashMap;
import java.util.Map;
class NeedleTree{
private int codePoint;
private boolean selfSet;
private Map<Integer, NeedleTree> children = new HashMap<>();
public NeedleTree(int codePoint){
this.codePoint = codePoint;
}
public NeedleTree childOrNull(int codePoint){
return children.get(codePoint);
}
public NeedleTree child(int codePoint){
NeedleTree child = children.get(codePoint);
if(child == null){
child = children.put(codePoint, new NeedleTree(codePoint));
}
return child;
}
public boolean isSelfSet(){
return selfSet;
}
public void markSelfSet(){
selfSet = true;
}
}