As requested by Victor, I'll post another answer. In this case, `CharSequence` is implemented as a wrapper around another `CharSequence`. As the `Matcher` instance requests characters, the `CountingCharSequence` reports each requested offset to a listener interface.
It's slightly dangerous to do this, as the `CharSequence.toString()` method returns a true `String` instance, which cannot be monitored. On the other hand, this approach is relatively simple to implement, and it does work. `toString()` is called, but that seems to happen only to populate the groups once a match has been found. Better write some unit tests around it, though.
Oh, and since I have to print the "100%" mark manually, there is probably a rounding error or an off-by-one error somewhere. Happy debugging :P
/**
 * Demonstrates how to observe the progress of a {@link Matcher} by handing it a
 * {@link CharSequence} wrapper that reports every requested character offset to a
 * {@link ProgressListener}.
 */
public class RegExProgress {

    /** The original LinkScanner provided by Victor: collects the {@code href} value of every anchor tag. */
    public static class LinkScanner {
        private static final Pattern HREF_PATTERN =
                Pattern.compile("<a\\b[^>]*href=\"(.*?)\".*?>(.*?)</a>");

        /**
         * Scans the given markup for anchor tags.
         *
         * @param html the text to scan
         * @return the first capture group (the {@code href} value) of every match, in match order
         */
        public Collection<String> scan(CharSequence html) {
            Collection<String> links = new ArrayList<>();
            Matcher hrefMatcher = HREF_PATTERN.matcher(html);
            while (hrefMatcher.find()) {
                links.add(hrefMatcher.group(1));
            }
            return links;
        }
    }

    /** Receives the absolute offset of each character that is requested from a monitored sequence. */
    interface ProgressListener {
        void listen(int characterOffset);
    }

    /**
     * Prints a percentage line to standard output each time the reported offset
     * enters a new block. The highest block reached is remembered, so lower
     * offsets reported later (for example while the matcher backtracks) print nothing.
     */
    static class SyncedProgressListener implements ProgressListener {
        private final int blocks;
        private final double blockSize;
        private int block;

        /**
         * Creates the listener and immediately prints the initial "0%" line.
         *
         * @param max    total number of characters that will be scanned
         * @param blocks number of progress steps to divide {@code max} into
         * @throws IllegalArgumentException if {@code blocks} is not positive
         */
        public SyncedProgressListener(int max, int blocks) {
            if (blocks <= 0) {
                throw new IllegalArgumentException("blocks must be positive: " + blocks);
            }
            this.blocks = blocks;
            // The tiny epsilon shrinks every block a little; presumably this guards against
            // floating-point rounding placing an offset just below a block boundary.
            this.blockSize = (double) max / (double) blocks - 0.000_001d;
            this.block = 0;
            print();
        }

        /**
         * Records the offset and prints a new percentage line if it lies in a later
         * block than any offset seen so far.
         *
         * @param characterOffset absolute offset of the character that was requested
         */
        @Override
        public synchronized void listen(int characterOffset) {
            if (characterOffset >= blockSize * (block + 1)) {
                this.block = (int) ((double) characterOffset / blockSize);
                print();
            }
        }

        /** Prints the percentage that corresponds to the current block. */
        private void print() {
            // Integer arithmetic: each block is worth 100 / blocks percent, for any block count.
            System.out.printf("%d%%%n", (int) ((long) block * 100 / blocks));
        }
    }

    /**
     * A view on the range {@code [start, end)} of another {@link CharSequence} that
     * reports every accessed offset (relative to the wrapped sequence) to a
     * {@link ProgressListener}. Calls to {@code subSequence}, {@code length} and
     * {@code toString} also write a diagnostic trace line to standard output.
     */
    static class CountingCharSequence implements CharSequence {
        private final CharSequence wrapped;
        private final int start;
        private final int end;
        private final ProgressListener progressListener;

        /**
         * Wraps the complete sequence.
         *
         * @param wrapped          the sequence to monitor
         * @param progressListener receives the offset of every accessed character
         */
        public CountingCharSequence(CharSequence wrapped, ProgressListener progressListener) {
            this(wrapped, 0, wrapped.length(), progressListener);
        }

        /**
         * Wraps the range {@code [start, end)} of the given sequence.
         *
         * @param wrapped the sequence to monitor
         * @param start   first offset of the view (inclusive)
         * @param end     last offset of the view (exclusive)
         * @param pl      receives the offset of every accessed character
         * @throws IndexOutOfBoundsException if the range does not lie within {@code wrapped}
         */
        public CountingCharSequence(CharSequence wrapped, int start, int end, ProgressListener pl) {
            if (start < 0 || start > end || end > wrapped.length()) {
                throw new IndexOutOfBoundsException(
                        "start " + start + ", end " + end + ", length " + wrapped.length());
            }
            this.wrapped = wrapped;
            this.progressListener = pl;
            this.start = start;
            this.end = end;
        }

        /**
         * Returns a monitored view on a part of this view and reports its first offset.
         *
         * @param start start index relative to this view (inclusive)
         * @param end   end index relative to this view (exclusive)
         * @throws IndexOutOfBoundsException if the indices do not describe a range within this view
         */
        @Override
        public CharSequence subSequence(int start, int end) {
            // this may not be needed, as charAt() has to be called eventually
            System.out.printf("subSequence(%d, %d)%n", start, end);
            int length = this.end - this.start;
            if (start < 0 || start > end || end > length) {
                throw new IndexOutOfBoundsException(
                        "start " + start + ", end " + end + ", length " + length);
            }
            // Both indices are relative to this view, so both are shifted by this view's offset.
            int newStart = this.start + start;
            int newEnd = this.start + end;
            progressListener.listen(newStart);
            return new CountingCharSequence(wrapped, newStart, newEnd, progressListener);
        }

        /** Returns the number of characters in this view. */
        @Override
        public int length() {
            System.out.printf("length(): %d%n", end - start);
            return end - start;
        }

        /**
         * Reports the absolute offset to the listener and returns the character.
         * This method writes no trace line, unlike the other accessors.
         *
         * @param index index relative to this view
         * @throws IndexOutOfBoundsException if the index lies outside this view
         */
        @Override
        public char charAt(int index) {
            if (index < 0 || index >= end - start) {
                throw new IndexOutOfBoundsException(
                        "index " + index + ", length " + (end - start));
            }
            int realIndex = start + index;
            progressListener.listen(realIndex);
            return this.wrapped.charAt(realIndex);
        }

        /**
         * Returns the characters of this view as a plain {@link String}. Access to the
         * returned string can no longer be monitored.
         */
        @Override
        public String toString() {
            System.out.printf(" >>> toString() <<< %n");
            // Only the viewed range, so the result has exactly length() characters.
            return wrapped.subSequence(start, end).toString();
        }
    }

    /**
     * Scans a saved HTML page for links while printing progress in 10% steps.
     *
     * @param args ignored
     * @throws java.io.IOException if the page cannot be read
     */
    public static void main(String[] args) throws java.io.IOException {
        LinkScanner scanner = new LinkScanner();
        byte[] raw = Files.readAllBytes(
                Paths.get("regex - Java - How to measure a Matcher processing - Stack Overflow.htm"));
        // Decode explicitly instead of relying on the platform default charset.
        // NOTE(review): assumes the saved page is UTF-8 encoded - confirm for other inputs.
        String content = new String(raw, java.nio.charset.StandardCharsets.UTF_8);
        SyncedProgressListener pl = new SyncedProgressListener(content.length(), 10);
        CountingCharSequence ccs = new CountingCharSequence(content, pl);
        Collection<String> urls = scanner.scan(ccs);
        // charAt() offsets stay below content.length(), so the listener cannot reach the
        // final block boundary on its own; the last mark is printed here.
        System.out.printf("100%% - %d%n", urls.size());
    }
}