It took me a little while to build a test that I would be fairly comfortable with, so that I could actually judge the numbers I would get:
/**
 * JMH benchmark comparing several ways of performing the same task: split a comma-separated
 * string, drop every token that is exactly four characters long, upper-case the remaining
 * tokens and join the sorted, de-duplicated result with commas.
 *
 * <p>Results are reported as average time per call in milliseconds. The forked JVM is started
 * with {@code -ea} (see {@link #main(String[])}) so that the sanity check in
 * {@link StringInput#tearDown()} is actually evaluated.
 */
@BenchmarkMode(value = { Mode.AverageTime })
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
public class StreamVsLoop {

    /**
     * Runs every benchmark declared in this class.
     *
     * @param args ignored
     * @throws RunnerException if the JMH runner fails
     */
    public static void main(String[] args) throws RunnerException {
        Options opt = new OptionsBuilder()
                .include(StreamVsLoop.class.getSimpleName())
                .jvmArgs("-ea") // enable the assertion in StringInput.tearDown()
                .shouldFailOnError(true)
                .build();
        new Runner(opt).run();
    }

    /**
     * Per-thread benchmark input: a comma-separated string of random lower-case letters that is
     * rebuilt before every iteration.
     */
    @State(Scope.Thread)
    public static class StringInput {

        private static final String[] LETTERS = {
            "q", "a", "z", "w", "s", "x", "e", "d", "c", "r", "f", "v", "t", "g", "b",
            "y", "h", "n", "u", "j", "m", "i", "k", "o", "l", "p" };

        /** The input string; read directly by the benchmark methods. */
        public String s = "";

        /** Number of letters in the generated input. */
        @Param(value = { "1000", "10000", "100000" })
        int next;

        /**
         * Sanity-checks the generated input and releases it.
         *
         * <p>With the layout produced by {@link #setUp()}, every comma placed in the second half
         * starts a token of exactly five letters, and no other token has that length. For a
         * positive size that is a multiple of 10 this gives {@code next / 10 - 1} such tokens
         * (99, 999 and 9999 for the configured sizes).
         */
        @TearDown(Level.Iteration)
        public void tearDown() {
            if (next > 0 && next % 10 == 0) {
                long count = Arrays.stream(s.split(",")).filter(x -> x.length() == 5).count();
                assert count == next / 10 - 1
                        : "expected " + (next / 10 - 1) + " five-letter tokens but found " + count;
            }
            s = null;
        }

        /**
         * Builds the input so that roughly half of the tokens are filtered out by the benchmarks
         * and half are kept: a comma precedes every 4th letter before the midpoint (four-letter
         * tokens, which the benchmarks drop) and every 5th letter after it (five-letter tokens,
         * which they keep). The token that straddles the midpoint is longer than five letters.
         *
         * <p>The string is always built from scratch. Appending to the previous value would
         * concatenate onto the {@code null} left behind by {@link #tearDown()} and prefix every
         * iteration but the first with the literal text {@code "null"}.
         */
        @Setup(Level.Iteration)
        public void setUp() {
            int half = next / 2;
            ThreadLocalRandom random = ThreadLocalRandom.current();
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < next; i++) {
                String letter = LETTERS[random.nextInt(0, LETTERS.length)];
                boolean startsToken = (i < half && i % 4 == 0) || (i > half && i % 5 == 0);
                if (startsToken) {
                    sb.append(',');
                }
                sb.append(letter);
            }
            s = sb.toString();
        }
    }

    /**
     * Stream pipeline over the array returned by {@link String#split(String)}.
     *
     * @param si the generated input
     * @return the kept tokens, upper-cased, sorted, distinct and comma-joined
     */
    @Benchmark
    @Fork(1) // same fork count as the other benchmarks so the results are comparable
    public String stream(StringInput si) {
        Stream<String> tokens = Arrays.stream(si.s.split(","));
        return tokens.filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .sorted().distinct()
                .collect(Collectors.joining(","));
    }

    /**
     * Plain loop over the array returned by {@link String#split(String)}; a {@link TreeSet}
     * provides both the ordering and the de-duplication.
     *
     * @param si the generated input
     * @return the kept tokens, upper-cased, sorted, distinct and comma-joined
     */
    @Benchmark
    @Fork(1)
    public String loop(StringInput si) {
        String[] tokens = si.s.split(",");
        Set<String> resultSet = new TreeSet<>();
        for (String t : tokens) {
            if (t.length() != 4) {
                resultSet.add(t.toUpperCase());
            }
        }
        return String.join(",", resultSet);
    }

    /**
     * Stream pipeline fed by {@link Pattern#splitAsStream(CharSequence)}, sorting before
     * de-duplicating. The pattern is compiled on every call, so that cost is part of the
     * measurement.
     *
     * @param si the generated input
     * @return the kept tokens, upper-cased, sorted, distinct and comma-joined
     */
    @Benchmark
    @Fork(1)
    public String sortedDistinct(StringInput si) {
        return Pattern.compile(",").splitAsStream(si.s)
                .filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .sorted()
                .distinct()
                .collect(Collectors.joining(","));
    }

    /**
     * Same as {@link #sortedDistinct(StringInput)} but de-duplicating before sorting.
     *
     * @param si the generated input
     * @return the kept tokens, upper-cased, distinct, sorted and comma-joined
     */
    @Benchmark
    @Fork(1)
    public String distinctSorted(StringInput si) {
        return Pattern.compile(",").splitAsStream(si.s)
                .filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .distinct()
                .sorted()
                .collect(Collectors.joining(","));
    }

    /**
     * Manual scan using {@code indexOf}/{@code substring}: no token array is created and
     * four-character tokens are skipped before any substring is taken.
     *
     * @param si the generated input
     * @return the kept tokens, upper-cased, sorted, distinct and comma-joined
     */
    @Benchmark
    @Fork(1)
    public String variant3(StringInput si) {
        String s = si.s;
        Set<String> resultSet = new TreeSet<>();
        // o = start of the current token, p = position of the comma that ends it
        int o = 0, p;
        for (p = s.indexOf(','); p >= 0; p = s.indexOf(',', o = p + 1)) {
            if (p - o != 4) {
                resultSet.add(s.substring(o, p).toUpperCase());
            }
        }
        // the last token has no terminating comma
        if (s.length() - o != 4) {
            resultSet.add(s.substring(o).toUpperCase());
        }
        return String.join(",", resultSet);
    }
}
streamvsLoop.StreamVsLoop.distinctSorted 1000 0.028
streamvsLoop.StreamVsLoop.sortedDistinct 1000 0.024
streamvsLoop.StreamVsLoop.loop 1000 0.016
streamvsLoop.StreamVsLoop.stream 1000 0.020
streamvsLoop.StreamVsLoop.variant3 1000 0.012
streamvsLoop.StreamVsLoop.distinctSorted 10000 0.394
streamvsLoop.StreamVsLoop.sortedDistinct 10000 0.359
streamvsLoop.StreamVsLoop.loop 10000 0.274
streamvsLoop.StreamVsLoop.stream 10000 0.304 ± 0.006
streamvsLoop.StreamVsLoop.variant3 10000 0.234
streamvsLoop.StreamVsLoop.distinctSorted 100000 4.950
streamvsLoop.StreamVsLoop.sortedDistinct 100000 4.432
streamvsLoop.StreamVsLoop.loop 100000 5.457
streamvsLoop.StreamVsLoop.stream 100000 3.927 ± 0.048
streamvsLoop.StreamVsLoop.variant3 100000 3.595
Holger's method (`variant3`) wins, but boy, the difference between the other solutions is small once the code is hot enough.