The arguments to a method invocation can have side effects, e.g method(variable = value)
, which might even be impossible to remove, e.g. when would lead to accessing an uninitialized variable after the removed invocation. On the bytecode level, the instructions belonging to the argument evaluation can be interleaved with arbitrary unrelated instructions.
But when we limit the scope, we can have a solution. In your example, all invocations are invokevirtual
instructions invoked on either, the implied this
or the value of a static
field. For these invocations, we can indeed use ASM’s Analyzer
with SourceInterpreter
to identify the initial aload
or getstatic
instruction and assume all instruction from this one and the invocation instruction as belonging to the method call expression.
We can use code like
public class IdentifyCall {
static IdentifyCall getInputs(
String internalClassName, MethodNode toAnalyze) throws AnalyzerException {
Map<AbstractInsnNode, Set<AbstractInsnNode>> sources = new HashMap<>();
SourceInterpreter i = new SourceInterpreter();
Analyzer<SourceValue> analyzer = new Analyzer<>(i);
return new IdentifyCall(toAnalyze.instructions, analyzer.analyze(internalClassName, toAnalyze));
}
private final InsnList instructions;
private final Frame<SourceValue>[] frames;
private IdentifyCall(InsnList il, Frame<SourceValue>[] analyzed) {
instructions = il;
frames = analyzed;
}
int[] getSpan(AbstractInsnNode i) {
MethodInsnNode mn = (MethodInsnNode)i;
// can't use getArgumentsAndReturnSizes, as for the frame, double and long do not count as 2
int nArg = mn.desc.startsWith("()")? 0: Type.getArgumentTypes(mn.desc).length;
int end = instructions.indexOf(mn);
Frame<SourceValue> f = frames[end];
SourceValue receiver = f.getStack(f.getStackSize() - nArg - 1);
if(receiver.insns.size() != 1) throw new UnsupportedOperationException();
AbstractInsnNode n = receiver.insns.iterator().next();
if(n.getOpcode() != Opcodes.ALOAD && n.getOpcode() != Opcodes.GETSTATIC)
throw new UnsupportedOperationException(""+n.getOpcode());
return new int[] { instructions.indexOf(n), end };
}
}
and demonstrate it with the following example
public class IdentifyCallExample {
private void toAnalyze() {
System.out.println(
anotherMethod1(
anotherMethod2("a", "b") ?
"c" : anotherMethod3("d", "e") ? "f" : "g",
anotherMethod4("h", "i") ? "j" : "k"
) ? "l" : "m"
);
}
boolean anotherMethod1(String str, String oof) {
return true;
}
boolean anotherMethod2(String str, String oof) {
return true;
}
boolean anotherMethod3(String str, String oof) {
return true;
}
boolean anotherMethod4(String str, String oof) {
return true;
}
public static void main(String[] args) throws AnalyzerException, IOException {
Class<?> me = MethodHandles.lookup().lookupClass();
ClassReader r = new ClassReader(me.getResourceAsStream(me.getSimpleName()+".class"));
ClassNode cn = new ClassNode();
r.accept(cn, ClassReader.SKIP_DEBUG|ClassReader.SKIP_FRAMES);
MethodNode toAnalyze = null;
for(MethodNode mn: cn.methods)
if(mn.name.equals("toAnalyze")) {
toAnalyze = mn;
break;
}
List<int[]> invocations = new ArrayList<>();
final InsnList instructions = toAnalyze.instructions;
IdentifyCall identifyCall
= IdentifyCall.getInputs(me.getName().replace('.', '/'), toAnalyze);
for(int ix = 0, num = instructions.size(); ix < num; ix++) {
AbstractInsnNode instr = instructions.get(ix);
if(instr.getOpcode()!= Opcodes.INVOKEVIRTUAL) continue;
invocations.add(identifyCall.getSpan(instr));
}
printIt(invocations, instructions);
}
private static void printIt(List<int[]> invocations, final InsnList instructions) {
List<Level> levels = toTree(invocations);
Textifier toText = new Textifier();
TraceMethodVisitor tmv = new TraceMethodVisitor(toText);
for(int ix = 0, num = instructions.size(); ix < num; ix++) {
AbstractInsnNode instr = instructions.get(ix);
boolean line = false;
level: for(Level l: levels) {
if(ix >= l.lo && ix <= l.hi) {
for(int[] b: l.branches) {
if(ix < b[0] || ix > b[1]) continue;
System.out.print(line?
(b[0] == ix? b[1] == ix? "─[": "┬─": b[1] == ix? "┴─": "┼─"):
(b[0] == ix? b[1] == ix? " [": "┌─": b[1] == ix? "└─": "│ "));
line |= b[0] == ix || b[1] == ix;
continue level;
}
}
System.out.print(line? "──": " ");
}
instr.accept(tmv);
System.out.print(toText.text.get(0));
toText.text.clear();
}
}
static class Level {
int lo, hi;
ArrayDeque<int[]> branches=new ArrayDeque<>();
Level(int[] b) { lo=b[0]; hi=b[1]; branches.add(b); }
boolean insert(int[] b) {
if(b[1]<=lo) { branches.addFirst(b); lo=b[0]; }
else if(b[0]>=hi) { branches.addLast(b); hi=b[1]; }
else return b[0]>lo && b[1] < hi
&& (b[0]+b[1])>>1 > (lo+hi)>>1? tryTail(b, lo, hi): tryHead(b, lo, hi);
return true;
}
private boolean tryHead(int[] b, int lo, int hi) {
int[] head=branches.removeFirst();
try {
if(head[1] > b[0]) return false;
if(branches.isEmpty() || (lo=branches.getFirst()[0])>=b[1]) {
branches.addFirst(b);
return true;
}
else return b[0]>lo && b[1] < hi
&& (b[0]+b[1])>>1 > (lo+hi)>>1? tryTail(b, lo, hi): tryHead(b, lo, hi);
} finally { branches.addFirst(head); }
}
private boolean tryTail(int[] b, int lo, int hi) {
int[] tail=branches.removeLast();
try {
if(tail[0] < b[1]) return false;
if(branches.isEmpty() || (hi=branches.getLast()[1])<=b[0]) {
branches.addLast(b);
return true;
}
else return b[0]>lo && b[1] < hi
&& (b[0]+b[1])>>1 > (lo+hi)>>1? tryTail(b, lo, hi): tryHead(b, lo, hi);
} finally { branches.addLast(tail); }
}
}
static List<Level> toTree(List<int[]> list) {
if(list.isEmpty()) return Collections.emptyList();
if(list.size()==1) return Collections.singletonList(new Level(list.get(0)));
list.sort(Comparator.comparingInt(b -> b[1] - b[0]));
ArrayList<Level> l=new ArrayList<>();
insert: for(int[] b: list) {
for(Level level: l) if(level.insert(b)) continue insert;
l.add(new Level(b));
}
if(l.size() > 1) Collections.reverse(l);
return l;
}
}
which will print
┌───── GETSTATIC java/lang/System.out : Ljava/io/PrintStream;
│ ┌─── ALOAD 0
│ │ ┌─ ALOAD 0
│ │ │ LDC "a"
│ │ │ LDC "b"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod2 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L0
│ │ LDC "c"
│ │ GOTO L1
│ │ L0
│ │ ┌─ ALOAD 0
│ │ │ LDC "d"
│ │ │ LDC "e"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod3 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L2
│ │ LDC "f"
│ │ GOTO L1
│ │ L2
│ │ LDC "g"
│ │ L1
│ │ ┌─ ALOAD 0
│ │ │ LDC "h"
│ │ │ LDC "i"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod4 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L3
│ │ LDC "j"
│ │ GOTO L4
│ │ L3
│ │ LDC "k"
│ │ L4
│ └─── INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod1 (Ljava/lang/String;Ljava/lang/String;)Z
│ IFEQ L5
│ LDC "l"
│ GOTO L6
│ L5
│ LDC "m"
│ L6
└───── INVOKEVIRTUAL java/io/PrintStream.println (Ljava/lang/String;)V
RETURN
When we want to support more complex receiver expressions or static
methods, whose first argument can be an arbitrary expression, things get more complicated. The Frame<SourceValue>
allows us to identify the instructions which pushed the current values to the operand stack, but in case of an expression like a + b
, it would be the iadd
instruction only and we have to analyze the iadd
instruction’s frame to get its inputs. Instead of implementing this for every kind of instruction, it’s easier to extend the interpreter, to get the information and store it, e.g in a Map
, as Analyzer
has done this work already. Then, we can collect all inputs recursively.
But this only provides the direct and indirect input sources, but in case of conditional expressions, we also need the input(s) to the condition. For this, we have to identify and store the conditional branches. Whenever an input is reported to potentially originate from different source instructions, we have to check the related branches and add their conditions.
Then we use again the simplifying assumption, that all instructions between the first and last also belong to the invocation expression.
The more elaborated code looks like
public class IdentifyCall {
private final InsnList instructions;
private final Map<AbstractInsnNode, Set<SourceValue>> sources;
private final TreeMap<int[],AbstractInsnNode> conditionals;
private IdentifyCall(InsnList il,
Map<AbstractInsnNode, Set<SourceValue>> s, TreeMap<int[], AbstractInsnNode> c) {
instructions = il;
sources = s;
conditionals = c;
}
Set<AbstractInsnNode> getAllInputsOf(AbstractInsnNode instr) {
Set<AbstractInsnNode> source = new HashSet<>();
List<SourceValue> pending = new ArrayList<>(sources.get(instr));
for (int pIx = 0; pIx < pending.size(); pIx++) {
SourceValue sv = pending.get(pIx);
final boolean branch = sv.insns.size() > 1;
for(AbstractInsnNode in: sv.insns) {
if(source.add(in))
pending.addAll(sources.getOrDefault(in, Collections.emptySet()));
if(branch) {
int ix = instructions.indexOf(in);
conditionals.forEach((b,i) -> {
if(b[0] <= ix && b[1] >= ix && source.add(i))
pending.addAll(sources.getOrDefault(i, Collections.emptySet()));
});
}
}
}
return source;
}
static IdentifyCall getInputs(
String internalClassName, MethodNode toAnalyze) throws AnalyzerException {
InsnList instructions = toAnalyze.instructions;
Map<AbstractInsnNode, Set<SourceValue>> sources = new HashMap<>();
SourceInterpreter i = new SourceInterpreter() {
@Override
public SourceValue unaryOperation(AbstractInsnNode insn, SourceValue value) {
sources.computeIfAbsent(insn, x -> new HashSet<>()).add(value);
return super.unaryOperation(insn, value);
}
@Override
public SourceValue binaryOperation(AbstractInsnNode insn, SourceValue v1, SourceValue v2) {
addAll(insn, Arrays.asList(v1, v2));
return super.binaryOperation(insn, v1, v2);
}
@Override
public SourceValue ternaryOperation(AbstractInsnNode insn, SourceValue v1, SourceValue v2, SourceValue v3) {
addAll(insn, Arrays.asList(v1, v2, v3));
return super.ternaryOperation(insn, v1, v2, v3);
}
@Override
public SourceValue naryOperation(AbstractInsnNode insn, List<? extends SourceValue> values) {
addAll(insn, values);
return super.naryOperation(insn, values);
}
private void addAll(AbstractInsnNode insn, List<? extends SourceValue> values) {
sources.computeIfAbsent(insn, x -> new HashSet<>()).addAll(values);
}
};
TreeMap<int[],AbstractInsnNode> conditionals = new TreeMap<>(
Comparator.comparingInt((int[] a) -> a[0]).thenComparingInt(a -> a[1]));
Analyzer<SourceValue> analyzer = new Analyzer<>(i) {
@Override
protected void newControlFlowEdge(int insn, int successor) {
if(insn != successor - 1) {
AbstractInsnNode instruction = instructions.get(insn);
Set<SourceValue> dep = sources.get(instruction);
if(dep != null && !dep.isEmpty())
conditionals.put(new int[]{ insn, successor }, instruction);
}
}
};
analyzer.analyze(internalClassName, toAnalyze);
return new IdentifyCall(instructions, sources, conditionals);
}
}
Then, we also use a more elaborated example code:
public class IdentifyCallExample {
private void toAnalyze() {
(Math.random()>0.5? System.out: System.err).println(
anotherMethod1(
anotherMethod2("a", "b") ?
"c" : anotherMethod3("d", "e") ? "f" : "g",
anotherMethod4("h", "i") ? "j" : "k"
) ? "l" : "m"
);
}
static boolean anotherMethod1(String str, String oof) {
return true;
}
static boolean anotherMethod2(String str, String oof) {
return true;
}
static boolean anotherMethod3(String str, String oof) {
return true;
}
static boolean anotherMethod4(String str, String oof) {
return true;
}
public static void main(String[] args) throws AnalyzerException, IOException {
Class<?> me = MethodHandles.lookup().lookupClass();
ClassReader r = new ClassReader(me.getResourceAsStream(me.getSimpleName()+".class"));
ClassNode cn = new ClassNode();
r.accept(cn, ClassReader.SKIP_DEBUG|ClassReader.SKIP_FRAMES);
MethodNode toAnalyze = null;
for(MethodNode mn: cn.methods)
if(mn.name.equals("toAnalyze")) {
toAnalyze = mn;
break;
}
List<int[]> invocations = new ArrayList<>();
final InsnList instructions = toAnalyze.instructions;
IdentifyCall sources = IdentifyCall.getInputs(me.getName().replace('.', '/'), toAnalyze);
for(int ix = 0, num = instructions.size(); ix < num; ix++) {
AbstractInsnNode instr = instructions.get(ix);
if(instr.getType() != AbstractInsnNode.METHOD_INSN) continue;
IntSummaryStatistics s = sources.getAllInputsOf(instr).stream()
.mapToInt(instructions::indexOf).summaryStatistics();
s.accept(ix);
invocations.add(new int[]{s.getMin(), s.getMax()});
}
printIt(invocations, instructions);
}
// remainder as in the simple variant
which will now print
┌────[ INVOKESTATIC java/lang/Math.random ()D
│ LDC 0.5
│ DCMPL
│ IFLE L0
│ GETSTATIC java/lang/System.out : Ljava/io/PrintStream;
│ GOTO L1
│ L0
│ GETSTATIC java/lang/System.err : Ljava/io/PrintStream;
│ L1
│ ┌─┬─ LDC "a"
│ │ │ LDC "b"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod2 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L2
│ │ LDC "c"
│ │ GOTO L3
│ │ L2
│ │ ┌─ LDC "d"
│ │ │ LDC "e"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod3 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L4
│ │ LDC "f"
│ │ GOTO L3
│ │ L4
│ │ LDC "g"
│ │ L3
│ │ ┌─ LDC "h"
│ │ │ LDC "i"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod4 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L5
│ │ LDC "j"
│ │ GOTO L6
│ │ L5
│ │ LDC "k"
│ │ L6
│ └─── INVOKESTATIC complex/IdentifyCallExample.anotherMethod1 (Ljava/lang/String;Ljava/lang/String;)Z
│ IFEQ L7
│ LDC "l"
│ GOTO L8
│ L7
│ LDC "m"
│ L8
└───── INVOKEVIRTUAL java/io/PrintStream.println (Ljava/lang/String;)V
RETURN
This may still not catch every possible case, but could be sufficient for your use cases.