The roots of this question go back to this.
As the title says, the question is: "How do I properly compile two IR files separately when they are instrumented with ASan (and/or libFuzzer)?". Before we dive into the actual problem, here is an example that works without separate compilation and without the IR layer.
WORKING EXAMPLE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
/*
 * Builds a heap-allocated, normalized copy of cookie_path:
 *   - one leading and one trailing double quote are stripped,
 *   - a result that does not start with '/' is replaced by "/",
 *   - a trailing '/' is removed unless the path is just "/".
 * Returns the new string (to be released with free), or NULL when strdup
 * fails.
 *
 * NOTE: the trailing-quote step indexes strlen(...) - 1 without checking for
 * an empty string, so an empty input or a lone double quote makes it read out
 * of bounds. Presumably this is the access the fuzzer is meant to find; it is
 * kept as-is.
 */
char* sanitize_cookie_path(const char* cookie_path) {
    char* path = strdup(cookie_path);
    if (path == NULL) {
        return NULL;
    }

    /* Leading quote: shift the remainder, terminator included, one byte left. */
    if (*path == '\"') {
        memmove(path, path + 1, strlen(path));
    }

    /* Trailing quote: overwrite it with the terminator. */
    size_t tail = strlen(path) - 1;
    if (path[tail] == '\"') {
        path[tail] = '\0';
    }

    /* Anything that is not an absolute path collapses to "/". */
    if (*path != '/') {
        free(path);
        return strdup("/");
    }

    /* Strip a trailing slash, but leave a bare "/" alone. */
    size_t total = strlen(path);
    if (total > 1 && path[total - 1] == '/') {
        path[total - 1] = '\0';
    }

    return path;
}
// Fuzz entry point: copies the `size` input bytes into a freshly allocated,
// NUL-terminated buffer, hands it to sanitize_cookie_path and then releases
// the buffer. The string returned by sanitize_cookie_path is discarded here
// and never freed.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
    char* input = new char[size + 1];
    input[size] = '\0';  // terminator sits right after the copied bytes
    memcpy(input, data, size);

    sanitize_cookie_path(input);

    delete[] input;
    return 0;
}
The above code is compiled with the command below and works just fine as expected:
$ clang++ -O0 -g -fno-omit-frame-pointer -fsanitize=address,fuzzer -fsanitize-coverage=trace-cmp,trace-gep,trace-div sanitize_cookie_path.c -o fuzzer
$ ./fuzzer
=================================================================
==10081==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60200000004f at pc 0x55ec692f7e51 bp 0x7ffde3767450 sp 0x7ffde3767448
ACTUAL PROBLEM
Now let's complicate the task a bit. Here is the first piece of code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
/*
 * Builds a heap-allocated, normalized copy of cookie_path:
 *   - one leading and one trailing double quote are stripped,
 *   - a result that does not start with '/' is replaced by "/",
 *   - a trailing '/' is removed unless the path is just "/".
 * Returns the new string (to be released with free), or NULL when strdup
 * fails.
 *
 * NOTE: the trailing-quote step indexes strlen(...) - 1 without checking for
 * an empty string, so an empty input or a lone double quote makes it read out
 * of bounds. Presumably this is the access the fuzzer is meant to find; it is
 * kept as-is.
 */
char* sanitize_cookie_path(const char* cookie_path) {
    char* path = strdup(cookie_path);
    if (path == NULL) {
        return NULL;
    }

    /* Leading quote: shift the remainder, terminator included, one byte left. */
    if (*path == '\"') {
        memmove(path, path + 1, strlen(path));
    }

    /* Trailing quote: overwrite it with the terminator. */
    size_t tail = strlen(path) - 1;
    if (path[tail] == '\"') {
        path[tail] = '\0';
    }

    /* Anything that is not an absolute path collapses to "/". */
    if (*path != '/') {
        free(path);
        return strdup("/");
    }

    /* Strip a trailing slash, but leave a bare "/" alone. */
    size_t total = strlen(path);
    if (total > 1 && path[total - 1] == '/') {
        path[total - 1] = '\0';
    }

    return path;
}
// Placeholder entry point; this part is removed later on.
int main(int argc, char** argv) {
    (void)argc;  /* unused */
    (void)argv;  /* unused */
    return 0;
}
// End of the part to remove.
The compilation command of the code above:
$ clang -O0 -emit-llvm sanitize_cookie_path.c -S -o sanitize_cookie_path.ll
Another piece of code below:
#include <cstdio>
#include <cstdint>
#include <cstring>
char* sanitize_cookie_path(const char* cookie_path);
// Fuzz entry point: copies the `size` input bytes into a freshly allocated,
// NUL-terminated buffer, hands it to sanitize_cookie_path and then releases
// the buffer. The string returned by sanitize_cookie_path is discarded here
// and never freed.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
    char* input = new char[size + 1];
    input[size] = '\0';  // terminator sits right after the copied bytes
    memcpy(input, data, size);

    sanitize_cookie_path(input);

    delete[] input;
    return 0;
}
The compilation command of the code above:
$ clang -O0 -emit-llvm fuzz_sanitize_cookie_path.cc -S -o fuzz_sanitize_cookie_path.ll
At this point I adjust the two IR files (resolve the C++ name mangling, delete the "main" part) so that they fit together, and then I compile both of them with the command below and run the result:
$ clang++ -O0 -g -fno-omit-frame-pointer -fsanitize=address,fuzzer -fsanitize-coverage=trace-cmp,trace-gep,trace-div sanitize_cookie_path.ll fuzz_sanitize_cookie_path.ll -o fuzzer
$ ./fuzzer
The result is not what is expected, to be precise it is:
=================================================================
==10186==ERROR: LeakSanitizer: detected memory leaks
This is not the report I expect for the vulnerability in sanitize_cookie_path (the heap-buffer-overflow found in the working example).
WHAT I TRIED
Below are some of the command variants I tried, together with their respective output.
#1
$ clang -O0 -fno-omit-frame-pointer -fsanitize=address -fsanitize-coverage=trace-cmp,trace-gep,trace-div -emit-llvm sanitize_cookie_path.c -S -o sanitize_cookie_path.ll
$ clang -O0 -fno-omit-frame-pointer -fsanitize=address -fsanitize-coverage=trace-cmp,trace-gep,trace-div -emit-llvm fuzz_sanitize_cookie_path.cc -S -o fuzz_sanitize_cookie_path.ll
$ clang++ -O0 -g -fno-omit-frame-pointer -fsanitize=address,fuzzer -fsanitize-coverage=trace-cmp,trace-gep,trace-div sanitize_cookie_path.ll fuzz_sanitize_cookie_path.ll -o fuzzer
$ ./fuzzer
AddressSanitizer:DEADLYSIGNAL
=================================================================
==10333==ERROR: AddressSanitizer: SEGV on unknown address 0x01810fff7000 (pc 0x557746efa890 bp 0x7fff83a152a0 sp 0x7fff83a15230 T0)
==10333==The signal is caused by a READ memory access.
#2
$ clang -O0 -fno-omit-frame-pointer -fsanitize=address -fsanitize-coverage=trace-cmp,trace-gep,trace-div -emit-llvm sanitize_cookie_path.c -S -o sanitize_cookie_path.ll
$ clang -O0 -emit-llvm fuzz_sanitize_cookie_path.cc -S -o fuzz_sanitize_cookie_path.ll
$ clang++ -O0 -g -fno-omit-frame-pointer -fsanitize=address,fuzzer -fsanitize-coverage=trace-cmp,trace-gep,trace-div sanitize_cookie_path.ll fuzz_sanitize_cookie_path.ll -o fuzzer
$ ./fuzzer
AddressSanitizer:DEADLYSIGNAL
=================================================================
==10387==ERROR: AddressSanitizer: SEGV on unknown address 0x01810fff7001 (pc 0x561d9ef01d63 bp 0x7ffdd7eb7180 sp 0x7ffdd7eb7030 T0)
==10387==The signal is caused by a READ memory access.
#3
$ clang -O0 -fno-omit-frame-pointer -fsanitize=address -fsanitize-coverage=trace-cmp,trace-gep,trace-div -emit-llvm sanitize_cookie_path.c -S -o sanitize_cookie_path.ll
$ clang -O0 -emit-llvm fuzz_sanitize_cookie_path.cc -S -o fuzz_sanitize_cookie_path.ll
$ clang++ -O0 -g -fno-omit-frame-pointer -fsanitize=fuzzer -fsanitize-coverage=trace-cmp,trace-gep,trace-div sanitize_cookie_path.ll fuzz_sanitize_cookie_path.ll -o fuzzer
$ ./fuzzer
clang-14: error: linker command failed with exit code 1 (use -v to see invocation)
How can I get the same result as in the working example, but with separate compilation? I would appreciate an outside look at the problem.