AFL-master.gitignore> travischeck_fuzzer_stats.sh.travis.ymlafl-analyze.cafl-as.cafl-as.hafl-cminafl-fuzz.cafl-gcc.cafl-gotcpu.cafl-plotafl-showmap.cafl-tmin.cafl-whatsupalloc-inl.handroid-ashmem.hAndroid.bpconfig.hdebug.h> dictionariesgif.dicthtml_tags.dictjpeg.dictjs.dictjson.dictpdf.dictperl.dictpng.dictregexp.dictsql.dicttiff.dictwebp.dictxml.dict> experimental> argv_fuzzingargv-fuzz-inl.htest.c> asan_cgroupslimit_memory.sh> bash_shellshockshellshock-fuzz.diff> canvas_harnesscanvas_harness.html> clang_asm_normalizeas> crash_triagetriage_crashes.sh> distributed_fuzzingsync_script.sh> libpng_no_checksumlibpng-nocrc.patch> persistent_demopersistent_demo.c> post_librarypost_library.so.cpost_library_png.so.chash.h> libdislocatorlibdislocator.so.cMakefile> libtokencaplibtokencap.so.cMakefile> llvm_modeafl-clang-fast.cafl-llvm-pass.so.ccafl-llvm-rt.o.cMakefileMakefile> qemu_modebuild_qemu_support.sh> patchesafl-qemu-cpu-inl.hconfigure.diffcpu-exec.diffelfload.diffmemfd.diffsyscall.difftest-instr.ctest-libfuzzer-target.ctypes.h
xxxxxxxxxx
# Binaries produced by "make".
afl-analyze
afl-as
afl-clang
afl-clang++
afl-fuzz
afl-g++
afl-gcc
afl-gotcpu
afl-showmap
afl-tmin
as
# Binaries produced by "make -C llvm_mode"
afl-clang-fast
afl-clang-fast++
afl-llvm-pass.so
afl-llvm-rt-32.o
afl-llvm-rt-64.o
afl-llvm-rt.o
xxxxxxxxxx
usage() {
echo "Usage: $0 -o <out_dir> -k <key> -v <value> [-p <precision>]" 1>&2;
echo " " 1>&2;
echo "Checks if a key:value appears in the fuzzer_stats report" 1>&2;
echo " " 1>&2;
echo -n "If \"value\" is numeric and \"precision\" is defined, checks if the stat " 1>&2;
echo "printed by afl is value+/-precision." 1>&2;
exit 1; }
while getopts "o:k:v:p:" opt; do
case "${opt}" in
o)
o=${OPTARG}
;;
k)
k=${OPTARG}
;;
v)
v=${OPTARG}
;;
p)
p=${OPTARG}
;;
*)
usage
;;
esac
done
if [ -z $o ] || [ -z $k ] || [ -z $v ]; then usage; fi
# xargs to trim the surrounding whitespaces
stat_v=$( grep $k "$o"/fuzzer_stats | cut -d ":" -f 2 | xargs )
v=$( echo "$v" | xargs )
if [ -z stat_v ];
then echo "ERROR: key $k not found in fuzzer_stats." 1>&2
exit 1
fi
re_percent='^[0-9]+([.][0-9]+)?\%$'
# if the argument is a number in percentage, get rid of the %
if [[ "$v" =~ $re_percent ]]; then v=${v: :-1}; fi
if [[ "$stat_v" =~ $re_percent ]]; then stat_v=${stat_v: :-1}; fi
re_numeric='^[0-9]+([.][0-9]+)?$'
# if the argument is not a number, we check for strict equality
if (! [[ "$v" =~ $re_numeric ]]) || (! [[ "$stat_v" =~ $re ]]);
then if [ "$v" != "$stat_v" ];
then echo "ERROR: \"$k:$stat_v\" (should be $v)." 1>&2
exit 2;
fi
# checks if the stat reported by afl is in the range
elif [ "$stat_v" -lt $(( v - p )) ] || [ "$stat_v" -gt $(( v + p )) ];
then echo "ERROR: key $k:$stat_v is out of correct range." 1>&2
exit 3;
fi
echo "OK: key $k:$stat_v" 1>&2
xxxxxxxxxx
language: c
env:
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_STOP_MANUALLY=1
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_EXIT_WHEN_DONE=1
# TODO: test AFL_BENCH_UNTIL_CRASH once we have a target that crashes
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_BENCH_JUST_ONE=1
before_install:
- sudo apt update
- sudo apt install -y libtool libtool-bin automake bison libglib2.0
# TODO: Look into splitting off some builds using a build matrix.
# TODO: Move this all into a bash script so we don't need to write bash in yaml.
script:
- make
- ./afl-gcc ./test-instr.c -o test-instr-gcc
- mkdir seeds
- echo "" > seeds/nil_seed
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
- rm -r out/*
- ./afl-clang ./test-instr.c -o test-instr-clang
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 2
- make clean
- CC=clang CXX=clang++ make
- cd llvm_mode
# TODO: Build with different versions of clang/LLVM since LLVM passes don't
# have a stable API.
- CC=clang CXX=clang++ LLVM_CONFIG=llvm-config make
- cd ..
- rm -r out/*
- ./afl-clang-fast ./test-instr.c -o test-instr-clang-fast
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
# Test fuzzing libFuzzer targets and trace-pc-guard instrumentation.
- clang -g -fsanitize-coverage=trace-pc-guard ./test-libfuzzer-target.c -c
- clang -c -w llvm_mode/afl-llvm-rt.o.c
- wget https://raw.githubusercontent.com/llvm/llvm-project/main/compiler-rt/lib/fuzzer/afl/afl_driver.cpp
- clang++ afl_driver.cpp afl-llvm-rt.o.o test-libfuzzer-target.o -o test-libfuzzer-target
- timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-libfuzzer-target
- cd qemu_mode
- ./build_qemu_support.sh
- cd ..
- gcc ./test-instr.c -o test-no-instr
- if [ -z "$AFL_STOP_MANUALLY" ];
then ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
else timeout --preserve-status 5s ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
fi
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 12 -p 9
xxxxxxxxxx
/*
Copyright 2016 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - file format analyzer
-----------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
A nifty utility that grabs an input file and takes a stab at explaining
its structure by observing how changes to it affect the execution path.
If the output scrolls past the edge of the screen, pipe it to 'less -r'.
*/
static s32 child_pid; /* PID of the tested program */
static u8* trace_bits; /* SHM with instrumentation bitmap */
static u8 *in_file, /* Analyzer input test case */
*prog_in, /* Targeted program input file */
*target_path, /* Path to target binary */
*doc_path; /* Path to docs */
static u8 *in_data; /* Input data for analysis */
static u32 in_len, /* Input data length */
orig_cksum, /* Original checksum */
total_execs, /* Total number of execs */
exec_hangs, /* Total number of hangs */
exec_tmout = EXEC_TIMEOUT; /* Exec timeout (ms) */
static u64 mem_limit = MEM_LIMIT; /* Memory limit (MB) */
static s32 shm_id, /* ID of the SHM region */
dev_null_fd = -1; /* FD to /dev/null */
static u8 edges_only, /* Ignore hit counts? */
use_hex_offsets, /* Show hex offsets? */
use_stdin = 1; /* Use stdin for program input? */
static volatile u8
stop_soon, /* Ctrl-C pressed? */
child_timed_out; /* Child timed out? */
/* Constants used for describing byte behavior. */
/* Changing byte is a no-op. */
/* Some changes have no effect. */
/* Changes produce variable paths. */
/* Changes produce fixed patterns. */
/* Potential length field */
/* Potential checksum */
/* Potential "suspect" blob */
/* Classify tuple counts. This is a slow & naive version, but good enough here. */
static u8 count_class_lookup[256] = {
[0] = 0,
[1] = 1,
[2] = 2,
[3] = 4,
[4 ... 7] = 8,
[8 ... 15] = 16,
[16 ... 31] = 32,
[32 ... 127] = 64,
[128 ... 255] = 128
};
static void classify_counts(u8* mem) {
u32 i = MAP_SIZE;
if (edges_only) {
while (i--) {
if (*mem) *mem = 1;
mem++;
}
} else {
while (i--) {
*mem = count_class_lookup[*mem];
mem++;
}
}
}
/* See if any bytes are set in the bitmap. */
static inline u8 anything_set(void) {
u32* ptr = (u32*)trace_bits;
u32 i = (MAP_SIZE >> 2);
while (i--) if (*(ptr++)) return 1;
return 0;
}
/* Get rid of shared memory and temp files (atexit handler). */
static void remove_shm(void) {
unlink(prog_in); /* Ignore errors */
shmctl(shm_id, IPC_RMID, NULL);
}
/* Configure shared memory. */
static void setup_shm(void) {
u8* shm_str;
shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
if (shm_id < 0) PFATAL("shmget() failed");
atexit(remove_shm);
shm_str = alloc_printf("%d", shm_id);
setenv(SHM_ENV_VAR, shm_str, 1);
ck_free(shm_str);
trace_bits = shmat(shm_id, NULL, 0);
if (trace_bits == (void *)-1) PFATAL("shmat() failed");
}
/* Read initial file. */
static void read_initial_file(void) {
struct stat st;
s32 fd = open(in_file, O_RDONLY);
if (fd < 0) PFATAL("Unable to open '%s'", in_file);
if (fstat(fd, &st) || !st.st_size)
FATAL("Zero-sized input file.");
if (st.st_size >= TMIN_MAX_FILE)
FATAL("Input file is too large (%u MB max)", TMIN_MAX_FILE / 1024 / 1024);
in_len = st.st_size;
in_data = ck_alloc_nozero(in_len);
ck_read(fd, in_data, in_len, in_file);
close(fd);
OKF("Read %u byte%s from '%s'.", in_len, in_len == 1 ? "" : "s", in_file);
}
/* Write output file. */
static s32 write_to_file(u8* path, u8* mem, u32 len) {
s32 ret;
unlink(path); /* Ignore errors */
ret = open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
if (ret < 0) PFATAL("Unable to create '%s'", path);
ck_write(ret, mem, len, path);
lseek(ret, 0, SEEK_SET);
return ret;
}
/* Handle timeout signal. */
static void handle_timeout(int sig) {
child_timed_out = 1;
if (child_pid > 0) kill(child_pid, SIGKILL);
}
/* Execute target application. Returns exec checksum, or 0 if program
times out. */
static u32 run_target(char** argv, u8* mem, u32 len, u8 first_run) {
static struct itimerval it;
int status = 0;
s32 prog_in_fd;
u32 cksum;
memset(trace_bits, 0, MAP_SIZE);
MEM_BARRIER();
prog_in_fd = write_to_file(prog_in, mem, len);
child_pid = fork();
if (child_pid < 0) PFATAL("fork() failed");
if (!child_pid) {
struct rlimit r;
if (dup2(use_stdin ? prog_in_fd : dev_null_fd, 0) < 0 ||
dup2(dev_null_fd, 1) < 0 ||
dup2(dev_null_fd, 2) < 0) {
*(u32*)trace_bits = EXEC_FAIL_SIG;
PFATAL("dup2() failed");
}
close(dev_null_fd);
close(prog_in_fd);
if (mem_limit) {
r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;
setrlimit(RLIMIT_AS, &r); /* Ignore errors */
setrlimit(RLIMIT_DATA, &r); /* Ignore errors */
/* ^RLIMIT_AS */
}
r.rlim_max = r.rlim_cur = 0;
setrlimit(RLIMIT_CORE, &r); /* Ignore errors */
execv(target_path, argv);
*(u32*)trace_bits = EXEC_FAIL_SIG;
exit(0);
}
close(prog_in_fd);
/* Configure timeout, wait for child, cancel timeout. */
child_timed_out = 0;
it.it_value.tv_sec = (exec_tmout / 1000);
it.it_value.tv_usec = (exec_tmout % 1000) * 1000;
setitimer(ITIMER_REAL, &it, NULL);
if (waitpid(child_pid, &status, 0) <= 0) FATAL("waitpid() failed");
child_pid = 0;
it.it_value.tv_sec = 0;
it.it_value.tv_usec = 0;
setitimer(ITIMER_REAL, &it, NULL);
MEM_BARRIER();
/* Clean up bitmap, analyze exit condition, etc. */
if (*(u32*)trace_bits == EXEC_FAIL_SIG)
FATAL("Unable to execute '%s'", argv[0]);
classify_counts(trace_bits);
total_execs++;
if (stop_soon) {
SAYF(cRST cLRD "\n+++ Analysis aborted by user +++\n" cRST);
exit(1);
}
/* Always discard inputs that time out. */
if (child_timed_out) {
exec_hangs++;
return 0;
}
cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
/* We don't actually care if the target is crashing or not,
except that when it does, the checksum should be different. */
if (WIFSIGNALED(status) ||
(WIFEXITED(status) && WEXITSTATUS(status) == MSAN_ERROR) ||
(WIFEXITED(status) && WEXITSTATUS(status))) {
cksum ^= 0xffffffff;
}
if (first_run) orig_cksum = cksum;
return cksum;
}
/* Helper function to display a human-readable character. */
static void show_char(u8 val) {
switch (val) {
case 0 ... 32:
case 127 ... 255: SAYF("#%02x", val); break;
default: SAYF(" %c ", val);
}
}
/* Show the legend */
static void show_legend(void) {
SAYF(" " cLGR bgGRA " 01 " cRST " - no-op block "
cBLK bgLGN " 01 " cRST " - suspected length field\n"
" " cBRI bgGRA " 01 " cRST " - superficial content "
cBLK bgYEL " 01 " cRST " - suspected cksum or magic int\n"
" " cBLK bgCYA " 01 " cRST " - critical stream "
cBLK bgLRD " 01 " cRST " - suspected checksummed block\n"
" " cBLK bgMGN " 01 " cRST " - \"magic value\" section\n\n");
}
/* USE_COLOR */
/* Interpret and report a pattern in the input file. */
static void dump_hex(u8* buf, u32 len, u8* b_data) {
u32 i;
for (i = 0; i < len; i++) {
u32 rlen = 1, off;
u32 rlen = 1;
/* ^USE_COLOR */
u8 rtype = b_data[i] & 0x0f;
/* Look ahead to determine the length of run. */
while (i + rlen < len && (b_data[i] >> 7) == (b_data[i + rlen] >> 7)) {
if (rtype < (b_data[i + rlen] & 0x0f)) rtype = b_data[i + rlen] & 0x0f;
rlen++;
}
/* Try to do some further classification based on length & value. */
if (rtype == RESP_FIXED) {
switch (rlen) {
case 2: {
u16 val = *(u16*)(in_data + i);
/* Small integers may be length fields. */
if (val && (val <= in_len || SWAP16(val) <= in_len)) {
rtype = RESP_LEN;
break;
}
/* Uniform integers may be checksums. */
if (val && abs(in_data[i] - in_data[i + 1]) > 32) {
rtype = RESP_CKSUM;
break;
}
break;
}
case 4: {
u32 val = *(u32*)(in_data + i);
/* Small integers may be length fields. */
if (val && (val <= in_len || SWAP32(val) <= in_len)) {
rtype = RESP_LEN;
break;
}
/* Uniform integers may be checksums. */
if (val && (in_data[i] >> 7 != in_data[i + 1] >> 7 ||
in_data[i] >> 7 != in_data[i + 2] >> 7 ||
in_data[i] >> 7 != in_data[i + 3] >> 7)) {
rtype = RESP_CKSUM;
break;
}
break;
}
case 1: case 3: case 5 ... MAX_AUTO_EXTRA - 1: break;
default: rtype = RESP_SUSPECT;
}
}
/* Print out the entire run. */
for (off = 0; off < rlen; off++) {
/* Every 16 digits, display offset. */
if (!((i + off) % 16)) {
if (off) SAYF(cRST cLCY ">");
if (use_hex_offsets)
SAYF(cRST cGRA "%s[%06x] " cRST, (i + off) ? "\n" : "", i + off);
else
SAYF(cRST cGRA "%s[%06u] " cRST, (i + off) ? "\n" : "", i + off);
}
switch (rtype) {
case RESP_NONE: SAYF(cLGR bgGRA); break;
case RESP_MINOR: SAYF(cBRI bgGRA); break;
case RESP_VARIABLE: SAYF(cBLK bgCYA); break;
case RESP_FIXED: SAYF(cBLK bgMGN); break;
case RESP_LEN: SAYF(cBLK bgLGN); break;
case RESP_CKSUM: SAYF(cBLK bgYEL); break;
case RESP_SUSPECT: SAYF(cBLK bgLRD); break;
}
show_char(in_data[i + off]);
if (off != rlen - 1 && (i + off + 1) % 16) SAYF(" "); else SAYF(cRST " ");
}
if (use_hex_offsets)
SAYF(" Offset %x, length %u: ", i, rlen);
else
SAYF(" Offset %u, length %u: ", i, rlen);
switch (rtype) {
case RESP_NONE: SAYF("no-op block\n"); break;
case RESP_MINOR: SAYF("superficial content\n"); break;
case RESP_VARIABLE: SAYF("critical stream\n"); break;
case RESP_FIXED: SAYF("\"magic value\" section\n"); break;
case RESP_LEN: SAYF("suspected length field\n"); break;
case RESP_CKSUM: SAYF("suspected cksum or magic int\n"); break;
case RESP_SUSPECT: SAYF("suspected checksummed block\n"); break;
}
/* ^USE_COLOR */
i += rlen - 1;
}
SAYF(cRST "\n");
/* USE_COLOR */
}
/* Actually analyze! */
static void analyze(char** argv) {
u32 i;
u32 boring_len = 0, prev_xff = 0, prev_x01 = 0, prev_s10 = 0, prev_a10 = 0;
u8* b_data = ck_alloc(in_len + 1);
u8 seq_byte = 0;
b_data[in_len] = 0xff; /* Intentional terminator. */
ACTF("Analyzing input file (this may take a while)...\n");
show_legend();
/* USE_COLOR */
for (i = 0; i < in_len; i++) {
u32 xor_ff, xor_01, sub_10, add_10;
u8 xff_orig, x01_orig, s10_orig, a10_orig;
/* Perform walking byte adjustments across the file. We perform four
operations designed to elicit some response from the underlying
code. */
in_data[i] ^= 0xff;
xor_ff = run_target(argv, in_data, in_len, 0);
in_data[i] ^= 0xfe;
xor_01 = run_target(argv, in_data, in_len, 0);
in_data[i] = (in_data[i] ^ 0x01) - 0x10;
sub_10 = run_target(argv, in_data, in_len, 0);
in_data[i] += 0x20;
add_10 = run_target(argv, in_data, in_len, 0);
in_data[i] -= 0x10;
/* Classify current behavior. */
xff_orig = (xor_ff == orig_cksum);
x01_orig = (xor_01 == orig_cksum);
s10_orig = (sub_10 == orig_cksum);
a10_orig = (add_10 == orig_cksum);
if (xff_orig && x01_orig && s10_orig && a10_orig) {
b_data[i] = RESP_NONE;
boring_len++;
} else if (xff_orig || x01_orig || s10_orig || a10_orig) {
b_data[i] = RESP_MINOR;
boring_len++;
} else if (xor_ff == xor_01 && xor_ff == sub_10 && xor_ff == add_10) {
b_data[i] = RESP_FIXED;
} else b_data[i] = RESP_VARIABLE;
/* When all checksums change, flip most significant bit of b_data. */
if (prev_xff != xor_ff && prev_x01 != xor_01 &&
prev_s10 != sub_10 && prev_a10 != add_10) seq_byte ^= 0x80;
b_data[i] |= seq_byte;
prev_xff = xor_ff;
prev_x01 = xor_01;
prev_s10 = sub_10;
prev_a10 = add_10;
}
dump_hex(in_data, in_len, b_data);
SAYF("\n");
OKF("Analysis complete. Interesting bits: %0.02f%% of the input file.",
100.0 - ((double)boring_len * 100) / in_len);
if (exec_hangs)
WARNF(cLRD "Encountered %u timeouts - results may be skewed." cRST,
exec_hangs);
ck_free(b_data);
}
/* Handle Ctrl-C and the like. */
static void handle_stop_sig(int sig) {
stop_soon = 1;
if (child_pid > 0) kill(child_pid, SIGKILL);
}
/* Do basic preparations - persistent fds, filenames, etc. */
static void set_up_environment(void) {
u8* x;
dev_null_fd = open("/dev/null", O_RDWR);
if (dev_null_fd < 0) PFATAL("Unable to open /dev/null");
if (!prog_in) {
u8* use_dir = ".";
if (access(use_dir, R_OK | W_OK | X_OK)) {
use_dir = getenv("TMPDIR");
if (!use_dir) use_dir = "/tmp";
}
prog_in = alloc_printf("%s/.afl-analyze-temp-%u", use_dir, getpid());
}
/* Set sane defaults... */
x = getenv("ASAN_OPTIONS");
if (x) {
if (!strstr(x, "abort_on_error=1"))
FATAL("Custom ASAN_OPTIONS set without abort_on_error=1 - please fix!");
if (!strstr(x, "symbolize=0"))
FATAL("Custom ASAN_OPTIONS set without symbolize=0 - please fix!");
}
x = getenv("MSAN_OPTIONS");
if (x) {
if (!strstr(x, "exit_code=" STRINGIFY(MSAN_ERROR)))
FATAL("Custom MSAN_OPTIONS set without exit_code="
STRINGIFY(MSAN_ERROR) " - please fix!");
if (!strstr(x, "symbolize=0"))
FATAL("Custom MSAN_OPTIONS set without symbolize=0 - please fix!");
}
setenv("ASAN_OPTIONS", "abort_on_error=1:"
"detect_leaks=0:"
"symbolize=0:"
"allocator_may_return_null=1", 0);
setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
"symbolize=0:"
"abort_on_error=1:"
"allocator_may_return_null=1:"
"msan_track_origins=0", 0);
if (getenv("AFL_PRELOAD")) {
setenv("LD_PRELOAD", getenv("AFL_PRELOAD"), 1);
setenv("DYLD_INSERT_LIBRARIES", getenv("AFL_PRELOAD"), 1);
}
}
/* Setup signal handlers, duh. */
static void setup_signal_handlers(void) {
struct sigaction sa;
sa.sa_handler = NULL;
sa.sa_flags = SA_RESTART;
sa.sa_sigaction = NULL;
sigemptyset(&sa.sa_mask);
/* Various ways of saying "stop". */
sa.sa_handler = handle_stop_sig;
sigaction(SIGHUP, &sa, NULL);
sigaction(SIGINT, &sa, NULL);
sigaction(SIGTERM, &sa, NULL);
/* Exec timeout notifications. */
sa.sa_handler = handle_timeout;
sigaction(SIGALRM, &sa, NULL);
}
/* Detect @@ in args. */
static void detect_file_args(char** argv) {
u32 i = 0;
u8* cwd = getcwd(NULL, 0);
if (!cwd) PFATAL("getcwd() failed");
while (argv[i]) {
u8* aa_loc = strstr(argv[i], "@@");
if (aa_loc) {
u8 *aa_subst, *n_arg;
/* Be sure that we're always using fully-qualified paths. */
if (prog_in[0] == '/') aa_subst = prog_in;
else aa_subst = alloc_printf("%s/%s", cwd, prog_in);
/* Construct a replacement argv value. */
*aa_loc = 0;
n_arg = alloc_printf("%s%s%s", argv[i], aa_subst, aa_loc + 2);
argv[i] = n_arg;
*aa_loc = '@';
if (prog_in[0] != '/') ck_free(aa_subst);
}
i++;
}
free(cwd); /* not tracked */
}
/* Display usage hints. */
static void usage(u8* argv0) {
SAYF("\n%s [ options ] -- /path/to/target_app [ ... ]\n\n"
"Required parameters:\n\n"
" -i file - input test case to be analyzed by the tool\n"
"Execution control settings:\n\n"
" -f file - input file read by the tested program (stdin)\n"
" -t msec - timeout for each run (%u ms)\n"
" -m megs - memory limit for child process (%u MB)\n"
" -Q - use binary-only instrumentation (QEMU mode)\n\n"
"Analysis settings:\n\n"
" -e - look for edge coverage only, ignore hit counts\n\n"
"Other stuff:\n\n"
" -V - show version number and exit\n\n"
"For additional tips, please consult %s/README.\n\n",
argv0, EXEC_TIMEOUT, MEM_LIMIT, doc_path);
exit(1);
}
/* Find binary. */
static void find_binary(u8* fname) {
u8* env_path = 0;
struct stat st;
if (strchr(fname, '/') || !(env_path = getenv("PATH"))) {
target_path = ck_strdup(fname);
if (stat(target_path, &st) || !S_ISREG(st.st_mode) ||
!(st.st_mode & 0111) || st.st_size < 4)
FATAL("Program '%s' not found or not executable", fname);
} else {
while (env_path) {
u8 *cur_elem, *delim = strchr(env_path, ':');
if (delim) {
cur_elem = ck_alloc(delim - env_path + 1);
memcpy(cur_elem, env_path, delim - env_path);
delim++;
} else cur_elem = ck_strdup(env_path);
env_path = delim;
if (cur_elem[0])
target_path = alloc_printf("%s/%s", cur_elem, fname);
else
target_path = ck_strdup(fname);
ck_free(cur_elem);
if (!stat(target_path, &st) && S_ISREG(st.st_mode) &&
(st.st_mode & 0111) && st.st_size >= 4) break;
ck_free(target_path);
target_path = 0;
}
if (!target_path) FATAL("Program '%s' not found or not executable", fname);
}
}
/* Fix up argv for QEMU. */
static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
char** new_argv = ck_alloc(sizeof(char*) * (argc + 4));
u8 *tmp, *cp, *rsl, *own_copy;
/* Workaround for a QEMU stability glitch. */
setenv("QEMU_LOG", "nochain", 1);
memcpy(new_argv + 3, argv + 1, sizeof(char*) * argc);
/* Now we need to actually find qemu for argv[0]. */
new_argv[2] = target_path;
new_argv[1] = "--";
tmp = getenv("AFL_PATH");
if (tmp) {
cp = alloc_printf("%s/afl-qemu-trace", tmp);
if (access(cp, X_OK))
FATAL("Unable to find '%s'", tmp);
target_path = new_argv[0] = cp;
return new_argv;
}
own_copy = ck_strdup(own_loc);
rsl = strrchr(own_copy, '/');
if (rsl) {
*rsl = 0;
cp = alloc_printf("%s/afl-qemu-trace", own_copy);
ck_free(own_copy);
if (!access(cp, X_OK)) {
target_path = new_argv[0] = cp;
return new_argv;
}
} else ck_free(own_copy);
if (!access(BIN_PATH "/afl-qemu-trace", X_OK)) {
target_path = new_argv[0] = BIN_PATH "/afl-qemu-trace";
return new_argv;
}
FATAL("Unable to find 'afl-qemu-trace'.");
}
/* Main entry point */
int main(int argc, char** argv) {
s32 opt;
u8 mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
char** use_argv;
doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;
SAYF(cCYA "afl-analyze " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
while ((opt = getopt(argc,argv,"+i:f:m:t:eQV")) > 0)
switch (opt) {
case 'i':
if (in_file) FATAL("Multiple -i options not supported");
in_file = optarg;
break;
case 'f':
if (prog_in) FATAL("Multiple -f options not supported");
use_stdin = 0;
prog_in = optarg;
break;
case 'e':
if (edges_only) FATAL("Multiple -e options not supported");
edges_only = 1;
break;
case 'm': {
u8 suffix = 'M';
if (mem_limit_given) FATAL("Multiple -m options not supported");
mem_limit_given = 1;
if (!strcmp(optarg, "none")) {
mem_limit = 0;
break;
}
if (sscanf(optarg, "%llu%c", &mem_limit, &suffix) < 1 ||
optarg[0] == '-') FATAL("Bad syntax used for -m");
switch (suffix) {
case 'T': mem_limit *= 1024 * 1024; break;
case 'G': mem_limit *= 1024; break;
case 'k': mem_limit /= 1024; break;
case 'M': break;
default: FATAL("Unsupported suffix or bad syntax for -m");
}
if (mem_limit < 5) FATAL("Dangerously low value of -m");
if (sizeof(rlim_t) == 4 && mem_limit > 2000)
FATAL("Value of -m out of range on 32-bit systems");
}
break;
case 't':
if (timeout_given) FATAL("Multiple -t options not supported");
timeout_given = 1;
exec_tmout = atoi(optarg);
if (exec_tmout < 10 || optarg[0] == '-')
FATAL("Dangerously low value of -t");
break;
case 'Q':
if (qemu_mode) FATAL("Multiple -Q options not supported");
if (!mem_limit_given) mem_limit = MEM_LIMIT_QEMU;
qemu_mode = 1;
break;
case 'V': /* Show version number */
/* Version number has been printed already, just quit. */
exit(0);
default:
usage(argv[0]);
}
if (optind == argc || !in_file) usage(argv[0]);
use_hex_offsets = !!getenv("AFL_ANALYZE_HEX");
setup_shm();
setup_signal_handlers();
set_up_environment();
find_binary(argv[optind]);
detect_file_args(argv + optind);
if (qemu_mode)
use_argv = get_qemu_argv(argv[0], argv + optind, argc - optind);
else
use_argv = argv + optind;
SAYF("\n");
read_initial_file();
ACTF("Performing dry run (mem limit = %llu MB, timeout = %u ms%s)...",
mem_limit, exec_tmout, edges_only ? ", edges only" : "");
run_target(use_argv, in_data, in_len, 1);
if (child_timed_out)
FATAL("Target binary times out (adjusting -t may help).");
if (!anything_set()) FATAL("No instrumentation detected.");
analyze(use_argv);
OKF("We're done here. Have a nice day!\n");
exit(0);
}
xxxxxxxxxx
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - wrapper for GNU as
---------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
The sole purpose of this wrapper is to preprocess assembly files generated
by GCC / clang and inject the instrumentation bits included from afl-as.h. It
is automatically invoked by the toolchain when compiling programs using
afl-gcc / afl-clang.
Note that it's an explicit non-goal to instrument hand-written assembly,
be it in separate .s files or in __asm__ blocks. The only aspiration this
utility has right now is to be able to skip them gracefully and allow the
compilation process to continue.
That said, see experimental/clang_asm_normalize/ for a solution that may
allow clang users to make things work even with hand-crafted assembly. Just
note that there is no equivalent for GCC.
*/
static u8** as_params; /* Parameters passed to the real 'as' */
static u8* input_file; /* Originally specified input file */
static u8* modified_file; /* Instrumented file for the real 'as' */
static u8 be_quiet, /* Quiet mode (no stderr output) */
clang_mode, /* Running in clang mode? */
pass_thru, /* Just pass data through? */
just_version, /* Just show version? */
sanitizer; /* Using ASAN / MSAN */
static u32 inst_ratio = 100, /* Instrumentation probability (%) */
as_par_cnt = 1; /* Number of params to 'as' */
/* If we don't find --32 or --64 in the command line, default to
instrumentation for whichever mode we were compiled with. This is not
perfect, but should do the trick for almost all use cases. */
static u8 use_64bit = 1;
static u8 use_64bit = 0;
/* __APPLE__ */
/* ^WORD_SIZE_64 */
/* Examine and modify parameters to pass to 'as'. Note that the file name
is always the last parameter passed by GCC, so we exploit this property
to keep the code simple. */
static void edit_params(int argc, char** argv) {
u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
u32 i;
u8 use_clang_as = 0;
/* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
with the code generated by newer versions of clang that are hand-built
by the user. See the thread here: http://goo.gl/HBWDtn.
To work around this, when using clang and running without AFL_AS
specified, we will actually call 'clang -c' instead of 'as -q' to
compile the assembly file.
The tools aren't cmdline-compatible, but at least for now, we can
seemingly get away with this by making only very minor tweaks. Thanks
to Nico Weber for the idea. */
if (clang_mode && !afl_as) {
use_clang_as = 1;
afl_as = getenv("AFL_CC");
if (!afl_as) afl_as = getenv("AFL_CXX");
if (!afl_as) afl_as = "clang";
}
/* __APPLE__ */
/* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
is not set. We need to check these non-standard variables to properly
handle the pass_thru logic later on. */
if (!tmp_dir) tmp_dir = getenv("TEMP");
if (!tmp_dir) tmp_dir = getenv("TMP");
if (!tmp_dir) tmp_dir = "/tmp";
as_params = ck_alloc((argc + 32) * sizeof(u8*));
as_params[0] = afl_as ? afl_as : (u8*)"as";
as_params[argc] = 0;
for (i = 1; i < argc - 1; i++) {
if (!strcmp(argv[i], "--64")) use_64bit = 1;
else if (!strcmp(argv[i], "--32")) use_64bit = 0;
/* The Apple case is a bit different... */
if (!strcmp(argv[i], "-arch") && i + 1 < argc) {
if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
else if (!strcmp(argv[i + 1], "i386"))
FATAL("Sorry, 32-bit Apple platforms are not supported.");
}
/* Strip options that set the preference for a particular upstream
assembler in Xcode. */
if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
continue;
/* __APPLE__ */
as_params[as_par_cnt++] = argv[i];
}
/* When calling clang as the upstream assembler, append -c -x assembler
and hope for the best. */
if (use_clang_as) {
as_params[as_par_cnt++] = "-c";
as_params[as_par_cnt++] = "-x";
as_params[as_par_cnt++] = "assembler";
}
/* __APPLE__ */
input_file = argv[argc - 1];
if (input_file[0] == '-') {
if (!strcmp(input_file + 1, "-version")) {
just_version = 1;
modified_file = input_file;
goto wrap_things_up;
}
if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
else input_file = NULL;
} else {
/* Check if this looks like a standard invocation as a part of an attempt
to compile a program, rather than using gcc on an ad-hoc .s file in
a format we may not understand. This works around an issue compiling
NSS. */
if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
strncmp(input_file, "/var/tmp/", 9) &&
strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
}
modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
(u32)time(NULL));
wrap_things_up:
as_params[as_par_cnt++] = modified_file;
as_params[as_par_cnt] = NULL;
}
/* Process input file, generate modified_file. Insert instrumentation in all
the appropriate places. */
static void add_instrumentation(void) {
static u8 line[MAX_LINE];
FILE* inf;
FILE* outf;
s32 outfd;
u32 ins_lines = 0;
u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0,
skip_intel = 0, skip_app = 0, instrument_next = 0;
u8* colon_pos;
/* __APPLE__ */
if (input_file) {
inf = fopen(input_file, "r");
if (!inf) PFATAL("Unable to read '%s'", input_file);
} else inf = stdin;
outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);
outf = fdopen(outfd, "w");
if (!outf) PFATAL("fdopen() failed");
while (fgets(line, MAX_LINE, inf)) {
/* In some cases, we want to defer writing the instrumentation trampoline
until after all the labels, macros, comments, etc. If we're in this
mode, and if the line starts with a tab followed by a character, dump
the trampoline now. */
if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
instrument_next && line[0] == '\t' && isalpha(line[1])) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
instrument_next = 0;
ins_lines++;
}
/* Output the actual line, call it a day in pass-thru mode. */
fputs(line, outf);
if (pass_thru) continue;
/* All right, this is where the actual fun begins. For one, we only want to
instrument the .text section. So, let's keep track of that in processed
files - and let's set instr_ok accordingly. */
if (line[0] == '\t' && line[1] == '.') {
/* OpenBSD puts jump tables directly inline with the code, which is
a bit annoying. They use a specific format of p2align directives
around them, so we use that as a signal. */
if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
if (!strncmp(line + 2, "text\n", 5) ||
!strncmp(line + 2, "section\t.text", 13) ||
!strncmp(line + 2, "section\t__TEXT,__text", 21) ||
!strncmp(line + 2, "section __TEXT,__text", 21)) {
instr_ok = 1;
continue;
}
if (!strncmp(line + 2, "section\t", 8) ||
!strncmp(line + 2, "section ", 8) ||
!strncmp(line + 2, "bss\n", 4) ||
!strncmp(line + 2, "data\n", 5)) {
instr_ok = 0;
continue;
}
}
/* Detect off-flavor assembly (rare, happens in gdb). When this is
encountered, we set skip_csect until the opposite directive is
seen, and we do not instrument. */
if (strstr(line, ".code")) {
if (strstr(line, ".code32")) skip_csect = use_64bit;
if (strstr(line, ".code64")) skip_csect = !use_64bit;
}
/* Detect syntax changes, as could happen with hand-written assembly.
Skip Intel blocks, resume instrumentation when back to AT&T. */
if (strstr(line, ".intel_syntax")) skip_intel = 1;
if (strstr(line, ".att_syntax")) skip_intel = 0;
/* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
if (line[0] == '#' || line[1] == '#') {
if (strstr(line, "#APP")) skip_app = 1;
if (strstr(line, "#NO_APP")) skip_app = 0;
}
/* If we're in the right mood for instrumenting, check for function
names or conditional labels. This is a bit messy, but in essence,
we want to catch:
^main: - function entry point (always instrumented)
^.L0: - GCC branch label
^.LBB0_0: - clang branch label (but only in clang mode)
^\tjnz foo - conditional branches
...but not:
^# BB#0: - clang comments
^ # BB#0: - ditto
^.Ltmp0: - clang non-branch labels
^.LC0 - GCC non-branch labels
^.LBB0_0: - ditto (when in GCC mode)
^\tjmp foo - non-conditional jumps
Additionally, clang and GCC on MacOS X follow a different convention
with no leading dots on labels, hence the weird maze of #ifdefs
later on.
*/
if (skip_intel || skip_app || skip_csect || !instr_ok ||
line[0] == '#' || line[0] == ' ') continue;
/* Conditional branch instruction (jnz, etc). We append the instrumentation
right after the branch (to instrument the not-taken path) and at the
branch destination label (handled later on). */
if (line[0] == '\t') {
if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
ins_lines++;
}
continue;
}
/* Label of some sort. This may be a branch destination, but we need to
tread carefully and account for several different formatting
conventions. */
/* Apple: L<whatever><digit>: */
if ((colon_pos = strstr(line, ":"))) {
if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
/* Everybody else: .L<whatever>: */
if (strstr(line, ":")) {
if (line[0] == '.') {
/* __APPLE__ */
/* .L0: or LBB0_0: style jump destination */
/* Apple: L<num> / LBB<num> */
if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
&& R(100) < inst_ratio) {
/* Apple: .L<num> / .LBB<num> */
if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
&& R(100) < inst_ratio) {
/* __APPLE__ */
/* An optimization is possible here by adding the code only if the
label is mentioned in the code in contexts other than call / jmp.
That said, this complicates the code by requiring two-pass
processing (messy with stdin), and results in a speed gain
typically under 10%, because compilers are generally pretty good
about not generating spurious intra-function jumps.
We use deferred output chiefly to avoid disrupting
.Lfunc_begin0-style exception handling calculations (a problem on
MacOS X). */
if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
}
} else {
/* Function label (always instrumented, deferred mode). */
instrument_next = 1;
}
}
}
if (ins_lines)
fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
if (input_file) fclose(inf);
fclose(outf);
if (!be_quiet) {
if (!ins_lines) WARNF("No instrumentation targets found%s.",
pass_thru ? " (pass-thru mode)" : "");
else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
ins_lines, use_64bit ? "64" : "32",
getenv("AFL_HARDEN") ? "hardened" :
(sanitizer ? "ASAN/MSAN" : "non-hardened"),
inst_ratio);
}
}
/* Main entry point */
int main(int argc, char** argv) {
s32 pid;
u32 rand_seed;
int status;
u8* inst_ratio_str = getenv("AFL_INST_RATIO");
struct timeval tv;
struct timezone tz;
clang_mode = !!getenv(CLANG_ENV_VAR);
if (isatty(2) && !getenv("AFL_QUIET")) {
SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
} else be_quiet = 1;
if (argc < 2) {
SAYF("\n"
"This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
"executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
"don't want to run this program directly.\n\n"
"Rarely, when dealing with extremely complex projects, it may be advisable to\n"
"set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
"instrumenting every discovered branch.\n\n");
exit(1);
}
gettimeofday(&tv, &tz);
rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
srandom(rand_seed);
edit_params(argc, argv);
if (inst_ratio_str) {
if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
}
if (getenv(AS_LOOP_ENV_VAR))
FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
setenv(AS_LOOP_ENV_VAR, "1", 1);
/* When compiling with ASAN, we don't have a particularly elegant way to skip
ASAN-specific branches. But we can probabilistically compensate for
that... */
if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
sanitizer = 1;
inst_ratio /= 3;
}
if (!just_version) add_instrumentation();
if (!(pid = fork())) {
execvp(as_params[0], (char**)as_params);
FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
}
if (pid < 0) PFATAL("fork() failed");
if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);
exit(WEXITSTATUS(status));
}
xxxxxxxxxx
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - injectable parts
-------------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
Forkserver design by Jann Horn <jannhorn@googlemail.com>
This file houses the assembly-level instrumentation injected into fuzzed
programs. The instrumentation stores XORed pairs of data: identifiers of the
currently executing branch and the one that executed immediately before.
TL;DR: the instrumentation does shm_trace_map[cur_loc ^ prev_loc]++
The code is designed for 32-bit and 64-bit x86 systems. Both modes should
work everywhere except for Apple systems. Apple does relocations differently
from everybody else, so since their OSes have been 64-bit for a longer while,
I didn't go through the mental effort of porting the 32-bit code.
In principle, similar code should be easy to inject into any well-behaved
binary-only code (e.g., using DynamoRIO). Conditional jumps offer natural
targets for instrumentation, and should offer comparable probe density.
*/
/*
------------------
Performances notes
------------------
Contributions to make this code faster are appreciated! Here are some
rough notes that may help with the task:
- Only the trampoline_fmt and the non-setup __afl_maybe_log code paths are
really worth optimizing; the setup / fork server stuff matters a lot less
and should be mostly just kept readable.
- We're aiming for modern CPUs with out-of-order execution and large
pipelines; the code is mostly follows intuitive, human-readable
instruction ordering, because "textbook" manual reorderings make no
substantial difference.
- Interestingly, instrumented execution isn't a lot faster if we store a
variable pointer to the setup, log, or return routine and then do a reg
call from within trampoline_fmt. It does speed up non-instrumented
execution quite a bit, though, since that path just becomes
push-call-ret-pop.
- There is also not a whole lot to be gained by doing SHM attach at a
fixed address instead of retrieving __afl_area_ptr. Although it allows us
to have a shorter log routine inserted for conditional jumps and jump
labels (for a ~10% perf gain), there is a risk of bumping into other
allocations created by the program or by tools such as ASAN.
- popf is *awfully* slow, which is why we're doing the lahf / sahf +
overflow test trick. Unfortunately, this forces us to taint eax / rax, but
this dependency on a commonly-used register still beats the alternative of
using pushf / popf.
One possible optimization is to avoid touching flags by using a circular
buffer that stores just a sequence of current locations, with the XOR stuff
happening offline. Alas, this doesn't seem to have a huge impact:
https://groups.google.com/d/msg/afl-users/MsajVf4fRLo/2u6t88ntUBIJ
- Preforking one child a bit sooner, and then waiting for the "go" command
from within the child, doesn't offer major performance gains; fork() seems
to be relatively inexpensive these days. Preforking multiple children does
help, but badly breaks the "~1 core per fuzzer" design, making it harder to
scale up. Maybe there is some middle ground.
Perhaps of note: in the 64-bit version for all platforms except for Apple,
the instrumentation is done slightly differently than on 32-bit, with
__afl_prev_loc and __afl_area_ptr being local to the object file (.lcomm),
rather than global (.comm). This is to avoid GOTRELPC lookups in the critical
code path, which AFAICT, are otherwise unavoidable if we want gcc -shared to
work; simple relocations between .bss and .text won't work on most 64-bit
platforms in such a case.
(Fun fact: on Apple systems, .lcomm can segfault the linker.)
The side effect is that state transitions are measured in a somewhat
different way, with previous tuple being recorded separately within the scope
of every .c file. This should have no impact in any practical sense.
Another side effect of this design is that getenv() will be called once per
every .o file when running in non-instrumented mode; and since getenv() tends
to be optimized in funny ways, we need to be very careful to save every
oddball register it may touch.
*/
static const u8* trampoline_fmt_32 =
"\n"
"/* --- AFL TRAMPOLINE (32-BIT) --- */\n"
"\n"
".align 4\n"
"\n"
"leal -16(%%esp), %%esp\n"
"movl %%edi, 0(%%esp)\n"
"movl %%edx, 4(%%esp)\n"
"movl %%ecx, 8(%%esp)\n"
"movl %%eax, 12(%%esp)\n"
"movl $0x%08x, %%ecx\n"
"call __afl_maybe_log\n"
"movl 12(%%esp), %%eax\n"
"movl 8(%%esp), %%ecx\n"
"movl 4(%%esp), %%edx\n"
"movl 0(%%esp), %%edi\n"
"leal 16(%%esp), %%esp\n"
"\n"
"/* --- END --- */\n"
"\n";
static const u8* trampoline_fmt_64 =
"\n"
"/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
"\n"
".align 4\n"
"\n"
"leaq -(128+24)(%%rsp), %%rsp\n"
"movq %%rdx, 0(%%rsp)\n"
"movq %%rcx, 8(%%rsp)\n"
"movq %%rax, 16(%%rsp)\n"
"movq $0x%08x, %%rcx\n"
"call __afl_maybe_log\n"
"movq 16(%%rsp), %%rax\n"
"movq 8(%%rsp), %%rcx\n"
"movq 0(%%rsp), %%rdx\n"
"leaq (128+24)(%%rsp), %%rsp\n"
"\n"
"/* --- END --- */\n"
"\n";
static const u8* main_payload_32 =
"\n"
"/* --- AFL MAIN PAYLOAD (32-BIT) --- */\n"
"\n"
".text\n"
".att_syntax\n"
".code32\n"
".align 8\n"
"\n"
"__afl_maybe_log:\n"
"\n"
" lahf\n"
" seto %al\n"
"\n"
" /* Check if SHM region is already mapped. */\n"
"\n"
" movl __afl_area_ptr, %edx\n"
" testl %edx, %edx\n"
" je __afl_setup\n"
"\n"
"__afl_store:\n"
"\n"
" /* Calculate and store hit for the code location specified in ecx. There\n"
" is a double-XOR way of doing this without tainting another register,\n"
" and we use it on 64-bit systems; but it's slower for 32-bit ones. */\n"
"\n"
" movl __afl_prev_loc, %edi\n"
" xorl %ecx, %edi\n"
" shrl $1, %ecx\n"
" movl %ecx, __afl_prev_loc\n"
" movl %ecx, %edi\n"
/* ^!COVERAGE_ONLY */
"\n"
" orb $1, (%edx, %edi, 1)\n"
" incb (%edx, %edi, 1)\n"
/* ^SKIP_COUNTS */
"\n"
"__afl_return:\n"
"\n"
" addb $127, %al\n"
" sahf\n"
" ret\n"
"\n"
".align 8\n"
"\n"
"__afl_setup:\n"
"\n"
" /* Do not retry setup if we had previous failures. */\n"
"\n"
" cmpb $0, __afl_setup_failure\n"
" jne __afl_return\n"
"\n"
" /* Map SHM, jumping to __afl_setup_abort if something goes wrong.\n"
" We do not save FPU/MMX/SSE registers here, but hopefully, nobody\n"
" will notice this early in the game. */\n"
"\n"
" pushl %eax\n"
" pushl %ecx\n"
"\n"
" pushl $.AFL_SHM_ENV\n"
" call getenv\n"
" addl $4, %esp\n"
"\n"
" testl %eax, %eax\n"
" je __afl_setup_abort\n"
"\n"
" pushl %eax\n"
" call atoi\n"
" addl $4, %esp\n"
"\n"
" pushl $0 /* shmat flags */\n"
" pushl $0 /* requested addr */\n"
" pushl %eax /* SHM ID */\n"
" call shmat\n"
" addl $12, %esp\n"
"\n"
" cmpl $-1, %eax\n"
" je __afl_setup_abort\n"
"\n"
" /* Store the address of the SHM region. */\n"
"\n"
" movl %eax, __afl_area_ptr\n"
" movl %eax, %edx\n"
"\n"
" popl %ecx\n"
" popl %eax\n"
"\n"
"__afl_forkserver:\n"
"\n"
" /* Enter the fork server mode to avoid the overhead of execve() calls. */\n"
"\n"
" pushl %eax\n"
" pushl %ecx\n"
" pushl %edx\n"
"\n"
" /* Phone home and tell the parent that we're OK. (Note that signals with\n"
" no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
" closed because we were execve()d from an instrumented binary, or because\n"
" the parent doesn't want to use the fork server. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" cmpl $4, %eax\n"
" jne __afl_fork_resume\n"
"\n"
"__afl_fork_wait_loop:\n"
"\n"
" /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY(FORKSRV_FD) " /* file desc */\n"
" call read\n"
" addl $12, %esp\n"
"\n"
" cmpl $4, %eax\n"
" jne __afl_die\n"
"\n"
" /* Once woken up, create a clone of our process. This is an excellent use\n"
" case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
" caches getpid() results and offers no way to update the value, breaking\n"
" abort(), raise(), and a bunch of other things :-( */\n"
"\n"
" call fork\n"
"\n"
" cmpl $0, %eax\n"
" jl __afl_die\n"
" je __afl_fork_resume\n"
"\n"
" /* In parent process: write PID to pipe, then wait for child. */\n"
"\n"
" movl %eax, __afl_fork_pid\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_fork_pid /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" pushl $0 /* no flags */\n"
" pushl $__afl_temp /* status */\n"
" pushl __afl_fork_pid /* PID */\n"
" call waitpid\n"
" addl $12, %esp\n"
"\n"
" cmpl $0, %eax\n"
" jle __afl_die\n"
"\n"
" /* Relay wait status to pipe, then loop back. */\n"
"\n"
" pushl $4 /* length */\n"
" pushl $__afl_temp /* data */\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) " /* file desc */\n"
" call write\n"
" addl $12, %esp\n"
"\n"
" jmp __afl_fork_wait_loop\n"
"\n"
"__afl_fork_resume:\n"
"\n"
" /* In child process: close fds, resume execution. */\n"
"\n"
" pushl $" STRINGIFY(FORKSRV_FD) "\n"
" call close\n"
"\n"
" pushl $" STRINGIFY((FORKSRV_FD + 1)) "\n"
" call close\n"
"\n"
" addl $8, %esp\n"
"\n"
" popl %edx\n"
" popl %ecx\n"
" popl %eax\n"
" jmp __afl_store\n"
"\n"
"__afl_die:\n"
"\n"
" xorl %eax, %eax\n"
" call _exit\n"
"\n"
"__afl_setup_abort:\n"
"\n"
" /* Record setup failure so that we don't keep calling\n"
" shmget() / shmat() over and over again. */\n"
"\n"
" incb __afl_setup_failure\n"
" popl %ecx\n"
" popl %eax\n"
" jmp __afl_return\n"
"\n"
".AFL_VARS:\n"
"\n"
" .comm __afl_area_ptr, 4, 32\n"
" .comm __afl_setup_failure, 1, 32\n"
" .comm __afl_prev_loc, 4, 32\n"
/* !COVERAGE_ONLY */
" .comm __afl_fork_pid, 4, 32\n"
" .comm __afl_temp, 4, 32\n"
"\n"
".AFL_SHM_ENV:\n"
" .asciz \"" SHM_ENV_VAR "\"\n"
"\n"
"/* --- END --- */\n"
"\n";
/* The OpenBSD hack is due to lahf and sahf not being recognized by some
versions of binutils: http://marc.info/?l=openbsd-cvs&m=141636589924400
The Apple code is a bit different when calling libc functions because
they are doing relocations differently from everybody else. We also need
to work around the crash issue with .lcomm and the fact that they don't
recognize .string. */
/* ^__APPLE__ */
static const u8* main_payload_64 =
"\n"
"/* --- AFL MAIN PAYLOAD (64-BIT) --- */\n"
"\n"
".text\n"
".att_syntax\n"
".code64\n"
".align 8\n"
"\n"
"__afl_maybe_log:\n"
"\n"
" .byte 0x9f /* lahf */\n"
" lahf\n"
/* ^__OpenBSD__, etc */
" seto %al\n"
"\n"
" /* Check if SHM region is already mapped. */\n"
"\n"
" movq __afl_area_ptr(%rip), %rdx\n"
" testq %rdx, %rdx\n"
" je __afl_setup\n"
"\n"
"__afl_store:\n"
"\n"
" /* Calculate and store hit for the code location specified in rcx. */\n"
"\n"
" xorq __afl_prev_loc(%rip), %rcx\n"
" xorq %rcx, __afl_prev_loc(%rip)\n"
" shrq $1, __afl_prev_loc(%rip)\n"
/* ^!COVERAGE_ONLY */
"\n"
" orb $1, (%rdx, %rcx, 1)\n"
" incb (%rdx, %rcx, 1)\n"
/* ^SKIP_COUNTS */
"\n"
"__afl_return:\n"
"\n"
" addb $127, %al\n"
" .byte 0x9e /* sahf */\n"
" sahf\n"
/* ^__OpenBSD__, etc */
" ret\n"
"\n"
".align 8\n"
"\n"
"__afl_setup:\n"
"\n"
" /* Do not retry setup if we had previous failures. */\n"
"\n"
" cmpb $0, __afl_setup_failure(%rip)\n"
" jne __afl_return\n"
"\n"
" /* Check out if we have a global pointer on file. */\n"
"\n"
" movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
" movq (%rdx), %rdx\n"
" movq __afl_global_area_ptr(%rip), %rdx\n"
/* !^__APPLE__ */
" testq %rdx, %rdx\n"
" je __afl_setup_first\n"
"\n"
" movq %rdx, __afl_area_ptr(%rip)\n"
" jmp __afl_store\n"
"\n"
"__afl_setup_first:\n"
"\n"
" /* Save everything that is not yet saved and that may be touched by\n"
" getenv() and several other libcalls we'll be relying on. */\n"
"\n"
" leaq -352(%rsp), %rsp\n"
"\n"
" movq %rax, 0(%rsp)\n"
" movq %rcx, 8(%rsp)\n"
" movq %rdi, 16(%rsp)\n"
" movq %rsi, 32(%rsp)\n"
" movq %r8, 40(%rsp)\n"
" movq %r9, 48(%rsp)\n"
" movq %r10, 56(%rsp)\n"
" movq %r11, 64(%rsp)\n"
"\n"
" movq %xmm0, 96(%rsp)\n"
" movq %xmm1, 112(%rsp)\n"
" movq %xmm2, 128(%rsp)\n"
" movq %xmm3, 144(%rsp)\n"
" movq %xmm4, 160(%rsp)\n"
" movq %xmm5, 176(%rsp)\n"
" movq %xmm6, 192(%rsp)\n"
" movq %xmm7, 208(%rsp)\n"
" movq %xmm8, 224(%rsp)\n"
" movq %xmm9, 240(%rsp)\n"
" movq %xmm10, 256(%rsp)\n"
" movq %xmm11, 272(%rsp)\n"
" movq %xmm12, 288(%rsp)\n"
" movq %xmm13, 304(%rsp)\n"
" movq %xmm14, 320(%rsp)\n"
" movq %xmm15, 336(%rsp)\n"
"\n"
" /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */\n"
"\n"
" /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the\n"
" original stack ptr in the callee-saved r12. */\n"
"\n"
" pushq %r12\n"
" movq %rsp, %r12\n"
" subq $16, %rsp\n"
" andq $0xfffffffffffffff0, %rsp\n"
"\n"
" leaq .AFL_SHM_ENV(%rip), %rdi\n"
CALL_L64("getenv")
"\n"
" testq %rax, %rax\n"
" je __afl_setup_abort\n"
"\n"
" movq %rax, %rdi\n"
CALL_L64("atoi")
"\n"
" xorq %rdx, %rdx /* shmat flags */\n"
" xorq %rsi, %rsi /* requested addr */\n"
" movq %rax, %rdi /* SHM ID */\n"
CALL_L64("shmat")
"\n"
" cmpq $-1, %rax\n"
" je __afl_setup_abort\n"
"\n"
" /* Store the address of the SHM region. */\n"
"\n"
" movq %rax, %rdx\n"
" movq %rax, __afl_area_ptr(%rip)\n"
"\n"
" movq %rax, __afl_global_area_ptr(%rip)\n"
" movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
" movq %rax, (%rdx)\n"
/* ^__APPLE__ */
" movq %rax, %rdx\n"
"\n"
"__afl_forkserver:\n"
"\n"
" /* Enter the fork server mode to avoid the overhead of execve() calls. We\n"
" push rdx (area ptr) twice to keep stack alignment neat. */\n"
"\n"
" pushq %rdx\n"
" pushq %rdx\n"
"\n"
" /* Phone home and tell the parent that we're OK. (Note that signals with\n"
" no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
" closed because we were execve()d from an instrumented binary, or because\n"
" the parent doesn't want to use the fork server. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" cmpq $4, %rax\n"
" jne __afl_fork_resume\n"
"\n"
"__afl_fork_wait_loop:\n"
"\n"
" /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY(FORKSRV_FD) ", %rdi /* file desc */\n"
CALL_L64("read")
" cmpq $4, %rax\n"
" jne __afl_die\n"
"\n"
" /* Once woken up, create a clone of our process. This is an excellent use\n"
" case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
" caches getpid() results and offers no way to update the value, breaking\n"
" abort(), raise(), and a bunch of other things :-( */\n"
"\n"
CALL_L64("fork")
" cmpq $0, %rax\n"
" jl __afl_die\n"
" je __afl_fork_resume\n"
"\n"
" /* In parent process: write PID to pipe, then wait for child. */\n"
"\n"
" movl %eax, __afl_fork_pid(%rip)\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_fork_pid(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" movq $0, %rdx /* no flags */\n"
" leaq __afl_temp(%rip), %rsi /* status */\n"
" movq __afl_fork_pid(%rip), %rdi /* PID */\n"
CALL_L64("waitpid")
" cmpq $0, %rax\n"
" jle __afl_die\n"
"\n"
" /* Relay wait status to pipe, then loop back. */\n"
"\n"
" movq $4, %rdx /* length */\n"
" leaq __afl_temp(%rip), %rsi /* data */\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi /* file desc */\n"
CALL_L64("write")
"\n"
" jmp __afl_fork_wait_loop\n"
"\n"
"__afl_fork_resume:\n"
"\n"
" /* In child process: close fds, resume execution. */\n"
"\n"
" movq $" STRINGIFY(FORKSRV_FD) ", %rdi\n"
CALL_L64("close")
"\n"
" movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi\n"
CALL_L64("close")
"\n"
" popq %rdx\n"
" popq %rdx\n"
"\n"
" movq %r12, %rsp\n"
" popq %r12\n"
"\n"
" movq 0(%rsp), %rax\n"
" movq 8(%rsp), %rcx\n"
" movq 16(%rsp), %rdi\n"
" movq 32(%rsp), %rsi\n"
" movq 40(%rsp), %r8\n"
" movq 48(%rsp), %r9\n"
" movq 56(%rsp), %r10\n"
" movq 64(%rsp), %r11\n"
"\n"
" movq 96(%rsp), %xmm0\n"
" movq 112(%rsp), %xmm1\n"
" movq 128(%rsp), %xmm2\n"
" movq 144(%rsp), %xmm3\n"
" movq 160(%rsp), %xmm4\n"
" movq 176(%rsp), %xmm5\n"
" movq 192(%rsp), %xmm6\n"
" movq 208(%rsp), %xmm7\n"
" movq 224(%rsp), %xmm8\n"
" movq 240(%rsp), %xmm9\n"
" movq 256(%rsp), %xmm10\n"
" movq 272(%rsp), %xmm11\n"
" movq 288(%rsp), %xmm12\n"
" movq 304(%rsp), %xmm13\n"
" movq 320(%rsp), %xmm14\n"
" movq 336(%rsp), %xmm15\n"
"\n"
" leaq 352(%rsp), %rsp\n"
"\n"
" jmp __afl_store\n"
"\n"
"__afl_die:\n"
"\n"
" xorq %rax, %rax\n"
CALL_L64("_exit")
"\n"
"__afl_setup_abort:\n"
"\n"
" /* Record setup failure so that we don't keep calling\n"
" shmget() / shmat() over and over again. */\n"
"\n"
" incb __afl_setup_failure(%rip)\n"
"\n"
" movq %r12, %rsp\n"
" popq %r12\n"
"\n"
" movq 0(%rsp), %rax\n"
" movq 8(%rsp), %rcx\n"
" movq 16(%rsp), %rdi\n"
" movq 32(%rsp), %rsi\n"
" movq 40(%rsp), %r8\n"
" movq 48(%rsp), %r9\n"
" movq 56(%rsp), %r10\n"
" movq 64(%rsp), %r11\n"
"\n"
" movq 96(%rsp), %xmm0\n"
" movq 112(%rsp), %xmm1\n"
" movq 128(%rsp), %xmm2\n"
" movq 144(%rsp), %xmm3\n"
" movq 160(%rsp), %xmm4\n"
" movq 176(%rsp), %xmm5\n"
" movq 192(%rsp), %xmm6\n"
" movq 208(%rsp), %xmm7\n"
" movq 224(%rsp), %xmm8\n"
" movq 240(%rsp), %xmm9\n"
" movq 256(%rsp), %xmm10\n"
" movq 272(%rsp), %xmm11\n"
" movq 288(%rsp), %xmm12\n"
" movq 304(%rsp), %xmm13\n"
" movq 320(%rsp), %xmm14\n"
" movq 336(%rsp), %xmm15\n"
"\n"
" leaq 352(%rsp), %rsp\n"
"\n"
" jmp __afl_return\n"
"\n"
".AFL_VARS:\n"
"\n"
" .comm __afl_area_ptr, 8\n"
" .comm __afl_prev_loc, 8\n"
/* !COVERAGE_ONLY */
" .comm __afl_fork_pid, 4\n"
" .comm __afl_temp, 4\n"
" .comm __afl_setup_failure, 1\n"
" .lcomm __afl_area_ptr, 8\n"
" .lcomm __afl_prev_loc, 8\n"
/* !COVERAGE_ONLY */
" .lcomm __afl_fork_pid, 4\n"
" .lcomm __afl_temp, 4\n"
" .lcomm __afl_setup_failure, 1\n"
/* ^__APPLE__ */
" .comm __afl_global_area_ptr, 8, 8\n"
"\n"
".AFL_SHM_ENV:\n"
" .asciz \"" SHM_ENV_VAR "\"\n"
"\n"
"/* --- END --- */\n"
"\n";
/* !_HAVE_AFL_AS_H */
xxxxxxxxxx
#
# american fuzzy lop - corpus minimization tool
# ---------------------------------------------
#
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
#
# Copyright 2014, 2015 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This tool tries to find the smallest subset of files in the input directory
# that still trigger the full range of instrumentation data points seen in
# the starting corpus. This has two uses:
#
# - Screening large corpora of input files before using them as a seed for
# afl-fuzz. The tool will remove functionally redundant files and likely
# leave you with a much smaller set.
#
# (In this case, you probably also want to consider running afl-tmin on
# the individual files later on to reduce their size.)
#
# - Minimizing the corpus generated organically by afl-fuzz, perhaps when
# planning to feed it to more resource-intensive tools. The tool achieves
# this by removing all entries that used to trigger unique behaviors in the
# past, but have been made obsolete by later finds.
#
# Note that the tool doesn't modify the files themselves. For that, you want
# afl-tmin.
#
# This script must use bash because other shells may have hardcoded limits on
# array sizes.
#
echo "corpus minimization tool for afl-fuzz by <lcamtuf@google.com>"
echo
#########
# SETUP #
#########
# Process command-line options...
MEM_LIMIT=100
TIMEOUT=none
unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE
while getopts "+i:o:f:m:t:eQC" opt; do
case "$opt" in
"i")
IN_DIR="$OPTARG"
;;
"o")
OUT_DIR="$OPTARG"
;;
"f")
STDIN_FILE="$OPTARG"
;;
"m")
MEM_LIMIT="$OPTARG"
MEM_LIMIT_GIVEN=1
;;
"t")
TIMEOUT="$OPTARG"
;;
"e")
EXTRA_PAR="$EXTRA_PAR -e"
;;
"C")
export AFL_CMIN_CRASHES_ONLY=1
;;
"Q")
EXTRA_PAR="$EXTRA_PAR -Q"
test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
QEMU_MODE=1
;;
"?")
exit 1
;;
esac
done
shift $((OPTIND-1))
TARGET_BIN="$1"
if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then
cat 1>&2 <<_EOF_
Usage: $0 [ options ] -- /path/to/target_app [ ... ]
Required parameters:
-i dir - input directory with the starting corpus
-o dir - output directory for minimized files
Execution control settings:
-f file - location read by the fuzzed program (stdin)
-m megs - memory limit for child process ($MEM_LIMIT MB)
-t msec - run time limit for child process (none)
-Q - use binary-only instrumentation (QEMU mode)
Minimization settings:
-C - keep crashing inputs, reject everything else
-e - solve for edge coverage only, ignore hit counts
For additional tips, please consult docs/README.
_EOF_
exit 1
fi
# Do a sanity check to discourage the use of /tmp, since we can't really
# handle this safely from a shell script.
if [ "$AFL_ALLOW_TMP" = "" ]; then
echo "$IN_DIR" | grep -qE '^(/var)?/tmp/'
T1="$?"
echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/'
T2="$?"
echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/'
T3="$?"
echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/'
T4="$?"
echo "$PWD" | grep -qE '^(/var)?/tmp/'
T5="$?"
if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then
echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
exit 1
fi
fi
# If @@ is specified, but there's no -f, let's come up with a temporary input
# file name.
TRACE_DIR="$OUT_DIR/.traces"
if [ "$STDIN_FILE" = "" ]; then
if echo "$*" | grep -qF '@@'; then
STDIN_FILE="$TRACE_DIR/.cur_input"
fi
fi
# Check for obvious errors.
if [ ! "$MEM_LIMIT" = "none" ]; then
if [ "$MEM_LIMIT" -lt "5" ]; then
echo "[-] Error: dangerously low memory limit." 1>&2
exit 1
fi
fi
if [ ! "$TIMEOUT" = "none" ]; then
if [ "$TIMEOUT" -lt "10" ]; then
echo "[-] Error: dangerously low timeout." 1>&2
exit 1
fi
fi
if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
TNEW="`which "$TARGET_BIN" 2>/dev/null`"
if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then
echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
exit 1
fi
TARGET_BIN="$TNEW"
fi
if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" ]; then
if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
exit 1
fi
fi
if [ ! -d "$IN_DIR" ]; then
echo "[-] Error: directory '$IN_DIR' not found." 1>&2
exit 1
fi
test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"
find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null
rm -rf "$TRACE_DIR" 2>/dev/null
rmdir "$OUT_DIR" 2>/dev/null
if [ -d "$OUT_DIR" ]; then
echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
exit 1
fi
mkdir -m 700 -p "$TRACE_DIR" || exit 1
if [ ! "$STDIN_FILE" = "" ]; then
rm -f "$STDIN_FILE" || exit 1
touch "$STDIN_FILE" || exit 1
fi
if [ "$AFL_PATH" = "" ]; then
SHOWMAP="${0%/afl-cmin}/afl-showmap"
else
SHOWMAP="$AFL_PATH/afl-showmap"
fi
if [ ! -x "$SHOWMAP" ]; then
echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
rm -rf "$TRACE_DIR"
exit 1
fi
IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`))
if [ "$IN_COUNT" = "0" ]; then
echo "[+] Hmm, no inputs in the target directory. Nothing to be done."
rm -rf "$TRACE_DIR"
exit 1
fi
FIRST_FILE=`ls "$IN_DIR" | head -1`
# Make sure that we're not dealing with a directory.
if [ -d "$IN_DIR/$FIRST_FILE" ]; then
echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2
rm -rf "$TRACE_DIR"
exit 1
fi
# Check for the more efficient way to copy files...
if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
CP_TOOL=ln
else
CP_TOOL=cp
fi
# Make sure that we can actually get anything out of afl-showmap before we
# waste too much time.
echo "[*] Testing the target binary..."
if [ "$STDIN_FILE" = "" ]; then
AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"
else
cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
fi
FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`))
if [ "$FIRST_COUNT" -gt "0" ]; then
echo "[+] OK, $FIRST_COUNT tuples recorded."
else
echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 1
fi
# Let's roll!
#############################
# STEP 1: COLLECTING TRACES #
#############################
echo "[*] Obtaining traces for input files in '$IN_DIR'..."
(
CUR=0
if [ "$STDIN_FILE" = "" ]; then
while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
"$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"
done < <(ls "$IN_DIR")
else
while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
cp "$IN_DIR/$fn" "$STDIN_FILE"
"$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
done < <(ls "$IN_DIR")
fi
)
echo
##########################
# STEP 2: SORTING TUPLES #
##########################
# With this out of the way, we sort all tuples by popularity across all
# datasets. The reasoning here is that we won't be able to avoid the files
# that trigger unique tuples anyway, so we will want to start with them and
# see what's left.
echo "[*] Sorting trace sets (this may take a while)..."
ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \
sort | uniq -c | sort -n >"$TRACE_DIR/.all_uniq"
TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`))
echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."
#####################################
# STEP 3: SELECTING CANDIDATE FILES #
#####################################
# The next step is to find the best candidate for each tuple. The "best"
# part is understood simply as the smallest input that includes a particular
# tuple in its trace. Empirical evidence suggests that this produces smaller
# datasets than more involved algorithms that could be still pulled off in
# a shell script.
echo "[*] Finding best candidates for each tuple..."
CUR=0
while read -r fn; do
CUR=$((CUR+1))
printf "\\r Processing file $CUR/$IN_COUNT... "
sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"
done < <(ls -rS "$IN_DIR")
echo
##############################
# STEP 4: LOADING CANDIDATES #
##############################
# At this point, we have a file of tuple-file pairs, sorted by file size
# in ascending order (as a consequence of ls -rS). By doing sort keyed
# only by tuple (-k 1,1) and configured to output only the first line for
# every key (-s -u), we end up with the smallest file for each tuple.
echo "[*] Sorting candidate list (be patient)..."
sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \
sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script"
if [ ! -s "$TRACE_DIR/.candidate_script" ]; then
echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 1
fi
# The sed command converted the sorted list to a shell script that populates
# BEST_FILE[tuple]="fname". Let's load that!
. "$TRACE_DIR/.candidate_script"
##########################
# STEP 5: WRITING OUTPUT #
##########################
# The final trick is to grab the top pick for each tuple, unless said tuple is
# already set due to the inclusion of an earlier candidate; and then put all
# tuples associated with the newly-added file to the "already have" list. The
# loop works from least popular tuples and toward the most common ones.
echo "[*] Processing candidates and writing output files..."
CUR=0
touch "$TRACE_DIR/.already_have"
while read -r cnt tuple; do
CUR=$((CUR+1))
printf "\\r Processing tuple $CUR/$TUPLE_COUNT... "
# If we already have this tuple, skip it.
grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue
FN=${BEST_FILE[tuple]}
$CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"
if [ "$((CUR % 5))" = "0" ]; then
sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
else
cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
fi
done <"$TRACE_DIR/.all_uniq"
echo
OUT_COUNT=`ls -- "$OUT_DIR" | wc -l`
if [ "$OUT_COUNT" = "1" ]; then
echo "[!] WARNING: All test cases had the same traces, check syntax!"
fi
echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
echo
test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
exit 0
xxxxxxxxxx
/*
Copyright 2013 Google LLC All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
american fuzzy lop - fuzzer code
--------------------------------
Written and maintained by Michal Zalewski <lcamtuf@google.com>
Forkserver design by Jann Horn <jannhorn@googlemail.com>
This is the real deal: the program takes an instrumented binary and
attempts a variety of basic fuzzing tricks, paying close attention to
how they affect the execution path.
*/
/* __APPLE__ || __FreeBSD__ || __OpenBSD__ */
/* For systems that have sched_setaffinity; right now just Linux, but one
can hope... */
/* __linux__ */
/* A toggle to export some variables when building as a library. Not very
useful for the general public. */
/* ^AFL_LIB */
/* Lots of globals, but mostly for the status UI and other things where it
really makes no sense to haul them around as function parameters. */
EXP_ST u8 *in_dir, /* Input directory with test cases */
*out_file, /* File to fuzz, if any */
*out_dir, /* Working & output directory */
*sync_dir, /* Synchronization directory */
*sync_id, /* Fuzzer ID */
*use_banner, /* Display banner */
*in_bitmap, /* Input bitmap */
*doc_path, /* Path to documentation dir */
*target_path, /* Path to target binary */
*orig_cmdline; /* Original command line */
EXP_ST u32 exec_tmout = EXEC_TIMEOUT; /* Configurable exec timeout (ms) */
static u32 hang_tmout = EXEC_TIMEOUT; /* Timeout used for hang det (ms) */
EXP_ST u64 mem_limit = MEM_LIMIT; /* Memory cap for child (MB) */
EXP_ST u32 cpu_to_bind = 0; /* id of free CPU core to bind */
static u32 stats_update_freq = 1; /* Stats update frequency (execs) */
EXP_ST u8 skip_deterministic, /* Skip deterministic stages? */
force_deterministic, /* Force deterministic stages? */
use_splicing, /* Recombine input files? */
dumb_mode, /* Run in non-instrumented mode? */
score_changed, /* Scoring for favorites changed? */
kill_signal, /* Signal that killed the child */
resuming_fuzz, /* Resuming an older fuzzing job? */
timeout_given, /* Specific timeout given? */
cpu_to_bind_given, /* Specified cpu_to_bind given? */
not_on_tty, /* stdout is not a tty */
term_too_small, /* terminal dimensions too small */
uses_asan, /* Target uses ASAN? */
no_forkserver, /* Disable forkserver? */
crash_mode, /* Crash mode! Yeah! */
in_place_resume, /* Attempt in-place resume? */
auto_changed, /* Auto-generated tokens changed? */
no_cpu_meter_red, /* Feng shui on the status screen */
no_arith, /* Skip most arithmetic ops */
shuffle_queue, /* Shuffle input queue? */
bitmap_changed = 1, /* Time to update bitmap? */
qemu_mode, /* Running in QEMU mode? */
skip_requested, /* Skip request, via SIGUSR1 */
run_over10m, /* Run time over 10 minutes? */
persistent_mode, /* Running in persistent mode? */
deferred_mode, /* Deferred forkserver mode? */
fast_cal; /* Try to calibrate faster? */
static s32 out_fd, /* Persistent fd for out_file */
dev_urandom_fd = -1, /* Persistent fd for /dev/urandom */
dev_null_fd = -1, /* Persistent fd for /dev/null */
fsrv_ctl_fd, /* Fork server control pipe (write) */
fsrv_st_fd; /* Fork server status pipe (read) */
static s32 forksrv_pid, /* PID of the fork server */
child_pid = -1, /* PID of the fuzzed program */
out_dir_fd = -1; /* FD of the lock file */
EXP_ST u8* trace_bits; /* SHM with instrumentation bitmap */
EXP_ST u8 virgin_bits[MAP_SIZE], /* Regions yet untouched by fuzzing */
virgin_tmout[MAP_SIZE], /* Bits we haven't seen in tmouts */
virgin_crash[MAP_SIZE]; /* Bits we haven't seen in crashes */
static u8 var_bytes[MAP_SIZE]; /* Bytes that appear to be variable */
static s32 shm_id; /* ID of the SHM region */
static volatile u8 stop_soon, /* Ctrl-C pressed? */
clear_screen = 1, /* Window resized? */
child_timed_out; /* Traced process timed out? */
EXP_ST u32 queued_paths, /* Total number of queued testcases */
queued_variable, /* Testcases with variable behavior */
queued_at_start, /* Total number of initial inputs */
queued_discovered, /* Items discovered during this run */
queued_imported, /* Items imported via -S */
queued_favored, /* Paths deemed favorable */
queued_with_cov, /* Paths with new coverage bytes */
pending_not_fuzzed, /* Queued but not done yet */
pending_favored, /* Pending favored paths */
cur_skipped_paths, /* Abandoned inputs in cur cycle */
cur_depth, /* Current path depth */
max_depth, /* Max path depth */
useless_at_start, /* Number of useless starting paths */
var_byte_count, /* Bitmap bytes with var behavior */
current_entry, /* Current queue entry ID */
havoc_div = 1; /* Cycle count divisor for havoc */
EXP_ST u64 total_crashes, /* Total number of crashes */
unique_crashes, /* Crashes with unique signatures */
total_tmouts, /* Total number of timeouts */
unique_tmouts, /* Timeouts with unique signatures */
unique_hangs, /* Hangs with unique signatures */
total_execs, /* Total execve() calls */
slowest_exec_ms, /* Slowest testcase non hang in ms */
start_time, /* Unix start time (ms) */
last_path_time, /* Time for most recent path (ms) */
last_crash_time, /* Time for most recent crash (ms) */
last_hang_time, /* Time for most recent hang (ms) */
last_crash_execs, /* Exec counter at last crash */
queue_cycle, /* Queue round counter */
cycles_wo_finds, /* Cycles without any new paths */
trim_execs, /* Execs done to trim input files */
bytes_trim_in, /* Bytes coming into the trimmer */
bytes_trim_out, /* Bytes coming outa the trimmer */
blocks_eff_total, /* Blocks subject to effector maps */
blocks_eff_select; /* Blocks selected as fuzzable */
static u32 subseq_tmouts; /* Number of timeouts in a row */
static u8 *stage_name = "init", /* Name of the current fuzz stage */
*stage_short, /* Short stage name */
*syncing_party; /* Currently syncing with... */
static s32 stage_cur, stage_max; /* Stage progression */
static s32 splicing_with = -1; /* Splicing with which test case? */
static u32 master_id, master_max; /* Master instance job splitting */
static u32 syncing_case; /* Syncing with case #... */
static s32 stage_cur_byte, /* Byte offset of current stage op */
stage_cur_val; /* Value used for stage op */
static u8 stage_val_type; /* Value type (STAGE_VAL_*) */
static u64 stage_finds[32], /* Patterns found per fuzz stage */
stage_cycles[32]; /* Execs per fuzz stage */
static u32 rand_cnt; /* Random number counter */
static u64 total_cal_us, /* Total calibration time (us) */
total_cal_cycles; /* Total calibration cycles */
static u64 total_bitmap_size, /* Total bit count for all bitmaps */
total_bitmap_entries; /* Number of bitmaps counted */
static s32 cpu_core_count; /* CPU core count */
static s32 cpu_aff = -1; /* Selected CPU core */
/* HAVE_AFFINITY */
static FILE* plot_file; /* Gnuplot output file */
struct queue_entry {
u8* fname; /* File name for the test case */
u32 len; /* Input length */
u8 cal_failed, /* Calibration failed? */
trim_done, /* Trimmed? */
was_fuzzed, /* Had any fuzzing done yet? */
passed_det, /* Deterministic stages passed? */
has_new_cov, /* Triggers new coverage? */
var_behavior, /* Variable behavior? */
favored, /* Currently favored? */
fs_redundant; /* Marked as redundant in the fs? */
u32 bitmap_size, /* Number of bits set in bitmap */
exec_cksum; /* Checksum of the execution trace */
u64 exec_us, /* Execution time (us) */
handicap, /* Number of queue cycles behind */
depth; /* Path depth */
u8* trace_mini; /* Trace bytes, if kept */
u32 tc_ref; /* Trace bytes ref count */
struct queue_entry *next, /* Next element, if any */
*next_100; /* 100 elements ahead */
};
static struct queue_entry *queue, /* Fuzzing queue (linked list) */
*queue_cur, /* Current offset within the queue */
*queue_top, /* Top of the list */
*q_prev100; /* Previous 100 marker */
static struct queue_entry*
top_rated[MAP_SIZE]; /* Top entries for bitmap bytes */
struct extra_data {
u8* data; /* Dictionary token data */
u32 len; /* Dictionary token length */
u32 hit_cnt; /* Use count in the corpus */
};
static struct extra_data* extras; /* Extra tokens to fuzz with */
static u32 extras_cnt; /* Total number of tokens read */
static struct extra_data* a_extras; /* Automatically selected extras */
static u32 a_extras_cnt; /* Total number of tokens available */
static u8* (*post_handler)(u8* buf, u32* len);
/* Interesting values, as per config.h */
static s8 interesting_8[] = { INTERESTING_8 };
static s16 interesting_16[] = { INTERESTING_8, INTERESTING_16 };
static s32 interesting_32[] = { INTERESTING_8, INTERESTING_16, INTERESTING_32 };
/* Fuzzing stages */
enum {
/* 00 */ STAGE_FLIP1,
/* 01 */ STAGE_FLIP2,
/* 02 */ STAGE_FLIP4,
/* 03 */ STAGE_FLIP8,
/* 04 */ STAGE_FLIP16,
/* 05 */ STAGE_FLIP32,
/* 06 */ STAGE_ARITH8,
/* 07 */ STAGE_ARITH16,
/* 08 */ STAGE_ARITH32,
/* 09 */ STAGE_INTEREST8,
/* 10 */ STAGE_INTEREST16,
/* 11 */ STAGE_INTEREST32,
/* 12 */ STAGE_EXTRAS_UO,
/* 13 */ STAGE_EXTRAS_UI,
/* 14 */ STAGE_EXTRAS_AO,
/* 15 */ STAGE_HAVOC,
/* 16 */ STAGE_SPLICE
};
/* Stage value types */
enum {
/* 00 */ STAGE_VAL_NONE,
/* 01 */ STAGE_VAL_LE,
/* 02 */ STAGE_VAL_BE
};
/* Execution status fault codes */
enum {
/* 00 */ FAULT_NONE,
/* 01 */ FAULT_TMOUT,
/* 02 */ FAULT_CRASH,
/* 03 */ FAULT_ERROR,
/* 04 */ FAULT_NOINST,
/* 05 */ FAULT_NOBITS
};
/* Get unix time in milliseconds */
static u64 get_cur_time(void) {
struct timeval tv;
struct timezone tz;
gettimeofday(&tv, &tz);
return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000);
}
/* Get unix time in microseconds */
static u64 get_cur_time_us(void) {
struct timeval tv;
struct timezone tz;
gettimeofday(&tv, &tz);
return (tv.tv_sec * 1000000ULL) + tv.tv_usec;
}
/* Generate a random number (from 0 to limit - 1). This may
have slight bias. */
static inline u32 UR(u32 limit) {
if (unlikely(!rand_cnt--)) {
u32 seed[2];
ck_read(dev_urandom_fd, &seed, sizeof(seed), "/dev/urandom");
srandom(seed[0]);
rand_cnt = (RESEED_RNG / 2) + (seed[1] % RESEED_RNG);
}
return random() % limit;
}
/* Shuffle an array of pointers. Might be slightly biased. */
static void shuffle_ptrs(void** ptrs, u32 cnt) {
u32 i;
for (i = 0; i < cnt - 2; i++) {
u32 j = i + UR(cnt - i);
void *s = ptrs[i];
ptrs[i] = ptrs[j];
ptrs[j] = s;
}
}
/* Build a list of processes bound to specific cores. Returns -1 if nothing
can be found. Assumes an upper bound of 4k CPUs. */
static void bind_to_free_cpu(void) {
DIR* d;
struct dirent* de;
cpu_set_t c;
u8 cpu_used[4096] = { 0 };
u32 i;
if (cpu_core_count < 2) return;
if (getenv("AFL_NO_AFFINITY")) {
WARNF("Not binding to a CPU core (AFL_NO_AFFINITY set).");
return;
}
d = opendir("/proc");
if (!d) {
WARNF("Unable to access /proc - can't scan for free CPU cores.");
return;
}
ACTF("Checking CPU core loadout...");
/* Introduce some jitter, in case multiple AFL tasks are doing the same
thing at the same time... */
usleep(R(1000) * 250);
/* Scan all /proc/<pid>/status entries, checking for Cpus_allowed_list.
Flag all processes bound to a specific CPU using cpu_used[]. This will
fail for some exotic binding setups, but is likely good enough in almost
all real-world use cases. */
while ((de = readdir(d))) {
u8* fn;
FILE* f;
u8 tmp[MAX_LINE];
u8 has_vmsize = 0;
if (!isdigit(de->d_name[0])) continue;
fn = alloc_printf("/proc/%s/status", de->d_name);
if (!(f = fopen(fn, "r"))) {
ck_free(fn);
continue;
}
while (fgets(tmp, MAX_LINE, f)) {
u32 hval;
/* Processes without VmSize are probably kernel tasks. */
if (!strncmp(tmp, "VmSize:\t", 8)) has_vmsize = 1;
if (!strncmp(tmp, "Cpus_allowed_list:\t", 19) &&
!strchr(tmp, '-') && !strchr(tmp, ',') &&
sscanf(tmp + 19, "%u", &hval) == 1 && hval < sizeof(cpu_used) &&
has_vmsize) {
cpu_used[hval] = 1;
break;
}
}
ck_free(fn);
fclose(f);
}
closedir(d);
if (cpu_to_bind_given) {
if (cpu_to_bind >= cpu_core_count)
FATAL("The CPU core id to bind should be between 0 and %u", cpu_core_count - 1);
if (cpu_used[cpu_to_bind])
FATAL("The CPU core #%u to bind is not free!", cpu_to_bind);
i = cpu_to_bind;
} else {
for (i = 0; i < cpu_core_count; i++) if (!cpu_used[i]) break;
}
if (i == cpu_core_count) {
SAYF("\n" cLRD "[-] " cRST
"Uh-oh, looks like all %u CPU cores on your system are allocated to\n"
" other instances of afl-fuzz (or similar CPU-locked tasks). Starting\n"
" another fuzzer on this machine is probably a bad plan, but if you are\n"
" absolutely sure, you can set AFL_NO_AFFINITY and try again.\n",
cpu_core_count);
FATAL("No more free CPU cores");
}
OKF("Found a free CPU core, binding to #%u.", i);
cpu_aff = i;
CPU_ZERO(&c);
CPU_SET(i, &c);
if (sched_setaffinity(0, sizeof(c), &c))
PFATAL("sched_setaffinity failed");
}
/* HAVE_AFFINITY */
/* Helper function to compare buffers; returns first and last differing offset. We
use this to find reasonable locations for splicing two files. */
static void locate_diffs(u8* ptr1, u8* ptr2, u32 len, s32* first, s32* last) {
s32 f_loc = -1;
s32 l_loc = -1;
u32 pos;
for (pos = 0; pos < len; pos++) {
if (*(ptr1++) != *(ptr2++)) {
if (f_loc == -1) f_loc = pos;
l_loc = pos;
}
}
*first = f_loc;
*last = l_loc;
return;
}
/* !IGNORE_FINDS */
/* Describe integer. Uses 12 cyclic static buffers for return values. The value
returned should be five characters or less for all the integers we reasonably
expect to see. */
static u8* DI(u64 val) {
static u8 tmp[12][16];
static u8 cur;
cur = (cur + 1) % 12;
/* 0-9999 */
CHK_FORMAT(1, 10000, "%llu", u64);
/* 10.0k - 99.9k */
CHK_FORMAT(1000, 99.95, "%0.01fk", double);
/* 100k - 999k */
CHK_FORMAT(1000, 1000, "%lluk", u64);
/* 1.00M - 9.99M */
CHK_FORMAT(1000 * 1000, 9.995, "%0.02fM", double);
/* 10.0M - 99.9M */
CHK_FORMAT(1000 * 1000, 99.95, "%0.01fM", double);
/* 100M - 999M */
CHK_FORMAT(1000 * 1000, 1000, "%lluM", u64);
/* 1.00G - 9.99G */
CHK_FORMAT(1000LL * 1000 * 1000, 9.995, "%0.02fG", double);
/* 10.0G - 99.9G */
CHK_FORMAT(1000LL * 1000 * 1000, 99.95, "%0.01fG", double);
/* 100G - 999G */
CHK_FORMAT(1000LL * 1000 * 1000, 1000, "%lluG", u64);
/* 1.00T - 9.99G */
CHK_FORMAT(1000LL * 1000 * 1000 * 1000, 9.995, "%0.02fT", double);
/* 10.0T - 99.9T */
CHK_FORMAT(1000LL * 1000 * 1000 * 1000, 99.95, "%0.01fT", double);
/* 100T+ */
strcpy(tmp[cur], "infty");
return tmp[cur];
}
/* Describe float. Similar to the above, except with a single
static buffer. */
static u8* DF(double val) {
static u8 tmp[16];
if (val < 99.995) {
sprintf(tmp, "%0.02f", val);
return tmp;
}
if (val < 999.95) {
sprintf(tmp, "%0.01f", val);
return tmp;
}
return DI((u64)val);
}
/* Describe integer as memory size. */
static u8* DMS(u64 val) {
static u8 tmp[12][16];
static u8 cur;
cur = (cur + 1) % 12;
/* 0-9999 */
CHK_FORMAT(1, 10000, "%llu B", u64);
/* 10.0k - 99.9k */
CHK_FORMAT(1024, 99.95, "%0.01f kB", double);
/* 100k - 999k */
CHK_FORMAT(1024, 1000, "%llu kB", u64);
/* 1.00M - 9.99M */
CHK_FORMAT(1024 * 1024, 9.995, "%0.02f MB", double);
/* 10.0M - 99.9M */
CHK_FORMAT(1024 * 1024, 99.95, "%0.01f MB", double);
/* 100M - 999M */
CHK_FORMAT(1024 * 1024, 1000, "%llu MB", u64);
/* 1.00G - 9.99G */
CHK_FORMAT(1024LL * 1024 * 1024, 9.995, "%0.02f GB", double);
/* 10.0G - 99.9G */
CHK_FORMAT(1024LL * 1024 * 1024, 99.95, "%0.01f GB", double);
/* 100G - 999G */
CHK_FORMAT(1024LL * 1024 * 1024, 1000, "%llu GB", u64);
/* 1.00T - 9.99G */
CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 9.995, "%0.02f TB", double);
/* 10.0T - 99.9T */
CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 99.95, "%0.01f TB", double);
/* 100T+ */
strcpy(tmp[cur], "infty");
return tmp[cur];
}
/* Describe time delta. Returns one static buffer, 34 chars of less. */
static u8* DTD(u64 cur_ms, u64 event_ms) {
static u8 tmp[64];
u64 delta;
s32 t_d, t_h, t_m, t_s;
if (!event_ms) return "none seen yet";
delta = cur_ms - event_ms;
t_d = delta / 1000 / 60 / 60 / 24;
t_h = (delta / 1000 / 60 / 60) % 24;
t_m = (delta / 1000 / 60) % 60;
t_s = (delta / 1000) % 60;
sprintf(tmp, "%s days, %u hrs, %u min, %u sec", DI(t_d), t_h, t_m, t_s);
return tmp;
}
/* Mark deterministic checks as done for a particular queue entry. We use the
.state file to avoid repeating deterministic fuzzing when resuming aborted
scans. */
static void mark_as_det_done(struct queue_entry* q) {
u8* fn = strrchr(q->fname, '/');
s32 fd;
fn = alloc_printf("%s/queue/.state/deterministic_done/%s", out_dir, fn + 1);
fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
if (fd < 0) PFATAL("Unable to create '%s'", fn);
close(fd);
ck_free(fn);
q->passed_det = 1;
}
/* Mark as variable. Create symlinks if possible to make it easier to examine
the files. */
static void mark_as_variable(struct queue_entry* q) {
u8 *fn = strrchr(q->fname, '/') + 1, *ldest;
ldest = alloc_printf("../../%s", fn);
fn = alloc_printf("%s/queue/.state/variable_behavior/%s", out_dir, fn);
if (symlink(ldest, fn)) {
s32 fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
if (fd < 0) PFATAL("Unable to create '%s'", fn);
close(fd);
}
ck_free(ldest);
ck_free(fn);
q->var_behavior = 1;
}
/* Mark / unmark as redundant (edge-only). This is not used for restoring state,
but may be useful for post-processing datasets. */
static void mark_as_redundant(struct queue_entry* q, u8 state) {
u8* fn;
s32 fd;
if (state == q->fs_redundant) return;
q->fs_redundant = state;
fn = strrchr(q->fname, '/');
fn = alloc_printf("%s/queue/.state/redundant_edges/%s", out_dir, fn + 1);
if (state) {
fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
if (fd < 0) PFATAL("Unable to create '%s'", fn);
close(fd);
} else {
if (unlink(fn)) PFATAL("Unable to remove '%s'", fn);
}
ck_free(fn);
}
/* Append new test case to the queue. */
static void add_to_queue(u8* fname, u32 len, u8 passed_det) {
struct queue_entry* q = ck_alloc(sizeof(struct queue_entry));
q->fname = fname;
q->len = len;
q->depth = cur_depth + 1;
q->passed_det = passed_det;
if (q->depth > max_depth) max_depth = q->depth;
if (queue_top) {
queue_top->next = q;
queue_top = q;
} else q_prev100 = queue = queue_top = q;
queued_paths++;
pending_not_fuzzed++;
cycles_wo_finds = 0;
/* Set next_100 pointer for every 100th element (index 0, 100, etc) to allow faster iteration. */
if ((queued_paths - 1) % 100 == 0 && queued_paths > 1) {
q_prev100->next_100 = q;
q_prev100 = q;
}
last_path_time = get_cur_time();
}
/* Destroy the entire queue. */
EXP_ST void destroy_queue(void) {
struct queue_entry *q = queue, *n;
while (q) {
n = q->next;
ck_free(q->fname);
ck_free(q->trace_mini);
ck_free(q);
q = n;
}
}
/* Write bitmap to file. The bitmap is useful mostly for the secret
-B option, to focus a separate fuzzing session on a particular
interesting input without rediscovering all the others. */
EXP_ST void write_bitmap(void) {
u8* fname;
s32 fd;
if (!bitmap_changed) return;
bitmap_changed = 0;
fname = alloc_printf("%s/fuzz_bitmap", out_dir);
fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0600);
if (fd < 0) PFATAL("Unable to open '%s'", fname);
ck_write(fd, virgin_bits, MAP_SIZE, fname);
close(fd);
ck_free(fname);
}
/* Read bitmap from file. This is for the -B option again. */
EXP_ST void read_bitmap(u8* fname) {
s32 fd = open(fname, O_RDONLY);
if (fd < 0) PFATAL("Unable to open '%s'", fname);
ck_read(fd, virgin_bits, MAP_SIZE, fname);
close(fd);
}
/* Check if the current execution path brings anything new to the table.
Update virgin bits to reflect the finds. Returns 1 if the only change is
the hit-count for a particular tuple; 2 if there are new tuples seen.
Updates the map, so subsequent calls will always return 0.
This function is called after every exec() on a fairly large buffer, so
it needs to be fast. We do this in 32-bit and 64-bit flavors. */
static inline u8 has_new_bits(u8* virgin_map) {
u64* current = (u64*)trace_bits;
u64* virgin = (u64*)virgin_map;
u32 i = (MAP_SIZE >> 3);
u32* current = (u32*)trace_bits;
u32* virgin = (u32*)virgin_map;
u32 i = (MAP_SIZE >> 2);
/* ^WORD_SIZE_64 */
u8 ret = 0;
while (i--) {
/* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
that have not been already cleared from the virgin map - since this will
almost always be the case. */
if (unlikely(*current) && unlikely(*current & *virgin)) {
if (likely(ret < 2)) {
u8* cur = (u8*)current;
u8* vir = (u8*)virgin;
/* Looks like we have not found any new bytes yet; see if any non-zero
bytes in current[] are pristine in virgin[]. */
if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
(cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff) ||
(cur[4] && vir[4] == 0xff) || (cur[5] && vir[5] == 0xff) ||
(cur[6] && vir[6] == 0xff) || (cur[7] && vir[7] == 0xff)) ret = 2;
else ret = 1;
if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
(cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff)) ret = 2;
else ret = 1;
/* ^WORD_SIZE_64 */
}
*virgin &= ~*current;
}
current++;
virgin++;
}
if (ret && virgin_map == virgin_bits) bitmap_changed = 1;
return ret;
}
/* Count the number of bits set in the provided bitmap. Used for the status
screen several times every second, does not have to be fast. */
static u32 count_bits(u8* mem) {
u32* ptr = (u32*)mem;
u32 i = (MAP_SIZE >> 2);
u32 ret = 0;
while (i--) {
u32 v = *(ptr++);
/* This gets called on the inverse, virgin bitmap; optimize for sparse
data. */
if (v == 0xffffffff) {
ret += 32;
continue;
}
v -= ((v >> 1) & 0x55555555);
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
ret += (((v + (v >> 4)) & 0xF0F0F0F) * 0x01010101) >> 24;
}
return ret;
}
/* Count the number of bytes set in the bitmap. Called fairly sporadically,
mostly to update the status screen or calibrate and examine confirmed
new paths. */
static u32 count_bytes(u8* mem) {
u32* ptr = (u32*)mem;
u32 i = (MAP_SIZE >> 2);
u32 ret = 0;
while (i--) {
u32 v = *(ptr++);
if (!v) continue;
if (v & FF(0)) ret++;
if (v & FF(1)) ret++;
if (v & FF(2)) ret++;
if (v & FF(3)) ret++;
}
return ret;
}
/* Count the number of non-255 bytes set in the bitmap. Used strictly for the
status screen, several calls per second or so. */
static u32 count_non_255_bytes(u8* mem) {
u32* ptr = (u32*)mem;
u32 i = (MAP_SIZE >> 2);
u32 ret = 0;
while (i--) {
u32 v = *(ptr++);
/* This is called on the virgin bitmap, so optimize for the most likely
case. */
if (v == 0xffffffff) continue;
if ((v & FF(0)) != FF(0)) ret++;
if ((v & FF(1)) != FF(1)) ret++;
if ((v & FF(2)) != FF(2)) ret++;
if ((v & FF(3)) != FF(3)) ret++;
}
return ret;
}
/* Destructively simplify trace by eliminating hit count information
and replacing it with 0x80 or 0x01 depending on whether the tuple
is hit or not. Called on every new crash or timeout, should be
reasonably fast. */
static const u8 simplify_lookup[256] = {
[0] = 1,
[1 ... 255] = 128
};
static void simplify_trace(u64* mem) {
u32 i = MAP_SIZE >> 3;
while (i--) {
/* Optimize for sparse bitmaps. */
if (unlikely(*mem)) {
u8* mem8 = (u8*)mem;
mem8[0] = simplify_lookup[mem8[0]];
mem8[1] = simplify_lookup[mem8[1]];
mem8[2] = simplify_lookup[mem8[2]];
mem8[3] = simplify_lookup[mem8[3]];
mem8[4] = simplify_lookup[mem8[4]];
mem8[5] = simplify_lookup[mem8[5]];
mem8[6] = simplify_lookup[mem8[6]];
mem8[7] = simplify_lookup[mem8[7]];
} else *mem = 0x0101010101010101ULL;
mem++;
}
}
static void simplify_trace(u32* mem) {
u32 i = MAP_SIZE >> 2;
while (i--) {
/* Optimize for sparse bitmaps. */
if (unlikely(*mem)) {
u8* mem8 = (u8*)mem;
mem8[0] = simplify_lookup[mem8[0]];
mem8[1] = simplify_lookup[mem8[1]];
mem8[2] = simplify_lookup[mem8[2]];
mem8[3] = simplify_lookup[mem8[3]];
} else *mem = 0x01010101;
mem++;
}
}
/* ^WORD_SIZE_64 */
/* Destructively classify execution counts in a trace. This is used as a
preprocessing step for any newly acquired traces. Called on every exec,
must be fast. */
static const u8 count_class_lookup8[256] = {
[0] = 0,
[1] = 1,
[2] = 2,
[3] = 4,
[4 ... 7] = 8,
[8 ... 15] = 16,
[16 ... 31] = 32,
[32 ... 127] = 64,
[128 ... 255] = 128
};
static u16 count_class_lookup16[65536];
EXP_ST void init_count_class16(void) {
u32 b1, b2;
for (b1 = 0; b1 < 256; b1++)
for (b2 = 0; b2 < 256; b2++)
count_class_lookup16[(b1 << 8) + b2] =
(count_class_lookup8[b1] << 8) |
count_class_lookup8[b2];
}