AFL 源代碼速通筆記 - 網安 - 專業的網絡安全產業、社區、知識平臺

AFL 作為一個現在仍然適用且比較經典的 fuzzer，筆者打算從它開始。

本篇文章原叫做《AFL 源代碼閱讀筆記》，結果跟著大佬們的筆記去讀（sakura師傅的筆記確實是神中神，本文也有很多地方照搬了師傅的原文，因為說實話我覺得自己也寫不到那么詳細），囫圇吞棗般速通了，前前后后三天時間，但感覺尚且沒有自己實現的能力。

一、afl-gcc 原理

首先，一般我們用 afl 去 fuzz 一些項目的時候都需要用 afl-gcc 去代替 gcc 進行編譯。先說結論，這一步的目的其實是為了向代碼中插樁，完成插樁后其實還是調用原生的 gcc 進行編譯。

其實這個描述有些偏頗，插樁其實是 afl-as 負責的，不過在這里，筆者將 afl-gcc 和 afl-as 放到同一節，因此用了這樣的表述，下文會具體分析 afl-as 的原理。

首先需要說明的是，gcc 對代碼的編譯流程的分層次的：

源代碼-->預編譯后的源代碼-->匯編代碼-->機器碼-->鏈接后的二進制文件

其中，從源代碼到匯編代碼的步驟由 gcc 完成；而匯編代碼到機器碼的部分由 as 完成。

而 afl-gcc 的源代碼如下：

int main(int argc, char** argv) {
 .......
 find_as(argv[0]);
 edit_params(argc, argv);
 execvp(cc_params[0], (char**)cc_params);
 FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);
 return 0;
}

find_as：查找 as 這個二進制程序，用 afl-as 替換它
edit_params：修改參數
execvp：調用原生 gcc 對代碼進行編譯

static void edit_params(u32 argc, char** argv) {
 ......
#else
 if (!strcmp(name, "afl-g++")) {
 u8* alt_cxx = getenv("AFL_CXX");
 cc_params[0] = alt_cxx ? alt_cxx : (u8*)"g++";
 } else if (!strcmp(name, "afl-gcj")) {
 u8* alt_cc = getenv("AFL_GCJ");
 cc_params[0] = alt_cc ? alt_cc : (u8*)"gcj";
 } else {
 u8* alt_cc = getenv("AFL_CC");
 cc_params[0] = alt_cc ? alt_cc : (u8*)"gcc";
 }
#endif /* __APPLE__ */
 }
 while (--argc) {
 u8* cur = *(++argv);
 if (!strncmp(cur, "-B", 2)) {
 if (!be_quiet) WARNF("-B is already set, overriding");
 if (!cur[2] && argc > 1) { argc--; argv++; }
 continue;
 }
 if (!strcmp(cur, "-integrated-as")) continue;
 if (!strcmp(cur, "-pipe")) continue;
#if defined(__FreeBSD__) && defined(__x86_64__)
 if (!strcmp(cur, "-m32")) m32_set = 1;
#endif
 if (!strcmp(cur, "-fsanitize=address") ||
 !strcmp(cur, "-fsanitize=memory")) asan_set = 1;
 if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;
 cc_params[cc_par_cnt++] = cur;
 }
 cc_params[cc_par_cnt++] = "-B";
 cc_params[cc_par_cnt++] = as_path;
 if (clang_mode)
 cc_params[cc_par_cnt++] = "-no-integrated-as";
 if (getenv("AFL_HARDEN")) {
 cc_params[cc_par_cnt++] = "-fstack-protector-all";
 if (!fortify_set)
 cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2";
 }
 if (asan_set) {
 /* Pass this on to afl-as to adjust map density. */
 setenv("AFL_USE_ASAN", "1", 1);
 } else if (getenv("AFL_USE_ASAN")) {
 if (getenv("AFL_USE_MSAN"))
 FATAL("ASAN and MSAN are mutually exclusive");
 if (getenv("AFL_HARDEN"))
 FATAL("ASAN and AFL_HARDEN are mutually exclusive");
 cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
 cc_params[cc_par_cnt++] = "-fsanitize=address";
 } else if (getenv("AFL_USE_MSAN")) {
 if (getenv("AFL_USE_ASAN"))
 FATAL("ASAN and MSAN are mutually exclusive");
 if (getenv("AFL_HARDEN"))
 FATAL("MSAN and AFL_HARDEN are mutually exclusive");
 cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
 cc_params[cc_par_cnt++] = "-fsanitize=memory";
 }
 if (!getenv("AFL_DONT_OPTIMIZE")) {
#if defined(__FreeBSD__) && defined(__x86_64__)
 /* On 64-bit FreeBSD systems, clang -g -m32 is broken, but -m32 itself
 works OK. This has nothing to do with us, but let's avoid triggering
 that bug. */
 if (!clang_mode || !m32_set)
 cc_params[cc_par_cnt++] = "-g";
#else
 cc_params[cc_par_cnt++] = "-g";
#endif
 cc_params[cc_par_cnt++] = "-O3";
 cc_params[cc_par_cnt++] = "-funroll-loops";
 /* Two indicators that you're building for fuzzing; one of them is
 AFL-specific, the other is shared with libfuzzer. */
 cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1";
 cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1";
 }
 if (getenv("AFL_NO_BUILTIN")) {
 cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
 cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
 cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp";
 cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp";
 cc_params[cc_par_cnt++] = "-fno-builtin-memcmp";
 cc_params[cc_par_cnt++] = "-fno-builtin-strstr";
 cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr";
 }
 cc_params[cc_par_cnt] = NULL;
}

挺長的，不過邏輯基本上都是重復的，主要做兩件事：

給 gcc 添加一些額外的參數
根據參數設置一些 flag

在完成了匯編以后，接下來會使用 afl-as 對生成的匯編代碼進行插樁：

int main(int argc, char** argv) {
 ......
 gettimeofday(&tv, &tz);
 rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
 srandom(rand_seed);
 edit_params(argc, argv);
 ......
 if (!just_version) add_instrumentation();
 if (!(pid = fork())) {
 execvp(as_params[0], (char**)as_params);
 FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
 }
 if (pid < 0) PFATAL("fork() failed");
 if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
 if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);
 exit(WEXITSTATUS(status));
}

在afl-as中，仍然使用edit_params編輯和修改參數，并使用add_instrumentation來對生成的匯編代碼進行插樁。完成插樁后，用 fork 生成子進程，并調用原生的 as 進行編譯。

插樁邏輯也很樸素：

static void add_instrumentation(void) {
 ......
 /* If we're in the right mood for instrumenting, check for function
 names or conditional labels. This is a bit messy, but in essence,
 we want to catch:
 ^main: - function entry point (always instrumented)
 ^.L0: - GCC branch label
 ^.LBB0_0: - clang branch label (but only in clang mode)
 ^\tjnz foo - conditional branches
 ...but not:
 ^# BB#0: - clang comments
 ^ # BB#0: - ditto
 ^.Ltmp0: - clang non-branch labels
 ^.LC0 - GCC non-branch labels
 ^.LBB0_0: - ditto (when in GCC mode)
 ^\tjmp foo - non-conditional jumps
 Additionally, clang and GCC on MacOS X follow a different convention
 with no leading dots on labels, hence the weird maze of #ifdefs
 later on.
 */
 if (skip_intel || skip_app || skip_csect || !instr_ok ||
 line[0] == '#' || line[0] == ' ') continue;
 /* Conditional branch instruction (jnz, etc). We append the instrumentation
 right after the branch (to instrument the not-taken path) and at the
 branch destination label (handled later on). */
 if (line[0] == '\t') {
 if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
 R(MAP_SIZE));
 ins_lines++;
 }
 continue;
 }
 /* Label of some sort. This may be a branch destination, but we need to
 tread carefully and account for several different formatting
 conventions. */
#ifdef __APPLE__
 /* Apple: L: */
 if ((colon_pos = strstr(line, ":"))) {
 if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
#else
 /* Everybody else: .L: */
 if (strstr(line, ":")) {
 if (line[0] == '.') {
#endif /* __APPLE__ */
 /* .L0: or LBB0_0: style jump destination */
#ifdef __APPLE__
 /* Apple: L / LBB */
 if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
 && R(100) < inst_ratio) {
#else
 /* Apple: .L / .LBB */
 if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
 && R(100) < inst_ratio) {
#endif /* __APPLE__ */
 /* An optimization is possible here by adding the code only if the
 label is mentioned in the code in contexts other than call / jmp.
 That said, this complicates the code by requiring two-pass
 processing (messy with stdin), and results in a speed gain
 typically under 10%, because compilers are generally pretty good
 about not generating spurious intra-function jumps.
 We use deferred output chiefly to avoid disrupting
 .Lfunc_begin0-style exception handling calculations (a problem on
 MacOS X). */
 if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
 }
 } else {
 /* Function label (always instrumented, deferred mode). */
 instrument_next = 1;
 }
 }
 }
 if (ins_lines)
 fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
 if (input_file) fclose(inf);
 fclose(outf);
 if (!be_quiet) {
 if (!ins_lines) WARNF("No instrumentation targets found%s.",
 pass_thru ? " (pass-thru mode)" : "");
 else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
 ins_lines, use_64bit ? "64" : "32",
 getenv("AFL_HARDEN") ? "hardened" : 
 (sanitizer ? "ASAN/MSAN" : "non-hardened"),
 inst_ratio);
 }
}

簡單來說就是一個循環讀取每行匯編代碼，并對特定的匯編代碼進行插樁：

首先需要保證代碼位于text內存段
如果是main函數或分支跳轉指令則進行插樁
如果是注釋或強制跳轉指令則不插樁

插樁的具體代碼保存在afl-as.h中，在最后一節中筆者會另外介紹，這里我們可以暫時忽略它的實現細節繼續往下。

二、afl-fuzz

按照順序，現在程序是編譯好了，接下來就要用afl-fuzz對它進行模糊測試了。

一般來說，我們會用afl-fuzz -i input -o output -- programe啟動 fuzzer，對應的，afl-fuzz.c中的前半部分都在做參數解析的工作：

int main(int argc, char** argv) {
 ......
 gettimeofday(&tv, &tz);
 srandom(tv.tv_sec ^ tv.tv_usec ^ getpid());
 while ((opt = getopt(argc, argv, "+i:o:f:m:b:t:T:dnCB:S:M:x:QV")) > 0)
 switch (opt) {
 case 'i': /* input dir */
 if (in_dir) FATAL("Multiple -i options not supported");
 in_dir = optarg;
 if (!strcmp(in_dir, "-")) in_place_resume = 1;
 break;
 case 'o': /* output dir */
 if (out_dir) FATAL("Multiple -o options not supported");
 out_dir = optarg;
 break;
 ......
 case 'V': /* Show version number */
 /* Version number has been printed already, just quit. */
 exit(0);
 default:
 usage(argv[0]);
 }

這部分我們大致看一下就行了，主要的關注點自然不在參數解析部分。

int main(int argc, char** argv) {
 ......
 setup_signal_handlers();
 check_asan_opts();
 if (sync_id) fix_up_sync();
 if (!strcmp(in_dir, out_dir))
 FATAL("Input and output directories can't be the same");
 if (dumb_mode) {
 if (crash_mode) FATAL("-C and -n are mutually exclusive");
 if (qemu_mode) FATAL("-Q and -n are mutually exclusive");
 }
 if (getenv("AFL_NO_FORKSRV")) no_forkserver = 1;
 if (getenv("AFL_NO_CPU_RED")) no_cpu_meter_red = 1;
 if (getenv("AFL_NO_ARITH")) no_arith = 1;
 if (getenv("AFL_SHUFFLE_QUEUE")) shuffle_queue = 1;
 if (getenv("AFL_FAST_CAL")) fast_cal = 1;
 if (getenv("AFL_HANG_TMOUT")) {
 hang_tmout = atoi(getenv("AFL_HANG_TMOUT"));
 if (!hang_tmout) FATAL("Invalid value of AFL_HANG_TMOUT");
 }
 if (dumb_mode == 2 && no_forkserver)
 FATAL("AFL_DUMB_FORKSRV and AFL_NO_FORKSRV are mutually exclusive");
 if (getenv("AFL_PRELOAD")) {
 setenv("LD_PRELOAD", getenv("AFL_PRELOAD"), 1);
 setenv("DYLD_INSERT_LIBRARIES", getenv("AFL_PRELOAD"), 1);
 }
 if (getenv("AFL_LD_PRELOAD"))
 FATAL("Use AFL_PRELOAD instead of AFL_LD_PRELOAD");
 save_cmdline(argc, argv);
 fix_up_banner(argv[optind]);
 check_if_tty();
 get_core_count();
#ifdef HAVE_AFFINITY
 bind_to_free_cpu();
#endif /* HAVE_AFFINITY */
 check_crash_handling();
 check_cpu_governor();
 setup_post();
 setup_shm();
 init_count_class16();
 setup_dirs_fds();
 read_testcases();
 load_auto();
 pivot_inputs();
 if (extras_dir) load_extras(extras_dir);
 if (!timeout_given) find_timeout();
 detect_file_args(argv + optind + 1);
 if (!out_file) setup_stdio_file();
 check_binary(argv[optind]);
 start_time = get_cur_time();
 if (qemu_mode)
 use_argv = get_qemu_argv(argv[0], argv + optind, argc - optind);
 else
 use_argv = argv + optind;
 perform_dry_run(use_argv);
 cull_queue();
 show_init_stats();
 seek_to = find_start_position();
 write_stats_file(0, 0, 0);
 save_auto();
 if (stop_soon) goto stop_fuzzing;
 /* Woop woop woop */
 if (!not_on_tty) {
 sleep(4);
 start_time += 4000;
 if (stop_soon) goto stop_fuzzing;
 }
 while (1) {
 u8 skipped_fuzz;
 cull_queue();
 if (!queue_cur) {
 queue_cycle++;
 current_entry = 0;
 cur_skipped_paths = 0;
 queue_cur = queue;
 while (seek_to) {
 current_entry++;
 seek_to--;
 queue_cur = queue_cur->next;
 }
 show_stats();
 if (not_on_tty) {
 ACTF("Entering queue cycle %llu.", queue_cycle);
 fflush(stdout);
 }
 /* If we had a full queue cycle with no new finds, try
 recombination strategies next. */
 if (queued_paths == prev_queued) {
 if (use_splicing) cycles_wo_finds++; else use_splicing = 1;
 } else cycles_wo_finds = 0;
 prev_queued = queued_paths;
 if (sync_id && queue_cycle == 1 && getenv("AFL_IMPORT_FIRST"))
 sync_fuzzers(use_argv);
 }
 skipped_fuzz = fuzz_one(use_argv);
 if (!stop_soon && sync_id && !skipped_fuzz) {
 if (!(sync_interval_cnt++ % SYNC_INTERVAL))
 sync_fuzzers(use_argv);
 }
 if (!stop_soon && exit_1) stop_soon = 2;
 if (stop_soon) break;
 queue_cur = queue_cur->next;
 current_entry++;
 }
 if (queue_cur) show_stats();
 /* If we stopped programmatically, we kill the forkserver and the current runner. 
 If we stopped manually, this is done by the signal handler. */
 if (stop_soon == 2) {
 if (child_pid > 0) kill(child_pid, SIGKILL);
 if (forksrv_pid > 0) kill(forksrv_pid, SIGKILL);
 }
 /* Now that we've killed the forkserver, we wait for it to be able to get rusage stats. */
 if (waitpid(forksrv_pid, NULL, 0) <= 0) {
 WARNF("error waitpid");
 }
 write_bitmap();
 write_stats_file(0, 0, 0);
 save_auto();
stop_fuzzing:
 SAYF(CURSOR_SHOW cLRD "+++ Testing aborted %s +++" cRST,
 stop_soon == 2 ? "programmatically" : "by user");
 /* Running for more than 30 minutes but still doing first cycle? */
 if (queue_cycle == 1 && get_cur_time() - start_time > 30 * 60 * 1000) {
 SAYF("" cYEL "[!] " cRST
 "Stopped during the first cycle, results may be incomplete."
 " (For info on resuming, see %s/README.)", doc_path);
 }
 fclose(plot_file);
 destroy_queue();
 destroy_extras();
 ck_free(target_path);
 ck_free(sync_id);
 alloc_report();
 OKF("We're done here. Have a nice day!");
 exit(0);
}

setup_signal_handlers

設置一些信號處理函數，比如說退出信號時要主動釋放子進程、窗口大小調整時要跟蹤變化等。

check_asan_opts

讀取環境變量ASAN_OPTIONS和MSAN_OPTIONS，做一些檢查。

fix_up_sync

save_cmdline

保存當前的命令

fix_up_banner

創建一個 banner

check_if_tty

檢查是否在tty終端上面運行。

get_core_count

計數logical CPU cores。

check_crash_handling

檢查崩潰處理函數，確保崩潰后不會進入程序。

 s32 fd = open("/proc/sys/kernel/core_pattern", O_RDONLY);
 u8 fchar;
 if (fd < 0) return;
 ACTF("Checking core_pattern...");
 if (read(fd, &fchar, 1) == 1 && fchar == '|') {
 SAYF("" cLRD "[-] " cRST
 "Hmm, your system is configured to send core dump notifications to an"
 " external utility. This will cause issues: there will be an extended delay"
 " between stumbling upon a crash and having this information relayed to the"
 " fuzzer via the standard waitpid() API."
 " To avoid having crashes misinterpreted as timeouts, please log in as root" 
 " and temporarily modify /proc/sys/kernel/core_pattern, like so:"
 " echo core >/proc/sys/kernel/core_pattern");
 if (!getenv("AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES"))
 FATAL("Pipe at the beginning of 'core_pattern'");
 }
 close(fd);

筆者在 Ubuntu20 上跑 AFL 就會遇到這個問題，因為在默認情況下，系統會將崩潰信息通過管道發送給外部程序，由于這會影響到效率，因此通過echo core >/proc/sys/kernel/core_pattern修改保存崩潰信息的方式，將它保存為本地文件。

check_cpu_governor

檢查cpu的調節器，來使得cpu可以處于高效的運行狀態。

setup_post

如果用戶指定了環境變量AFL_POST_LIBRARY，那么就會從對應的路徑下加載動態庫并加載afl_postprocess函數并保存在post_handler中。

setup_shm

EXP_ST void setup_shm(void) {
 u8* shm_str;
 if (!in_bitmap) memset(virgin_bits, 255, MAP_SIZE);
 memset(virgin_tmout, 255, MAP_SIZE);
 memset(virgin_crash, 255, MAP_SIZE);
 shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
 if (shm_id < 0) PFATAL("shmget() failed");
 atexit(remove_shm);
 shm_str = alloc_printf("%d", shm_id);
 /* If somebody is asking us to fuzz instrumented binaries in dumb mode,
 we don't want them to detect instrumentation, since we won't be sending
 fork server commands. This should be replaced with better auto-detection
 later on, perhaps? */
 if (!dumb_mode) setenv(SHM_ENV_VAR, shm_str, 1);
 ck_free(shm_str);
 trace_bits = shmat(shm_id, NULL, 0);
 if (trace_bits == (void *)-1) PFATAL("shmat() failed");
}

初始化virgin_bits數組用于保存后續模糊測試中覆蓋的路徑，virgin_tmout保存超時的路徑，virgin_crash保存崩潰的路徑。

同時建立共享內存trace_bits，該變量用于儲存樣例運行時的路徑。

同時將共享內存的唯一標識符shm_id轉為字符串后保存在環境變量SHM_ENV_VAR中。

init_count_class16

初始化count_class_lookup16數組，幫助快速歸類統計路徑覆蓋的數量。

setup_dirs_fds

創建輸出目錄。

read_testcases

讀取測試樣例。

static void read_testcases(void) {
 struct dirent **nl;
 s32 nl_cnt;
 u32 i;
 u8* fn;
 /* Auto-detect non-in-place resumption attempts. */
 fn = alloc_printf("%s/queue", in_dir);
 if (!access(fn, F_OK)) in_dir = fn; else ck_free(fn);
 ACTF("Scanning '%s'...", in_dir);
 /* We use scandir() + alphasort() rather than readdir() because otherwise,
 the ordering of test cases would vary somewhat randomly and would be
 difficult to control. */
 nl_cnt = scandir(in_dir, &nl, NULL, alphasort);
 if (nl_cnt < 0) {
 if (errno == ENOENT || errno == ENOTDIR)
 SAYF("" cLRD "[-] " cRST
 "The input directory does not seem to be valid - try again. The fuzzer needs"
 " one or more test case to start with - ideally, a small file under 1 kB"
 " or so. The cases must be stored as regular files directly in the input"
 " directory.");
 PFATAL("Unable to open '%s'", in_dir);
 }
 if (shuffle_queue && nl_cnt > 1) {
 ACTF("Shuffling queue...");
 shuffle_ptrs((void**)nl, nl_cnt);
 }
 for (i = 0; i < nl_cnt; i++) {
 struct stat st;
 u8* fn = alloc_printf("%s/%s", in_dir, nl[i]->d_name);
 u8* dfn = alloc_printf("%s/.state/deterministic_done/%s", in_dir, nl[i]->d_name);
 u8 passed_det = 0;
 free(nl[i]); /* not tracked */
 if (lstat(fn, &st) || access(fn, R_OK))
 PFATAL("Unable to access '%s'", fn);
 /* This also takes care of . and .. */
 if (!S_ISREG(st.st_mode) || !st.st_size || strstr(fn, "/README.testcases")) {
 ck_free(fn);
 ck_free(dfn);
 continue;
 }
 if (st.st_size > MAX_FILE) 
 FATAL("Test case '%s' is too big (%s, limit is %s)", fn,
 DMS(st.st_size), DMS(MAX_FILE));
 /* Check for metadata that indicates that deterministic fuzzing
 is complete for this entry. We don't want to repeat deterministic
 fuzzing when resuming aborted scans, because it would be pointless
 and probably very time-consuming. */
 if (!access(dfn, F_OK)) passed_det = 1;
 ck_free(dfn);
 add_to_queue(fn, st.st_size, passed_det);
 }

首先獲取輸入樣例的文件夾路徑in_dir
掃描in_dir，如果目錄下文件的數量少于等于 0 則報錯
如果設置了shuffle_queue就打亂順序
遍歷所有文件名，保存在fn中
過濾掉.和..這樣的路徑
如果文件的大小超過了MAX_FILE則終止
add_to_queue

add_to_queue

static void add_to_queue(u8* fname, u32 len, u8 passed_det) {
 struct queue_entry* q = ck_alloc(sizeof(struct queue_entry));
 q->fname = fname;
 q->len = len;
 q->depth = cur_depth + 1;
 q->passed_det = passed_det;
 if (q->depth > max_depth) max_depth = q->depth;
 if (queue_top) {
 queue_top->next = q;
 queue_top = q;
 } else q_prev100 = queue = queue_top = q;
 queued_paths++;
 pending_not_fuzzed++;
 cycles_wo_finds = 0;
 /* Set next_100 pointer for every 100th element (index 0, 100, etc) to allow faster iteration. */
 if ((queued_paths - 1) % 100 == 0 && queued_paths > 1) {
 q_prev100->next_100 = q;
 q_prev100 = q;
 }
 last_path_time = get_cur_time();
}

afl-fuzz維護一個queue_entry的鏈表，該鏈表用來保存測試樣例，每次調用add_to_queue都會將新樣例儲存到鏈表頭部。

另外還有一個q_prev100也是queue_entry的鏈表，但它每 100 個測試樣例保存一次。

load_auto

嘗試在輸入目錄下尋找自動生成的字典文件，調用maybe_add_auto將相應的字典加入到全局變量a_extras中，用于后續字典模式的變異當中。

pivot_inputs

在輸出文件夾中創建與輸入樣例間的硬鏈接，稱之為orignal。

load_extras

如果指定了-x參數（字典模式），加載對應的字典到全局變量extras當中，用于后續字典模式的變異當中。

find_timeout

如果指定了resuming_fuzz，即從輸出目錄當中恢復模糊測試狀態，會從之前的模糊測試狀態fuzzer_stats文件中計算中timeout值，保存在exec_tmout中。

detect_file_args

檢測輸入的命令行中是否包含@@參數，如果包含的話需要將@@替換成目錄文件"%s/.cur_input", out_dir，使得模糊測試目標程序的命令完整；同時將目錄文件"%s/.cur_input"路徑保存在out_file當中，后續變異的內容保存在該文件路徑中，用于運行測試目標文件。

setup_stdio_file

如果目標程序的輸入不是來源于文件而是來源于標準輸入的話，則將目錄文件"%s/.cur_input"文件打開保存在out_fd文件句柄中，后續將標準輸入重定向到該文件中；結合detect_file_args函數實現了將變異的內容保存在"%s/.cur_input"文件中，運行目標測試文件并進行模糊測試。

check_binary

檢查二進制文件是否合法。

perform_dry_run

將每個測試樣例作為輸入去運行目標程序，檢查程序是否能夠正常工作：

static void perform_dry_run(char** argv) {
 struct queue_entry* q = queue;
 u32 cal_failures = 0;
 u8* skip_crashes = getenv("AFL_SKIP_CRASHES");
 while (q) {
 u8* use_mem;
 u8 res;
 s32 fd;
 u8* fn = strrchr(q->fname, '/') + 1;
 ACTF("Attempting dry run with '%s'...", fn);
 fd = open(q->fname, O_RDONLY);
 if (fd < 0) PFATAL("Unable to open '%s'", q->fname);
 use_mem = ck_alloc_nozero(q->len);
 if (read(fd, use_mem, q->len) != q->len)
 FATAL("Short read from '%s'", q->fname);
 close(fd);
 res = calibrate_case(argv, q, use_mem, 0, 1);
 ck_free(use_mem);
 if (stop_soon) return;
 if (res == crash_mode || res == FAULT_NOBITS)
 SAYF(cGRA " len = %u, map size = %u, exec speed = %llu us" cRST, 
 q->len, q->bitmap_size, q->exec_us);
 ......
 if (q->var_behavior) WARNF("Instrumentation output varies across runs.");
 q = q->next;
 }
 if (cal_failures) {
 if (cal_failures == queued_paths)
 FATAL("All test cases time out%s, giving up!",
 skip_crashes ? " or crash" : "");
 WARNF("Skipped %u test cases (%0.02f%%) due to timeouts%s.", cal_failures,
 ((double)cal_failures) * 100 / queued_paths,
 skip_crashes ? " or crashes" : "");
 if (cal_failures * 5 > queued_paths)
 WARNF(cLRD "High percentage of rejected test cases, check settings!");
 }
 OKF("All test cases processed.");
}

對每個測試樣例使用calibrate_case進行測試，并返回運行結果，然后處理其中異常的情況，比如說程序崩潰或運行超時等。

calibrate_case

static u8 calibrate_case(char** argv, struct queue_entry* q, u8* use_mem,
 u32 handicap, u8 from_queue) {
 static u8 first_trace[MAP_SIZE];
 u8 fault = 0, new_bits = 0, var_detected = 0, hnb = 0,
 first_run = (q->exec_cksum == 0);
 u64 start_us, stop_us;
 s32 old_sc = stage_cur, old_sm = stage_max;
 u32 use_tmout = exec_tmout;
 u8* old_sn = stage_name;
 /* Be a bit more generous about timeouts when resuming sessions, or when
 trying to calibrate already-added finds. This helps avoid trouble due
 to intermittent latency. */
 if (!from_queue || resuming_fuzz)
 use_tmout = MAX(exec_tmout + CAL_TMOUT_ADD,
 exec_tmout * CAL_TMOUT_PERC / 100);
 q->cal_failed++;
 stage_name = "calibration";
 stage_max = fast_cal ? 3 : CAL_CYCLES;
 /* Make sure the forkserver is up before we do anything, and let's not
 count its spin-up time toward binary calibration. */
 if (dumb_mode != 1 && !no_forkserver && !forksrv_pid)
 init_forkserver(argv);
 if (q->exec_cksum) {
 memcpy(first_trace, trace_bits, MAP_SIZE);
 hnb = has_new_bits(virgin_bits);
 if (hnb > new_bits) new_bits = hnb;
 }
 start_us = get_cur_time_us();
 for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
 u32 cksum;
 if (!first_run && !(stage_cur % stats_update_freq)) show_stats();
 write_to_testcase(use_mem, q->len);
 fault = run_target(argv, use_tmout);
 /* stop_soon is set by the handler for Ctrl+C. When it's pressed,
 we want to bail out quickly. */
 if (stop_soon || fault != crash_mode) goto abort_calibration;
 if (!dumb_mode && !stage_cur && !count_bytes(trace_bits)) {
 fault = FAULT_NOINST;
 goto abort_calibration;
 }
 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
 if (q->exec_cksum != cksum) {
 hnb = has_new_bits(virgin_bits);
 if (hnb > new_bits) new_bits = hnb;
 if (q->exec_cksum) {
 u32 i;
 for (i = 0; i < MAP_SIZE; i++) {
 if (!var_bytes[i] && first_trace[i] != trace_bits[i]) {
 var_bytes[i] = 1;
 stage_max = CAL_CYCLES_LONG;
 }
 }
 var_detected = 1;
 } else {
 q->exec_cksum = cksum;
 memcpy(first_trace, trace_bits, MAP_SIZE);
 }
 }
 }
 stop_us = get_cur_time_us();
 total_cal_us += stop_us - start_us;
 total_cal_cycles += stage_max;
 /* OK, let's collect some stats about the performance of this test case.
 This is used for fuzzing air time calculations in calculate_score(). */
 q->exec_us = (stop_us - start_us) / stage_max;
 q->bitmap_size = count_bytes(trace_bits);
 q->handicap = handicap;
 q->cal_failed = 0;
 total_bitmap_size += q->bitmap_size;
 total_bitmap_entries++;
 update_bitmap_score(q);
 /* If this case didn't result in new output from the instrumentation, tell
 parent. This is a non-critical problem, but something to warn the user
 about. */
 if (!dumb_mode && first_run && !fault && !new_bits) fault = FAULT_NOBITS;
abort_calibration:
 if (new_bits == 2 && !q->has_new_cov) {
 q->has_new_cov = 1;
 queued_with_cov++;
 }
 /* Mark variable paths. */
 if (var_detected) {
 var_byte_count = count_bytes(var_bytes);
 if (!q->var_behavior) {
 mark_as_variable(q);
 queued_variable++;
 }
 }
 stage_name = old_sn;
 stage_cur = old_sc;
 stage_max = old_sm;
 if (!first_run) show_stats();
 return fault;
}

該函數用以對樣例進行測試，在后續的測試過程中也會反復調用。此處，其主要的工作是：

判斷樣例是否是首次運行，記錄在first_run
設置超時閾值use_tmout
調用init_forkserver初始化fork server
多次運行測試樣例，記錄數據

init_forkserver

fork server 是 AFL 中一個重要的機制。

afl-fuzz主動建立一個子進程為fork server，而模糊測試則是通過fork server調用 fork 建立子進程來進行測試。

參考在源代碼注釋中的這篇文章可以有更加深入的理解：

https://lcamtuf.blogspot.com/2014/10/fuzzing-binaries-without-execve.html

之所以需要設計它，筆者在這里給出一個比較概括的理由：一般來說，如果我們想要測試輸入樣例，就需要用fork+execve去執行相關的二進制程序，但是執行程序是需要加載代碼、動態庫、符號解析等各種耗時的行為，這會讓 AFL 不夠效率。

但是這個過程其實是存在浪費的，可以注意到，如果我們要對相同的二進制程序進行多次不同的輸入樣本進行測試，那按照原本的操作，我們應該多次執行fork+execve，而浪費就出現在這，因為我們明明已經加載好了一切，卻又要因此重復加載釋放。

因此fork server的設計主要就是為了解決這個浪費。它通過向代碼中進行插樁的方式，使得在二進制程序中去建立一個fork server（對，它實際上是由目標程序去建立的），然后這個fork server會在完成一切初始化后，停止在某一個地方（往往設定在main函數）等待 fuzzer 去喊開始執行。

一旦 fuzzer 喊了開始，就會由這個fork server去調用fork然后往下執行。而我們知道，fork由于寫時復制的機制存在，它其實并沒有過多的開銷，可以完全繼承原有的所有上下文信息，從而避開了多次execve的加載開銷。

摘抄一段這部分插樁的內容：

__afl_forkserver:
 /* Phone home and tell the parent that we're OK. */
 pushl $4 /* length */
 pushl $__afl_temp /* data */
 pushl $199 /* file desc */
 call write
 addl $12, %esp
__afl_fork_wait_loop:
 /* Wait for parent by reading from the pipe. This will block until
 the parent sends us something. Abort if read fails. */
 pushl $4 /* length */
 pushl $__afl_temp /* data */
 pushl $198 /* file desc */
 call read
 addl $12, %esp
 cmpl $4, %eax
 jne __afl_die
 /* Once woken up, create a clone of our process. */
 call fork
 cmpl $0, %eax
 jl __afl_die
 je __afl_fork_resume
 /* In parent process: write PID to pipe, then wait for child. 
 Parent will handle timeouts and SIGKILL the child as needed. */
 movl %eax, __afl_fork_pid
 pushl $4 /* length */
 pushl $__afl_fork_pid /* data */
 pushl $199 /* file desc */
 call write
 addl $12, %esp
 pushl $2 /* WUNTRACED */
 pushl $__afl_temp /* status */
 pushl __afl_fork_pid /* PID */
 call waitpid
 addl $12, %esp
 cmpl $0, %eax
 jle __afl_die
 /* Relay wait status to pipe, then loop back. */
 pushl $4 /* length */
 pushl $__afl_temp /* data */
 pushl $199 /* file desc */
 call write
 addl $12, %esp
 jmp __afl_fork_wait_loop
__afl_fork_resume:
 /* In child process: close fds, resume execution. */
 pushl $198
 call close
 pushl $199
 call close
 addl $8, %esp
 ret

fork server主要是通過管道和 afl-fuzz 中的fork server進行通信的，但他們其實不做過多的事情，往往只是通知一下程序運行的狀態。因為真正的反饋信息，包括路徑的發現等這部分功能是通過共享內存去實現的，它們不需要用fork server這種效率較低的方案去記錄數據。

剩下的就是關閉一些不需要的文件或管道了，代碼姑且貼在這里，以備未來有需要時可以現查：

EXP_ST void init_forkserver(char** argv) {
 static struct itimerval it;
 int st_pipe[2], ctl_pipe[2];
 int status;
 s32 rlen;
 ACTF("Spinning up the fork server...");
 if (pipe(st_pipe) || pipe(ctl_pipe)) PFATAL("pipe() failed");
 forksrv_pid = fork();
 if (forksrv_pid < 0) PFATAL("fork() failed");
 if (!forksrv_pid) {
 struct rlimit r;
 /* Umpf. On OpenBSD, the default fd limit for root users is set to
 soft 128. Let's try to fix that... */
 if (!getrlimit(RLIMIT_NOFILE, &r) && r.rlim_cur < FORKSRV_FD + 2) {
 r.rlim_cur = FORKSRV_FD + 2;
 setrlimit(RLIMIT_NOFILE, &r); /* Ignore errors */
 }
 if (mem_limit) {
 r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;
#ifdef RLIMIT_AS
 setrlimit(RLIMIT_AS, &r); /* Ignore errors */
#else
 /* This takes care of OpenBSD, which doesn't have RLIMIT_AS, but
 according to reliable sources, RLIMIT_DATA covers anonymous
 maps - so we should be getting good protection against OOM bugs. */
 setrlimit(RLIMIT_DATA, &r); /* Ignore errors */
#endif /* ^RLIMIT_AS */
 }
 /* Dumping cores is slow and can lead to anomalies if SIGKILL is delivered
 before the dump is complete. */
 r.rlim_max = r.rlim_cur = 0;
 setrlimit(RLIMIT_CORE, &r); /* Ignore errors */
 /* Isolate the process and configure standard descriptors. If out_file is
 specified, stdin is /dev/null; otherwise, out_fd is cloned instead. */
 setsid();
 dup2(dev_null_fd, 1);
 dup2(dev_null_fd, 2);
 if (out_file) {
 dup2(dev_null_fd, 0);
 } else {
 dup2(out_fd, 0);
 close(out_fd);
 }
 /* Set up control and status pipes, close the unneeded original fds. */
 if (dup2(ctl_pipe[0], FORKSRV_FD) < 0) PFATAL("dup2() failed");
 if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0) PFATAL("dup2() failed");
 close(ctl_pipe[0]);
 close(ctl_pipe[1]);
 close(st_pipe[0]);
 close(st_pipe[1]);
 close(out_dir_fd);
 close(dev_null_fd);
 close(dev_urandom_fd);
 close(fileno(plot_file));
 /* This should improve performance a bit, since it stops the linker from
 doing extra work post-fork(). */
 if (!getenv("LD_BIND_LAZY")) setenv("LD_BIND_NOW", "1", 0);
 /* Set sane defaults for ASAN if nothing else specified. */
 setenv("ASAN_OPTIONS", "abort_on_error=1:"
 "detect_leaks=0:"
 "symbolize=0:"
 "allocator_may_return_null=1", 0);
 /* MSAN is tricky, because it doesn't support abort_on_error=1 at this
 point. So, we do this in a very hacky way. */
 setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
 "symbolize=0:"
 "abort_on_error=1:"
 "allocator_may_return_null=1:"
 "msan_track_origins=0", 0);
 execv(target_path, argv);
 /* Use a distinctive bitmap signature to tell the parent about execv()
 falling through. */
 *(u32*)trace_bits = EXEC_FAIL_SIG;
 exit(0);
 }
 /* Close the unneeded endpoints. */
 close(ctl_pipe[0]);
 close(st_pipe[1]);
 fsrv_ctl_fd = ctl_pipe[1];
 fsrv_st_fd = st_pipe[0];
 /* Wait for the fork server to come up, but don't wait too long. */
 it.it_value.tv_sec = ((exec_tmout * FORK_WAIT_MULT) / 1000);
 it.it_value.tv_usec = ((exec_tmout * FORK_WAIT_MULT) % 1000) * 1000;
 setitimer(ITIMER_REAL, &it, NULL);
 rlen = read(fsrv_st_fd, &status, 4);
 it.it_value.tv_sec = 0;
 it.it_value.tv_usec = 0;
 setitimer(ITIMER_REAL, &it, NULL);
 /* If we have a four-byte "hello" message from the server, we're all set.
 Otherwise, try to figure out what went wrong. */
 if (rlen == 4) {
 OKF("All right - fork server is up.");
 return;
 }
 if (child_timed_out)
 FATAL("Timeout while initializing fork server (adjusting -t may help)");
 if (waitpid(forksrv_pid, &status, 0) <= 0)
 PFATAL("waitpid() failed");
 if (WIFSIGNALED(status)) {
 if (mem_limit && mem_limit < 500 && uses_asan) {
 SAYF("" cLRD "[-] " cRST
 "Whoops, the target binary crashed suddenly, before receiving any input"
 " from the fuzzer! Since it seems to be built with ASAN and you have a"
 " restrictive memory limit configured, this is expected; please read"
 " %s/notes_for_asan.txt for help.", doc_path);
 } else if (!mem_limit) {
 SAYF("" cLRD "[-] " cRST
 "Whoops, the target binary crashed suddenly, before receiving any input"
 " from the fuzzer! There are several probable explanations:"
 " - The binary is just buggy and explodes entirely on its own. If so, you"
 " need to fix the underlying problem or find a better replacement."
#ifdef __APPLE__
 " - On MacOS X, the semantics of fork() syscalls are non-standard and may"
 " break afl-fuzz performance optimizations when running platform-specific"
 " targets. To fix this, set AFL_NO_FORKSRV=1 in the environment."
#endif /* __APPLE__ */
 " - Less likely, there is a horrible bug in the fuzzer. If other options"
 " fail, poke  for troubleshooting tips.");
 } else {
 SAYF("" cLRD "[-] " cRST
 "Whoops, the target binary crashed suddenly, before receiving any input"
 " from the fuzzer! There are several probable explanations:"
 " - The current memory limit (%s) is too restrictive, causing the"
 " target to hit an OOM condition in the dynamic linker. Try bumping up"
 " the limit with the -m setting in the command line. A simple way confirm"
 " this diagnosis would be:"
#ifdef RLIMIT_AS
 " ( ulimit -Sv $[%llu << 10]; /path/to/fuzzed_app )"
#else
 " ( ulimit -Sd $[%llu << 10]; /path/to/fuzzed_app )"
#endif /* ^RLIMIT_AS */
 " Tip: you can use http://jwilk.net/software/recidivm to quickly"
 " estimate the required amount of virtual memory for the binary."
 " - The binary is just buggy and explodes entirely on its own. If so, you"
 " need to fix the underlying problem or find a better replacement."
#ifdef __APPLE__
 " - On MacOS X, the semantics of fork() syscalls are non-standard and may"
 " break afl-fuzz performance optimizations when running platform-specific"
 " targets. To fix this, set AFL_NO_FORKSRV=1 in the environment."
#endif /* __APPLE__ */
 " - Less likely, there is a horrible bug in the fuzzer. If other options"
 " fail, poke  for troubleshooting tips.",
 DMS(mem_limit << 20), mem_limit - 1);
 }
 FATAL("Fork server crashed with signal %d", WTERMSIG(status));
 }
 if (*(u32*)trace_bits == EXEC_FAIL_SIG)
 FATAL("Unable to execute target application ('%s')", argv[0]);
 if (mem_limit && mem_limit < 500 && uses_asan) {
 SAYF("" cLRD "[-] " cRST
 "Hmm, looks like the target binary terminated before we could complete a"
 " handshake with the injected code. Since it seems to be built with ASAN and"
 " you have a restrictive memory limit configured, this is expected; please"
 " read %s/notes_for_asan.txt for help.", doc_path);
 } else if (!mem_limit) {
 SAYF("" cLRD "[-] " cRST
 "Hmm, looks like the target binary terminated before we could complete a"
 " handshake with the injected code. Perhaps there is a horrible bug in the"
 " fuzzer. Poke  for troubleshooting tips.");
 } else {
 SAYF("" cLRD "[-] " cRST
 "Hmm, looks like the target binary terminated before we could complete a"
 " handshake with the injected code. There are %s probable explanations:"
 "%s"
 " - The current memory limit (%s) is too restrictive, causing an OOM"
 " fault in the dynamic linker. This can be fixed with the -m option. A"
 " simple way to confirm the diagnosis may be:"
#ifdef RLIMIT_AS
 " ( ulimit -Sv $[%llu << 10]; /path/to/fuzzed_app )"
#else
 " ( ulimit -Sd $[%llu << 10]; /path/to/fuzzed_app )"
#endif /* ^RLIMIT_AS */
 " Tip: you can use http://jwilk.net/software/recidivm to quickly"
 " estimate the required amount of virtual memory for the binary."
 " - Less likely, there is a horrible bug in the fuzzer. If other options"
 " fail, poke  for troubleshooting tips.",
 getenv(DEFER_ENV_VAR) ? "three" : "two",
 getenv(DEFER_ENV_VAR) ?
 " - You are using deferred forkserver, but __AFL_INIT() is never"
 " reached before the program terminates." : "",
 DMS(mem_limit << 20), mem_limit - 1);
 }
 FATAL("Fork server handshake failed");
}

cull_queue

將運行過的種子根據運行的效果進行排序，后續模糊測試根據排序的結果來挑選樣例進行模糊測試。

show_init_stats

初始化UI。

find_start_position

如果是恢復運行，則調用該函數來尋找到對應的樣例的位置。

write_stats_file

更新統計信息文件以進行無人值守的監視。

save_auto

保存自動提取的token，用于后續字典模式的fuzz。

afl-fuzz 主循環

首先調用cull_queue來優化隊列
如果queue_cur為空，代表所有queue都被執行完一輪
設置queue_cycle計數器加一，即代表所有queue被完整執行了多少輪。
設置current_entry為0，和queue_cur為queue首元素，開始新一輪fuzz。
如果是resume fuzz情況，則先檢查seek_to是否為空，如果不為空，就從seek_to指定的queue項開始執行。
刷新展示界面show_stats。
如果在一輪執行之后的queue里的case數，和執行之前一樣，代表在完整的一輪執行里都沒有發現任何一個新的case。
·如果use_splicing為1，就設置cycles_wo_finds計數器加1
·否則，設置use_splicing為1，代表我們接下來要通過splice重組queue里的case。
執行skipped_fuzz = fuzz_one(use_argv)來對queue_cur進行一次測試。
注意fuzz_one并不一定真的執行當前queue_cur，它是有一定策略的，如果不執行，就直接返回1，否則返回0。
如果skipped_fuzz為0，且存在sync_id。
sync_interval_cnt計數器加一，如果其結果是SYNC_INTERVAL(默認是5)的倍數，就進行一次sync。
queue_cur = queue_cur->next;current_entry++;，開始測試下一個queue。

  
while (1) {
 u8 skipped_fuzz;
 cull_queue();
 if (!queue_cur) {
 queue_cycle++;
 current_entry = 0;
 cur_skipped_paths = 0;
 queue_cur = queue;
 while (seek_to) {
 current_entry++;
 seek_to--;
 queue_cur = queue_cur->next;
 }
 show_stats();
 if (not_on_tty) {
 ACTF("Entering queue cycle %llu.", queue_cycle);
 fflush(stdout);
 }
 /* If we had a full queue cycle with no new finds, try
 recombination strategies next. */
 if (queued_paths == prev_queued) {
 if (use_splicing) cycles_wo_finds++; else use_splicing = 1;
 } else cycles_wo_finds = 0;
 prev_queued = queued_paths;
 if (sync_id && queue_cycle == 1 && getenv("AFL_IMPORT_FIRST"))
 sync_fuzzers(use_argv);
 }
 skipped_fuzz = fuzz_one(use_argv);
 if (!stop_soon && sync_id && !skipped_fuzz) {
 if (!(sync_interval_cnt++ % SYNC_INTERVAL))
 sync_fuzzers(use_argv);
 }
 if (!stop_soon && exit_1) stop_soon = 2;
 if (stop_soon) break;
 queue_cur = queue_cur->next;
 current_entry++;
 }

fuzz_one

從測試樣例的隊列中取出current_entry進行測試，成功則返回 0 ，否則返回 1。這里主要是對該函數主要內容進行記錄，不做細節的代碼分析。

打開queue_cur并映射到orig_in和in_buf
分配len大小的內存，并初始化為全0，然后將地址賦值給out_buf

CALIBRATION 階段

若queue_cur->cal_failed < CAL_CHANCES且queue_cur->cal_failed >0，則調用calibrate_case

TRIMMING 階段

如果樣例沒經過該階段，那么就調用trim_case修剪樣例
將修剪后的結果重新放入out_buf

縮減的思路是這樣的：如果對一個樣本進行縮減后，它所覆蓋的路徑并未發生變化，那么就說明縮減的這部分內容是可有可無的，因此可以刪除。

具體策略如下：

如果這個case的大小len小于5字節，就直接返回
設定stage_name為tmp，該變量僅用來標識本次縮減所使用的策略
計算len_p2，其值是大于等于q->len的第一個2的冪次。
取len_p2/16為remove_len作為起始步長。
進入循環，終止條件為remove_len小于終止步長len_p2/1024, 每輪循環步長會除2。
若無變化
如有變化
則從q->len中減去remove_len個字節，并由此重新計算出一個len_p2，這里注意一下while (remove_len >= MAX(len_p2 / TRIM_END_STEPS, TRIM_MIN_BYTES))
將in_buf+remove_pos+remove_len到最后的字節，前移到in_buf+remove_pos處，等于刪除了remove_pos向后的remove_len個字節。
如果needs_write為 0，則設置其為 1，并保存當前trace_bits到clean_trace中。
remove_pos加上remove_len
初始化一些必要數據后，再次進入循環，這次是按照當前設定的步長對樣本進行遍歷
用run_target運行樣例，trim_execs計數器加一
對比路徑是否變化
如果needs_write為1
刪除原來的q->fname，創建一個新的q->fname，將in_buf里的內容寫入，然后用clean_trace恢復trace_bits的值。
進行一次update_bitmap_score

static u8 trim_case(char** argv, struct queue_entry* q, u8* in_buf) {
 static u8 tmp[64];
 static u8 clean_trace[MAP_SIZE];
 u8 needs_write = 0, fault = 0;
 u32 trim_exec = 0;
 u32 remove_len;
 u32 len_p2;
 /* Although the trimmer will be less useful when variable behavior is
 detected, it will still work to some extent, so we don't check for
 this. */
 if (q->len < 5) return 0;
 stage_name = tmp;
 bytes_trim_in += q->len;
 /* Select initial chunk len, starting with large steps. */
 len_p2 = next_p2(q->len);
 remove_len = MAX(len_p2 / TRIM_START_STEPS, TRIM_MIN_BYTES);
 /* Continue until the number of steps gets too high or the stepover
 gets too small. */
 while (remove_len >= MAX(len_p2 / TRIM_END_STEPS, TRIM_MIN_BYTES)) {
 u32 remove_pos = remove_len;
 sprintf(tmp, "trim %s/%s", DI(remove_len), DI(remove_len));
 stage_cur = 0;
 stage_max = q->len / remove_len;
 while (remove_pos < q->len) {
 u32 trim_avail = MIN(remove_len, q->len - remove_pos);
 u32 cksum;
 write_with_gap(in_buf, q->len, remove_pos, trim_avail);
 fault = run_target(argv, exec_tmout);
 trim_execs++;
 if (stop_soon || fault == FAULT_ERROR) goto abort_trimming;
 /* Note that we don't keep track of crashes or hangs here; maybe TODO? */
 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
 /* If the deletion had no impact on the trace, make it permanent. This
 isn't perfect for variable-path inputs, but we're just making a
 best-effort pass, so it's not a big deal if we end up with false
 negatives every now and then. */
 if (cksum == q->exec_cksum) {
 u32 move_tail = q->len - remove_pos - trim_avail;
 q->len -= trim_avail;
 len_p2 = next_p2(q->len);
 memmove(in_buf + remove_pos, in_buf + remove_pos + trim_avail, 
 move_tail);
 /* Let's save a clean trace, which will be needed by
 update_bitmap_score once we're done with the trimming stuff. */
 if (!needs_write) {
 needs_write = 1;
 memcpy(clean_trace, trace_bits, MAP_SIZE);
 }
 } else remove_pos += remove_len;
 /* Since this can be slow, update the screen every now and then. */
 if (!(trim_exec++ % stats_update_freq)) show_stats();
 stage_cur++;
 }
 remove_len >>= 1;
 }
 /* If we have made changes to in_buf, we also need to update the on-disk
 version of the test case. */
 if (needs_write) {
 s32 fd;
 unlink(q->fname); /* ignore errors */
 fd = open(q->fname, O_WRONLY | O_CREAT | O_EXCL, 0600);
 if (fd < 0) PFATAL("Unable to create '%s'", q->fname);
 ck_write(fd, in_buf, q->len, q->fname);
 close(fd);
 memcpy(trace_bits, clean_trace, MAP_SIZE);
 update_bitmap_score(q);
 }
abort_trimming:
 bytes_trim_out += q->len;
 return fault;
}

PERFORMANCE SCORE 階段

perf_score =calculate_score(queue_cur)
如果skip_deterministic為1，或者queue_cur被 fuzz 過，或者queue_cur的passed_det為1，則跳轉去havoc_stage階段
設置doing_det為 1

SIMPLE BITFLIP 階段

這個階段讀起來感覺比較抽象。首先定義了這么一個宏：

#define FLIP_BIT(_ar, _b) do { \
 u8* _arf = (u8*)(_ar); \
 u32 _bf = (_b); \
 _arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7)); \
 } while (0)

這個宏的操作是對一個 bit 進行反轉。

而接下來首先有一個循環：

 stage_short = "flip1";
 stage_max = len << 3;
 stage_name = "bitflip 1/1";
 stage_val_type = STAGE_VAL_NONE;
 orig_hit_cnt = queued_paths + unique_crashes;
 prev_cksum = queue_cur->exec_cksum;
 for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
 stage_cur_byte = stage_cur >> 3;
 FLIP_BIT(out_buf, stage_cur);
 if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
 FLIP_BIT(out_buf, stage_cur);
 ......

stage_max是輸入的總 bit 數，然后分別對每個 bit 進行翻轉后用common_fuzz_stuff進行測試，然后再將其翻轉回來。

而如果對某個字節的最后一個 bit 翻轉后測試，發現路徑并未增加，就能夠將其認為是一個token。

token默認最小是3，最大是32,每次發現新token時，通過maybe_add_auto添加到a_extras數組里。
stage_finds[STAGE_FLIP1]的值加上在整個FLIP_BIT中新發現的路徑和Crash總和
stage_cycles[STAGE_FLIP1]的值加上在整個FLIP_BIT中執行的target次數stage_max
設置stage_name為bitflip 2/1,原理和之前一樣，只是這次是連續翻轉相鄰的兩位。

然后在后面的一個循環中又做類似的事，但每次會翻轉兩個 bit：

 stage_name = "bitflip 2/1";
 stage_short = "flip2";
 stage_max = (len << 3) - 1;
 orig_hit_cnt = new_hit_cnt;
 for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
 stage_cur_byte = stage_cur >> 3;
 FLIP_BIT(out_buf, stage_cur);
 FLIP_BIT(out_buf, stage_cur + 1);
 if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
 FLIP_BIT(out_buf, stage_cur);
 FLIP_BIT(out_buf, stage_cur + 1);
 }

然后保存結果到stage_finds[STAGE_FLIP2]和stage_cycles[STAGE_FLIP2]里。
同理，設置stage_name為bitflip 4/1，翻轉連續的四位并記錄。
構建Effector map
進入bitflip 8/8的階段，這個階段就是對每個字節的所有 bit 都進行翻轉，然后用common_fuzz_stuff進行測試
如果其造成執行路徑與原始路徑不一致，就將該byte在effector map中標記為1，即“有效”的，否則標記為 0，即“無效”的。
這樣做的邏輯是：如果一個byte完全翻轉，都無法帶來執行路徑的變化，那么這個byte很有可能是屬于”data”，而非”metadata”（例如size, flag等），對整個fuzzing的意義不大。所以，在隨后的一些變異中，會參考effector map，跳過那些“無效”的byte，從而節省了執行資源。

然后進入bitflip 16/8部分，按對每兩個字節進行一次翻轉然后測試：

  
for (i = 0; i < len - 1; i++) {
 /* Let's consult the effector map... */
 if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
 stage_max--;
 continue;
 }
 stage_cur_byte = i;
 *(u16*)(out_buf + i) ^= 0xFFFF;
 if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
 stage_cur++;
 *(u16*)(out_buf + i) ^= 0xFFFF;
 }

這里要注意在翻轉之前會先檢查eff_map里對應于這兩個字節的標志是否為0，如果為0，則這兩個字節是無效的數據，stage_max減一，然后開始變異下一個字。
common_fuzz_stuff執行變異后的結果，然后還原。

最后是bitflip 32/8階段，每 4 個字節進行翻轉然后測試：

 stage_name = "bitflip 32/8";
 stage_short = "flip32";
 stage_cur = 0;
 stage_max = len - 3;
 orig_hit_cnt = new_hit_cnt;
 for (i = 0; i < len - 3; i++) {
 /* Let's consult the effector map... */
 if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
 !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
 stage_max--;
 continue;
 }
 stage_cur_byte = i;
 *(u32*)(out_buf + i) ^= 0xFFFFFFFF;
 if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
 stage_cur++;
 *(u32*)(out_buf + i) ^= 0xFFFFFFFF;
 }

在每次翻轉之前會檢查eff_map里對應于這四個字節的標志是否為0，如果是0，則這兩個字節是無效的數據，stage_max減一，然后開始變異下一組雙字。

ARITHMETIC INC/DEC 階段

arith 8/8，每次對8個bit進行加減運算，按照每8個 bit 的步長從頭開始，即對文件的每個 byte 進行整數加減變異。
arith 16/8，每次對16個bit進行加減運算，按照每8個bit的步長從頭開始，即對文件的每個word進行整數加減變異。
arith 32/8，每次對32個bit進行加減運算，按照每8個bit的步長從頭開始，即對文件的每個dword進行整數加減變異。
加減變異的上限，在config.h中的宏ARITH_MAX定義，默認為 35。所以，對目標整數會進行+1, +2, …, +35, -1, -2, …, -35 的變異。特別地，由于整數存在大端序和小端序兩種表示方式，AFL會貼心地對這兩種整數表示方式都進行變異。
此外，AFL 還會智能地跳過某些arithmetic變異。第一種情況就是前面提到的 effector map ：如果一個整數的所有 bytes 都被判斷為“無效”，那么就跳過對整數的變異。第二種情況是之前 bitflip 已經生成過的變異：如果加/減某個數后，其效果與之前的某種bitflip相同，那么這次變異肯定在上一個階段已經執行過了，此次便不會再執行。

此處展示 arith 8/8 部分代碼：

 stage_name = "arith 8/8";
 stage_short = "arith8";
 stage_cur = 0;
 stage_max = 2 * len * ARITH_MAX;
 stage_val_type = STAGE_VAL_LE;
 orig_hit_cnt = new_hit_cnt;
 for (i = 0; i < len; i++) {
 u8 orig = out_buf[i];
 /* Let's consult the effector map... */
 if (!eff_map[EFF_APOS(i)]) {
 stage_max -= 2 * ARITH_MAX;
 continue;
 }
 stage_cur_byte = i;
 for (j = 1; j <= ARITH_MAX; j++) {
 u8 r = orig ^ (orig + j);
 /* Do arithmetic operations only if the result couldn't be a product
 of a bitflip. */
 if (!could_be_bitflip(r)) {
 stage_cur_val = j;
 out_buf[i] = orig + j;
 if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
 stage_cur++;
 } else stage_max--;
 r = orig ^ (orig - j);
 if (!could_be_bitflip(r)) {
 stage_cur_val = -j;
 out_buf[i] = orig - j;
 if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
 stage_cur++;
 } else stage_max--;
 out_buf[i] = orig;
 }
 }
 new_hit_cnt = queued_paths + unique_crashes;
 stage_finds[STAGE_ARITH8] += new_hit_cnt - orig_hit_cnt;
 stage_cycles[STAGE_ARITH8] += stage_max;

INTERESTING VALUES 階段

interest 8/8，每次對8個bit進替換，按照每8個bit的步長從頭開始，即對文件的每個byte進行替換。
interest 16/8，每次對16個bit進替換，按照每8個bit的步長從頭開始，即對文件的每個word進行替換。
interest 32/8，每次對32個bit進替換，按照每8個bit的步長從頭開始，即對文件的每個dword進行替換。
而用于替換的interesting values是AFL預設的一些比較特殊的數,這些數的定義在config.h文件中：

static s8 interesting_8[] = { INTERESTING_8 };
static s16 interesting_16[] = { INTERESTING_8, INTERESTING_16 };
static s32 interesting_32[] = { INTERESTING_8, INTERESTING_16, INTERESTING_32 };

同樣，effector map仍然會用于判斷是否需要變異；此外，如果某個interesting value，是可以通過bitflip或者arithmetic變異達到，那么這樣的重復性變異也是會跳過的。

此處給出interest 8/8部分代碼：

 stage_name = "interest 8/8";
 stage_short = "int8";
 stage_cur = 0;
 stage_max = len * sizeof(interesting_8);
 stage_val_type = STAGE_VAL_LE;
 orig_hit_cnt = new_hit_cnt;
 /* Setting 8-bit integers. */
 for (i = 0; i < len; i++) {
 u8 orig = out_buf[i];
 /* Let's consult the effector map... */
 if (!eff_map[EFF_APOS(i)]) {
 stage_max -= sizeof(interesting_8);
 continue;
 }
 stage_cur_byte = i;
 for (j = 0; j < sizeof(interesting_8); j++) {
 /* Skip if the value could be a product of bitflips or arithmetics. */
 if (could_be_bitflip(orig ^ (u8)interesting_8[j]) ||
 could_be_arith(orig, (u8)interesting_8[j], 1)) {
 stage_max--;
 continue;
 }
 stage_cur_val = interesting_8[j];
 out_buf[i] = interesting_8[j];
 if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
 out_buf[i] = orig;
 stage_cur++;
 }
 }
 new_hit_cnt = queued_paths + unique_crashes;
 stage_finds[STAGE_INTEREST8] += new_hit_cnt - orig_hit_cnt;
 stage_cycles[STAGE_INTEREST8] += stage_max;

DICTIONARY STUFF 階段

通過-x選項指定一個詞典，如果沒有則跳過前兩個階段；
user extras(over),從頭開始,將用戶提供的tokens依次替換到原文件中，stage_max為extras_cnt * len；
user extras(insert),從頭開始,將用戶提供的tokens依次插入到原文件中，stage_max為extras_cnt * len；
如果在之前的分析中提取到了 tokens，則進入auto extras階段；
auto extras(over),從頭開始,將自動檢測的tokens依次替換到原文件中，stage_max為MIN(a_extras_cnt, USE_AUTO_EXTRAS) * len

此處給出auto extras (over)部分的源代碼：

  
if (!a_extras_cnt) goto skip_extras;
 stage_name = "auto extras (over)";
 stage_short = "ext_AO";
 stage_cur = 0;
 stage_max = MIN(a_extras_cnt, USE_AUTO_EXTRAS) * len;
 stage_val_type = STAGE_VAL_NONE;
 orig_hit_cnt = new_hit_cnt;
 for (i = 0; i < len; i++) {
 u32 last_len = 0;
 stage_cur_byte = i;
 for (j = 0; j < MIN(a_extras_cnt, USE_AUTO_EXTRAS); j++) {
 /* See the comment in the earlier code; extras are sorted by size. */
 if (a_extras[j].len > len - i ||
 !memcmp(a_extras[j].data, out_buf + i, a_extras[j].len) ||
 !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, a_extras[j].len))) {
 stage_max--;
 continue;
 }
 last_len = a_extras[j].len;
 memcpy(out_buf + i, a_extras[j].data, last_len);
 if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
 stage_cur++;
 }
 /* Restore all the clobbered memory. */
 memcpy(out_buf + i, in_buf + i, last_len);
 }
 new_hit_cnt = queued_paths + unique_crashes;
 stage_finds[STAGE_EXTRAS_AO] += new_hit_cnt - orig_hit_cnt;
 stage_cycles[STAGE_EXTRAS_AO] += stage_max;

RANDOM HAVOC 階段

該部分使用一個巨大的 switch ，通過隨機數進行跳轉，并在每個分支中使用隨機數來完成隨機性的行為：

首先指定出變換的此處上限use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2))
然后進入循環，生成一個隨機數去選擇下列中的某一個情況來對樣例進行變換
隨機選取某個bit進行翻轉
隨機選取某個byte，將其設置為隨機的interesting value
隨機選取某個word，并隨機選取大、小端序，將其設置為隨機的interesting value
隨機選取某個dword，并隨機選取大、小端序，將其設置為隨機的interesting value
隨機選取某個byte，對其減去一個隨機數
隨機選取某個byte，對其加上一個隨機數
隨機選取某個word，并隨機選取大、小端序，對其減去一個隨機數
隨機選取某個word，并隨機選取大、小端序，對其加上一個隨機數
隨機選取某個dword，并隨機選取大、小端序，對其減去一個隨機數
隨機選取某個dword，并隨機選取大、小端序，對其加上一個隨機數
隨機選取某個byte，將其設置為隨機數
隨機刪除一段bytes
隨機選取一個位置，插入一段隨機長度的內容，其中75%的概率是插入原文中隨機位置的內容，25%的概率是插入一段隨機選取的數
隨機選取一個位置，替換為一段隨機長度的內容，其中75%的概率是替換成原文中隨機位置的內容，25%的概率是替換成一段隨機選取的數
隨機選取一個位置，用隨機選取的token（用戶提供的或自動生成的）替換
隨機選取一個位置，用隨機選取的token（用戶提供的或自動生成的）插入
然后調用common_fuzz_stuff進行測試
重復上述過程stage_max次

  
for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
 u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2));
 stage_cur_val = use_stacking;
 for (i = 0; i < use_stacking; i++) {
 switch (UR(15 + ((extras_cnt + a_extras_cnt) ? 2 : 0))) {
 case 0:
 /* Flip a single bit somewhere. Spooky! */
 FLIP_BIT(out_buf, UR(temp_len << 3));
 break;
 case 1: 
 /* Set byte to interesting value. */
 out_buf[UR(temp_len)] = interesting_8[UR(sizeof(interesting_8))];
 break;
 case 2:
 /* Set word to interesting value, randomly choosing endian. */
 if (temp_len < 2) break;
 if (UR(2)) {
 *(u16*)(out_buf + UR(temp_len - 1)) =
 interesting_16[UR(sizeof(interesting_16) >> 1)];
 } else {
 *(u16*)(out_buf + UR(temp_len - 1)) = SWAP16(
 interesting_16[UR(sizeof(interesting_16) >> 1)]);
 }
 break;
 case 3:
 /* Set dword to interesting value, randomly choosing endian. */
 if (temp_len < 4) break;
 if (UR(2)) {
 *(u32*)(out_buf + UR(temp_len - 3)) =
 interesting_32[UR(sizeof(interesting_32) >> 2)];
 } else {
 *(u32*)(out_buf + UR(temp_len - 3)) = SWAP32(
 interesting_32[UR(sizeof(interesting_32) >> 2)]);
 }
 break;
 case 4:
 /* Randomly subtract from byte. */
 out_buf[UR(temp_len)] -= 1 + UR(ARITH_MAX);
 break;
 case 5:
 /* Randomly add to byte. */
 out_buf[UR(temp_len)] += 1 + UR(ARITH_MAX);
 break;
 case 6:
 /* Randomly subtract from word, random endian. */
 if (temp_len < 2) break;
 if (UR(2)) {
 u32 pos = UR(temp_len - 1);
 *(u16*)(out_buf + pos) -= 1 + UR(ARITH_MAX);
 } else {
 u32 pos = UR(temp_len - 1);
 u16 num = 1 + UR(ARITH_MAX);
 *(u16*)(out_buf + pos) =
 SWAP16(SWAP16(*(u16*)(out_buf + pos)) - num);
 }
 break;
 case 7:
 /* Randomly add to word, random endian. */
 if (temp_len < 2) break;
 if (UR(2)) {
 u32 pos = UR(temp_len - 1);
 *(u16*)(out_buf + pos) += 1 + UR(ARITH_MAX);
 } else {
 u32 pos = UR(temp_len - 1);
 u16 num = 1 + UR(ARITH_MAX);
 *(u16*)(out_buf + pos) =
 SWAP16(SWAP16(*(u16*)(out_buf + pos)) + num);
 }
 break;
 case 8:
 /* Randomly subtract from dword, random endian. */
 if (temp_len < 4) break;
 if (UR(2)) {
 u32 pos = UR(temp_len - 3);
 *(u32*)(out_buf + pos) -= 1 + UR(ARITH_MAX);
 } else {
 u32 pos = UR(temp_len - 3);
 u32 num = 1 + UR(ARITH_MAX);
 *(u32*)(out_buf + pos) =
 SWAP32(SWAP32(*(u32*)(out_buf + pos)) - num);
 }
 break;
 case 9:
 /* Randomly add to dword, random endian. */
 if (temp_len < 4) break;
 if (UR(2)) {
 u32 pos = UR(temp_len - 3);
 *(u32*)(out_buf + pos) += 1 + UR(ARITH_MAX);
 } else {
 u32 pos = UR(temp_len - 3);
 u32 num = 1 + UR(ARITH_MAX);
 *(u32*)(out_buf + pos) =
 SWAP32(SWAP32(*(u32*)(out_buf + pos)) + num);
 }
 break;
 case 10:
 /* Just set a random byte to a random value. Because,
 why not. We use XOR with 1-255 to eliminate the
 possibility of a no-op. */
 out_buf[UR(temp_len)] ^= 1 + UR(255);
 break;
 case 11 ... 12: {
 /* Delete bytes. We're making this a bit more likely
 than insertion (the next option) in hopes of keeping
 files reasonably small. */
 u32 del_from, del_len;
 if (temp_len < 2) break;
 /* Don't delete too much. */
 del_len = choose_block_len(temp_len - 1);
 del_from = UR(temp_len - del_len + 1);
 memmove(out_buf + del_from, out_buf + del_from + del_len,
 temp_len - del_from - del_len);
 temp_len -= del_len;
 break;
 }
 case 13:
 if (temp_len + HAVOC_BLK_XL < MAX_FILE) {
 /* Clone bytes (75%) or insert a block of constant bytes (25%). */
 u8 actually_clone = UR(4);
 u32 clone_from, clone_to, clone_len;
 u8* new_buf;
 if (actually_clone) {
 clone_len = choose_block_len(temp_len);
 clone_from = UR(temp_len - clone_len + 1);
 } else {
 clone_len = choose_block_len(HAVOC_BLK_XL);
 clone_from = 0;
 }
 clone_to = UR(temp_len);
 new_buf = ck_alloc_nozero(temp_len + clone_len);
 /* Head */
 memcpy(new_buf, out_buf, clone_to);
 /* Inserted part */
 if (actually_clone)
 memcpy(new_buf + clone_to, out_buf + clone_from, clone_len);
 else
 memset(new_buf + clone_to,
 UR(2) ? UR(256) : out_buf[UR(temp_len)], clone_len);
 /* Tail */
 memcpy(new_buf + clone_to + clone_len, out_buf + clone_to,
 temp_len - clone_to);
 ck_free(out_buf);
 out_buf = new_buf;
 temp_len += clone_len;
 }
 break;
 case 14: {
 /* Overwrite bytes with a randomly selected chunk (75%) or fixed
 bytes (25%). */
 u32 copy_from, copy_to, copy_len;
 if (temp_len < 2) break;
 copy_len = choose_block_len(temp_len - 1);
 copy_from = UR(temp_len - copy_len + 1);
 copy_to = UR(temp_len - copy_len + 1);
 if (UR(4)) {
 if (copy_from != copy_to)
 memmove(out_buf + copy_to, out_buf + copy_from, copy_len);
 } else memset(out_buf + copy_to,
 UR(2) ? UR(256) : out_buf[UR(temp_len)], copy_len);
 break;
 }
 /* Values 15 and 16 can be selected only if there are any extras
 present in the dictionaries. */
 case 15: {
 /* Overwrite bytes with an extra. */
 if (!extras_cnt || (a_extras_cnt && UR(2))) {
 /* No user-specified extras or odds in our favor. Let's use an
 auto-detected one. */
 u32 use_extra = UR(a_extras_cnt);
 u32 extra_len = a_extras[use_extra].len;
 u32 insert_at;
 if (extra_len > temp_len) break;
 insert_at = UR(temp_len - extra_len + 1);
 memcpy(out_buf + insert_at, a_extras[use_extra].data, extra_len);
 } else {
 /* No auto extras or odds in our favor. Use the dictionary. */
 u32 use_extra = UR(extras_cnt);
 u32 extra_len = extras[use_extra].len;
 u32 insert_at;
 if (extra_len > temp_len) break;
 insert_at = UR(temp_len - extra_len + 1);
 memcpy(out_buf + insert_at, extras[use_extra].data, extra_len);
 }
 break;
 }
 case 16: {
 u32 use_extra, extra_len, insert_at = UR(temp_len + 1);
 u8* new_buf;
 /* Insert an extra. Do the same dice-rolling stuff as for the
 previous case. */
 if (!extras_cnt || (a_extras_cnt && UR(2))) {
 use_extra = UR(a_extras_cnt);
 extra_len = a_extras[use_extra].len;
 if (temp_len + extra_len >= MAX_FILE) break;
 new_buf = ck_alloc_nozero(temp_len + extra_len);
 /* Head */
 memcpy(new_buf, out_buf, insert_at);
 /* Inserted part */
 memcpy(new_buf + insert_at, a_extras[use_extra].data, extra_len);
 } else {
 use_extra = UR(extras_cnt);
 extra_len = extras[use_extra].len;
 if (temp_len + extra_len >= MAX_FILE) break;
 new_buf = ck_alloc_nozero(temp_len + extra_len);
 /* Head */
 memcpy(new_buf, out_buf, insert_at);
 /* Inserted part */
 memcpy(new_buf + insert_at, extras[use_extra].data, extra_len);
 }
 /* Tail */
 memcpy(new_buf + insert_at + extra_len, out_buf + insert_at,
 temp_len - insert_at);
 ck_free(out_buf);
 out_buf = new_buf;
 temp_len += extra_len;
 break;
 }
 }
 }
 if (common_fuzz_stuff(argv, out_buf, temp_len))
 goto abandon_entry;
 /* out_buf might have been mangled a bit, so let's restore it to its
 original size and shape. */
 if (temp_len < len) out_buf = ck_realloc(out_buf, len);
 temp_len = len;
 memcpy(out_buf, in_buf, len);
 /* If we're finding new stuff, let's run for a bit longer, limits
 permitting. */
 if (queued_paths != havoc_queued) {
 if (perf_score <= HAVOC_MAX_MULT * 100) {
 stage_max *= 2;
 perf_score *= 2;
 }
 havoc_queued = queued_paths;
 }
 }

SPLICING 階段

最后一個階段，它會隨機選擇出另外一個輸入樣例，然后對當前的輸入樣例和另外一個樣例都選擇出合適的偏移量，然后從該處將他們拼接起來，然后重新進入到RANDOM HAVOC階段。

#ifndef IGNORE_FINDS
 /************
 * SPLICING *
 ************/
 /* This is a last-resort strategy triggered by a full round with no findings.
 It takes the current input file, randomly selects another input, and
 splices them together at some offset, then relies on the havoc
 code to mutate that blob. */
retry_splicing:
 if (use_splicing && splice_cycle++ < SPLICE_CYCLES &&
 queued_paths > 1 && queue_cur->len > 1) {
 struct queue_entry* target;
 u32 tid, split_at;
 u8* new_buf;
 s32 f_diff, l_diff;
 /* First of all, if we've modified in_buf for havoc, let's clean that
 up... */
 if (in_buf != orig_in) {
 ck_free(in_buf);
 in_buf = orig_in;
 len = queue_cur->len;
 }
 /* Pick a random queue entry and seek to it. Don't splice with yourself. */
 do { tid = UR(queued_paths); } while (tid == current_entry);
 splicing_with = tid;
 target = queue;
 while (tid >= 100) { target = target->next_100; tid -= 100; }
 while (tid--) target = target->next;
 /* Make sure that the target has a reasonable length. */
 while (target && (target->len < 2 || target == queue_cur)) {
 target = target->next;
 splicing_with++;
 }
 if (!target) goto retry_splicing;
 /* Read the testcase into a new buffer. */
 fd = open(target->fname, O_RDONLY);
 if (fd < 0) PFATAL("Unable to open '%s'", target->fname);
 new_buf = ck_alloc_nozero(target->len);
 ck_read(fd, new_buf, target->len, target->fname);
 close(fd);
 /* Find a suitable splicing location, somewhere between the first and
 the last differing byte. Bail out if the difference is just a single
 byte or so. */
 locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff);
 if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
 ck_free(new_buf);
 goto retry_splicing;
 }
 /* Split somewhere between the first and last differing byte. */
 split_at = f_diff + UR(l_diff - f_diff);
 /* Do the thing. */
 len = target->len;
 memcpy(new_buf, in_buf, split_at);
 in_buf = new_buf;
 ck_free(out_buf);
 out_buf = ck_alloc_nozero(len);
 memcpy(out_buf, in_buf, len);
 goto havoc_stage;
 }

結束

設置ret_val的值為 0
如果queue_cur通過了評估，且was_fuzzed字段是 0，就設置queue_cur->was_fuzzed為 1，然后pending_not_fuzzed計數器減一
如果queue_cur是favored,pending_favored計數器減一。

sync_fuzzers

讀取其他 sync 文件夾下的 queue 文件，然后保存到自己的 queue 里。

打開sync_dir文件夾
while循環讀取該文件夾下的目錄和文件while ((sd_ent = readdir(sd)))
同樣跳過.開頭的文件和標識小于min_accept的文件，因為這些文件應該已經被 sync 過了。
如果標識syncing_case大于等于next_min_accept，就設置next_min_accept為syncing_case + 1
開始同步這個 case
stage_cur計數器加一，如果stage_cur是stats_update_freq的倍數，就刷新一次展示界面。
設置syncing_party的值為sd_ent->d_name
如果save_if_interesting返回 1，queued_imported計數器就加 1
如果 case 大小為 0 或者大于MAX_FILE(默認是1M),就不進行 sync。
否則 mmap 這個文件到內存內存里，然后write_to_testcase(mem, st.st_size),并run_target,然后通過save_if_interesting來決定是否要導入這個文件到自己的 queue 里，如果發現了新的 path，就導入。
跳過.開頭的文件和sync_id即我們自己的輸出文件夾
讀取out_dir/.synced/sd_ent->d_name文件即id_fd里的前4個字節到min_accept里，設置next_min_accept為min_accept，這個值代表之前從這個文件夾里讀取到的最后一個queue的id。
設置stage_name為sprintf(stage_tmp, "sync %u", ++sync_cnt);，設置stage_cur為 0，stage_max為 0
循環讀取sync_dir/sd_ent->d_name/queue文件夾里的目錄和文件
向id_fd寫入當前的next_min_accept值

總結來說，這個函數就是先讀取有哪些 fuzzer 文件夾，然后讀取其他 fuzzer 文件夾下的 queue 文件夾里的 case，并依次執行，如果發現了新 path，就保存到自己的 queue 文件夾里，而且將最后一個 sync 的 case id 寫入到.synced/其他fuzzer文件夾名文件里，以避免重復運行。

common_fuzz_stuff

因為 fuzz_one 部分過于龐大，而這個函數又不是那么特殊，因此把它拉出來做一個簡短的說明。

若有post_handler，那么就對樣例調用post_handler
將樣例寫入文件，然后run_target執行
如果執行結果是超時則做如下操作：

  
if (fault == FAULT_TMOUT) {
 if (subseq_tmouts++ > TMOUT_LIMIT) {
 cur_skipped_paths++;
 return 1;
 }
 } else subseq_tmouts = 0;

如果發現了新路徑，那么保存并增加queued_discovered計數器
更新頁面show_stats

EXP_ST u8 common_fuzz_stuff(char** argv, u8* out_buf, u32 len) {
 u8 fault;
 if (post_handler) {
 out_buf = post_handler(out_buf, &len);
 if (!out_buf || !len) return 0;
 }
 write_to_testcase(out_buf, len);
 fault = run_target(argv, exec_tmout);
 if (stop_soon) return 1;
 if (fault == FAULT_TMOUT) {
 if (subseq_tmouts++ > TMOUT_LIMIT) {
 cur_skipped_paths++;
 return 1;
 }
 } else subseq_tmouts = 0;
 /* Users can hit us with SIGUSR1 to request the current input
 to be abandoned. */
 if (skip_requested) {
 skip_requested = 0;
 cur_skipped_paths++;
 return 1;
 }
 /* This handles FAULT_ERROR for us: */
 queued_discovered += save_if_interesting(argv, out_buf, len, fault);
 if (!(stage_cur % stats_update_freq) || stage_cur + 1 == stage_max)
 show_stats();
 return 0;
}

save_if_interesting

執行結果是否發現了新路徑，決定是否保存或跳過。如果保存了這個 case，則返回 1，否則返回 0。

如果沒有新的路徑發現或者路徑命中次數相同，就直接返回0
將 case 保存到fn = alloc_printf("%s/queue/id:%06u,%s", out_dir, queued_paths, describe_op(hnb))文件里
將新樣本加入隊列add_to_queue
如果hnb的值是2，代表發現了新路徑，設置剛剛加入到隊列里的 queue 的has_new_cov字段為 1，即queue_top->has_new_cov = 1，然后queued_with_cov計數器加一
保存hash到其exec_cksum
評估這個queue，calibrate_case(argv, queue_top, mem, queue_cycle - 1, 0)
根據fault結果進入不同的分支
total_tmouts計數器加一
如果unique_hangs的個數超過能保存的最大數量KEEP_UNIQUE_HANG則返回
若不是 dumb mode，就simplify_trace((u64 *) trace_bits)進行規整。
沒有發現新的超時路徑，就直接返回
否則，代表發現了新的超時路徑，unique_tmouts計數器加一
若hang_tmout大于exec_tmout，則以hang_tmout為timeout，重新執行一次runt_target
若出現崩潰，就跳轉到keep_as_crash
若沒有超時則直接返回
否則就使unique_hangs計數器加一，更新last_hang_time的值，并保存到alloc_printf("%s/hangs/id:%06llu,%s", out_dir, unique_hangs, describe_op(0))文件。
total_crashes計數器加一
如果unique_crashes大于能保存的最大數量KEEP_UNIQUE_CRASH即5000，就直接返回keeping的值
如果不是dumb mode，就simplify_trace((u64 *) trace_bits)進行規整
沒有發現新的crash路徑，就直接返回
否則，代表發現了新的crash路徑，unique_crashes計數器加一，并將結果保存到alloc_printf("%s/crashes/id:%06llu,sig:%02u,%s", out_dir,unique_crashes, kill_signal, describe_op(0))文件。
更新last_crash_time和last_crash_execs
若是出現錯誤，則直接拋出異常
若是崩潰
若是超時
若是其他情況，則直接返回

三、插樁與路徑發現的記錄

其實插樁已經敘述過一部分了，在上文中的fork server部分，筆者就介紹過該機制就是通過插樁實現的。

但還有一部分內容沒有涉及，新路徑是如何在發現的同時被通知給 fuzzer 的？

在插樁階段，我們為每個分支跳轉都添加了一小段代碼，這里筆者以 64 位的情況進行說明：

static const u8* trampoline_fmt_64 =
 ""
 "/* --- AFL TRAMPOLINE (64-BIT) --- */"
 ""
 ".align 4"
 ""
 "leaq -(128+24)(%%rsp), %%rsp"
 "movq %%rdx, 0(%%rsp)"
 "movq %%rcx, 8(%%rsp)"
 "movq %%rax, 16(%%rsp)"
 "movq $0x%08x, %%rcx"
 "call __afl_maybe_log"
 "movq 16(%%rsp), %%rax"
 "movq 8(%%rsp), %%rcx"
 "movq 0(%%rsp), %%rdx"
 "leaq (128+24)(%%rsp), %%rsp"
 ""
 "/* --- END --- */"
 "";

它首先保存了一部分將要被破壞的寄存器，然后調用了__afl_maybe_log來記錄路徑的發現。該函數同樣是由匯編編寫的，但我們可以用一些其他工具來反編譯它：

char __fastcall _afl_maybe_log(__int64 a1, __int64 a2, __int64 a3, __int64 a4)
{
 char v4; // of
 char v5; // al
 __int64 v6; // rdx
 __int64 v7; // rcx
 char *v9; // rax
 int v10; // eax
 void *v11; // rax
 int v12; // edi
 __int64 v13; // rax
 __int64 v14; // rax
 __int64 v15; // [rsp-10h] [rbp-180h]
 char v16; // [rsp+10h] [rbp-160h]
 __int64 v17; // [rsp+18h] [rbp-158h]
 v5 = v4;
 v6 = _afl_area_ptr;
 if ( !_afl_area_ptr )
 {
 if ( _afl_setup_failure )
 return v5 + 127;
 v6 = _afl_global_area_ptr;
 if ( _afl_global_area_ptr )
 {
 _afl_area_ptr = _afl_global_area_ptr;
 }
 else
 {
 v16 = v4;
 v17 = a4;
 v9 = getenv("__AFL_SHM_ID");
 if ( !v9 || (v10 = atoi(v9), v11 = shmat(v10, 0LL, 0), v11 == -1LL) )
 {
 ++_afl_setup_failure;
 v5 = v16;
 return v5 + 127;
 }
 _afl_area_ptr = v11;
 _afl_global_area_ptr = v11;
 v15 = v11;
 if ( write(199, &_afl_temp, 4uLL) == 4 )
 {
 while ( 1 )
 {
 v12 = 198;
 if ( read(198, &_afl_temp, 4uLL) != 4 )
 break;
 LODWORD(v13) = fork();
 if ( v13 < 0 )
 break;
 if ( !v13 )
 goto __afl_fork_resume;
 _afl_fork_pid = v13;
 write(199, &_afl_fork_pid, 4uLL);
 v12 = _afl_fork_pid;
 LODWORD(v14) = waitpid(_afl_fork_pid, &_afl_temp, 0);
 if ( v14 <= 0 )
 break;
 write(199, &_afl_temp, 4uLL);
 }
 _exit(v12);
 }
__afl_fork_resume:
 close(198);
 close(199);
 v6 = v15;
 v5 = v16;
 a4 = v17;
 }
 }
 v7 = _afl_prev_loc ^ a4;
 _afl_prev_loc ^= v7;
 _afl_prev_loc = _afl_prev_loc >> 1;
 ++*(v6 + v7);
 return v5 + 127;
}

前面的一大段代碼其實都是為了去建立我們在上文所說的“共享內存”，在完成初始化后調用最后這么一小段代碼進行記錄：

 v7 = _afl_prev_loc ^ a4;
 _afl_prev_loc ^= v7;
 _afl_prev_loc = _afl_prev_loc >> 1;
 ++*(v6 + v7);

此處v6即為共享內存的地址，而a4為cur_location，因此v7=cur_location ^ prev_location，它將作為索引，使得共享內存中的對應偏移處的值增加。而在 fuzzer 部分就可以通過檢查這塊內存來發現是否有新路徑被得到了。

另外，_afl_prev_loc = _afl_prev_loc >> 1；的目的是為了避開A->A和B->B以及A->B和B->A被識別為相同路徑的情況。

四、其他閱讀材料

sakuraのAFL源碼全注釋

https://eternalsakura13.com/2020/08/23/afl/

fuzzer AFL 源碼分析

https://tttang.com/user/f1tao

AFL二三事——源碼分析

https://paper.seebug.org/1732/#afl-afl-asc

AFL漏洞挖掘技術漫談（一）：用AFL開始你的第一次Fuzzing