/*
*
* honggfuzz - sanitizer coverage feedback parsing
* -----------------------------------------------
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/
/*
* Clang sanitizer coverage (sancov) data parsing functions. Supported methods:
* - raw unified data (preferred method)
* - individual data per executable/DSO (not preferred, since a lot of data is lost if the
*   instrumented code exits abnormally or with a signal the sanitizer doesn't handle - common
*   in the Android OS)
*
* For the raw-unpack method a global (shared across workers) Trie is created for the chosen
* initial seed and maintained until the seed is replaced. Trie nodes store the loaded
* execs/DSOs of the target application (as exposed from the *.sancov.map file), using the map
* name as key. The Trie node data struct (trieData_t) maintains information for each
* instrumented map, including a bitmap of all hit relative BB addresses (realBBAddr - baseAddr,
* to circumvent ASLR). A map's bitmap is updated as new areas of the target application are
* discovered, based on the absolute elitism implemented at fuzz_sanCovFeedback().
*
* For individual data files, a pid-based (fuzzer's thread or remote process) filename search is
* performed to identify all files belonging to the examined execution. This method doesn't yet
* implement bitmap runtime data to detect newly discovered areas. So far it's mainly used as a
* comparison metric for the raw-unpack method and as a stability check for experimental sancov
* features such as coverage counters: http://clang.llvm.org/docs/SanitizerCoverage.html
*/
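/*
* Illustrative example of why relative BB addresses are stored (addresses are made up): if
* liblog.so is loaded at base 0xb5843000 in one run and at 0xb61f0000 in the next (ASLR), the
* same basic block hit shows up as two different absolute PCs but the same relative offset:
*
*   run #1: bbAddr = 0xb5847a10, base = 0xb5843000 -> relAddr = 0x4a10
*   run #2: bbAddr = 0xb61f4a10, base = 0xb61f0000 -> relAddr = 0x4a10
*
* Only relAddr is recorded in the per-map bitmap, so re-randomized load addresses don't
* register as newly discovered coverage.
*/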
#include "sancov.h"
#include <ctype.h>
#include <dirent.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "libhfcommon/common.h"
#include "libhfcommon/files.h"
#include "libhfcommon/log.h"
#include "libhfcommon/util.h"
#include "sanitizers.h"
/* sancov files magic values */
#define kMagic32 0xC0BFFFFFFFFFFF32
#define kMagic64 0xC0BFFFFFFFFFFF64
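/*
* As parsed by sancov_sanCovParse() below, a per-module <name>.<pid>.sancov file starts with
* one of these 8-byte magic values, followed by the executed guard PCs, each 4 bytes wide for
* kMagic32 or 8 bytes wide for kMagic64:
*
*   offset 0x00:              magic (8 bytes)
*   offset 0x08 + N * pcLen:  PC #N
*/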
/*
* Each DSO/executable that has been compiled with coverage instrumentation enabled
* is detected by the compiler_rt runtime library when loaded. When the coverage_direct
* method is selected, the runtime library pre-allocates kPcArrayMmapSize [1] byte
* chunks until the total capacity of the chunks exceeds the number of inserted
* guards. This effectively means that we might end up with a large unused (zero-filled)
* area that we can't identify at runtime (binary inspection would be required).
*
* The size overhead of the runtime-maintained data structs is not affected, since a
* fixed-size bitmap is used. However, the displayed coverage statistics are not very
* accurate because:
* a) An ASan-compiled DSO might get loaded even though it's not exercised by the
*    monitored execution, affecting the counters
* b) Not all zero-filled entries translate into non-hit basic blocks, as they might
*    just be chunk padding
*
* There probably isn't much we can do about this without introducing a huge
* performance overhead in an already costly feedback method.
*
* [1]
* 'https://llvm.org/svn/llvm-project/compiler-rt/branches/release_38/lib/sanitizer_common/sanitizer_coverage_libcdep.cc'
*/
#define kPcArrayMmapSize (64 * 1024)
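/*
* Worked example (a hypothetical DSO, for illustration only): with 64-bit PCs a single chunk
* holds kPcArrayMmapSize / 8 = 8192 guards. A DSO instrumented with 9000 guards therefore gets
* two chunks (16384 slots), leaving 7384 trailing zero-filled slots that are padding rather
* than non-hit basic blocks - which is exactly case (b) above.
*/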
/*
* bitmap implementation
*/
static bitmap_t* sancov_newBitmap(uint32_t capacity) {
bitmap_t* pBM = util_Malloc(sizeof(bitmap_t));
pBM->capacity = capacity;
pBM->nChunks = (capacity + 31) / 32;
pBM->pChunks = util_Calloc(pBM->nChunks * sizeof(uint32_t));
return pBM;
}
static inline bool sancov_queryBitmap(bitmap_t* pBM, uint32_t index) {
if (index >= pBM->capacity) {
LOG_E("bitmap overflow (%u)", index);
return false;
}
if (pBM->pChunks[index / 32] & (1 << (index % 32))) {
return true;
}
return false;
}
static inline void sancov_setBitmap(bitmap_t* pBM, uint32_t index) {
/* Bounds check - skip out-of-range indexes instead of writing outside the allocated chunks */
if (index >= pBM->capacity) {
LOG_E("Out of range index (%u >= %u)", index, pBM->capacity);
return;
}
pBM->pChunks[index / 32] |= (1 << (index % 32));
}
static inline void sancov_destroyBitmap(bitmap_t* pBM) {
free(pBM->pChunks);
free(pBM);
}
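/*
* Minimal usage sketch of the bitmap helpers above (the capacity value is arbitrary and for
* illustration only; the raw parser uses _HF_SANCOV_BITMAP_SIZE):
*
*   bitmap_t* pBM = sancov_newBitmap(1024);
*   if (!sancov_queryBitmap(pBM, 0x2a)) {
*       sancov_setBitmap(pBM, 0x2a);   // first time this index is seen
*   }
*   sancov_queryBitmap(pBM, 0x2a);     // now returns true
*   sancov_destroyBitmap(pBM);
*/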
/*
* Trie implementation
*/
static node_t* sancov_trieCreateNode(char key) {
node_t* node = (node_t*)util_Malloc(sizeof(node_t));
node->key = key;
node->next = NULL;
node->children = NULL;
node->parent = NULL;
node->prev = NULL;
/* Zero init node's data struct */
memset(&node->data, 0, sizeof(trieData_t));
return node;
}
static node_t* sancov_trieSearch(node_t* root, const char* key) {
node_t* pNodeLevel = root;
while (1) {
node_t *pNodeFound = NULL, *pCurNode = NULL;
for (pCurNode = pNodeLevel; pCurNode != NULL; pCurNode = pCurNode->next) {
if (pCurNode->key == *key) {
pNodeFound = pCurNode;
break;
}
}
if (pNodeFound == NULL) {
return NULL;
}
if (*key == '\0') {
return pNodeFound;
}
pNodeLevel = pNodeFound->children;
key++;
}
}
static void sancov_trieAdd(node_t** root, const char* key) {
if (*root == NULL) {
LOG_E("Invalid Trie (NULL root node)");
return;
}
/* Traverse Trie */
node_t* pTravNode = (*root)->children;
if (pTravNode == NULL) {
/* First node */
for (pTravNode = *root; *key != '\0'; pTravNode = pTravNode->children) {
pTravNode->children = sancov_trieCreateNode(*key);
pTravNode->children->parent = pTravNode;
key++;
}
pTravNode->children = sancov_trieCreateNode('\0');
pTravNode->children->parent = pTravNode;
return;
}
while (*key != '\0') {
if (*key == pTravNode->key) {
key++;
pTravNode = pTravNode->children;
} else {
break;
}
}
while (pTravNode->next) {
if (*key == pTravNode->next->key) {
key++;
sancov_trieAdd(&(pTravNode->next), key);
return;
}
pTravNode = pTravNode->next;
}
pTravNode->next = sancov_trieCreateNode(*key);
pTravNode->next->parent = pTravNode->parent;
pTravNode->next->prev = pTravNode;
if (!*key) {
return;
}
key++;
for (pTravNode = pTravNode->next; *key; pTravNode = pTravNode->children) {
pTravNode->children = sancov_trieCreateNode(*key);
pTravNode->children->parent = pTravNode;
key++;
}
pTravNode->children = sancov_trieCreateNode('\0');
pTravNode->children->parent = pTravNode;
return;
}
static inline void sancov_trieFreeNode(node_t* node) {
/* First destroy bitmap heap buffers allocated for instrumented maps */
if (node->data.pBM) {
sancov_destroyBitmap(node->data.pBM);
}
free(node);
}
static inline void sancov_trieCreate(node_t** root) {
/* Create root node if new Trie */
*root = sancov_trieCreateNode('\0');
}
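/*
* Minimal usage sketch of the Trie helpers above ("liblog.so" is just an example key; the raw
* parser keys nodes by map name). Note that lookups start one level below the root, mirroring
* how sancov_sanCovParseRaw() calls sancov_trieSearch():
*
*   node_t* root = NULL;
*   sancov_trieCreate(&root);
*   sancov_trieAdd(&root, "liblog.so");
*   node_t* node = sancov_trieSearch(root->children, "liblog.so");
*   if (node) {
*       node->data.pBM = sancov_newBitmap(_HF_SANCOV_BITMAP_SIZE);
*   }
*/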
/* Destroy Trie - iterate nodes and free memory */
HF_ATTR_UNUSED static void sancov_trieDestroy(node_t* root) {
node_t* pNode = root;
node_t* pNodeTmp = root;
while (pNode) {
while (pNode->children) {
pNode = pNode->children;
}
if (pNode->prev && pNode->next) {
pNodeTmp = pNode;
pNode->next->prev = pNode->prev;
pNode->prev->next = pNode->next;
sancov_trieFreeNode(pNodeTmp);
} else if (pNode->prev && !pNode->next) {
pNodeTmp = pNode;
pNode->prev->next = NULL;
sancov_trieFreeNode(pNodeTmp);
} else if (!pNode->prev && pNode->next) {
pNodeTmp = pNode;
pNode->parent->children = pNode->next;
pNode->next->prev = NULL;
pNode = pNode->next;
sancov_trieFreeNode(pNodeTmp);
} else {
pNodeTmp = pNode;
if (pNode->parent == NULL) {
/* Root */
sancov_trieFreeNode(pNodeTmp);
return;
}
pNode = pNode->parent;
pNode->children = NULL;
sancov_trieFreeNode(pNodeTmp);
}
}
}
/* Modified interpolation search algorithm to search for nearest address fit */
static inline uint64_t sancov_interpSearch(uint64_t* buf, uint64_t size, uint64_t key) {
/* Avoid extra checks assuming caller always provides non-zero array size */
uint64_t low = 0;
uint64_t high = size - 1;
uint64_t mid = high;
while (buf[high] != buf[low] && key >= buf[low] && key <= buf[high]) {
mid = low + (key - buf[low]) * ((high - low) / (buf[high] - buf[low]));
if (buf[mid] < key) {
low = mid + 1;
} else if (key < buf[mid]) {
high = mid - 1;
} else {
return mid;
}
}
return mid;
}
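/*
* Usage sketch for the nearest-fit search above (addresses are made up). The returned index is
* only a best candidate - the caller still has to verify that the address really falls inside
* [start, end) of that map, as done in sancov_sanCovParseRaw():
*
*   uint64_t starts[] = {0x400000, 0x7f0000000000, 0x7f0000200000};
*   uint64_t idx = sancov_interpSearch(starts, 3, 0x7f0000000a10);   // -> 1
*/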
/* qsort struct comparison function (memMap_t struct start addr field) */
static int sancov_qsortCmp(const void* a, const void* b) {
memMap_t* pA = (memMap_t*)a;
memMap_t* pB = (memMap_t*)b;
if (pA->start < pB->start) {
return -1;
} else if (pA->start > pB->start) {
return 1;
} else {
/* Normally we should never hit that case */
LOG_W("Duplicate map start addr detected");
return 0;
}
}
static bool sancov_sanCovParseRaw(run_t* run) {
int dataFd = -1;
uint8_t* dataBuf = NULL;
off_t dataFileSz = 0, pos = 0;
bool is32bit = true;
char covFile[PATH_MAX] = {0};
pid_t targetPid = (run->global->linux.pid > 0) ? run->global->linux.pid : run->pid;
/* Fuzzer local runtime data structs - need free() before exit */
uint64_t* startMapsIndex = NULL;
memMap_t* mapsBuf = NULL;
/* Local counters */
uint64_t nBBs = 0; /* Total BB hits found in raw file */
uint64_t nZeroBBs = 0; /* Number of non-hit instrumented BBs */
uint64_t mapsNum = 0; /* Total number of entries in map file */
uint64_t noCovMapsNum = 0; /* Loaded DSOs not compiled with coverage */
/* File line-by-line read help buffers */
__block char* pLine = NULL;
size_t lineSz = 0;
/* Coverage data analysis starts by parsing map file listing */
snprintf(covFile, sizeof(covFile), "%s/%s/%d.sancov.map", run->global->io.workDir,
_HF_SANCOV_DIR, targetPid);
if (!files_exists(covFile)) {
LOG_D("sancov map file not found");
return false;
}
FILE* fCovMap = fopen(covFile, "rb");
if (fCovMap == NULL) {
PLOG_E("Couldn't open '%s' - R/O mode", covFile);
return false;
}
defer {
fclose(fCovMap);
};
/* First line contains PC length (32/64-bit) */
if (getline(&pLine, &lineSz, fCovMap) == -1) {
LOG_E("Invalid map file '%s'", covFile);
return false;
}
defer {
free(pLine);
pLine = NULL;
};
int pcLen = atoi(pLine);
if (pcLen == 32) {
is32bit = true;
} else if (pcLen == 64) {
is32bit = false;
} else {
LOG_E("Invalid PC length (%d) in map file '%s'", pcLen, covFile);
return false;
}
/* See if #maps is available from previous run to avoid realloc inside loop */
uint64_t prevMapsNum = ATOMIC_GET(run->global->sanCovCnts.dsoCnt);
if (prevMapsNum > 0) {
mapsBuf = util_Malloc(prevMapsNum * sizeof(memMap_t));
}
/* It's OK to free(NULL) */
defer {
free(mapsBuf);
};
/* Iterate map entries */
for (;;) {
if (getline(&pLine, &lineSz, fCovMap) == -1) {
break;
}
/* Trim trailing whitespace (kept for parity with upstream sancov.py) */
char* lineEnd = pLine + strlen(pLine) - 1;
while (lineEnd > pLine && isspace((int)*lineEnd)) {
lineEnd--;
}
*(lineEnd + 1) = 0;
/*
* Each line has following format:
* Start End Base bin/DSO name
* b5843000 b584e6ac b5843000 liblog.so
*/
memMap_t mapData = {.start = 0};
char* savePtr = NULL;
char* pStart = strtok_r(pLine, " ", &savePtr);
char* pEnd = strtok_r(NULL, " ", &savePtr);
char* pBase = strtok_r(NULL, " ", &savePtr);
char* mapName = strtok_r(NULL, " ", &savePtr);
if (!pStart || !pEnd || !pBase || !mapName) {
LOG_W("Skipping malformed line in map file '%s'", covFile);
continue;
}
mapData.start = strtoull(pStart, NULL, 16);
mapData.end = strtoull(pEnd, NULL, 16);
mapData.base = strtoull(pBase, NULL, 16);
snprintf(mapData.mapName, sizeof(mapData.mapName), "%s", mapName);
/* Interaction with the global Trie must be mutex-protected to avoid races between threads */
{
MX_SCOPED_LOCK(&run->global->sanCov_mutex);
/* Add entry to Trie with zero data if not already */
if (!sancov_trieSearch(run->global->covMetadata->children, mapData.mapName)) {
sancov_trieAdd(&run->global->covMetadata, mapData.mapName);
}
}
/* Realloc local maps metadata buffer if it wasn't pre-allocated (first run) or if more DSOs
* are loaded than during the previous run
*/
if (mapsNum >= prevMapsNum) {
if ((mapsBuf = util_Realloc(mapsBuf, (size_t)(mapsNum + 1) * sizeof(memMap_t))) ==
NULL) {
PLOG_E("realloc failed (sz=%" PRIu64 ")", (mapsNum + 1) * sizeof(memMap_t));
return false;
}
}
/* Add entry to local maps metadata array */
memcpy(&mapsBuf[mapsNum], &mapData, sizeof(memMap_t));
/* Increase loaded maps counter (includes non-instrumented DSOs too) */
mapsNum++;
}
/* Delete .sancov.map file */
if (run->global->linux.pid == 0 && run->global->exe.persistent == false) {
unlink(covFile);
}
if (mapsNum == 0) {
LOG_W("No map entries parsed from '%s'", covFile);
return false;
}
/* Create a quick index array with maps start addresses */
startMapsIndex = util_Malloc(mapsNum * sizeof(uint64_t));
defer {
free(startMapsIndex);
};
/* Sort quick maps index */
qsort(mapsBuf, mapsNum, sizeof(memMap_t), sancov_qsortCmp);
for (size_t i = 0; i < mapsNum; i++) {
startMapsIndex[i] = mapsBuf[i].start;
}
/* mmap() .sancov.raw file */
snprintf(covFile, sizeof(covFile), "%s/%s/%d.sancov.raw", run->global->io.workDir,
_HF_SANCOV_DIR, targetPid);
dataBuf = files_mapFile(covFile, &dataFileSz, &dataFd, false);
if (dataBuf == NULL) {
LOG_E("Couldn't open and map '%s' in R/O mode", covFile);
return false;
}
defer {
munmap(dataBuf, dataFileSz);
close(dataFd);
};
/*
* Avoid cost of size checks inside raw data read loop by defining the read function
* & pivot size based on PC length.
*/
uint64_t (*pReadRawBBAddrFunc)(const uint8_t*) = NULL;
uint8_t pivot = 0;
if (is32bit) {
pReadRawBBAddrFunc = &util_getUINT32;
pivot = 4;
} else {
pReadRawBBAddrFunc = &util_getUINT64;
pivot = 8;
}
/*
* Take advantage of data locality (next processed addr is very likely to belong
* to same map) to avoid Trie node search for each read entry.
*/
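/*
* Example of the effect (addresses made up): consecutive raw entries 0x7f0000001000 and
* 0x7f0000001040 usually resolve to the same bestFit map, so the second one reuses the cached
* curMap node instead of taking the mutex and walking the Trie again.
*/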
node_t* curMap = NULL;
uint64_t prevIndex = 0;
/* Iterate over data buffer containing list of hit BB addresses */
while (pos < dataFileSz) {
uint64_t bbAddr = pReadRawBBAddrFunc(dataBuf + pos);
pos += pivot;
/* Don't bother for zero BB addr (inserted checks without hit) */
if (bbAddr == 0x0) {
nZeroBBs++;
continue;
} else {
/* Find best hit based on start addr & verify range for errors */
uint64_t bestFit = sancov_interpSearch(startMapsIndex, mapsNum, bbAddr);
if (bbAddr >= mapsBuf[bestFit].start && bbAddr < mapsBuf[bestFit].end) {
/* Increase exe/DSO total BB counter */
mapsBuf[bestFit].bbCnt++;
/* Update current Trie node if map changed */
if (curMap == NULL || (prevIndex != bestFit)) {
prevIndex = bestFit;
/* Interaction with the global Trie must be mutex-protected to avoid races between threads */
{
MX_SCOPED_LOCK(&run->global->sanCov_mutex);
curMap = sancov_trieSearch(
run->global->covMetadata->children, mapsBuf[bestFit].mapName);
if (curMap == NULL) {
LOG_E("Corrupted Trie - '%s' not found", mapsBuf[bestFit].mapName);
continue;
}
/* Maintain bitmaps only for exec/DSOs with coverage enabled - allocate on
* first use */
if (curMap->data.pBM == NULL) {
LOG_D("Allocating bitmap for map '%s'", mapsBuf[bestFit].mapName);
curMap->data.pBM = sancov_newBitmap(_HF_SANCOV_BITMAP_SIZE);
/*
* If bitmap allocation failed, unset cached Trie node ptr
* to execute this selection branch again.
*/
if (curMap->data.pBM == NULL) {
curMap = NULL;
continue;
}
}
}
}
/* If new relative BB addr update DSO's bitmap */
uint32_t relAddr = (uint32_t)(bbAddr - mapsBuf[bestFit].base);
if (!sancov_queryBitmap(curMap->data.pBM, relAddr)) {
/* Interaction with the global Trie must be mutex-protected to avoid races between threads */
{
MX_SCOPED_LOCK(&run->global->sanCov_mutex);
sancov_setBitmap(curMap->data.pBM, relAddr);
}
/* Also increase new BBs counter at worker's thread runtime data */
mapsBuf[bestFit].newBBCnt++;
}
} else {
/*
* Normally this should never get executed. If it is hit, the sanitizer
* coverage data collection has come across some kind of bug.
*/
LOG_E("Invalid BB addr (%#" PRIx64 ") at offset %" PRIu64, bbAddr, (uint64_t)pos);
}
}
nBBs++;
}
/* Finally, iterate over all instrumented maps to sum up the number of newly discovered BB addresses */
for (uint64_t i = 0; i < mapsNum; i++) {
if (mapsBuf[i].bbCnt > 0) {
run->sanCovCnts.newBBCnt += mapsBuf[i].newBBCnt;
} else {
noCovMapsNum++;
}
}
/* Successful parsing - update fuzzer worker's counters */
run->sanCovCnts.hitBBCnt = nBBs;
run->sanCovCnts.totalBBCnt = nBBs + nZeroBBs;
run->sanCovCnts.dsoCnt = mapsNum;
run->sanCovCnts.iDsoCnt = mapsNum - noCovMapsNum; /* Instrumented DSOs */
if (run->global->linux.pid == 0 && run->global->exe.persistent == false) {
unlink(covFile);
}
return true;
}
static bool sancov_sanCovParse(run_t* run) {
int dataFd = -1;
uint8_t* dataBuf = NULL;
off_t dataFileSz = 0, pos = 0;
bool is32bit = true;
char covFile[PATH_MAX] = {0};
DIR* pSanCovDir = NULL;
pid_t targetPid = (run->global->linux.pid > 0) ? run->global->linux.pid : run->pid;
snprintf(covFile, sizeof(covFile), "%s/%s/%s.%d.sancov", run->global->io.workDir,
_HF_SANCOV_DIR, files_basename(run->global->exe.cmdline[0]), targetPid);
if (!files_exists(covFile)) {
LOG_D("Target sancov file not found");
return false;
}
/* Local cache file suffix to use for file search of target pid data */
char pidFSuffix[32] = {0};
snprintf(pidFSuffix, sizeof(pidFSuffix), "%d.sancov", targetPid);
/* Total BBs counter summarizes all DSOs */
uint64_t nBBs = 0;
/* Iterate sancov dir for files generated against target pid */
snprintf(covFile, sizeof(covFile), "%s/%s", run->global->io.workDir, _HF_SANCOV_DIR);
pSanCovDir = opendir(covFile);
if (pSanCovDir == NULL) {
PLOG_E("opendir('%s')", covFile);
return false;
}
defer {
closedir(pSanCovDir);
};
struct dirent* pDir = NULL;
while ((pDir = readdir(pSanCovDir)) != NULL) {
/* Parse files with target's pid */
if (strstr(pDir->d_name, pidFSuffix)) {
snprintf(covFile, sizeof(covFile), "%s/%s/%s", run->global->io.workDir, _HF_SANCOV_DIR,
pDir->d_name);
dataBuf = files_mapFile(covFile, &dataFileSz, &dataFd, false);
if (dataBuf == NULL) {
LOG_E("Couldn't open and map '%s' in R/O mode", covFile);
return false;
}
defer {
munmap(dataBuf, dataFileSz);
close(dataFd);
};
if (dataFileSz < 8) {
LOG_E("Coverage data file too short");
return false;
}
/* Check magic values & derive PC length */
uint64_t magic = util_getUINT64(dataBuf);
if (magic == kMagic32) {
is32bit = true;
} else if (magic == kMagic64) {
is32bit = false;
} else {
LOG_E("Invalid coverage data file");
return false;
}
pos += 8;
/*
* Avoid cost of size checks inside raw data read loop by defining the read function
* & pivot size based on PC length.
*/
uint64_t (*pReadRawBBAddrFunc)(const uint8_t*) = NULL;
uint8_t pivot = 0;
if (is32bit) {
pReadRawBBAddrFunc = &util_getUINT32;
pivot = 4;
} else {
pReadRawBBAddrFunc = &util_getUINT64;
pivot = 8;
}
while (pos < dataFileSz) {
uint64_t bbAddr = pReadRawBBAddrFunc(dataBuf + pos);
pos += pivot;
if (bbAddr == 0x0) {
continue;
}
nBBs++;
}
}
}
/* Successful parsing - update fuzzer worker counters */
run->sanCovCnts.hitBBCnt = nBBs;
if (run->global->linux.pid == 0 && run->global->exe.persistent == false) {
unlink(covFile);
}
return true;
}
/*
* Sanitizer coverage data are stored in the FS and can be parsed via two methods:
* raw unpack & separate bin/DSO sancov files. The separate bin/DSO sancov file
* method is usually avoided, since coverage data are lost if the sanitizer hits an
* unhandled signal. Additionally, its FS I/O overhead is bigger compared to the
* raw unpack method, which uses runtime data structures.
*
* Enabled methods are controlled from the sanitizer flags in arch.c
*/
void sancov_Analyze(run_t* run) {
if (!run->global->useSanCov) {
return;
}
/*
* For now the supported methods are implemented in a fail-over fashion. This will
* change once the best method has been settled on.
*/
if (sancov_sanCovParseRaw(run) == false) {
sancov_sanCovParse(run);
}
}
bool sancov_Init(honggfuzz_t* hfuzz) {
if (hfuzz->useSanCov == false) {
return true;
}
sancov_trieCreate(&hfuzz->covMetadata);
char sanCovOutDir[PATH_MAX] = {0};
snprintf(sanCovOutDir, sizeof(sanCovOutDir), "%s/%s", hfuzz->io.workDir, _HF_SANCOV_DIR);
if (!files_exists(sanCovOutDir)) {
if (mkdir(sanCovOutDir, S_IRWXU | S_IXGRP | S_IXOTH) != 0) {
PLOG_E("mkdir() '%s' failed", sanCovOutDir);
}
}
return true;
}