Cache insns for fast lookup in mapping.c, based on an idea by Dang Hoang Vu.
diff --git a/arch/AArch64/mapping.c b/arch/AArch64/mapping.c
index 0fb5ec0..3ffb6f8 100644
--- a/arch/AArch64/mapping.c
+++ b/arch/AArch64/mapping.c
@@ -254,6 +254,8 @@
}
static insn_map insns[] = {
+ { 0, 0, { 0 }, { 0 }, { 0 }, 0, 0 }, // dummy item
+
{ AArch64_ABS16b, ARM64_INS_ABS, { 0 }, { 0 }, { ARM64_GRP_NEON, 0 }, 0, 0 },
{ AArch64_ABS2d, ARM64_INS_ABS, { 0 }, { 0 }, { ARM64_GRP_NEON, 0 }, 0, 0 },
{ AArch64_ABS2s, ARM64_INS_ABS, { 0 }, { 0 }, { ARM64_GRP_NEON, 0 }, 0, 0 },
@@ -2990,10 +2992,13 @@
// { AArch64_SUBSxxx_lsl, ARM64_INS_NEGS, { 0 }, { ARM64_REG_NZCV, 0 }, { 0 } },
};
+static unsigned short *insn_cache = NULL;
+
+// given internal insn id, return public instruction info
void AArch64_get_insn_id(cs_insn *insn, unsigned int id, int detail)
{
- int i = insn_find(insns, ARR_SIZE(insns), id);
- if (i != -1) {
+ int i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache);
+ if (i != 0) {
insn->id = insns[i].mapid;
if (detail) {
@@ -3523,3 +3528,10 @@
return (i != -1)? i : ARM64_REG_INVALID;
}
+// release the lookup cache that insn_find() builds for insns[];
+// free(NULL) is a no-op, so no NULL guard is needed
+void AArch64_free_cache(void)
+{
+ free(insn_cache);
+ insn_cache = NULL;
+}
diff --git a/arch/AArch64/mapping.h b/arch/AArch64/mapping.h
index 08b2870..4bcd40d 100644
--- a/arch/AArch64/mapping.h
+++ b/arch/AArch64/mapping.h
@@ -21,4 +21,7 @@
// map instruction name to public instruction ID
arm64_reg AArch64_map_insn(const char *name);
+// free insn cache
+void AArch64_free_cache(void);
+
#endif
diff --git a/arch/AArch64/module.c b/arch/AArch64/module.c
index cbf638b..f22253c 100644
--- a/arch/AArch64/module.c
+++ b/arch/AArch64/module.c
@@ -30,10 +30,16 @@
return CS_ERR_OK;
}
+static void destroy(cs_struct *handle)
+{
+ AArch64_free_cache();
+}
+
static void __attribute__ ((constructor)) __init_arm64__()
{
arch_init[CS_ARCH_ARM64] = init;
- arch_option[CS_ARCH_ARM] = option;
+ arch_option[CS_ARCH_ARM64] = option;
+ arch_destroy[CS_ARCH_ARM64] = destroy;
// support this arch
all_arch |= (1 << CS_ARCH_ARM64);
diff --git a/arch/ARM/mapping.c b/arch/ARM/mapping.c
index 98e5575..d8b5584 100644
--- a/arch/ARM/mapping.c
+++ b/arch/ARM/mapping.c
@@ -135,6 +135,8 @@
}
static insn_map insns[] = {
+ { 0, 0, { 0 }, { 0 }, { 0 }, 0, 0 }, // dummy item
+
{ ARM_ADCri, ARM_INS_ADC, { ARM_REG_CPSR, 0 }, { ARM_REG_CPSR, 0 }, { ARM_GRP_ARM, 0 }, 0, 0 },
{ ARM_ADCrr, ARM_INS_ADC, { ARM_REG_CPSR, 0 }, { ARM_REG_CPSR, 0 }, { ARM_GRP_ARM, 0 }, 0, 0 },
{ ARM_ADCrsi, ARM_INS_ADC, { ARM_REG_CPSR, 0 }, { ARM_REG_CPSR, 0 }, { ARM_GRP_ARM, 0 }, 0, 0 },
@@ -2298,10 +2300,13 @@
{ ARM_tUXTH, ARM_INS_UXTH, { 0 }, { 0 }, { ARM_GRP_THUMB, ARM_GRP_THUMB1ONLY, ARM_GRP_V6, 0 }, 0, 0 },
};
+
+static unsigned short *insn_cache = NULL;
+
void ARM_get_insn_id(cs_insn *insn, unsigned int id, int detail)
{
- int i = insn_find(insns, ARR_SIZE(insns), id);
- if (i != -1) {
+ int i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache);
+ if (i != 0) {
insn->id = insns[i].mapid;
if (detail) {
@@ -2788,11 +2793,19 @@
bool ARM_rel_branch(unsigned int id)
{
- int i = insn_find(insns, ARR_SIZE(insns), id);
- if (i != -1)
+ int i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache);
+ if (i != 0)
return (insns[i].branch && !insns[i].indirect_branch);
else {
printf("ALERT: rel_branch() got incorrect id!\n");
return false;
}
}
+
+// release the lookup cache that insn_find() builds for insns[];
+// free(NULL) is a no-op, so no NULL guard is needed
+void ARM_free_cache(void)
+{
+ free(insn_cache);
+ insn_cache = NULL;
+}
diff --git a/arch/ARM/mapping.h b/arch/ARM/mapping.h
index 10866fb..62b8d92 100644
--- a/arch/ARM/mapping.h
+++ b/arch/ARM/mapping.h
@@ -25,4 +25,7 @@
// check if this insn is relative branch
bool ARM_rel_branch(unsigned int insn_id);
+// free insn cache
+void ARM_free_cache(void);
+
#endif
diff --git a/arch/ARM/module.c b/arch/ARM/module.c
index 7484a34..54108dd 100644
--- a/arch/ARM/module.c
+++ b/arch/ARM/module.c
@@ -43,10 +43,16 @@
return CS_ERR_OK;
}
+static void destroy(cs_struct *handle)
+{
+ ARM_free_cache();
+}
+
static void __attribute__ ((constructor)) __init_arm__()
{
arch_init[CS_ARCH_ARM] = init;
arch_option[CS_ARCH_ARM] = option;
+ arch_destroy[CS_ARCH_ARM] = destroy;
// support this arch
all_arch |= (1 << CS_ARCH_ARM);
diff --git a/arch/Mips/mapping.c b/arch/Mips/mapping.c
index 4893ddb..4036293 100644
--- a/arch/Mips/mapping.c
+++ b/arch/Mips/mapping.c
@@ -182,6 +182,8 @@
}
static insn_map insns[] = {
+ { 0, 0, { 0 }, { 0 }, { 0 }, 0, 0 }, // dummy item
+
{ Mips_ABSQ_S_PH, MIPS_INS_ABSQ_S, { 0 }, { MIPS_REG_DSPOUTFLAG20, 0 }, { MIPS_GRP_DSP, 0 }, 0, 0 },
{ Mips_ABSQ_S_QB, MIPS_INS_ABSQ_S, { 0 }, { MIPS_REG_DSPOUTFLAG20, 0 }, { MIPS_GRP_DSPR2, 0 }, 0, 0 },
{ Mips_ABSQ_S_W, MIPS_INS_ABSQ_S, { 0 }, { MIPS_REG_DSPOUTFLAG20, 0 }, { MIPS_GRP_DSP, 0 }, 0, 0 },
@@ -1386,6 +1388,8 @@
{ Mips_SUBu, MIPS_INS_NEGU, { 0 }, { 0 }, { MIPS_GRP_STDENC, 0 }, 0, 0 },
};
+static unsigned short *insn_cache = NULL;
+
// given internal insn id, return public instruction info
void Mips_get_insn_id(cs_insn *insn, unsigned int id, int detail)
{
@@ -1417,8 +1421,8 @@
}
}
- i = insn_find(insns, ARR_SIZE(insns), id);
- if (i != -1) {
+ i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache);
+ if (i != 0) {
insn->id = insns[i].mapid;
if (detail) {
@@ -2030,3 +2034,11 @@
// cannot find this register
return 0;
}
+
+// release the lookup cache that insn_find() builds for insns[];
+// free(NULL) is a no-op, so no NULL guard is needed
+void Mips_free_cache(void)
+{
+ free(insn_cache);
+ insn_cache = NULL;
+}
diff --git a/arch/Mips/mapping.h b/arch/Mips/mapping.h
index 78642de..7d9e74c 100644
--- a/arch/Mips/mapping.h
+++ b/arch/Mips/mapping.h
@@ -25,4 +25,7 @@
// map internal raw register to 'public' register
mips_reg Mips_map_register(unsigned int r);
+// free insn cache
+void Mips_free_cache(void);
+
#endif
diff --git a/arch/Mips/module.c b/arch/Mips/module.c
index 73fd8f8..e4d12c9 100644
--- a/arch/Mips/module.c
+++ b/arch/Mips/module.c
@@ -41,10 +41,16 @@
return CS_ERR_OK;
}
+static void destroy(cs_struct *handle)
+{
+ Mips_free_cache();
+}
+
static void __attribute__ ((constructor)) __init_mips__()
{
arch_init[CS_ARCH_MIPS] = init;
arch_option[CS_ARCH_MIPS] = option;
+ arch_destroy[CS_ARCH_MIPS] = destroy;
// support this arch
all_arch |= (1 << CS_ARCH_MIPS);
diff --git a/arch/PowerPC/module.c b/arch/PowerPC/module.c
index 8f37347..3e53d9b 100644
--- a/arch/PowerPC/module.c
+++ b/arch/PowerPC/module.c
@@ -33,10 +33,15 @@
return CS_ERR_OK;
}
+static void destroy(cs_struct *handle)
+{
+}
+
static void __attribute__ ((constructor)) __init_mips__()
{
arch_init[CS_ARCH_PPC] = init;
arch_option[CS_ARCH_PPC] = option;
+ arch_destroy[CS_ARCH_PPC] = destroy;
// support this arch
all_arch |= (1 << CS_ARCH_PPC);
diff --git a/arch/X86/mapping.c b/arch/X86/mapping.c
index f668c78..620a626 100644
--- a/arch/X86/mapping.c
+++ b/arch/X86/mapping.c
@@ -1605,6 +1605,8 @@
#include "X86GenInstrInfo.inc"
static insn_map insns[] = {
+ { 0, 0, { 0 }, { 0 }, { 0 }, 0, 0 }, // dummy item
+
{ X86_AAA, X86_INS_AAA, { 0 }, { 0 }, { X86_GRP_MODE32, 0 }, 0, 0 },
{ X86_AAD8i8, X86_INS_AAD, { 0 }, { 0 }, { X86_GRP_MODE32, 0 }, 0, 0 },
{ X86_AAM8i8, X86_INS_AAM, { 0 }, { 0 }, { X86_GRP_MODE32, 0 }, 0, 0 },
@@ -6604,11 +6606,13 @@
}
}
+static unsigned short *insn_cache = NULL;
+
// given internal insn id, return public instruction info
void X86_get_insn_id(cs_insn *insn, unsigned int id, int detail)
{
- int i = insn_find(insns, ARR_SIZE(insns), id);
- if (i != -1) {
+ int i = insn_find(insns, ARR_SIZE(insns), id, &insn_cache);
+ if (i != 0) {
insn->id = insns[i].mapid;
if (detail) {
@@ -6636,3 +6640,10 @@
return insn_reverse_id(insns, ARR_SIZE(insns), id);
}
+// release the lookup cache that insn_find() builds for insns[];
+// free(NULL) is a no-op, so no NULL guard is needed
+void X86_free_cache(void)
+{
+ free(insn_cache);
+ insn_cache = NULL;
+}
diff --git a/arch/X86/mapping.h b/arch/X86/mapping.h
index a6ec385..1065626 100644
--- a/arch/X86/mapping.h
+++ b/arch/X86/mapping.h
@@ -37,4 +37,7 @@
// post printer for X86.
void X86_post_printer(csh handle, cs_insn *pub_insn, char *insn_asm);
+// free insn cache
+void X86_free_cache(void);
+
#endif
diff --git a/arch/X86/module.c b/arch/X86/module.c
index 6c008a9..eab9f4c 100644
--- a/arch/X86/module.c
+++ b/arch/X86/module.c
@@ -44,10 +44,16 @@
return CS_ERR_OK;
}
+static void destroy(cs_struct *handle)
+{
+ X86_free_cache();
+}
+
static void __attribute__ ((constructor)) __init_x86__()
{
arch_init[CS_ARCH_X86] = init;
arch_option[CS_ARCH_X86] = option;
+ arch_destroy[CS_ARCH_X86] = destroy;
// support this arch
all_arch |= (1 << CS_ARCH_X86);
diff --git a/cs.c b/cs.c
index 47fa14b..52a0d28 100644
--- a/cs.c
+++ b/cs.c
@@ -15,6 +15,7 @@
cs_err (*arch_init[MAX_ARCH])(cs_struct *) = { NULL };
cs_err (*arch_option[MAX_ARCH]) (cs_struct*, cs_opt_type, size_t value);
+void (*arch_destroy[MAX_ARCH]) (cs_struct*);
unsigned int all_arch = 0;
@@ -101,6 +102,9 @@
+ // arch teardown must run while ud is still valid, i.e. before it is wiped and freed
+ if (arch_destroy[ud->arch])
+ arch_destroy[ud->arch](ud);
memset(ud, 0, sizeof(*ud));
free(ud);
return CS_ERR_OK;
}
diff --git a/cs_priv.h b/cs_priv.h
index d659873..b80b7f8 100644
--- a/cs_priv.h
+++ b/cs_priv.h
@@ -54,6 +54,9 @@
// support cs_option() for all archs
extern cs_err (*arch_option[MAX_ARCH]) (cs_struct*, cs_opt_type, size_t value);
+// deinitialization functions: to be called when cs_close() is called
+extern void (*arch_destroy[MAX_ARCH]) (cs_struct*);
+
extern unsigned int all_arch;
#endif
diff --git a/include/capstone.h b/include/capstone.h
index 1ef0876..64c889a 100644
--- a/include/capstone.h
+++ b/include/capstone.h
@@ -190,6 +190,10 @@
/*
Close CS handle: MUST do to release the handle when it is not used anymore.
+ NOTE: this must only be called when Capstone is no longer in use at all,
+ not even for access to the cs_insn array. The reason is that this API releases
+ some cached memory, so calling any Capstone API after cs_close() might crash
+ your application.
@handle: handle returned by cs_open()
diff --git a/utils.c b/utils.c
index 1504048..edf3c02 100644
--- a/utils.c
+++ b/utils.c
@@ -1,6 +1,7 @@
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */
+#include <stdlib.h>
#include <string.h>
#include "utils.h"
@@ -20,26 +21,32 @@
return -1;
}
-// binary searching
-int insn_find(insn_map *m, unsigned int max, unsigned int id)
+// create a cache mapping insn id -> index in @insns; return NULL if out of memory
+static unsigned short *make_id2insn(insn_map *insns, unsigned int size)
{
- unsigned int i, begin, end;
+ // NOTE: assume that the max id is always put at the end of insns array
+ unsigned int i, max_id = insns[size - 1].id;
+ // ids run from 0 to max_id inclusive, so the table needs max_id + 1 slots
- begin = 0;
- end = max;
+ unsigned short *cache = calloc(max_id + 1, sizeof(*cache));
- while(begin <= end) {
- i = (begin + end) / 2;
- if (id == m[i].id)
- return i;
- else if (id < m[i].id)
- end = i - 1;
- else
- begin = i + 1;
- }
+ for (i = 1; cache != NULL && i < size; i++)
+ cache[insns[i].id] = i;
- // found nothing
- return -1;
+ return cache;
+}
+
+// look for @id in @insns, given its size in @max. the first call builds @cache.
+// return 0 if not found, or if the cache could not be allocated
+unsigned short insn_find(insn_map *insns, unsigned int max, unsigned int id, unsigned short **cache)
+{
+ if (id > insns[max - 1].id)
+ return 0;
+
+ if (*cache == NULL)
+ *cache = make_id2insn(insns, max);
+
+ return (*cache != NULL) ? (*cache)[id] : 0;
}
int name2id(name_map* map, int max, const char *name)
@@ -79,3 +86,4 @@
return c;
}
+
diff --git a/utils.h b/utils.h
index dc73c7a..fb320fc 100644
--- a/utils.h
+++ b/utils.h
@@ -29,8 +29,9 @@
// or -1 if given string is not in the list
int str_in_list(char **list, char *s);
-// binary searching in @m, given its size in @max, and @id
-int insn_find(insn_map *m, unsigned int max, unsigned int id);
+// look for @id in @m, given its size in @max. the first call builds @cache.
+// return 0 if not found
+unsigned short insn_find(insn_map *m, unsigned int max, unsigned int id, unsigned short **cache);
// map id to string
typedef struct name_map {