x86: properly handle LOCK/REP in the core, so remove a bunch of hacks
diff --git a/MCInst.h b/MCInst.h
index 22dfc56..6a988b6 100644
--- a/MCInst.h
+++ b/MCInst.h
@@ -145,6 +145,11 @@
 	uint64_t address;	// address of this insn
 	cs_struct *csh;	// save the main csh
 	uint8_t x86_imm_size;	// save immediate size to print immediate properly
+
+	// (Optional) instruction prefix, which can be up to 5 bytes.
+	// A prefix byte gets value 0 when irrelevant.
+	// This is copied from cs_x86 struct
+	uint8_t x86_prefix[5];
 };
 
 void MCInst_Init(MCInst *inst);
diff --git a/arch/AArch64/AArch64Disassembler.c b/arch/AArch64/AArch64Disassembler.c
index a513ae4..7c48c02 100644
--- a/arch/AArch64/AArch64Disassembler.c
+++ b/arch/AArch64/AArch64Disassembler.c
@@ -289,7 +289,7 @@
 	return MCDisassembler_Fail;
 }
 
-bool AArch64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool AArch64_getInstruction(csh ud, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info)
 {
 	DecodeStatus status = _getInstruction((cs_struct *)ud, instr,
diff --git a/arch/AArch64/AArch64Disassembler.h b/arch/AArch64/AArch64Disassembler.h
index a8c1d65..0f84c85 100644
--- a/arch/AArch64/AArch64Disassembler.h
+++ b/arch/AArch64/AArch64Disassembler.h
@@ -12,7 +12,7 @@
 
 void AArch64_init(MCRegisterInfo *MRI);
 
-bool AArch64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool AArch64_getInstruction(csh ud, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
 #endif
diff --git a/arch/ARM/ARMDisassembler.c b/arch/ARM/ARMDisassembler.c
index 74b5b5e..aaea84b 100644
--- a/arch/ARM/ARMDisassembler.c
+++ b/arch/ARM/ARMDisassembler.c
@@ -854,7 +854,7 @@
 	return MCDisassembler_Fail;
 }
 
-bool Thumb_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
+bool Thumb_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
 		uint16_t *size, uint64_t address, void *info)
 {
 	DecodeStatus status = _Thumb_getInstruction((cs_struct *)ud, instr, code, code_len, size, address);
@@ -863,7 +863,7 @@
 	return status != MCDisassembler_Fail;
 }
 
-bool ARM_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
+bool ARM_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
 		uint16_t *size, uint64_t address, void *info)
 {
 	DecodeStatus status = _ARM_getInstruction((cs_struct *)ud, instr, code, code_len, size, address);
diff --git a/arch/ARM/ARMDisassembler.h b/arch/ARM/ARMDisassembler.h
index 606481e..585c6a4 100644
--- a/arch/ARM/ARMDisassembler.h
+++ b/arch/ARM/ARMDisassembler.h
@@ -9,9 +9,9 @@
 
 void ARM_init(MCRegisterInfo *MRI);
 
-bool ARM_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
+bool ARM_getInstruction(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
-bool Thumb_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
+bool Thumb_getInstruction(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
 uint64_t ARM_getFeatureBits(int mode);
 
diff --git a/arch/Mips/MipsDisassembler.c b/arch/Mips/MipsDisassembler.c
index e14a67b..6937499 100644
--- a/arch/Mips/MipsDisassembler.c
+++ b/arch/Mips/MipsDisassembler.c
@@ -280,7 +280,7 @@
 	return MCDisassembler_Fail;
 }
 
-bool Mips_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
+bool Mips_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
 		uint16_t *size, uint64_t address, void *info)
 {
 	cs_struct *handle = (cs_struct *)(uintptr_t)ud;
@@ -320,7 +320,7 @@
 	return MCDisassembler_Fail;
 }
 
-bool Mips64_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr,
+bool Mips64_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *instr,
 		uint16_t *size, uint64_t address, void *info)
 {
 	cs_struct *handle = (cs_struct *)(uintptr_t)ud;
diff --git a/arch/Mips/MipsDisassembler.h b/arch/Mips/MipsDisassembler.h
index 8f19ba5..df50d46 100644
--- a/arch/Mips/MipsDisassembler.h
+++ b/arch/Mips/MipsDisassembler.h
@@ -11,10 +11,10 @@
 
 void Mips_init(MCRegisterInfo *MRI);
 
-bool Mips_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool Mips_getInstruction(csh handle, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
-bool Mips64_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool Mips64_getInstruction(csh handle, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
 #endif
diff --git a/arch/PowerPC/PPCDisassembler.c b/arch/PowerPC/PPCDisassembler.c
index da4aeda..43cab0b 100644
--- a/arch/PowerPC/PPCDisassembler.c
+++ b/arch/PowerPC/PPCDisassembler.c
@@ -299,7 +299,7 @@
 	return MCDisassembler_Fail;
 }
 
-bool PPC_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool PPC_getInstruction(csh ud, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info)
 {
 	DecodeStatus status = getInstruction(instr,
diff --git a/arch/PowerPC/PPCDisassembler.h b/arch/PowerPC/PPCDisassembler.h
index f05522a..c26d970 100644
--- a/arch/PowerPC/PPCDisassembler.h
+++ b/arch/PowerPC/PPCDisassembler.h
@@ -12,7 +12,7 @@
 
 void PPC_init(MCRegisterInfo *MRI);
 
-bool PPC_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool PPC_getInstruction(csh ud, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
 #endif
diff --git a/arch/Sparc/SparcDisassembler.c b/arch/Sparc/SparcDisassembler.c
index 1601c5d..f2e72ae 100644
--- a/arch/Sparc/SparcDisassembler.c
+++ b/arch/Sparc/SparcDisassembler.c
@@ -219,7 +219,7 @@
 	return MCDisassembler_Success;
 }
 
-bool Sparc_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *MI,
+bool Sparc_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *MI,
 		uint16_t *size, uint64_t address, void *info)
 {
 	uint32_t Insn;
diff --git a/arch/Sparc/SparcDisassembler.h b/arch/Sparc/SparcDisassembler.h
index d36735f..bafa230 100644
--- a/arch/Sparc/SparcDisassembler.h
+++ b/arch/Sparc/SparcDisassembler.h
@@ -12,7 +12,7 @@
 
 void Sparc_init(MCRegisterInfo *MRI);
 
-bool Sparc_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool Sparc_getInstruction(csh ud, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
 #endif
diff --git a/arch/SystemZ/SystemZDisassembler.c b/arch/SystemZ/SystemZDisassembler.c
index f86c5ad..c6d50af 100644
--- a/arch/SystemZ/SystemZDisassembler.c
+++ b/arch/SystemZ/SystemZDisassembler.c
@@ -295,7 +295,7 @@
 #define GET_SUBTARGETINFO_ENUM
 #include "SystemZGenSubtargetInfo.inc"
 #include "SystemZGenDisassemblerTables.inc"
-bool SystemZ_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *MI,
+bool SystemZ_getInstruction(csh ud, const uint8_t *code, size_t code_len, MCInst *MI,
 		uint16_t *size, uint64_t address, void *info)
 {
 	uint64_t Inst;
diff --git a/arch/SystemZ/SystemZDisassembler.h b/arch/SystemZ/SystemZDisassembler.h
index 3752791..e77a738 100644
--- a/arch/SystemZ/SystemZDisassembler.h
+++ b/arch/SystemZ/SystemZDisassembler.h
@@ -12,7 +12,7 @@
 
 void SystemZ_init(MCRegisterInfo *MRI);
 
-bool SystemZ_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool SystemZ_getInstruction(csh ud, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
 #endif
diff --git a/arch/X86/X86Disassembler.c b/arch/X86/X86Disassembler.c
index 76e529d..8a3311b 100644
--- a/arch/X86/X86Disassembler.c
+++ b/arch/X86/X86Disassembler.c
@@ -691,7 +691,7 @@
 	c = 0;
 	for(i = 0; i < 0x100; i++) {
 		if (inter->prefixPresent[i] > 0) {
-			pub->x86.prefix[c] = inter->prefixPresent[i];
+			pub->x86.prefix[c] = i;
 			c++;
 		}
 	}
@@ -720,6 +720,7 @@
 	pub->x86.sib_base = x86_map_sib_base(inter->sibBase);
 }
 
+#if 0
 // classify a byte intn prefix group (or 0 if it is not a prefix)
 static uint8_t prefix_group(uint8_t c)
 {
@@ -743,9 +744,10 @@
 			return 4;
 	}
 }
+#endif
 
 // Public interface for the disassembler
-bool X86_getInstruction(csh ud, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *_info)
 {
 	cs_struct *handle = (cs_struct *)(uintptr_t)ud;
@@ -753,71 +755,8 @@
 	struct reader_info info;
 	int ret;
 	bool result;
-	size_t i;
-	int count = 0;
-	uint8_t p;
-	uint8_t *buffer;
 
-	// hack: shuffle LOCK/REP/REPNE prefixes to the front.
-	// this is because LLVM make a cut at these prefixes to create a new insn.
-	if (*modcode != NULL)
-		// so we actually work on the modified buffer
-		buffer = *modcode;
-	else
-		buffer = (uint8_t *)code;
-
-	// find the first non-prefix byte
-	for (i = 0; i < code_len; i++) {
-		p = prefix_group(buffer[i]);
-		if (p == 1)
-			count++;
-		else if (p == 0) {
-			// the first ever non-prefix byte
-			// ignore if there is no prefix from Group 1 (LOCK/REP/REPNE)
-			if (i == 0 || count == 0)
-				break;
-			else {
-				// x86 instruction has no more than 16 bytes
-				uint8_t b1, b2;
-				size_t j;
-				uint8_t *prefixes;
-
-				// create @modcode for modifying if we didnt do that before
-				if (*modcode == NULL) {
-					uint8_t *tmpbuf = cs_mem_malloc(code_len);
-					// copy @code to @modcode
-					memcpy(tmpbuf, code, code_len);
-					buffer = tmpbuf;
-					*modcode = tmpbuf;
-				}
-
-				// save all prefix bytes in original code
-				prefixes = cs_mem_malloc(i);
-				memcpy(prefixes, buffer, i);
-
-				b1 = 0;
-				b2 = count;
-				for (j = 0; j < i; j++) {
-					if (prefix_group(prefixes[j]) == 1) {
-						// this is one of LOCK/REP/REPNE, so put it at the front
-						buffer[b1] = prefixes[j];
-						b1++;
-					} else {
-						// put this prefix at the back, after LOCK/REP/REPNE
-						buffer[b2] = prefixes[j];
-						b2++;
-					}
-				}
-
-				cs_mem_free(prefixes);
-
-				// done, break out of this loop
-				break;
-			}
-		}
-	}
-
-	info.code = buffer;
+	info.code = code;
 	info.size = code_len;
 	info.offset = address;
 
@@ -844,12 +783,23 @@
 
 		return false;
 	} else {
+		int i, c;
+
 		*size = (uint16_t)insn.length;
 		result = (!translateInstruction(instr, &insn)) ?  true : false;
 		if (result) {
 			if (handle->detail)
 				update_pub_insn(&instr->flat_insn, &insn);
 
+			// copy all prefixes
+			c = 0;
+			for(i = 0; i < 0x100; i++) {
+				if (insn.prefixPresent[i] > 0) {
+					instr->x86_prefix[c] = i;
+					c++;
+				}
+			}
+
 			// save immediate size to print immediate properly
 			instr->x86_imm_size = insn.immediateSize;
 		}
diff --git a/arch/X86/X86Disassembler.h b/arch/X86/X86Disassembler.h
index 8a85190..7c1f47c 100644
--- a/arch/X86/X86Disassembler.h
+++ b/arch/X86/X86Disassembler.h
@@ -95,7 +95,7 @@
 #undef INSTRUCTION_SPECIFIER_FIELDS
 #undef INSTRUCTION_IDS
 
-bool X86_getInstruction(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len,
+bool X86_getInstruction(csh handle, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
 #endif
diff --git a/arch/X86/X86DisassemblerDecoder.c b/arch/X86/X86DisassemblerDecoder.c
index 5764175..94a8f41 100644
--- a/arch/X86/X86DisassemblerDecoder.c
+++ b/arch/X86/X86DisassemblerDecoder.c
@@ -425,8 +425,8 @@
 		 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
 		 * break and let it be disassembled as a normal "instruction".
 		 */
-		if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
-			break;
+		//if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+		//	break;
 
 		if (insn->readerCursor - 1 == insn->startLocation
 				&& (byte == 0xf2 || byte == 0xf3)
@@ -460,8 +460,8 @@
 					return -1;
 				unconsumeByte(insn);
 			}
-			if (nextByte != 0x0f && nextByte != 0x90)
-				break;
+			//if (nextByte != 0x0f && nextByte != 0x90)
+			//	break;
 		}
 
 		switch (byte) {
diff --git a/arch/X86/X86GenAsmWriter.inc b/arch/X86/X86GenAsmWriter.inc
index 12ae90e..5d2745f 100644
--- a/arch/X86/X86GenAsmWriter.inc
+++ b/arch/X86/X86GenAsmWriter.inc
@@ -12498,6 +12498,24 @@
   uint64_t Bits = (Bits2 << 32) | Bits1;
   // assert(Bits != 0 && "Cannot print this instruction.");
 #ifndef CAPSTONE_DIET
+  int i;
+
+  for(i = 0; i < ARR_SIZE(MI->x86_prefix); i++) {
+    switch(MI->x86_prefix[i]) {
+      default:
+        break;
+      case 0xf0:
+        SStream_concat(O, "lock|");
+        break;
+      case 0xf2:
+        SStream_concat(O, "repne|");
+        break;
+      case 0xf3:
+        SStream_concat(O, "rep|");
+        break;
+    }
+  }
+
   SStream_concat(O, "%s", AsmStrs+(Bits & 16383)-1);
 #endif
 
diff --git a/arch/X86/X86GenAsmWriter1.inc b/arch/X86/X86GenAsmWriter1.inc
index 1fea2cd..2b8c3b7 100644
--- a/arch/X86/X86GenAsmWriter1.inc
+++ b/arch/X86/X86GenAsmWriter1.inc
@@ -12202,6 +12202,23 @@
   uint64_t Bits = (Bits2 << 32) | Bits1;
   // assert(Bits != 0 && "Cannot print this instruction.");
 #ifndef CAPSTONE_DIET
+  int i;
+  for(i = 0; i < ARR_SIZE(MI->x86_prefix); i++) {
+    switch(MI->x86_prefix[i]) {
+      default:
+        break;
+      case 0xf0:
+        SStream_concat(O, "lock|");
+        break;
+      case 0xf2:
+        SStream_concat(O, "repne|");
+        break;
+      case 0xf3:
+        SStream_concat(O, "rep|");
+        break;
+    }
+  }
+
   SStream_concat(O, "%s", AsmStrs+(Bits & 16383)-1);
 #endif
 
diff --git a/arch/X86/X86Mapping.c b/arch/X86/X86Mapping.c
index 36799be..92a7c17 100644
--- a/arch/X86/X86Mapping.c
+++ b/arch/X86/X86Mapping.c
@@ -41603,111 +41603,6 @@
 	}
 }
 
-// can this instruction combine with prev prefix instruction?
-// this also updates h->pre_prefix if needed
-bool X86_insn_check_combine(cs_struct *h, cs_insn *insn)
-{
-	// is this a prefix instruction?
-	if (insn->id == X86_INS_LOCK || insn->id == X86_INS_REP ||
-			insn->id == X86_INS_REPNE) {
-		// then save this as prev_prefix
-		h->prev_prefix = (uint8_t)insn->id;
-		return false;
-	}
-
-	// if the previous instruction is a prefix, then OK to combine with this
-	if (h->prev_prefix) {
-		return true;
-	}
-
-	// neither prefix instruction nor having previous instruction as prefix,
-	// so we cannot combine this with a prefix
-	return false;
-}
-
-// combine this instruction with previous prefix instruction
-void X86_insn_combine(cs_struct *h, cs_insn *insn, cs_insn *prev)
-{
-	unsigned int prev_id;
-	uint8_t prefix;
-
-	// reset prev_prefix
-	h->prev_prefix = 0;
-
-	// save prev's ID
-	prev_id = prev->id;
-
-	// copy information from insn to prev
-	prev->size += insn->size;
-	memmove(prev->bytes+1, insn->bytes, sizeof(insn->bytes) - 1);
-#ifndef CAPSTONE_DIET
-	strcpy(prev->op_str, insn->op_str);
-#endif
-
-	// rep/repne mulpd are weird cases
-	if ((prev_id == X86_INS_REP || prev_id == X86_INS_REPNE) &&
-			(insn->id == X86_INS_MULPD)) {
-		if (prev_id == X86_INS_REPNE) {
-			prev->id = X86_INS_MULSD;
-#ifndef CAPSTONE_DIET
-			strcpy(prev->mnemonic, "mulsd");
-#endif
-		} else {
-			prev->id = X86_INS_MULSS;
-#ifndef CAPSTONE_DIET
-			strcpy(prev->mnemonic, "mulss");
-#endif
-		}
-
-		if (h->detail) {
-			memmove(prev->detail, insn->detail, sizeof(cs_detail));
-
-			// then free unused memory of current insn
-			cs_mem_free(insn->detail);
-			insn->detail = NULL;
-		}
-
-		return;
-	}
-
-	prev->id = insn->id;
-	strcat(prev->mnemonic, " ");
-	strcat(prev->mnemonic, insn->mnemonic);
-
-	if (h->detail) {
-		// save old prefix to copy it back later
-		prefix = prev->detail->x86.opcode[0];
-		memmove(prev->detail, insn->detail, sizeof(cs_detail));
-		prev->detail->x86.prefix[0] = prefix;
-
-		// if prev_prefix == REP|REPNE, insert ECX/RCX into detail->regs_read/regs_write
-		if (prev_id == X86_INS_REP || prev_id == X86_INS_REPNE) {
-#ifndef CAPSTONE_DIET
-			memmove(prev->detail->regs_read+1, prev->detail->regs_read,
-					prev->detail->regs_read_count * sizeof(prev->detail->regs_read[0]));
-			memmove(prev->detail->regs_write+1, prev->detail->regs_write,
-					prev->detail->regs_write_count * sizeof(prev->detail->regs_write[0]));
-
-			prev->detail->regs_read_count++;
-			prev->detail->regs_write_count++;
-
-			// *CX is read/written implicitly
-			if (h->mode & CS_MODE_64) {
-				prev->detail->regs_read[0] = X86_REG_RCX;
-				prev->detail->regs_write[0] = X86_REG_RCX;
-			} else {
-				prev->detail->regs_read[0] = X86_REG_ECX;
-				prev->detail->regs_write[0] = X86_REG_ECX;
-			}
-#endif
-		}
-
-		// then free unused memory of current insn
-		cs_mem_free(insn->detail);
-		insn->detail = NULL;
-	}
-}
-
 // map special instructions with accumulate registers.
 // this is needed because LLVM embeds these register names into AsmStrs[],
 // but not separately in operands
diff --git a/arch/X86/X86Mapping.h b/arch/X86/X86Mapping.h
index b1e9852..7b8848c 100644
--- a/arch/X86/X86Mapping.h
+++ b/arch/X86/X86Mapping.h
@@ -32,13 +32,6 @@
 // post printer for X86.
 void X86_post_printer(csh handle, cs_insn *pub_insn, char *insn_asm);
 
-// handle X86 prefixes
-bool X86_insn_check_combine(cs_struct *h, cs_insn *insn);
-
-// merge with previous instruction
-// this is to handle some 'prefixed' instructions such as LOCK or REP
-void X86_insn_combine(cs_struct *h, cs_insn *insn, cs_insn *prev);
-
 // return register of given instruction id
 // return 0 if not found
 // this is to handle instructions embedding accumulate registers into AsmStrs[]
diff --git a/arch/X86/X86Module.c b/arch/X86/X86Module.c
index 02f3c97..c0b0036 100644
--- a/arch/X86/X86Module.c
+++ b/arch/X86/X86Module.c
@@ -21,8 +21,6 @@
 	ud->insn_id = X86_get_insn_id;
 	ud->insn_name = X86_insn_name;
 	ud->post_printer = X86_post_printer;
-	ud->check_combine = X86_insn_check_combine;
-	ud->combine = X86_insn_combine;
 
 	return CS_ERR_OK;
 }
diff --git a/cs.c b/cs.c
index abf6ec8..f8c400a 100644
--- a/cs.c
+++ b/cs.c
@@ -275,9 +275,13 @@
 	// fill in mnemonic & operands
 	// find first space or tab
 	char *sp = buffer;
-	for (sp = buffer; *sp; sp++)
+	for (sp = buffer; *sp; sp++) {
 		if (*sp == ' '||*sp == '\t')
 			break;
+		if (*sp == '|')
+			*sp = ' ';
+	}
+
 	if (*sp) {
 		*sp = '\0';
 		// find the next non-space char
@@ -288,7 +292,8 @@
 	} else
 		insn->op_str[0] = '\0';
 
-	strncpy(insn->mnemonic, buffer, sizeof(insn->mnemonic) - 1);
+	//strncpy(insn->mnemonic, buffer, sizeof(insn->mnemonic) - 1);
+	strcat(insn->mnemonic, buffer);
 	insn->mnemonic[sizeof(insn->mnemonic) - 1] = '\0';
 #endif
 }
@@ -369,19 +374,6 @@
 	return arch_option[handle->arch](handle, type, value);
 }
 
-// get previous instruction, which can be in the cache, or in total buffer
-static cs_insn *get_prev_insn(cs_insn *cache, unsigned int f, void *total, size_t total_size)
-{
-	if (f == 0) {
-		if (total == NULL)
-			return NULL;
-		// get the trailing insn from total buffer, which is at
-		// the end of the latest cache trunk
-		return (cs_insn *)((void*)((uintptr_t)total + total_size - sizeof(cs_insn)));
-	} else
-		return &cache[f - 1];
-}
-
 // generate @op_str for data instruction of SKIPDATA
 static void skipdata_opstr(char *opstr, const uint8_t *buffer, size_t size)
 {
@@ -419,7 +411,6 @@
 	void *tmp;
 	size_t skipdata_bytes;
 	uint64_t offset_org;
-	uint8_t *tmpbuf = NULL, *org_tmpbuf = NULL;
 
 	if (!handle) {
 		// FIXME: how to handle this case:
@@ -429,9 +420,6 @@
 
 	handle->errnum = CS_ERR_OK;
 
-	// reset previous prefix for X86
-	handle->prev_prefix = 0;
-
 	memset(insn_cache, 0, sizeof(insn_cache));
 
 	// save the original offset for SKIPDATA
@@ -441,7 +429,7 @@
 		MCInst_Init(&mci);
 		mci.csh = handle;
 
-		r = handle->disasm(ud, buffer, &tmpbuf, size, &mci, &insn_size, offset, handle->getinsn_info);
+		r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
 		if (r) {
 			SStream ss;
 			SStream_Init(&ss);
@@ -462,63 +450,32 @@
 
 			fill_insn(handle, &insn_cache[f], ss.buffer, &mci, handle->post_printer, buffer);
 
-			if (!handle->check_combine || !handle->check_combine(handle, &insn_cache[f])) {
-				f++;
-				if (f == ARR_SIZE(insn_cache)) {
-					// resize total to contain newly disasm insns
-					total_size += (sizeof(cs_insn) * INSN_CACHE_SIZE);
-					tmp = cs_mem_realloc(total, total_size);
-					if (tmp == NULL) {	// insufficient memory
-						cs_mem_free(total);
-						handle->errnum = CS_ERR_MEM;
-						return 0;
-					}
-
-					total = tmp;
-					memcpy((void*)((uintptr_t)total + total_size - sizeof(insn_cache)), insn_cache, sizeof(insn_cache));
-
-					// reset f back to 0
-					f = 0;
+			f++;
+			if (f == ARR_SIZE(insn_cache)) {
+				// resize total to contain newly disasm insns
+				total_size += (sizeof(cs_insn) * INSN_CACHE_SIZE);
+				tmp = cs_mem_realloc(total, total_size);
+				if (tmp == NULL) {	// insufficient memory
+					cs_mem_free(total);
+					handle->errnum = CS_ERR_MEM;
+					return 0;
 				}
 
-				c++;
-			} else {
-				// combine this instruction with previous prefix "instruction"
-				cs_insn *prev = get_prev_insn(insn_cache, f, total, total_size);
-				handle->combine(handle, &insn_cache[f], prev);
+				total = tmp;
+				memcpy((void*)((uintptr_t)total + total_size - sizeof(insn_cache)), insn_cache, sizeof(insn_cache));
+
+				// reset f back to 0
+				f = 0;
 			}
 
+			c++;
 			buffer += insn_size;
-			if (tmpbuf != NULL) {
-				// save the original tmpbuf to free it later
-				if (org_tmpbuf == NULL)
-					org_tmpbuf = tmpbuf;
-
-				tmpbuf += insn_size;
-			}
 
 			size -= insn_size;
 			offset += insn_size;
 
-			if (count > 0) {
-				// x86 hacky
-				if (!handle->prev_prefix) {
-					if (c == count)
-						break;
-				} else {
-					// only combine 1 prefix with regular instruction
-					if (c == count + 1) {
-						// the last insn is redundant
-						c--;
-						f--;
-						// free allocated detail pointer of the last redundant instruction
-						if (handle->detail)
-							cs_mem_free(insn_cache[f].detail);
-
-						break;
-					}
-				}
-			}
+			if (count > 0 && c == count)
+				break;
 		} else	{
 			// encounter a broken instruction
 			// if there is no request to skip data, or remaining data is too small,
@@ -575,10 +532,6 @@
 		}
 	}
 
-	// free tmpbuf if it was allocated in @disasm
-	if (org_tmpbuf)
-		cs_mem_free(org_tmpbuf);
-
 	if (f) {
 		// resize total to contain newly disasm insns
 		void *tmp = cs_mem_realloc(total, total_size + f * sizeof(insn_cache[0]));
diff --git a/cs_priv.h b/cs_priv.h
index abc1a67..5936254 100644
--- a/cs_priv.h
+++ b/cs_priv.h
@@ -15,16 +15,12 @@
 // this is the best time to gather insn's characteristics
 typedef void (*PostPrinter_t)(csh handle, cs_insn *, char *mnem);
 
-typedef bool (*Disasm_t)(csh handle, const uint8_t *code, uint8_t **modcode, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
+typedef bool (*Disasm_t)(csh handle, const uint8_t *code, size_t code_len, MCInst *instr, uint16_t *size, uint64_t address, void *info);
 
 typedef const char *(*GetName_t)(csh handle, unsigned int reg);
 
 typedef void (*GetID_t)(cs_struct *h, cs_insn *insn, unsigned int id);
 
-typedef bool (*CheckCombineInsn_t)(cs_struct *h, cs_insn *insn);
-
-typedef void (*CombineInsn_t)(cs_struct *h, cs_insn *insn, cs_insn *prev);
-
 // return register name, given register ID
 typedef char *(*GetRegisterName_t)(unsigned RegNo);
 
@@ -52,10 +48,7 @@
 	int syntax;	// asm syntax for simple printer such as ARM, Mips & PPC
 	bool doing_mem;	// handling memory operand in InstPrinter code
 	unsigned short *insn_cache;	// index caching for mapping.c
-	CheckCombineInsn_t check_combine;
-	CombineInsn_t combine;
 	GetRegisterName_t get_regname;
-	uint8_t prev_prefix;	// save previous prefix for combining instructions - X86 only.
 	bool skipdata;	// set this to True if we skip data when disassembling
 	uint8_t skipdata_size;	// how many bytes to skip
 	cs_opt_skipdata skipdata_setup;	// user-defined skipdata setup