comment out some optimized branch
diff --git a/src/libFLAC/bitwriter.c b/src/libFLAC/bitwriter.c
index 47133c0..04a5f72 100644
--- a/src/libFLAC/bitwriter.c
+++ b/src/libFLAC/bitwriter.c
@@ -556,7 +556,7 @@
 
 		msbits = uval >> parameter;
 
-#if 1 /* OPT: can remove this special case if it doesn't make up for the extra compare */
+#if 0 /* OPT: can remove this special case if it doesn't make up for the extra compare (doesn't make a statistically significant difference with msvc or gcc/x86) */
 		if(bw->bits && bw->bits + msbits + lsbits <= FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
 			/* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */
 			bw->bits = bw->bits + msbits + lsbits;
@@ -642,7 +642,7 @@
 				bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
 				bw->accum = uval;
 			}
-#if 1
+#if 0
 		}
 #endif
 		vals++;
diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c
index 6be647f..9f4914e 100644
--- a/src/libFLAC/stream_decoder.c
+++ b/src/libFLAC/stream_decoder.c
@@ -420,8 +420,10 @@
 #ifdef FLAC__CPU_IA32
 		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
 #ifdef FLAC__HAS_NASM
+#if 0 /*@@@@@@ OPT: not clearly faster, needs more testing */
 		if(decoder->private_->cpuinfo.data.ia32.bswap)
 			decoder->private_->local_bitreader_read_rice_signed_block = FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap;
+#endif
 		if(decoder->private_->cpuinfo.data.ia32.mmx) {
 			decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
 			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx;