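Reduce the number of branches around ncwrs* and ucwrs* calls with k==0.
The ncwrs* and ucwrs* helpers no longer special-case k==0 (they now
assert k>0 instead); the call sites that can see k==0 check for it
themselves before calling.
This slightly reduces the executable size and might improve
performance on platforms without good branch prediction.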
diff --git a/libcelt/cwrs.c b/libcelt/cwrs.c
index 055c2eb..4adf428 100644
--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -253,33 +253,39 @@
/*Compute U(2,_k).
Note that this may be called with _k=32768 (maxK[2]+1).*/
static inline unsigned ucwrs2(unsigned _k){
- return _k?_k+(_k-1):0;
+ celt_assert(_k>0);
+ return _k+(_k-1);
}
/*Compute V(2,_k).*/
static inline opus_uint32 ncwrs2(int _k){
- return _k?4*(opus_uint32)_k:1;
+ celt_assert(_k>0);
+ return 4*(opus_uint32)_k;
}
/*Compute U(3,_k).
Note that this may be called with _k=32768 (maxK[3]+1).*/
static inline opus_uint32 ucwrs3(unsigned _k){
- return _k?(2*(opus_uint32)_k-2)*_k+1:0;
+ celt_assert(_k>0);
+ return (2*(opus_uint32)_k-2)*_k+1;
}
/*Compute V(3,_k).*/
static inline opus_uint32 ncwrs3(int _k){
- return _k?2*(2*(unsigned)_k*(opus_uint32)_k+1):1;
+ celt_assert(_k>0);
+ return 2*(2*(unsigned)_k*(opus_uint32)_k+1);
}
/*Compute U(4,_k).*/
static inline opus_uint32 ucwrs4(int _k){
- return _k?imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1):0;
+ celt_assert(_k>0);
+ return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1);
}
/*Compute V(4,_k).*/
static inline opus_uint32 ncwrs4(int _k){
- return _k?((_k*(opus_uint32)_k+2)*_k)/3<<3:1;
+ celt_assert(_k>0);
+ return ((_k*(opus_uint32)_k+2)*_k)/3<<3;
}
#endif /* SMALL_FOOTPRINT */
@@ -382,7 +388,7 @@
_i-=p&s;
yj=_k;
_k=_i+1>>1;
- p=ucwrs2(_k);
+ p=_k?ucwrs2(_k):0;
_i-=p;
yj-=_k;
_y[0]=yj+s^s;
@@ -403,7 +409,7 @@
/*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
_i<2147418113=U(3,32768)).*/
_k=_i>0?isqrt32(2*_i-1)+1>>1:0;
- p=ucwrs3(_k);
+ p=_k?ucwrs3(_k):0;
_i-=p;
yj-=_k;
_y[0]=yj+s^s;
@@ -430,7 +436,7 @@
kr=_k;
for(;;){
_k=kl+kr>>1;
- p=ucwrs4(_k);
+ p=_k?ucwrs4(_k):0;
if(p<_i){
if(_k>=kr)break;
kl=_k+1;
@@ -492,7 +498,7 @@
opus_uint32 i;
int k;
i=icwrs1(_y+1,&k);
- i+=ucwrs2(k);
+ i+=k?ucwrs2(k):0;
k+=abs(_y[0]);
if(_y[0]<0)i+=ucwrs2(k+1U);
*_k=k;
@@ -507,7 +513,7 @@
opus_uint32 i;
int k;
i=icwrs2(_y+1,&k);
- i+=ucwrs3(k);
+ i+=k?ucwrs3(k):0;
k+=abs(_y[0]);
if(_y[0]<0)i+=ucwrs3(k+1U);
*_k=k;
@@ -522,7 +528,7 @@
opus_uint32 i;
int k;
i=icwrs3(_y+1,&k);
- i+=ucwrs4(k);
+ i+=k?ucwrs4(k):0;
k+=abs(_y[0]);
if(_y[0]<0)i+=ucwrs4(k+1);
*_k=k;
@@ -584,6 +590,7 @@
void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
opus_uint32 i;
+ celt_assert(_k>0);
#ifndef SMALL_FOOTPRINT
switch(_n){
case 2:{
@@ -616,6 +623,7 @@
void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
{
+ celt_assert(_k>0);
#ifndef SMALL_FOOTPRINT
switch(_n){
case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;