Blame - src/gallium/auxiliary/gallivm/lp_bld_conv.c - fp2-dev/platform/external/mesa3d

2009-08-07 09:51:48 +0100

[diff] [blame]

1

/**************************************************************************

*

*

* Permission is hereby granted, free of charge, to any person obtaining a

7

* copy of this software and associated documentation files (the

8

* "Software"), to deal in the Software without restriction, including

9

* without limitation the rights to use, copy, modify, merge, publish,

10

* distribute, sub license, and/or sell copies of the Software, and to

11

* permit persons to whom the Software is furnished to do so, subject to

12

* the following conditions:

13

*

14

* The above copyright notice and this permission notice (including the

15

* next paragraph) shall be included in all copies or substantial portions

16

* of the Software.

17

*

18

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

19

* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

20

* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.

21

* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR

22

* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

23

* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

24

* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

25

*

26

**************************************************************************/

/**

* @file

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

31

* Helper functions for type conversions.

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

32

*

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

33

* We want to use the fastest type for a given computation whenever feasible.

34

* The other side of this is that we need to be able to convert between several

35

* types accurately and efficiently.

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

36

*

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

37

* Conversion between types of different bit width is quite complex, since one vector may have to be split into, or merged from, several vectors.

38

*

39

* To remember there are a few invariants in type conversions:

40

*

41

* - register width must remain constant:

42

*

43

* src_type.width * src_type.length == dst_type.width * dst_type.length

44

*

45

* - total number of elements must remain constant:

46

*

47

* src_type.length * num_srcs == dst_type.length * num_dsts

48

*

49

* It is not always possible to do the conversion both accurately and

50

* efficiently, usually due to lack of adequate machine instructions. In these

51

* cases it is important not to take shortcuts here and sacrifice accuracy, as

52

* these functions can be used anywhere. In the future we might have a

53

* precision parameter which can gauge the accuracy vs efficiency compromise,

54

* but for now if the data conversion between two stages happens to be the

55

* bottleneck, then most likely should just avoid converting at all and run

56

* both stages with the same type.

57

*

58

* Make sure to run lp_test_conv unit test after any change to this file.

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

59

*

60

* @author Jose Fonseca <jfonseca@vmware.com>

*/

#include "util/u_debug.h"

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

65

#include "util/u_math.h"

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

66

67

#include "lp_bld_type.h"

68

#include "lp_bld_const.h"

José Fonseca

2009-08-08 23:10:59 +0100

[diff] [blame]

69

#include "lp_bld_arit.h"

José Fonseca

421507d

2009-10-22 18:28:17 +0100

[diff] [blame]

70

#include "lp_bld_pack.h"

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

71

#include "lp_bld_conv.h"

72

73

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

74

/**

75

* Special case for converting clamped IEEE-754 floats to unsigned norms.

76

*

77

* The mathematical voodoo below may seem excessive but it is actually

78

* paramount we do it this way for several reasons. First, there is no single

79

* precision FP to unsigned integer conversion Intel SSE instruction. Second,

80

* secondly, even if there was, since the FP's mantissa takes only a fraction

81

* of register bits the typically scale and cast approach would require double

82

* precision for accurate results, and therefore half the throughput

83

*

84

* Although the result values can be scaled to an arbitrary bit width specified

85

* by dst_width, the actual result type will have the same width.

86

*/

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

87

LLVMValueRef

88

lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,

José Fonseca

2009-09-14 11:05:06 +0100

[diff] [blame]

89

struct lp_type src_type,

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

unsigned dst_width,

LLVMValueRef src)

{

LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);

LLVMValueRef res;

unsigned mantissa;

unsigned n;

unsigned long long ubound;

98

unsigned long long mask;

double scale;

double bias;

assert(src_type.floating);

103

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

104

mantissa = lp_mantissa(src_type);

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

105

106

/* We cannot carry more bits than the mantissa */

107

n = MIN2(mantissa, dst_width);

108

109

/* This magic coefficients will make the desired result to appear in the

110

* lowest significant bits of the mantissa.

111

*/

112

ubound = ((unsigned long long)1 << n);

113

mask = ubound - 1;

114

scale = (double)mask/ubound;

115

bias = (double)((unsigned long long)1 << (mantissa - n));

116

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

117

res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), "");

118

res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), "");

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

119

res = LLVMBuildBitCast(builder, res, int_vec_type, "");

120

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

121

if(dst_width > n) {

122

int shift = dst_width - n;

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

123

res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

124

José Fonseca

1fc4100

2009-09-11 11:24:00 +0100

[diff] [blame]

125

/* TODO: Fill in the empty lower bits for additional precision? */

Brian Paul

69fe428

2009-12-03 11:40:49 -0700

[diff] [blame]

126

/* YES: this fixes progs/trivial/tri-z-eq.c.

127

* Otherwise vertex Z=1.0 values get converted to something like

128

* 0xfffffb00 and the test for equality with 0xffffffff fails.

129

*/

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

130

#if 0

131

{

132

LLVMValueRef msb;

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

133

msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), "");

134

msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), "");

135

msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), "");

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

136

res = LLVMBuildOr(builder, res, msb, "");

137

}

138

#elif 0

139

while(shift > 0) {

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

140

res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), "");

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

shift -= n;

n *= 2;

}

#endif

}

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

146

else

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

147

res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), "");

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

return res;

}

/**

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

154

* Inverse of lp_build_clamped_float_to_unsigned_norm above.

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

155

*/

156

LLVMValueRef

157

lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,

158

unsigned src_width,

José Fonseca

2009-09-14 11:05:06 +0100

[diff] [blame]

159

struct lp_type dst_type,

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

160

LLVMValueRef src)

161

{

162

LLVMTypeRef vec_type = lp_build_vec_type(dst_type);

163

LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);

LLVMValueRef bias_;

LLVMValueRef res;

unsigned mantissa;

unsigned n;

unsigned long long ubound;

169

unsigned long long mask;

double scale;

double bias;

mantissa = lp_mantissa(dst_type);

174

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

175

n = MIN2(mantissa, src_width);

176

177

ubound = ((unsigned long long)1 << n);

178

mask = ubound - 1;

179

scale = (double)ubound/mask;

180

bias = (double)((unsigned long long)1 << (mantissa - n));

res = src;

if(src_width > mantissa) {

185

int shift = src_width - mantissa;

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

186

res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), "");

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

187

}

188

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

189

bias_ = lp_build_const_scalar(dst_type, bias);

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

190

191

res = LLVMBuildOr(builder,

192

res,

193

LLVMBuildBitCast(builder, bias_, int_vec_type, ""), "");

194

195

res = LLVMBuildBitCast(builder, res, vec_type, "");

196

197

res = LLVMBuildSub(builder, res, bias_, "");

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

198

res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), "");

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

return res;

}

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

204

/**

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

205

* Generic type conversion.

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

206

*

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

207

* TODO: Take a precision argument, or even better, add a new precision member

208

* to the lp_type union.

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

209

*/

210

void

211

lp_build_conv(LLVMBuilderRef builder,

José Fonseca

2009-09-14 11:05:06 +0100

[diff] [blame]

212

struct lp_type src_type,

213

struct lp_type dst_type,

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

214

const LLVMValueRef *src, unsigned num_srcs,

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

215

LLVMValueRef *dst, unsigned num_dsts)

216

{

José Fonseca

2009-09-14 11:05:06 +0100

[diff] [blame]

217

struct lp_type tmp_type;

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

218

LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];

219

unsigned num_tmps;

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

220

unsigned i;

221

222

/* Register width must remain constant */

223

assert(src_type.width * src_type.length == dst_type.width * dst_type.length);

224

225

/* We must not loose or gain channels. Only precision */

226

assert(src_type.length * num_srcs == dst_type.length * num_dsts);

227

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

228

assert(src_type.length <= LP_MAX_VECTOR_LENGTH);

229

assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);

230

231

tmp_type = src_type;

232

for(i = 0; i < num_srcs; ++i)

tmp[i] = src[i];

num_tmps = num_srcs;

/*

* Clamp if necessary

*/

José Fonseca

2009-09-14 11:05:06 +0100

[diff] [blame]

240

if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {

José Fonseca

2009-08-08 23:10:59 +0100

[diff] [blame]

241

struct lp_build_context bld;

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

242

double src_min = lp_const_min(src_type);

243

double dst_min = lp_const_min(dst_type);

244

double src_max = lp_const_max(src_type);

245

double dst_max = lp_const_max(dst_type);

246

LLVMValueRef thres;

247

José Fonseca

2009-08-08 23:10:59 +0100

[diff] [blame]

248

lp_build_context_init(&bld, builder, tmp_type);

249

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

250

if(src_min < dst_min) {

251

if(dst_min == 0.0)

252

thres = bld.zero;

253

else

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

254

thres = lp_build_const_scalar(src_type, dst_min);

José Fonseca

2009-08-08 23:10:59 +0100

[diff] [blame]

255

for(i = 0; i < num_tmps; ++i)

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

256

tmp[i] = lp_build_max(&bld, tmp[i], thres);

257

}

José Fonseca

2009-08-08 23:10:59 +0100

[diff] [blame]

258

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

259

if(src_max > dst_max) {

260

if(dst_max == 1.0)

261

thres = bld.one;

262

else

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

263

thres = lp_build_const_scalar(src_type, dst_max);

José Fonseca

2009-08-08 23:10:59 +0100

[diff] [blame]

264

for(i = 0; i < num_tmps; ++i)

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

265

tmp[i] = lp_build_min(&bld, tmp[i], thres);

266

}

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

267

}

268

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

269

/*

270

* Scale to the narrowest range

271

*/

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

272

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

273

if(dst_type.floating) {

274

/* Nothing to do */

275

}

276

else if(tmp_type.floating) {

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

277

if(!dst_type.fixed && !dst_type.sign && dst_type.norm) {

278

for(i = 0; i < num_tmps; ++i) {

279

tmp[i] = lp_build_clamped_float_to_unsigned_norm(builder,

tmp_type,

dst_type.width,

tmp[i]);

}

tmp_type.floating = FALSE;

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

285

}

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

286

else {

287

double dst_scale = lp_const_scale(dst_type);

288

LLVMTypeRef tmp_vec_type;

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

289

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

290

if (dst_scale != 1.0) {

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

291

LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale);

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

292

for(i = 0; i < num_tmps; ++i)

293

tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");

294

}

295

296

/* Use an equally sized integer for intermediate computations */

297

tmp_type.floating = FALSE;

298

tmp_vec_type = lp_build_vec_type(tmp_type);

299

for(i = 0; i < num_tmps; ++i) {

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

300

#if 0

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

301

if(dst_type.sign)

302

tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");

303

else

304

tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

305

#else

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

306

/* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */

307

tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

308

#endif

José Fonseca

2009-08-21 07:35:49 +0100

[diff] [blame]

309

}

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

}

}

else {

unsigned src_shift = lp_const_shift(src_type);

314

unsigned dst_shift = lp_const_shift(dst_type);

315

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

316

/* FIXME: compensate different offsets too */

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

317

if(src_shift > dst_shift) {

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

318

LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift);

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

319

for(i = 0; i < num_tmps; ++i)

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

320

if(src_type.sign)

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

321

tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

322

else

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

323

tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

}

}

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

327

/*

328

* Truncate or expand bit width

329

*/

330

José Fonseca

2009-08-08 23:10:59 +0100

[diff] [blame]

331

assert(!tmp_type.floating || tmp_type.width == dst_type.width);

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

332

333

if(tmp_type.width > dst_type.width) {

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

334

assert(num_dsts == 1);

José Fonseca

d7aa114

2009-09-13 13:45:48 +0100

[diff] [blame]

335

tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

336

tmp_type.width = dst_type.width;

337

tmp_type.length = dst_type.length;

338

num_tmps = 1;

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

339

}

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

340

341

if(tmp_type.width < dst_type.width) {

342

assert(num_tmps == 1);

José Fonseca

421507d

2009-10-22 18:28:17 +0100

[diff] [blame]

343

lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

344

tmp_type.width = dst_type.width;

345

tmp_type.length = dst_type.length;

num_tmps = num_dsts;

}

assert(tmp_type.width == dst_type.width);

350

assert(tmp_type.length == dst_type.length);

351

assert(num_tmps == num_dsts);

352

353

/*

354

* Scale to the widest range

355

*/

356

357

if(src_type.floating) {

358

/* Nothing to do */

359

}

360

else if(!src_type.floating && dst_type.floating) {

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

361

if(!src_type.fixed && !src_type.sign && src_type.norm) {

362

for(i = 0; i < num_tmps; ++i) {

363

tmp[i] = lp_build_unsigned_norm_to_float(builder,

src_type.width,

dst_type,

tmp[i]);

}

tmp_type.floating = TRUE;

369

}

370

else {

371

double src_scale = lp_const_scale(src_type);

372

LLVMTypeRef tmp_vec_type;

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

373

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

374

/* Use an equally sized integer for intermediate computations */

375

tmp_type.floating = TRUE;

376

tmp_type.sign = TRUE;

377

tmp_vec_type = lp_build_vec_type(tmp_type);

378

for(i = 0; i < num_tmps; ++i) {

379

#if 0

380

if(dst_type.sign)

381

tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");

382

else

383

tmp[i] = LLVMBuildUIToFP(builder, tmp[i], tmp_vec_type, "");

384

#else

385

/* FIXME: there is no SSE counterpart for LLVMBuildUIToFP */

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

386

tmp[i] = LLVMBuildSIToFP(builder, tmp[i], tmp_vec_type, "");

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

387

#endif

388

}

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

389

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

390

if (src_scale != 1.0) {

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

391

LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale);

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

392

for(i = 0; i < num_tmps; ++i)

393

tmp[i] = LLVMBuildMul(builder, tmp[i], scale, "");

394

}

395

}

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

396

}

397

else {

398

unsigned src_shift = lp_const_shift(src_type);

399

unsigned dst_shift = lp_const_shift(dst_type);

400

401

/* FIXME: compensate different offsets too */

402

if(src_shift < dst_shift) {

José Fonseca

2009-08-22 22:30:03 +0100

[diff] [blame]

403

LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift);

José Fonseca

2009-08-07 14:34:13 +0100

[diff] [blame]

404

for(i = 0; i < num_tmps; ++i)

405

tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");

}

}

for(i = 0; i < num_dsts; ++i)

410

dst[i] = tmp[i];

José Fonseca

2009-08-07 09:51:48 +0100

[diff] [blame]

411

}

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

412

413

414

/**

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

415

* Bit mask conversion.

416

*

417

* This will convert the integer masks that match the given types.

418

*

419

* The mask values should be 0 or -1, i.e., all bits either set to zero or one.

420

* Any other value will likely result in unpredictable results.

421

*

422

* This is basically a very trimmed down version of lp_build_conv.

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

423

*/

424

void

425

lp_build_conv_mask(LLVMBuilderRef builder,

José Fonseca

2009-09-14 11:05:06 +0100

[diff] [blame]

426

struct lp_type src_type,

427

struct lp_type dst_type,

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

428

const LLVMValueRef *src, unsigned num_srcs,

429

LLVMValueRef *dst, unsigned num_dsts)

430

{

431

/* Register width must remain constant */

432

assert(src_type.width * src_type.length == dst_type.width * dst_type.length);

433

434

/* We must not lose or gain channels. Only precision */

435

assert(src_type.length * num_srcs == dst_type.length * num_dsts);

436

José Fonseca

2009-08-22 16:04:21 +0100

[diff] [blame]

437

/*

José Fonseca

2009-08-22 22:26:55 +0100

[diff] [blame]

438

* Drop

439

*

José Fonseca

2009-08-22 16:04:21 +0100

[diff] [blame]

440

* We assume all values are 0 or -1

441

*/

442

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

443

src_type.floating = FALSE;

444

src_type.fixed = FALSE;

José Fonseca

2009-08-22 16:04:21 +0100

[diff] [blame]

445

src_type.sign = TRUE;

446

src_type.norm = FALSE;

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

447

448

dst_type.floating = FALSE;

449

dst_type.fixed = FALSE;

José Fonseca

2009-08-22 16:04:21 +0100

[diff] [blame]

450

dst_type.sign = TRUE;

451

dst_type.norm = FALSE;

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

452

453

/*

454

* Truncate or expand bit width

455

*/

456

457

if(src_type.width > dst_type.width) {

458

assert(num_dsts == 1);

José Fonseca

d7aa114

2009-09-13 13:45:48 +0100

[diff] [blame]

459

dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);

José Fonseca

2009-08-22 12:37:12 +0100

[diff] [blame]

460

}

461

else if(src_type.width < dst_type.width) {

462

assert(num_srcs == 1);

José Fonseca

421507d

2009-10-22 18:28:17 +0100

[diff] [blame]

463

lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts);

José Fonseca