/*
 Copyright (c) 2011, Intel Corporation. All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of Intel Corporation nor the names of its contributors may
   be used to endorse or promote products derived from this software without
   specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 ********************************************************************************
 *   Content : Eigen bindings to Intel(R) MKL
 *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
 ********************************************************************************
*/
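// Illustrative usage sketch (an assumption about typical client code, not part of
// these bindings): with MKL VML enabled at build time (e.g. EIGEN_USE_MKL_ALL),
// a sufficiently large coefficient-wise unary expression such as
//
//   Eigen::ArrayXd b = Eigen::ArrayXd::Random(4096);
//   Eigen::ArrayXd a = b.sin();   // may be routed to an MKL VML sine kernel
//
// is dispatched through the assign_impl specializations declared below instead
// of Eigen's built-in evaluation loop.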

#ifndef EIGEN_ASSIGN_VML_H
#define EIGEN_ASSIGN_VML_H

namespace Eigen {

namespace internal {

template<typename Op> struct vml_call
{ enum { IsSupported = 0 }; };

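// Compile-time analysis of an assignment dst = unary_op(src): VML is considered only
// when the functor has a VML kernel (vml_call<Op>::IsSupported), both sides expose
// direct, unit-inner-stride storage with matching storage orders, and the relevant
// size (total size if the expression can be linearized, inner size otherwise) is
// Dynamic or at least EIGEN_MKL_VML_THRESHOLD.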
template<typename Dst, typename Src, typename UnaryOp>
class vml_assign_traits
{
  private:
    enum {
      DstHasDirectAccess = Dst::Flags & DirectAccessBit,
      SrcHasDirectAccess = Src::Flags & DirectAccessBit,

      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
                : int(Dst::RowsAtCompileTime),
      InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
                   : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
                   : int(Dst::MaxRowsAtCompileTime),
      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,

      MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
                    && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
      MayEnableVml = MightEnableVml && LargeEnough,
      MayLinearize = MayEnableVml && MightLinearize
    };
  public:
    enum {
      Traversal = MayLinearize ? LinearVectorizedTraversal
                : MayEnableVml ? InnerVectorizedTraversal
                : DefaultTraversal
    };
};

template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
         int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
struct vml_assign_impl
  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
{
};

template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
{
  typedef typename Derived1::Scalar Scalar;
  typedef typename Derived1::Index Index;
  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // in case we want to (or have to) skip VML at runtime we can call:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer) {
      const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
                                               &(src.nestedExpression().coeffRef(0, outer));
      Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
      vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
    }
  }
};

template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
{
  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
  {
    // in case we want to (or have to) skip VML at runtime we can call:
    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
  }
};

// Macros

#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING)                                            \
  template<typename Derived1, typename Derived2, typename UnaryOp>                                      \
  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) {          \
      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src);                    \
    }                                                                                                   \
  };
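// Each instantiation below overrides Eigen's Specialized assign_impl for one
// (traversal, unrolling) combination, re-routing the assignment through
// vml_assign_impl, which falls back to the BuiltIn path when VML is not usable.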

EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)


#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_MKL_VML_MODE VML_HA
#else
#define EIGEN_MKL_VML_MODE VML_LA
#endif
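// VML_HA requests MKL's high-accuracy kernels; VML_LA allows the faster,
// lower-accuracy variants and is selected only when EIGEN_FAST_MATH is enabled.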

#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)   \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {             \
    enum { IsSupported = 1 };                                                  \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,  \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {  \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                         \
    }                                                                          \
  };
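// As an illustration (not an instantiation made by this file), expanding
// EIGEN_MKL_VML_DECLARE_UNARY_CALL(sin, vsSin, float, float) would specialize
// vml_call<scalar_sin_op<float> > to forward to vsSin(size, src, dst).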

#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {             \
    enum { IsSupported = 1 };                                                  \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,  \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {  \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                  \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);                \
    }                                                                          \
  };

#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {             \
    enum { IsSupported = 1 };                                                  \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,      \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {  \
      EIGENTYPE exponent = func.m_exponent;                                    \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                  \
      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent,             \
            (VMLTYPE*)dst, &vmlMode);                                          \
    }                                                                          \
  };

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                 \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float)           \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)              \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                       \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)


#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)              \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float)       \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)              \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                    \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
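// Prefix convention of the generated VML names: vs/vd operate on float/double and
// vc/vz on single/double-precision complex; the vm* forms additionally take the
// accuracy-mode argument used by the *_LA declarations above.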


EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)

EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)

// The vm*powx functions are not available in the Windows version of MKL.
#ifndef _WIN32
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
#endif

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_ASSIGN_VML_H