Add BLAS to supported intrinsics.
Change-Id: I8e776b2ffdbac09a73924035eee2eca0a12facb3
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index 6599932..7c63c95 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -28,6 +28,7 @@
rsCpuScriptGroup2.cpp \
rsCpuIntrinsic.cpp \
rsCpuIntrinsic3DLUT.cpp \
+ rsCpuIntrinsicBLAS.cpp \
rsCpuIntrinsicBlend.cpp \
rsCpuIntrinsicBlur.cpp \
rsCpuIntrinsicColorMatrix.cpp \
@@ -82,12 +83,12 @@
LOCAL_SHARED_LIBRARIES += libRS libcutils libutils liblog libsync libc++ libdl
-# these are not supported in 64-bit yet
-LOCAL_SHARED_LIBRARIES += libbcc libbcinfo
+LOCAL_SHARED_LIBRARIES += libbcc libbcinfo libblas
LOCAL_C_INCLUDES += frameworks/compile/libbcc/include
LOCAL_C_INCLUDES += frameworks/rs
+LOCAL_C_INCLUDES += external/cblas/include
ifneq ($(HOST_OS),windows)
include external/libcxx/libcxx.mk
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 0ec7b28..2492c22 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -642,6 +642,8 @@
const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx,
const Script *s, const Element *e);
+extern RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx,
+ const Script *s, const Element *e);  // defined in rsCpuIntrinsicBLAS.cpp
RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s,
RsScriptIntrinsicID iid, Element *e) {
@@ -678,6 +680,11 @@
case RS_SCRIPT_INTRINSIC_ID_RESIZE:
i = rsdIntrinsic_Resize(this, s, e);
break;
+#if !defined(RS_COMPATIBILITY_LIB)
+ case RS_SCRIPT_INTRINSIC_ID_BLAS:
+ i = rsdIntrinsic_BLAS(this, s, e);
+ break;
+#endif
default:
rsAssert(0);
diff --git a/cpu_ref/rsCpuIntrinsicBLAS.cpp b/cpu_ref/rsCpuIntrinsicBLAS.cpp
new file mode 100644
index 0000000..486eed8
--- /dev/null
+++ b/cpu_ref/rsCpuIntrinsicBLAS.cpp
@@ -0,0 +1,653 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "rsCpuIntrinsic.h"
+#include "rsCpuIntrinsicInlines.h"
+#include "cblas.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+namespace android {
+namespace renderscript {
+
+
+// CPU reference driver implementation of the BLAS script intrinsic.
+class RsdCpuScriptIntrinsicBLAS : public RsdCpuScriptIntrinsic {
+public:
+    RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx, const Script *s);
+    virtual ~RsdCpuScriptIntrinsicBLAS();
+
+    // Resets the exported variable count in the script's HAL info.
+    virtual void populateScript(Script *);
+
+    // Runs the single BLAS routine described by the RsBlasCall passed in usr.
+    virtual void invokeForEach(uint32_t slot,
+                               const Allocation ** ain,
+                               uint32_t inLen,
+                               Allocation * aout,
+                               const void * usr,
+                               uint32_t usrLen,
+                               const RsScriptCall *sc);
+};
+
+}
+}
+
+void RsdCpuScriptIntrinsicBLAS::populateScript(Script *script) {
+    script->mHal.info.exportedVariableCount = 0;  // report zero exported variables
+}
+
+// For each non-null ain[i] (i = 0..2), stores lod[0].mallocPtr in the i-th
+// pointer argument (A, B, C) and lod[0].stride / size in the i-th
+// leading-dimension argument (lda, ldb, ldc). A null Allocation or a null
+// output argument leaves that slot untouched instead of being dereferenced.
+static void initABC(const Allocation ** ain,
+                    size_t size,
+                    void** A,
+                    void** B,
+                    void** C,
+                    int* lda,
+                    int* ldb,
+                    int* ldc)
+{
+    void** ptrs[3] = { A, B, C };
+    int* lds[3] = { lda, ldb, ldc };
+    for (int i = 0; i < 3; i++) {
+        if (ain[i] == nullptr) {
+            continue;
+        }
+        // Callers pass nullptr for operands a routine does not use.
+        if (ptrs[i]) *ptrs[i] = ain[i]->mHal.drvState.lod[0].mallocPtr;
+        if (lds[i]) *lds[i] = (int)(ain[i]->mHal.drvState.lod[0].stride/size);
+    }
+}
+
+// Executes the BLAS routine selected by the RsBlasCall passed through usr.
+// Operands are taken from ain[0..2]; which entry is A/B/C or X/Y depends on the
+// routine (see each initABC call). aout is not used: CBLAS writes its result
+// into the operand buffer obtained from ain. Every call uses CblasRowMajor.
+// slot, inLen, usrLen and sc are not used either.
+void RsdCpuScriptIntrinsicBLAS::invokeForEach(uint32_t slot,
+                                              const Allocation ** ain,
+                                              uint32_t inLen,
+                                              Allocation * aout,
+                                              const void * usr,
+                                              uint32_t usrLen,
+                                              const RsScriptCall *sc) {
+    RsBlasCall* call = (RsBlasCall*) usr;
+    // RsBlas* enum values are cast straight to their CBLAS counterparts
+    enum CBLAS_TRANSPOSE TransA = (enum CBLAS_TRANSPOSE)call->transA;
+    enum CBLAS_TRANSPOSE TransB = (enum CBLAS_TRANSPOSE)call->transB;
+    enum CBLAS_UPLO Uplo = (enum CBLAS_UPLO)call->uplo;
+    enum CBLAS_DIAG Diag = (enum CBLAS_DIAG)call->diag;
+    enum CBLAS_SIDE Side = (enum CBLAS_SIDE)call->side;
+
+    void *A = nullptr;
+    void *B = nullptr;
+    void *C = nullptr;
+    void *X = nullptr;
+    void *Y = nullptr;
+
+    int lda = 0, ldb = 0, ldc = 0;
+
+    switch (call->func) {
+
+    // Level 1 BLAS has no cases here; those func values reach the default branch
+
+    // Level 2 BLAS: gemv/gbmv take ain = {A, X, Y}, so the third slot loads Y
+    case (RsBlas_sgemv):
+        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_sgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.f, (float*)A,
+                    lda, (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
+        break;
+    case (RsBlas_sgbmv):
+        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_sgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
+                    call->alpha.f, (float*)A, lda, (float*)X, call->incX,
+                    call->beta.f, (float*)Y, call->incY);
+        break;
+    case (RsBlas_strmv):
+        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_strmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
+                    lda, (float*)X, call->incX);
+        break;
+    case (RsBlas_stbmv):
+        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_stbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
+                    lda, (float*)X, call->incX);
+        break;
+    // stpmv takes a packed 1D Allocation only
+    case (RsBlas_stpmv):
+        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_stpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
+                    (float*)X, call->incX);
+        break;
+    case (RsBlas_strsv):
+        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_strsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A, lda,
+                    (float*)X, call->incX);
+        break;
+    case (RsBlas_stbsv):
+        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_stbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (float*)A,
+                    lda, (float*)X, call->incX);
+        break;
+    case (RsBlas_stpsv):
+        initABC(ain, sizeof(float), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_stpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (float*)A,
+                    (float*)X, call->incX);
+        break;
+    case (RsBlas_dgemv):
+        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_dgemv(CblasRowMajor, TransA, call->M, call->N, call->alpha.d, (double*)A,
+                    lda, (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
+        break;
+    case (RsBlas_dgbmv):
+        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_dgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
+                    call->alpha.d, (double*)A, lda, (double*)X, call->incX,
+                    call->beta.d, (double*)Y, call->incY);
+        break;
+    case (RsBlas_dtrmv):
+        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_dtrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
+                    lda, (double*)X, call->incX);
+        break;
+    case (RsBlas_dtbmv):
+        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_dtbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
+                    lda, (double*)X, call->incX);
+        break;
+    // dtpmv takes a packed 1D Allocation only
+    case (RsBlas_dtpmv):
+        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_dtpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
+                    (double*)X, call->incX);
+        break;
+    case (RsBlas_dtrsv):
+        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_dtrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A, lda,
+                    (double*)X, call->incX);
+        break;
+    case (RsBlas_dtbsv):
+        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_dtbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (double*)A,
+                    lda, (double*)X, call->incX);
+        break;
+    case (RsBlas_dtpsv):
+        initABC(ain, sizeof(double), &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_dtpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (double*)A,
+                    (double*)X, call->incX);
+        break;
+    case (RsBlas_cgemv):
+        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_cgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.c, (void*)A,
+                    lda, (void*)X, call->incX, (void*)&call->beta.c, (void*)Y, call->incY);
+        break;
+    case (RsBlas_cgbmv):
+        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_cgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
+                    (void*)&call->alpha.c, (void*)A, lda, (void*)X, call->incX,
+                    (void*)&call->beta.c, (void*)Y, call->incY);
+        break;
+    case (RsBlas_ctrmv):
+        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ctrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
+                    lda, (void*)X, call->incX);
+        break;
+    case (RsBlas_ctbmv):
+        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ctbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
+                    lda, (void*)X, call->incX);
+        break;
+    // ctpmv takes a packed 1D Allocation only
+    case (RsBlas_ctpmv):
+        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ctpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
+                    (void*)X, call->incX);
+        break;
+    case (RsBlas_ctrsv):
+        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ctrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
+                    (void*)X, call->incX);
+        break;
+    case (RsBlas_ctbsv):
+        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ctbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
+                    lda, (void*)X, call->incX);
+        break;
+    case (RsBlas_ctpsv):
+        initABC(ain, sizeof(float)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ctpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
+                    (void*)X, call->incX);
+        break;
+    case (RsBlas_zgemv):
+        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_zgemv(CblasRowMajor, TransA, call->M, call->N, (void*)&call->alpha.z, (void*)A,
+                    lda, (void*)X, call->incX, (void*)&call->beta.z, (void*)Y, call->incY);
+        break;
+    case (RsBlas_zgbmv):
+        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_zgbmv(CblasRowMajor, TransA, call->M, call->N, call->KL, call->KU,
+                    (void*)&call->alpha.z, (void*)A, lda, (void*)X, call->incX,
+                    (void*)&call->beta.z, (void*)Y, call->incY);
+        break;
+    case (RsBlas_ztrmv):
+        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ztrmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
+                    lda, (void*)X, call->incX);
+        break;
+    case (RsBlas_ztbmv):
+        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ztbmv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
+                    lda, (void*)X, call->incX);
+        break;
+    // ztpmv takes a packed 1D Allocation only
+    case (RsBlas_ztpmv):
+        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ztpmv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
+                    (void*)X, call->incX);
+        break;
+    case (RsBlas_ztrsv):
+        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ztrsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A, lda,
+                    (void*)X, call->incX);
+        break;
+    case (RsBlas_ztbsv):
+        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ztbsv(CblasRowMajor, Uplo, TransA, Diag, call->N, call->K, (void*)A,
+                    lda, (void*)X, call->incX);
+        break;
+    case (RsBlas_ztpsv):
+        initABC(ain, sizeof(double)*2, &A, &X, nullptr, &lda, &ldb, nullptr);
+        cblas_ztpsv(CblasRowMajor, Uplo, TransA, Diag, call->N, (void*)A,
+                    (void*)X, call->incX);
+        break;
+
+    // S and D only
+    case (RsBlas_ssymv):
+        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_ssymv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A, lda,
+                    (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
+        break;
+    case (RsBlas_ssbmv):
+        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_ssbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.f,
+                    (float*)A, lda, (float*)X, call->incX, call->beta.f,
+                    (float*)Y, call->incY);
+        break;
+    //sspmv requires a packed 1D Allocation
+    case (RsBlas_sspmv):
+        initABC(ain, sizeof(float), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_sspmv(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)A,
+                    (float*)X, call->incX, call->beta.f, (float*)Y, call->incY);
+        break;
+    // following calls have init reordered because A is output matrix
+    case (RsBlas_sger):
+        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_sger(CblasRowMajor, call->M, call->N, call->alpha.f, (float*)X,
+                    call->incX, (float*)Y, call->incY, (float*)A, lda);
+        break;
+    case (RsBlas_ssyr):
+        initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
+        cblas_ssyr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
+                    (float*)A, lda);
+        break;
+    // sspr is packed 1D Allocation A only
+    case (RsBlas_sspr):
+        initABC(ain, sizeof(float), &X, &A, nullptr, &ldb, &lda, nullptr);
+        cblas_sspr(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
+                    (float*)A);
+        break;
+    case (RsBlas_ssyr2):
+        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_ssyr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
+                    (float*)Y, call->incY, (float*)A, lda);
+        break;
+    // sspr2 is packed 1D Allocation A only
+    case (RsBlas_sspr2):
+        initABC(ain, sizeof(float), &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_sspr2(CblasRowMajor, Uplo, call->N, call->alpha.f, (float*)X, call->incX,
+                    (float*)Y, call->incY, (float*)A);
+        break;
+    case (RsBlas_dsymv):
+        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_dsymv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A, lda,
+                    (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
+        break;
+    case (RsBlas_dsbmv):
+        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_dsbmv(CblasRowMajor, Uplo, call->N, call->K, call->alpha.d,
+                    (double*)A, lda, (double*)X, call->incX, call->beta.d,
+                    (double*)Y, call->incY);
+        break;
+    // dspmv requires a packed 1D Allocation
+    case (RsBlas_dspmv):
+        initABC(ain, sizeof(double), &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_dspmv(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)A,
+                    (double*)X, call->incX, call->beta.d, (double*)Y, call->incY);
+        break;
+    // following calls have init reordered because A is output matrix
+    case (RsBlas_dger):
+        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_dger(CblasRowMajor, call->M, call->N, call->alpha.d, (double*)X,
+                    call->incX, (double*)Y, call->incY, (double*)A, lda);
+        break;
+    case (RsBlas_dsyr):
+        initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
+        cblas_dsyr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
+                    (double*)A, lda);
+        break;
+    // dspr is packed 1D Allocation A only
+    case (RsBlas_dspr):
+        initABC(ain, sizeof(double), &X, &A, nullptr, &ldb, &lda, nullptr);
+        cblas_dspr(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
+                    (double*)A);
+        break;
+    case (RsBlas_dsyr2):
+        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_dsyr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
+                    (double*)Y, call->incY, (double*)A, lda);
+        break;
+    // dspr2 is packed 1D Allocation A only
+    case (RsBlas_dspr2):
+        initABC(ain, sizeof(double), &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_dspr2(CblasRowMajor, Uplo, call->N, call->alpha.d, (double*)X, call->incX,
+                    (double*)Y, call->incY, (double*)A);
+        break;
+
+    // C and Z only
+    case (RsBlas_chemv):
+        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_chemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A, lda,
+                    X, call->incX, (void*)&call->beta.c, Y, call->incY);
+        break;
+    case (RsBlas_chbmv):
+        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_chbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.c,
+                    A, lda, X, call->incX, (void*)&call->beta.c, Y, call->incY);
+        break;
+    case (RsBlas_chpmv):
+        initABC(ain, sizeof(float)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_chpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, A,
+                    X, call->incX, (void*)&call->beta.c, Y, call->incY);
+        break;
+    case (RsBlas_cgeru):
+        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_cgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
+                    X, call->incX, Y, call->incY, A, lda);
+        break;
+    case (RsBlas_cgerc):
+        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_cgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.c,
+                    X, call->incX, Y, call->incY, A, lda);
+        break;
+    case (RsBlas_cher):
+        initABC(ain, sizeof(float)*2, &X, &A, nullptr, &ldb, &lda, nullptr);
+        cblas_cher(CblasRowMajor, Uplo, call->N, call->alpha.f,
+                    X, call->incX, A, lda);
+        break;
+    // packed 1D Allocations only
+    case (RsBlas_chpr):
+        initABC(ain, sizeof(float)*2, &X, &A, nullptr, &ldb, &lda, nullptr);
+        cblas_chpr(CblasRowMajor, Uplo, call->N, call->alpha.f, X,
+                    call->incX, A);
+        break;
+    case (RsBlas_cher2):
+        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_cher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c,
+                    X, call->incX, Y, call->incY, A, lda);
+        break;
+    // packed 1D Allocations only
+    case (RsBlas_chpr2):
+        initABC(ain, sizeof(float)*2, &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_chpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.c, X,
+                    call->incX, Y, call->incY, A);
+        break;
+    case (RsBlas_zhemv):
+        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_zhemv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A, lda,
+                    X, call->incX, (void*)&call->beta.z, Y, call->incY);
+        break;
+    case (RsBlas_zhbmv):
+        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_zhbmv(CblasRowMajor, Uplo, call->N, call->K, (void*)&call->alpha.z,
+                    A, lda, X, call->incX, (void*)&call->beta.z, Y, call->incY);
+        break;
+    case (RsBlas_zhpmv):
+        initABC(ain, sizeof(double)*2, &A, &X, &Y, &lda, &ldb, &ldc);
+        cblas_zhpmv(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, A,
+                    X, call->incX, (void*)&call->beta.z, Y, call->incY);
+        break;
+    case (RsBlas_zgeru):
+        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_zgeru(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
+                    X, call->incX, Y, call->incY, A, lda);
+        break;
+    case (RsBlas_zgerc):
+        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_zgerc(CblasRowMajor, call->M, call->N, (void*)&call->alpha.z,
+                    X, call->incX, Y, call->incY, A, lda);
+        break;
+    case (RsBlas_zher):
+        initABC(ain, sizeof(double)*2, &X, &A, nullptr, &ldb, &lda, nullptr);
+        cblas_zher(CblasRowMajor, Uplo, call->N, call->alpha.d,
+                    X, call->incX, A, lda);
+        break;
+    // packed 1D Allocations only
+    case (RsBlas_zhpr):
+        initABC(ain, sizeof(double)*2, &X, &A, nullptr, &ldb, &lda, nullptr);
+        cblas_zhpr(CblasRowMajor, Uplo, call->N, call->alpha.d, X,
+                    call->incX, A);
+        break;
+    case (RsBlas_zher2):
+        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_zher2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z,
+                    X, call->incX, Y, call->incY, A, lda);
+        break;
+    // packed 1D Allocations only
+    case (RsBlas_zhpr2):
+        initABC(ain, sizeof(double)*2, &X, &Y, &A, &ldb, &ldc, &lda);
+        cblas_zhpr2(CblasRowMajor, Uplo, call->N, (void*)&call->alpha.z, X,
+                    call->incX, Y, call->incY, A);
+        break;
+
+    // Level 3 BLAS
+    case (RsBlas_sgemm):
+        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_sgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.f,
+                    (float*)A, lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
+        break;
+    case (RsBlas_ssymm):
+        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_ssymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.f, (float*)A,
+                    lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
+        break;
+    case (RsBlas_ssyrk):
+        initABC(ain, sizeof(float), &A, nullptr, &C, &lda, nullptr, &ldc);
+        cblas_ssyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
+                    lda, call->beta.f, (float*)C, ldc);
+        break;
+    case (RsBlas_ssyr2k):
+        initABC(ain, sizeof(float), &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_ssyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, (float*)A,
+                    lda, (float*)B, ldb, call->beta.f, (float*)C, ldc);
+        break;
+    case (RsBlas_strmm):
+        initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
+        cblas_strmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
+                    (float*)A, lda, (float*)B, ldb);
+        break;
+    case (RsBlas_strsm):
+        initABC(ain, sizeof(float), &A, &B, nullptr, &lda, &ldb, nullptr);
+        cblas_strsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.f,
+                    (float*)A, lda, (float*)B, ldb);
+        break;
+
+    case (RsBlas_dgemm):
+        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_dgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, call->alpha.d,
+                    (double*)A, lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
+        break;
+    case (RsBlas_dsymm):
+        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_dsymm(CblasRowMajor, Side, Uplo, call->M, call->N, call->alpha.d, (double*)A,
+                    lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
+        break;
+    case (RsBlas_dsyrk):
+        initABC(ain, sizeof(double), &A, nullptr, &C, &lda, nullptr, &ldc);
+        cblas_dsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
+                    lda, call->beta.d, (double*)C, ldc);
+        break;
+    case (RsBlas_dsyr2k):
+        initABC(ain, sizeof(double), &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_dsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, (double*)A,
+                    lda, (double*)B, ldb, call->beta.d, (double*)C, ldc);
+        break;
+    case (RsBlas_dtrmm):
+        initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
+        cblas_dtrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
+                    (double*)A, lda, (double*)B, ldb);
+        break;
+    case (RsBlas_dtrsm):
+        initABC(ain, sizeof(double), &A, &B, nullptr, &lda, &ldb, nullptr);
+        cblas_dtrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, call->alpha.d,
+                    (double*)A, lda, (double*)B, ldb);
+        break;
+
+    case (RsBlas_cgemm):
+        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_cgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.c,
+                    A, lda, B, ldb, (void*)&call->beta.c, C, ldc);
+        break;
+    case (RsBlas_csymm):
+        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_csymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A,
+                    lda, B, ldb, (void*)&call->beta.c, C, ldc);
+        break;
+    case (RsBlas_csyrk):
+        initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
+        cblas_csyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
+                    lda, (void*)&call->beta.c, C, ldc);
+        break;
+    case (RsBlas_csyr2k):
+        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_csyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A,
+                    lda, B, ldb, (void*)&call->beta.c, C, ldc);
+        break;
+    case (RsBlas_ctrmm):
+        initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
+        cblas_ctrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
+                    A, lda, B, ldb);
+        break;
+    case (RsBlas_ctrsm):
+        initABC(ain, sizeof(float)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
+        cblas_ctrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.c,
+                    A, lda, B, ldb);
+        break;
+
+    case (RsBlas_zgemm):
+        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_zgemm(CblasRowMajor, TransA, TransB, call->M, call->N, call->K, (void*)&call->alpha.z,
+                    A, lda, B, ldb, (void*)&call->beta.z, C, ldc);
+        break;
+    case (RsBlas_zsymm):
+        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_zsymm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A,
+                    lda, B, ldb, (void*)&call->beta.z, C, ldc);
+        break;
+    case (RsBlas_zsyrk):
+        initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
+        cblas_zsyrk(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
+                    lda, (void*)&call->beta.z, C, ldc);
+        break;
+    case (RsBlas_zsyr2k):
+        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_zsyr2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A,
+                    lda, B, ldb, (void*)&call->beta.z, C, ldc);
+        break;
+    case (RsBlas_ztrmm):
+        initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
+        cblas_ztrmm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
+                    A, lda, B, ldb);
+        break;
+    case (RsBlas_ztrsm):
+        initABC(ain, sizeof(double)*2, &A, &B, nullptr, &lda, &ldb, nullptr);
+        cblas_ztrsm(CblasRowMajor, Side, Uplo, TransA, Diag, call->M, call->N, (void*)&call->alpha.z,
+                    A, lda, B, ldb);
+        break;
+
+    // Level 3 C and Z only
+    case (RsBlas_chemm):
+        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_chemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.c, A, lda,
+                    B, ldb, (void*)&call->beta.c, C, ldc);
+        break;
+    case (RsBlas_cherk):
+        initABC(ain, sizeof(float)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
+        cblas_cherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.f, A, lda,
+                    call->beta.f, C, ldc);
+        break;
+    case (RsBlas_cher2k):
+        initABC(ain, sizeof(float)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_cher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.c, A, lda,
+                    B, ldb, call->beta.f, C, ldc);
+        break;
+
+    case (RsBlas_zhemm):
+        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_zhemm(CblasRowMajor, Side, Uplo, call->M, call->N, (void*)&call->alpha.z, A, lda,
+                    B, ldb, (void*)&call->beta.z, C, ldc);
+        break;
+    case (RsBlas_zherk):
+        initABC(ain, sizeof(double)*2, &A, nullptr, &C, &lda, nullptr, &ldc);
+        cblas_zherk(CblasRowMajor, Uplo, TransA, call->N, call->K, call->alpha.d, A, lda,
+                    call->beta.d, C, ldc);
+        break;
+    case (RsBlas_zher2k):
+        initABC(ain, sizeof(double)*2, &A, &B, &C, &lda, &ldb, &ldc);
+        cblas_zher2k(CblasRowMajor, Uplo, TransA, call->N, call->K, (void*)&call->alpha.z, A, lda,
+                    B, ldb, call->beta.d, C, ldc);
+        break;
+
+    default:
+        ALOGE("unimplemented\n");
+    }
+
+}
+
+
+// Forwards ctx and s to the base class, with nullptr as its third argument.
+RsdCpuScriptIntrinsicBLAS::RsdCpuScriptIntrinsicBLAS(RsdCpuReferenceImpl *ctx,
+                                                     const Script *s)
+    : RsdCpuScriptIntrinsic(ctx, s, nullptr, RS_SCRIPT_INTRINSIC_ID_BLAS)
+{
+}
+
+// Nothing to release: the class declares no data members of its own.
+RsdCpuScriptIntrinsicBLAS::~RsdCpuScriptIntrinsicBLAS() {}
+
+
+
+
+
+// Factory called from RsdCpuReferenceImpl::createIntrinsic; e is not used.
+RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
+    RsdCpuScriptIntrinsicBLAS *intrinsic = new RsdCpuScriptIntrinsicBLAS(ctx, s);
+    return intrinsic;
+}
diff --git a/rsDefines.h b/rsDefines.h
index 69a62d6..4ccdeb8 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -254,7 +254,209 @@
RS_CONTEXT_LOW_POWER = 0x0004
};
+enum RsBlasTranspose {  // cast directly to enum CBLAS_TRANSPOSE by the CPU BLAS intrinsic; values must match cblas.h
+ RsBlasNoTrans=111,
+ RsBlasTrans=112,
+ RsBlasConjTrans=113
+};
+enum RsBlasUplo {  // cast directly to enum CBLAS_UPLO by the CPU BLAS intrinsic; values must match cblas.h
+ RsBlasUpper=121,
+ RsBlasLower=122
+};
+
+enum RsBlasDiag {  // cast directly to enum CBLAS_DIAG by the CPU BLAS intrinsic; values must match cblas.h
+ RsBlasNonUnit=131,
+ RsBlasUnit=132
+};
+
+enum RsBlasSide {  // cast directly to enum CBLAS_SIDE by the CPU BLAS intrinsic; values must match cblas.h
+ RsBlasLeft=141,
+ RsBlasRight=142
+};
+
+enum RsBlasFunction {  // routine selector stored in RsBlasCall::func; values are positional, so order matters
+ RsBlas_nop = 0,  // NOTE(review): presumably mirrored by client-side constants - confirm before reordering
+ RsBlas_sdsdot,  // Level 1 routines (through RsBlas_zdscal): no case in the CPU intrinsic's switch
+ RsBlas_dsdot,
+ RsBlas_sdot,
+ RsBlas_ddot,
+ RsBlas_cdotu_sub,
+ RsBlas_cdotc_sub,
+ RsBlas_zdotu_sub,
+ RsBlas_zdotc_sub,
+ RsBlas_snrm2,
+ RsBlas_sasum,
+ RsBlas_dnrm2,
+ RsBlas_dasum,
+ RsBlas_scnrm2,
+ RsBlas_scasum,
+ RsBlas_dznrm2,
+ RsBlas_dzasum,
+ RsBlas_isamax,
+ RsBlas_idamax,
+ RsBlas_icamax,
+ RsBlas_izamax,
+ RsBlas_sswap,
+ RsBlas_scopy,
+ RsBlas_saxpy,
+ RsBlas_dswap,
+ RsBlas_dcopy,
+ RsBlas_daxpy,
+ RsBlas_cswap,
+ RsBlas_ccopy,
+ RsBlas_caxpy,
+ RsBlas_zswap,
+ RsBlas_zcopy,
+ RsBlas_zaxpy,
+ RsBlas_srotg,
+ RsBlas_srotmg,
+ RsBlas_srot,
+ RsBlas_srotm,
+ RsBlas_drotg,
+ RsBlas_drotmg,
+ RsBlas_drot,
+ RsBlas_drotm,
+ RsBlas_sscal,
+ RsBlas_dscal,
+ RsBlas_cscal,
+ RsBlas_zscal,
+ RsBlas_csscal,
+ RsBlas_zdscal,
+ RsBlas_sgemv,  // Level 2 routines start here
+ RsBlas_sgbmv,
+ RsBlas_strmv,
+ RsBlas_stbmv,
+ RsBlas_stpmv,
+ RsBlas_strsv,
+ RsBlas_stbsv,
+ RsBlas_stpsv,
+ RsBlas_dgemv,
+ RsBlas_dgbmv,
+ RsBlas_dtrmv,
+ RsBlas_dtbmv,
+ RsBlas_dtpmv,
+ RsBlas_dtrsv,
+ RsBlas_dtbsv,
+ RsBlas_dtpsv,
+ RsBlas_cgemv,
+ RsBlas_cgbmv,
+ RsBlas_ctrmv,
+ RsBlas_ctbmv,
+ RsBlas_ctpmv,
+ RsBlas_ctrsv,
+ RsBlas_ctbsv,
+ RsBlas_ctpsv,
+ RsBlas_zgemv,
+ RsBlas_zgbmv,
+ RsBlas_ztrmv,
+ RsBlas_ztbmv,
+ RsBlas_ztpmv,
+ RsBlas_ztrsv,
+ RsBlas_ztbsv,
+ RsBlas_ztpsv,
+ RsBlas_ssymv,
+ RsBlas_ssbmv,
+ RsBlas_sspmv,
+ RsBlas_sger,
+ RsBlas_ssyr,
+ RsBlas_sspr,
+ RsBlas_ssyr2,
+ RsBlas_sspr2,
+ RsBlas_dsymv,
+ RsBlas_dsbmv,
+ RsBlas_dspmv,
+ RsBlas_dger,
+ RsBlas_dsyr,
+ RsBlas_dspr,
+ RsBlas_dsyr2,
+ RsBlas_dspr2,
+ RsBlas_chemv,
+ RsBlas_chbmv,
+ RsBlas_chpmv,
+ RsBlas_cgeru,
+ RsBlas_cgerc,
+ RsBlas_cher,
+ RsBlas_chpr,
+ RsBlas_cher2,
+ RsBlas_chpr2,
+ RsBlas_zhemv,
+ RsBlas_zhbmv,
+ RsBlas_zhpmv,
+ RsBlas_zgeru,
+ RsBlas_zgerc,
+ RsBlas_zher,
+ RsBlas_zhpr,
+ RsBlas_zher2,
+ RsBlas_zhpr2,
+ RsBlas_sgemm,  // Level 3 routines start here
+ RsBlas_ssymm,
+ RsBlas_ssyrk,
+ RsBlas_ssyr2k,
+ RsBlas_strmm,
+ RsBlas_strsm,
+ RsBlas_dgemm,
+ RsBlas_dsymm,
+ RsBlas_dsyrk,
+ RsBlas_dsyr2k,
+ RsBlas_dtrmm,
+ RsBlas_dtrsm,
+ RsBlas_cgemm,
+ RsBlas_csymm,
+ RsBlas_csyrk,
+ RsBlas_csyr2k,
+ RsBlas_ctrmm,
+ RsBlas_ctrsm,
+ RsBlas_zgemm,
+ RsBlas_zsymm,
+ RsBlas_zsyrk,
+ RsBlas_zsyr2k,
+ RsBlas_ztrmm,
+ RsBlas_ztrsm,
+ RsBlas_chemm,
+ RsBlas_cherk,
+ RsBlas_cher2k,
+ RsBlas_zhemm,
+ RsBlas_zherk,
+ RsBlas_zher2k
+};
+
+// custom complex types because of NDK support
+typedef struct {  // single-precision complex scalar; its address is handed to the cblas_c* routines as void*
+ float r;  // real part
+ float i;  // imaginary part
+} RsFloatComplex;
+
+typedef struct {  // double-precision complex scalar; its address is handed to the cblas_z* routines as void*
+ double r;  // real part
+ double i;  // imaginary part
+} RsDoubleComplex;
+
+typedef union {  // one scalar argument (alpha or beta); which member is read depends on the routine
+ float f;  // s* routines, plus the real-valued scalars of cher/chpr/cherk and beta of cher2k
+ RsFloatComplex c;  // complex scalars of the c* routines
+ double d;  // d* routines, plus the real-valued scalars of zher/zhpr/zherk and beta of zher2k
+ RsDoubleComplex z;  // complex scalars of the z* routines
+} RsBlasScalar;
+
+typedef struct {  // argument bundle the CPU BLAS intrinsic reads from invokeForEach's usr pointer
+ RsBlasFunction func;  // routine to run; drives the switch in invokeForEach
+ RsBlasTranspose transA;  // cast to CBLAS_TRANSPOSE for operand A
+ RsBlasTranspose transB;  // cast to CBLAS_TRANSPOSE for operand B
+ RsBlasUplo uplo;  // cast to CBLAS_UPLO
+ RsBlasDiag diag;  // cast to CBLAS_DIAG
+ RsBlasSide side;  // cast to CBLAS_SIDE
+ int M;  // dimension arguments forwarded unchanged to CBLAS
+ int N;
+ int K;
+ RsBlasScalar alpha;  // scalar forwarded to CBLAS; member used depends on func
+ RsBlasScalar beta;  // scalar forwarded to CBLAS; member used depends on func
+ int incX;  // increment forwarded with the X vector operand
+ int incY;  // increment forwarded with the Y vector operand
+ int KL;  // band-width arguments, read only by the *gbmv cases
+ int KU;
+} RsBlasCall;
+
#ifdef __cplusplus
};
#endif
diff --git a/rsInternalDefines.h b/rsInternalDefines.h
index 2a3f3fd..8a62e40 100644
--- a/rsInternalDefines.h
+++ b/rsInternalDefines.h
@@ -189,7 +189,7 @@
RS_SCRIPT_INTRINSIC_ID_HISTOGRAM = 9,
// unused 10, 11
RS_SCRIPT_INTRINSIC_ID_RESIZE = 12,
-
+ RS_SCRIPT_INTRINSIC_ID_BLAS = 13,
RS_SCRIPT_INTRINSIC_ID_OEM_START = 0x10000000
};