blob: 49a71b430ac67b4b3c0939a36c1b3b3d92e9971c [file] [log] [blame]
Tim Murray25207df2015-01-12 16:47:56 -08001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.renderscript;
18
19import android.annotation.IntDef;
20import java.lang.annotation.Retention;
21import java.lang.annotation.RetentionPolicy;
22
23/**
24 *
Miao Wangfb675a52015-05-12 18:22:20 -070025 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
26 *
27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
28 * building blocks for performing basic vector and matrix operations.
29 *
30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
Tim Murray25207df2015-01-12 16:47:56 -080031 *
Tim Murray25207df2015-01-12 16:47:56 -080032 **/
33public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
// NOTE(review): not referenced anywhere in the visible part of this class; looks like a
// leftover field - confirm against the rest of the file before removing.
34 private Allocation mLUT;
35
/**
 * Wraps an already-created native intrinsic handle. Private, so within the visible code
 * instances are only obtained through {@link #create(RenderScript)}.
 *
 * @param id handle forwarded unchanged to the superclass constructor
 * @param rs context forwarded unchanged to the superclass constructor
 */
private ScriptIntrinsicBLAS(long id, RenderScript rs) {
    super(id, rs);
}
39
// Routine selectors. Each public BLAS method in this class passes one of these as the second
// argument of the matching mRS.nScriptIntrinsicBLAS_* call to say which routine should run.
// NOTE(review): the numeric values presumably have to stay in sync with the native RenderScript
// BLAS enumeration - confirm before renumbering or inserting entries.
40 private static final int RsBlas_sdsdot = 1;
41 private static final int RsBlas_dsdot = 2;
42 private static final int RsBlas_sdot = 3;
43 private static final int RsBlas_ddot = 4;
44 private static final int RsBlas_cdotu_sub = 5;
45 private static final int RsBlas_cdotc_sub = 6;
46 private static final int RsBlas_zdotu_sub = 7;
47 private static final int RsBlas_zdotc_sub = 8;
48 private static final int RsBlas_snrm2 = 9;
49 private static final int RsBlas_sasum = 10;
50 private static final int RsBlas_dnrm2 = 11;
51 private static final int RsBlas_dasum = 12;
52 private static final int RsBlas_scnrm2 = 13;
53 private static final int RsBlas_scasum = 14;
54 private static final int RsBlas_dznrm2 = 15;
55 private static final int RsBlas_dzasum = 16;
56 private static final int RsBlas_isamax = 17;
57 private static final int RsBlas_idamax = 18;
58 private static final int RsBlas_icamax = 19;
59 private static final int RsBlas_izamax = 20;
60 private static final int RsBlas_sswap = 21;
61 private static final int RsBlas_scopy = 22;
62 private static final int RsBlas_saxpy = 23;
63 private static final int RsBlas_dswap = 24;
64 private static final int RsBlas_dcopy = 25;
65 private static final int RsBlas_daxpy = 26;
66 private static final int RsBlas_cswap = 27;
67 private static final int RsBlas_ccopy = 28;
68 private static final int RsBlas_caxpy = 29;
69 private static final int RsBlas_zswap = 30;
70 private static final int RsBlas_zcopy = 31;
71 private static final int RsBlas_zaxpy = 32;
72 private static final int RsBlas_srotg = 33;
73 private static final int RsBlas_srotmg = 34;
74 private static final int RsBlas_srot = 35;
75 private static final int RsBlas_srotm = 36;
76 private static final int RsBlas_drotg = 37;
77 private static final int RsBlas_drotmg = 38;
78 private static final int RsBlas_drot = 39;
79 private static final int RsBlas_drotm = 40;
80 private static final int RsBlas_sscal = 41;
81 private static final int RsBlas_dscal = 42;
82 private static final int RsBlas_cscal = 43;
83 private static final int RsBlas_zscal = 44;
84 private static final int RsBlas_csscal = 45;
85 private static final int RsBlas_zdscal = 46;
86 private static final int RsBlas_sgemv = 47;
87 private static final int RsBlas_sgbmv = 48;
88 private static final int RsBlas_strmv = 49;
89 private static final int RsBlas_stbmv = 50;
90 private static final int RsBlas_stpmv = 51;
91 private static final int RsBlas_strsv = 52;
92 private static final int RsBlas_stbsv = 53;
93 private static final int RsBlas_stpsv = 54;
94 private static final int RsBlas_dgemv = 55;
95 private static final int RsBlas_dgbmv = 56;
96 private static final int RsBlas_dtrmv = 57;
97 private static final int RsBlas_dtbmv = 58;
98 private static final int RsBlas_dtpmv = 59;
99 private static final int RsBlas_dtrsv = 60;
100 private static final int RsBlas_dtbsv = 61;
101 private static final int RsBlas_dtpsv = 62;
102 private static final int RsBlas_cgemv = 63;
103 private static final int RsBlas_cgbmv = 64;
104 private static final int RsBlas_ctrmv = 65;
105 private static final int RsBlas_ctbmv = 66;
106 private static final int RsBlas_ctpmv = 67;
107 private static final int RsBlas_ctrsv = 68;
108 private static final int RsBlas_ctbsv = 69;
109 private static final int RsBlas_ctpsv = 70;
110 private static final int RsBlas_zgemv = 71;
111 private static final int RsBlas_zgbmv = 72;
112 private static final int RsBlas_ztrmv = 73;
113 private static final int RsBlas_ztbmv = 74;
114 private static final int RsBlas_ztpmv = 75;
115 private static final int RsBlas_ztrsv = 76;
116 private static final int RsBlas_ztbsv = 77;
117 private static final int RsBlas_ztpsv = 78;
118 private static final int RsBlas_ssymv = 79;
119 private static final int RsBlas_ssbmv = 80;
120 private static final int RsBlas_sspmv = 81;
121 private static final int RsBlas_sger = 82;
122 private static final int RsBlas_ssyr = 83;
123 private static final int RsBlas_sspr = 84;
124 private static final int RsBlas_ssyr2 = 85;
125 private static final int RsBlas_sspr2 = 86;
126 private static final int RsBlas_dsymv = 87;
127 private static final int RsBlas_dsbmv = 88;
128 private static final int RsBlas_dspmv = 89;
129 private static final int RsBlas_dger = 90;
130 private static final int RsBlas_dsyr = 91;
131 private static final int RsBlas_dspr = 92;
132 private static final int RsBlas_dsyr2 = 93;
133 private static final int RsBlas_dspr2 = 94;
134 private static final int RsBlas_chemv = 95;
135 private static final int RsBlas_chbmv = 96;
136 private static final int RsBlas_chpmv = 97;
137 private static final int RsBlas_cgeru = 98;
138 private static final int RsBlas_cgerc = 99;
139 private static final int RsBlas_cher = 100;
140 private static final int RsBlas_chpr = 101;
141 private static final int RsBlas_cher2 = 102;
142 private static final int RsBlas_chpr2 = 103;
143 private static final int RsBlas_zhemv = 104;
144 private static final int RsBlas_zhbmv = 105;
145 private static final int RsBlas_zhpmv = 106;
146 private static final int RsBlas_zgeru = 107;
147 private static final int RsBlas_zgerc = 108;
148 private static final int RsBlas_zher = 109;
149 private static final int RsBlas_zhpr = 110;
150 private static final int RsBlas_zher2 = 111;
151 private static final int RsBlas_zhpr2 = 112;
152 private static final int RsBlas_sgemm = 113;
153 private static final int RsBlas_ssymm = 114;
154 private static final int RsBlas_ssyrk = 115;
155 private static final int RsBlas_ssyr2k = 116;
156 private static final int RsBlas_strmm = 117;
157 private static final int RsBlas_strsm = 118;
158 private static final int RsBlas_dgemm = 119;
159 private static final int RsBlas_dsymm = 120;
160 private static final int RsBlas_dsyrk = 121;
161 private static final int RsBlas_dsyr2k = 122;
162 private static final int RsBlas_dtrmm = 123;
163 private static final int RsBlas_dtrsm = 124;
164 private static final int RsBlas_cgemm = 125;
165 private static final int RsBlas_csymm = 126;
166 private static final int RsBlas_csyrk = 127;
167 private static final int RsBlas_csyr2k = 128;
168 private static final int RsBlas_ctrmm = 129;
169 private static final int RsBlas_ctrsm = 130;
170 private static final int RsBlas_zgemm = 131;
171 private static final int RsBlas_zsymm = 132;
172 private static final int RsBlas_zsyrk = 133;
173 private static final int RsBlas_zsyr2k = 134;
174 private static final int RsBlas_ztrmm = 135;
175 private static final int RsBlas_ztrsm = 136;
176 private static final int RsBlas_chemm = 137;
177 private static final int RsBlas_cherk = 138;
178 private static final int RsBlas_cher2k = 139;
179 private static final int RsBlas_zhemm = 140;
180 private static final int RsBlas_zherk = 141;
181 private static final int RsBlas_zher2k = 142;
182
Tim Murray9cb16a22015-04-01 11:07:16 -0700183 // BLAS extensions start here
// Selector for the BNNM extension routine; numbered well away from the 1..142 range used by
// the standard routine selectors. Not referenced in the visible part of this class.
184 private static final int RsBlas_bnnm = 1000;
185
Tim Murray25207df2015-01-12 16:47:56 -0800186 /**
Miao Wangfb675a52015-05-12 18:22:20 -0700187 * Create an intrinsic to access BLAS subroutines.
188 *
189 * @param rs The RenderScript context
190 * @return ScriptIntrinsicBLAS
Tim Murray25207df2015-01-12 16:47:56 -0800191 */
192 public static ScriptIntrinsicBLAS create(RenderScript rs) {
193 long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs));
194 return new ScriptIntrinsicBLAS(id, rs);
195 }
196
Miao Wangfb675a52015-05-12 18:22:20 -0700197 /**
198 * @hide
199 */
Tim Murray25207df2015-01-12 16:47:56 -0800200 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
201 @Retention(RetentionPolicy.SOURCE)
202 public @interface Transpose {}
203
Miao Wangfb675a52015-05-12 18:22:20 -0700204 /**
205 * @hide
206 */
Tim Murray25207df2015-01-12 16:47:56 -0800207 @IntDef({UPPER, LOWER})
208 @Retention(RetentionPolicy.SOURCE)
209 public @interface Uplo {}
210
Miao Wangfb675a52015-05-12 18:22:20 -0700211 /**
212 * @hide
213 */
Tim Murray25207df2015-01-12 16:47:56 -0800214 @IntDef({NON_UNIT, UNIT})
215 @Retention(RetentionPolicy.SOURCE)
216 public @interface Diag {}
217
Miao Wangfb675a52015-05-12 18:22:20 -0700218 /**
219 * @hide
220 */
Tim Murray25207df2015-01-12 16:47:56 -0800221 @IntDef({LEFT, RIGHT})
222 @Retention(RetentionPolicy.SOURCE)
223 public @interface Side {}
224
// NOTE(review): the numeric values in this group look like the CBLAS enumeration values
// (CblasNoTrans = 111, CblasUpper = 121, ...) - confirm before changing any of them.

// Transpose selectors; the full set accepted by validateTranspose().
225 public static final int NO_TRANSPOSE = 111;
226 public static final int TRANSPOSE = 112;
227 public static final int CONJ_TRANSPOSE = 113;
228
// Triangle selectors; the set accepted by validateUplo().
229 public static final int UPPER = 121;
230 public static final int LOWER = 122;
231
// Diagonal kinds; the set accepted by validateDiag().
232 public static final int NON_UNIT = 131;
233 public static final int UNIT = 132;
234
// Side selectors; the set accepted by validateSide().
235 public static final int LEFT = 141;
236 public static final int RIGHT = 142;
237
238 static void validateSide(@Side int Side) {
239 if (Side != LEFT && Side != RIGHT) {
240 throw new RSRuntimeException("Invalid side passed to BLAS");
241 }
242 }
243
244 static void validateTranspose(@Transpose int Trans) {
245 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
246 Trans != CONJ_TRANSPOSE) {
247 throw new RSRuntimeException("Invalid transpose passed to BLAS");
248 }
249 }
250
251 static void validateConjTranspose(@Transpose int Trans) {
252 if (Trans != NO_TRANSPOSE &&
253 Trans != CONJ_TRANSPOSE) {
254 throw new RSRuntimeException("Invalid transpose passed to BLAS");
255 }
256 }
257
258 static void validateDiag(@Diag int Diag) {
259 if (Diag != NON_UNIT && Diag != UNIT) {
260 throw new RSRuntimeException("Invalid diag passed to BLAS");
261 }
262 }
263
264 static void validateUplo(@Uplo int Uplo) {
Miao Wang37ae07c2015-04-24 11:19:53 -0700265 if (Uplo != UPPER && Uplo != LOWER) {
Tim Murray25207df2015-01-12 16:47:56 -0800266 throw new RSRuntimeException("Invalid uplo passed to BLAS");
267 }
268 }
269
270
271 /**
272 * Level 2 BLAS
273 */
274
275 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
276 validateTranspose(TransA);
277 int M = A.getType().getY();
278 int N = A.getType().getX();
279 if (!A.getType().getElement().isCompatible(e) ||
280 !X.getType().getElement().isCompatible(e) ||
281 !Y.getType().getElement().isCompatible(e)) {
282 throw new RSRuntimeException("Called BLAS with wrong Element type");
283 }
284 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
285 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
286 }
287
288 if (incX <= 0 || incY <= 0) {
289 throw new RSRuntimeException("Vector increments must be greater than 0");
290 }
291 int expectedXDim = -1, expectedYDim = -1;
292 if (TransA == NO_TRANSPOSE) {
293 expectedXDim = 1 + (N - 1) * incX;
294 expectedYDim = 1 + (M - 1) * incY;
295 } else {
296 expectedXDim = 1 + (M - 1) * incX;
297 expectedYDim = 1 + (N - 1) * incY;
298 }
299 if (X.getType().getX() != expectedXDim ||
Miao Wang68ca43e2015-04-23 15:06:09 -0700300 Y.getType().getX() != expectedYDim) {
Tim Murray25207df2015-01-12 16:47:56 -0800301 throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
302 }
303 }
Miao Wangfb675a52015-05-12 18:22:20 -0700304
305 /**
306 * SGEMV performs one of the matrix-vector operations
307 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y
308 *
309 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
310 *
311 * @param TransA The type of transpose applied to matrix A.
312 * @param alpha The scalar alpha.
313 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
314 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
315 * @param incX The increment for the elements of vector x, must be larger than zero.
316 * @param beta The scalar beta.
317 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
318 * @param incY The increment for the elements of vector y, must be larger than zero.
319 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700320 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800321 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
322 int M = A.getType().getY();
323 int N = A.getType().getX();
324 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
325 }
Miao Wangfb675a52015-05-12 18:22:20 -0700326
327 /**
328 * DGEMV performs one of the matrix-vector operations
329 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y
330 *
331 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
332 *
333 * @param TransA The type of transpose applied to matrix A.
334 * @param alpha The scalar alpha.
335 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
336 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
337 * @param incX The increment for the elements of vector x, must be larger than zero.
338 * @param beta The scalar beta.
339 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
340 * @param incY The increment for the elements of vector y, must be larger than zero.
341 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700342 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800343 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
344 int M = A.getType().getY();
345 int N = A.getType().getX();
346 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
347 }
Miao Wangfb675a52015-05-12 18:22:20 -0700348
349 /**
350 * CGEMV performs one of the matrix-vector operations
351 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
352 *
353 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
354 *
355 * @param TransA The type of transpose applied to matrix A.
356 * @param alpha The scalar alpha.
357 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
358 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
359 * @param incX The increment for the elements of vector x, must be larger than zero.
360 * @param beta The scalar beta.
361 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
362 * @param incY The increment for the elements of vector y, must be larger than zero.
363 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700364 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800365 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
366 int M = A.getType().getY();
367 int N = A.getType().getX();
368 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
369 }
Miao Wangfb675a52015-05-12 18:22:20 -0700370
371 /**
372 * ZGEMV performs one of the matrix-vector operations
373 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
374 *
375 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
376 *
377 * @param TransA The type of transpose applied to matrix A.
378 * @param alpha The scalar alpha.
379 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
380 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
381 * @param incX The increment for the elements of vector x, must be larger than zero.
382 * @param beta The scalar beta.
383 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
384 * @param incY The increment for the elements of vector y, must be larger than zero.
385 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700386 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800387 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
388 int M = A.getType().getY();
389 int N = A.getType().getX();
390 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
391 }
392
Miao Wangfb675a52015-05-12 18:22:20 -0700393 /**
394 * SGBMV performs one of the matrix-vector operations
395 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y
396 *
397 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
398 *
399 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
400 * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
401 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
402 * for i in range(0, m):
403 * for j in range(max(0, i-kl), min(i+ku+1, n)):
404 * b[i, j-i+kl] = a[i, j]
405 *
406 * @param TransA The type of transpose applied to matrix A.
407 * @param KL The number of sub-diagonals of the matrix A.
408 * @param KU The number of super-diagonals of the matrix A.
409 * @param alpha The scalar alpha.
410 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
411 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
412 * @param incX The increment for the elements of vector x, must be larger than zero.
413 * @param beta The scalar beta.
414 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
415 * @param incY The increment for the elements of vector y, must be larger than zero.
416 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700417 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800418 // GBMV has the same validation requirements as GEMV + KL and KU >= 0
419 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
420 if (KL < 0 || KU < 0) {
421 throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
422 }
423 int M = A.getType().getY();
424 int N = A.getType().getX();
425 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
426 }
Miao Wangfb675a52015-05-12 18:22:20 -0700427
428 /**
429 * DGBMV performs one of the matrix-vector operations
430 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y
431 *
432 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
433 *
434 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
435 * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
436 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
437 * for i in range(0, m):
438 * for j in range(max(0, i-kl), min(i+ku+1, n)):
439 * b[i, j-i+kl] = a[i, j]
440 *
441 * @param TransA The type of transpose applied to matrix A.
442 * @param KL The number of sub-diagonals of the matrix A.
443 * @param KU The number of super-diagonals of the matrix A.
444 * @param alpha The scalar alpha.
445 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
446 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
447 * @param incX The increment for the elements of vector x, must be larger than zero.
448 * @param beta The scalar beta.
449 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
450 * @param incY The increment for the elements of vector y, must be larger than zero.
451 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700452 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800453 // GBMV has the same validation requirements as GEMV + KL and KU >= 0
454 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
455 if (KL < 0 || KU < 0) {
456 throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
457 }
458 int M = A.getType().getY();
459 int N = A.getType().getX();
460 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
461 }
Miao Wangfb675a52015-05-12 18:22:20 -0700462
463 /**
464 * CGBMV performs one of the matrix-vector operations
465 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
466 *
467 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
468 *
469 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
470 * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
471 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
472 * for i in range(0, m):
473 * for j in range(max(0, i-kl), min(i+ku+1, n)):
474 * b[i, j-i+kl] = a[i, j]
475 *
476 * @param TransA The type of transpose applied to matrix A.
477 * @param KL The number of sub-diagonals of the matrix A.
478 * @param KU The number of super-diagonals of the matrix A.
479 * @param alpha The scalar alpha.
480 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
481 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
482 * @param incX The increment for the elements of vector x, must be larger than zero.
483 * @param beta The scalar beta.
484 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
485 * @param incY The increment for the elements of vector y, must be larger than zero.
486 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700487 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800488 // GBMV has the same validation requirements as GEMV + KL and KU >= 0
489 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
490 if (KL < 0 || KU < 0) {
491 throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
492 }
493 int M = A.getType().getY();
494 int N = A.getType().getX();
495 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
496 }
Miao Wangfb675a52015-05-12 18:22:20 -0700497
498 /**
499 * ZGBMV performs one of the matrix-vector operations
500 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y
501 *
502 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
503 *
504 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
505 * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
506 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
507 * for i in range(0, m):
508 * for j in range(max(0, i-kl), min(i+ku+1, n)):
509 * b[i, j-i+kl] = a[i, j]
510 *
511 * @param TransA The type of transpose applied to matrix A.
512 * @param KL The number of sub-diagonals of the matrix A.
513 * @param KU The number of super-diagonals of the matrix A.
514 * @param alpha The scalar alpha.
515 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
516 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
517 * @param incX The increment for the elements of vector x, must be larger than zero.
518 * @param beta The scalar beta.
519 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
520 * @param incY The increment for the elements of vector y, must be larger than zero.
521 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700522 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -0800523 // GBMV has the same validation requirements as GEMV + KL and KU >= 0
524 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
525 if (KL < 0 || KU < 0) {
526 throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
527 }
528 int M = A.getType().getY();
529 int N = A.getType().getX();
530 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
531 }
532
Miao Wang68ca43e2015-04-23 15:06:09 -0700533 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800534 validateTranspose(TransA);
Miao Wang68ca43e2015-04-23 15:06:09 -0700535 validateUplo(Uplo);
536 validateDiag(Diag);
Tim Murray25207df2015-01-12 16:47:56 -0800537 int N = A.getType().getY();
538 if (A.getType().getX() != N) {
539 throw new RSRuntimeException("A must be a square matrix for TRMV");
540 }
541 if (!A.getType().getElement().isCompatible(e) ||
542 !X.getType().getElement().isCompatible(e)) {
543 throw new RSRuntimeException("Called BLAS with wrong Element type");
544 }
545 if (X.getType().getY() > 1) {
546 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
547 }
548
549 if (incX <= 0) {
550 throw new RSRuntimeException("Vector increments must be greater than 0");
551 }
552 int expectedXDim = 1 + (N - 1) * incX;
553 if (X.getType().getX() != expectedXDim) {
554 throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
555 }
556 }
557
558 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
559 validateTranspose(TransA);
560 validateUplo(Uplo);
561 validateDiag(Diag);
562 if (!Ap.getType().getElement().isCompatible(e) ||
563 !X.getType().getElement().isCompatible(e)) {
564 throw new RSRuntimeException("Called BLAS with wrong Element type");
565 }
566 if (X.getType().getY() > 1) {
567 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
568 }
569
570 if (Ap.getType().getY() > 1) {
571 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
572 }
573
574 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
Miao Wang68ca43e2015-04-23 15:06:09 -0700575 //is it really doing anything?
Tim Murray25207df2015-01-12 16:47:56 -0800576 if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
577 throw new RSRuntimeException("Invalid dimension for Ap");
578 }
Miao Wang68ca43e2015-04-23 15:06:09 -0700579 if (incX <= 0) {
580 throw new RSRuntimeException("Vector increments must be greater than 0");
581 }
Tim Murray25207df2015-01-12 16:47:56 -0800582 int expectedXDim = 1 + (N - 1) * incX;
583 if (X.getType().getX() != expectedXDim) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700584 throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
Tim Murray25207df2015-01-12 16:47:56 -0800585 }
586
587 return N;
588 }
589
Miao Wangfb675a52015-05-12 18:22:20 -0700590 /**
591 * STRMV performs one of the matrix-vector operations
592 * x := A*x or x := A**T*x
593 *
594 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
595 *
596 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
597 * @param TransA The type of transpose applied to matrix A.
598 * @param Diag Specifies whether or not A is unit triangular.
599 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
600 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
601 * @param incX The increment for the elements of vector x, must be larger than zero.
602 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700603 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700604 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800605 int N = A.getType().getY();
606 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
607 }
Miao Wangfb675a52015-05-12 18:22:20 -0700608
609 /**
610 * DTRMV performs one of the matrix-vector operations
611 * x := A*x or x := A**T*x
612 *
613 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
614 *
615 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
616 * @param TransA The type of transpose applied to matrix A.
617 * @param Diag Specifies whether or not A is unit triangular.
618 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
619 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
620 * @param incX The increment for the elements of vector x, must be larger than zero.
621 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700622 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700623 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800624 int N = A.getType().getY();
625 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
626 }
Miao Wangfb675a52015-05-12 18:22:20 -0700627
628 /**
629 * CTRMV performs one of the matrix-vector operations
630 * x := A*x or x := A**T*x or x := A**H*x
631 *
632 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
633 *
634 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
635 * @param TransA The type of transpose applied to matrix A.
636 * @param Diag Specifies whether or not A is unit triangular.
637 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
638 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
639 * @param incX The increment for the elements of vector x, must be larger than zero.
640 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700641 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700642 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800643 int N = A.getType().getY();
644 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
645 }
Miao Wangfb675a52015-05-12 18:22:20 -0700646
647 /**
648 * ZTRMV performs one of the matrix-vector operations
649 * x := A*x or x := A**T*x or x := A**H*x
650 *
651 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
652 *
653 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
654 * @param TransA The type of transpose applied to matrix A.
655 * @param Diag Specifies whether or not A is unit triangular.
656 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
657 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
658 * @param incX The increment for the elements of vector x, must be larger than zero.
659 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700660 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700661 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800662 int N = A.getType().getY();
663 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
664 }
Miao Wang68ca43e2015-04-23 15:06:09 -0700665
Miao Wangfb675a52015-05-12 18:22:20 -0700666 /**
667 * STBMV performs one of the matrix-vector operations
668 * x := A*x or x := A**T*x
669 *
670 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
671 *
672 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
673 * but only the region N*(K+1) will be referenced. The following subroutine can is an
674 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
675 * for i in range(0, n):
676 * for j in range(i, min(i+k+1, n)):
677 * b[i, j-i] = a[i, j]
678 *
679 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
680 * @param TransA The type of transpose applied to matrix A.
681 * @param Diag Specifies whether or not A is unit triangular.
682 * @param K The number of off-diagonals of the matrix A
683 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
684 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
685 * @param incX The increment for the elements of vector x, must be larger than zero.
686 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700687 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700688 // TBMV has the same requirements as TRMV + K >= 0
689 if (K < 0) {
690 throw new RSRuntimeException("K must be greater than or equal to 0");
691 }
692 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800693 int N = A.getType().getY();
694 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
695 }
Miao Wangfb675a52015-05-12 18:22:20 -0700696
697 /**
698 * DTBMV performs one of the matrix-vector operations
699 * x := A*x or x := A**T*x
700 *
701 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
702 *
703 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
704 * but only the region N*(K+1) will be referenced. The following subroutine can is an
705 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
706 * for i in range(0, n):
707 * for j in range(i, min(i+k+1, n)):
708 * b[i, j-i] = a[i, j]
709 *
710 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
711 * @param TransA The type of transpose applied to matrix A.
712 * @param Diag Specifies whether or not A is unit triangular.
713 * @param K The number of off-diagonals of the matrix A
714 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
715 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
716 * @param incX The increment for the elements of vector x, must be larger than zero.
717 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700718 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700719 // TBMV has the same requirements as TRMV + K >= 0
720 if (K < 0) {
721 throw new RSRuntimeException("K must be greater than or equal to 0");
722 }
723 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800724 int N = A.getType().getY();
725 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
726 }
Miao Wangfb675a52015-05-12 18:22:20 -0700727
728 /**
729 * CTBMV performs one of the matrix-vector operations
730 * x := A*x or x := A**T*x or x := A**H*x
731 *
732 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
733 *
734 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
735 * but only the region N*(K+1) will be referenced. The following subroutine can is an
736 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
737 * for i in range(0, n):
738 * for j in range(i, min(i+k+1, n)):
739 * b[i, j-i] = a[i, j]
740 *
741 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
742 * @param TransA The type of transpose applied to matrix A.
743 * @param Diag Specifies whether or not A is unit triangular.
744 * @param K The number of off-diagonals of the matrix A
745 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
746 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
747 * @param incX The increment for the elements of vector x, must be larger than zero.
748 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700749 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700750 // TBMV has the same requirements as TRMV + K >= 0
751 if (K < 0) {
752 throw new RSRuntimeException("K must be greater than or equal to 0");
753 }
754 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800755 int N = A.getType().getY();
756 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
757 }
Miao Wangfb675a52015-05-12 18:22:20 -0700758
759 /**
760 * ZTBMV performs one of the matrix-vector operations
761 * x := A*x or x := A**T*x or x := A**H*x
762 *
763 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
764 *
765 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
766 * but only the region N*(K+1) will be referenced. The following subroutine can is an
767 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
768 * for i in range(0, n):
769 * for j in range(i, min(i+k+1, n)):
770 * b[i, j-i] = a[i, j]
771 *
772 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
773 * @param TransA The type of transpose applied to matrix A.
774 * @param Diag Specifies whether or not A is unit triangular.
775 * @param K The number of off-diagonals of the matrix A
776 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
777 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
778 * @param incX The increment for the elements of vector x, must be larger than zero.
779 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700780 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -0700781 // TBMV has the same requirements as TRMV + K >= 0
782 if (K < 0) {
783 throw new RSRuntimeException("K must be greater than or equal to 0");
784 }
785 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800786 int N = A.getType().getY();
787 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
788 }
Miao Wangfb675a52015-05-12 18:22:20 -0700789
790 /**
791 * STPMV performs one of the matrix-vector operations
792 * x := A*x or x := A**T*x
793 *
794 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
795 *
796 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
797 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
798 * 'a' to packed matrix 'b'.
799 * k = 0
800 * for i in range(0, n):
801 * for j in range(i, n):
802 * b[k++] = a[i, j]
803 *
804 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
805 * @param TransA The type of transpose applied to matrix A.
806 * @param Diag Specifies whether or not A is unit triangular.
807 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
808 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
809 * @param incX The increment for the elements of vector x, must be larger than zero.
810 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700811 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800812 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
813 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
814 }
Miao Wangfb675a52015-05-12 18:22:20 -0700815
816 /**
817 * DTPMV performs one of the matrix-vector operations
818 * x := A*x or x := A**T*x
819 *
820 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
821 *
822 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
823 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
824 * 'a' to packed matrix 'b'.
825 * k = 0
826 * for i in range(0, n):
827 * for j in range(i, n):
828 * b[k++] = a[i, j]
829 *
830 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
831 * @param TransA The type of transpose applied to matrix A.
832 * @param Diag Specifies whether or not A is unit triangular.
833 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
834 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
835 * @param incX The increment for the elements of vector x, must be larger than zero.
836 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700837 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800838 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
839 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
840 }
Miao Wangfb675a52015-05-12 18:22:20 -0700841
842 /**
843 * CTPMV performs one of the matrix-vector operations
844 * x := A*x or x := A**T*x or x := A**H*x
845 *
846 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
847 *
848 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
849 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
850 * 'a' to packed matrix 'b'.
851 * k = 0
852 * for i in range(0, n):
853 * for j in range(i, n):
854 * b[k++] = a[i, j]
855 *
856 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
857 * @param TransA The type of transpose applied to matrix A.
858 * @param Diag Specifies whether or not A is unit triangular.
859 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
860 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
861 * @param incX The increment for the elements of vector x, must be larger than zero.
862 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700863 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800864 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
865 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
866 }
Miao Wangfb675a52015-05-12 18:22:20 -0700867
868 /**
869 * ZTPMV performs one of the matrix-vector operations
870 * x := A*x or x := A**T*x or x := A**H*x
871 *
872 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
873 *
874 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
875 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
876 * 'a' to packed matrix 'b'.
877 * k = 0
878 * for i in range(0, n):
879 * for j in range(i, n):
880 * b[k++] = a[i, j]
881 *
882 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
883 * @param TransA The type of transpose applied to matrix A.
884 * @param Diag Specifies whether or not A is unit triangular.
885 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
886 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
887 * @param incX The increment for the elements of vector x, must be larger than zero.
888 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700889 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800890 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
891 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
892 }
Miao Wangfb675a52015-05-12 18:22:20 -0700893
894 /**
895 * STRSV solves one of the systems of equations
896 * A*x = b or A**T*x = b
897 *
898 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
899 *
900 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
901 * @param TransA The type of transpose applied to matrix A.
902 * @param Diag Specifies whether or not A is unit triangular.
903 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
904 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
905 * @param incX The increment for the elements of vector x, must be larger than zero.
906 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700907 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800908 // TRSV is the same as TRMV
Miao Wang68ca43e2015-04-23 15:06:09 -0700909 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800910 int N = A.getType().getY();
911 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
912
913 }
Miao Wangfb675a52015-05-12 18:22:20 -0700914
915 /**
916 * DTRSV solves one of the systems of equations
917 * A*x = b or A**T*x = b
918 *
919 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
920 *
921 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
922 * @param TransA The type of transpose applied to matrix A.
923 * @param Diag Specifies whether or not A is unit triangular.
924 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
925 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
926 * @param incX The increment for the elements of vector x, must be larger than zero.
927 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700928 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800929 // TRSV is the same as TRMV
Miao Wang68ca43e2015-04-23 15:06:09 -0700930 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800931 int N = A.getType().getY();
932 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
933
934 }
Miao Wangfb675a52015-05-12 18:22:20 -0700935
936 /**
937 * CTRSV solves one of the systems of equations
938 * A*x = b or A**T*x = b or A**H*x = b
939 *
940 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
941 *
942 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
943 * @param TransA The type of transpose applied to matrix A.
944 * @param Diag Specifies whether or not A is unit triangular.
945 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
946 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
947 * @param incX The increment for the elements of vector x, must be larger than zero.
948 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700949 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800950 // TRSV is the same as TRMV
Miao Wang68ca43e2015-04-23 15:06:09 -0700951 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800952 int N = A.getType().getY();
953 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
954
955 }
Miao Wangfb675a52015-05-12 18:22:20 -0700956
957 /**
958 * ZTRSV solves one of the systems of equations
959 * A*x = b or A**T*x = b or A**H*x = b
960 *
961 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
962 *
963 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
964 * @param TransA The type of transpose applied to matrix A.
965 * @param Diag Specifies whether or not A is unit triangular.
966 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
967 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
968 * @param incX The increment for the elements of vector x, must be larger than zero.
969 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700970 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -0800971 // TRSV is the same as TRMV
Miao Wang68ca43e2015-04-23 15:06:09 -0700972 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -0800973 int N = A.getType().getY();
974 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
975
976 }
Miao Wangfb675a52015-05-12 18:22:20 -0700977
978 /**
979 * STBSV solves one of the systems of equations
980 * A*x = b or A**T*x = b
981 *
982 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
983 *
984 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
985 * but only the region N*(K+1) will be referenced. The following subroutine can is an
986 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
987 * for i in range(0, n):
988 * for j in range(i, min(i+k+1, n)):
989 * b[i, j-i] = a[i, j]
990 *
991 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
992 * @param TransA The type of transpose applied to matrix A.
993 * @param Diag Specifies whether or not A is unit triangular.
994 * @param K The number of off-diagonals of the matrix A
995 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
996 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
997 * @param incX The increment for the elements of vector x, must be larger than zero.
998 */
Miao Wang89c3a5f2015-04-23 15:20:11 -0700999 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001000 // TBSV is the same as TRMV + K >= 0
1001 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -08001002 int N = A.getType().getY();
1003 if (K < 0) {
1004 throw new RSRuntimeException("Number of diagonals must be positive");
1005 }
1006 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1007 }
Miao Wangfb675a52015-05-12 18:22:20 -07001008
1009 /**
1010 * DTBSV solves one of the systems of equations
1011 * A*x = b or A**T*x = b
1012 *
1013 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
1014 *
1015 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1016 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1017 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1018 * for i in range(0, n):
1019 * for j in range(i, min(i+k+1, n)):
1020 * b[i, j-i] = a[i, j]
1021 *
1022 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1023 * @param TransA The type of transpose applied to matrix A.
1024 * @param Diag Specifies whether or not A is unit triangular.
1025 * @param K The number of off-diagonals of the matrix A
1026 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1027 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1028 * @param incX The increment for the elements of vector x, must be larger than zero.
1029 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001030 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001031 // TBSV is the same as TRMV + K >= 0
1032 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -08001033 int N = A.getType().getY();
1034 if (K < 0) {
1035 throw new RSRuntimeException("Number of diagonals must be positive");
1036 }
1037 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1038 }
Miao Wangfb675a52015-05-12 18:22:20 -07001039
1040 /**
1041 * CTBSV solves one of the systems of equations
1042 * A*x = b or A**T*x = b or A**H*x = b
1043 *
1044 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
1045 *
1046 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1047 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1048 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1049 * for i in range(0, n):
1050 * for j in range(i, min(i+k+1, n)):
1051 * b[i, j-i] = a[i, j]
1052 *
1053 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1054 * @param TransA The type of transpose applied to matrix A.
1055 * @param Diag Specifies whether or not A is unit triangular.
1056 * @param K The number of off-diagonals of the matrix A
1057 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1058 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1059 * @param incX The increment for the elements of vector x, must be larger than zero.
1060 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001061 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001062 // TBSV is the same as TRMV + K >= 0
1063 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -08001064 int N = A.getType().getY();
1065 if (K < 0) {
1066 throw new RSRuntimeException("Number of diagonals must be positive");
1067 }
1068 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1069 }
Miao Wangfb675a52015-05-12 18:22:20 -07001070
1071 /**
1072 * ZTBSV solves one of the systems of equations
1073 * A*x = b or A**T*x = b or A**H*x = b
1074 *
1075 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
1076 *
1077 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1078 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1079 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1080 * for i in range(0, n):
1081 * for j in range(i, min(i+k+1, n)):
1082 * b[i, j-i] = a[i, j]
1083 *
1084 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1085 * @param TransA The type of transpose applied to matrix A.
1086 * @param Diag Specifies whether or not A is unit triangular.
1087 * @param K The number of off-diagonals of the matrix A
1088 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1089 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1090 * @param incX The increment for the elements of vector x, must be larger than zero.
1091 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001092 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001093 // TBSV is the same as TRMV + K >= 0
1094 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
Tim Murray25207df2015-01-12 16:47:56 -08001095 int N = A.getType().getY();
1096 if (K < 0) {
1097 throw new RSRuntimeException("Number of diagonals must be positive");
1098 }
1099 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1100 }
Miao Wangfb675a52015-05-12 18:22:20 -07001101
1102 /**
1103 * STPSV solves one of the systems of equations
1104 * A*x = b or A**T*x = b
1105 *
1106 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
1107 *
1108 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1109 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1110 * 'a' to packed matrix 'b'.
1111 * k = 0
1112 * for i in range(0, n):
1113 * for j in range(i, n):
1114 * b[k++] = a[i, j]
1115 *
1116 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1117 * @param TransA The type of transpose applied to matrix A.
1118 * @param Diag Specifies whether or not A is unit triangular.
1119 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
1120 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1121 * @param incX The increment for the elements of vector x, must be larger than zero.
1122 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001123 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -08001124 // TPSV is same as TPMV
1125 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
1126 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1127 }
Miao Wangfb675a52015-05-12 18:22:20 -07001128
1129 /**
1130 * DTPSV solves one of the systems of equations
1131 * A*x = b or A**T*x = b
1132 *
1133 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
1134 *
1135 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1136 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1137 * 'a' to packed matrix 'b'.
1138 * k = 0
1139 * for i in range(0, n):
1140 * for j in range(i, n):
1141 * b[k++] = a[i, j]
1142 *
1143 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1144 * @param TransA The type of transpose applied to matrix A.
1145 * @param Diag Specifies whether or not A is unit triangular.
1146 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
1147 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1148 * @param incX The increment for the elements of vector x, must be larger than zero.
1149 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001150 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -08001151 // TPSV is same as TPMV
1152 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1153 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1154 }
Miao Wangfb675a52015-05-12 18:22:20 -07001155
1156 /**
1157 * CTPSV solves one of the systems of equations
1158 * A*x = b or A**T*x = b or A**H*x = b
1159 *
1160 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
1161 *
1162 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1163 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1164 * 'a' to packed matrix 'b'.
1165 * k = 0
1166 * for i in range(0, n):
1167 * for j in range(i, n):
1168 * b[k++] = a[i, j]
1169 *
1170 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1171 * @param TransA The type of transpose applied to matrix A.
1172 * @param Diag Specifies whether or not A is unit triangular.
1173 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1174 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1175 * @param incX The increment for the elements of vector x, must be larger than zero.
1176 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001177 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -08001178 // TPSV is same as TPMV
1179 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1180 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1181 }
Miao Wangfb675a52015-05-12 18:22:20 -07001182
1183 /**
1184 * ZTPSV solves one of the systems of equations
1185 * A*x = b or A**T*x = b or A**H*x = b
1186 *
1187 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
1188 *
1189 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1190 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1191 * 'a' to packed matrix 'b'.
1192 * k = 0
1193 * for i in range(0, n):
1194 * for j in range(i, n):
1195 * b[k++] = a[i, j]
1196 *
1197 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1198 * @param TransA The type of transpose applied to matrix A.
1199 * @param Diag Specifies whether or not A is unit triangular.
1200 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1201 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1202 * @param incX The increment for the elements of vector x, must be larger than zero.
1203 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001204 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
Tim Murray25207df2015-01-12 16:47:56 -08001205 // TPSV is same as TPMV
1206 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1207 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1208 }
1209
1210 /**
1211 * Level 2, S and D only
1212 */
1213 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
1214 validateUplo(Uplo);
1215 int N = A.getType().getY();
1216 if (A.getType().getX() != N) {
1217 throw new RSRuntimeException("A must be a square matrix for SYMV");
1218 }
1219 if (!A.getType().getElement().isCompatible(e) ||
1220 !X.getType().getElement().isCompatible(e) ||
1221 !Y.getType().getElement().isCompatible(e) ) {
1222 throw new RSRuntimeException("Called BLAS with wrong Element type");
1223 }
1224 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1225 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1226 }
1227
1228 if (incX <= 0 || incY <= 0) {
1229 throw new RSRuntimeException("Vector increments must be greater than 0");
1230 }
1231 int expectedXDim = 1 + (N - 1) * incX;
1232 if (X.getType().getX() != expectedXDim) {
1233 throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1234 }
1235 int expectedYDim = 1 + (N - 1) * incY;
1236 if (Y.getType().getX() != expectedYDim) {
1237 throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1238 }
1239 return N;
1240 }
1241 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1242 validateUplo(Uplo);
1243 if (!Ap.getType().getElement().isCompatible(e) ||
1244 !X.getType().getElement().isCompatible(e) ||
1245 !Y.getType().getElement().isCompatible(e)) {
1246 throw new RSRuntimeException("Called BLAS with wrong Element type");
1247 }
1248 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1249 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1250 }
1251
1252 if (Ap.getType().getY() > 1) {
1253 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1254 }
1255
1256 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1257 if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1258 throw new RSRuntimeException("Invalid dimension for Ap");
1259 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001260 if (incX <= 0 || incY <= 0) {
1261 throw new RSRuntimeException("Vector increments must be greater than 0");
1262 }
Tim Murray25207df2015-01-12 16:47:56 -08001263 int expectedXDim = 1 + (N - 1) * incX;
1264 if (X.getType().getX() != expectedXDim) {
1265 throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1266 }
1267 int expectedYDim = 1 + (N - 1) * incY;
1268 if (Y.getType().getX() != expectedYDim) {
1269 throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1270 }
1271
1272 return N;
1273 }
1274 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1275 if (!A.getType().getElement().isCompatible(e) ||
1276 !X.getType().getElement().isCompatible(e) ||
1277 !Y.getType().getElement().isCompatible(e) ) {
1278 throw new RSRuntimeException("Called BLAS with wrong Element type");
1279 }
1280
1281 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1282 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1283 }
1284
1285 int M = A.getType().getY();
1286 int N = A.getType().getX();
1287
1288 if (N < 1 || M < 1) {
1289 throw new RSRuntimeException("M and N must be 1 or greater for GER");
1290 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001291 if (incX <= 0 || incY <= 0) {
1292 throw new RSRuntimeException("Vector increments must be greater than 0");
1293 }
1294 int expectedXDim = 1 + (M - 1) * incX;
Tim Murray25207df2015-01-12 16:47:56 -08001295 if (X.getType().getX() != expectedXDim) {
1296 throw new RSRuntimeException("Incorrect vector dimensions for GER");
1297 }
1298 int expectedYDim = 1 + (N - 1) * incY;
1299 if (Y.getType().getX() != expectedYDim) {
1300 throw new RSRuntimeException("Incorrect vector dimensions for GER");
1301 }
1302
1303
1304 }
1305 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
1306 validateUplo(Uplo);
1307 if (!A.getType().getElement().isCompatible(e) ||
1308 !X.getType().getElement().isCompatible(e)) {
1309 throw new RSRuntimeException("Called BLAS with wrong Element type");
1310 }
1311
1312 int N = A.getType().getX();
1313
1314 if (X.getType().getY() > 1) {
1315 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1316 }
1317 if (N != A.getType().getY()) {
1318 throw new RSRuntimeException("A must be a symmetric matrix");
1319 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001320 if (incX <= 0) {
1321 throw new RSRuntimeException("Vector increments must be greater than 0");
1322 }
Tim Murray25207df2015-01-12 16:47:56 -08001323 int expectedXDim = 1 + (N - 1) * incX;
1324 if (X.getType().getX() != expectedXDim) {
1325 throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1326 }
1327 return N;
1328 }
1329 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
1330 validateUplo(Uplo);
1331 if (!Ap.getType().getElement().isCompatible(e) ||
1332 !X.getType().getElement().isCompatible(e)) {
1333 throw new RSRuntimeException("Called BLAS with wrong Element type");
1334 }
1335 if (X.getType().getY() > 1) {
1336 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1337 }
1338
1339 if (Ap.getType().getY() > 1) {
1340 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1341 }
1342
1343 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1344 if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1345 throw new RSRuntimeException("Invalid dimension for Ap");
1346 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001347 if (incX <= 0) {
1348 throw new RSRuntimeException("Vector increments must be greater than 0");
1349 }
Tim Murray25207df2015-01-12 16:47:56 -08001350 int expectedXDim = 1 + (N - 1) * incX;
1351 if (X.getType().getX() != expectedXDim) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001352 throw new RSRuntimeException("Incorrect vector dimensions for SPR");
Tim Murray25207df2015-01-12 16:47:56 -08001353 }
1354
1355 return N;
1356 }
1357
1358 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1359 validateUplo(Uplo);
1360 if (!A.getType().getElement().isCompatible(e) ||
1361 !X.getType().getElement().isCompatible(e) ||
1362 !Y.getType().getElement().isCompatible(e)) {
1363 throw new RSRuntimeException("Called BLAS with wrong Element type");
1364 }
1365
1366 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1367 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1368 }
1369
1370 int N = A.getType().getX();
1371
1372 if (N != A.getType().getY()) {
1373 throw new RSRuntimeException("A must be a symmetric matrix");
1374 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001375 if (incX <= 0 || incY <= 0) {
1376 throw new RSRuntimeException("Vector increments must be greater than 0");
1377 }
Tim Murray25207df2015-01-12 16:47:56 -08001378 int expectedXDim = 1 + (N - 1) * incX;
1379 int expectedYDim = 1 + (N - 1) * incY;
1380 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1381 throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1382 }
1383 return N;
1384
1385 }
1386 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1387 validateUplo(Uplo);
1388 if (!Ap.getType().getElement().isCompatible(e) ||
1389 !X.getType().getElement().isCompatible(e) ||
1390 !Y.getType().getElement().isCompatible(e)) {
1391 throw new RSRuntimeException("Called BLAS with wrong Element type");
1392 }
1393 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1394 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1395 }
1396
1397 if (Ap.getType().getY() > 1) {
1398 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1399 }
1400
1401 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1402 if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1403 throw new RSRuntimeException("Invalid dimension for Ap");
1404 }
Miao Wang68ca43e2015-04-23 15:06:09 -07001405 if (incX <= 0 || incY <= 0) {
1406 throw new RSRuntimeException("Vector increments must be greater than 0");
1407 }
Tim Murray25207df2015-01-12 16:47:56 -08001408 int expectedXDim = 1 + (N - 1) * incX;
1409 int expectedYDim = 1 + (N - 1) * incY;
1410 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001411 throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
Tim Murray25207df2015-01-12 16:47:56 -08001412 }
1413
1414 return N;
1415 }
1416
Miao Wangfb675a52015-05-12 18:22:20 -07001417 /**
1418 * SSYMV performs the matrix-vector operation
1419 * y := alpha*A*x + beta*y
1420 *
1421 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
1422 *
1423 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1424 * @param alpha The scalar alpha.
1425 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1426 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1427 * @param incX The increment for the elements of vector x, must be larger than zero.
1428 * @param beta The scalar beta.
1429 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1430 * @param incY The increment for the elements of vector y, must be larger than zero.
1431 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001432 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001433 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1434 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1435 }
Miao Wangfb675a52015-05-12 18:22:20 -07001436
1437 /**
1438 * SSBMV performs the matrix-vector operation
1439 * y := alpha*A*x + beta*y
1440 *
1441 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
1442 *
1443 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1444 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1445 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1446 * for i in range(0, n):
1447 * for j in range(i, min(i+k+1, n)):
1448 * b[i, j-i] = a[i, j]
1449 *
1450 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1451 * @param K The number of off-diagonals of the matrix A
1452 * @param alpha The scalar alpha.
1453 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1454 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1455 * @param incX The increment for the elements of vector x, must be larger than zero.
1456 * @param beta The scalar beta.
1457 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1458 * @param incY The increment for the elements of vector y, must be larger than zero.
1459 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001460 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001461 // SBMV is the same as SYMV + K >= 0
1462 if (K < 0) {
1463 throw new RSRuntimeException("K must be greater than or equal to 0");
1464 }
Tim Murray25207df2015-01-12 16:47:56 -08001465 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1466 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1467 }
Miao Wangfb675a52015-05-12 18:22:20 -07001468
1469 /**
1470 * SSPMV performs the matrix-vector operation
1471 * y := alpha*A*x + beta*y
1472 *
1473 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
1474 *
1475 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1476 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1477 * 'a' to packed matrix 'b'.
1478 * k = 0
1479 * for i in range(0, n):
1480 * for j in range(i, n):
1481 * b[k++] = a[i, j]
1482 *
1483 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1484 * @param alpha The scalar alpha.
1485 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1486 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1487 * @param incX The increment for the elements of vector x, must be larger than zero.
1488 * @param beta The scalar beta.
1489 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1490 * @param incY The increment for the elements of vector y, must be larger than zero.
1491 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001492 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001493 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
1494 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1495 }
Miao Wangfb675a52015-05-12 18:22:20 -07001496
1497 /**
1498 * SGER performs the rank 1 operation
1499 * A := alpha*x*y**T + A
1500 *
1501 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
1502 *
1503 * @param alpha The scalar alpha.
1504 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1505 * @param incX The increment for the elements of vector x, must be larger than zero.
1506 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1507 * @param incY The increment for the elements of vector y, must be larger than zero.
1508 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1509 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001510 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001511 int M = A.getType().getY();
1512 int N = A.getType().getX();
Miao Wang68ca43e2015-04-23 15:06:09 -07001513 validateGER(Element.F32(mRS), X, incX, Y, incY, A);
Tim Murray25207df2015-01-12 16:47:56 -08001514 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
1515 }
Miao Wangfb675a52015-05-12 18:22:20 -07001516
1517 /**
1518 * SSYR performs the rank 1 operation
1519 * A := alpha*x*x**T + A
1520 *
1521 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
1522 *
1523 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1524 * @param alpha The scalar alpha.
1525 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1526 * @param incX The increment for the elements of vector x, must be larger than zero.
1527 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1528 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001529 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001530 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
1531 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1532 }
Miao Wangfb675a52015-05-12 18:22:20 -07001533
1534 /**
1535 * SSPR performs the rank 1 operation
1536 * A := alpha*x*x**T + A
1537 *
1538 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
1539 *
1540 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1541 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1542 * 'a' to packed matrix 'b'.
1543 * k = 0
1544 * for i in range(0, n):
1545 * for j in range(i, n):
1546 * b[k++] = a[i, j]
1547 *
1548 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1549 * @param alpha The scalar alpha.
1550 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1551 * @param incX The increment for the elements of vector x, must be larger than zero.
1552 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1553 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001554 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001555 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
1556 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1557 }
Miao Wangfb675a52015-05-12 18:22:20 -07001558
1559 /**
1560 * SSYR2 performs the symmetric rank 2 operation
1561 * A := alpha*x*y**T + alpha*y*x**T + A
1562 *
1563 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
1564 *
1565 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1566 * @param alpha The scalar alpha.
1567 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1568 * @param incX The increment for the elements of vector x, must be larger than zero.
1569 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1570 * @param incY The increment for the elements of vector y, must be larger than zero.
1571 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1572 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001573 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001574 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
1575 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
1576 }
Miao Wangfb675a52015-05-12 18:22:20 -07001577
1578 /**
1579 * SSPR2 performs the symmetric rank 2 operation
1580 * A := alpha*x*y**T + alpha*y*x**T + A
1581 *
1582 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
1583 *
1584 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1585 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1586 * 'a' to packed matrix 'b'.
1587 * k = 0
1588 * for i in range(0, n):
1589 * for j in range(i, n):
1590 * b[k++] = a[i, j]
1591 *
1592 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1593 * @param alpha The scalar alpha.
1594 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1595 * @param incX The increment for the elements of vector x, must be larger than zero.
1596 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1597 * @param incY The increment for the elements of vector y, must be larger than zero.
1598 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1599 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001600 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001601 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
1602 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
1603 }
Miao Wangfb675a52015-05-12 18:22:20 -07001604
1605 /**
1606 * DSYMV performs the matrix-vector operation
1607 * y := alpha*A*x + beta*y
1608 *
1609 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
1610 *
1611 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1612 * @param alpha The scalar alpha.
1613 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1614 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1615 * @param incX The increment for the elements of vector x, must be larger than zero.
1616 * @param beta The scalar beta.
1617 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1618 * @param incY The increment for the elements of vector y, must be larger than zero.
1619 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001620 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001621 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1622 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1623 }
Miao Wangfb675a52015-05-12 18:22:20 -07001624
1625 /**
1626 * DSBMV performs the matrix-vector operation
1627 * y := alpha*A*x + beta*y
1628 *
1629 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
1630 *
1631 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1632 * but only the region N*(K+1) will be referenced. The following subroutine can is an
1633 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1634 * for i in range(0, n):
1635 * for j in range(i, min(i+k+1, n)):
1636 * b[i, j-i] = a[i, j]
1637 *
1638 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1639 * @param K The number of off-diagonals of the matrix A
1640 * @param alpha The scalar alpha.
1641 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1642 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1643 * @param incX The increment for the elements of vector x, must be larger than zero.
1644 * @param beta The scalar beta.
1645 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1646 * @param incY The increment for the elements of vector y, must be larger than zero.
1647 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001648 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
Miao Wang68ca43e2015-04-23 15:06:09 -07001649 // SBMV is the same as SYMV + K >= 0
1650 if (K < 0) {
1651 throw new RSRuntimeException("K must be greater than or equal to 0");
1652 }
Tim Murray25207df2015-01-12 16:47:56 -08001653 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1654 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1655 }
Miao Wangfb675a52015-05-12 18:22:20 -07001656
1657 /**
1658 * DSPMV performs the matrix-vector operation
1659 * y := alpha*A*x + beta*y
1660 *
1661 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
1662 *
1663 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1664 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1665 * 'a' to packed matrix 'b'.
1666 * k = 0
1667 * for i in range(0, n):
1668 * for j in range(i, n):
1669 * b[k++] = a[i, j]
1670 *
1671 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1672 * @param alpha The scalar alpha.
1673 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1674 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1675 * @param incX The increment for the elements of vector x, must be larger than zero.
1676 * @param beta The scalar beta.
1677 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1678 * @param incY The increment for the elements of vector y, must be larger than zero.
1679 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001680 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001681 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
1682 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1683 }
Miao Wangfb675a52015-05-12 18:22:20 -07001684
1685 /**
1686 * DGER performs the rank 1 operation
1687 * A := alpha*x*y**T + A
1688 *
1689 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
1690 *
1691 * @param alpha The scalar alpha.
1692 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1693 * @param incX The increment for the elements of vector x, must be larger than zero.
1694 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1695 * @param incY The increment for the elements of vector y, must be larger than zero.
1696 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1697 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001698 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001699 int M = A.getType().getY();
1700 int N = A.getType().getX();
Miao Wang68ca43e2015-04-23 15:06:09 -07001701 validateGER(Element.F64(mRS), X, incX, Y, incY, A);
Tim Murray25207df2015-01-12 16:47:56 -08001702 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
1703 }
Miao Wangfb675a52015-05-12 18:22:20 -07001704
1705 /**
1706 * DSYR performs the rank 1 operation
1707 * A := alpha*x*x**T + A
1708 *
1709 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
1710 *
1711 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1712 * @param alpha The scalar alpha.
1713 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1714 * @param incX The increment for the elements of vector x, must be larger than zero.
1715 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1716 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001717 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001718 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
1719 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1720 }
Miao Wangfb675a52015-05-12 18:22:20 -07001721
1722 /**
1723 * DSPR performs the rank 1 operation
1724 * A := alpha*x*x**T + A
1725 *
1726 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
1727 *
1728 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1729 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1730 * 'a' to packed matrix 'b'.
1731 * k = 0
1732 * for i in range(0, n):
1733 * for j in range(i, n):
1734 * b[k++] = a[i, j]
1735 *
1736 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1737 * @param alpha The scalar alpha.
1738 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1739 * @param incX The increment for the elements of vector x, must be larger than zero.
1740 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1741 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001742 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001743 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
1744 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1745 }
Miao Wangfb675a52015-05-12 18:22:20 -07001746
1747 /**
1748 * DSYR2 performs the symmetric rank 2 operation
1749 * A := alpha*x*y**T + alpha*y*x**T + A
1750 *
1751 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
1752 *
1753 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1754 * @param alpha The scalar alpha.
1755 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1756 * @param incX The increment for the elements of vector x, must be larger than zero.
1757 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1758 * @param incY The increment for the elements of vector y, must be larger than zero.
1759 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1760 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001761 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001762 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
1763 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
1764 }
Miao Wangfb675a52015-05-12 18:22:20 -07001765
1766 /**
1767 * DSPR2 performs the symmetric rank 2 operation
1768 * A := alpha*x*y**T + alpha*y*x**T + A
1769 *
1770 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
1771 *
1772 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1773 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1774 * 'a' to packed matrix 'b'.
1775 * k = 0
1776 * for i in range(0, n):
1777 * for j in range(i, n):
1778 * b[k++] = a[i, j]
1779 *
1780 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1781 * @param alpha The scalar alpha.
1782 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1783 * @param incX The increment for the elements of vector x, must be larger than zero.
1784 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1785 * @param incY The increment for the elements of vector y, must be larger than zero.
1786 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1787 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001788 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001789 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
1790 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
1791 }
1792
1793
1794 /**
1795 * Level 2, C and Z only
1796 */
1797
1798 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1799 if (!A.getType().getElement().isCompatible(e) ||
1800 !X.getType().getElement().isCompatible(e) ||
1801 !Y.getType().getElement().isCompatible(e)) {
1802 throw new RSRuntimeException("Called BLAS with wrong Element type");
1803 }
1804 if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1805 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1806 }
1807
1808 int M = A.getType().getY();
1809 int N = A.getType().getX();
Miao Wang68ca43e2015-04-23 15:06:09 -07001810 if (incX <= 0 || incY <= 0) {
1811 throw new RSRuntimeException("Vector increments must be greater than 0");
1812 }
1813 int expectedXDim = 1 + (M - 1) * incX;
Tim Murray25207df2015-01-12 16:47:56 -08001814 if (X.getType().getX() != expectedXDim) {
1815 throw new RSRuntimeException("Incorrect vector dimensions for GERU");
1816 }
1817 int expectedYDim = 1 + (N - 1) * incY;
1818 if (Y.getType().getX() != expectedYDim) {
1819 throw new RSRuntimeException("Incorrect vector dimensions for GERU");
1820 }
1821
1822 }
1823
Miao Wangfb675a52015-05-12 18:22:20 -07001824 /**
1825 * CHEMV performs the matrix-vector operation
1826 * y := alpha*A*x + beta*y
1827 *
1828 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
1829 *
1830 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1831 * @param alpha The scalar alpha.
1832 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1833 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1834 * @param incX The increment for the elements of vector x, must be larger than zero.
1835 * @param beta The scalar beta.
1836 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1837 * @param incY The increment for the elements of vector y, must be larger than zero.
1838 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001839 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001840 // HEMV is the same as SYR2 validation-wise
1841 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
1842 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1843 }
Miao Wangfb675a52015-05-12 18:22:20 -07001844
1845 /**
1846 * CHBMV performs the matrix-vector operation
1847 * y := alpha*A*x + beta*y
1848 *
1849 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
1850 *
1851 * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1852 * but only the region N*(K+1) will be referenced. The following subroutine is an
1853 * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1854 * for i in range(0, n):
1855 * for j in range(i, min(i+k+1, n)):
1856 * b[i, j-i] = a[i, j]
1857 *
1858 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1859 * @param K The number of off-diagonals of the matrix A
1860 * @param alpha The scalar alpha.
1861 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1862 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1863 * @param incX The increment for the elements of vector x, must be larger than zero.
1864 * @param beta The scalar beta.
1865 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1866 * @param incY The increment for the elements of vector y, must be larger than zero.
1867 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001868 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001869 // HBMV is the same as SYR2 validation-wise
1870 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
1871 if (K < 0) {
1872 throw new RSRuntimeException("K must be 0 or greater for HBMV");
1873 }
1874 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1875 }
Miao Wangfb675a52015-05-12 18:22:20 -07001876
1877 /**
1878 * CHPMV performs the matrix-vector operation
1879 * y := alpha*A*x + beta*y
1880 *
1881 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
1882 *
1883 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1884 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1885 * 'a' to packed matrix 'b'.
1886 * k = 0
1887 * for i in range(0, n):
1888 * for j in range(i, n):
1889 * b[k++] = a[i, j]
1890 *
1891 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1892 * @param alpha The scalar alpha.
1893 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1894 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1895 * @param incX The increment for the elements of vector x, must be larger than zero.
1896 * @param beta The scalar beta.
1897 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1898 * @param incY The increment for the elements of vector y, must be larger than zero.
1899 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001900 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08001901 // HPMV is the same as SPR2
1902 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
1903 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1904 }
Miao Wangfb675a52015-05-12 18:22:20 -07001905
1906 /**
1907 * CGERU performs the rank 1 operation
1908 * A := alpha*x*y**T + A
1909 *
1910 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
1911 *
1912 * @param alpha The scalar alpha.
1913 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1914 * @param incX The increment for the elements of vector x, must be larger than zero.
1915 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1916 * @param incY The increment for the elements of vector y, must be larger than zero.
1917 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1918 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001919 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001920 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
1921 int M = A.getType().getY();
1922 int N = A.getType().getX();
1923 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
1924 }
Miao Wangfb675a52015-05-12 18:22:20 -07001925
1926 /**
1927 * CGERC performs the rank 1 operation
1928 * A := alpha*x*y**H + A
1929 *
1930 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
1931 *
1932 * @param alpha The scalar alpha.
1933 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1934 * @param incX The increment for the elements of vector x, must be larger than zero.
1935 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1936 * @param incY The increment for the elements of vector y, must be larger than zero.
1937 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1938 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001939 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001940 // same as GERU
1941 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
1942 int M = A.getType().getY();
1943 int N = A.getType().getX();
1944 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
1945 }
Miao Wangfb675a52015-05-12 18:22:20 -07001946
1947 /**
1948 * CHER performs the rank 1 operation
1949 * A := alpha*x*x**H + A
1950 *
1951 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
1952 *
1953 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1954 * @param alpha The scalar alpha.
1955 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1956 * @param incX The increment for the elements of vector x, must be larger than zero.
1957 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1958 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001959 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08001960 // same as SYR
Miao Wang68ca43e2015-04-23 15:06:09 -07001961 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
Tim Murray25207df2015-01-12 16:47:56 -08001962 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
1963 }
Miao Wangfb675a52015-05-12 18:22:20 -07001964
1965 /**
1966 * CHPR performs the rank 1 operation
1967 * A := alpha*x*x**H + A
1968 *
1969 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
1970 *
1971 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
1972 * The following subroutine is an example showing how to convert an UPPER triangular matrix
1973 * 'a' to packed matrix 'b'.
1974 * k = 0
1975 * for i in range(0, n):
1976 * for j in range(i, n):
1977 * b[k++] = a[i, j]
1978 *
1979 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1980 * @param alpha The scalar alpha.
1981 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1982 * @param incX The increment for the elements of vector x, must be larger than zero.
1983 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1984 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07001985 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08001986 // equivalent to SPR for validation
1987 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
1988 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
1989 }
Miao Wangfb675a52015-05-12 18:22:20 -07001990
1991 /**
1992 * CHER2 performs the hermitian rank 2 operation
1993 * A := alpha*x*y**H + conjg( alpha )*y*x**H + A
1994 *
1995 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
1996 *
1997 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1998 * @param alpha The scalar alpha.
1999 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2000 * @param incX The increment for the elements of vector x, must be larger than zero.
2001 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2002 * @param incY The increment for the elements of vector y, must be larger than zero.
2003 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2004 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002005 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002006 // same as SYR2
2007 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2008 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2009 }
Miao Wangfb675a52015-05-12 18:22:20 -07002010
2011 /**
2012 * CHPR2 performs the hermitian rank 2 operation
2013 * A := alpha*x*y**H + conjg( alpha )*y*x**H + A
2014 *
2015 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2016 *
2017 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2018 * The following subroutine is an example showing how to convert an UPPER triangular matrix
2019 * 'a' to packed matrix 'b'.
2020 * k = 0
2021 * for i in range(0, n):
2022 * for j in range(i, n):
2023 * b[k++] = a[i, j]
2024 *
2025 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2026 * @param alpha The scalar alpha.
2027 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2028 * @param incX The increment for the elements of vector x, must be larger than zero.
2029 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2030 * @param incY The increment for the elements of vector y, must be larger than zero.
2031 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2032 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002033 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08002034 // same as SPR2
2035 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2036 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
2037 }
Miao Wangfb675a52015-05-12 18:22:20 -07002038
2039 /**
2040 * ZHEMV performs the matrix-vector operation
2041 * y := alpha*A*x + beta*y
2042 *
2043 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
2044 *
2045 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2046 * @param alpha The scalar alpha.
2047 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2048 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2049 * @param incX The increment for the elements of vector x, must be larger than zero.
2050 * @param beta The scalar beta.
2051 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2052 * @param incY The increment for the elements of vector y, must be larger than zero.
2053 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002054 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08002055 // HEMV is the same as SYR2 validation-wise
2056 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2057 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2058 }
Miao Wangfb675a52015-05-12 18:22:20 -07002059
2060 /**
2061 * ZHBMV performs the matrix-vector operation
2062 * y := alpha*A*x + beta*y
2063 *
2064 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
2065 *
2066 * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2067 * but only the region N*(K+1) will be referenced. The following subroutine is an
2068 * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2069 * for i in range(0, n):
2070 * for j in range(i, min(i+k+1, n)):
2071 * b[i, j-i] = a[i, j]
2072 *
2073 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2074 * @param K The number of off-diagonals of the matrix A
2075 * @param alpha The scalar alpha.
2076 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2077 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2078 * @param incX The increment for the elements of vector x, must be larger than zero.
2079 * @param beta The scalar beta.
2080 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2081 * @param incY The increment for the elements of vector y, must be larger than zero.
2082 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002083 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08002084 // HBMV is the same as SYR2 validation-wise
2085 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2086 if (K < 0) {
2087 throw new RSRuntimeException("K must be 0 or greater for HBMV");
2088 }
2089 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2090 }
Miao Wangfb675a52015-05-12 18:22:20 -07002091
2092 /**
2093 * ZHPMV performs the matrix-vector operation
2094 * y := alpha*A*x + beta*y
2095 *
2096 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
2097 *
2098 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2099 * The following subroutine is an example showing how to convert an UPPER triangular matrix
2100 * 'a' to packed matrix 'b'.
2101 * k = 0
2102 * for i in range(0, n):
2103 * for j in range(i, n):
2104 * b[k++] = a[i, j]
2105 *
2106 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2107 * @param alpha The scalar alpha.
2108 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2109 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2110 * @param incX The increment for the elements of vector x, must be larger than zero.
2111 * @param beta The scalar beta.
2112 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2113 * @param incY The increment for the elements of vector y, must be larger than zero.
2114 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002115 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
Tim Murray25207df2015-01-12 16:47:56 -08002116 // HPMV is the same as SPR2
2117 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2118 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2119 }
Miao Wangfb675a52015-05-12 18:22:20 -07002120
2121 /**
2122 * ZGERU performs the rank 1 operation
2123 * A := alpha*x*y**T + A
2124 *
2125 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
2126 *
2127 * @param alpha The scalar alpha.
2128 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2129 * @param incX The increment for the elements of vector x, must be larger than zero.
2130 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2131 * @param incY The increment for the elements of vector y, must be larger than zero.
2132 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2133 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002134 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002135 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2136 int M = A.getType().getY();
2137 int N = A.getType().getX();
2138 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2139 }
Miao Wangfb675a52015-05-12 18:22:20 -07002140
2141 /**
2142 * ZGERC performs the rank 1 operation
2143 * A := alpha*x*y**H + A
2144 *
2145 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
2146 *
2147 * @param alpha The scalar alpha.
2148 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2149 * @param incX The increment for the elements of vector x, must be larger than zero.
2150 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2151 * @param incY The increment for the elements of vector y, must be larger than zero.
2152 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2153 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002154 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002155 // same as GERU
2156 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2157 int M = A.getType().getY();
2158 int N = A.getType().getX();
2159 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2160 }
Miao Wangfb675a52015-05-12 18:22:20 -07002161
2162 /**
2163 * ZHER performs the rank 1 operation
2164 * A := alpha*x*x**H + A
2165 *
2166 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
2167 *
2168 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2169 * @param alpha The scalar alpha.
2170 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2171 * @param incX The increment for the elements of vector x, must be larger than zero.
2172 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2173 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002174 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002175 // same as SYR
Miao Wangcecc00a2015-04-29 18:14:55 -07002176 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
Tim Murray25207df2015-01-12 16:47:56 -08002177 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
2178 }
Miao Wangfb675a52015-05-12 18:22:20 -07002179
2180 /**
2181 * ZHPR performs the rank 1 operation
2182 * A := alpha*x*x**H + A
2183 *
2184 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
2185 *
2186 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2187 * The following subroutine is an example showing how to convert an UPPER triangular matrix
2188 * 'a' to packed matrix 'b'.
2189 * k = 0
2190 * for i in range(0, n):
2191 * for j in range(i, n):
2192 * b[k++] = a[i, j]
2193 *
2194 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2195 * @param alpha The scalar alpha.
2196 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2197 * @param incX The increment for the elements of vector x, must be larger than zero.
2198 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2199 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002200 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08002201 // equivalent to SPR for validation
2202 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
2203 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
2204 }
Miao Wangfb675a52015-05-12 18:22:20 -07002205
2206 /**
2207 * ZHER2 performs the hermitian rank 2 operation
2208 * A := alpha*x*y**H + conjg( alpha )*y*x**H + A
2209 *
2210 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
2211 *
2212 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2213 * @param alpha The scalar alpha.
2214 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2215 * @param incX The increment for the elements of vector x, must be larger than zero.
2216 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2217 * @param incY The increment for the elements of vector y, must be larger than zero.
2218 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2219 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002220 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
Tim Murray25207df2015-01-12 16:47:56 -08002221 // same as SYR2
2222 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2223 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2224 }
Miao Wangfb675a52015-05-12 18:22:20 -07002225
2226 /**
2227 * ZHPR2 performs the hermitian rank 2 operation
2228 * A := alpha*x*y**H + conjg( alpha )*y*x**H + A
2229 *
2230 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
2231 *
2232 * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2233 * The following subroutine is an example showing how to convert an UPPER triangular matrix
2234 * 'a' to packed matrix 'b'.
2235 * k = 0
2236 * for i in range(0, n):
2237 * for j in range(i, n):
2238 * b[k++] = a[i, j]
2239 *
2240 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2241 * @param alpha The scalar alpha.
2242 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2243 * @param incX The increment for the elements of vector x, must be larger than zero.
2244 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2245 * @param incY The increment for the elements of vector y, must be larger than zero.
2246 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2247 */
Miao Wang89c3a5f2015-04-23 15:20:11 -07002248 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
Tim Murray25207df2015-01-12 16:47:56 -08002249 // same as SPR2
2250 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2251 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
2252 }
2253
2254
2255 /**
2256 * Level 3 BLAS
2257 */
2258
2259 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002260 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
Tim Murray25207df2015-01-12 16:47:56 -08002261 if ((A != null && !A.getType().getElement().isCompatible(e)) ||
2262 (B != null && !B.getType().getElement().isCompatible(e)) ||
2263 (C != null && !C.getType().getElement().isCompatible(e))) {
2264 throw new RSRuntimeException("Called BLAS with wrong Element type");
2265 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002266 if (C == null) {
2267 //since matrix C is used to store the result, it cannot be null.
2268 throw new RSRuntimeException("Allocation C cannot be null");
Tim Murray25207df2015-01-12 16:47:56 -08002269 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002270 cM = C.getType().getY();
2271 cN = C.getType().getX();
2272
Tim Murray25207df2015-01-12 16:47:56 -08002273 if (Side == RIGHT) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002274 if ((A == null && B != null) || (A != null && B == null)) {
2275 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
2276 }
Tim Murray25207df2015-01-12 16:47:56 -08002277 if (B != null) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002278 bM = A.getType().getY();
2279 bN = A.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002280 }
2281 if (A != null) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002282 aM = B.getType().getY();
2283 aN = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002284 }
2285 } else {
2286 if (A != null) {
Miao Wang1e940d82015-04-30 10:47:42 -07002287 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002288 aN = A.getType().getY();
2289 aM = A.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002290 } else {
Miao Wang37ae07c2015-04-24 11:19:53 -07002291 aM = A.getType().getY();
2292 aN = A.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002293 }
2294 }
2295 if (B != null) {
Miao Wang1e940d82015-04-30 10:47:42 -07002296 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002297 bN = B.getType().getY();
2298 bM = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002299 } else {
Miao Wang37ae07c2015-04-24 11:19:53 -07002300 bM = B.getType().getY();
2301 bN = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002302 }
2303 }
2304 }
2305 if (A != null && B != null && C != null) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002306 if (aN != bM || aM != cM || bN != cN) {
Tim Murray25207df2015-01-12 16:47:56 -08002307 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2308 }
2309 } else if (A != null && C != null) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002310 // A and C only, for SYRK
2311 if (cM != cN) {
2312 throw new RSRuntimeException("Matrix C is not symmetric");
2313 }
Miao Wang50a8ff12015-05-01 15:32:24 -07002314 if (aM != cM) {
2315 throw new RSRuntimeException("Called BLAS with invalid dimensions");
Tim Murray25207df2015-01-12 16:47:56 -08002316 }
2317 } else if (A != null && B != null) {
2318 // A and B only
Miao Wang37ae07c2015-04-24 11:19:53 -07002319 if (aN != bM) {
2320 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2321 }
Tim Murray25207df2015-01-12 16:47:56 -08002322 }
2323
2324 }
2325
Miao Wangfb675a52015-05-12 18:22:20 -07002326 /**
2327 * SGEMM performs one of the matrix-matrix operations
2328 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T
2329 *
2330 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
2331 *
2332 * @param TransA The type of transpose applied to matrix A.
2333 * @param TransB The type of transpose applied to matrix B.
2334 * @param alpha The scalar alpha.
2335 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2336 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2337 * @param beta The scalar beta.
2338 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2339 */
Tim Murray25207df2015-01-12 16:47:56 -08002340 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
2341 Allocation B, float beta, Allocation C) {
2342 validateTranspose(TransA);
2343 validateTranspose(TransB);
2344 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
2345
2346 int M = -1, N = -1, K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002347 if (TransA != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002348 M = A.getType().getX();
2349 K = A.getType().getY();
2350 } else {
2351 M = A.getType().getY();
2352 K = A.getType().getX();
2353 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002354 if (TransB != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002355 N = B.getType().getY();
2356 } else {
2357 N = B.getType().getX();
2358 }
2359 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS),
2360 beta, C.getID(mRS), 0, 0, 0, 0);
2361 }
Miao Wangfb675a52015-05-12 18:22:20 -07002362
2363 /**
2364 * DGEMM performs one of the matrix-matrix operations
2365 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T
2366 *
2367 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
2368 *
2369 * @param TransA The type of transpose applied to matrix A.
2370 * @param TransB The type of transpose applied to matrix B.
2371 * @param alpha The scalar alpha.
2372 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2373 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2374 * @param beta The scalar beta.
2375 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2376 */
Tim Murray25207df2015-01-12 16:47:56 -08002377 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
2378 Allocation B, double beta, Allocation C) {
2379 validateTranspose(TransA);
2380 validateTranspose(TransB);
2381 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
2382 int M = -1, N = -1, K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002383 if (TransA != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002384 M = A.getType().getX();
2385 K = A.getType().getY();
2386 } else {
2387 M = A.getType().getY();
2388 K = A.getType().getX();
2389 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002390 if (TransB != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002391 N = B.getType().getY();
2392 } else {
2393 N = B.getType().getX();
2394 }
2395 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS),
2396 beta, C.getID(mRS), 0, 0, 0, 0);
2397 }
Miao Wangfb675a52015-05-12 18:22:20 -07002398
2399 /**
2400 * CGEMM performs one of the matrix-matrix operations
2401 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H
2402 *
2403 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
2404 *
2405 * @param TransA The type of transpose applied to matrix A.
2406 * @param TransB The type of transpose applied to matrix B.
2407 * @param alpha The scalar alpha.
2408 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2409 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2410 * @param beta The scalar beta.
2411 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2412 */
Tim Murray25207df2015-01-12 16:47:56 -08002413 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
2414 Allocation B, Float2 beta, Allocation C) {
2415 validateTranspose(TransA);
2416 validateTranspose(TransB);
2417 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
2418 int M = -1, N = -1, K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002419 if (TransA != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002420 M = A.getType().getX();
2421 K = A.getType().getY();
2422 } else {
2423 M = A.getType().getY();
2424 K = A.getType().getX();
2425 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002426 if (TransB != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002427 N = B.getType().getY();
2428 } else {
2429 N = B.getType().getX();
2430 }
2431 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2432 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2433 }
2434
Miao Wangfb675a52015-05-12 18:22:20 -07002435 /**
2436 * ZGEMM performs one of the matrix-matrix operations
2437 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H
2438 *
2439 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
2440 *
2441 * @param TransA The type of transpose applied to matrix A.
2442 * @param TransB The type of transpose applied to matrix B.
2443 * @param alpha The scalar alpha.
Elliot Waite54de7742017-01-11 15:30:35 -08002444 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2445 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
Miao Wangfb675a52015-05-12 18:22:20 -07002446 * @param beta The scalar beta.
Elliot Waite54de7742017-01-11 15:30:35 -08002447 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
Miao Wangfb675a52015-05-12 18:22:20 -07002448 */
Tim Murray25207df2015-01-12 16:47:56 -08002449 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
2450 Allocation B, Double2 beta, Allocation C) {
2451 validateTranspose(TransA);
2452 validateTranspose(TransB);
2453 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
2454 int M = -1, N = -1, K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002455 if (TransA != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002456 M = A.getType().getX();
2457 K = A.getType().getY();
2458 } else {
2459 M = A.getType().getY();
2460 K = A.getType().getX();
2461 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002462 if (TransB != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002463 N = B.getType().getY();
2464 } else {
2465 N = B.getType().getX();
2466 }
2467 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2468 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2469 }
2470
Miao Wangfb675a52015-05-12 18:22:20 -07002471 /**
2472 * SSYMM performs one of the matrix-matrix operations
2473 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
2474 *
2475 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
2476 *
2477 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2478 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2479 * @param alpha The scalar alpha.
2480 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2481 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2482 * @param beta The scalar beta.
2483 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2484 */
Tim Murray25207df2015-01-12 16:47:56 -08002485 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
2486 Allocation B, float beta, Allocation C) {
2487 validateSide(Side);
2488 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07002489 //For SYMM, Matrix A should be symmetric
2490 if (A.getType().getX() != A.getType().getY()) {
2491 throw new RSRuntimeException("Matrix A is not symmetric");
2492 }
Tim Murray25207df2015-01-12 16:47:56 -08002493 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
2494 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
2495 beta, C.getID(mRS), 0, 0, 0, 0);
2496 }
Miao Wangfb675a52015-05-12 18:22:20 -07002497
2498 /**
2499 * DSYMM performs one of the matrix-matrix operations
2500 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
2501 *
2502 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
2503 *
2504 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2505 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2506 * @param alpha The scalar alpha.
2507 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2508 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2509 * @param beta The scalar beta.
2510 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2511 */
Tim Murray25207df2015-01-12 16:47:56 -08002512 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
2513 Allocation B, double beta, Allocation C) {
2514 validateSide(Side);
2515 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07002516 if (A.getType().getX() != A.getType().getY()) {
2517 throw new RSRuntimeException("Matrix A is not symmetric");
2518 }
Tim Murray25207df2015-01-12 16:47:56 -08002519 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
2520 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
2521 beta, C.getID(mRS), 0, 0, 0, 0);
2522 }
Miao Wangfb675a52015-05-12 18:22:20 -07002523
2524 /**
2525 * CSYMM performs one of the matrix-matrix operations
2526 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
2527 *
2528 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
2529 *
2530 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2531 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2532 * @param alpha The scalar alpha.
2533 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2534 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2535 * @param beta The scalar beta.
2536 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2537 */
Tim Murray25207df2015-01-12 16:47:56 -08002538 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
2539 Allocation B, Float2 beta, Allocation C) {
2540 validateSide(Side);
2541 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07002542 if (A.getType().getX() != A.getType().getY()) {
2543 throw new RSRuntimeException("Matrix A is not symmetric");
2544 }
Tim Murray25207df2015-01-12 16:47:56 -08002545 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
2546 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2547 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2548 }
Miao Wangfb675a52015-05-12 18:22:20 -07002549
2550 /**
2551 * ZSYMM performs one of the matrix-matrix operations
2552 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
2553 *
2554 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
2555 *
2556 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2557 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2558 * @param alpha The scalar alpha.
2559 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2560 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2561 * @param beta The scalar beta.
2562 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2563 */
Tim Murray25207df2015-01-12 16:47:56 -08002564 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
2565 Allocation B, Double2 beta, Allocation C) {
2566 validateSide(Side);
2567 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07002568 if (A.getType().getX() != A.getType().getY()) {
2569 throw new RSRuntimeException("Matrix A is not symmetric");
2570 }
Tim Murray25207df2015-01-12 16:47:56 -08002571 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
2572 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2573 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2574 }
2575
Miao Wangfb675a52015-05-12 18:22:20 -07002576 /**
2577 * SSYRK performs one of the symmetric rank k operations
2578 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C
2579 *
2580 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
2581 *
2582 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2583 * @param Trans The type of transpose applied to the operation.
2584 * @param alpha The scalar alpha.
2585 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2586 * @param beta The scalar beta.
2587 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2588 */
Tim Murray25207df2015-01-12 16:47:56 -08002589 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
2590 validateTranspose(Trans);
2591 validateUplo(Uplo);
2592 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
2593 int K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002594 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002595 K = A.getType().getY();
2596 } else {
2597 K = A.getType().getX();
2598 }
2599
2600 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
2601 }
2602
Miao Wangfb675a52015-05-12 18:22:20 -07002603 /**
2604 * DSYRK performs one of the symmetric rank k operations
2605 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C
2606 *
2607 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
2608 *
2609 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2610 * @param Trans The type of transpose applied to the operation.
2611 * @param alpha The scalar alpha.
2612 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2613 * @param beta The scalar beta.
2614 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2615 */
Tim Murray25207df2015-01-12 16:47:56 -08002616 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
2617 validateTranspose(Trans);
2618 validateUplo(Uplo);
2619 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
2620 int K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002621 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002622 K = A.getType().getY();
2623 } else {
2624 K = A.getType().getX();
2625 }
2626 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
2627 }
Miao Wangfb675a52015-05-12 18:22:20 -07002628
2629 /**
2630 * CSYRK performs one of the symmetric rank k operations
2631 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C
2632 *
2633 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
2634 *
2635 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2636 * @param Trans The type of transpose applied to the operation.
2637 * @param alpha The scalar alpha.
2638 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2639 * @param beta The scalar beta.
2640 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2641 */
Miao Wang4c472742015-04-22 15:57:57 -07002642 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
Tim Murray25207df2015-01-12 16:47:56 -08002643 validateTranspose(Trans);
2644 validateUplo(Uplo);
2645 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
2646 int K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002647 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002648 K = A.getType().getY();
2649 } else {
2650 K = A.getType().getX();
2651 }
Miao Wang4c472742015-04-22 15:57:57 -07002652 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
Tim Murray25207df2015-01-12 16:47:56 -08002653 C.getID(mRS), 0, 0, 0, 0);
2654 }
Miao Wangfb675a52015-05-12 18:22:20 -07002655
2656 /**
2657 * ZSYRK performs one of the symmetric rank k operations
2658 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C
2659 *
2660 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
2661 *
2662 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2663 * @param Trans The type of transpose applied to the operation.
2664 * @param alpha The scalar alpha.
2665 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2666 * @param beta The scalar beta.
2667 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2668 */
Miao Wang4c472742015-04-22 15:57:57 -07002669 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
Tim Murray25207df2015-01-12 16:47:56 -08002670 validateTranspose(Trans);
2671 validateUplo(Uplo);
2672 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
2673 int K = -1;
Miao Wang37ae07c2015-04-24 11:19:53 -07002674 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002675 K = A.getType().getY();
2676 } else {
2677 K = A.getType().getX();
2678 }
Miao Wang4c472742015-04-22 15:57:57 -07002679 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
Tim Murray25207df2015-01-12 16:47:56 -08002680 C.getID(mRS), 0, 0, 0, 0);
2681 }
2682
2683 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
2684 validateTranspose(Trans);
2685 if (!A.getType().getElement().isCompatible(e) ||
2686 !B.getType().getElement().isCompatible(e) ||
2687 !C.getType().getElement().isCompatible(e)) {
2688 throw new RSRuntimeException("Called BLAS with wrong Element type");
2689 }
2690 int Cdim = -1;
2691 // A is n x k if no transpose, k x n if transpose
2692 // C is n x n
2693 if (Trans == TRANSPOSE) {
2694 // check columns versus C
2695 Cdim = A.getType().getX();
2696 } else {
2697 // check rows versus C
2698 Cdim = A.getType().getY();
2699 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002700 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
Tim Murray25207df2015-01-12 16:47:56 -08002701 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
2702 }
2703 // A dims == B dims
2704 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
2705 throw new RSRuntimeException("Invalid A and B in SYR2K");
2706 }
2707 }
Miao Wangfb675a52015-05-12 18:22:20 -07002708
2709 /**
2710 * SSYR2K performs one of the symmetric rank 2k operations
2711 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C
2712 *
2713 * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
2714 *
2715 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2716 * @param Trans The type of transpose applied to the operation.
2717 * @param alpha The scalar alpha.
2718 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2719 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2720 * @param beta The scalar beta.
2721 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2722 */
Tim Murray25207df2015-01-12 16:47:56 -08002723 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
2724 validateUplo(Uplo);
2725 validateSYR2K(Element.F32(mRS), Trans, A, B, C);
2726 int K = -1;
Miao Wang1e940d82015-04-30 10:47:42 -07002727 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002728 K = A.getType().getY();
2729 } else {
2730 K = A.getType().getX();
2731 }
2732 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
2733 }
Miao Wangfb675a52015-05-12 18:22:20 -07002734
2735 /**
2736 * DSYR2K performs one of the symmetric rank 2k operations
2737 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C
2738 *
2739 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
2740 *
2741 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2742 * @param Trans The type of transpose applied to the operation.
2743 * @param alpha The scalar alpha.
2744 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2745 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2746 * @param beta The scalar beta.
2747 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2748 */
Tim Murray25207df2015-01-12 16:47:56 -08002749 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
2750 validateUplo(Uplo);
2751 validateSYR2K(Element.F64(mRS), Trans, A, B, C);
2752 int K = -1;
Miao Wang1e940d82015-04-30 10:47:42 -07002753 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002754 K = A.getType().getY();
2755 } else {
2756 K = A.getType().getX();
2757 }
Miao Wang194679ed2015-04-30 17:14:28 -07002758 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08002759 }
Miao Wangfb675a52015-05-12 18:22:20 -07002760
2761 /**
2762 * CSYR2K performs one of the symmetric rank 2k operations
2763 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C
2764 *
2765 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
2766 *
2767 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2768 * @param Trans The type of transpose applied to the operation.
2769 * @param alpha The scalar alpha.
2770 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2771 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2772 * @param beta The scalar beta.
2773 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2774 */
Tim Murray25207df2015-01-12 16:47:56 -08002775 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
2776 validateUplo(Uplo);
2777 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
2778 int K = -1;
Miao Wang1e940d82015-04-30 10:47:42 -07002779 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002780 K = A.getType().getY();
2781 } else {
2782 K = A.getType().getX();
2783 }
Miao Wang194679ed2015-04-30 17:14:28 -07002784 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08002785 }
Miao Wangfb675a52015-05-12 18:22:20 -07002786
2787 /**
2788 * ZSYR2K performs one of the symmetric rank 2k operations
2789 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C
2790 *
2791 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
2792 *
2793 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2794 * @param Trans The type of transpose applied to the operation.
2795 * @param alpha The scalar alpha.
2796 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2797 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2798 * @param beta The scalar beta.
2799 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2800 */
Tim Murray25207df2015-01-12 16:47:56 -08002801 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
2802 validateUplo(Uplo);
2803 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
2804 int K = -1;
Miao Wang1e940d82015-04-30 10:47:42 -07002805 if (Trans != NO_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08002806 K = A.getType().getY();
2807 } else {
2808 K = A.getType().getX();
2809 }
Miao Wang194679ed2015-04-30 17:14:28 -07002810 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08002811 }
2812
2813 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
2814 validateSide(Side);
2815 validateTranspose(TransA);
Miao Wang37ae07c2015-04-24 11:19:53 -07002816 int aM = -1, aN = -1, bM = -1, bN = -1;
Tim Murray25207df2015-01-12 16:47:56 -08002817 if (!A.getType().getElement().isCompatible(e) ||
2818 !B.getType().getElement().isCompatible(e)) {
2819 throw new RSRuntimeException("Called BLAS with wrong Element type");
2820 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002821
2822 aM = A.getType().getY();
2823 aN = A.getType().getX();
2824 if (aM != aN) {
2825 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
Tim Murray25207df2015-01-12 16:47:56 -08002826 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002827
2828 bM = B.getType().getY();
2829 bN = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002830 if (Side == LEFT) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002831 if (aN != bM) {
Tim Murray25207df2015-01-12 16:47:56 -08002832 throw new RSRuntimeException("Called TRMM with invalid matrices");
2833 }
2834 } else {
Miao Wang37ae07c2015-04-24 11:19:53 -07002835 if (bN != aM) {
Tim Murray25207df2015-01-12 16:47:56 -08002836 throw new RSRuntimeException("Called TRMM with invalid matrices");
2837 }
2838 }
2839 }
Miao Wangfb675a52015-05-12 18:22:20 -07002840
2841 /**
2842 * STRMM performs one of the matrix-matrix operations
2843 * B := alpha*op(A)*B or B := alpha*B*op(A)
2844 * op(A) is one of op(A) = A or op(A) = A**T
2845 *
2846 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
2847 *
2848 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2849 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2850 * @param TransA The type of transpose applied to matrix A.
2851 * @param Diag Specifies whether or not A is unit triangular.
2852 * @param alpha The scalar alpha.
2853 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2854 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2855 */
Tim Murray25207df2015-01-12 16:47:56 -08002856 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
2857 validateUplo(Uplo);
2858 validateDiag(Diag);
2859 validateTRMM(Element.F32(mRS), Side, TransA, A, B);
2860 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2861 alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0);
2862 }
Miao Wangfb675a52015-05-12 18:22:20 -07002863
2864 /**
2865 * DTRMM performs one of the matrix-matrix operations
2866 * B := alpha*op(A)*B or B := alpha*B*op(A)
2867 * op(A) is one of op(A) = A or op(A) = A**T
2868 *
2869 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
2870 *
2871 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2872 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2873 * @param TransA The type of transpose applied to matrix A.
2874 * @param Diag Specifies whether or not A is unit triangular.
2875 * @param alpha The scalar alpha.
2876 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2877 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2878 */
Tim Murray25207df2015-01-12 16:47:56 -08002879 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
2880 validateUplo(Uplo);
2881 validateDiag(Diag);
2882 validateTRMM(Element.F64(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07002883 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2884 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08002885 }
Miao Wangfb675a52015-05-12 18:22:20 -07002886
2887 /**
2888 * CTRMM performs one of the matrix-matrix operations
2889 * B := alpha*op(A)*B or B := alpha*B*op(A)
2890 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H
2891 *
2892 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
2893 *
2894 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2895 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2896 * @param TransA The type of transpose applied to matrix A.
2897 * @param Diag Specifies whether or not A is unit triangular.
2898 * @param alpha The scalar alpha.
2899 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2900 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2901 */
Tim Murray25207df2015-01-12 16:47:56 -08002902 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
2903 validateUplo(Uplo);
2904 validateDiag(Diag);
2905 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07002906 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08002907 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
2908 }
Miao Wangfb675a52015-05-12 18:22:20 -07002909
2910 /**
2911 * ZTRMM performs one of the matrix-matrix operations
2912 * B := alpha*op(A)*B or B := alpha*B*op(A)
2913 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H
2914 *
2915 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
2916 *
2917 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2918 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2919 * @param TransA The type of transpose applied to matrix A.
2920 * @param Diag Specifies whether or not A is unit triangular.
2921 * @param alpha The scalar alpha.
2922 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2923 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2924 */
Tim Murray25207df2015-01-12 16:47:56 -08002925 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
2926 validateUplo(Uplo);
2927 validateDiag(Diag);
2928 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07002929 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08002930 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
2931 }
2932
2933 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
Miao Wang37ae07c2015-04-24 11:19:53 -07002934 int adim = -1, bM = -1, bN = -1;
Tim Murray25207df2015-01-12 16:47:56 -08002935 validateSide(Side);
2936 validateTranspose(TransA);
2937 if (!A.getType().getElement().isCompatible(e) ||
2938 !B.getType().getElement().isCompatible(e)) {
2939 throw new RSRuntimeException("Called BLAS with wrong Element type");
2940 }
2941 adim = A.getType().getX();
2942 if (adim != A.getType().getY()) {
2943 // this may be unnecessary, the restriction could potentially be relaxed
2944 // A needs to contain at least that symmetric matrix but could theoretically be larger
2945 // for now we assume adapters are sufficient, will reevaluate in the future
2946 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
2947 }
Miao Wang37ae07c2015-04-24 11:19:53 -07002948 bM = B.getType().getY();
2949 bN = B.getType().getX();
Tim Murray25207df2015-01-12 16:47:56 -08002950 if (Side == LEFT) {
2951 // A is M*M
Miao Wang37ae07c2015-04-24 11:19:53 -07002952 if (adim != bM) {
Tim Murray25207df2015-01-12 16:47:56 -08002953 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
2954 }
2955 } else {
2956 // A is N*N
Miao Wang37ae07c2015-04-24 11:19:53 -07002957 if (adim != bN) {
Tim Murray25207df2015-01-12 16:47:56 -08002958 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
2959 }
2960 }
2961 }
Miao Wangfb675a52015-05-12 18:22:20 -07002962
2963 /**
2964 * STRSM solves one of the matrix equations
2965 * op(A)*X := alpha*B or X*op(A) := alpha*B
2966 * op(A) is one of op(A) = A or op(A) = A**T
2967 *
2968 * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
2969 *
2970 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2971 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2972 * @param TransA The type of transpose applied to matrix A.
2973 * @param Diag Specifies whether or not A is unit triangular.
2974 * @param alpha The scalar alpha.
2975 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2976 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2977 */
Tim Murray25207df2015-01-12 16:47:56 -08002978 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
2979 validateUplo(Uplo);
2980 validateDiag(Diag);
2981 validateTRSM(Element.F32(mRS), Side, TransA, A, B);
2982 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2983 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
2984 }
Miao Wangfb675a52015-05-12 18:22:20 -07002985
2986 /**
2987 * DTRSM solves one of the matrix equations
2988 * op(A)*X := alpha*B or X*op(A) := alpha*B
2989 * op(A) is one of op(A) = A or op(A) = A**T
2990 *
2991 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
2992 *
2993 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2994 * @param Uplo Specifies whether matrix A is upper or lower triangular.
2995 * @param TransA The type of transpose applied to matrix A.
2996 * @param Diag Specifies whether or not A is unit triangular.
2997 * @param alpha The scalar alpha.
2998 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2999 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3000 */
Tim Murray25207df2015-01-12 16:47:56 -08003001 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3002 validateUplo(Uplo);
3003 validateDiag(Diag);
3004 validateTRSM(Element.F64(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07003005 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08003006 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
3007 }
Miao Wangfb675a52015-05-12 18:22:20 -07003008
3009 /**
3010 * CTRSM solves one of the matrix equations
3011 * op(A)*X := alpha*B or X*op(A) := alpha*B
3012 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H
3013 *
3014 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3015 *
3016 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3017 * @param Uplo Specifies whether matrix A is upper or lower triangular.
3018 * @param TransA The type of transpose applied to matrix A.
3019 * @param Diag Specifies whether or not A is unit triangular.
3020 * @param alpha The scalar alpha.
3021 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3022 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3023 */
Tim Murray25207df2015-01-12 16:47:56 -08003024 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3025 validateUplo(Uplo);
3026 validateDiag(Diag);
3027 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07003028 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08003029 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
3030 }
Miao Wangfb675a52015-05-12 18:22:20 -07003031
3032 /**
3033 * ZTRSM solves one of the matrix equations
3034 * op(A)*X := alpha*B or X*op(A) := alpha*B
3035 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H
3036 *
3037 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3038 *
3039 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3040 * @param Uplo Specifies whether matrix A is upper or lower triangular.
3041 * @param TransA The type of transpose applied to matrix A.
3042 * @param Diag Specifies whether or not A is unit triangular.
3043 * @param alpha The scalar alpha.
3044 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3045 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3046 */
Tim Murray25207df2015-01-12 16:47:56 -08003047 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3048 validateUplo(Uplo);
3049 validateDiag(Diag);
3050 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
Miao Wang194679ed2015-04-30 17:14:28 -07003051 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
Tim Murray25207df2015-01-12 16:47:56 -08003052 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
3053 }
3054
3055 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
3056 validateSide(Side);
3057
3058 if (!A.getType().getElement().isCompatible(e) ||
3059 !B.getType().getElement().isCompatible(e) ||
3060 !C.getType().getElement().isCompatible(e)) {
3061 throw new RSRuntimeException("Called BLAS with wrong Element type");
3062 }
3063
3064 // A must be square; can potentially be relaxed similar to TRSM
3065 int adim = A.getType().getX();
3066 if (adim != A.getType().getY()) {
3067 throw new RSRuntimeException("Called HEMM with non-square A");
3068 }
3069 if ((Side == LEFT && adim != B.getType().getY()) ||
3070 (Side == RIGHT && adim != B.getType().getX())) {
3071 throw new RSRuntimeException("Called HEMM with invalid B");
3072 }
3073 if (B.getType().getX() != C.getType().getX() ||
3074 B.getType().getY() != C.getType().getY()) {
3075 throw new RSRuntimeException("Called HEMM with mismatched B and C");
3076 }
3077 }
Miao Wangfb675a52015-05-12 18:22:20 -07003078
3079 /**
3080 * CHEMM performs one of the matrix-matrix operations
3081 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
3082 *
3083 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3084 *
3085 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3086 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3087 * @param alpha The scalar alpha.
3088 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3089 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3090 * @param beta The scalar beta.
3091 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3092 */
Miao Wang4c472742015-04-22 15:57:57 -07003093 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
Tim Murray25207df2015-01-12 16:47:56 -08003094 validateUplo(Uplo);
3095 validateHEMM(Element.F32_2(mRS), Side, A, B, C);
3096 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
Miao Wang4c472742015-04-22 15:57:57 -07003097 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08003098 }
Miao Wangfb675a52015-05-12 18:22:20 -07003099
3100 /**
3101 * ZHEMM performs one of the matrix-matrix operations
3102 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C
3103 *
3104 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3105 *
3106 * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3107 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3108 * @param alpha The scalar alpha.
3109 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3110 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3111 * @param beta The scalar beta.
3112 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3113 */
Miao Wang4c472742015-04-22 15:57:57 -07003114 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
Tim Murray25207df2015-01-12 16:47:56 -08003115 validateUplo(Uplo);
Miao Wang37ae07c2015-04-24 11:19:53 -07003116 validateHEMM(Element.F64_2(mRS), Side, A, B, C);
Tim Murray25207df2015-01-12 16:47:56 -08003117 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
Miao Wang4c472742015-04-22 15:57:57 -07003118 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
Tim Murray25207df2015-01-12 16:47:56 -08003119 }
3120
3121 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
3122 if (!A.getType().getElement().isCompatible(e) ||
3123 !C.getType().getElement().isCompatible(e)) {
3124 throw new RSRuntimeException("Called BLAS with wrong Element type");
3125 }
3126 validateConjTranspose(Trans);
3127 int cdim = C.getType().getX();
3128 if (cdim != C.getType().getY()) {
3129 throw new RSRuntimeException("Called HERK with non-square C");
3130 }
3131 if (Trans == NO_TRANSPOSE) {
Miao Wang37ae07c2015-04-24 11:19:53 -07003132 if (cdim != A.getType().getY()) {
Tim Murray25207df2015-01-12 16:47:56 -08003133 throw new RSRuntimeException("Called HERK with invalid A");
3134 }
3135 } else {
Miao Wang37ae07c2015-04-24 11:19:53 -07003136 if (cdim != A.getType().getX()) {
Tim Murray25207df2015-01-12 16:47:56 -08003137 throw new RSRuntimeException("Called HERK with invalid A");
3138 }
3139 }
3140 }
Miao Wangfb675a52015-05-12 18:22:20 -07003141
3142 /**
3143 * CHERK performs one of the hermitian rank k operations
3144 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C
3145 *
3146 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3147 *
3148 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3149 * @param Trans The type of transpose applied to the operation.
3150 * @param alpha The scalar alpha.
3151 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3152 * @param beta The scalar beta.
3153 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3154 */
Tim Murray25207df2015-01-12 16:47:56 -08003155 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3156 validateUplo(Uplo);
3157 validateHERK(Element.F32_2(mRS), Trans, A, C);
3158 int k = 0;
Miao Wang37ae07c2015-04-24 11:19:53 -07003159 if (Trans == CONJ_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08003160 k = A.getType().getY();
3161 } else {
3162 k = A.getType().getX();
3163 }
3164 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3165 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
3166 }
Miao Wangfb675a52015-05-12 18:22:20 -07003167
3168 /**
3169 * ZHERK performs one of the hermitian rank k operations
3170 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C
3171 *
3172 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3173 *
3174 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3175 * @param Trans The type of transpose applied to the operation.
3176 * @param alpha The scalar alpha.
3177 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3178 * @param beta The scalar beta.
3179 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3180 */
Tim Murray25207df2015-01-12 16:47:56 -08003181 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
3182 validateUplo(Uplo);
3183 validateHERK(Element.F64_2(mRS), Trans, A, C);
3184 int k = 0;
Miao Wang37ae07c2015-04-24 11:19:53 -07003185 if (Trans == CONJ_TRANSPOSE) {
Tim Murray25207df2015-01-12 16:47:56 -08003186 k = A.getType().getY();
3187 } else {
3188 k = A.getType().getX();
3189 }
3190 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3191 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
3192 }
3193
3194 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
3195 if (!A.getType().getElement().isCompatible(e) ||
3196 !B.getType().getElement().isCompatible(e) ||
3197 !C.getType().getElement().isCompatible(e)) {
3198 throw new RSRuntimeException("Called BLAS with wrong Element type");
3199 }
3200 validateConjTranspose(Trans);
3201 int cdim = C.getType().getX();
3202 if (cdim != C.getType().getY()) {
3203 throw new RSRuntimeException("Called HER2K with non-square C");
3204 }
3205 if (Trans == NO_TRANSPOSE) {
3206 if (A.getType().getY() != cdim) {
3207 throw new RSRuntimeException("Called HER2K with invalid matrices");
3208 }
3209 } else {
3210 if (A.getType().getX() != cdim) {
3211 throw new RSRuntimeException("Called HER2K with invalid matrices");
3212 }
3213 }
3214 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
3215 throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
3216 }
3217 }
Miao Wangfb675a52015-05-12 18:22:20 -07003218
3219 /**
3220 * CHER2K performs one of the hermitian rank 2k operations
3221 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3222 *
3223 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3224 *
3225 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3226 * @param Trans The type of transpose applied to the operation.
3227 * @param alpha The scalar alpha.
3228 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3229 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3230 * @param beta The scalar beta.
3231 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3232 */
Tim Murray25207df2015-01-12 16:47:56 -08003233 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
3234 validateUplo(Uplo);
3235 validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
3236 int k = 0;
3237 if (Trans == NO_TRANSPOSE) {
3238 k = A.getType().getX();
3239 } else {
3240 k = A.getType().getY();
3241 }
3242 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
3243 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
3244 }
Miao Wangfb675a52015-05-12 18:22:20 -07003245
3246 /**
3247 * ZHER2K performs one of the hermitian rank 2k operations
3248 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3249 *
3250 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3251 *
3252 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3253 * @param Trans The type of transpose applied to the operation.
3254 * @param alpha The scalar alpha.
3255 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3256 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3257 * @param beta The scalar beta.
3258 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3259 */
Tim Murray25207df2015-01-12 16:47:56 -08003260 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
3261 validateUplo(Uplo);
3262 validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
3263 int k = 0;
3264 if (Trans == NO_TRANSPOSE) {
3265 k = A.getType().getX();
3266 } else {
3267 k = A.getType().getY();
3268 }
3269 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
3270 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
3271 }
3272
3273
Tim Murray9cb16a22015-04-01 11:07:16 -07003274 /**
Miao Wangd7d413a2015-07-15 11:35:28 -07003275 * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
Miao Wangfb675a52015-05-12 18:22:20 -07003276 * Calculations are done in 1.10.21 fixed-point format for the final output,
3277 * just before there's a shift down to drop the fractional parts. The output
3278 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3279 * gives some headroom to avoid wrapping around on small overflows.
Miao Wang6099ee62015-06-29 17:43:03 -07003280 *
Miao Wangfb675a52015-05-12 18:22:20 -07003281 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
Miao Wang6099ee62015-06-29 17:43:03 -07003282 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
Miao Wangfb675a52015-05-12 18:22:20 -07003283 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
Miao Wang6099ee62015-06-29 17:43:03 -07003284 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
Miao Wangfb675a52015-05-12 18:22:20 -07003285 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
3286 * @param c_offset The offset for all values in matrix C.
3287 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
Tim Murray9cb16a22015-04-01 11:07:16 -07003288 **/
3289 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
3290 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
3291
Miao Wang6099ee62015-06-29 17:43:03 -07003292 if (a_offset < 0 || a_offset > 255) {
3293 throw new RSRuntimeException("Invalid a_offset passed to BNNM");
3294 }
3295 if (b_offset < 0 || b_offset > 255) {
3296 throw new RSRuntimeException("Invalid b_offset passed to BNNM");
3297 }
Tim Murray9cb16a22015-04-01 11:07:16 -07003298 int M = -1, N = -1, K = -1;
3299 M = A.getType().getY();
3300 N = B.getType().getY();
3301 K = A.getType().getX();
3302
3303
3304 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult);
3305
3306 }
Tim Murray25207df2015-01-12 16:47:56 -08003307
3308}