ergo
mat_gblas.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
45#ifndef MAT_GBLAS_HEADER
46#define MAT_GBLAS_HEADER
47#include <ctime>
48#include "Failure.h"
49
50/* We need to include config.h to get USE_LINALG_TEMPLATES and USE_SSE_INTRINSICS flags. */
51#include "config.h"
52
54
55#ifdef USE_SSE_INTRINSICS
57#include "gemm_sse/gemm_sse.h"
58#endif
59
60/* LEVEL 3 */
61extern "C" void dgemm_(const char *ta,const char *tb,
62 const int *n, const int *k, const int *l,
63 const double *alpha,const double *A,const int *lda,
64 const double *B, const int *ldb,
65 const double *beta, double *C, const int *ldc);
66extern "C" void dpptrf_(const char *uplo,const int *n, double* ap, int *info);
67extern "C" void dspgst_(const int *itype, const char *uplo,const int *n,
68 double* ap,const double *bp,int *info);
69extern "C" void dtptri_(const char *uplo,const char *diag,const int *n,
70 double* ap,int *info);
71/* unit triangular means that a value of 1.0 is assumed */
72/* for the diagonal elements (hence diagonal not stored in packed format) */
73extern "C" void dtrmm_(const char *side,const char *uplo,const char *transa,
74 const char *diag,const int *m,const int *n,
75 const double *alpha,const double *A,const int *lda,
76 double *B,const int *ldb);
77extern "C" void dsygv_(const int *itype,const char *jobz,
78 const char *uplo,const int *n,
79 double *A,const int *lda,double *B,const int *ldb,
80 double* w,double* work,const int *lwork,int *info);
81extern "C" void dggev_(const char *jobbl, const char *jobvr, const int *n,
82 double *A, const int *lda, double *B, const int *ldb,
83 double *alphar, double *alphai, double *beta,
84 double *vl, const int *ldvl,
85 double *vr, const int *ldvr,
86 double *work, const int *lwork, int *info);
87extern "C" void dpotrf_(const char *uplo, const int *n, double *A,
88 const int *lda, int *info);
89extern "C" void dtrtri_(const char *uplo,const char *diag,const int *n,
90 double *A, const int *lda, int *info);
91extern "C" void dsyrk_(const char *uplo, const char *trans, const int *n,
92 const int *k, const double *alpha, const double *A,
93 const int *lda, const double *beta,
94 double *C, const int *ldc);
95extern "C" void dsymm_(const char *side,const char *uplo,
96 const int *m,const int *n,
97 const double *alpha,const double *A,const int *lda,
98 const double *B,const int *ldb, const double* beta,
99 double *C,const int *ldc);
100extern "C" void dpocon_(const char *uplo, const int *n, const double *A,
101 const int *lda, const double *anorm, double *rcond,
102 double *work, int *iwork, int *info);
103extern "C" void dstevx_(const char *jobz, const char *range, const int *n,
104 double *d, double *e, const double *vl,
105 const double *vu, const int *il, const int *iu,
106 const double *abstol, int *m, double *w, double *z,
107 const int *ldz, double *work, int *iwork, int *ifail,
108 int *info);
109extern "C" void dstevr_(const char *jobz, const char *range, const int *n,
110 double *d, double *e, const double *vl,
111 const double *vu, const int *il, const int *iu,
112 const double *abstol, int *m, double *w, double *z,
113 const int *ldz, int* isuppz, double *work, int* lwork,
114 int *iwork, int* liwork, int *info);
115extern "C" void dsyev_(const char *jobz, const char *uplo, const int *n,
116 double *a, const int *lda, double *w, double *work,
117 const int *lwork, int *info);
118
119/* LEVEL 2 */
120extern "C" void dgemv_(const char *ta, const int *m, const int *n,
121 const double *alpha, const double *A, const int *lda,
122 const double *x, const int *incx, const double *beta,
123 double *y, const int *incy);
124extern "C" void dsymv_(const char *uplo, const int *n,
125 const double *alpha, const double *A, const int *lda,
126 const double *x, const int *incx, const double *beta,
127 double *y, const int *incy);
128extern "C" void dtrmv_(const char *uplo, const char *trans, const char *diag,
129 const int *n, const double *A, const int *lda,
130 double *x, const int *incx);
131/* LEVEL 1 */
132extern "C" void dscal_(const int* n,const double* da, double* dx,
133 const int* incx);
134extern "C" double ddot_(const int* n, const double* dx, const int* incx,
135 const double* dy, const int* incy);
136extern "C" void daxpy_(const int* n, const double* da, const double* dx,
137 const int* incx, double* dy,const int* incy);
138
139/* Single precision */
140/* LEVEL 3 */
141extern "C" void sgemm_(const char *ta,const char *tb,
142 const int *n, const int *k, const int *l,
143 const float *alpha,const float *A,const int *lda,
144 const float *B, const int *ldb,
145 const float *beta, float *C, const int *ldc);
146extern "C" void spptrf_(const char *uplo,const int *n, float* ap, int *info);
147extern "C" void sspgst_(const int *itype, const char *uplo,const int *n,
148 float* ap,const float *bp,int *info);
149extern "C" void stptri_(const char *uplo,const char *diag,const int *n,
150 float* ap,int *info);
151/* unit triangular means that a value of 1.0 is assumed */
152/* for the diagonal elements (hence diagonal not stored in packed format) */
153extern "C" void strmm_(const char *side,const char *uplo,const char *transa,
154 const char *diag,const int *m,const int *n,
155 const float *alpha,const float *A,const int *lda,
156 float *B,const int *ldb);
157extern "C" void ssygv_(const int *itype,const char *jobz,
158 const char *uplo,const int *n,
159 float *A,const int *lda,float *B,const int *ldb,
160 float* w,float* work,const int *lwork,int *info);
161extern "C" void sggev_(const char *jobbl, const char *jobvr, const int *n,
162 float *A, const int *lda, float *B, const int *ldb,
163 float *alphar, float *alphai, float *beta,
164 float *vl, const int *ldvl,
165 float *vr, const int *ldvr,
166 float *work, const int *lwork, int *info);
167extern "C" void spotrf_(const char *uplo, const int *n, float *A,
168 const int *lda, int *info);
169extern "C" void strtri_(const char *uplo,const char *diag,const int *n,
170 float *A, const int *lda, int *info);
171extern "C" void ssyrk_(const char *uplo, const char *trans, const int *n,
172 const int *k, const float *alpha, const float *A,
173 const int *lda, const float *beta,
174 float *C, const int *ldc);
175extern "C" void ssymm_(const char *side,const char *uplo,
176 const int *m,const int *n,
177 const float *alpha,const float *A,const int *lda,
178 const float *B,const int *ldb, const float* beta,
179 float *C,const int *ldc);
180extern "C" void spocon_(const char *uplo, const int *n, const float *A,
181 const int *lda, const float *anorm, float *rcond,
182 float *work, int *iwork, int *info);
183extern "C" void sstevx_(const char *jobz, const char *range, const int *n,
184 float *d, float *e, const float *vl,
185 const float *vu, const int *il, const int *iu,
186 const float *abstol, int *m, float *w, float *z,
187 const int *ldz, float *work, int *iwork, int *ifail,
188 int *info);
189extern "C" void sstevr_(const char *jobz, const char *range, const int *n,
190 float *d, float *e, const float *vl,
191 const float *vu, const int *il, const int *iu,
192 const float *abstol, int *m, float *w, float *z,
193 const int *ldz, int* isuppz, float *work, int* lwork,
194 int *iwork, int* liwork, int *info);
195extern "C" void ssyev_(const char *jobz, const char *uplo, const int *n,
196 float *a, const int *lda, float *w, float *work,
197 const int *lwork, int *info);
198
199/* LEVEL 2 */
200extern "C" void sgemv_(const char *ta, const int *m, const int *n,
201 const float *alpha, const float *A, const int *lda,
202 const float *x, const int *incx, const float *beta,
203 float *y, const int *incy);
204extern "C" void ssymv_(const char *uplo, const int *n,
205 const float *alpha, const float *A, const int *lda,
206 const float *x, const int *incx, const float *beta,
207 float *y, const int *incy);
208extern "C" void strmv_(const char *uplo, const char *trans, const char *diag,
209 const int *n, const float *A, const int *lda,
210 float *x, const int *incx);
211/* LEVEL 1 */
212extern "C" void sscal_(const int* n,const float* da, float* dx,
213 const int* incx);
214#if 0
215// sdot_ is unreliable because of varying return type in different
216// implementations. We therefore always use template dot for single precision
217extern "C" double sdot_(const int* n, const float* dx, const int* incx,
218 const float* dy, const int* incy);
219#endif
220extern "C" void saxpy_(const int* n, const float* da, const float* dx,
221 const int* incx, float* dy,const int* incy);
222
223namespace mat
224{
225 struct Gblas {
226 static float time;
227 static bool timekeeping;
228 };
229
230 /*************** Default version throws exception */
231 template<class T>
232 inline static void gemm(const char *ta,const char *tb,
233 const int *n, const int *k, const int *l,
234 const T *alpha,const T *A,const int *lda,
235 const T *B, const int *ldb,
236 const T *beta,T *C, const int *ldc) {
237#ifdef USE_SSE_INTRINSICS
238 if (*ta == 'N' && *tb == 'N' && *n == 32 && *k == 32 && *l == 32 && *alpha == 1.0 && *beta == 1) {
239 static T * A_packed;
240 static T * B_packed;
241 static T * C_packed;
242 int pack_max_size = 10000;
243 if ( (pack_max_size*sizeof(T))%16 )
244 throw std::runtime_error("In gblas gemm: requested buffer size not multiple of 16 bytes");
245 static T * buffer;
246 Memory_buffer_thread::instance().get_buffer(pack_max_size*3, buffer);
247 A_packed = buffer;
248 B_packed = buffer + pack_max_size;
249 C_packed = buffer + 2*pack_max_size;
250 gemm_sse<T>(A,B,C,32,32,32,
251 A_packed, B_packed, C_packed,
252 pack_max_size, pack_max_size, pack_max_size);
253 }
254 else
255#endif
256 template_blas_gemm(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
257 }
258
259
260 /* Computes the Cholesky factorization of a symmetric *
261 * positive definite matrix in packed storage. */
262 template<class T>
263 inline static void pptrf(const char *uplo,const int *n, T* ap, int *info) {
264 template_lapack_pptrf(uplo,n,ap,info);
265 }
266
267 template<class T>
268 inline static void spgst(const int *itype, const char *uplo,const int *n,
269 T* ap,const T *bp,int *info) {
270 template_lapack_spgst(itype,uplo,n,ap,bp,info);
271 }
272
273 /* Computes the inverse of a triangular matrix in packed storage. */
274 template<class T>
275 inline static void tptri(const char *uplo,const char *diag,const int *n,
276 T* ap,int *info) {
277 template_lapack_tptri(uplo,diag,n,ap,info);
278 }
279
280 template<class T>
281 inline static void trmm(const char *side,const char *uplo,
282 const char *transa, const char *diag,
283 const int *m,const int *n,
284 const T *alpha,const T *A,const int *lda,
285 T *B,const int *ldb) {
286 template_blas_trmm(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
287 }
288
289 /* Computes all eigenvalues and the eigenvectors of a generalized *
290 * symmetric-definite generalized eigenproblem, *
291 * Ax= lambda Bx, ABx= lambda x, or BAx= lambda x. */
292 template<class T>
293 inline static void sygv(const int *itype,const char *jobz,
294 const char *uplo,const int *n,
295 T *A,const int *lda,T *B,const int *ldb,
296 T* w,T* work,const int *lwork,int *info) {
297 template_lapack_sygv(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
298 }
299
300 template<class T>
301 inline static void ggev(const char *jobbl, const char *jobvr,
302 const int *n, T *A, const int *lda,
303 T *B, const int *ldb, T *alphar,
304 T *alphai, T *beta, T *vl,
305 const int *ldvl, T *vr, const int *ldvr,
306 T *work, const int *lwork, int *info) {
307 template_lapack_ggev(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
308 ldvl, vr, ldvr, work, lwork, info);
309 }
310
311 /* Computes the Cholesky factorization of a symmetric *
312 * positive definite matrix in packed storage. */
313 template<class T>
314 inline static void potrf(const char *uplo, const int *n, T *A,
315 const int *lda, int *info) {
316 template_lapack_potrf(uplo, n, A, lda, info);
317 }
318
319 /* Computes the inverse of a triangular matrix. */
320 template<class T>
321 inline static void trtri(const char *uplo,const char *diag,const int *n,
322 T *A, const int *lda, int *info) {
323 // Create copies of strings because they cannot be const inside trtri.
324 char uploCopy[2];
325 char diagCopy[2];
326 uploCopy[0] = uplo[0];
327 uploCopy[1] = '\0';
328 diagCopy[0] = diag[0];
329 diagCopy[1] = '\0';
330 template_lapack_trtri(uploCopy, diagCopy, n, A, lda, info);
331 }
332
333 template<class T>
334 inline static void syrk(const char *uplo, const char *trans, const int *n,
335 const int *k, const T *alpha, const T *A,
336 const int *lda, const T *beta,
337 T *C, const int *ldc) {
338 template_blas_syrk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
339 }
340
341 template<class T>
342 inline static void symm(const char *side,const char *uplo,
343 const int *m,const int *n,
344 const T *alpha,const T *A,const int *lda,
345 const T *B,const int *ldb, const T* beta,
346 T *C,const int *ldc) {
347 template_blas_symm(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
348 }
349
350 template<class T>
351 inline static void pocon(const char *uplo, const int *n, const T *A,
352 const int *lda, const T *anorm, T *rcond,
353 T *work, int *iwork, int *info) {
354 template_lapack_pocon(uplo, n, A, lda, anorm, rcond, work, iwork, info);
355 }
356
357 template<class T>
358 inline static void stevx(const char *jobz, const char *range,
359 const int *n, T *d, T *e, const T *vl,
360 const T *vu, const int *il, const int *iu,
361 const T *abstol, int *m, T *w, T *z,
362 const int *ldz, T *work, int *iwork, int *ifail,
363 int *info) {
364 template_lapack_stevx(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
365 work, iwork, ifail, info);
366 }
367
368 template<class T>
369 inline static void stevr(const char *jobz, const char *range, const int *n,
370 T *d, T *e, const T *vl,
371 const T *vu, const int *il, const int *iu,
372 const T *abstol, int *m, T *w, T *z,
373 const int *ldz, int* isuppz, T *work, int* lwork,
374 int *iwork, int* liwork, int *info) {
375 template_lapack_stevr(jobz, range, n, d, e, vl, vu, il, iu, abstol,
376 m, w, z, ldz, isuppz,
377 work, lwork, iwork, liwork, info);
378 }
379
380
381 template<class T>
382 inline static void syev(const char *jobz, const char *uplo, const int *n,
383 T *a, const int *lda, T *w, T *work,
384 const int *lwork, int *info) {
385 template_lapack_syev(jobz, uplo, n, a, lda, w, work, lwork, info);
386 }
387
388
389 /* LEVEL 2 */
390 template<class T>
391 inline static void gemv(const char *ta, const int *m, const int *n,
392 const T *alpha, const T *A,
393 const int *lda,
394 const T *x, const int *incx,
395 const T *beta, T *y, const int *incy) {
396 template_blas_gemv(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
397 }
398
399 template<class T>
400 inline static void symv(const char *uplo, const int *n,
401 const T *alpha, const T *A,
402 const int *lda, const T *x,
403 const int *incx, const T *beta,
404 T *y, const int *incy) {
405 template_blas_symv(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
406 }
407
408 template<class T>
409 inline static void trmv(const char *uplo, const char *trans,
410 const char *diag, const int *n,
411 const T *A, const int *lda,
412 T *x, const int *incx) {
413 template_blas_trmv(uplo, trans, diag, n, A, lda, x, incx);
414 }
415
416
417 /* LEVEL 1 */
418 template<class T>
419 inline static void scal(const int* n,const T* da, T* dx,
420 const int* incx) {
421 template_blas_scal(n, da, dx, incx);
422 }
423
424 template<class T>
425 inline static T dot(const int* n, const T* dx, const int* incx,
426 const T* dy, const int* incy) {
427 return template_blas_dot(n, dx, incx, dy, incy);
428 }
429
430 template<class T>
431 inline static void axpy(const int* n, const T* da, const T* dx,
432 const int* incx, T* dy,const int* incy) {
433 template_blas_axpy(n, da, dx, incx, dy, incy);
434 }
435
436
437
438
439 /* Below follows specializations for double, single, etc.
440 These specializations are not needed if template_blas and template_lapack are used,
441 so in that case we skip this entire section. */
442#ifndef USE_LINALG_TEMPLATES
443
444
445 /*************** Double specialization */
446 template<>
447 inline void gemm<double>(const char *ta,const char *tb,
448 const int *n, const int *k, const int *l,
449 const double *alpha,
450 const double *A,const int *lda,
451 const double *B, const int *ldb,
452 const double *beta,
453 double *C, const int *ldc) {
454 if (Gblas::timekeeping) {
455 clock_t start = clock();
456 dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
457 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
458 }
459 else {
460 dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
461 }
462 }
463
464 template<>
465 inline void pptrf<double>(const char *uplo,const int *n,
466 double* ap, int *info) {
467 if (Gblas::timekeeping) {
468 clock_t start = clock();
469 dpptrf_(uplo,n,ap,info);
470 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
471 }
472 else {
473 dpptrf_(uplo,n,ap,info);
474 }
475 }
476
477 template<>
478 inline void spgst<double>(const int *itype, const char *uplo,
479 const int *n,
480 double* ap,const double *bp,int *info) {
481 if (Gblas::timekeeping) {
482 clock_t start = clock();
483 dspgst_(itype,uplo,n,ap,bp,info);
484 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
485 }
486 else {
487 dspgst_(itype,uplo,n,ap,bp,info);
488 }
489 }
490
491 template<>
492 inline void tptri<double>(const char *uplo,const char *diag,const int *n,
493 double* ap,int *info) {
494 if (Gblas::timekeeping) {
495 clock_t start = clock();
496 dtptri_(uplo,diag,n,ap,info);
497 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
498 }
499 else {
500 dtptri_(uplo,diag,n,ap,info);
501 }
502 }
503
504 template<>
505 inline void trmm<double>(const char *side,const char *uplo,
506 const char *transa,
507 const char *diag,const int *m,const int *n,
508 const double *alpha,
509 const double *A,const int *lda,
510 double *B,const int *ldb) {
511 if (Gblas::timekeeping) {
512 clock_t start = clock();
513 dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
514 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
515 }
516 else {
517 dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
518 }
519 }
520
521 template<>
522 inline void sygv<double>(const int *itype,const char *jobz,
523 const char *uplo,const int *n,
524 double *A,const int *lda,
525 double *B,const int *ldb,
526 double* w,double* work,
527 const int *lwork,int *info) {
528 if (Gblas::timekeeping) {
529 clock_t start = clock();
530 dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
531 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
532 }
533 else {
534 dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
535 }
536 }
537
538 template<>
539 inline void ggev<double>(const char *jobbl, const char *jobvr,
540 const int *n, double *A, const int *lda,
541 double *B, const int *ldb, double *alphar,
542 double *alphai, double *beta, double *vl,
543 const int *ldvl, double *vr, const int *ldvr,
544 double *work, const int *lwork, int *info) {
545 if (Gblas::timekeeping) {
546 clock_t start = clock();
547 dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
548 ldvl, vr, ldvr, work, lwork, info);
549 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
550 }
551 else {
552 dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
553 ldvl, vr, ldvr, work, lwork, info);
554 }
555 }
556
557
558 template<>
559 inline void potrf<double>(const char *uplo, const int *n, double *A,
560 const int *lda, int *info) {
561 if (Gblas::timekeeping) {
562 clock_t start = clock();
563 dpotrf_(uplo, n, A, lda, info);
564 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
565 }
566 else {
567 dpotrf_(uplo, n, A, lda, info);
568 }
569 }
570
571 template<>
572 inline void trtri<double>(const char *uplo,const char *diag,const int *n,
573 double *A, const int *lda, int *info) {
574 if (Gblas::timekeeping) {
575 clock_t start = clock();
576 dtrtri_(uplo, diag, n, A, lda, info);
577 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
578 }
579 else {
580 dtrtri_(uplo, diag, n, A, lda, info);
581 }
582 }
583
584 template<>
585 inline void syrk<double>(const char *uplo, const char *trans,
586 const int *n, const int *k, const double *alpha,
587 const double *A, const int *lda,
588 const double *beta, double *C, const int *ldc) {
589 if (Gblas::timekeeping) {
590 clock_t start = clock();
591 dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
592 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
593 }
594 else {
595 dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
596 }
597 }
598
599 template<>
600 inline void symm<double>(const char *side,const char *uplo,
601 const int *m,const int *n, const double *alpha,
602 const double *A,const int *lda,
603 const double *B,const int *ldb,
604 const double* beta,
605 double *C,const int *ldc) {
606 if (Gblas::timekeeping) {
607 clock_t start = clock();
608 dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
609 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
610 }
611 else {
612 dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
613 }
614 }
615
616 template<>
617 inline void pocon<double>(const char *uplo, const int *n,
618 const double *A, const int *lda,
619 const double *anorm, double *rcond,
620 double *work, int *iwork, int *info) {
621 if (Gblas::timekeeping) {
622 clock_t start = clock();
623 dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
624 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
625 }
626 else {
627 dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
628 }
629 }
630
631 template<>
632 inline void stevx<double>(const char *jobz, const char *range,
633 const int *n, double *d, double *e,
634 const double *vl,
635 const double *vu, const int *il, const int *iu,
636 const double *abstol, int *m, double *w,
637 double *z,
638 const int *ldz, double *work, int *iwork,
639 int *ifail, int *info) {
640 if (Gblas::timekeeping) {
641 clock_t start = clock();
642 dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
643 work, iwork, ifail, info);
644 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
645 }
646 else {
647 dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
648 work, iwork, ifail, info);
649 }
650 }
651
652 template<>
653 inline void stevr<double>(const char *jobz, const char *range,
654 const int *n, double *d, double *e,
655 const double *vl, const double *vu,
656 const int *il, const int *iu,
657 const double *abstol,
658 int *m, double *w,
659 double *z, const int *ldz, int* isuppz,
660 double *work, int* lwork,
661 int *iwork, int* liwork, int *info) {
662 if (Gblas::timekeeping) {
663 clock_t start = clock();
664 dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
665 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
666 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
667 }
668 else {
669 dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
670 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
671 }
672 }
673
674
675
676 template<>
677 inline void syev<double>(const char *jobz, const char *uplo, const int *n,
678 double *a, const int *lda, double *w,
679 double *work, const int *lwork, int *info) {
680 if (Gblas::timekeeping) {
681 clock_t start = clock();
682 dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
683 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
684 }
685 else {
686 dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
687 }
688 }
689
690
691 /* LEVEL 2 */
692 template<>
693 inline void gemv<double>(const char *ta, const int *m, const int *n,
694 const double *alpha, const double *A,
695 const int *lda,
696 const double *x, const int *incx,
697 const double *beta, double *y, const int *incy) {
698 if (Gblas::timekeeping) {
699 clock_t start = clock();
700 dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
701 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
702 }
703 else {
704 dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
705 }
706 }
707
708 template<>
709 inline void symv<double>(const char *uplo, const int *n,
710 const double *alpha, const double *A,
711 const int *lda, const double *x,
712 const int *incx, const double *beta,
713 double *y, const int *incy) {
714 if (Gblas::timekeeping) {
715 clock_t start = clock();
716 dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
717 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
718 }
719 else {
720 dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
721 }
722 }
723
724 template<>
725 inline void trmv<double>(const char *uplo, const char *trans,
726 const char *diag, const int *n,
727 const double *A, const int *lda,
728 double *x, const int *incx) {
729 if (Gblas::timekeeping) {
730 clock_t start = clock();
731 dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
732 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
733 }
734 else {
735 dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
736 }
737 }
738
739
740 /* LEVEL 1 */
741 template<>
742 inline void scal<double>(const int* n,const double* da, double* dx,
743 const int* incx) {
744 if (Gblas::timekeeping) {
745 clock_t start = clock();
746 dscal_(n, da, dx, incx);
747 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
748 }
749 else {
750 dscal_(n, da, dx, incx);
751 }
752 }
753
754 template<>
755 inline double dot<double>(const int* n, const double* dx, const int* incx,
756 const double* dy, const int* incy) {
757 double tmp = 0;
758 if (Gblas::timekeeping) {
759 clock_t start = clock();
760 tmp = ddot_(n, dx, incx, dy, incy);
761 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
762 }
763 else {
764 tmp = ddot_(n, dx, incx, dy, incy);
765 }
766 return tmp;
767 }
768
769 template<>
770 inline void axpy<double>(const int* n, const double* da, const double* dx,
771 const int* incx, double* dy,const int* incy) {
772 if (Gblas::timekeeping) {
773 clock_t start = clock();
774 daxpy_(n, da, dx, incx, dy, incy);
775 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
776 }
777 else {
778 daxpy_(n, da, dx, incx, dy, incy);
779 }
780 }
781
782
783 /*************** Single specialization */
784 template<>
785 inline void gemm<float>(const char *ta,const char *tb,
786 const int *n, const int *k, const int *l,
787 const float *alpha,
788 const float *A,const int *lda,
789 const float *B, const int *ldb,
790 const float *beta,
791 float *C, const int *ldc) {
792 if (Gblas::timekeeping) {
793 clock_t start = clock();
794 sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
795 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
796 }
797 else {
798 sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
799 }
800 }
801
802 template<>
803 inline void pptrf<float>(const char *uplo,const int *n,
804 float* ap, int *info) {
805 if (Gblas::timekeeping) {
806 clock_t start = clock();
807 spptrf_(uplo,n,ap,info);
808 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
809 }
810 else {
811 spptrf_(uplo,n,ap,info);
812 }
813 }
814
815 template<>
816 inline void spgst<float>(const int *itype, const char *uplo,
817 const int *n,
818 float* ap,const float *bp,int *info) {
819 if (Gblas::timekeeping) {
820 clock_t start = clock();
821 sspgst_(itype,uplo,n,ap,bp,info);
822 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
823 }
824 else {
825 sspgst_(itype,uplo,n,ap,bp,info);
826 }
827 }
828
829 template<>
830 inline void tptri<float>(const char *uplo,const char *diag,
831 const int *n,
832 float* ap,int *info) {
833 if (Gblas::timekeeping) {
834 clock_t start = clock();
835 stptri_(uplo,diag,n,ap,info);
836 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
837 }
838 else {
839 stptri_(uplo,diag,n,ap,info);
840 }
841 }
842
843 template<>
844 inline void trmm<float>(const char *side,const char *uplo,
845 const char *transa,
846 const char *diag,const int *m,const int *n,
847 const float *alpha,
848 const float *A,const int *lda,
849 float *B,const int *ldb) {
850 if (Gblas::timekeeping) {
851 clock_t start = clock();
852 strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
853 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
854 }
855 else {
856 strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
857 }
858 }
859
860 template<>
861 inline void sygv<float>(const int *itype,const char *jobz,
862 const char *uplo,const int *n,
863 float *A,const int *lda,
864 float *B,const int *ldb,
865 float* w,float* work,
866 const int *lwork,int *info) {
867 if (Gblas::timekeeping) {
868 clock_t start = clock();
869 ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
870 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
871 }
872 else {
873 ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
874 }
875 }
876
877 template<>
878 inline void ggev<float>(const char *jobbl, const char *jobvr,
879 const int *n, float *A, const int *lda,
880 float *B, const int *ldb, float *alphar,
881 float *alphai, float *beta, float *vl,
882 const int *ldvl, float *vr, const int *ldvr,
883 float *work, const int *lwork, int *info) {
884 if (Gblas::timekeeping) {
885 clock_t start = clock();
886 sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
887 ldvl, vr, ldvr, work, lwork, info);
888 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
889 }
890 else {
891 sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
892 ldvl, vr, ldvr, work, lwork, info);
893 }
894 }
895
896
897 template<>
898 inline void potrf<float>(const char *uplo, const int *n, float *A,
899 const int *lda, int *info) {
900 if (Gblas::timekeeping) {
901 clock_t start = clock();
902 spotrf_(uplo, n, A, lda, info);
903 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
904 }
905 else {
906 spotrf_(uplo, n, A, lda, info);
907 }
908 }
909
910 template<>
911 inline void trtri<float>(const char *uplo,const char *diag,const int *n,
912 float *A, const int *lda, int *info) {
913 if (Gblas::timekeeping) {
914 clock_t start = clock();
915 strtri_(uplo, diag, n, A, lda, info);
916 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
917 }
918 else {
919 strtri_(uplo, diag, n, A, lda, info);
920 }
921 }
922
923 template<>
924 inline void syrk<float>(const char *uplo, const char *trans,
925 const int *n, const int *k, const float *alpha,
926 const float *A, const int *lda,
927 const float *beta, float *C, const int *ldc) {
928 if (Gblas::timekeeping) {
929 clock_t start = clock();
930 ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
931 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
932 }
933 else {
934 ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
935 }
936 }
937
938 template<>
939 inline void symm<float>(const char *side,const char *uplo,
940 const int *m,const int *n, const float *alpha,
941 const float *A,const int *lda,
942 const float *B,const int *ldb,
943 const float* beta,
944 float *C,const int *ldc) {
945 if (Gblas::timekeeping) {
946 clock_t start = clock();
947 ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
948 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
949 }
950 else {
951 ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
952 }
953 }
954
955 template<>
956 inline void pocon<float>(const char *uplo, const int *n,
957 const float *A, const int *lda,
958 const float *anorm, float *rcond,
959 float *work, int *iwork, int *info) {
960 if (Gblas::timekeeping) {
961 clock_t start = clock();
962 spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
963 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
964 }
965 else {
966 spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
967 }
968 }
969
970 template<>
971 inline void stevx<float>(const char *jobz, const char *range,
972 const int *n, float *d, float *e,
973 const float *vl,
974 const float *vu, const int *il, const int *iu,
975 const float *abstol, int *m, float *w,
976 float *z,
977 const int *ldz, float *work, int *iwork,
978 int *ifail, int *info) {
979 if (Gblas::timekeeping) {
980 clock_t start = clock();
981 sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
982 work, iwork, ifail, info);
983 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
984 }
985 else {
986 sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
987 work, iwork, ifail, info);
988 }
989 }
990
991 template<>
992 inline void stevr<float>(const char *jobz, const char *range,
993 const int *n, float *d, float *e,
994 const float *vl, const float *vu,
995 const int *il, const int *iu,
996 const float *abstol,
997 int *m, float *w,
998 float *z, const int *ldz, int* isuppz,
999 float *work, int* lwork,
1000 int *iwork, int* liwork, int *info) {
1001 if (Gblas::timekeeping) {
1002 clock_t start = clock();
1003 sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
1004 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
1005 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1006 }
1007 else {
1008 sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
1009 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
1010 }
1011 }
1012
1013 template<>
1014 inline void syev<float>(const char *jobz, const char *uplo, const int *n,
1015 float *a, const int *lda, float *w,
1016 float *work, const int *lwork, int *info) {
1017 if (Gblas::timekeeping) {
1018 clock_t start = clock();
1019 ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
1020 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1021 }
1022 else {
1023 ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
1024 }
1025 }
1026
1027
1028 /* LEVEL 2 */
1029 template<>
1030 inline void gemv<float>(const char *ta, const int *m, const int *n,
1031 const float *alpha, const float *A,
1032 const int *lda,
1033 const float *x, const int *incx,
1034 const float *beta, float *y, const int *incy) {
1035 if (Gblas::timekeeping) {
1036 clock_t start = clock();
1037 sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
1038 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1039 }
1040 else {
1041 sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
1042 }
1043 }
1044
1045 template<>
1046 inline void symv<float>(const char *uplo, const int *n,
1047 const float *alpha, const float *A,
1048 const int *lda, const float *x,
1049 const int *incx, const float *beta,
1050 float *y, const int *incy) {
1051 if (Gblas::timekeeping) {
1052 clock_t start = clock();
1053 ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
1054 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1055 }
1056 else {
1057 ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
1058 }
1059 }
1060
1061 template<>
1062 inline void trmv<float>(const char *uplo, const char *trans,
1063 const char *diag, const int *n,
1064 const float *A, const int *lda,
1065 float *x, const int *incx) {
1066 if (Gblas::timekeeping) {
1067 clock_t start = clock();
1068 strmv_(uplo, trans, diag, n, A, lda, x, incx);
1069 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1070 }
1071 else {
1072 strmv_(uplo, trans, diag, n, A, lda, x, incx);
1073 }
1074 }
1075
1076 /* LEVEL 1 */
1077 template<>
1078 inline void scal<float>(const int* n,const float* da, float* dx,
1079 const int* incx) {
1080 if (Gblas::timekeeping) {
1081 clock_t start = clock();
1082 sscal_(n, da, dx, incx);
1083 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1084 }
1085 else {
1086 sscal_(n, da, dx, incx);
1087 }
1088 }
1089#if 0
1090 // Sdot has different return type in different BLAS implementations
1091 // Therefore the specialization for single precision is removed
1092 template<>
1093 inline float dot<float>(const int* n, const float* dx, const int* incx,
1094 const float* dy, const int* incy) {
1095 float tmp;
1096 if (Gblas::timekeeping) {
1097 clock_t start = clock();
1098 sdot_(n, dx, incx, dy, incy);
1099 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1100 }
1101 else {
1102 sdot_(n, dx, incx, dy, incy);
1103 }
1104 return tmp;
1105 }
1106#endif
1107
1108 template<>
1109 inline void axpy<float>(const int* n, const float* da, const float* dx,
1110 const int* incx, float* dy,const int* incy) {
1111 if (Gblas::timekeeping) {
1112 clock_t start = clock();
1113 saxpy_(n, da, dx, incx, dy, incy);
1114 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1115 }
1116 else {
1117 saxpy_(n, da, dx, incx, dy, incy);
1118 }
1119 }
1120
1121 /* END OF SPECIALIZATIONS */
1122#endif
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132 /* Other */
1133
1134 template<class Treal>
1135 static void fulltopacked(const Treal* full, Treal* packed, const int size){
1136 int pind=0;
1137 for (int col=0;col<size;col++)
1138 {
1139 for(int row=0;row<=col;row++)
1140 {
1141 packed[pind]=full[col*size+row];
1142 pind++;
1143 }
1144 }
1145 }
1146
1147 template<class Treal>
1148 static void packedtofull(const Treal* packed, Treal* full, const int size){
1149 int psize=(size+1)*size/2;
1150 int col=0;
1151 int row=0;
1152 for(int pind=0;pind<psize;pind++)
1153 {
1154 if (col==row)
1155 {
1156 full[col*size+row]=packed[pind];
1157 col++;
1158 row=0;
1159 }
1160 else
1161 {
1162 full[col*size+row]=packed[pind];
1163 full[row*size+col]=packed[pind];
1164 row++;
1165 }
1166 }
1167 }
1168
1169 template<class Treal>
1170 static void tripackedtofull(const Treal* packed,Treal* full,
1171 const int size) {
1172 int psize=(size+1)*size/2;
1173 int col=0;
1174 int row=0;
1175 for(int pind=0;pind<psize;pind++)
1176 {
1177 if (col==row)
1178 {
1179 full[col*size+row]=packed[pind];
1180 col++;
1181 row=0;
1182 }
1183 else
1184 {
1185 full[col*size+row]=packed[pind];
1186 full[row*size+col]=0;
1187 row++;
1188 }
1189 }
1190 }
1191
1192 template<class Treal>
1193 static void trifulltofull(Treal* trifull, const int size) {
1194 for(int col = 0; col < size - 1; col++)
1195 for(int row = col + 1; row < size; row++)
1196 trifull[col * size + row] = 0;
1197 }
1198
1199
1200} /* namespace mat */
1201
1202#endif /* GBLAS */
The Failure class is used for exception handling.
Code for managing aligned memory buffers, used if SSE intrinsics enabled.
Generalized matrix matrix multiplication using SSE intrinsics.
#define B
#define A
void sscal_(const int *n, const float *da, float *dx, const int *incx)
void strmm_(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const float *alpha, const float *A, const int *lda, float *B, const int *ldb)
void ssygv_(const int *itype, const char *jobz, const char *uplo, const int *n, float *A, const int *lda, float *B, const int *ldb, float *w, float *work, const int *lwork, int *info)
void ssyrk_(const char *uplo, const char *trans, const int *n, const int *k, const float *alpha, const float *A, const int *lda, const float *beta, float *C, const int *ldc)
void dtptri_(const char *uplo, const char *diag, const int *n, double *ap, int *info)
void ssymv_(const char *uplo, const int *n, const float *alpha, const float *A, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy)
void spocon_(const char *uplo, const int *n, const float *A, const int *lda, const float *anorm, float *rcond, float *work, int *iwork, int *info)
void dtrmv_(const char *uplo, const char *trans, const char *diag, const int *n, const double *A, const int *lda, double *x, const int *incx)
void dscal_(const int *n, const double *da, double *dx, const int *incx)
void sggev_(const char *jobbl, const char *jobvr, const int *n, float *A, const int *lda, float *B, const int *ldb, float *alphar, float *alphai, float *beta, float *vl, const int *ldvl, float *vr, const int *ldvr, float *work, const int *lwork, int *info)
double ddot_(const int *n, const double *dx, const int *incx, const double *dy, const int *incy)
void sstevr_(const char *jobz, const char *range, const int *n, float *d, float *e, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, int *isuppz, float *work, int *lwork, int *iwork, int *liwork, int *info)
void sgemv_(const char *ta, const int *m, const int *n, const float *alpha, const float *A, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy)
void sstevx_(const char *jobz, const char *range, const int *n, float *d, float *e, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, float *work, int *iwork, int *ifail, int *info)
void dtrmm_(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const double *alpha, const double *A, const int *lda, double *B, const int *ldb)
void dtrtri_(const char *uplo, const char *diag, const int *n, double *A, const int *lda, int *info)
void strtri_(const char *uplo, const char *diag, const int *n, float *A, const int *lda, int *info)
void stptri_(const char *uplo, const char *diag, const int *n, float *ap, int *info)
void dpptrf_(const char *uplo, const int *n, double *ap, int *info)
void spptrf_(const char *uplo, const int *n, float *ap, int *info)
void strmv_(const char *uplo, const char *trans, const char *diag, const int *n, const float *A, const int *lda, float *x, const int *incx)
void dsymv_(const char *uplo, const int *n, const double *alpha, const double *A, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy)
void dggev_(const char *jobbl, const char *jobvr, const int *n, double *A, const int *lda, double *B, const int *ldb, double *alphar, double *alphai, double *beta, double *vl, const int *ldvl, double *vr, const int *ldvr, double *work, const int *lwork, int *info)
void dpocon_(const char *uplo, const int *n, const double *A, const int *lda, const double *anorm, double *rcond, double *work, int *iwork, int *info)
void saxpy_(const int *n, const float *da, const float *dx, const int *incx, float *dy, const int *incy)
void sspgst_(const int *itype, const char *uplo, const int *n, float *ap, const float *bp, int *info)
void dsyrk_(const char *uplo, const char *trans, const int *n, const int *k, const double *alpha, const double *A, const int *lda, const double *beta, double *C, const int *ldc)
void dstevr_(const char *jobz, const char *range, const int *n, double *d, double *e, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, int *isuppz, double *work, int *lwork, int *iwork, int *liwork, int *info)
void spotrf_(const char *uplo, const int *n, float *A, const int *lda, int *info)
void ssymm_(const char *side, const char *uplo, const int *m, const int *n, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
void dspgst_(const int *itype, const char *uplo, const int *n, double *ap, const double *bp, int *info)
void sgemm_(const char *ta, const char *tb, const int *n, const int *k, const int *l, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
void daxpy_(const int *n, const double *da, const double *dx, const int *incx, double *dy, const int *incy)
void dpotrf_(const char *uplo, const int *n, double *A, const int *lda, int *info)
void dsyev_(const char *jobz, const char *uplo, const int *n, double *a, const int *lda, double *w, double *work, const int *lwork, int *info)
void dsymm_(const char *side, const char *uplo, const int *m, const int *n, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc)
void dgemm_(const char *ta, const char *tb, const int *n, const int *k, const int *l, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc)
void dsygv_(const int *itype, const char *jobz, const char *uplo, const int *n, double *A, const int *lda, double *B, const int *ldb, double *w, double *work, const int *lwork, int *info)
void dgemv_(const char *ta, const int *m, const int *n, const double *alpha, const double *A, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy)
void dstevx_(const char *jobz, const char *range, const int *n, double *d, double *e, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, double *work, int *iwork, int *ifail, int *info)
void ssyev_(const char *jobz, const char *uplo, const int *n, float *a, const int *lda, float *w, float *work, const int *lwork, int *info)
Definition allocate.cc:39
void ggev< float >(const char *jobbl, const char *jobvr, const int *n, float *A, const int *lda, float *B, const int *ldb, float *alphar, float *alphai, float *beta, float *vl, const int *ldvl, float *vr, const int *ldvr, float *work, const int *lwork, int *info)
Definition mat_gblas.h:878
void stevr< float >(const char *jobz, const char *range, const int *n, float *d, float *e, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, int *isuppz, float *work, int *lwork, int *iwork, int *liwork, int *info)
Definition mat_gblas.h:992
void scal< double >(const int *n, const double *da, double *dx, const int *incx)
Definition mat_gblas.h:742
void tptri< float >(const char *uplo, const char *diag, const int *n, float *ap, int *info)
Definition mat_gblas.h:830
static void packedtofull(const Treal *packed, Treal *full, const int size)
Definition mat_gblas.h:1148
static void sygv(const int *itype, const char *jobz, const char *uplo, const int *n, T *A, const int *lda, T *B, const int *ldb, T *w, T *work, const int *lwork, int *info)
Definition mat_gblas.h:293
void gemv< double >(const char *ta, const int *m, const int *n, const double *alpha, const double *A, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy)
Definition mat_gblas.h:693
void trmm< double >(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const double *alpha, const double *A, const int *lda, double *B, const int *ldb)
Definition mat_gblas.h:505
void sygv< float >(const int *itype, const char *jobz, const char *uplo, const int *n, float *A, const int *lda, float *B, const int *ldb, float *w, float *work, const int *lwork, int *info)
Definition mat_gblas.h:861
void trtri< float >(const char *uplo, const char *diag, const int *n, float *A, const int *lda, int *info)
Definition mat_gblas.h:911
static void syev(const char *jobz, const char *uplo, const int *n, T *a, const int *lda, T *w, T *work, const int *lwork, int *info)
Definition mat_gblas.h:382
static void gemm(const char *ta, const char *tb, const int *n, const int *k, const int *l, const T *alpha, const T *A, const int *lda, const T *B, const int *ldb, const T *beta, T *C, const int *ldc)
Definition mat_gblas.h:232
static void trtri(const char *uplo, const char *diag, const int *n, T *A, const int *lda, int *info)
Definition mat_gblas.h:321
void symv< double >(const char *uplo, const int *n, const double *alpha, const double *A, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy)
Definition mat_gblas.h:709
side
Definition Matrix.h:75
void sygv< double >(const int *itype, const char *jobz, const char *uplo, const int *n, double *A, const int *lda, double *B, const int *ldb, double *w, double *work, const int *lwork, int *info)
Definition mat_gblas.h:522
static void ggev(const char *jobbl, const char *jobvr, const int *n, T *A, const int *lda, T *B, const int *ldb, T *alphar, T *alphai, T *beta, T *vl, const int *ldvl, T *vr, const int *ldvr, T *work, const int *lwork, int *info)
Definition mat_gblas.h:301
void potrf< float >(const char *uplo, const int *n, float *A, const int *lda, int *info)
Definition mat_gblas.h:898
void spgst< double >(const int *itype, const char *uplo, const int *n, double *ap, const double *bp, int *info)
Definition mat_gblas.h:478
void pocon< double >(const char *uplo, const int *n, const double *A, const int *lda, const double *anorm, double *rcond, double *work, int *iwork, int *info)
Definition mat_gblas.h:617
void stevx< double >(const char *jobz, const char *range, const int *n, double *d, double *e, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, double *work, int *iwork, int *ifail, int *info)
Definition mat_gblas.h:632
void tptri< double >(const char *uplo, const char *diag, const int *n, double *ap, int *info)
Definition mat_gblas.h:492
static void stevr(const char *jobz, const char *range, const int *n, T *d, T *e, const T *vl, const T *vu, const int *il, const int *iu, const T *abstol, int *m, T *w, T *z, const int *ldz, int *isuppz, T *work, int *lwork, int *iwork, int *liwork, int *info)
Definition mat_gblas.h:369
void axpy< double >(const int *n, const double *da, const double *dx, const int *incx, double *dy, const int *incy)
Definition mat_gblas.h:770
static void syrk(const char *uplo, const char *trans, const int *n, const int *k, const T *alpha, const T *A, const int *lda, const T *beta, T *C, const int *ldc)
Definition mat_gblas.h:334
void scal< float >(const int *n, const float *da, float *dx, const int *incx)
Definition mat_gblas.h:1078
void ggev< double >(const char *jobbl, const char *jobvr, const int *n, double *A, const int *lda, double *B, const int *ldb, double *alphar, double *alphai, double *beta, double *vl, const int *ldvl, double *vr, const int *ldvr, double *work, const int *lwork, int *info)
Definition mat_gblas.h:539
static void pptrf(const char *uplo, const int *n, T *ap, int *info)
Definition mat_gblas.h:263
void gemm< float >(const char *ta, const char *tb, const int *n, const int *k, const int *l, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
Definition mat_gblas.h:785
void axpy< float >(const int *n, const float *da, const float *dx, const int *incx, float *dy, const int *incy)
Definition mat_gblas.h:1109
void syev< double >(const char *jobz, const char *uplo, const int *n, double *a, const int *lda, double *w, double *work, const int *lwork, int *info)
Definition mat_gblas.h:677
void pptrf< double >(const char *uplo, const int *n, double *ap, int *info)
Definition mat_gblas.h:465
static void scal(const int *n, const T *da, T *dx, const int *incx)
Definition mat_gblas.h:419
void syrk< float >(const char *uplo, const char *trans, const int *n, const int *k, const float *alpha, const float *A, const int *lda, const float *beta, float *C, const int *ldc)
Definition mat_gblas.h:924
static void symv(const char *uplo, const int *n, const T *alpha, const T *A, const int *lda, const T *x, const int *incx, const T *beta, T *y, const int *incy)
Definition mat_gblas.h:400
void gemv< float >(const char *ta, const int *m, const int *n, const float *alpha, const float *A, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy)
Definition mat_gblas.h:1030
void pocon< float >(const char *uplo, const int *n, const float *A, const int *lda, const float *anorm, float *rcond, float *work, int *iwork, int *info)
Definition mat_gblas.h:956
void symv< float >(const char *uplo, const int *n, const float *alpha, const float *A, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy)
Definition mat_gblas.h:1046
void trtri< double >(const char *uplo, const char *diag, const int *n, double *A, const int *lda, int *info)
Definition mat_gblas.h:572
static void axpy(const int *n, const T *da, const T *dx, const int *incx, T *dy, const int *incy)
Definition mat_gblas.h:431
static void stevx(const char *jobz, const char *range, const int *n, T *d, T *e, const T *vl, const T *vu, const int *il, const int *iu, const T *abstol, int *m, T *w, T *z, const int *ldz, T *work, int *iwork, int *ifail, int *info)
Definition mat_gblas.h:358
void spgst< float >(const int *itype, const char *uplo, const int *n, float *ap, const float *bp, int *info)
Definition mat_gblas.h:816
void pptrf< float >(const char *uplo, const int *n, float *ap, int *info)
Definition mat_gblas.h:803
static void tripackedtofull(const Treal *packed, Treal *full, const int size)
Definition mat_gblas.h:1170
void trmm< float >(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const float *alpha, const float *A, const int *lda, float *B, const int *ldb)
Definition mat_gblas.h:844
void trmv< float >(const char *uplo, const char *trans, const char *diag, const int *n, const float *A, const int *lda, float *x, const int *incx)
Definition mat_gblas.h:1062
void syrk< double >(const char *uplo, const char *trans, const int *n, const int *k, const double *alpha, const double *A, const int *lda, const double *beta, double *C, const int *ldc)
Definition mat_gblas.h:585
static void tptri(const char *uplo, const char *diag, const int *n, T *ap, int *info)
Definition mat_gblas.h:275
static void trmm(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const T *alpha, const T *A, const int *lda, T *B, const int *ldb)
Definition mat_gblas.h:281
static void spgst(const int *itype, const char *uplo, const int *n, T *ap, const T *bp, int *info)
Definition mat_gblas.h:268
void stevx< float >(const char *jobz, const char *range, const int *n, float *d, float *e, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, float *work, int *iwork, int *ifail, int *info)
Definition mat_gblas.h:971
void gemm< double >(const char *ta, const char *tb, const int *n, const int *k, const int *l, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc)
Definition mat_gblas.h:447
void trmv< double >(const char *uplo, const char *trans, const char *diag, const int *n, const double *A, const int *lda, double *x, const int *incx)
Definition mat_gblas.h:725
static T dot(const int *n, const T *dx, const int *incx, const T *dy, const int *incy)
Definition mat_gblas.h:425
void potrf< double >(const char *uplo, const int *n, double *A, const int *lda, int *info)
Definition mat_gblas.h:559
static void fulltopacked(const Treal *full, Treal *packed, const int size)
Definition mat_gblas.h:1135
void stevr< double >(const char *jobz, const char *range, const int *n, double *d, double *e, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, int *isuppz, double *work, int *lwork, int *iwork, int *liwork, int *info)
Definition mat_gblas.h:653
static void symm(const char *side, const char *uplo, const int *m, const int *n, const T *alpha, const T *A, const int *lda, const T *B, const int *ldb, const T *beta, T *C, const int *ldc)
Definition mat_gblas.h:342
double dot< double >(const int *n, const double *dx, const int *incx, const double *dy, const int *incy)
Definition mat_gblas.h:755
static void potrf(const char *uplo, const int *n, T *A, const int *lda, int *info)
Definition mat_gblas.h:314
void symm< float >(const char *side, const char *uplo, const int *m, const int *n, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
Definition mat_gblas.h:939
static void pocon(const char *uplo, const int *n, const T *A, const int *lda, const T *anorm, T *rcond, T *work, int *iwork, int *info)
Definition mat_gblas.h:351
void symm< double >(const char *side, const char *uplo, const int *m, const int *n, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc)
Definition mat_gblas.h:600
void syev< float >(const char *jobz, const char *uplo, const int *n, float *a, const int *lda, float *w, float *work, const int *lwork, int *info)
Definition mat_gblas.h:1014
static void trifulltofull(Treal *trifull, const int size)
Definition mat_gblas.h:1193
static void trmv(const char *uplo, const char *trans, const char *diag, const int *n, const T *A, const int *lda, T *x, const int *incx)
Definition mat_gblas.h:409
static void gemv(const char *ta, const int *m, const int *n, const T *alpha, const T *A, const int *lda, const T *x, const int *incx, const T *beta, T *y, const int *incy)
Definition mat_gblas.h:391
Definition mat_gblas.h:225
static bool timekeeping
Definition mat_gblas.h:227
static float time
Definition mat_gblas.h:226
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition template_blas_axpy.h:43
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition template_blas_dot.h:43
int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition template_blas_gemm.h:43
int template_blas_gemv(const char *trans, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition template_blas_gemv.h:43
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition template_blas_scal.h:43
int template_blas_symm(const char *side, const char *uplo, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition template_blas_symm.h:42
int template_blas_symv(const char *uplo, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition template_blas_symv.h:42
int template_blas_syrk(const char *uplo, const char *trans, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *beta, Treal *c__, const integer *ldc)
Definition template_blas_syrk.h:42
int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, Treal *b, const integer *ldb)
Definition template_blas_trmm.h:43
int template_blas_trmv(const char *uplo, const char *trans, const char *diag, const integer *n, const Treal *a, const integer *lda, Treal *x, const integer *incx)
Definition template_blas_trmv.h:42
int template_lapack_ggev(const char *jobvl, const char *jobvr, const integer *n, Treal *a, const integer *lda, Treal *b, const integer *ldb, Treal *alphar, Treal *alphai, Treal *beta, Treal *vl, const integer *ldvl, Treal *vr, const integer *ldvr, Treal *work, const integer *lwork, integer *info)
Definition template_lapack_ggev.h:42
int template_lapack_pocon(const char *uplo, const integer *n, const Treal *a, const integer *lda, const Treal *anorm, Treal *rcond, Treal *work, integer *iwork, integer *info)
Definition template_lapack_pocon.h:42
int template_lapack_potrf(const char *uplo, const integer *n, Treal *a, const integer *lda, integer *info)
Definition template_lapack_potrf.h:43
int template_lapack_pptrf(const char *uplo, const integer *n, Treal *ap, integer *info)
Definition template_lapack_pptrf.h:43
int template_lapack_spgst(const integer *itype, const char *uplo, const integer *n, Treal *ap, const Treal *bp, integer *info)
Definition template_lapack_spgst.h:43
int template_lapack_stevr(const char *jobz, const char *range, const integer *n, Treal *d__, Treal *e, const Treal *vl, const Treal *vu, const integer *il, const integer *iu, const Treal *abstol, integer *m, Treal *w, Treal *z__, const integer *ldz, integer *isuppz, Treal *work, integer *lwork, integer *iwork, integer *liwork, integer *info)
Definition template_lapack_stevr.h:41
int template_lapack_stevx(const char *jobz, const char *range, const integer *n, Treal *d__, Treal *e, const Treal *vl, const Treal *vu, const integer *il, const integer *iu, const Treal *abstol, integer *m, Treal *w, Treal *z__, const integer *ldz, Treal *work, integer *iwork, integer *ifail, integer *info)
Definition template_lapack_stevx.h:42
int template_lapack_syev(const char *jobz, const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *w, Treal *work, const integer *lwork, integer *info)
Definition template_lapack_syev.h:42
int template_lapack_sygv(const integer *itype, const char *jobz, const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *b, const integer *ldb, Treal *w, Treal *work, const integer *lwork, integer *info)
Definition template_lapack_sygv.h:42
int template_lapack_tptri(const char *uplo, const char *diag, const integer *n, Treal *ap, integer *info)
Definition template_lapack_tptri.h:43
int template_lapack_trtri(const char *uplo, const char *diag, const integer *n, Treal *a, const integer *lda, integer *info)
Definition template_lapack_trtri.h:42