Actual source code: ex66.c
1: static char help[] = "Tests MATH2OPUS\n\n";
3: #include <petscmat.h>
4: #include <petscsf.h>
6: static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
7: {
8: PetscInt d;
9: PetscReal clength = sdim == 3 ? 0.2 : 0.1;
10: PetscReal dist, diff = 0.0;
12: for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
13: dist = PetscSqrtReal(diff);
14: return PetscExpReal(-dist / clength);
15: }
17: static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
18: {
19: PetscInt d;
20: PetscReal clength = sdim == 3 ? 0.2 : 0.1;
21: PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0;
23: for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; }
24: for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; }
25: for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
26: dist = PetscSqrtReal(diff);
27: return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.;
28: }
30: int main(int argc,char **argv)
31: {
32: Mat A,B,C,D;
33: Vec v,x,y,Ax,Ay,Bx,By;
34: PetscRandom r;
35: PetscLayout map;
36: PetscScalar *Adata = NULL, *Cdata = NULL, scale = 1.0;
37: PetscReal *coords,nA,nD,nB,err,nX,norms[3];
38: PetscInt N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, ldu = 0, nlr = 7, nt, ntrials = 2;
39: PetscMPIInt size,rank;
40: PetscBool testlayout = PETSC_FALSE, flg, symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE;
41: PetscBool checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE, flgglob;
42: PetscBool testtrans, testnorm, randommat = PETSC_TRUE, testorthog, testcompress, testhlru;
43: void (*approxnormfunc)(void);
44: void (*Anormfunc)(void);
46: #if defined(PETSC_HAVE_MPI_INIT_THREAD)
47: PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE;
48: #endif
49: PetscInitialize(&argc,&argv,(char*) 0,help);
50: PetscOptionsGetInt(NULL,NULL,"-ng",&N,&flgglob);
51: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
52: PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
53: PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);
54: PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);
55: PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);
56: PetscOptionsGetInt(NULL,NULL,"-nlr",&nlr,NULL);
57: PetscOptionsGetInt(NULL,NULL,"-ldu",&ldu,NULL);
58: PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);
59: PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);
60: if (!flgglob) PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL);
61: PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);
62: PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);
63: PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);
64: PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);
65: PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);
66: PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);
67: PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);
68: PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);
69: if (!Asymm) symm = PETSC_FALSE;
71: MPI_Comm_size(PETSC_COMM_WORLD,&size);
73: /* Disable tests for unimplemented variants */
74: testtrans = (PetscBool)(size == 1 || symm);
75: testnorm = (PetscBool)(size == 1 || symm);
76: testorthog = (PetscBool)(size == 1 || symm);
77: testcompress = (PetscBool)(size == 1 || symm);
78: testhlru = (PetscBool)(size == 1);
80: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
81: PetscLayoutCreate(PETSC_COMM_WORLD,&map);
82: if (testlayout) {
83: if (rank%2) n = PetscMax(2*n-5*rank,0);
84: else n = 2*n+rank;
85: }
86: if (!flgglob) {
87: PetscLayoutSetLocalSize(map,n);
88: PetscLayoutSetUp(map);
89: PetscLayoutGetSize(map,&N);
90: } else {
91: PetscLayoutSetSize(map,N);
92: PetscLayoutSetUp(map);
93: PetscLayoutGetLocalSize(map,&n);
94: }
95: PetscLayoutDestroy(&map);
97: if (lda) {
98: PetscMalloc1(N*(n+lda),&Adata);
99: }
100: MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);
101: MatDenseSetLDA(A,n+lda);
103: /* Create random points; these are replicated in order to populate a dense matrix and to compare sequential and dense runs
104: The constructor for MATH2OPUS can take as input the distributed coordinates and replicates them internally in case
105: a kernel construction is requested */
106: PetscRandomCreate(PETSC_COMM_WORLD,&r);
107: PetscRandomSetFromOptions(r);
108: PetscRandomSetSeed(r,123456);
109: PetscRandomSeed(r);
110: PetscMalloc1(N*dim,&coords);
111: PetscRandomGetValuesReal(r,N*dim,coords);
112: PetscRandomDestroy(&r);
114: if (kernel || !randommat) {
115: MatH2OpusKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm;
116: PetscInt ist,ien;
118: MatGetOwnershipRange(A,&ist,&ien);
119: for (i = ist; i < ien; i++) {
120: for (j = 0; j < N; j++) {
121: MatSetValue(A,i,j,(*k)(dim,coords + i*dim,coords + j*dim,NULL),INSERT_VALUES);
122: }
123: }
124: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
125: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
126: if (kernel) {
127: MatCreateH2OpusFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords + ist*dim,PETSC_TRUE,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
128: } else {
129: MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
130: }
131: } else {
132: PetscInt ist;
134: MatGetOwnershipRange(A,&ist,NULL);
135: MatSetRandom(A,NULL);
136: if (Asymm) {
137: MatTranspose(A,MAT_INITIAL_MATRIX,&B);
138: MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);
139: MatDestroy(&B);
140: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
141: }
142: MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
143: }
144: PetscFree(coords);
145: if (agpu) {
146: MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);
147: }
148: MatViewFromOptions(A,NULL,"-A_view");
150: MatSetOption(B,MAT_SYMMETRIC,symm);
152: /* assemble the H-matrix */
153: MatBindToCPU(B,(PetscBool)!bgpu);
154: MatSetFromOptions(B);
155: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
156: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
157: MatViewFromOptions(B,NULL,"-B_view");
159: /* Test MatScale */
160: MatScale(A,scale);
161: MatScale(B,scale);
163: /* Test MatMult */
164: MatCreateVecs(A,&Ax,&Ay);
165: MatCreateVecs(B,&Bx,&By);
166: VecSetRandom(Ax,NULL);
167: VecCopy(Ax,Bx);
168: MatMult(A,Ax,Ay);
169: MatMult(B,Bx,By);
170: VecViewFromOptions(Ay,NULL,"-mult_vec_view");
171: VecViewFromOptions(By,NULL,"-mult_vec_view");
172: VecNorm(Ay,NORM_INFINITY,&nX);
173: VecAXPY(Ay,-1.0,By);
174: VecViewFromOptions(Ay,NULL,"-mult_vec_view");
175: VecNorm(Ay,NORM_INFINITY,&err);
176: PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);
177: VecScale(By,-1.0);
178: MatMultAdd(B,Bx,By,By);
179: VecNorm(By,NORM_INFINITY,&err);
180: VecViewFromOptions(By,NULL,"-mult_vec_view");
181: if (err > 10.*PETSC_SMALL) {
182: PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);
183: }
185: /* Test MatNorm */
186: MatNorm(A,NORM_INFINITY,&norms[0]);
187: MatNorm(A,NORM_1,&norms[1]);
188: norms[2] = -1.; /* NORM_2 not supported */
189: PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
190: MatGetOperation(A,MATOP_NORM,&Anormfunc);
191: MatGetOperation(B,MATOP_NORM,&approxnormfunc);
192: MatSetOperation(A,MATOP_NORM,approxnormfunc);
193: MatNorm(A,NORM_INFINITY,&norms[0]);
194: MatNorm(A,NORM_1,&norms[1]);
195: MatNorm(A,NORM_2,&norms[2]);
196: PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
197: if (testnorm) {
198: MatNorm(B,NORM_INFINITY,&norms[0]);
199: MatNorm(B,NORM_1,&norms[1]);
200: MatNorm(B,NORM_2,&norms[2]);
201: } else {
202: norms[0] = -1.;
203: norms[1] = -1.;
204: norms[2] = -1.;
205: }
206: PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
207: MatSetOperation(A,MATOP_NORM,Anormfunc);
209: /* Test MatDuplicate */
210: MatDuplicate(B,MAT_COPY_VALUES,&D);
211: MatSetOption(D,MAT_SYMMETRIC,symm);
212: MatMultEqual(B,D,10,&flg);
213: if (!flg) {
214: PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");
215: }
216: if (testtrans) {
217: MatMultTransposeEqual(B,D,10,&flg);
218: if (!flg) {
219: PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose error after MatDuplicate\n");
220: }
221: }
222: MatDestroy(&D);
224: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
225: VecSetRandom(Ay,NULL);
226: VecCopy(Ay,By);
227: MatMultTranspose(A,Ay,Ax);
228: MatMultTranspose(B,By,Bx);
229: VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");
230: VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");
231: VecNorm(Ax,NORM_INFINITY,&nX);
232: VecAXPY(Ax,-1.0,Bx);
233: VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");
234: VecNorm(Ax,NORM_INFINITY,&err);
235: PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);
236: VecScale(Bx,-1.0);
237: MatMultTransposeAdd(B,By,Bx,Bx);
238: VecNorm(Bx,NORM_INFINITY,&err);
239: if (err > 10.*PETSC_SMALL) {
240: PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);
241: }
242: }
243: VecDestroy(&Ax);
244: VecDestroy(&Ay);
245: VecDestroy(&Bx);
246: VecDestroy(&By);
248: /* Test MatMatMult */
249: if (ldc) {
250: PetscMalloc1(nrhs*(n+ldc),&Cdata);
251: }
252: MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);
253: MatDenseSetLDA(C,n+ldc);
254: MatSetRandom(C,NULL);
255: if (cgpu) {
256: MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);
257: }
258: for (nt = 0; nt < ntrials; nt++) {
259: MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);
260: MatViewFromOptions(D,NULL,"-bc_view");
261: PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");
262: if (flg) {
263: MatCreateVecs(B,&x,&y);
264: MatCreateVecs(D,NULL,&v);
265: for (i = 0; i < nrhs; i++) {
266: MatGetColumnVector(D,v,i);
267: MatGetColumnVector(C,x,i);
268: MatMult(B,x,y);
269: VecAXPY(y,-1.0,v);
270: VecNorm(y,NORM_INFINITY,&err);
271: if (err > 10.*PETSC_SMALL) PetscPrintf(PETSC_COMM_WORLD,"MatMat err %" PetscInt_FMT " %g\n",i,err);
272: }
273: VecDestroy(&y);
274: VecDestroy(&x);
275: VecDestroy(&v);
276: }
277: }
278: MatDestroy(&D);
280: /* Test MatTransposeMatMult */
281: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
282: for (nt = 0; nt < ntrials; nt++) {
283: MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);
284: MatViewFromOptions(D,NULL,"-btc_view");
285: PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");
286: if (flg) {
287: MatCreateVecs(B,&y,&x);
288: MatCreateVecs(D,NULL,&v);
289: for (i = 0; i < nrhs; i++) {
290: MatGetColumnVector(D,v,i);
291: MatGetColumnVector(C,x,i);
292: MatMultTranspose(B,x,y);
293: VecAXPY(y,-1.0,v);
294: VecNorm(y,NORM_INFINITY,&err);
295: if (err > 10.*PETSC_SMALL) PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %" PetscInt_FMT " %g\n",i,err);
296: }
297: VecDestroy(&y);
298: VecDestroy(&x);
299: VecDestroy(&v);
300: }
301: }
302: MatDestroy(&D);
303: }
305: /* Test basis orthogonalization */
306: if (testorthog) {
307: MatDuplicate(B,MAT_COPY_VALUES,&D);
308: MatSetOption(D,MAT_SYMMETRIC,symm);
309: MatH2OpusOrthogonalize(D);
310: MatMultEqual(B,D,10,&flg);
311: if (!flg) {
312: PetscPrintf(PETSC_COMM_WORLD,"MatMult error after basis ortogonalization\n");
313: }
314: MatDestroy(&D);
315: }
317: /* Test matrix compression */
318: if (testcompress) {
319: MatDuplicate(B,MAT_COPY_VALUES,&D);
320: MatSetOption(D,MAT_SYMMETRIC,symm);
321: MatH2OpusCompress(D,PETSC_SMALL);
322: MatDestroy(&D);
323: }
325: /* Test low-rank update */
326: if (testhlru) {
327: Mat U, V;
328: PetscScalar *Udata = NULL, *Vdata = NULL;
330: if (ldu) {
331: PetscMalloc1(nlr*(n+ldu),&Udata);
332: PetscMalloc1(nlr*(n+ldu+2),&Vdata);
333: }
334: MatDuplicate(B,MAT_COPY_VALUES,&D);
335: MatCreateDense(PetscObjectComm((PetscObject)D),n,PETSC_DECIDE,N,nlr,Udata,&U);
336: MatDenseSetLDA(U,n+ldu);
337: MatCreateDense(PetscObjectComm((PetscObject)D),n,PETSC_DECIDE,N,nlr,Vdata,&V);
338: if (ldu) MatDenseSetLDA(V,n+ldu+2);
339: MatSetRandom(U,NULL);
340: MatSetRandom(V,NULL);
341: MatH2OpusLowRankUpdate(D,U,V,0.5);
342: MatH2OpusLowRankUpdate(D,U,V,-0.5);
343: MatMultEqual(B,D,10,&flg);
344: if (!flg) {
345: PetscPrintf(PETSC_COMM_WORLD,"MatMult error after low-rank update\n");
346: }
347: MatDestroy(&D);
348: MatDestroy(&U);
349: PetscFree(Udata);
350: MatDestroy(&V);
351: PetscFree(Vdata);
352: }
354: /* check explicit operator */
355: if (checkexpl) {
356: Mat Be, Bet;
358: MatComputeOperator(B,MATDENSE,&D);
359: MatDuplicate(D,MAT_COPY_VALUES,&Be);
360: MatNorm(D,NORM_FROBENIUS,&nB);
361: MatViewFromOptions(D,NULL,"-expl_view");
362: MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);
363: MatViewFromOptions(D,NULL,"-diff_view");
364: MatNorm(D,NORM_FROBENIUS,&nD);
365: MatNorm(A,NORM_FROBENIUS,&nA);
366: PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);
367: MatDestroy(&D);
369: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
370: MatTranspose(A,MAT_INPLACE_MATRIX,&A);
371: MatComputeOperatorTranspose(B,MATDENSE,&D);
372: MatDuplicate(D,MAT_COPY_VALUES,&Bet);
373: MatNorm(D,NORM_FROBENIUS,&nB);
374: MatViewFromOptions(D,NULL,"-expl_trans_view");
375: MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);
376: MatViewFromOptions(D,NULL,"-diff_trans_view");
377: MatNorm(D,NORM_FROBENIUS,&nD);
378: MatNorm(A,NORM_FROBENIUS,&nA);
379: PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);
380: MatDestroy(&D);
382: MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);
383: MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);
384: MatViewFromOptions(Be,NULL,"-diff_expl_view");
385: MatNorm(Be,NORM_FROBENIUS,&nB);
386: PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);
387: MatDestroy(&Be);
388: MatDestroy(&Bet);
389: }
390: }
391: MatDestroy(&A);
392: MatDestroy(&B);
393: MatDestroy(&C);
394: PetscFree(Cdata);
395: PetscFree(Adata);
396: PetscFinalize();
397: return 0;
398: }
400: /*TEST
402: build:
403: requires: h2opus
405: #tests from kernel
406: test:
407: requires: h2opus
408: nsize: 1
409: suffix: 1
410: args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0
412: test:
413: requires: h2opus
414: nsize: 1
415: suffix: 1_ld
416: output_file: output/ex66_1.out
417: args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -ldu 17 -symm 0 -checkexpl -bgpu 0
419: test:
420: requires: h2opus cuda
421: nsize: 1
422: suffix: 1_cuda
423: output_file: output/ex66_1.out
424: args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1
426: test:
427: requires: h2opus cuda
428: nsize: 1
429: suffix: 1_cuda_ld
430: output_file: output/ex66_1.out
431: args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -ldu 17 -symm 0 -checkexpl -bgpu 1
433: test:
434: requires: h2opus
435: nsize: {{2 3}}
436: suffix: 1_par
437: args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0
439: test:
440: requires: h2opus cuda
441: nsize: {{2 3}}
442: suffix: 1_par_cuda
443: args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}}
444: output_file: output/ex66_1_par.out
446: #tests from matrix sampling (parallel or unsymmetric not supported)
447: test:
448: requires: h2opus
449: nsize: 1
450: suffix: 2
451: args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0
453: test:
454: requires: h2opus cuda
455: nsize: 1
456: suffix: 2_cuda
457: output_file: output/ex66_2.out
458: args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}}
460: #tests view operation
461: test:
462: requires: h2opus !cuda
463: filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
464: nsize: {{1 2 3}}
465: suffix: view
466: args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
468: test:
469: requires: h2opus cuda
470: filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
471: nsize: {{1 2 3}}
472: suffix: view_cuda
473: args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -bgpu -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
475: TEST*/