Actual source code: ex70.c

  1: #include <petscmat.h>

  3: static char help[] = "Tests MatMat operations with MAT_REUSE_MATRIX and already allocated dense result.\n\n";

  5: static PetscScalar MAGIC_NUMBER = 12345;

  7: static PetscErrorCode CheckLocal(Mat A, Mat B, PetscScalar *a, PetscScalar *b)
  8: {
  9:   PetscBool      wA = PETSC_FALSE, wB = PETSC_FALSE;
 10:   PetscBool      wAv = PETSC_FALSE, wBv = PETSC_FALSE;
 11:   PetscInt       lda,i,j,m,n;

 13:   if (a) {
 14:     const PetscScalar *Aa;
 15:     MatDenseGetArrayRead(A,&Aa);
 16:     wA   = (PetscBool)(a != Aa);
 17:     MatDenseGetLDA(A,&lda);
 18:     MatGetLocalSize(A,&m,&n);
 19:     for (j=0;j<n;j++) {
 20:       for (i=m;i<lda;i++) {
 21:         if (Aa[j*lda +i] != MAGIC_NUMBER) wAv = PETSC_TRUE;
 22:       }
 23:     }
 24:     MatDenseRestoreArrayRead(A,&Aa);
 25:   }
 26:   if (b) {
 27:     const PetscScalar *Bb;
 28:     MatDenseGetArrayRead(B,&Bb);
 29:     wB   = (PetscBool)(b != Bb);
 30:     MatDenseGetLDA(B,&lda);
 31:     MatGetLocalSize(B,&m,&n);
 32:     for (j=0;j<n;j++) {
 33:       for (i=m;i<lda;i++) {
 34:         if (Bb[j*lda +i] != MAGIC_NUMBER) wBv = PETSC_TRUE;
 35:       }
 36:     }
 37:     MatDenseRestoreArrayRead(B,&Bb);
 38:   }
 41:   return 0;
 42: }

 44: typedef struct {
 45:   Mat A;
 46:   Mat P;
 47:   Mat R;
 48: } proj_data;

 50: PetscErrorCode proj_destroy(void *ctx)
 51: {
 52:   proj_data      *userdata = (proj_data*)ctx;

 55:   MatDestroy(&userdata->A);
 56:   MatDestroy(&userdata->P);
 57:   MatDestroy(&userdata->R);
 58:   PetscFree(userdata);
 59:   return 0;
 60: }

 62: PetscErrorCode proj_mult(Mat S, Vec X, Vec Y)
 63: {
 64:   Mat            A,R,P;
 65:   Vec            Ax,Ay;
 66:   Vec            Px,Py;
 67:   proj_data      *userdata;

 69:   MatShellGetContext(S,&userdata);
 71:   A = userdata->A;
 72:   R = userdata->R;
 73:   P = userdata->P;
 77:   MatCreateVecs(A,&Ax,&Ay);
 78:   if (R) {
 79:     MatCreateVecs(R,&Py,&Px);
 80:   } else {
 81:     MatCreateVecs(P,&Px,&Py);
 82:   }
 83:   VecCopy(X,Px);
 84:   if (P) {
 85:     MatMult(P,Px,Py);
 86:   } else {
 87:     MatMultTranspose(R,Px,Py);
 88:   }
 89:   VecCopy(Py,Ax);
 90:   MatMult(A,Ax,Ay);
 91:   VecCopy(Ay,Py);
 92:   if (P) {
 93:     MatMultTranspose(P,Py,Px);
 94:   } else {
 95:     MatMult(R,Py,Px);
 96:   }
 97:   VecCopy(Px,Y);
 98:   VecDestroy(&Px);
 99:   VecDestroy(&Py);
100:   VecDestroy(&Ax);
101:   VecDestroy(&Ay);
102:   return 0;
103: }

105: PetscErrorCode MyPtShellPMultSymbolic(Mat S, Mat P, Mat PtAP, void** ctx)
106: {
107:   proj_data      *userdata;

109:   PetscNew(&userdata);
110:   MatShellSetContext(PtAP,userdata);
111:   *ctx = (void *)userdata;
112:   return 0;
113: }

115: PetscErrorCode MyPtShellPMultNumeric(Mat S, Mat P, Mat PtAP, void *ctx)
116: {
117:   Mat            A;
118:   proj_data      *userdata = (proj_data*)ctx;

120:   MatShellGetContext(S,&A);
121:   PetscObjectReference((PetscObject)A);
122:   PetscObjectReference((PetscObject)P);
123:   MatDestroy(&userdata->A);
124:   MatDestroy(&userdata->P);
125:   MatDestroy(&userdata->R);
126:   userdata->A = A;
127:   userdata->P = P;
128:   MatShellSetOperation(PtAP,MATOP_MULT,(void (*)(void))proj_mult);
129:   MatSetUp(PtAP);
130:   MatAssemblyBegin(PtAP,MAT_FINAL_ASSEMBLY);
131:   MatAssemblyEnd(PtAP,MAT_FINAL_ASSEMBLY);
132:   return 0;
133: }

135: PetscErrorCode MyRShellRtMultSymbolic(Mat S, Mat R, Mat RARt, void **ctx)
136: {
137:   proj_data      *userdata;

139:   PetscNew(&userdata);
140:   MatShellSetContext(RARt,userdata);
141:   *ctx = (void *)userdata;
142:   return 0;
143: }

145: PetscErrorCode MyRShellRtMultNumeric(Mat S, Mat R, Mat RARt, void *ctx)
146: {
147:   Mat            A;
148:   proj_data      *userdata = (proj_data*)ctx;

150:   MatShellGetContext(S,&A);
151:   PetscObjectReference((PetscObject)A);
152:   PetscObjectReference((PetscObject)R);
153:   MatDestroy(&userdata->A);
154:   MatDestroy(&userdata->P);
155:   MatDestroy(&userdata->R);
156:   userdata->A = A;
157:   userdata->R = R;
158:   MatShellSetOperation(RARt,MATOP_MULT,(void (*)(void))proj_mult);
159:   MatSetUp(RARt);
160:   MatAssemblyBegin(RARt,MAT_FINAL_ASSEMBLY);
161:   MatAssemblyEnd(RARt,MAT_FINAL_ASSEMBLY);
162:   return 0;
163: }

165: PetscErrorCode MyMatShellMatMultNumeric(Mat S, Mat B, Mat C, void *ctx)
166: {
167:   Mat            A;

169:   MatShellGetContext(S,&A);
170:   MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
171:   return 0;
172: }

174: PetscErrorCode MyMatTransposeShellMatMultNumeric(Mat S, Mat B, Mat C, void *ctx)
175: {
176:   Mat            A;

178:   MatShellGetContext(S,&A);
179:   MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
180:   return 0;
181: }

183: PetscErrorCode MyMatShellMatTransposeMultNumeric(Mat S, Mat B, Mat C, void *ctx)
184: {
185:   Mat            A;

187:   MatShellGetContext(S,&A);
188:   MatMatTransposeMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
189:   return 0;
190: }

192: int main(int argc,char **args)
193: {
194:   Mat            X,B,A,Bt,T,T2,PtAP = NULL,RARt = NULL, R = NULL;
195:   Vec            r,l,rs,ls;
196:   PetscInt       m,n,k,M = 10,N = 10,K = 5, ldx = 3, ldb = 5, ldr = 4;
197:   const char     *deft = MATAIJ;
198:   char           mattype[256];
199:   PetscBool      flg,symm = PETSC_FALSE,testtt = PETSC_TRUE, testnest = PETSC_TRUE, testtranspose = PETSC_TRUE, testcircular = PETSC_FALSE, local = PETSC_TRUE;
200:   PetscBool      testhtranspose = PETSC_TRUE;
201:   PetscBool      xgpu = PETSC_FALSE, bgpu = PETSC_FALSE, testshellops = PETSC_FALSE, testproj = PETSC_TRUE, testrart = PETSC_TRUE, testmatmatt = PETSC_TRUE, testmattmat = PETSC_TRUE;
202:   PetscScalar    *dataX = NULL,*dataB = NULL, *dataR = NULL, *dataBt = NULL;
203:   PetscScalar    *aX,*aB,*aBt;
204:   PetscReal      err;

207:   PetscInitialize(&argc,&args,NULL,help);
208:   PetscOptionsGetInt(NULL,NULL,"-N",&N,NULL);
209:   PetscOptionsGetInt(NULL,NULL,"-M",&M,NULL);
210:   PetscOptionsGetInt(NULL,NULL,"-K",&K,NULL);
211:   PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);
212:   PetscOptionsGetBool(NULL,NULL,"-local",&local,NULL);
213:   PetscOptionsGetInt(NULL,NULL,"-ldx",&ldx,NULL);
214:   PetscOptionsGetInt(NULL,NULL,"-ldb",&ldb,NULL);
215:   PetscOptionsGetInt(NULL,NULL,"-ldr",&ldr,NULL);
216:   PetscOptionsGetBool(NULL,NULL,"-testtranspose",&testtranspose,NULL);
217:   PetscOptionsGetBool(NULL,NULL,"-testnest",&testnest,NULL);
218:   PetscOptionsGetBool(NULL,NULL,"-testtt",&testtt,NULL);
219:   PetscOptionsGetBool(NULL,NULL,"-testcircular",&testcircular,NULL);
220:   PetscOptionsGetBool(NULL,NULL,"-testshellops",&testshellops,NULL);
221:   PetscOptionsGetBool(NULL,NULL,"-testproj",&testproj,NULL);
222:   PetscOptionsGetBool(NULL,NULL,"-testrart",&testrart,NULL);
223:   PetscOptionsGetBool(NULL,NULL,"-testmatmatt",&testmatmatt,NULL);
224:   PetscOptionsGetBool(NULL,NULL,"-testmattmat",&testmattmat,NULL);
225:   PetscOptionsGetBool(NULL,NULL,"-xgpu",&xgpu,NULL);
226:   PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);
227:   PetscOptionsGetScalar(NULL,NULL,"-magic_number",&MAGIC_NUMBER,NULL);
228:   if (M != N) testproj = PETSC_FALSE;

230:   MatCreate(PETSC_COMM_WORLD,&A);
231:   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
232:   MatSetType(A,MATAIJ);
233:   MatSetUp(A);
234:   MatSetRandom(A,NULL);
235:   if (M==N && symm) {
236:     Mat AT;

238:     MatTranspose(A,MAT_INITIAL_MATRIX,&AT);
239:     MatAXPY(A,1.0,AT,DIFFERENT_NONZERO_PATTERN);
240:     MatDestroy(&AT);
241:     MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
242:   }
243:   MatViewFromOptions(A,NULL,"-A_init_view");
244:   PetscOptionsBegin(PETSC_COMM_WORLD,"","","");
245:   PetscOptionsFList("-A_mat_type","Matrix type","MatSetType",MatList,deft,mattype,256,&flg);
246:   PetscOptionsEnd();
247:   if (flg) {
248:     Mat A2;

250:     MatDuplicate(A,MAT_COPY_VALUES,&A2);
251:     MatConvert(A,mattype,MAT_INPLACE_MATRIX,&A);
252:     MatMultEqual(A,A2,10,&flg);
253:     if (!flg) {
254:       Mat AE,A2E;

256:       PetscPrintf(PETSC_COMM_WORLD,"Error with convert\n");
257:       MatComputeOperator(A,MATDENSE,&AE);
258:       MatComputeOperator(A2,MATDENSE,&A2E);
259:       MatView(AE,NULL);
260:       MatView(A2E,NULL);
261:       MatAXPY(A2E,-1.0,A,SAME_NONZERO_PATTERN);
262:       MatView(A2E,NULL);
263:       MatDestroy(&A2E);
264:       MatDestroy(&AE);
265:     }
266:     MatDestroy(&A2);
267:   }
268:   MatViewFromOptions(A,NULL,"-A_view");

270:   MatGetLocalSize(A,&m,&n);
271:   if (local) {
272:     PetscInt i;

274:     PetscMalloc1((m+ldx)*K,&dataX);
275:     PetscMalloc1((n+ldb)*K,&dataB);
276:     for (i=0;i<(m+ldx)*K;i++) dataX[i] = MAGIC_NUMBER;
277:     for (i=0;i<(n+ldb)*K;i++) dataB[i] = MAGIC_NUMBER;
278:   }
279:   MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,K,dataB,&B);
280:   MatCreateDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,M,K,dataX,&X);
281:   if (local) {
282:     MatDenseSetLDA(X,m+ldx);
283:     MatDenseSetLDA(B,n+ldb);
284:   }
285:   MatGetLocalSize(B,NULL,&k);
286:   if (local) {
287:     PetscInt i;

289:     PetscMalloc1((k+ldr)*N,&dataBt);
290:     for (i=0;i<(k+ldr)*N;i++) dataBt[i] = MAGIC_NUMBER;
291:   }
292:   MatCreateDense(PETSC_COMM_WORLD,k,n,K,N,dataBt,&Bt);
293:   if (local) {
294:     MatDenseSetLDA(Bt,k+ldr);
295:   }

297:   /* store pointer to dense data for testing */
298:   MatDenseGetArrayRead(B,(const PetscScalar**)&dataB);
299:   MatDenseGetArrayRead(X,(const PetscScalar**)&dataX);
300:   MatDenseGetArrayRead(Bt,(const PetscScalar**)&dataBt);
301:   aX   = dataX;
302:   aB   = dataB;
303:   aBt  = dataBt;
304:   MatDenseRestoreArrayRead(Bt,(const PetscScalar**)&dataBt);
305:   MatDenseRestoreArrayRead(B,(const PetscScalar**)&dataB);
306:   MatDenseRestoreArrayRead(X,(const PetscScalar**)&dataX);
307:   if (local) {
308:     dataX  = aX;
309:     dataB  = aB;
310:     dataBt = aBt;
311:   }

313:   MatSetRandom(X,NULL);
314:   MatSetRandom(B,NULL);
315:   MatSetRandom(Bt,NULL);
316:   CheckLocal(X,NULL,aX,NULL);
317:   CheckLocal(Bt,B,aBt,aB);

319:   /* convert to CUDA if needed */
320:   if (bgpu) {
321:     MatConvert(B,MATDENSECUDA,MAT_INPLACE_MATRIX,&B);
322:     MatConvert(Bt,MATDENSECUDA,MAT_INPLACE_MATRIX,&Bt);
323:   }
324:   if (xgpu) {
325:     MatConvert(X,MATDENSECUDA,MAT_INPLACE_MATRIX,&X);
326:   }
327:   CheckLocal(B,X,aB,aX);

329:   /* Test MatDenseGetSubMatrix */
330:   {
331:     Mat B2,T3,T4;

333:     MatDuplicate(B,MAT_COPY_VALUES,&B2);
334:     MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&T4);
335:     MatSetRandom(T4,NULL);
336:     MatAXPY(B2,1.0,T4,SAME_NONZERO_PATTERN);
337:     MatDenseGetSubMatrix(B,PetscMin(1,K),PetscMin(2,K),&T);
338:     MatDenseGetSubMatrix(T4,PetscMin(1,K),PetscMin(2,K),&T2);
339:     MatDenseGetSubMatrix(B2,PetscMin(1,K),PetscMin(2,K),&T3);
340:     MatAXPY(T,1.0,T2,SAME_NONZERO_PATTERN);
341:     MatAXPY(T3,-1.0,T,SAME_NONZERO_PATTERN);
342:     MatNorm(T3,NORM_FROBENIUS,&err);
343:     if (err > PETSC_SMALL) {
344:       PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetSubMatrix\n");
345:       MatView(T3,NULL);
346:     }
347:     MatDenseRestoreSubMatrix(B,&T);
348:     MatDenseRestoreSubMatrix(T4,&T2);
349:     MatDenseRestoreSubMatrix(B2,&T3);
350:     CheckLocal(B,NULL,aB,NULL);
351:     MatDestroy(&B2);
352:     MatDestroy(&T4);
353:   }

355:   /* Test reusing a previously allocated dense buffer */
356:   MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);
357:   CheckLocal(B,X,aB,aX);
358:   MatMatMultEqual(A,B,X,10,&flg);
359:   if (!flg) {
360:     PetscPrintf(PETSC_COMM_WORLD,"Error with reusage\n");
361:     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
362:     MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);
363:     MatView(T,NULL);
364:     MatDestroy(&T);
365:   }

367:   /* Test MatTransposeMat and MatMatTranspose */
368:   if (testmattmat) {
369:     MatTransposeMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
370:     CheckLocal(B,X,aB,aX);
371:     MatTransposeMatMultEqual(A,X,B,10,&flg);
372:     if (!flg) {
373:       PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MatTransposeMat)\n");
374:       MatTransposeMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B);
375:       MatAXPY(T,-1.0,B,SAME_NONZERO_PATTERN);
376:       MatView(T,NULL);
377:       MatDestroy(&T);
378:     }
379:   }
380:   if (testmatmatt) {
381:     MatMatTransposeMult(A,Bt,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);
382:     CheckLocal(Bt,X,aBt,aX);
383:     MatMatTransposeMultEqual(A,Bt,X,10,&flg);
384:     if (!flg) {
385:       PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MatMatTranspose)\n");
386:       MatMatTransposeMult(A,Bt,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
387:       MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);
388:       MatView(T,NULL);
389:       MatDestroy(&T);
390:     }
391:   }

393:   /* Test projection operations (PtAP and RARt) */
394:   if (testproj) {
395:     MatPtAP(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);
396:     MatPtAPMultEqual(A,B,PtAP,10,&flg);
397:     if (!flg) {
398:       PetscPrintf(PETSC_COMM_WORLD,"Error with PtAP\n");
399:       MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
400:       MatTransposeMatMult(B,T,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T2);
401:       MatAXPY(T2,-1.0,PtAP,SAME_NONZERO_PATTERN);
402:       MatView(T2,NULL);
403:       MatDestroy(&T2);
404:       MatDestroy(&T);
405:     }
406:     PetscMalloc1((k+ldr)*M,&dataR);
407:     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,m,K,M,dataR,&R);
408:     MatDenseSetLDA(R,k+ldr);
409:     MatSetRandom(R,NULL);
410:     if (testrart) { /* fails for AIJCUSPARSE because RA operation is not defined */
411:       MatRARt(A,R,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RARt);
412:       MatRARtMultEqual(A,R,RARt,10,&flg);
413:       if (!flg) {
414:         PetscPrintf(PETSC_COMM_WORLD,"Error with RARt\n");
415:         MatMatTransposeMult(A,R,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
416:         MatMatMult(R,T,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T2);
417:         MatAXPY(T2,-1.0,RARt,SAME_NONZERO_PATTERN);
418:         MatView(T2,NULL);
419:         MatDestroy(&T2);
420:         MatDestroy(&T);
421:       }
422:     }
423:   }

425:   /* Test MatDenseGetColumnVec and friends */
426:   MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);
427:   MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
428:   MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&T2);
429:   for (k=0;k<K;k++) {
430:     Vec Xv,Tv,T2v;

432:     MatDenseGetColumnVecRead(X,k,&Xv);
433:     MatDenseGetColumnVec(T,k,&Tv);
434:     MatDenseGetColumnVecWrite(T2,k,&T2v);
435:     VecCopy(Xv,T2v);
436:     VecAXPY(Tv,-1.,Xv);
437:     MatDenseRestoreColumnVecRead(X,k,&Xv);
438:     MatDenseRestoreColumnVec(T,k,&Tv);
439:     MatDenseRestoreColumnVecWrite(T2,k,&T2v);
440:   }
441:   MatNorm(T,NORM_FROBENIUS,&err);
442:   if (err > PETSC_SMALL) {
443:     PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetColumnVec\n");
444:     MatView(T,NULL);
445:   }
446:   MatAXPY(T2,-1.,X,SAME_NONZERO_PATTERN);
447:   MatNorm(T2,NORM_FROBENIUS,&err);
448:   if (err > PETSC_SMALL) {
449:     PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetColumnVecWrite\n");
450:     MatView(T2,NULL);
451:   }
452:   MatDestroy(&T);
453:   MatDestroy(&T2);

455:   /* Test with MatShell */
456:   MatDuplicate(A,MAT_COPY_VALUES,&T);
457:   MatConvert(T,MATSHELL,MAT_INITIAL_MATRIX,&T2);
458:   MatDestroy(&T);

460:   /* scale matrix */
461:   MatScale(A,2.0);
462:   MatScale(T2,2.0);
463:   MatCreateVecs(A,&r,&l);
464:   VecSetRandom(r,NULL);
465:   VecSetRandom(l,NULL);
466:   MatCreateVecs(T2,&rs,&ls);
467:   VecCopy(r,rs);
468:   VecCopy(l,ls);
469:   if (testproj) {
470:     MatDiagonalScale(A,r,r);
471:     MatDiagonalScale(T2,rs,rs);
472:   } else {
473:     MatDiagonalScale(A,l,r);
474:     MatDiagonalScale(T2,ls,rs);
475:   }
476:   MatDuplicate(A,MAT_COPY_VALUES,&T);
477:   MatAXPY(A,4.5,T,SAME_NONZERO_PATTERN);
478:   MatAXPY(T2,4.5,T,DIFFERENT_NONZERO_PATTERN);
479:   MatMultEqual(T2,A,10,&flg);
480:   if (!flg) {
481:     PetscPrintf(PETSC_COMM_WORLD,"Error with MATSHELL (MatMult)\n");
482:   }
483:   MatMultTransposeEqual(T2,A,10,&flg);
484:   if (!flg) {
485:     PetscPrintf(PETSC_COMM_WORLD,"Error with MATSHELL (MatMultTranspose)\n");
486:   }
487:   MatDestroy(&T);
488:   VecDestroy(&ls);
489:   VecDestroy(&rs);
490:   VecDestroy(&l);
491:   VecDestroy(&r);

493:   /* recompute projections, test reusage */
494:   if (PtAP) MatPtAP(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&PtAP);
495:   if (RARt) MatRARt(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RARt);
496:   if (testshellops) { /* test callbacks for user defined MatProducts */
497:     MatShellSetMatProductOperation(T2,MATPRODUCT_AB,NULL,MyMatShellMatMultNumeric,NULL,MATDENSE,MATDENSE);
498:     MatShellSetMatProductOperation(T2,MATPRODUCT_AB,NULL,MyMatShellMatMultNumeric,NULL,MATDENSECUDA,MATDENSECUDA);
499:     MatShellSetMatProductOperation(T2,MATPRODUCT_AtB,NULL,MyMatTransposeShellMatMultNumeric,NULL,MATDENSE,MATDENSE);
500:     MatShellSetMatProductOperation(T2,MATPRODUCT_AtB,NULL,MyMatTransposeShellMatMultNumeric,NULL,MATDENSECUDA,MATDENSECUDA);
501:     MatShellSetMatProductOperation(T2,MATPRODUCT_ABt,NULL,MyMatShellMatTransposeMultNumeric,NULL,MATDENSE,MATDENSE);
502:     MatShellSetMatProductOperation(T2,MATPRODUCT_ABt,NULL,MyMatShellMatTransposeMultNumeric,NULL,MATDENSECUDA,MATDENSECUDA);
503:     if (testproj) {
504:       MatShellSetMatProductOperation(T2,MATPRODUCT_PtAP,MyPtShellPMultSymbolic,MyPtShellPMultNumeric,proj_destroy,MATDENSE,MATSHELL);
505:       MatShellSetMatProductOperation(T2,MATPRODUCT_PtAP,MyPtShellPMultSymbolic,MyPtShellPMultNumeric,proj_destroy,MATDENSECUDA,MATSHELL);
506:       MatShellSetMatProductOperation(T2,MATPRODUCT_RARt,MyRShellRtMultSymbolic,MyRShellRtMultNumeric,proj_destroy,MATDENSE,MATSHELL);
507:       MatShellSetMatProductOperation(T2,MATPRODUCT_RARt,MyRShellRtMultSymbolic,MyRShellRtMultNumeric,proj_destroy,MATDENSECUDA,MATSHELL);
508:     }
509:   }
510:   CheckLocal(B,X,aB,aX);
511:   /* we either use the shell operations or the loop over columns code, applying the operator */
512:   MatMatMult(T2,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);
513:   CheckLocal(B,X,aB,aX);
514:   MatMatMultEqual(T2,B,X,10,&flg);
515:   if (!flg) {
516:     PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MATSHELL)\n");
517:     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
518:     MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);
519:     MatView(T,NULL);
520:     MatDestroy(&T);
521:   }
522:   if (testproj) {
523:     MatPtAPMultEqual(T2,B,PtAP,10,&flg);
524:     if (!flg) {
525:       PetscPrintf(PETSC_COMM_WORLD,"Error with PtAP (MATSHELL)\n");
526:     }
527:     if (testshellops) { /* projections fail if the product operations are not specified */
528:       MatPtAP(T2,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
529:       MatPtAP(T2,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&T);
530:       MatPtAPMultEqual(T2,B,T,10,&flg);
531:       if (!flg) {
532:         Mat TE;

534:         PetscPrintf(PETSC_COMM_WORLD,"Error with PtAP (MATSHELL user defined)\n");
535:         MatComputeOperator(T,MATDENSE,&TE);
536:         MatView(TE,NULL);
537:         MatView(PtAP,NULL);
538:         MatAXPY(TE,-1.0,PtAP,SAME_NONZERO_PATTERN);
539:         MatView(TE,NULL);
540:         MatDestroy(&TE);
541:       }
542:       MatDestroy(&T);
543:     }
544:     if (RARt) {
545:       MatRARtMultEqual(T2,R,RARt,10,&flg);
546:       if (!flg) {
547:         PetscPrintf(PETSC_COMM_WORLD,"Error with RARt (MATSHELL)\n");
548:       }
549:     }
550:     if (testshellops) {
551:       MatRARt(T2,R,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
552:       MatRARt(T2,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&T);
553:       MatRARtMultEqual(T2,R,T,10,&flg);
554:       if (!flg) {
555:         Mat TE;

557:         PetscPrintf(PETSC_COMM_WORLD,"Error with RARt (MATSHELL user defined)\n");
558:         MatComputeOperator(T,MATDENSE,&TE);
559:         MatView(TE,NULL);
560:         if (RARt) {
561:           MatView(RARt,NULL);
562:           MatAXPY(TE,-1.0,RARt,SAME_NONZERO_PATTERN);
563:           MatView(TE,NULL);
564:         }
565:         MatDestroy(&TE);
566:       }
567:       MatDestroy(&T);
568:     }
569:   }

571:   if (testmattmat) { /* we either use the shell operations or the loop over columns code applying the transposed operator */
572:     MatTransposeMatMult(T2,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
573:     CheckLocal(B,X,aB,aX);
574:     MatTransposeMatMultEqual(T2,X,B,10,&flg);
575:     if (!flg) {
576:       PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MatTranspose, MATSHELL)\n");
577:       MatTransposeMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
578:       MatAXPY(T,-1.0,B,SAME_NONZERO_PATTERN);
579:       MatView(T,NULL);
580:       MatDestroy(&T);
581:     }
582:   }
583:   if (testmatmatt && testshellops) { /* only when shell operations are set */
584:     MatMatTransposeMult(T2,Bt,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);
585:     CheckLocal(Bt,X,aBt,aX);
586:     MatMatTransposeMultEqual(T2,Bt,X,10,&flg);
587:     if (!flg) {
588:       PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MatMatTranspose, MATSHELL)\n");
589:       MatMatTransposeMult(A,Bt,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
590:       MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);
591:       MatView(T,NULL);
592:       MatDestroy(&T);
593:     }
594:   }
595:   MatDestroy(&T2);

597:   if (testnest) { /* test with MatNest */
598:     Mat NA;

600:     MatCreateNest(PETSC_COMM_WORLD,1,NULL,1,NULL,&A,&NA);
601:     MatViewFromOptions(NA,NULL,"-NA_view");
602:     MatMatMult(NA,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);
603:     CheckLocal(B,X,aB,aX);
604:     MatMatMultEqual(NA,B,X,10,&flg);
605:     if (!flg) {
606:       PetscPrintf(PETSC_COMM_WORLD,"Error with Nest\n");
607:       MatMatMult(NA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
608:       MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);
609:       MatView(T,NULL);
610:       MatDestroy(&T);
611:     }
612:     MatDestroy(&NA);
613:   }

615:   if (testtranspose) { /* test with Transpose */
616:     Mat TA;

618:     MatCreateTranspose(A,&TA);
619:     MatMatMult(TA,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
620:     CheckLocal(B,X,aB,aX);
621:     MatMatMultEqual(TA,X,B,10,&flg);
622:     if (!flg) {
623:       PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose\n");
624:       MatMatMult(TA,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
625:       MatAXPY(T,-1.0,B,SAME_NONZERO_PATTERN);
626:       MatView(T,NULL);
627:       MatDestroy(&T);
628:     }
629:     MatDestroy(&TA);
630:   }

632:   if (testhtranspose) { /* test with Hermitian Transpose */
633:     Mat TA;

635:     MatCreateHermitianTranspose(A,&TA);
636:     MatMatMult(TA,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
637:     CheckLocal(B,X,aB,aX);
638:     MatMatMultEqual(TA,X,B,10,&flg);
639:     if (!flg) {
640:       PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose\n");
641:       MatMatMult(TA,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
642:       MatAXPY(T,-1.0,B,SAME_NONZERO_PATTERN);
643:       MatView(T,NULL);
644:       MatDestroy(&T);
645:     }
646:     MatDestroy(&TA);
647:   }

649:   if (testtt) { /* test with Transpose(Transpose) */
650:     Mat TA, TTA;

652:     MatCreateTranspose(A,&TA);
653:     MatCreateTranspose(TA,&TTA);
654:     MatMatMult(TTA,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);
655:     CheckLocal(B,X,aB,aX);
656:     MatMatMultEqual(TTA,B,X,10,&flg);
657:     if (!flg) {
658:       PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose(Transpose)\n");
659:       MatMatMult(TTA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
660:       MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);
661:       MatView(T,NULL);
662:       MatDestroy(&T);
663:     }
664:     MatDestroy(&TA);
665:     MatDestroy(&TTA);
666:   }

668:   if (testcircular) { /* test circular */
669:     Mat AB;

671:     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AB);
672:     MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);
673:     CheckLocal(B,X,aB,aX);
674:     if (M == N && N == K) {
675:       MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
676:     } else {
677:       MatTransposeMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
678:     }
679:     CheckLocal(B,X,aB,aX);
680:     MatDestroy(&AB);
681:   }

683:   /* Test by Pierre Jolivet */
684:   {
685:     Mat C,D,D2,AtA;
686:     MatCreateNormal(A,&AtA);
687:     MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&C);
688:     MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&D);
689:     MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&D2);
690:     MatSetRandom(B,NULL);
691:     MatSetRandom(C,NULL);
692:     MatSetRandom(D,NULL);
693:     MatSetRandom(D2,NULL);
694:     MatProductCreateWithMat(A,B,NULL,C);
695:     MatProductSetType(C,MATPRODUCT_AB);
696:     MatProductSetFromOptions(C);
697:     MatProductSymbolic(C);
698:     MatProductCreateWithMat(A,C,NULL,D);
699:     MatProductSetType(D, MATPRODUCT_AtB);
700:     MatProductSetFromOptions(D);
701:     MatProductSymbolic(D);
702:     MatProductNumeric(C);
703:     MatProductNumeric(D);
704:     MatMatMultEqual(AtA,B,D,10,&flg);
705:     if (!flg) {
706:       MatMatMult(AtA,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
707:       MatAXPY(T,-1.0,D,SAME_NONZERO_PATTERN);
708:       MatView(T,NULL);
709:       MatDestroy(&T);
710:     }
711:     MatDestroy(&C);
712:     MatDestroy(&D);
713:     MatDestroy(&D2);
714:     MatDestroy(&AtA);
715:   }

717:   MatDestroy(&X);
718:   MatDestroy(&Bt);
719:   MatDestroy(&B);
720:   MatDestroy(&A);
721:   MatDestroy(&R);
722:   MatDestroy(&PtAP);
723:   MatDestroy(&RARt);
724:   PetscFree(dataX);
725:   PetscFree(dataB);
726:   PetscFree(dataR);
727:   PetscFree(dataBt);
728:   PetscFinalize();
729:   return 0;
730: }

732: /*TEST

734:   test:
735:     suffix: 1
736:     args: -local {{0 1}} -testshellops

738:   test:
739:     output_file: output/ex70_1.out
740:     requires: cuda
741:     suffix: 1_cuda
742:     args: -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type {{seqaijcusparse seqaij}} -testshellops {{0 1}}

744:   test:
745:     output_file: output/ex70_1.out
746:     nsize: 2
747:     suffix: 1_par
748:     args: -local {{0 1}} -testmatmatt 0

750:   test:
751:     output_file: output/ex70_1.out
752:     requires: cuda
753:     nsize: 2
754:     suffix: 1_par_cuda
755:     args: -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type {{mpiaijcusparse mpiaij}} -testnest 0 -testmatmatt 0 -matmatmult_Bbn 3

757:   test:
758:     output_file: output/ex70_1.out
759:     suffix: 2
760:     nsize: 1
761:     args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}}

763:   testset:
764:     requires: cuda
765:     output_file: output/ex70_1.out
766:     nsize: 1
767:     args: -M 7 -N 9 -K 2 -local {{0 1}} -testnest 0 -A_mat_type {{seqdensecuda seqdense}} -xgpu {{0 1}} -bgpu {{0 1}}
768:     test:
769:       requires: !complex
770:       suffix: 2_cuda_real
771:     test:
772:       # complex+single gives a little bigger error in the MatDenseGetColumnVec test
773:       requires: complex !single
774:       suffix: 2_cuda_complex

776:   test:
777:     output_file: output/ex70_1.out
778:     suffix: 2_par
779:     nsize: 2
780:     args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}} -testcircular -testmatmatt 0

782:   test:
783:     requires: cuda
784:     output_file: output/ex70_1.out
785:     suffix: 2_par_cuda
786:     nsize: 2
787:     args: -M 11 -N 9 -K 1 -local {{0 1}} -testcircular 0 -A_mat_type mpiaijcusparse -xgpu -bgpu -testnest 0 -testmatmatt 0

789:   test:
790:     output_file: output/ex70_1.out
791:     suffix: 3
792:     nsize: {{1 3}}
793:     args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type sbaij -symm -testproj 0 -testmatmatt 0

795:   test:
796:     output_file: output/ex70_1.out
797:     suffix: 4
798:     nsize: 1
799:     args: -M 3 -N 3 -K 3 -local {{0 1}} -testcircular

801:   test:
802:     output_file: output/ex70_1.out
803:     suffix: 5
804:     nsize: {{2 4}}
805:     args: -M 3 -N 3 -K 3 -local {{0 1}} -testcircular -testmatmatt 0

807:   test:
808:     output_file: output/ex70_1.out
809:     suffix: 6
810:     nsize: 1
811:     args: -M {{1 3}} -N {{2 5}} -K {{1 2}} -local {{0 1}} -testcircular

813:   test:
814:     output_file: output/ex70_1.out
815:     suffix: 7
816:     nsize: 1
817:     args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type dense -testnest -testcircular

819: TEST*/