Actual source code: axpy.c
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5: {
6: Mat A,F;
7: PetscErrorCode (*f)(Mat,Mat*);
9: PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
10: if (f) {
11: MatTransposeGetMat(T,&A);
12: if (T == X) {
13: PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
14: MatTranspose(A,MAT_INITIAL_MATRIX,&F);
15: A = Y;
16: } else {
17: PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
18: MatTranspose(X,MAT_INITIAL_MATRIX,&F);
19: }
20: } else {
21: MatHermitianTransposeGetMat(T,&A);
22: if (T == X) {
23: PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
24: MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
25: A = Y;
26: } else {
27: PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
28: MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
29: }
30: }
31: MatAXPY(A,a,F,str);
32: MatDestroy(&F);
33: return 0;
34: }
36: /*@
37: MatAXPY - Computes Y = a*X + Y.
39: Logically Collective on Mat
41: Input Parameters:
42: + a - the scalar multiplier
43: . X - the first matrix
44: . Y - the second matrix
45: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
47: Notes: No operation is performed when a is zero.
49: Level: intermediate
51: .seealso: MatAYPX()
52: @*/
53: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
54: {
55: PetscInt M1,M2,N1,N2;
56: PetscInt m1,m2,n1,n2;
57: MatType t1,t2;
58: PetscBool sametype,transpose;
64: MatGetSize(X,&M1,&N1);
65: MatGetSize(Y,&M2,&N2);
66: MatGetLocalSize(X,&m1,&n1);
67: MatGetLocalSize(Y,&m2,&n2);
72: if (a == (PetscScalar)0.0) return 0;
73: if (Y == X) {
74: MatScale(Y,1.0 + a);
75: return 0;
76: }
77: MatGetType(X,&t1);
78: MatGetType(Y,&t2);
79: PetscStrcmp(t1,t2,&sametype);
80: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
81: if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
82: (*Y->ops->axpy)(Y,a,X,str);
83: } else {
84: PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
85: if (transpose) {
86: MatTransposeAXPY_Private(Y,a,X,str,X);
87: } else {
88: PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
89: if (transpose) {
90: MatTransposeAXPY_Private(Y,a,X,str,Y);
91: } else {
92: MatAXPY_Basic(Y,a,X,str);
93: }
94: }
95: }
96: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
97: return 0;
98: }
100: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
101: {
102: PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
104: /* look for any available faster alternative to the general preallocator */
105: PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
106: if (preall) {
107: (*preall)(Y,X,B);
108: } else { /* Use MatPrellocator, assumes same row-col distribution */
109: Mat preallocator;
110: PetscInt r,rstart,rend;
111: PetscInt m,n,M,N;
113: MatGetRowUpperTriangular(Y);
114: MatGetRowUpperTriangular(X);
115: MatGetSize(Y,&M,&N);
116: MatGetLocalSize(Y,&m,&n);
117: MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
118: MatSetType(preallocator,MATPREALLOCATOR);
119: MatSetLayouts(preallocator,Y->rmap,Y->cmap);
120: MatSetUp(preallocator);
121: MatGetOwnershipRange(preallocator,&rstart,&rend);
122: for (r = rstart; r < rend; ++r) {
123: PetscInt ncols;
124: const PetscInt *row;
125: const PetscScalar *vals;
127: MatGetRow(Y,r,&ncols,&row,&vals);
128: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
129: MatRestoreRow(Y,r,&ncols,&row,&vals);
130: MatGetRow(X,r,&ncols,&row,&vals);
131: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
132: MatRestoreRow(X,r,&ncols,&row,&vals);
133: }
134: MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
135: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
136: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
137: MatRestoreRowUpperTriangular(Y);
138: MatRestoreRowUpperTriangular(X);
140: MatCreate(PetscObjectComm((PetscObject)Y),B);
141: MatSetType(*B,((PetscObject)Y)->type_name);
142: MatSetLayouts(*B,Y->rmap,Y->cmap);
143: MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
144: MatDestroy(&preallocator);
145: }
146: return 0;
147: }
149: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
150: {
151: PetscBool isshell,isdense,isnest;
153: MatIsShell(Y,&isshell);
154: if (isshell) { /* MatShell has special support for AXPY */
155: PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
157: MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
158: if (f) {
159: (*f)(Y,a,X,str);
160: return 0;
161: }
162: }
163: /* no need to preallocate if Y is dense */
164: PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"");
165: if (isdense) {
166: PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest);
167: if (isnest) {
168: MatAXPY_Dense_Nest(Y,a,X);
169: return 0;
170: }
171: if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
172: }
173: if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
174: PetscInt i,start,end,j,ncols,m,n;
175: const PetscInt *row;
176: PetscScalar *val;
177: const PetscScalar *vals;
179: MatGetSize(X,&m,&n);
180: MatGetOwnershipRange(X,&start,&end);
181: MatGetRowUpperTriangular(X);
182: if (a == 1.0) {
183: for (i = start; i < end; i++) {
184: MatGetRow(X,i,&ncols,&row,&vals);
185: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
186: MatRestoreRow(X,i,&ncols,&row,&vals);
187: }
188: } else {
189: PetscInt vs = 100;
190: /* realloc if needed, as this function may be used in parallel */
191: PetscMalloc1(vs,&val);
192: for (i=start; i<end; i++) {
193: MatGetRow(X,i,&ncols,&row,&vals);
194: if (vs < ncols) {
195: vs = PetscMin(2*ncols,n);
196: PetscRealloc(vs*sizeof(*val),&val);
197: }
198: for (j=0; j<ncols; j++) val[j] = a*vals[j];
199: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
200: MatRestoreRow(X,i,&ncols,&row,&vals);
201: }
202: PetscFree(val);
203: }
204: MatRestoreRowUpperTriangular(X);
205: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
206: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
207: } else {
208: Mat B;
210: MatAXPY_Basic_Preallocate(Y,X,&B);
211: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
212: MatHeaderMerge(Y,&B);
213: }
214: return 0;
215: }
217: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
218: {
219: PetscInt i,start,end,j,ncols,m,n;
220: const PetscInt *row;
221: PetscScalar *val;
222: const PetscScalar *vals;
224: MatGetSize(X,&m,&n);
225: MatGetOwnershipRange(X,&start,&end);
226: MatGetRowUpperTriangular(Y);
227: MatGetRowUpperTriangular(X);
228: if (a == 1.0) {
229: for (i = start; i < end; i++) {
230: MatGetRow(Y,i,&ncols,&row,&vals);
231: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
232: MatRestoreRow(Y,i,&ncols,&row,&vals);
234: MatGetRow(X,i,&ncols,&row,&vals);
235: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
236: MatRestoreRow(X,i,&ncols,&row,&vals);
237: }
238: } else {
239: PetscInt vs = 100;
240: /* realloc if needed, as this function may be used in parallel */
241: PetscMalloc1(vs,&val);
242: for (i=start; i<end; i++) {
243: MatGetRow(Y,i,&ncols,&row,&vals);
244: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
245: MatRestoreRow(Y,i,&ncols,&row,&vals);
247: MatGetRow(X,i,&ncols,&row,&vals);
248: if (vs < ncols) {
249: vs = PetscMin(2*ncols,n);
250: PetscRealloc(vs*sizeof(*val),&val);
251: }
252: for (j=0; j<ncols; j++) val[j] = a*vals[j];
253: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
254: MatRestoreRow(X,i,&ncols,&row,&vals);
255: }
256: PetscFree(val);
257: }
258: MatRestoreRowUpperTriangular(Y);
259: MatRestoreRowUpperTriangular(X);
260: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
261: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
262: return 0;
263: }
265: /*@
266: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
268: Neighbor-wise Collective on Mat
270: Input Parameters:
271: + Y - the matrices
272: - a - the PetscScalar
274: Level: intermediate
276: Notes:
277: If Y is a rectangular matrix, the shift is done on the main diagonal Y_{ii} of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
279: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
280: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
281: entry). No operation is performed when a is zero.
283: To form Y = Y + diag(V) use MatDiagonalSet()
285: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
286: @*/
287: PetscErrorCode MatShift(Mat Y,PetscScalar a)
288: {
292: MatCheckPreallocated(Y,1);
293: if (a == 0.0) return 0;
295: if (Y->ops->shift) (*Y->ops->shift)(Y,a);
296: else MatShift_Basic(Y,a);
298: PetscObjectStateIncrease((PetscObject)Y);
299: return 0;
300: }
302: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
303: {
304: PetscInt i,start,end;
305: const PetscScalar *v;
307: MatGetOwnershipRange(Y,&start,&end);
308: VecGetArrayRead(D,&v);
309: for (i=start; i<end; i++) {
310: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
311: }
312: VecRestoreArrayRead(D,&v);
313: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
314: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
315: return 0;
316: }
318: /*@
319: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
320: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
321: INSERT_VALUES.
323: Neighbor-wise Collective on Mat
325: Input Parameters:
326: + Y - the input matrix
327: . D - the diagonal matrix, represented as a vector
328: - i - INSERT_VALUES or ADD_VALUES
330: Notes:
331: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
332: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
333: entry).
335: Level: intermediate
337: .seealso: MatShift(), MatScale(), MatDiagonalScale()
338: @*/
339: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
340: {
341: PetscInt matlocal,veclocal;
345: MatGetLocalSize(Y,&matlocal,NULL);
346: VecGetLocalSize(D,&veclocal);
348: if (Y->ops->diagonalset) {
349: (*Y->ops->diagonalset)(Y,D,is);
350: } else {
351: MatDiagonalSet_Default(Y,D,is);
352: }
353: PetscObjectStateIncrease((PetscObject)Y);
354: return 0;
355: }
357: /*@
358: MatAYPX - Computes Y = a*Y + X.
360: Logically on Mat
362: Input Parameters:
363: + a - the PetscScalar multiplier
364: . Y - the first matrix
365: . X - the second matrix
366: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
368: Level: intermediate
370: .seealso: MatAXPY()
371: @*/
372: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
373: {
374: MatScale(Y,a);
375: MatAXPY(Y,1.0,X,str);
376: return 0;
377: }
379: /*@
380: MatComputeOperator - Computes the explicit matrix
382: Collective on Mat
384: Input Parameters:
385: + inmat - the matrix
386: - mattype - the matrix type for the explicit operator
388: Output Parameter:
389: . mat - the explicit operator
391: Notes:
392: This computation is done by applying the operators to columns of the identity matrix.
393: This routine is costly in general, and is recommended for use only with relatively small systems.
394: Currently, this routine uses a dense matrix format if mattype == NULL.
396: Level: advanced
398: @*/
399: PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
400: {
403: MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
404: return 0;
405: }
407: /*@
408: MatComputeOperatorTranspose - Computes the explicit matrix representation of
409: a give matrix that can apply MatMultTranspose()
411: Collective on Mat
413: Input Parameters:
414: + inmat - the matrix
415: - mattype - the matrix type for the explicit operator
417: Output Parameter:
418: . mat - the explicit operator transposed
420: Notes:
421: This computation is done by applying the transpose of the operator to columns of the identity matrix.
422: This routine is costly in general, and is recommended for use only with relatively small systems.
423: Currently, this routine uses a dense matrix format if mattype == NULL.
425: Level: advanced
427: @*/
428: PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
429: {
430: Mat A;
434: MatCreateTranspose(inmat,&A);
435: MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
436: MatDestroy(&A);
437: return 0;
438: }
440: /*@
441: MatChop - Set all values in the matrix less than the tolerance to zero
443: Input Parameters:
444: + A - The matrix
445: - tol - The zero tolerance
447: Output Parameters:
448: . A - The chopped matrix
450: Level: intermediate
452: .seealso: MatCreate(), MatZeroEntries()
453: @*/
454: PetscErrorCode MatChop(Mat A, PetscReal tol)
455: {
456: Mat a;
457: PetscScalar *newVals;
458: PetscInt *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
459: PetscBool flg;
461: PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "");
462: if (flg) {
463: MatDenseGetLocalMatrix(A, &a);
464: MatDenseGetLDA(a, &r);
465: MatGetSize(a, &rStart, &rEnd);
466: MatDenseGetArray(a, &newVals);
467: for (; colMax < rEnd; ++colMax) {
468: for (maxRows = 0; maxRows < rStart; ++maxRows) {
469: newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
470: }
471: }
472: MatDenseRestoreArray(a, &newVals);
473: } else {
474: MatGetOwnershipRange(A, &rStart, &rEnd);
475: MatGetRowUpperTriangular(A);
476: for (r = rStart; r < rEnd; ++r) {
477: PetscInt ncols;
479: MatGetRow(A, r, &ncols, NULL, NULL);
480: colMax = PetscMax(colMax, ncols);
481: MatRestoreRow(A, r, &ncols, NULL, NULL);
482: }
483: numRows = rEnd - rStart;
484: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
485: PetscMalloc2(colMax, &newCols, colMax, &newVals);
486: MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg); /* cache user-defined value */
487: MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
488: /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd() */
489: /* that are potentially called many times depending on the distribution of A */
490: for (r = rStart; r < rStart+maxRows; ++r) {
491: const PetscScalar *vals;
492: const PetscInt *cols;
493: PetscInt ncols, newcols, c;
495: if (r < rEnd) {
496: MatGetRow(A, r, &ncols, &cols, &vals);
497: for (c = 0; c < ncols; ++c) {
498: newCols[c] = cols[c];
499: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
500: }
501: newcols = ncols;
502: MatRestoreRow(A, r, &ncols, &cols, &vals);
503: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
504: }
505: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
506: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
507: }
508: MatRestoreRowUpperTriangular(A);
509: PetscFree2(newCols, newVals);
510: MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg); /* reset option to its user-defined value */
511: }
512: return 0;
513: }