Actual source code: gmreig.c
2: #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
3: #include <petscblaslapack.h>
5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
6: {
7: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
8: PetscInt n = gmres->it + 1,i,N = gmres->max_k + 2;
9: PetscBLASInt bn, bN,lwork, idummy,lierr;
10: PetscScalar *R = gmres->Rsvd,*work = R + N*N,sdummy = 0;
11: PetscReal *realpart = gmres->Dsvd;
13: PetscBLASIntCast(n,&bn);
14: PetscBLASIntCast(N,&bN);
15: PetscBLASIntCast(5*N,&lwork);
16: PetscBLASIntCast(N,&idummy);
17: if (n <= 0) {
18: *emax = *emin = 1.0;
19: return 0;
20: }
21: /* copy R matrix to work space */
22: PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));
24: /* zero below diagonal garbage */
25: for (i=0; i<n; i++) R[i*N+i+1] = 0.0;
27: /* compute Singular Values */
28: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
29: #if !defined(PETSC_USE_COMPLEX)
30: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
31: #else
32: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
33: #endif
35: PetscFPTrapPop();
37: *emin = realpart[n-1];
38: *emax = realpart[0];
39: return 0;
40: }
42: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
43: {
44: #if !defined(PETSC_USE_COMPLEX)
45: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
46: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
47: PetscBLASInt bn, bN, lwork, idummy, l-1;
48: PetscScalar *R = gmres->Rsvd,*work = R + N*N;
49: PetscScalar *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0;
51: PetscBLASIntCast(n,&bn);
52: PetscBLASIntCast(N,&bN);
53: PetscBLASIntCast(5*N,&lwork);
54: PetscBLASIntCast(N,&idummy);
56: *neig = n;
58: if (!n) return 0;
60: /* copy R matrix to work space */
61: PetscArraycpy(R,gmres->hes_origin,N*N);
63: /* compute eigenvalues */
64: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
65: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
67: PetscFPTrapPop();
68: PetscMalloc1(n,&perm);
69: for (i=0; i<n; i++) perm[i] = i;
70: PetscSortRealWithPermutation(n,realpart,perm);
71: for (i=0; i<n; i++) {
72: r[i] = realpart[perm[i]];
73: c[i] = imagpart[perm[i]];
74: }
75: PetscFree(perm);
76: #else
77: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
78: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
79: PetscScalar *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
80: PetscBLASInt bn,bN,lwork,idummy,l-1;
82: PetscBLASIntCast(n,&bn);
83: PetscBLASIntCast(N,&bN);
84: PetscBLASIntCast(5*N,&lwork);
85: PetscBLASIntCast(N,&idummy);
87: *neig = n;
89: if (!n) return 0;
91: /* copy R matrix to work space */
92: PetscArraycpy(R,gmres->hes_origin,N*N);
94: /* compute eigenvalues */
95: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
96: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
98: PetscFPTrapPop();
99: PetscMalloc1(n,&perm);
100: for (i=0; i<n; i++) perm[i] = i;
101: for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
102: PetscSortRealWithPermutation(n,r,perm);
103: for (i=0; i<n; i++) {
104: r[i] = PetscRealPart(eigs[perm[i]]);
105: c[i] = PetscImaginaryPart(eigs[perm[i]]);
106: }
107: PetscFree(perm);
108: #endif
109: return 0;
110: }
112: #if !defined(PETSC_USE_COMPLEX)
113: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
114: {
115: KSP_GMRES *gmres = (KSP_GMRES*)ksp->data;
116: PetscInt n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
117: PetscInt i,j,*perm;
118: PetscReal *H,*Q,*Ht; /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
119: PetscReal *wr,*wi,*modul; /* Real and imaginary part and modul of the Ritz values*/
120: PetscReal *SR,*work;
121: PetscBLASInt bn,bN,lwork,idummy;
122: PetscScalar *t,sdummy = 0;
124: /* n: size of the Hessenberg matrix */
125: if (gmres->fullcycle) n = N-1;
126: /* NbrRitz: number of (harmonic) Ritz pairs to extract */
127: NbrRitz = PetscMin(*nrit,n);
129: /* Definition of PetscBLASInt for lapack routines*/
130: PetscBLASIntCast(n,&bn);
131: PetscBLASIntCast(N,&bN);
132: PetscBLASIntCast(N,&idummy);
133: PetscBLASIntCast(5*N,&lwork);
134: /* Memory allocation */
135: PetscMalloc1(bN*bN,&H);
136: PetscMalloc1(bn*bn,&Q);
137: PetscMalloc1(lwork,&work);
138: PetscMalloc1(n,&wr);
139: PetscMalloc1(n,&wi);
141: /* copy H matrix to work space */
142: if (gmres->fullcycle) {
143: PetscArraycpy(H,gmres->hes_ritz,bN*bN);
144: } else {
145: PetscArraycpy(H,gmres->hes_origin,bN*bN);
146: }
148: /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
149: if (!ritz) {
150: /* Transpose the Hessenberg matrix => Ht */
151: PetscMalloc1(bn*bn,&Ht);
152: for (i=0; i<bn; i++) {
153: for (j=0; j<bn; j++) {
154: Ht[i*bn+j] = H[j*bN+i];
155: }
156: }
157: /* Solve the system H^T*t = h^2_{m+1,m}e_m */
158: PetscCalloc1(bn,&t);
159: /* t = h^2_{m+1,m}e_m */
160: if (gmres->fullcycle) {
161: t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
162: } else {
163: t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
164: }
165: /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
166: {
167: PetscBLASInt info;
168: PetscBLASInt nrhs = 1;
169: PetscBLASInt *ipiv;
170: PetscMalloc1(bn,&ipiv);
171: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
173: PetscFree(ipiv);
174: PetscFree(Ht);
175: }
176: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
177: for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
178: PetscFree(t);
179: }
181: /* Compute (harmonic) Ritz pairs */
182: {
183: PetscBLASInt info;
184: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
185: PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
187: }
188: /* sort the (harmonic) Ritz values */
189: PetscMalloc1(n,&modul);
190: PetscMalloc1(n,&perm);
191: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
192: for (i=0; i<n; i++) perm[i] = i;
193: PetscSortRealWithPermutation(n,modul,perm);
194: /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
195: if (small) {
196: while (nb < NbrRitz) {
197: if (!wi[perm[nb]]) nb += 1;
198: else nb += 2;
199: }
200: PetscMalloc1(nb*n,&SR);
201: for (i=0; i<nb; i++) {
202: tetar[i] = wr[perm[i]];
203: tetai[i] = wi[perm[i]];
204: PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);
205: }
206: } else {
207: while (nb < NbrRitz) {
208: if (wi[perm[n-nb-1]] == 0) nb += 1;
209: else nb += 2;
210: }
211: PetscMalloc1(nb*n,&SR);
212: for (i=0; i<nb; i++) {
213: tetar[i] = wr[perm[n-nb+i]];
214: tetai[i] = wi[perm[n-nb+i]];
215: PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);
216: }
217: }
218: PetscFree(modul);
219: PetscFree(perm);
221: /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr,
222: where the columns of VV correspond to the basis of the Krylov subspace */
223: if (gmres->fullcycle) {
224: for (j=0; j<nb; j++) {
225: VecZeroEntries(S[j]);
226: VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);
227: }
228: } else {
229: for (j=0; j<nb; j++) {
230: VecZeroEntries(S[j]);
231: VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));
232: }
233: }
234: *nrit = nb;
235: PetscFree(H);
236: PetscFree(Q);
237: PetscFree(SR);
238: PetscFree(wr);
239: PetscFree(wi);
240: return 0;
241: }
242: #endif