Actual source code: bvcontour.c

slepc-3.20.1 2023-11-27
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    BV developer functions needed in contour integral methods
 12: */

 14: #include <slepc/private/bvimpl.h>
 15: #include <slepcblaslapack.h>

 17: #define p_id(i) (i*subcomm->n + subcomm->color)

 19: /*@
 20:    BVScatter - Scatters the columns of a BV to another BV created in a
 21:    subcommunicator.

 23:    Collective

 25:    Input Parameters:
 26: +  Vin  - input basis vectors (defined on the whole communicator)
 27: .  scat - VecScatter object that contains the info for the communication
 28: -  xdup - an auxiliary vector

 30:    Output Parameter:
 31: .  Vout - output basis vectors (defined on the subcommunicator)

 33:    Notes:
 34:    Currently implemented as a loop for each the active column, where each
 35:    column is scattered independently. The vector xdup is defined on the
 36:    contiguous parent communicator and have enough space to store one
 37:    duplicate of the original vector per each subcommunicator.

 39:    Level: developer

 41: .seealso: BVGetColumn()
 42: @*/
 43: PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
 44: {
 45:   PetscInt          i;
 46:   Vec               v;
 47:   const PetscScalar *array;

 49:   PetscFunctionBegin;
 54:   for (i=Vin->l;i<Vin->k;i++) {
 55:     PetscCall(BVGetColumn(Vin,i,&v));
 56:     PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
 57:     PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
 58:     PetscCall(BVRestoreColumn(Vin,i,&v));
 59:     PetscCall(VecGetArrayRead(xdup,&array));
 60:     PetscCall(VecPlaceArray(Vout->t,array));
 61:     PetscCall(BVInsertVec(Vout,i,Vout->t));
 62:     PetscCall(VecResetArray(Vout->t));
 63:     PetscCall(VecRestoreArrayRead(xdup,&array));
 64:   }
 65:   PetscFunctionReturn(PETSC_SUCCESS);
 66: }

 68: /*@
 69:    BVSumQuadrature - Computes the sum of terms required in the quadrature
 70:    rule to approximate the contour integral.

 72:    Collective

 74:    Input Parameters:
 75: +  Y       - input basis vectors
 76: .  M       - number of moments
 77: .  L       - block size
 78: .  L_max   - maximum block size
 79: .  w       - quadrature weights
 80: .  zn      - normalized quadrature points
 81: .  scat    - (optional) VecScatter object to communicate between subcommunicators
 82: .  subcomm - subcommunicator layout
 83: .  npoints - number of points to process by the subcommunicator
 84: -  useconj - whether conjugate points can be used or not

 86:    Output Parameter:
 87: .  S       - output basis vectors

 89:    Notes:
 90:    This is a generalization of BVMult(). The resulting matrix S consists of M
 91:    panels of L columns, and the following formula is computed for each panel
 92:    S_k = sum_j w_j*zn_j^k*Y_j, where Y_j is the j-th panel of Y containing
 93:    the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
 94:    the width of the panels in Y.

 96:    When using subcommunicators, Y is stored in the subcommunicators for a subset
 97:    of integration points. In that case, the computation is done in the subcomm
 98:    and then scattered to the whole communicator in S using the VecScatter scat.
 99:    The value npoints is the number of points to be processed in this subcomm
100:    and the flag useconj indicates whether symmetric points can be reused.

102:    Level: developer

104: .seealso: BVMult(), BVScatter(), BVDotQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
105: @*/
106: PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
107: {
108:   PetscInt       i,j,k,nloc;
109:   Vec            v,sj;
110:   PetscScalar    *ppk,*pv,one=1.0;

112:   PetscFunctionBegin;

117:   PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
118:   PetscCall(PetscMalloc1(npoints,&ppk));
119:   for (i=0;i<npoints;i++) ppk[i] = 1.0;
120:   PetscCall(BVCreateVec(Y,&v));
121:   for (k=0;k<M;k++) {
122:     for (j=0;j<L;j++) {
123:       PetscCall(VecSet(v,0.0));
124:       for (i=0;i<npoints;i++) {
125:         PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
126:         PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
127:       }
128:       if (PetscUnlikely(useconj)) {
129:         PetscCall(VecGetArray(v,&pv));
130:         for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
131:         PetscCall(VecRestoreArray(v,&pv));
132:       }
133:       PetscCall(BVGetColumn(S,k*L+j,&sj));
134:       if (PetscUnlikely(scat)) {
135:         PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
136:         PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
137:       } else PetscCall(VecCopy(v,sj));
138:       PetscCall(BVRestoreColumn(S,k*L+j,&sj));
139:     }
140:     for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
141:   }
142:   PetscCall(PetscFree(ppk));
143:   PetscCall(VecDestroy(&v));
144:   PetscFunctionReturn(PETSC_SUCCESS);
145: }

147: /*@
148:    BVDotQuadrature - Computes the projection terms required in the quadrature
149:    rule to approximate the contour integral.

151:    Collective

153:    Input Parameters:
154: +  Y       - first basis vectors
155: .  V       - second basis vectors
156: .  M       - number of moments
157: .  L       - block size
158: .  L_max   - maximum block size
159: .  w       - quadrature weights
160: .  zn      - normalized quadrature points
161: .  subcomm - subcommunicator layout
162: .  npoints - number of points to process by the subcommunicator
163: -  useconj - whether conjugate points can be used or not

165:    Output Parameter:
166: .  Mu      - computed result

168:    Notes:
169:    This is a generalization of BVDot(). The resulting matrix Mu consists of M
170:    blocks of size LxL (placed horizontally), each of them computed as
171:    Mu_k = sum_j w_j*zn_j^k*V'*Y_j, where Y_j is the j-th panel of Y containing
172:    the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
173:    the width of the panels in Y.

175:    When using subcommunicators, Y is stored in the subcommunicators for a subset
176:    of integration points. In that case, the computation is done in the subcomm
177:    and then the final result is combined via reduction.
178:    The value npoints is the number of points to be processed in this subcomm
179:    and the flag useconj indicates whether symmetric points can be reused.

181:    Level: developer

183: .seealso: BVDot(), BVScatter(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
184: @*/
185: PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar *Mu,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
186: {
187:   PetscMPIInt       sub_size,count;
188:   PetscInt          i,j,k,s;
189:   PetscScalar       *temp,*temp2,*ppk,alp;
190:   Mat               H;
191:   const PetscScalar *pH;
192:   MPI_Comm          child,parent;

194:   PetscFunctionBegin;

198:   PetscCall(PetscSubcommGetChild(subcomm,&child));
199:   PetscCallMPI(MPI_Comm_size(child,&sub_size));
200:   PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
201:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
202:   PetscCall(PetscArrayzero(temp2,2*M*L*L));
203:   PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
204:   PetscCall(BVSetActiveColumns(V,0,L));
205:   PetscCall(BVDot(Y,V,H));
206:   PetscCall(MatDenseGetArrayRead(H,&pH));
207:   for (i=0;i<npoints;i++) {
208:     for (j=0;j<L;j++) {
209:       for (k=0;k<L;k++) {
210:         temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
211:       }
212:     }
213:   }
214:   PetscCall(MatDenseRestoreArrayRead(H,&pH));
215:   for (i=0;i<npoints;i++) ppk[i] = 1;
216:   for (k=0;k<2*M;k++) {
217:     for (j=0;j<L;j++) {
218:       for (i=0;i<npoints;i++) {
219:         alp = ppk[i]*w[p_id(i)];
220:         for (s=0;s<L;s++) {
221:           if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
222:           else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
223:         }
224:       }
225:     }
226:     for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
227:   }
228:   for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
229:   PetscCall(PetscMPIIntCast(2*M*L*L,&count));
230:   PetscCall(PetscSubcommGetParent(subcomm,&parent));
231:   PetscCall(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
232:   PetscCall(PetscFree3(temp,temp2,ppk));
233:   PetscCall(MatDestroy(&H));
234:   PetscFunctionReturn(PETSC_SUCCESS);
235: }

237: /*@
238:    BVTraceQuadrature - Computes an estimate of the number of eigenvalues
239:    inside a region via quantities computed in the quadrature rule of
240:    contour integral methods.

242:    Collective

244:    Input Parameters:
245: +  Y       - first basis vectors
246: .  V       - second basis vectors
247: .  L       - block size
248: .  L_max   - maximum block size
249: .  w       - quadrature weights
250: .  scat    - (optional) VecScatter object to communicate between subcommunicators
251: .  subcomm - subcommunicator layout
252: .  npoints - number of points to process by the subcommunicator
253: -  useconj - whether conjugate points can be used or not

255:    Output Parameter:
256: .  est_eig - estimated eigenvalue count

258:    Notes:
259:    This function returns an estimation of the number of eigenvalues in the
260:    region, computed as trace(V'*S_0), where S_0 is the first panel of S
261:    computed by BVSumQuadrature().

263:    When using subcommunicators, Y is stored in the subcommunicators for a subset
264:    of integration points. In that case, the computation is done in the subcomm
265:    and then scattered to the whole communicator in S using the VecScatter scat.
266:    The value npoints is the number of points to be processed in this subcomm
267:    and the flag useconj indicates whether symmetric points can be reused.

269:    Level: developer

271: .seealso: BVScatter(), BVDotQuadrature(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
272: @*/
273: PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar *w,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
274: {
275:   PetscInt       i,j;
276:   Vec            y,yall,vj;
277:   PetscScalar    dot,sum=0.0,one=1.0;

279:   PetscFunctionBegin;

284:   PetscCall(BVCreateVec(Y,&y));
285:   PetscCall(BVCreateVec(V,&yall));
286:   for (j=0;j<L;j++) {
287:     PetscCall(VecSet(y,0.0));
288:     for (i=0;i<npoints;i++) {
289:       PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
290:       PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
291:     }
292:     PetscCall(BVGetColumn(V,j,&vj));
293:     if (scat) {
294:       PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
295:       PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
296:       PetscCall(VecDot(vj,yall,&dot));
297:     } else PetscCall(VecDot(vj,y,&dot));
298:     PetscCall(BVRestoreColumn(V,j,&vj));
299:     if (useconj) sum += 2.0*PetscRealPart(dot);
300:     else sum += dot;
301:   }
302:   *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
303:   PetscCall(VecDestroy(&y));
304:   PetscCall(VecDestroy(&yall));
305:   PetscFunctionReturn(PETSC_SUCCESS);
306: }

308: static PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
309: {
310:   PetscInt       i,j,k,ml=S->k;
311:   PetscMPIInt    len;
312:   PetscScalar    *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
313:   PetscBLASInt   l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc,lds;
314: #if defined(PETSC_USE_COMPLEX)
315:   PetscReal      *rwork;
316: #endif

318:   PetscFunctionBegin;
319:   PetscCall(PetscBLASIntCast(S->ld,&lds));
320:   PetscCall(BVGetArray(S,&sarray));
321:   PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
322: #if defined(PETSC_USE_COMPLEX)
323:   PetscCall(PetscMalloc1(5*ml,&rwork));
324: #endif
325:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));

327:   PetscCall(PetscArrayzero(B,ml*ml));
328:   for (i=0;i<ml;i++) B[i*ml+i]=1;

330:   for (k=0;k<2;k++) {
331:     PetscCall(PetscBLASIntCast(S->n,&m));
332:     PetscCall(PetscBLASIntCast(ml,&l));
333:     n = l; lda = m; ldb = m; ldc = l;
334:     if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lds,sarray,&lds,&beta,pA,&ldc));
335:     else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
336:     PetscCall(PetscArrayzero(temp2,ml*ml));
337:     PetscCall(PetscMPIIntCast(ml*ml,&len));
338:     PetscCall(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));

340:     PetscCall(PetscBLASIntCast(ml,&m));
341:     n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
342: #if defined(PETSC_USE_COMPLEX)
343:     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
344: #else
345:     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
346: #endif
347:     SlepcCheckLapackInfo("gesvd",info);

349:     PetscCall(PetscBLASIntCast(S->n,&l));
350:     PetscCall(PetscBLASIntCast(ml,&n));
351:     m = n; lda = l; ldb = m; ldc = l;
352:     if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lds,temp2,&ldb,&beta,Q1,&ldc));
353:     else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));

355:     PetscCall(PetscBLASIntCast(ml,&l));
356:     m = l; n = l; lda = l; ldb = m; ldc = l;
357:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
358:     for (i=0;i<ml;i++) {
359:       sigma[i] = PetscSqrtReal(sigma[i]);
360:       for (j=0;j<S->n;j++) {
361:         if (k%2) Q2[j+i*S->n] /= sigma[i];
362:         else Q1[j+i*S->n] /= sigma[i];
363:       }
364:       for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
365:     }
366:   }

368:   PetscCall(PetscBLASIntCast(ml,&m));
369:   n = m; lda = m; ldu=1; ldvt=1;
370: #if defined(PETSC_USE_COMPLEX)
371:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
372: #else
373:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
374: #endif
375:   SlepcCheckLapackInfo("gesvd",info);

377:   PetscCall(PetscBLASIntCast(S->n,&l));
378:   PetscCall(PetscBLASIntCast(ml,&n));
379:   m = n; lda = l; ldb = m;
380:   if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&lds));
381:   else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&lds));

383:   PetscCall(PetscFPTrapPop());
384:   PetscCall(BVRestoreArray(S,&sarray));

386:   if (rank) {
387:     (*rank) = 0;
388:     for (i=0;i<ml;i++) {
389:       if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
390:     }
391:   }
392:   PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
393: #if defined(PETSC_USE_COMPLEX)
394:   PetscCall(PetscFree(rwork));
395: #endif
396:   PetscFunctionReturn(PETSC_SUCCESS);
397: }

399: static PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
400: {
401:   PetscInt       i,n,ml=S->k;
402:   PetscBLASInt   m,lda,lwork,info;
403:   PetscScalar    *work;
404:   PetscReal      *rwork;
405:   Mat            A;
406:   Vec            v;

408:   PetscFunctionBegin;
409:   /* Compute QR factorizaton of S */
410:   PetscCall(BVGetSizes(S,NULL,&n,NULL));
411:   n = PetscMin(n,ml);
412:   PetscCall(BVSetActiveColumns(S,0,n));
413:   PetscCall(PetscArrayzero(pA,ml*n));
414:   PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
415:   PetscCall(BVOrthogonalize(S,A));
416:   if (n<ml) {
417:     /* the rest of the factorization */
418:     for (i=n;i<ml;i++) {
419:       PetscCall(BVGetColumn(S,i,&v));
420:       PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
421:       PetscCall(BVRestoreColumn(S,i,&v));
422:     }
423:   }
424:   PetscCall(PetscBLASIntCast(n,&lda));
425:   PetscCall(PetscBLASIntCast(ml,&m));
426:   PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
427:   lwork = 5*m;
428:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
429: #if !defined (PETSC_USE_COMPLEX)
430:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
431: #else
432:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
433: #endif
434:   SlepcCheckLapackInfo("gesvd",info);
435:   PetscCall(PetscFPTrapPop());
436:   *rank = 0;
437:   for (i=0;i<n;i++) {
438:     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
439:   }
440:   /* n first columns of A have the left singular vectors */
441:   PetscCall(BVMultInPlace(S,A,0,*rank));
442:   PetscCall(PetscFree2(work,rwork));
443:   PetscCall(MatDestroy(&A));
444:   PetscFunctionReturn(PETSC_SUCCESS);
445: }

447: static PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
448: {
449:   PetscInt       i,j,n,ml=S->k;
450:   PetscBLASInt   m,k_,lda,lwork,info;
451:   PetscScalar    *work,*T,*U,*R,sone=1.0,zero=0.0;
452:   PetscReal      *rwork;
453:   Mat            A;

455:   PetscFunctionBegin;
456:   /* Compute QR factorizaton of S */
457:   PetscCall(BVGetSizes(S,NULL,&n,NULL));
458:   PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
459:   PetscCall(BVSetActiveColumns(S,0,ml));
460:   PetscCall(PetscArrayzero(pA,ml*ml));
461:   PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
462:   PetscCall(BVOrthogonalize(S,A));
463:   PetscCall(MatDestroy(&A));

465:   /* SVD of first (M-1)*L diagonal block */
466:   PetscCall(PetscBLASIntCast((M-1)*L,&m));
467:   PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
468:   for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
469:   lwork = 5*m;
470:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
471: #if !defined (PETSC_USE_COMPLEX)
472:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
473: #else
474:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
475: #endif
476:   SlepcCheckLapackInfo("gesvd",info);
477:   PetscCall(PetscFPTrapPop());
478:   *rank = 0;
479:   for (i=0;i<m;i++) {
480:     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
481:   }
482:   PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
483:   PetscCall(BVSetActiveColumns(S,0,m));
484:   PetscCall(BVMultInPlace(S,A,0,*rank));
485:   PetscCall(MatDestroy(&A));
486:   /* Projected linear system */
487:   /* m first columns of A have the right singular vectors */
488:   PetscCall(PetscBLASIntCast(*rank,&k_));
489:   PetscCall(PetscBLASIntCast(ml,&lda));
490:   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
491:   PetscCall(PetscArrayzero(pA,ml*ml));
492:   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
493:   for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
494:   PetscCall(PetscFree5(T,R,U,work,rwork));
495:   PetscFunctionReturn(PETSC_SUCCESS);
496: }

498: /*@
499:    BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
500:    values) and determine the numerical rank according to a tolerance.

502:    Collective

504:    Input Parameters:
505: +  S     - the basis vectors
506: .  m     - the moment degree
507: .  l     - the block size
508: .  delta - the tolerance used to determine the rank
509: -  meth  - the method to be used

511:    Output Parameters:
512: +  A     - workspace, on output contains relevant values in the CAA method
513: .  sigma - computed singular values
514: -  rank  - estimated rank (optional)

516:    Notes:
517:    This function computes [U,Sigma,V] = svd(S) and replaces S with U.
518:    The current implementation computes this via S'*S, and it may include
519:    some kind of iterative refinement to improve accuracy in some cases.

521:    The parameters m and l refer to the moment and block size of contour
522:    integral methods. All columns up to m*l are modified, and the active
523:    columns are set to 0..m*l.

525:    The method is one of BV_SVD_METHOD_REFINE, BV_SVD_METHOD_QR, BV_SVD_METHOD_QR_CAA.

527:    The A workspace should be m*l*m*l in size.

529:    Once the decomposition is computed, the numerical rank is estimated
530:    by counting the number of singular values that are larger than the
531:    tolerance delta, relative to the first singular value.

533:    Level: developer

535: .seealso: BVSetActiveColumns()
536: @*/
537: PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar *A,PetscReal *sigma,PetscInt *rank)
538: {
539:   PetscFunctionBegin;
545:   PetscAssertPointer(A,6);
546:   PetscAssertPointer(sigma,7);
547:   PetscAssertPointer(rank,8);

549:   PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
550:   PetscCall(BVSetActiveColumns(S,0,m*l));
551:   switch (meth) {
552:     case BV_SVD_METHOD_REFINE:
553:       PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
554:       break;
555:     case BV_SVD_METHOD_QR:
556:       PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
557:       break;
558:     case BV_SVD_METHOD_QR_CAA:
559:       PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
560:       break;
561:   }
562:   PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
563:   PetscFunctionReturn(PETSC_SUCCESS);
564: }

566: /*@
567:    BVCISSResizeBases - Resize the bases involved in CISS solvers when the L grows.

569:    Logically Collective

571:    Input Parameters:
572: +  S      - basis of L*M columns
573: .  V      - basis of L columns (may be associated to subcommunicators)
574: .  Y      - basis of npoints*L columns
575: .  Lold   - old value of L
576: .  Lnew   - new value of L
577: .  M      - the moment size
578: -  npoints - number of integration points

580:    Level: developer

582: .seealso: BVResize()
583: @*/
584: PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
585: {
586:   PetscInt       i,j;

588:   PetscFunctionBegin;

597:   PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
598:   PetscCall(BVResize(V,Lnew,PETSC_TRUE));
599:   PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
600:   /* columns of Y are interleaved */
601:   for (i=npoints-1;i>=0;i--) {
602:     for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
603:   }
604:   PetscFunctionReturn(PETSC_SUCCESS);
605: }