IFPACK Development
Factor_dh.c
/*@HEADER
// ***********************************************************************
//
// Ifpack: Object-Oriented Algebraic Preconditioner Package
// Copyright (2002) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ***********************************************************************
//@HEADER
*/

#include "Factor_dh.h"
#include "Vec_dh.h"
#include "Mat_dh.h"
#include "SubdomainGraph_dh.h"
#include "TimeLog_dh.h"
#include "Mem_dh.h"
#include "Numbering_dh.h"
#include "Hash_i_dh.h"
#include "Parser_dh.h"
#include "mat_dh_private.h"
#include "getRow_dh.h"
#include "Euclid_dh.h"
#include "io_dh.h"

/* suppress compiler complaints */
void
Factor_dh_junk ()
{
}

static void adjust_bj_private (Factor_dh mat);
static void unadjust_bj_private (Factor_dh mat);


#undef __FUNC__
#define __FUNC__ "Factor_dhCreate"
void
Factor_dhCreate (Factor_dh * mat)
{
  START_FUNC_DH struct _factor_dh *tmp;

  if (np_dh > MAX_MPI_TASKS)
    {
      SET_V_ERROR ("you must change MAX_MPI_TASKS and recompile!");
    }

  tmp = (struct _factor_dh *) MALLOC_DH (sizeof (struct _factor_dh));
  CHECK_V_ERROR;
  *mat = tmp;

  tmp->m = 0;
  tmp->n = 0;
  tmp->id = myid_dh;
  tmp->beg_row = 0;
  tmp->first_bdry = 0;
  tmp->bdry_count = 0;
  tmp->blockJacobi = false;

  tmp->rp = NULL;
  tmp->cval = NULL;
  tmp->aval = NULL;
  tmp->fill = NULL;
  tmp->diag = NULL;
  tmp->alloc = 0;

  tmp->work_y_lo = tmp->work_x_hi = NULL;
  tmp->sendbufLo = tmp->sendbufHi = NULL;
  tmp->sendindLo = tmp->sendindHi = NULL;
  tmp->num_recvLo = tmp->num_recvHi = 0;
  tmp->num_sendLo = tmp->num_sendHi = 0;
  tmp->sendlenLo = tmp->sendlenHi = 0;

  tmp->solveIsSetup = false;
  tmp->numbSolve = NULL;

  tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Factor");

/* Factor_dhZeroTiming(tmp); CHECK_V_ERROR; */
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "Factor_dhDestroy"
void
Factor_dhDestroy (Factor_dh mat)
{
  START_FUNC_DH if (mat->rp != NULL)
    {
      FREE_DH (mat->rp);
      CHECK_V_ERROR;
    }
  if (mat->cval != NULL)
    {
      FREE_DH (mat->cval);
      CHECK_V_ERROR;
    }
  if (mat->aval != NULL)
    {
      FREE_DH (mat->aval);
      CHECK_V_ERROR;
    }
  if (mat->diag != NULL)
    {
      FREE_DH (mat->diag);
      CHECK_V_ERROR;
    }
  if (mat->fill != NULL)
    {
      FREE_DH (mat->fill);
      CHECK_V_ERROR;
    }

  if (mat->work_y_lo != NULL)
    {
      FREE_DH (mat->work_y_lo);
      CHECK_V_ERROR;
    }
  if (mat->work_x_hi != NULL)
    {
      FREE_DH (mat->work_x_hi);
      CHECK_V_ERROR;
    }
  if (mat->sendbufLo != NULL)
    {
      FREE_DH (mat->sendbufLo);
      CHECK_V_ERROR;
    }
  if (mat->sendbufHi != NULL)
    {
      FREE_DH (mat->sendbufHi);
      CHECK_V_ERROR;
    }
  if (mat->sendindLo != NULL)
    {
      FREE_DH (mat->sendindLo);
      CHECK_V_ERROR;
    }
  if (mat->sendindHi != NULL)
    {
      FREE_DH (mat->sendindHi);
      CHECK_V_ERROR;
    }

  if (mat->numbSolve != NULL)
    {
      Numbering_dhDestroy (mat->numbSolve);
      CHECK_V_ERROR;
    }
  FREE_DH (mat);
  CHECK_V_ERROR;
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "create_fake_mat_private"
static void
create_fake_mat_private (Factor_dh mat, Mat_dh * matFakeIN)
{
  START_FUNC_DH Mat_dh matFake;
  Mat_dhCreate (matFakeIN);
  CHECK_V_ERROR;
  matFake = *matFakeIN;
  matFake->m = mat->m;
  matFake->n = mat->n;
  matFake->rp = mat->rp;
  matFake->cval = mat->cval;
  matFake->aval = mat->aval;
  matFake->beg_row = mat->beg_row;
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "destroy_fake_mat_private"
static void
destroy_fake_mat_private (Mat_dh matFake)
{
  START_FUNC_DH matFake->rp = NULL;
  matFake->cval = NULL;
  matFake->aval = NULL;
  Mat_dhDestroy (matFake);
  CHECK_V_ERROR;
END_FUNC_DH}



#undef __FUNC__
#define __FUNC__ "Factor_dhReadNz"
int
Factor_dhReadNz (Factor_dh mat)
{
  START_FUNC_DH int ierr, retval = mat->rp[mat->m];
  int nz = retval;
  ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh);
  CHECK_MPI_ERROR (ierr);
END_FUNC_VAL (retval)}



#undef __FUNC__
#define __FUNC__ "Factor_dhPrintRows"
void
Factor_dhPrintRows (Factor_dh mat, FILE * fp)
{
  START_FUNC_DH int beg_row = mat->beg_row;
  int m = mat->m, i, j;
  bool noValues;

  noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
  if (mat->aval == NULL)
    noValues = true;

  if (mat->blockJacobi)
    {
      adjust_bj_private (mat);
      CHECK_V_ERROR;
    }

  fprintf (fp,
	   "\n----------------------- Factor_dhPrintRows ------------------\n");
  if (mat->blockJacobi)
    {
      fprintf (fp,
	       "@@@ Block Jacobi ILU; adjusted values from zero-based @@@\n");
    }

  for (i = 0; i < m; ++i)
    {
      fprintf (fp, "%i :: ", 1 + i + beg_row);
      for (j = mat->rp[i]; j < mat->rp[i + 1]; ++j)
	{
	  if (noValues)
	    {
	      fprintf (fp, "%i ", 1 + mat->cval[j]);
	    }
	  else
	    {
	      fprintf (fp, "%i,%g ; ", 1 + mat->cval[j], mat->aval[j]);
	    }
	}
      fprintf (fp, "\n");
    }

  if (mat->blockJacobi)
    {
      unadjust_bj_private (mat);
      CHECK_V_ERROR;
    }
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "Factor_dhPrintDiags"
void
Factor_dhPrintDiags (Factor_dh mat, FILE * fp)
{
  START_FUNC_DH int beg_row = mat->beg_row;
  int m = mat->m, i, pe, *diag = mat->diag;
  REAL_DH *aval = mat->aval;


  fprintf_dh (fp,
	      "\n----------------------- Factor_dhPrintDiags ------------------\n");
  fprintf_dh (fp, "(grep for 'ZERO')\n");

  for (pe = 0; pe < np_dh; ++pe)
    {
      MPI_Barrier (comm_dh);
      if (mat->id == pe)
	{
	  fprintf (fp, "----- subdomain: %i processor: %i\n", pe, myid_dh);
	  for (i = 0; i < m; ++i)
	    {
	      REAL_DH val = aval[diag[i]];
	      if (val)
		{
		  fprintf (fp, "%i %g\n", i + 1 + beg_row, aval[diag[i]]);
		}
	      else
		{
		  fprintf (fp, "%i %g ZERO\n", i + 1 + beg_row,
			   aval[diag[i]]);
		}
	    }
	}
    }
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "Factor_dhPrintGraph"
void
Factor_dhPrintGraph (Factor_dh mat, char *filename)
{
  START_FUNC_DH FILE * fp;
  int i, j, m = mat->m, *work, *rp = mat->rp, *cval = mat->cval;

  if (np_dh > 1)
    SET_V_ERROR ("only implemented for single mpi task");

  work = (int *) MALLOC_DH (m * sizeof (int));
  CHECK_V_ERROR;

  fp = openFile_dh (filename, "w");
  CHECK_V_ERROR;

  for (i = 0; i < m; ++i)
    {
      for (j = 0; j < m; ++j)
	work[j] = 0;
      /* was "j < rp[i]", an empty loop; the row's nonzeros run to rp[i+1] */
      for (j = rp[i]; j < rp[i + 1]; ++j)
	work[cval[j]] = 1;

      for (j = 0; j < m; ++j)
	{
	  if (work[j])
	    {
	      fprintf (fp, " x ");
	    }
	  else
	    {
	      fprintf (fp, "   ");
	    }
	}
      fprintf (fp, "\n");
    }

  closeFile_dh (fp);
  CHECK_V_ERROR;

  FREE_DH (work);
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "Factor_dhPrintTriples"
void
Factor_dhPrintTriples (Factor_dh mat, char *filename)
{
  START_FUNC_DH int pe, i, j;
  int m = mat->m, *rp = mat->rp;
  int beg_row = mat->beg_row;
  REAL_DH *aval = mat->aval;
  bool noValues;
  FILE *fp;

  if (mat->blockJacobi)
    {
      adjust_bj_private (mat);
      CHECK_V_ERROR;
    }

  noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
  if (noValues)
    aval = NULL;

  for (pe = 0; pe < np_dh; ++pe)
    {
      MPI_Barrier (comm_dh);
      if (mat->id == pe)
	{
	  if (pe == 0)
	    {
	      fp = openFile_dh (filename, "w");
	      CHECK_V_ERROR;
	    }
	  else
	    {
	      fp = openFile_dh (filename, "a");
	      CHECK_V_ERROR;
	    }

	  for (i = 0; i < m; ++i)
	    {
	      for (j = rp[i]; j < rp[i + 1]; ++j)
		{
		  if (noValues)
		    {
		      fprintf (fp, "%i %i\n", 1 + i + beg_row,
			       1 + mat->cval[j]);
		    }
		  else
		    {
		      fprintf (fp, TRIPLES_FORMAT,
			       1 + i + beg_row, 1 + mat->cval[j], aval[j]);
		    }
		}
	    }
	  closeFile_dh (fp);
	  CHECK_V_ERROR;
	}
    }

  if (mat->blockJacobi)
    {
      unadjust_bj_private (mat);
      CHECK_V_ERROR;
    }
END_FUNC_DH}

/*--------------------------------------------------------------------------------
 * Functions to set up the matrix for triangular solves.  These are similar to
 * MatVecSetup(), except that there are two cases: subdomains ordered lower than
 * ourselves, and subdomains ordered higher than ourselves.  This SolveSetup
 * is used for Parallel ILU (PILU).  The following are adapted/modified from
 * Edmond Chow's ParaSails.
 *--------------------------------------------------------------------------------*/
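
/* A minimal call-sequence sketch (illustrative only, not compiled in).
   It assumes 'ctx' is a fully initialized Euclid_dh object whose ILU
   phase has already produced the factor ctx->F; the subdomain graph is
   assumed here to be reachable as ctx->sg:

     Factor_dhSolveSetup (ctx->F, ctx->sg);    (one-time comm setup)
     Factor_dhSolve (rhs, lhs, ctx);           (repeated L/U sweeps)

   SolveSetup builds the persistent send/receive requests that
   Factor_dhSolve later activates with MPI_Startall(). */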

/* adapted from Edmond Chow's ParaSails */

/* 1. start receives of node data to be received from other processors;
   2. send to other processors the list of nodes this processor needs
      to receive from them.
   Returns: the number of processors from whom nodes will be received.
*/
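
/* Sketch of the handshake implemented below, with the tags used in the
   code: the requesting task MPI_Isends the list of global indices it
   needs (tag 444) and posts a persistent MPI_Recv_init (tag 555) for
   the corresponding values; the owning task posts the matching
   MPI_Irecv (tag 444) and MPI_Send_init (tag 555) in
   setup_sends_private(). */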
#undef __FUNC__
#define __FUNC__ "setup_receives_private"
static int
setup_receives_private (Factor_dh mat, int *beg_rows, int *end_rows,
			double *recvBuf, MPI_Request * req,
			int *reqind, int reqlen, int *outlist, bool debug)
{
  START_FUNC_DH int i, j, this_pe, num_recv = 0;
  MPI_Request request;

  if (debug)
    {
      fprintf (logFile,
	       "\nFACT ========================================================\n");
      fprintf (logFile, "FACT STARTING: setup_receives_private\n");
    }

  for (i = 0; i < reqlen; i = j)
    {				/* j is set below */
      /* determine the processor that owns the row with index reqind[i] */
      this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]);
      CHECK_ERROR (-1);

      /* Figure out other rows we need from this_pe */
      for (j = i + 1; j < reqlen; j++)
	{
	  int idx = reqind[j];
	  if (idx < beg_rows[this_pe] || idx >= end_rows[this_pe])
	    {
	      break;
	    }
	}

      if (debug)
	{
	  int k;
	  fprintf (logFile, "FACT need nodes from P_%i: ", this_pe);
	  for (k = i; k < j; ++k)
	    fprintf (logFile, "%i ", 1 + reqind[k]);
	  fprintf (logFile, "\n");
	}

      /* Record the number of indices needed from this_pe */
      outlist[this_pe] = j - i;

      /* Request rows in reqind[i..j-1] */
      /* Note: the receiving processor, this_pe, doesn't yet know
         about the incoming request, hence, can't set up a matching
         receive; this matching receive will be started later,
         in setup_sends_private.
       */
      MPI_Isend (reqind + i, j - i, MPI_INT, this_pe, 444, comm_dh, &request);
      MPI_Request_free (&request);

      /* set up persistent comms for receiving the values from this_pe */
      MPI_Recv_init (recvBuf + i, j - i, MPI_DOUBLE, this_pe, 555,
		     comm_dh, req + num_recv);
      ++num_recv;
    }

  END_FUNC_VAL (num_recv);
}

/*
   1. start receive to get list of nodes that this processor
      needs to send to other processors
   2. start persistent comms to send the data
*/
#undef __FUNC__
#define __FUNC__ "setup_sends_private"
static void
setup_sends_private (Factor_dh mat, int *inlist,
		     int *o2n_subdomain, bool debug)
{
  START_FUNC_DH int i, jLo, jHi, sendlenLo, sendlenHi, first = mat->beg_row;
  MPI_Request *requests = mat->requests, *sendReq;
  MPI_Status *statuses = mat->status;
  bool isHigher;
  int *rcvBuf;
  double *sendBuf;
  int myidNEW = o2n_subdomain[myid_dh];
  int count;

  if (debug)
    {
      fprintf (logFile, "\nFACT STARTING: setup_sends_private\n");
    }

  /* Determine size of and allocate sendbuf and sendind */
  sendlenLo = sendlenHi = 0;
  for (i = 0; i < np_dh; i++)
    {
      if (inlist[i])
	{
	  if (o2n_subdomain[i] < myidNEW)
	    {
	      sendlenLo += inlist[i];
	    }
	  else
	    {
	      sendlenHi += inlist[i];
	    }
	}
    }

  mat->sendlenLo = sendlenLo;
  mat->sendlenHi = sendlenHi;
  mat->sendbufLo = (double *) MALLOC_DH (sendlenLo * sizeof (double));
  CHECK_V_ERROR;
  mat->sendbufHi = (double *) MALLOC_DH (sendlenHi * sizeof (double));
  CHECK_V_ERROR;
  mat->sendindLo = (int *) MALLOC_DH (sendlenLo * sizeof (int));
  CHECK_V_ERROR;
  mat->sendindHi = (int *) MALLOC_DH (sendlenHi * sizeof (int));
  CHECK_V_ERROR;

  count = 0;			/* number of calls to MPI_Irecv() */
  jLo = jHi = 0;
  mat->num_sendLo = 0;
  mat->num_sendHi = 0;
  for (i = 0; i < np_dh; i++)
    {
      if (inlist[i])
	{
	  isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;

	  /* Post receive for the actual indices */
	  if (isHigher)
	    {
	      rcvBuf = &mat->sendindHi[jHi];
	      sendBuf = &mat->sendbufHi[jHi];
	      sendReq = &mat->send_reqHi[mat->num_sendHi];
	      mat->num_sendHi++;
	      jHi += inlist[i];
	    }
	  else
	    {
	      rcvBuf = &mat->sendindLo[jLo];
	      sendBuf = &mat->sendbufLo[jLo];
	      sendReq = &mat->send_reqLo[mat->num_sendLo];
	      mat->num_sendLo++;
	      jLo += inlist[i];
	    }

	  /* matching receive, for list of unknowns that will be sent,
	     during the triangular solves, from ourselves to P_i
	   */
	  MPI_Irecv (rcvBuf, inlist[i], MPI_INT, i, 444, comm_dh,
		     requests + count);
	  ++count;

	  /* Set up the send */
	  MPI_Send_init (sendBuf, inlist[i], MPI_DOUBLE, i, 555, comm_dh,
			 sendReq);
	}
    }

  /* note: count = mat->num_sendLo + mat->num_sendHi */
  MPI_Waitall (count, requests, statuses);

  if (debug)
    {
      int j;
      jLo = jHi = 0;

      fprintf (logFile,
	       "\nFACT columns that I must send to other subdomains:\n");
      for (i = 0; i < np_dh; i++)
	{
	  if (inlist[i])
	    {
	      isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;
	      if (isHigher)
		{
		  rcvBuf = &mat->sendindHi[jHi];
		  jHi += inlist[i];
		}
	      else
		{
		  rcvBuf = &mat->sendindLo[jLo];
		  jLo += inlist[i];
		}

	      fprintf (logFile, "FACT send to P_%i: ", i);
	      for (j = 0; j < inlist[i]; ++j)
		fprintf (logFile, "%i ", rcvBuf[j] + 1);
	      fprintf (logFile, "\n");
	    }
	}
    }

  /* convert global indices to local indices */
  /* these are all indices on this processor */
  for (i = 0; i < mat->sendlenLo; i++)
    mat->sendindLo[i] -= first;
  for (i = 0; i < mat->sendlenHi; i++)
    mat->sendindHi[i] -= first;
END_FUNC_DH}
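
/* After setup, sendindLo/sendindHi hold *local* indices of the boundary
   unknowns this task must ship to its neighbors; during the solves,
   Factor_dhSolve gathers work vector entries through these indices into
   sendbufLo/sendbufHi before starting the persistent sends. */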



#undef __FUNC__
#define __FUNC__ "Factor_dhSolveSetup"
void
Factor_dhSolveSetup (Factor_dh mat, SubdomainGraph_dh sg)
{
  START_FUNC_DH int *outlist, *inlist;
  int i, row, *rp = mat->rp, *cval = mat->cval;
  Numbering_dh numb;
  int m = mat->m;
  /* int firstLocalRow = mat->beg_row; */
  int *beg_rows = sg->beg_rowP, *row_count = sg->row_count, *end_rows;
  Mat_dh matFake;
  bool debug = false;
  double *recvBuf;

  if (mat->debug && logFile != NULL)
    debug = true;

  end_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
  CHECK_V_ERROR;
  outlist = (int *) MALLOC_DH (np_dh * sizeof (int));
  CHECK_V_ERROR;
  inlist = (int *) MALLOC_DH (np_dh * sizeof (int));
  CHECK_V_ERROR;
  for (i = 0; i < np_dh; ++i)
    {
      inlist[i] = 0;
      outlist[i] = 0;
      end_rows[i] = beg_rows[i] + row_count[i];
    }

  /* Create Numbering object */
  create_fake_mat_private (mat, &matFake);
  CHECK_V_ERROR;
  Numbering_dhCreate (&(mat->numbSolve));
  CHECK_V_ERROR;
  numb = mat->numbSolve;
  Numbering_dhSetup (numb, matFake);
  CHECK_V_ERROR;
  destroy_fake_mat_private (matFake);
  CHECK_V_ERROR;

  if (debug)
    {
      fprintf (stderr, "Numbering_dhSetup completed\n");
    }

  /* Allocate work vectors; the first m entries hold the local part of x,
     and the remaining entries receive external values */
  i = m + numb->num_ext;
  mat->work_y_lo = (double *) MALLOC_DH (i * sizeof (double));
  CHECK_V_ERROR;
  mat->work_x_hi = (double *) MALLOC_DH (i * sizeof (double));
  CHECK_V_ERROR;
  if (debug)
    {
      fprintf (logFile, "FACT num_extLo= %i num_extHi= %i\n",
	       numb->num_extLo, numb->num_extHi);
    }

  mat->num_recvLo = 0;
  mat->num_recvHi = 0;
  if (numb->num_extLo)
    {
      recvBuf = mat->work_y_lo + m;
      mat->num_recvLo = setup_receives_private (mat, beg_rows, end_rows,
						recvBuf, mat->recv_reqLo,
						numb->idx_extLo,
						numb->num_extLo, outlist,
						debug);
      CHECK_V_ERROR;

    }

  if (numb->num_extHi)
    {
      recvBuf = mat->work_x_hi + m + numb->num_extLo;
      mat->num_recvHi = setup_receives_private (mat, beg_rows, end_rows,
						recvBuf, mat->recv_reqHi,
						numb->idx_extHi,
						numb->num_extHi, outlist,
						debug);
      CHECK_V_ERROR;
    }

  MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh);
  /* At this point, inlist[j] contains the number of indices
     that this processor must send to P_j.  Processors next need
     to exchange the actual lists of required indices; this is done
     in setup_sends_private()
   */

  setup_sends_private (mat, inlist, sg->o2n_sub, debug);
  CHECK_V_ERROR;

  /* Convert column indices in each row to local indices */
  for (row = 0; row < m; row++)
    {
      int len = rp[row + 1] - rp[row];
      int *ind = cval + rp[row];
      Numbering_dhGlobalToLocal (numb, len, ind, ind);
      CHECK_V_ERROR;
    }

  FREE_DH (outlist);
  CHECK_V_ERROR;
  FREE_DH (inlist);
  CHECK_V_ERROR;
  FREE_DH (end_rows);
  CHECK_V_ERROR;

  if (debug)
    {
      int ii, jj;

      fprintf (logFile,
	       "\n--------- row/col structure, after global to local renumbering\n");
      for (ii = 0; ii < mat->m; ++ii)
	{
	  fprintf (logFile, "local row %i :: ", ii + 1);
	  for (jj = mat->rp[ii]; jj < mat->rp[ii + 1]; ++jj)
	    {
	      fprintf (logFile, "%i ", 1 + mat->cval[jj]);
	    }
	  fprintf (logFile, "\n");
	}
      fprintf (logFile, "\n");
      fflush (logFile);
    }
END_FUNC_DH}

/* Solve for the MPI implementation of PILU.  This function is so
   similar to MatVec that it lives here, instead of with the other
   solves located in Euclid_apply.c.
*/
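
/* In exact arithmetic the two sweeps below compute, for the factors
   L (unit lower triangular) and U of A ~ L*U:

     forward:   y[i] = rhs[i] - sum_{j<i} L[i][j] * y[j]
     backward:  x[i] = (y[i] - sum_{j>i} U[i][j] * x[j]) * aval[diag[i]]

   Note that the backward sweep multiplies by the stored diagonal entry
   rather than dividing, so aval[diag[i]] evidently holds the reciprocal
   pivot 1/U[i][i]. */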
static void forward_solve_private (int m, int from, int to,
				   int *rp, int *cval, int *diag,
				   double *aval, double *rhs, double *work_y,
				   bool debug);

static void backward_solve_private (int m, int from, int to,
				    int *rp, int *cval, int *diag,
				    double *aval, double *work_y,
				    double *work_x, bool debug);

static int beg_rowG;


#undef __FUNC__
#define __FUNC__ "Factor_dhSolve"
void
Factor_dhSolve (double *rhs, double *lhs, Euclid_dh ctx)
{
  START_FUNC_DH Factor_dh mat = ctx->F;
  int from, to;
  int ierr, i, m = mat->m, first_bdry = mat->first_bdry;
  int offsetLo = mat->numbSolve->num_extLo;
  int offsetHi = mat->numbSolve->num_extHi;
  int *rp = mat->rp, *cval = mat->cval, *diag = mat->diag;
  double *aval = mat->aval;
  int *sendindLo = mat->sendindLo, *sendindHi = mat->sendindHi;
  int sendlenLo = mat->sendlenLo, sendlenHi = mat->sendlenHi;
  double *sendbufLo = mat->sendbufLo, *sendbufHi = mat->sendbufHi;
  double *work_y = mat->work_y_lo;
  double *work_x = mat->work_x_hi;
  bool debug = false;

  if (mat->debug && logFile != NULL)
    debug = true;
  if (debug)
    beg_rowG = ctx->F->beg_row;

/*
for (i=0; i<m+offsetLo+offsetHi; ++i) {
  work_y[i] = -99;
  work_x[i] = -99;
}
*/

  if (debug)
    {
      fprintf (logFile,
	       "\n=====================================================\n");
      fprintf (logFile,
	       "FACT Factor_dhSolve: num_recvLo= %i num_recvHi = %i\n",
	       mat->num_recvLo, mat->num_recvHi);
    }

  /* start receives from higher and lower ordered subdomains */
  if (mat->num_recvLo)
    {
      MPI_Startall (mat->num_recvLo, mat->recv_reqLo);
    }
  if (mat->num_recvHi)
    {
      MPI_Startall (mat->num_recvHi, mat->recv_reqHi);
    }

  /*-------------------------------------------------------------
   * PART 1: Forward Solve Ly = rhs for y ('y' is called 'work')
   *-------------------------------------------------------------*/
  /* forward triangular solve on interior nodes */
  from = 0;
  to = first_bdry;
  if (from != to)
    {
      forward_solve_private (m, from, to, rp, cval, diag, aval,
			     rhs, work_y, debug);
      CHECK_V_ERROR;
    }

  /* wait for receives from lower ordered subdomains, then
     complete forward solve on boundary nodes.
   */
  if (mat->num_recvLo)
    {
      MPI_Waitall (mat->num_recvLo, mat->recv_reqLo, mat->status);

      /* debug block */
      if (debug)
	{
	  fprintf (logFile,
		   "FACT got 'y' values from lower neighbors; work buffer:\n ");
	  for (i = 0; i < offsetLo; ++i)
	    {
	      fprintf (logFile, "%g ", work_y[m + i]);
	    }
	}
    }

  /* forward triangular solve on boundary nodes */
  from = first_bdry;
  to = m;
  if (from != to)
    {
      forward_solve_private (m, from, to, rp, cval, diag, aval,
			     rhs, work_y, debug);
      CHECK_V_ERROR;
    }

  /* send boundary elements from work vector 'y' to higher ordered subdomains */
  if (mat->num_sendHi)
    {

      /* copy elements to send buffer */
      for (i = 0; i < sendlenHi; i++)
	{
	  sendbufHi[i] = work_y[sendindHi[i]];
	}

      /* start the sends */
      MPI_Startall (mat->num_sendHi, mat->send_reqHi);

      /* debug block */
      if (debug)
	{
	  fprintf (logFile,
		   "\nFACT sending 'y' values to higher neighbor:\nFACT ");
	  for (i = 0; i < sendlenHi; i++)
	    {
	      fprintf (logFile, "%g ", sendbufHi[i]);
	    }
	  fprintf (logFile, "\n");
	}
    }

  /*----------------------------------------------------------
   * PART 2: Backward Solve
   *----------------------------------------------------------*/
  /* wait for bdry nodes 'x' from higher-ordered processors */
  if (mat->num_recvHi)
    {
      ierr = MPI_Waitall (mat->num_recvHi, mat->recv_reqHi, mat->status);
      CHECK_MPI_V_ERROR (ierr);

      /* debug block */
      if (debug)
	{
	  fprintf (logFile, "FACT got 'x' values from higher neighbors:\n ");
	  for (i = m + offsetLo; i < m + offsetLo + offsetHi; ++i)
	    {
	      fprintf (logFile, "%g ", work_x[i]);
	    }
	  fprintf (logFile, "\n");
	}
    }

  /* backward solve boundary nodes */
  from = m;
  to = first_bdry;
  if (from != to)
    {
      backward_solve_private (m, from, to, rp, cval, diag, aval,
			      work_y, work_x, debug);
      CHECK_V_ERROR;
    }

  /* send boundary node elements to lower ordered subdomains */
  if (mat->num_sendLo)
    {

      /* copy elements to send buffer */
      for (i = 0; i < sendlenLo; i++)
	{
	  sendbufLo[i] = work_x[sendindLo[i]];
	}

      /* start the sends */
      ierr = MPI_Startall (mat->num_sendLo, mat->send_reqLo);
      CHECK_MPI_V_ERROR (ierr);

      /* debug block */
      if (debug)
	{
	  fprintf (logFile,
		   "\nFACT sending 'x' values to lower neighbor:\nFACT ");
	  for (i = 0; i < sendlenLo; i++)
	    {
	      fprintf (logFile, "%g ", sendbufLo[i]);
	    }
	  fprintf (logFile, "\n");
	}
    }

  /* backward solve interior nodes */
  from = first_bdry;
  to = 0;
  if (from != to)
    {
      backward_solve_private (m, from, to, rp, cval, diag, aval,
			      work_y, work_x, debug);
      CHECK_V_ERROR;
    }

  /* copy solution from work vector to lhs vector */
  memcpy (lhs, work_x, m * sizeof (double));

  if (debug)
    {
      fprintf (logFile, "\nFACT solution: ");
      for (i = 0; i < m; ++i)
	{
	  fprintf (logFile, "%g ", lhs[i]);
	}
      fprintf (logFile, "\n");
    }

  /* wait for sends to go through */
  if (mat->num_sendLo)
    {
      ierr = MPI_Waitall (mat->num_sendLo, mat->send_reqLo, mat->status);
      CHECK_MPI_V_ERROR (ierr);
    }

  if (mat->num_sendHi)
    {
      ierr = MPI_Waitall (mat->num_sendHi, mat->send_reqHi, mat->status);
      CHECK_MPI_V_ERROR (ierr);
    }
END_FUNC_DH}



#undef __FUNC__
#define __FUNC__ "forward_solve_private"
void
forward_solve_private (int m, int from, int to, int *rp,
		       int *cval, int *diag, double *aval,
		       double *rhs, double *work_y, bool debug)
{
  START_FUNC_DH int i, j, idx;

  if (debug)
    {
      fprintf (logFile,
	       "\nFACT starting forward_solve_private; from= %i; to= %i, m= %i\n",
	       1 + from, 1 + to, m);
    }

/*
  if (from == 0) {
    work_y[0] = rhs[0];
    if (debug) {
      fprintf(logFile, "FACT work_y[%i] = %g\n------------\n", 1+beg_rowG, work_y[0]);
    }
  } else {
    --from;
  }
*/

  if (debug)
    {
      for (i = from; i < to; ++i)
	{
	  int len = diag[i] - rp[i];
	  int *col = cval + rp[i];
	  double *val = aval + rp[i];
	  double sum = rhs[i];

	  fprintf (logFile, "FACT solving for work_y[%i] (global)\n",
		   i + 1 + beg_rowG);
	  fprintf (logFile, "FACT sum = %g\n", sum);
	  for (j = 0; j < len; ++j)
	    {
	      idx = col[j];
	      sum -= (val[j] * work_y[idx]);
	      fprintf (logFile,
		       "FACT sum(%g) -= val[j] (%g) * work_y[%i] (%g)\n",
		       sum, val[j], 1 + idx, work_y[idx]);
	    }
	  work_y[i] = sum;
	  fprintf (logFile, "FACT work_y[%i] = %g\n", 1 + i + beg_rowG,
		   work_y[i]);
	  fprintf (logFile, "-----------\n");
	}

      fprintf (logFile, "\nFACT work vector at end of forward solve:\n");
      for (i = 0; i < to; i++)
	fprintf (logFile, " %i %g\n", i + 1 + beg_rowG, work_y[i]);

    }
  else
    {
      for (i = from; i < to; ++i)
	{
	  int len = diag[i] - rp[i];
	  int *col = cval + rp[i];
	  double *val = aval + rp[i];
	  double sum = rhs[i];

	  for (j = 0; j < len; ++j)
	    {
	      idx = col[j];
	      sum -= (val[j] * work_y[idx]);
	    }
	  work_y[i] = sum;
	}
    }
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "backward_solve_private"
void
backward_solve_private (int m, int from, int to, int *rp,
			int *cval, int *diag, double *aval,
			double *work_y, double *work_x, bool debug)
{
  START_FUNC_DH int i, j, idx;

  if (debug)
    {
      fprintf (logFile,
	       "\nFACT starting backward_solve_private; from= %i; to= %i, m= %i\n",
	       1 + from, 1 + to, m);
      for (i = from - 1; i >= to; --i)
	{
	  int len = rp[i + 1] - diag[i] - 1;
	  int *col = cval + diag[i] + 1;
	  double *val = aval + diag[i] + 1;
	  double sum = work_y[i];
	  fprintf (logFile, "FACT solving for work_x[%i]\n",
		   i + 1 + beg_rowG);

	  for (j = 0; j < len; ++j)
	    {
	      idx = col[j];
	      sum -= (val[j] * work_x[idx]);
	      fprintf (logFile,
		       "FACT sum(%g) -= val[j] (%g) * work_x[idx] (%g)\n",
		       sum, val[j], work_x[idx]);
	    }
	  work_x[i] = sum * aval[diag[i]];
	  fprintf (logFile, "FACT work_x[%i] = %g\n", 1 + i, work_x[i]);
	  fprintf (logFile, "----------\n");
	}

    }
  else
    {
      for (i = from - 1; i >= to; --i)
	{
	  int len = rp[i + 1] - diag[i] - 1;
	  int *col = cval + diag[i] + 1;
	  double *val = aval + diag[i] + 1;
	  double sum = work_y[i];

	  for (j = 0; j < len; ++j)
	    {
	      idx = col[j];
	      sum -= (val[j] * work_x[idx]);
	    }
	  work_x[i] = sum * aval[diag[i]];
	}
    }
END_FUNC_DH}

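/* Factor_dhInit sizes a new factor for an expected fill ratio 'rho':
   roughly rho*m nonzero slots are allocated up front, and
   Factor_dhReallocate() grows the arrays later if that estimate proves
   too small. */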
#undef __FUNC__
#define __FUNC__ "Factor_dhInit"
void
Factor_dhInit (void *A, bool fillFlag, bool avalFlag,
	       double rho, int id, int beg_rowP, Factor_dh * Fout)
{
  START_FUNC_DH int m, n, beg_row, alloc;
  Factor_dh F;

  EuclidGetDimensions (A, &beg_row, &m, &n);
  CHECK_V_ERROR;
  alloc = rho * m;
  Factor_dhCreate (&F);
  CHECK_V_ERROR;

  *Fout = F;
  F->m = m;
  F->n = n;
  F->beg_row = beg_rowP;
  F->id = id;
  F->alloc = alloc;

  F->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
  CHECK_V_ERROR;
  F->rp[0] = 0;
  F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
  CHECK_V_ERROR;
  F->diag = (int *) MALLOC_DH (m * sizeof (int));
  CHECK_V_ERROR;
  if (fillFlag)
    {
      F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
      CHECK_V_ERROR;
    }
  if (avalFlag)
    {
      F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }
END_FUNC_DH}

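/* Grows the cval, fill, and aval arrays by repeated doubling until they
   can hold 'used + additional' entries; the first 'used' entries are
   copied into the new storage. */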
#undef __FUNC__
#define __FUNC__ "Factor_dhReallocate"
void
Factor_dhReallocate (Factor_dh F, int used, int additional)
{
  START_FUNC_DH int alloc = F->alloc;

  if (used + additional > F->alloc)
    {
      int *tmpI;
      while (alloc < used + additional)
	alloc *= 2;
      F->alloc = alloc;
      tmpI = F->cval;
      F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
      CHECK_V_ERROR;
      memcpy (F->cval, tmpI, used * sizeof (int));
      FREE_DH (tmpI);
      CHECK_V_ERROR;
      if (F->fill != NULL)
	{
	  tmpI = F->fill;
	  F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
	  CHECK_V_ERROR;
	  memcpy (F->fill, tmpI, used * sizeof (int));
	  FREE_DH (tmpI);
	  CHECK_V_ERROR;
	}
      if (F->aval != NULL)
	{
	  REAL_DH *tmpF = F->aval;
	  F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
	  CHECK_V_ERROR;
	  memcpy (F->aval, tmpF, used * sizeof (REAL_DH));
	  FREE_DH (tmpF);
	  CHECK_V_ERROR;
	}
    }
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "Factor_dhTranspose"
void
Factor_dhTranspose (Factor_dh A, Factor_dh * Bout)
{
  START_FUNC_DH Factor_dh B;

  if (np_dh > 1)
    {
      SET_V_ERROR ("only for sequential");
    }

  Factor_dhCreate (&B);
  CHECK_V_ERROR;
  *Bout = B;
  B->m = B->n = A->m;
  /* was "B->aval == NULL", which is always true for a freshly created B;
     the test is on whether A carries numerical values */
  if (A->aval == NULL)
    {
      mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
				A->aval, NULL);
      CHECK_V_ERROR;
    }
  else
    {
      mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
				A->aval, &B->aval);
      CHECK_V_ERROR;
    }
END_FUNC_DH}


/* this could be done using OpenMP, but I took it out for now */
#undef __FUNC__
#define __FUNC__ "Factor_dhSolveSeq"
void
Factor_dhSolveSeq (double *rhs, double *lhs, Euclid_dh ctx)
{
  START_FUNC_DH Factor_dh F = ctx->F;
  int *rp, *cval, *diag;
  int i, j, *vi, nz, m;
  REAL_DH *aval, *work;
  /* REAL_DH *scale; */
  REAL_DH *v, sum;
  bool debug = false;

  if (F == NULL)
    {
      SET_V_ERROR ("ctx->F is NULL; factorization has not been computed");
    }
  m = F->m;

  if (F->debug && logFile != NULL)
    debug = true;

  rp = F->rp;
  cval = F->cval;
  aval = F->aval;
  diag = F->diag;
  /* scale = ctx->scale; */
  work = ctx->work;

  if (debug)
    {
      fprintf (logFile,
	       "\nFACT ============================================================\n");
      fprintf (logFile, "FACT starting Factor_dhSolveSeq\n");

      /* forward solve lower triangle */
      fprintf (logFile, "\nFACT STARTING FORWARD SOLVE\n------------\n");
      work[0] = rhs[0];
      fprintf (logFile, "FACT work[0] = %g\n------------\n", work[0]);
      for (i = 1; i < m; i++)
	{
	  v = aval + rp[i];
	  vi = cval + rp[i];
	  nz = diag[i] - rp[i];
	  fprintf (logFile, "FACT solving for work[%i]\n", i + 1);
	  sum = rhs[i];
	  for (j = 0; j < nz; ++j)
	    {
	      sum -= (v[j] * work[vi[j]]);
	      fprintf (logFile,
		       "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
		       sum, v[j], work[vi[j]]);
	    }
	  work[i] = sum;
	  fprintf (logFile, "FACT work[%i] = %g\n------------\n", 1 + i,
		   work[i]);
	}


      fprintf (logFile, "\nFACT work vector at end of forward solve:\n");
      for (i = 0; i < m; i++)
	fprintf (logFile, " %i %g\n", i + 1, work[i]);


      /* backward solve upper triangular boundaries (sequential) */
      fprintf (logFile, "\nFACT STARTING BACKWARD SOLVE\n--------------\n");
      for (i = m - 1; i >= 0; i--)
	{
	  v = aval + diag[i] + 1;
	  vi = cval + diag[i] + 1;
	  nz = rp[i + 1] - diag[i] - 1;
	  fprintf (logFile, "FACT solving for lhs[%i]\n", i + 1);
	  sum = work[i];
	  for (j = 0; j < nz; ++j)
	    {
	      sum -= (v[j] * work[vi[j]]);
	      fprintf (logFile,
		       "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
		       sum, v[j], work[vi[j]]);
	    }
	  lhs[i] = work[i] = sum * aval[diag[i]];
	  fprintf (logFile, "FACT lhs[%i] = %g\n------------\n", 1 + i,
		   lhs[i]);
	}

      fprintf (logFile, "\nFACT solution: ");
      for (i = 0; i < m; ++i)
	fprintf (logFile, "%g ", lhs[i]);
      fprintf (logFile, "\n");


    }
  else
    {
      /* forward solve lower triangle */
      work[0] = rhs[0];
      for (i = 1; i < m; i++)
	{
	  v = aval + rp[i];
	  vi = cval + rp[i];
	  nz = diag[i] - rp[i];
	  sum = rhs[i];
	  while (nz--)
	    sum -= (*v++ * work[*vi++]);
	  work[i] = sum;
	}

      /* backward solve upper triangular boundaries (sequential) */
      for (i = m - 1; i >= 0; i--)
	{
	  v = aval + diag[i] + 1;
	  vi = cval + diag[i] + 1;
	  nz = rp[i + 1] - diag[i] - 1;
	  sum = work[i];
	  while (nz--)
	    sum -= (*v++ * work[*vi++]);
	  lhs[i] = work[i] = sum * aval[diag[i]];
	}
    }
END_FUNC_DH}

/*---------------------------------------------------------------
 * next two are used by Factor_dhPrintXXX methods
 *---------------------------------------------------------------*/

#undef __FUNC__
#define __FUNC__ "adjust_bj_private"
void
adjust_bj_private (Factor_dh mat)
{
  START_FUNC_DH int i;
  int nz = mat->rp[mat->m];
  int beg_row = mat->beg_row;
  for (i = 0; i < nz; ++i)
    mat->cval[i] += beg_row;
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "unadjust_bj_private"
void
unadjust_bj_private (Factor_dh mat)
{
  START_FUNC_DH int i;
  int nz = mat->rp[mat->m];
  int beg_row = mat->beg_row;
  for (i = 0; i < nz; ++i)
    mat->cval[i] -= beg_row;
END_FUNC_DH}

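/* Returns 1.0 / min_i |aval[diag[i]]| over all subdomains, or 0 if some
   pivot is exactly zero; a large value signals a nearly singular factor.
   Note that the result is reduced to task 0 only. */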
#undef __FUNC__
#define __FUNC__ "Factor_dhMaxPivotInverse"
double
Factor_dhMaxPivotInverse (Factor_dh mat)
{
  START_FUNC_DH int i, m = mat->m, *diags = mat->diag;
  REAL_DH *aval = mat->aval;
  /* initialize with the magnitude of the first pivot (a raw, possibly
     negative value here would corrupt the minimum) */
  double minGlobal = 0.0, min = fabs (aval[diags[0]]);
  double retval;

  for (i = 0; i < m; ++i)
    min = MIN (min, fabs (aval[diags[i]]));
  if (np_dh == 1)
    {
      minGlobal = min;
    }
  else
    {
      MPI_Reduce (&min, &minGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, comm_dh);
    }

  if (minGlobal == 0)
    {
      retval = 0;
    }
  else
    {
      retval = 1.0 / minGlobal;
    }
END_FUNC_VAL (retval)}

#undef __FUNC__
#define __FUNC__ "Factor_dhMaxValue"
double
Factor_dhMaxValue (Factor_dh mat)
{
  START_FUNC_DH double maxGlobal = 0.0, max = 0.0;
  int i, nz = mat->rp[mat->m];
  REAL_DH *aval = mat->aval;

  for (i = 0; i < nz; ++i)
    {
      max = MAX (max, fabs (aval[i]));
    }

  if (np_dh == 1)
    {
      maxGlobal = max;
    }
  else
    {
      MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
    }
END_FUNC_VAL (maxGlobal)}


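/* Rough conditioning estimate: applies the preconditioner to the vector
   of all ones and returns ||M^{-1} * 1||_inf, reduced to task 0 only. */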
#undef __FUNC__
#define __FUNC__ "Factor_dhCondEst"
double
Factor_dhCondEst (Factor_dh mat, Euclid_dh ctx)
{
  START_FUNC_DH double max = 0.0, maxGlobal = 0.0;
  double *x;
  int i, m = mat->m;
  Vec_dh lhs, rhs;

  Vec_dhCreate (&lhs);
  CHECK_ERROR (-1);
  Vec_dhInit (lhs, m);
  CHECK_ERROR (-1);
  Vec_dhDuplicate (lhs, &rhs);
  CHECK_ERROR (-1);
  Vec_dhSet (rhs, 1.0);
  CHECK_ERROR (-1);
  Euclid_dhApply (ctx, rhs->vals, lhs->vals);
  CHECK_ERROR (-1);

  x = lhs->vals;
  for (i = 0; i < m; ++i)
    {
      max = MAX (max, fabs (x[i]));
    }

  if (np_dh == 1)
    {
      maxGlobal = max;
    }
  else
    {
      MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
    }
END_FUNC_VAL (maxGlobal)}