105 std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols)
109#ifdef ENABLE_MMM_TIMINGS
110 using Teuchos::TimeMonitor;
111 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 1")));
116 int i,j,MyPID = B.
Comm().MyPID(), NumProc = B.
Comm().NumProc();
117 int Bstart=0, Istart=0, Cstart=0,Pstart=0;
124 int_type * Bgids = 0;
125 BColMap.MyGlobalElementsPtr(Bgids);
126 int Ni = IColMap.NumMyElements();
127 int_type * Igids = 0;
129 IColMap.MyGlobalElementsPtr(Igids);
131 if((
int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb);
132 if((
int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni);
147 std::vector<int> Bpids(Nb), Ipids(Ni);
149 else Bpids.assign(Nb,-1);
151 if(Ni != (
int) Bimport.ColMapOwningPIDs_.size()) {
155 Ipids[i] = (Bimport.ColMapOwningPIDs_[i]==MyPID)?(-1):(Bimport.ColMapOwningPIDs_[i]);
163 std::vector<int_type> Cgids(Csize);
164 Cremotepids.resize(Psize);
166#ifdef ENABLE_MMM_TIMINGS
167 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 2")));
179 for(i=0; i<DomainMap.
NumMyElements(); i++) Bcols2Ccols[i] = i;
184 for(i = 0; i < NumDomainElements; i++) {
185 int_type GID = (int_type) DomainMap.
GID64(i);
186 int LID = BColMap.LID(GID);
189 Bcols2Ccols[LID]=Cstart;
196 LID = IColMap.LID(GID);
198 Icols2Ccols[LID]=Cstart;
207 for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) {
208 while(Cgids[j]!=Igids[i]) j++;
214#ifdef ENABLE_MMM_TIMINGS
215 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 3")));
223 int initial_temp_length = 0;
224 const int * lengths_from=0;
226 lengths_from= Distor->LengthsFrom();
227 for(i=0; i < Distor->NumReceives(); i++) initial_temp_length += lengths_from[i];
229 else initial_temp_length=100;
231 std::vector<int_type> Btemp(initial_temp_length), Itemp(initial_temp_length);
232 std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length);
235 while (Bstart < Nb || Istart < Ni) {
236 int Bproc=NumProc+1, Iproc=NumProc+1, Cproc;
239 if(Bstart < Nb) Bproc=Bpids[Bstart];
240 if(Istart < Ni) Iproc=Ipids[Istart];
242 Cproc = (Bproc < Iproc)?Bproc:Iproc;
244 if(Bproc == Cproc && Iproc != Cproc) {
247 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
251 int tCsize = Bnext-Bstart;
252 if(Btemp.size() < (
size_t)tCsize) {Btemp2.resize(tCsize);}
254 for(i=Bstart; i<Bnext; i++) {
255 Cremotepids[i-Bstart+Pstart] = Cproc;
256 Cgids[i-Bstart+Cstart] = Bgids[i];
257 Btemp2[i-Bstart] = i;
261 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
262 util.Sort(
true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0);
264 for(i=0, j=Cstart; i<tCsize; i++){
265 while(Cgids[j] != Bgids[Btemp2[i]]) j++;
266 Bcols2Ccols[Btemp2[i]] = j;
272 else if(Bproc != Cproc && Iproc == Cproc) {
275 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
279 int tCsize = Inext-Istart;
280 if(Itemp.size() < (
size_t)tCsize) {Itemp2.resize(tCsize);}
282 for(i=Istart; i<Inext; i++) {
283 Cremotepids[i-Istart+Pstart] = Cproc;
284 Cgids[i-Istart+Cstart] = Igids[i];
285 Itemp2[i-Istart] = i;
289 int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
290 util.Sort(
true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0);
292 for(i=0, j=Cstart; i<tCsize; i++){
293 while(Cgids[j] != Igids[Itemp2[i]]) j++;
294 Icols2Ccols[Itemp2[i]] = j;
304 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
308 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
312 int tBsize = Bnext-Bstart;
313 int tIsize = Inext-Istart;
315 if(Btemp.size() < (
size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);}
316 if(Itemp.size() < (
size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);}
318 for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;}
319 for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;}
322 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
323 util.Sort(
true, tBsize, Btemp.size() ? &Btemp[0] : 0, 0, 0, 1, &Bptr2, 0, 0);
324 util.Sort(
true, tIsize, Itemp.size() ? &Itemp[0] : 0, 0, 0, 1, &Iptr2, 0, 0);
325 typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart;
326 typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart);
328 for(i=0, j=Cstart; i<tBsize; i++){
329 while(Cgids[j] != Bgids[Btemp2[i]]) j++;
330 Bcols2Ccols[Btemp2[i]] = j;
333 for(i=0, j=Cstart; i<tIsize; i++){
334 while(Cgids[j] != Igids[Itemp2[i]]) j++;
335 Icols2Ccols[Itemp2[i]] = j;
338 for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc;
339 Cstart = (last_el - mycstart) + Cstart;
340 Pstart = (last_el - mycstart) + Pstart;
346#ifdef ENABLE_MMM_TIMINGS
347 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 4")));
351 Cremotepids.resize(Pstart);
357 unionmap=
new Epetra_Map((int_type) -1,Cstart,Cgids.size() ? &Cgids[0] : 0, (int_type) B.
ColMap().IndexBase64(),
358 B.
Comm(),B.
ColMap().DistributedGlobal(),(int_type) B.
ColMap().MinAllGID64(),(int_type) B.
ColMap().MaxAllGID64());
387 std::vector<int> & Bcol2Ccol,
388 std::vector<int> & Bimportcol2Ccol,
389 std::vector<int>& Cremotepids,
391 bool keep_all_hard_zeros
393#ifdef ENABLE_MMM_TIMINGS
394 using Teuchos::TimeMonitor;
395 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix SerialCore")));
404 int NumMyDiagonals=0;
407 int n=colmap_C->NumMyElements();
411 int *Arowptr, *Acolind;
416 int *Browptr, *Bcolind;
420 int *Irowptr=0, *Icolind=0;
422 if(Bview.importMatrix){
423 Irowptr = &Bview.importMatrix->rowptr_[0];
424 Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
425 Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
436 std::vector<int> c_status(n, -1);
440 if(CSR_alloc < n) CSR_alloc = n;
441 int CSR_ip=0,OLD_ip=0;
446 CSR_rowptr.Resize(m+1);
447 CSR_colind.Resize(CSR_alloc);
451 std::vector<int> NumEntriesPerRow(m);
455 bool found_diagonal=
false;
456 CSR_rowptr[i]=CSR_ip;
458 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
460 double Aval = Avals[k];
461 if(!keep_all_hard_zeros && Aval==0)
continue;
463 if(Bview.targetMapToOrigRow[Ak] != -1){
465 int Bk = Bview.targetMapToOrigRow[Ak];
466 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
467 int Cj=Bcol2Ccol[Bcolind[j]];
469 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
471 if(c_status[Cj]<OLD_ip){
474 CSR_colind[CSR_ip]=Cj;
475 CSR_vals[CSR_ip]=Aval*Bvals[j];
479 CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
484 int Ik = Bview.targetMapToImportRow[Ak];
485 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
486 int Cj=Bimportcol2Ccol[Icolind[j]];
488 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
490 if(c_status[Cj]<OLD_ip){
493 CSR_colind[CSR_ip]=Cj;
494 CSR_vals[CSR_ip]=Aval*Ivals[j];
498 CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
502 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
505 if(CSR_ip + n > CSR_alloc){
508 CSR_colind.Resize(CSR_alloc);
513 CSR_rowptr[m]=CSR_ip;
515#ifdef ENABLE_MMM_TIMINGS
516 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Final Sort")));
522#ifdef ENABLE_MMM_TIMINGS
523 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Fast IE")));
528 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
530 int *ExportLIDs=0, *ExportPIDs=0;
531 if(Bview.importMatrix) {
532 NumExports = Bview.importMatrix->
ExportLIDs_.size();
533 ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
534 ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
538 NumExports = B.
Importer()->NumExportIDs();
539 ExportLIDs = B.
Importer()->ExportLIDs();
540 ExportPIDs = B.
Importer()->ExportPIDs();
550#ifdef ENABLE_MMM_TIMINGS
551 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix ESFC")));
570 std::vector<int> & Bcol2Ccol,
571 std::vector<int> & Bimportcol2Ccol,
573 bool keep_all_hard_zeros){
575#ifdef ENABLE_MMM_TIMINGS
576 using Teuchos::TimeMonitor;
577 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse SerialCore")));
586 int n=colmap_C->NumMyElements();
590 int *Arowptr, *Acolind;
595 int *Browptr, *Bcolind;
599 int *Irowptr=0, *Icolind=0;
601 if(Bview.importMatrix){
602 Irowptr = &Bview.importMatrix->rowptr_[0];
603 Icolind = &Bview.importMatrix->colind_[0];
604 Ivals = &Bview.importMatrix->vals_[0];
608 int *CSR_rowptr, *CSR_colind;
617 std::vector<int> c_status(n, -1);
620 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
621 int CSR_ip=0,OLD_ip=0;
625 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
627 double Aval = Avals[k];
628 if(!keep_all_hard_zeros && Aval==0)
continue;
630 if(Bview.targetMapToOrigRow[Ak] != -1){
632 int Bk = Bview.targetMapToOrigRow[Ak];
633 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
634 int Cj=Bcol2Ccol[Bcolind[j]];
636 if(c_status[Cj]<OLD_ip){
640 CSR_colind[CSR_ip]=Cj;
641 CSR_vals[CSR_ip]=Aval*Bvals[j];
645 CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
650 int Ik = Bview.targetMapToImportRow[Ak];
651 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
652 int Cj=Bimportcol2Ccol[Icolind[j]];
654 if(c_status[Cj]<OLD_ip){
658 CSR_colind[CSR_ip]=Cj;
659 CSR_vals[CSR_ip]=Aval*Ivals[j];
663 CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
670#ifdef ENABLE_MMM_TIMINGS
671 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse Final Sort")));
692 bool call_FillComplete_on_result,
693 bool keep_all_hard_zeros)
695 int C_firstCol = Bview.colMap->MinLID();
696 int C_lastCol = Bview.colMap->MaxLID();
698 int C_firstCol_import = 0;
699 int C_lastCol_import = -1;
702 Bview.colMap->MyGlobalElementsPtr(bcols);
703 int_type* bcols_import = NULL;
704 if (Bview.importMatrix != NULL) {
705 C_firstCol_import = Bview.importMatrix->ColMap_.MinLID();
706 C_lastCol_import = Bview.importMatrix->ColMap_.MaxLID();
707 Bview.importMatrix->ColMap_.MyGlobalElementsPtr(bcols_import);
710 int C_numCols = C_lastCol - C_firstCol + 1;
711 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
713 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
716 double* dwork =
new double[C_numCols];
717 int_type* iwork =
new int_type[C_numCols];
718 int_type *c_cols=iwork;
719 double *c_vals=dwork;
720 int *c_index=
new int[C_numCols];
726 int *Arowptr, *Acolind;
738 for(k=0;k<C_numCols;k++) c_index[k]=-1;
743 int_type global_row = (int_type) Aview.rowMap->GID64(i);
791 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
793 double Aval = Avals[k];
795 if(Bview.remote[Ak] || (!keep_all_hard_zeros && Aval==0))
continue;
797 int* Bcol_inds = Bview.indices[Ak];
798 double* Bvals_k = Bview.values[Ak];
800 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
801 int col=Bcol_inds[j];
808 c_cols[c_current]=col;
809 c_vals[c_current]=Aval*Bvals_k[j];
810 c_index[col]=c_current;
815 c_vals[c_index[col]]+=Aval*Bvals_k[j];
820 for(k=0; k<c_current; k++){
821 c_index[c_cols[k]]=-1;
822 c_cols[k]=bcols[c_cols[k]];
852 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
854 double Aval = Avals[k];
856 if(!Bview.remote[Ak] || Aval==0)
continue;
858 int* Bcol_inds = Bview.indices[Ak];
859 double* Bvals_k = Bview.values[Ak];
861 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
862 int col=Bcol_inds[j];
864 c_cols[c_current]=col;
865 c_vals[c_current]=Aval*Bvals_k[j];
866 c_index[col]=c_current;
870 c_vals[c_index[col]]+=Aval*Bvals_k[j];
875 for(k=0; k<c_current; k++){
876 c_index[c_cols[k]]=-1;
877 c_cols[k]=bcols_import[c_cols[k]];
897 if(call_FillComplete_on_result)
919 bool call_FillComplete_on_result,
920 bool keep_all_hard_zeros){
927 std::vector<int> Cremotepids;
928 std::vector<int> Bcol2Ccol(B.
ColMap().NumMyElements());
929 std::vector<int> Bimportcol2Ccol;
930 if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
932#ifdef ENABLE_MMM_TIMINGS
933 using Teuchos::TimeMonitor;
934 Teuchos::RCP<Teuchos::TimeMonitor> MM;
938 if(!call_FillComplete_on_result) {
939#ifdef ENABLE_MMM_TIMINGS
940 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM General Multiply")));
942 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,
false,keep_all_hard_zeros);
950 int *C_rowptr, *C_colind;
953 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
956 if(!NewFlag && ExtractFailFlag){
957#ifdef ENABLE_MMM_TIMINGS
958 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM General Multiply")));
960 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result,keep_all_hard_zeros);
965#ifdef ENABLE_MMM_TIMINGS
966 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix CMap")));
967 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse CMap")));
972 if(Bview.importMatrix) {
973 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
978 for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
986#ifdef ENABLE_MMM_TIMINGS
987 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Lookups")));
988 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse Lookups")));
997 if(colmap_B->SameAs(*colmap_C)){
999 for(i=0;i<colmap_B->NumMyElements();i++)
1004 for(i=0;i<colmap_B->NumMyElements();i++){
1005 Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1010 if(Bview.importMatrix){
1011 Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1012 for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1013 Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1019#ifdef ENABLE_MMM_TIMINGS
1025 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C,keep_all_hard_zeros));
1029 EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C,keep_all_hard_zeros));
1074 std::vector<int> & Bcol2Ccol,
1075 std::vector<int> & Bimportcol2Ccol,
1078#ifdef ENABLE_MMM_TIMINGS
1079 using Teuchos::TimeMonitor;
1080 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse SerialCore")));
1089 int n=colmap_C->NumMyElements();
1093 int *Arowptr, *Acolind;
1098 int *Browptr, *Bcolind;
1102 int *Irowptr=0, *Icolind=0;
1104 if(Bview.importMatrix){
1105 Irowptr = &Bview.importMatrix->rowptr_[0];
1106 Icolind = &Bview.importMatrix->colind_[0];
1107 Ivals = &Bview.importMatrix->vals_[0];
1111 const double *Dvals = Dinv.Values();
1114 int *CSR_rowptr, *CSR_colind;
1123 std::vector<int> c_status(n, -1);
1126 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
1127 int CSR_ip=0,OLD_ip=0;
1131 double Dval = Dvals[i];
1134 for(k=Browptr[i]; k<Browptr[i+1]; k++){
1136 double Bval = Bvals[k];
1137 if(Bval==0)
continue;
1138 int Ck=Bcol2Ccol[Bcolind[k]];
1141 c_status[Ck]=CSR_ip;
1142 CSR_colind[CSR_ip]=Ck;
1143 CSR_vals[CSR_ip]= Bvals[k];
1148 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1150 double Aval = Avals[k];
1151 if(Aval==0)
continue;
1153 if(Bview.targetMapToOrigRow[Ak] != -1){
1155 int Bk = Bview.targetMapToOrigRow[Ak];
1156 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1157 int Cj=Bcol2Ccol[Bcolind[j]];
1159 if(c_status[Cj]<OLD_ip){
1162 c_status[Cj]=CSR_ip;
1163 CSR_colind[CSR_ip]=Cj;
1164 CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j];
1168 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1173 int Ik = Bview.targetMapToImportRow[Ak];
1174 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1175 int Cj=Bimportcol2Ccol[Icolind[j]];
1177 if(c_status[Cj]<OLD_ip){
1180 c_status[Cj]=CSR_ip;
1181 CSR_colind[CSR_ip]=Cj;
1182 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1186 CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j];
1193#ifdef ENABLE_MMM_TIMINGS
1194 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse Final Sort")));
1212 std::vector<int> & Bcol2Ccol,
1213 std::vector<int> & Bimportcol2Ccol,
1214 std::vector<int>& Cremotepids,
1217#ifdef ENABLE_MMM_TIMINGS
1218 using Teuchos::TimeMonitor;
1219 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix SerialCore")));
1226 int NumMyDiagonals=0;
1229 int n=colmap_C->NumMyElements();
1233 int *Arowptr, *Acolind;
1238 int *Browptr, *Bcolind;
1243 const double *Dvals = Dinv.Values();
1245 int *Irowptr=0, *Icolind=0;
1247 if(Bview.importMatrix){
1248 Irowptr = &Bview.importMatrix->rowptr_[0];
1249 Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
1250 Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
1261 std::vector<int> c_status(n, -1);
1266 int CSR_ip=0,OLD_ip=0;
1271 CSR_rowptr.Resize(m+1);
1272 CSR_colind.Resize(CSR_alloc);
1276 std::vector<int> NumEntriesPerRow(m);
1280 bool found_diagonal=
false;
1281 CSR_rowptr[i]=CSR_ip;
1282 double Dval = Dvals[i];
1285 for(k=Browptr[i]; k<Browptr[i+1]; k++){
1287 double Bval = Bvals[k];
1288 if(Bval==0)
continue;
1289 int Ck=Bcol2Ccol[Bcolind[k]];
1292 c_status[Ck]=CSR_ip;
1293 CSR_colind[CSR_ip]=Ck;
1294 CSR_vals[CSR_ip]= Bvals[k];
1299 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1300 int Ak = Acolind[k];
1301 double Aval = Avals[k];
1302 if(Aval==0)
continue;
1304 if(Bview.targetMapToOrigRow[Ak] != -1){
1306 int Bk = Bview.targetMapToOrigRow[Ak];
1307 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1308 int Cj=Bcol2Ccol[Bcolind[j]];
1310 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
1312 if(c_status[Cj]<OLD_ip){
1314 c_status[Cj]=CSR_ip;
1315 CSR_colind[CSR_ip]=Cj;
1316 CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j];
1320 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1325 int Ik = Bview.targetMapToImportRow[Ak];
1326 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1327 int Cj=Bimportcol2Ccol[Icolind[j]];
1329 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
1331 if(c_status[Cj]<OLD_ip){
1333 c_status[Cj]=CSR_ip;
1334 CSR_colind[CSR_ip]=Cj;
1335 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1339 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j];
1343 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
1346 if(CSR_ip + n > CSR_alloc){
1349 CSR_colind.Resize(CSR_alloc);
1354 CSR_rowptr[m]=CSR_ip;
1356#ifdef ENABLE_MMM_TIMINGS
1357 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix Final Sort")));
1363#ifdef ENABLE_MMM_TIMINGS
1364 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix Fast IE")));
1369 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
1371 int *ExportLIDs=0, *ExportPIDs=0;
1372 if(Bview.importMatrix) {
1373 NumExports = Bview.importMatrix->
ExportLIDs_.size();
1374 ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
1375 ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
1379 NumExports = B.
Importer()->NumExportIDs();
1380 ExportLIDs = B.
Importer()->ExportLIDs();
1381 ExportPIDs = B.
Importer()->ExportPIDs();
1387#ifdef ENABLE_MMM_TIMINGS
1388 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix ESFC")));
1410 bool call_FillComplete_on_result){
1416 std::vector<int> Cremotepids;
1417 std::vector<int> Bcol2Ccol(B.
ColMap().NumMyElements());
1418 std::vector<int> Bimportcol2Ccol;
1419 if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1421#ifdef ENABLE_MMM_TIMINGS
1422 using Teuchos::TimeMonitor;
1423 Teuchos::RCP<Teuchos::TimeMonitor> MM;
1427 if(!call_FillComplete_on_result) {
1428#ifdef ENABLE_MMM_TIMINGS
1429 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi General Multiply")));
1431 throw std::runtime_error(
"jacobi_A_B_general not implemented");
1440 int *C_rowptr, *C_colind;
1443 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
1446 if(!NewFlag && ExtractFailFlag){
1447#ifdef ENABLE_MMM_TIMINGS
1448 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi General Multiply")));
1450 throw std::runtime_error(
"jacobi_A_B_general not implemented");
1455#ifdef ENABLE_MMM_TIMINGS
1456 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Newmatrix CMap")));
1457 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse CMap")));
1462 if(Bview.importMatrix) {
1463 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
1468 for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
1476#ifdef ENABLE_MMM_TIMINGS
1477 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Newmatrix Lookups")));
1478 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse Lookups")));
1487 if(colmap_B->SameAs(*colmap_C)){
1489 for(i=0;i<colmap_B->NumMyElements();i++)
1494 for(i=0;i<colmap_B->NumMyElements();i++){
1495 Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1500 if(Bview.importMatrix){
1501 Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1502 for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1503 Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1513 EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
1517 EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
1563#ifdef ENABLE_MMM_TIMINGS
1564 using Teuchos::TimeMonitor;
1565 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T Transpose")));
1572#ifdef ENABLE_MMM_TIMINGS
1573 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T AB-core")));
1575 RCP<Epetra_CrsMatrix> Ctemp;
1578 bool needs_final_export = Atransview.origMatrix->Exporter() != 0;
1579 if(needs_final_export)
1580 Ctemp = rcp(
new Epetra_CrsMatrix(
Copy,Atransview.origMatrix->RowMap(),Bview.origMatrix->ColMap(),0));
1583 Ctemp = rcp(&C,
false);
1587 std::vector<int> Bcol2Ccol(Bview.origMatrix->NumMyCols());
1588 for(
int i=0; i<Bview.origMatrix->NumMyCols(); i++)
1590 std::vector<int> Bimportcol2Ccol,Cremotepids;
1591 if(Bview.origMatrix->Importer())
1594 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(*Atransview.origMatrix,*Bview.origMatrix,Bview,
1595 Bcol2Ccol,Bimportcol2Ccol,Cremotepids,
1596 *Ctemp,keep_all_hard_zeros));
1601#ifdef ENABLE_MMM_TIMINGS
1602 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T ESFC")));
1605 if(needs_final_export)
1606 C.
FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.origMatrix->DomainMap(),&Atransview.origMatrix->RangeMap(),
false);