MueLu  Version of the Day
MueLu_RepartitionHeuristicFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 // Tobias Wiesner (tawiesn@sandia.gov)
43 //
44 // ***********************************************************************
45 //
46 // @HEADER
47 #ifndef PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
48 #define PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
49 
50 
51 #include <algorithm>
52 #include <iostream>
53 #include <sstream>
54 
55 #ifdef HAVE_MPI
57 #include <Teuchos_CommHelpers.hpp>
58 
59 //#include <Xpetra_Map.hpp>
60 #include <Xpetra_Matrix.hpp>
61 
62 #include "MueLu_Utilities.hpp"
63 
64 #include "MueLu_RAPFactory.hpp"
65 #ifdef HAVE_MUELU_EXPERIMENTAL
66 #include "MueLu_BlockedRAPFactory.hpp"
67 #endif
68 #include "MueLu_SubBlockAFactory.hpp"
69 #include "MueLu_Level.hpp"
70 #include "MueLu_MasterList.hpp"
71 #include "MueLu_Monitor.hpp"
72 
73 #include "MueLu_RepartitionHeuristicFactory.hpp"
74 
75 namespace MueLu {
76 
77  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
79  RCP<ParameterList> validParamList = rcp(new ParameterList());
80 
81 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
82  SET_VALID_ENTRY("repartition: start level");
83  SET_VALID_ENTRY("repartition: min rows per proc");
84  SET_VALID_ENTRY("repartition: max imbalance");
85 #undef SET_VALID_ENTRY
86 
87  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Factory of the matrix A");
88 
89  return validParamList;
90  }
91 
92  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94  Input(currentLevel, "A");
95  }
96 
97  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
99  FactoryMonitor m(*this, "Build", currentLevel);
100 
101  const Teuchos::ParameterList & pL = GetParameterList();
102  // Access parameters here to make sure that we set the parameter entry flag to "used" even in case of short-circuit evaluation.
103  // TODO (JG): I don't really know if we want to do this.
104  const int startLevel = pL.get<int> ("repartition: start level");
105  const LO minRowsPerProcessor = pL.get<LO> ("repartition: min rows per proc");
106  const double nonzeroImbalance = pL.get<double>("repartition: max imbalance");
107 
108  RCP<const FactoryBase> Afact = GetFactory("A");
109  if(Teuchos::rcp_dynamic_cast<const RAPFactory>(Afact) == Teuchos::null &&
110 #ifdef HAVE_MUELU_EXPERIMENTAL
111  Teuchos::rcp_dynamic_cast<const BlockedRAPFactory>(Afact) == Teuchos::null &&
112 #endif
113  Teuchos::rcp_dynamic_cast<const SubBlockAFactory>(Afact) == Teuchos::null)
114  GetOStream(Warnings) <<
115  "MueLu::RepartitionHeuristicFactory::Build: The generation factory for A must " \
116  "be a RAPFactory or a SubBlockAFactory providing the non-rebalanced matrix information! " \
117  "It specifically must not be of type Rebalance(Blocked)AcFactory or similar. " \
118  "Please check the input. Make also sure that \"number of partitions\" is provided to" \
119  "the Interface class and the RepartitionFactory instance." << std::endl;
120 
121  // TODO: We only need a CrsGraph. This class does not have to be templated on Scalar types.
122  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
123 
124  // ======================================================================================================
125  // Determine whether partitioning is needed
126  // ======================================================================================================
127  // NOTE: most tests include some global communication, which is why we currently only do tests until we make
128  // a decision on whether to repartition. However, there is value in knowing how "close" we are to having to
129  // rebalance an operator. So, it would probably be beneficial to do and report *all* tests.
130 
131  // Test1: skip repartitioning if current level is less than the specified minimum level for repartitioning
132  if (currentLevel.GetLevelID() < startLevel) {
133  GetOStream(Statistics1) << "Repartitioning? NO:" <<
134  "\n current level = " << Teuchos::toString(currentLevel.GetLevelID()) <<
135  ", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl;
136 
137  // a negative number of processors means: no repartitioning
138  Set(currentLevel, "number of partitions", -1);
139 
140  return;
141  }
142 
143  RCP<const Map> rowMap = A->getRowMap();
144 
145  // NOTE: Teuchos::MPIComm::duplicate() calls MPI_Bcast inside, so this is
146  // a synchronization point. However, as we do MueLu_sumAll afterwards anyway, it
147  // does not matter.
148  RCP<const Teuchos::Comm<int> > origComm = rowMap->getComm();
149  RCP<const Teuchos::Comm<int> > comm = origComm->duplicate();
150 
151  // Test 2: check whether A is actually distributed, i.e. more than one processor owns part of A
152  // TODO: this global communication can be avoided if we store the information with the matrix (it is known when matrix is created)
153  // TODO: further improvements could be achieved when we use subcommunicator for the active set. Then we only need to check its size
154  {
155  int numActiveProcesses = 0;
156  MueLu_sumAll(comm, Teuchos::as<int>((A->getNodeNumRows() > 0) ? 1 : 0), numActiveProcesses);
157 
158  if (numActiveProcesses == 1) {
159  GetOStream(Statistics1) << "Repartitioning? NO:" <<
160  "\n # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl;
161 
162  Set(currentLevel, "number of partitions", 1);
163  return;
164  }
165  }
166 
167  bool test3 = false, test4 = false;
168  std::string msg3, msg4;
169 
170  // Test3: check whether number of rows on any processor satisfies the minimum number of rows requirement
171  // NOTE: Test2 ensures that repartitionning is not done when there is only one processor (it may or may not satisfy Test3)
172  if (minRowsPerProcessor > 0) {
173  LO numMyRows = Teuchos::as<LO>(A->getNodeNumRows()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max();
174  LO haveFewRows = (numMyRows < minRowsPerProcessor ? 1 : 0), numWithFewRows = 0;
175  MueLu_sumAll(comm, haveFewRows, numWithFewRows);
176  MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);
177 
178  // TODO: we could change it to repartition only if the number of processors with numRows < minNumRows is larger than some
179  // percentage of the total number. This way, we won't repartition if 2 out of 1000 processors don't have enough elements.
180  // I'm thinking maybe 20% threshold. To implement, simply add " && numWithFewRows < .2*numProcs" to the if statement.
181  if (numWithFewRows > 0)
182  test3 = true;
183 
184  msg3 = "\n min # rows per proc = " + Teuchos::toString(minNumRows) + ", min allowable = " + Teuchos::toString(minRowsPerProcessor);
185  }
186 
187  // Test4: check whether the balance in the number of nonzeros per processor is greater than threshold
188  if (!test3) {
189  GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries());
190  MueLu_maxAll(comm, numMyNnz, maxNnz);
191  MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz); // min nnz over all active processors
192  double imbalance = Teuchos::as<double>(maxNnz)/minNnz;
193 
194  if (imbalance > nonzeroImbalance)
195  test4 = true;
196 
197  msg4 = "\n nonzero imbalance = " + Teuchos::toString(imbalance) + ", max allowable = " + Teuchos::toString(nonzeroImbalance);
198  }
199 
200  if (!test3 && !test4) {
201  GetOStream(Statistics1) << "Repartitioning? NO:" << msg3 + msg4 << std::endl;
202 
203  // A negative number of partitions means: no repartitioning
204  Set(currentLevel, "number of partitions", -1);
205  return;
206  }
207 
208  GetOStream(Statistics1) << "Repartitioning? YES:" << msg3 + msg4 << std::endl;
209 
210  // ======================================================================================================
211  // Calculate number of partitions
212  // ======================================================================================================
213  // FIXME Quick way to figure out how many partitions there should be (same algorithm as ML)
214  // FIXME Should take into account nnz? Perhaps only when user is using min #nnz per row threshold.
215 
216  // The number of partitions is calculated by the RepartitionFactory and stored in "number of partitions" variable on
217  // the current level. If this variable is already set (e.g., by another instance of RepartitionFactory) then this number
218  // is used. The "number of partitions" variable serves as basic communication between the RepartitionFactory (which
219  // requests a certain number of partitions) and the *Interface classes which call the underlying partitioning algorithms
220  // and produce the "Partition" array with the requested number of partitions.
221  int numPartitions;
222  if (Teuchos::as<GO>(A->getGlobalNumRows()) < minRowsPerProcessor) {
223  // System is too small, migrate it to a single processor
224  numPartitions = 1;
225 
226  } else {
227  // Make sure that each processor has approximately minRowsPerProcessor
228  numPartitions = A->getGlobalNumRows() / minRowsPerProcessor;
229  }
230  numPartitions = std::min(numPartitions, comm->getSize());
231 
232  Set(currentLevel, "number of partitions", numPartitions);
233 
234  GetOStream(Statistics1) << "Number of partitions to use = " << numPartitions << std::endl;
235  } // Build
236 } // namespace MueLu
237 
238 #endif //ifdef HAVE_MPI
239 #endif /* PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_ */
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_maxAll(rcpComm, in, out)
void DeclareInput(Level &currentLevel) const
Determines the data that RepartitionHeuristicFactory needs, and the factories that generate that data...
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
void Build(Level &currentLevel) const
Build an object with this factory.
#define SET_VALID_ENTRY(name)
Namespace for MueLu classes and methods.
#define MueLu_minAll(rcpComm, in, out)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Print all warning messages.
std::string toString(const T &t)