MueLu  Version of the Day
MueLu_Aggregates_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 // Tobias Wiesner (tawiesn@sandia.gov)
43 //
44 // ***********************************************************************
45 //
46 // @HEADER
47 #ifndef MUELU_AGGREGATES_KOKKOS_DEF_HPP
48 #define MUELU_AGGREGATES_KOKKOS_DEF_HPP
49 
50 #include <Xpetra_Map.hpp>
51 #include <Xpetra_Vector.hpp>
52 #include <Xpetra_VectorFactory.hpp>
53 
54 #include "MueLu_LWGraph_kokkos.hpp"
57 
58 namespace MueLu {
59 
61  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
62  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::Aggregates_kokkos(LWGraph_kokkos graph) {
63  nAggregates_ = 0;
64 
65  vertex2AggId_ = LOVectorFactory::Build(graph.GetImportMap());
66  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
67 
68  procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
69  procWinner_->putScalar(MUELU_UNASSIGNED);
70 
71  isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getNodeNumElements(), false);
72 
73  // slow but safe, force TentativePFactory to build column map for P itself
74  aggregatesIncludeGhosts_ = true;
75  }
76 
78  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
79  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::Aggregates_kokkos(const RCP<const Map>& map) {
80  nAggregates_ = 0;
81 
82  vertex2AggId_ = LOVectorFactory::Build(map);
83  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
84 
85  procWinner_ = LOVectorFactory::Build(map);
86  procWinner_->putScalar(MUELU_UNASSIGNED);
87 
88  isRoot_ = Teuchos::ArrayRCP<bool>(map->getNodeNumElements(), false);
89 
90  // slow but safe, force TentativePFactory to build column map for P itself
91  aggregatesIncludeGhosts_ = true;
92  }
93 
95  template<class ProcWinnerType, class Vertex2AggIdType, class AggregateSizesType, class LO>
97  private:
98  ProcWinnerType procWinner;
99  Vertex2AggIdType vertex2AggId;
100  int myPID;
101  AggregateSizesType aggregateSizes;
102 
103  public:
104  ComputeAggregateSizesFunctor(ProcWinnerType procWinner_, Vertex2AggIdType vertex2AggId_, int myPID_, AggregateSizesType aggregateSizes_) :
105  procWinner(procWinner_),
106  vertex2AggId(vertex2AggId_),
107  myPID(myPID_),
108  aggregateSizes(aggregateSizes_)
109  { }
110 
111  KOKKOS_INLINE_FUNCTION
112  void operator()(const LO k) const {
113  if (procWinner(k, 0) == myPID)
114  aggregateSizes(vertex2AggId(k, 0))++;
115  }
116  };
117  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
118  typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::aggregates_sizes_type::const_type
119  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::ComputeAggregateSizes(bool forceRecompute, bool cacheSizes) const {
120  if (aggregateSizes_.size() && !forceRecompute) {
121  return aggregateSizes_;
122 
123  } else {
124 
125  // invalidate previous sizes
126  aggregateSizes_ = aggregates_sizes_type("aggregates", 0);
127 
128  aggregates_sizes_type aggregateSizes("aggregates", nAggregates_);
129 
130  int myPID = GetMap()->getComm()->getRank();
131 
132  auto vertex2AggId = vertex2AggId_->template getLocalView<DeviceType>();
133  auto procWinner = procWinner_ ->template getLocalView<DeviceType>();
134 
135  typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
136 
137  ComputeAggregateSizesFunctor<decltype(procWinner), decltype(vertex2AggId), decltype(aggregateSizesAtomic), LO>
138  computeAggSizesFunctor(procWinner, vertex2AggId, myPID, aggregateSizesAtomic);
139  Kokkos::parallel_for("MueLu:Aggregates:ComputeAggregateSizes:for", procWinner.size(), computeAggSizesFunctor);
140 
141  if (cacheSizes)
142  aggregateSizes_ = aggregateSizes;
143 
144  return aggregateSizes;
145  }
146 
147  }
148 
149  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
150  typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::local_graph_type
151  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetGraph() const {
152  typedef typename local_graph_type::row_map_type row_map_type;
153  typedef typename local_graph_type::entries_type entries_type;
154 
155  int myPID = GetMap()->getComm()->getRank();
156 
157  ArrayRCP<LO> vertex2AggId = vertex2AggId_->getDataNonConst(0);
158  ArrayRCP<LO> procWinner = procWinner_->getDataNonConst(0);
159 
160  typename aggregates_sizes_type::const_type sizes = ComputeAggregateSizes();
161 
162  int numAggregates = nAggregates_;
163 
164  typename row_map_type::non_const_type rows("row_map", numAggregates+1); // rows(0) = 0 automatically
165  for (LO i = 0; i < nAggregates_; i++) // TODO: replace by parallel_scan
166  rows(i+1) = rows(i) + sizes(i);
167 
168  aggregates_sizes_type offsets("offsets", numAggregates);
169  for (LO i = 0; i < numAggregates; i++) // TODO: replace by parallel_for
170  offsets(i) = rows(i);
171 
172  typename entries_type::non_const_type cols("entries", rows(nAggregates_));
173  for (LO i = 0; i < procWinner.size(); i++)
174  if (procWinner[i] == myPID)
175  cols(offsets(vertex2AggId[i])++) = i;
176 
177  return local_graph_type(cols, rows);
178  }
179 
180  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
181  std::string Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::description() const {
182  return BaseClass::description() + "{nGlobalAggregates = " + toString(GetNumGlobalAggregates()) + "}";
183  }
184 
185  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
186  void Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const {
188 
189  if (verbLevel & Statistics1)
190  out0 << "Global number of aggregates: " << GetNumGlobalAggregates() << std::endl;
191  }
192 
193  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
194  GlobalOrdinal Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetNumGlobalAggregates() const {
195  LO nAggregates = GetNumAggregates();
196  GO nGlobalAggregates;
197  MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
198  return nGlobalAggregates;
199  }
200 
201  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
202  const RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>> >
203  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetMap() const {
204  return vertex2AggId_->getMap();
205  }
206 
207 } //namespace MueLu
208 
209 #endif // MUELU_AGGREGATES_KOKKOS_DEF_HPP
#define MUELU_UNASSIGNED
#define MueLu_sumAll(rcpComm, in, out)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
ComputeAggregateSizesFunctor(ProcWinnerType procWinner_, Vertex2AggIdType vertex2AggId_, int myPID_, AggregateSizesType aggregateSizes_)
Print more statistics.
Namespace for MueLu classes and methods.
#define MUELU_UNAGGREGATED
KOKKOS_INLINE_FUNCTION void operator()(const LO k) const
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type *=0)
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
virtual std::string description() const
Return a simple one-line description of this object.