MueLu Version of the Day
MueLu_Aggregates_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42// Tobias Wiesner (tawiesn@sandia.gov)
43//
44// ***********************************************************************
45//
46// @HEADER
47#ifndef MUELU_AGGREGATES_KOKKOS_DEF_HPP
48#define MUELU_AGGREGATES_KOKKOS_DEF_HPP
49
50#include <Xpetra_Map.hpp>
51#include <Xpetra_Vector.hpp>
52#include <Xpetra_MultiVectorFactory.hpp>
53#include <Xpetra_VectorFactory.hpp>
54
55#include "MueLu_LWGraph_kokkos.hpp"
58
59namespace MueLu {
60
61 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
62 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
63 Aggregates_kokkos(LWGraph_kokkos graph) {
64 numAggregates_ = 0;
65
66 vertex2AggId_ = LOVectorFactory::Build(graph.GetImportMap());
67 vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
68
69 procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
70 procWinner_->putScalar(MUELU_UNASSIGNED);
71
72 isRoot_ = Kokkos::View<bool*, device_type>(Kokkos::ViewAllocateWithoutInitializing("roots"), graph.GetImportMap()->getNodeNumElements());
73 Kokkos::deep_copy(isRoot_, false);
74
75 // slow but safe, force TentativePFactory to build column map for P itself
76 aggregatesIncludeGhosts_ = true;
77 }
78
79 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
80 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
81 Aggregates_kokkos(const RCP<const Map>& map) {
82 numAggregates_ = 0;
83
84 vertex2AggId_ = LOVectorFactory::Build(map);
85 vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
86
87 procWinner_ = LOVectorFactory::Build(map);
88 procWinner_->putScalar(MUELU_UNASSIGNED);
89
90 isRoot_ = Kokkos::View<bool*,device_type>(Kokkos::ViewAllocateWithoutInitializing("roots"), map->getNodeNumElements());
91 Kokkos::deep_copy(isRoot_, false);
92
93 // slow but safe, force TentativePFactory to build column map for P itself
94 aggregatesIncludeGhosts_ = true;
95 }
96
97 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
98 typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::aggregates_sizes_type::const_type
99 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::ComputeAggregateSizes(bool forceRecompute) const {
100 if (aggregateSizes_.size() && !forceRecompute) {
101 return aggregateSizes_;
102
103 } else {
104 // It is necessary to initialize this to 0
105 aggregates_sizes_type aggregateSizes("aggregates", numAggregates_);
106
107 int myPID = GetMap()->getComm()->getRank();
108
109 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
110 auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
111
112 typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
113 Kokkos::parallel_for("MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()),
114 KOKKOS_LAMBDA(const LO i) {
115 if (procWinner(i, 0) == myPID)
116 aggregateSizesAtomic(vertex2AggId(i, 0))++;
117 });
118
119 aggregateSizes_ = aggregateSizes;
120
121 return aggregateSizes;
122 }
123
124 }
125
126 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
127 typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::local_graph_type
128 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetGraph() const {
129 using row_map_type = typename local_graph_type::row_map_type;
130 using entries_type = typename local_graph_type::entries_type;
131 using size_type = typename local_graph_type::size_type;
132
133 auto numAggregates = numAggregates_;
134
135 if (static_cast<LO>(graph_.numRows()) == numAggregates)
136 return graph_;
137
138 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
139 auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
140 auto sizes = ComputeAggregateSizes();
141
142 // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0.
143 typename row_map_type::non_const_type rows("Agg_rows", numAggregates+1); // rows(0) = 0 automatically
144
145 // parallel_scan (exclusive)
146 Kokkos::parallel_scan("MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates),
147 KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) {
148 update += sizes(i);
149 if (final_pass)
150 rows(i+1) = update;
151 });
152
153 decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates+1); // +1 is just for ease
154 Kokkos::deep_copy(offsets, rows);
155
156 int myPID = GetMap()->getComm()->getRank();
157
158 size_type numNNZ;
159 {
160 Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(rows, numAggregates);
161 typename Kokkos::View<size_type, device_type>::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
162 Kokkos::deep_copy(numNNZ_host, numNNZ_device);
163 numNNZ = numNNZ_host();
164 }
165 typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ);
166 size_t realnnz = 0;
167 Kokkos::parallel_reduce("MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()),
168 KOKKOS_LAMBDA(const LO i, size_t& nnz) {
169 if (procWinner(i, 0) == myPID) {
170 typedef typename std::remove_reference< decltype( offsets(0) ) >::type atomic_incr_type;
171 auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1));
172 cols(idx) = i;
173 nnz++;
174 }
175 }, realnnz);
176 TEUCHOS_TEST_FOR_EXCEPTION(realnnz != numNNZ, Exceptions::RuntimeError,
177 "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz");
178
179 graph_ = local_graph_type(cols, rows);
180
181 return graph_;
182 }
183
184 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
185 std::string Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::description() const {
186 return BaseClass::description() + "{nGlobalAggregates = " + toString(GetNumGlobalAggregates()) + "}";
187 }
188
189 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
190 void Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const {
192
193 if (verbLevel & Statistics1)
194 out0 << "Global number of aggregates: " << GetNumGlobalAggregates() << std::endl;
195 }
196
197 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
198 GlobalOrdinal Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetNumGlobalAggregates() const {
199 LO nAggregates = GetNumAggregates();
200 GO nGlobalAggregates;
201 MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
202 return nGlobalAggregates;
203 }
204
205 template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
206 const RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>> >
207 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetMap() const {
208 return vertex2AggId_->getMap();
209 }
210
211} //namespace MueLu
212
213#endif // MUELU_AGGREGATES_KOKKOS_DEF_HPP
#define MUELU_UNAGGREGATED
#define MUELU_UNASSIGNED
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
virtual std::string description() const
Return a simple one-line description of this object.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.