Tpetra parallel linear algebra Version of the Day
Tpetra_Details_normImpl.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_DETAILS_NORMIMPL_HPP
43#define TPETRA_DETAILS_NORMIMPL_HPP
44
53
54#include "TpetraCore_config.h"
55#include "Kokkos_Core.hpp"
56#include "Teuchos_ArrayView.hpp"
57#include "Teuchos_CommHelpers.hpp"
58#include "KokkosBlas.hpp"
59#include "Kokkos_ArithTraits.hpp"
60
61#ifndef DOXYGEN_SHOULD_SKIP_THIS
62namespace Teuchos {
63 template<class T>
64 class ArrayView; // forward declaration
65 template<class OrdinalType>
66 class Comm; // forward declaration
67}
68#endif // DOXYGEN_SHOULD_SKIP_THIS
69
71// Declarations start here
73
74namespace Tpetra {
75namespace Details {
76
79 NORM_ONE, //<! Use the one-norm
80 NORM_TWO, //<! Use the two-norm
81 NORM_INF //<! Use the infinity-norm
82};
83
85template <class ValueType,
86 class ArrayLayout,
87 class DeviceType,
88 class MagnitudeType>
89void
90normImpl (MagnitudeType norms[],
91 const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
92 const EWhichNorm whichNorm,
93 const Teuchos::ArrayView<const size_t>& whichVecs,
94 const bool isConstantStride,
95 const bool isDistributed,
96 const Teuchos::Comm<int>* comm);
97
98} // namespace Details
99} // namespace Tpetra
100
102// Definitions start here
104
105namespace Tpetra {
106namespace Details {
107namespace Impl {
108
109template<class RV, class XMV>
110void
111lclNormImpl (const RV& normsOut,
112 const XMV& X,
113 const size_t numVecs,
114 const Teuchos::ArrayView<const size_t>& whichVecs,
115 const bool constantStride,
116 const EWhichNorm whichNorm)
117{
118 using Kokkos::ALL;
119 using Kokkos::subview;
120 using mag_type = typename RV::non_const_value_type;
121
122 static_assert (static_cast<int> (RV::Rank) == 1,
123 "Tpetra::MultiVector::lclNormImpl: "
124 "The first argument normsOut must have rank 1.");
125 static_assert (Kokkos::Impl::is_view<XMV>::value,
126 "Tpetra::MultiVector::lclNormImpl: "
127 "The second argument X is not a Kokkos::View.");
128 static_assert (static_cast<int> (XMV::Rank) == 2,
129 "Tpetra::MultiVector::lclNormImpl: "
130 "The second argument X must have rank 2.");
131
132 const size_t lclNumRows = static_cast<size_t> (X.extent (0));
133 TEUCHOS_TEST_FOR_EXCEPTION
134 (lclNumRows != 0 && constantStride &&
135 static_cast<size_t> (X.extent (1)) != numVecs,
136 std::logic_error, "Constant Stride X's dimensions are " << X.extent (0)
137 << " x " << X.extent (1) << ", which differ from the local dimensions "
138 << lclNumRows << " x " << numVecs << ". Please report this bug to "
139 "the Tpetra developers.");
140 TEUCHOS_TEST_FOR_EXCEPTION
141 (lclNumRows != 0 && ! constantStride &&
142 static_cast<size_t> (X.extent (1)) < numVecs,
143 std::logic_error, "Strided X's dimensions are " << X.extent (0) << " x "
144 << X.extent (1) << ", which are incompatible with the local dimensions "
145 << lclNumRows << " x " << numVecs << ". Please report this bug to "
146 "the Tpetra developers.");
147
148 if (lclNumRows == 0) {
149 const mag_type zeroMag = Kokkos::ArithTraits<mag_type>::zero ();
150 Kokkos::deep_copy (normsOut, zeroMag);
151 }
152 else { // lclNumRows != 0
153 if (constantStride) {
154 if (whichNorm == NORM_INF) {
155 KokkosBlas::nrminf (normsOut, X);
156 }
157 else if (whichNorm == NORM_ONE) {
158 KokkosBlas::nrm1 (normsOut, X);
159 }
160 else if (whichNorm == NORM_TWO) {
161 KokkosBlas::nrm2_squared (normsOut, X);
162 }
163 else {
164 TEUCHOS_TEST_FOR_EXCEPTION
165 (true, std::logic_error, "Should never get here!");
166 }
167 }
168 else { // not constant stride
169 // NOTE (mfh 15 Jul 2014, 11 Apr 2019) This does a kernel launch
170 // for every column. It might be better to have a kernel that
171 // does the work all at once. On the other hand, we don't
172 // prioritize performance of MultiVector views of noncontiguous
173 // columns.
174 for (size_t k = 0; k < numVecs; ++k) {
175 const size_t X_col = constantStride ? k : whichVecs[k];
176 if (whichNorm == NORM_INF) {
177 KokkosBlas::nrminf (subview (normsOut, k),
178 subview (X, ALL (), X_col));
179 }
180 else if (whichNorm == NORM_ONE) {
181 KokkosBlas::nrm1 (subview (normsOut, k),
182 subview (X, ALL (), X_col));
183 }
184 else if (whichNorm == NORM_TWO) {
185 KokkosBlas::nrm2_squared (subview (normsOut, k),
186 subview (X, ALL (), X_col));
187 }
188 else {
189 TEUCHOS_TEST_FOR_EXCEPTION
190 (true, std::logic_error, "Should never get here!");
191 }
192 } // for each column
193 } // constantStride
194 } // lclNumRows != 0
195}
196
197// Kokkos::parallel_for functor for applying square root to each
198// entry of a 1-D Kokkos::View.
199template<class ViewType>
200class SquareRootFunctor {
201public:
202 typedef typename ViewType::execution_space execution_space;
203 typedef typename ViewType::size_type size_type;
204
205 SquareRootFunctor (const ViewType& theView) :
206 theView_ (theView)
207 {}
208
209 KOKKOS_INLINE_FUNCTION void
210 operator() (const size_type& i) const
211 {
212 typedef typename ViewType::non_const_value_type value_type;
213 typedef Kokkos::Details::ArithTraits<value_type> KAT;
214 theView_(i) = KAT::sqrt (theView_(i));
215 }
216
217private:
218 ViewType theView_;
219};
220
221template<class RV>
222void
223gblNormImpl (const RV& normsOut,
224 const Teuchos::Comm<int>* const comm,
225 const bool distributed,
226 const EWhichNorm whichNorm)
227{
228 using Teuchos::REDUCE_MAX;
229 using Teuchos::REDUCE_SUM;
230 using Teuchos::reduceAll;
231 typedef typename RV::non_const_value_type mag_type;
232
233 const size_t numVecs = normsOut.extent (0);
234
235 // If the MultiVector is distributed over multiple processes, do
236 // the distributed (interprocess) part of the norm. We assume
237 // that the MPI implementation can read from and write to device
238 // memory.
239 //
240 // replaceMap() may have removed some processes. Those processes
241 // have a null Map. They must not participate in any collective
242 // operations. We ask first whether the Map is null, because
243 // isDistributed() defers that question to the Map. We still
244 // compute and return local norms for processes not participating
245 // in collective operations; those probably don't make any sense,
246 // but it doesn't hurt to do them, since it's illegal to call
247 // norm*() on those processes anyway.
248 if (distributed && comm != nullptr) {
249 // The calling process only participates in the collective if
250 // both the Map and its Comm on that process are nonnull.
251 const int nv = static_cast<int> (numVecs);
252 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
253
254 if (commIsInterComm) {
255 RV lclNorms (Kokkos::ViewAllocateWithoutInitializing ("MV::normImpl lcl"), numVecs);
256 Kokkos::deep_copy (lclNorms, normsOut);
257 const mag_type* const lclSum = lclNorms.data ();
258 mag_type* const gblSum = normsOut.data ();
259
260 if (whichNorm == NORM_INF) {
261 reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
262 } else {
263 reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
264 }
265 } else {
266 mag_type* const gblSum = normsOut.data ();
267 if (whichNorm == NORM_INF) {
268 reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, gblSum, gblSum);
269 } else {
270 reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, gblSum, gblSum);
271 }
272 }
273 }
274
275 if (whichNorm == NORM_TWO) {
276 // Replace the norm-squared results with their square roots in
277 // place, to get the final output. If the device memory and
278 // the host memory are the same, it probably doesn't pay to
279 // launch a parallel kernel for that, since there isn't enough
280 // parallelism for the typical MultiVector case.
281 const bool inHostMemory =
282 std::is_same<typename RV::memory_space,
283 typename RV::host_mirror_space::memory_space>::value;
284 if (inHostMemory) {
285 for (size_t j = 0; j < numVecs; ++j) {
286 normsOut(j) = Kokkos::Details::ArithTraits<mag_type>::sqrt (normsOut(j));
287 }
288 }
289 else {
290 // There's not as much parallelism now, but that's OK. The
291 // point of doing parallel dispatch here is to keep the norm
292 // results on the device, thus avoiding a copy to the host
293 // and back again.
294 SquareRootFunctor<RV> f (normsOut);
295 typedef typename RV::execution_space execution_space;
296 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
297 Kokkos::parallel_for (range_type (0, numVecs), f);
298 }
299 }
300}
301
302} // namespace Impl
303
304template <class ValueType,
305 class ArrayLayout,
306 class DeviceType,
307 class MagnitudeType>
308void
309normImpl (MagnitudeType norms[],
310 const Kokkos::View<const ValueType**, ArrayLayout, DeviceType>& X,
311 const EWhichNorm whichNorm,
312 const Teuchos::ArrayView<const size_t>& whichVecs,
313 const bool isConstantStride,
314 const bool isDistributed,
315 const Teuchos::Comm<int>* comm)
316{
317 using RV = Kokkos::View<MagnitudeType*, Kokkos::HostSpace>;
318 //using XMV = Kokkos::View<const ValueType**, ArrayLayout, DeviceType>;
319 //using pair_type = std::pair<size_t, size_t>;
320
321 const size_t numVecs = isConstantStride ?
322 static_cast<size_t> (X.extent (1)) :
323 static_cast<size_t> (whichVecs.size ());
324 if (numVecs == 0) {
325 return; // nothing to do
326 }
327 RV normsOut (norms, numVecs);
328
329 Impl::lclNormImpl (normsOut, X, numVecs, whichVecs,
330 isConstantStride, whichNorm);
331 Impl::gblNormImpl (normsOut, comm, isDistributed, whichNorm);
332}
333
334} // namespace Details
335} // namespace Tpetra
336
337#endif // TPETRA_DETAILS_NORMIMPL_HPP
Implementation details of Tpetra.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
EWhichNorm
Input argument for normImpl() (which see).
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.