Tpetra parallel linear algebra Version of the Day
Tpetra_idot.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_DETAILS_IDOT_HPP
43#define TPETRA_DETAILS_IDOT_HPP
44
60
61#include "Tpetra_iallreduce.hpp"
62#include "Tpetra_MultiVector.hpp"
63#include "Tpetra_Vector.hpp"
64#include "Teuchos_CommHelpers.hpp"
65#include "KokkosBlas1_dot.hpp"
66#include <stdexcept>
67#include <sstream>
68
69namespace Tpetra {
70namespace Details {
71
72// Temporary helper to get read-only view from multivector in requested space.
73// TODO: when https://github.com/kokkos/kokkos/issues/3850 is resolved,
74// remove this and just use templated getLocalView<Device>(ReadOnly).
75template<typename MV>
76struct GetReadOnly
77{
78 using DevView = typename MV::dual_view_type::t_dev::const_type;
79 using HostView = typename MV::dual_view_type::t_host::const_type;
80
81 template<typename exec_space>
82 static DevView get(const MV& x, typename std::enable_if<std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
83 {
84 return x.getLocalViewDevice(Tpetra::Access::ReadOnly);
85 }
86
87 template<typename exec_space>
88 static HostView get(const MV& x, typename std::enable_if<!std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
89 {
90 return x.getLocalViewHost(Tpetra::Access::ReadOnly);
91 }
92};
93
95
96template<class MV, class ResultView, bool runOnDevice>
97void idotLocal(const ResultView& localResult,
98 const MV& X,
99 const MV& Y)
100{
101 using pair_type = Kokkos::pair<size_t, size_t>;
102 using exec_space = typename std::conditional<runOnDevice, typename MV::execution_space, Kokkos::DefaultHostExecutionSpace>::type;
103 //if the execution space can access localResult, use it directly. Otherwise need to make a copy.
104 static_assert(Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible,
105 "idotLocal: Execution space must be able to access localResult");
106 //If Dot executes on Serial, it requires the result to be HostSpace. If localResult is CudaUVMSpace,
107 //we can just treat it like HostSpace.
108 Kokkos::View<typename ResultView::data_type, typename exec_space::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
109 localResultUnmanaged(localResult.data(), localResult.extent(0));
110 const size_t numRows = X.getLocalLength ();
111 const pair_type rowRange (0, numRows);
112 const size_t X_numVecs = X.getNumVectors ();
113 const size_t Y_numVecs = Y.getNumVectors ();
114 const size_t numVecs = X_numVecs > Y_numVecs ? X_numVecs : Y_numVecs;
115 // Check compatibility of number of columns; allow special cases of
116 // a multivector dot a single vector, or vice versa.
117 if (X_numVecs != Y_numVecs &&
118 X_numVecs != size_t (1) &&
119 Y_numVecs != size_t (1)) {
120 std::ostringstream os;
121 os << "Tpetra::idot: X.getNumVectors() = " << X_numVecs
122 << " != Y.getNumVectors() = " << Y_numVecs
123 << ", but neither is 1.";
124 throw std::invalid_argument (os.str ());
125 }
126 auto X_lcl = GetReadOnly<MV>::template get<exec_space>(X);
127 auto Y_lcl = GetReadOnly<MV>::template get<exec_space>(Y);
128 //Can the multivector dot kernel be used?
129 bool useMVDot = X.isConstantStride() && Y.isConstantStride() && X_numVecs == Y_numVecs;
130 if(useMVDot)
131 {
132 if (numVecs == size_t (1)) {
133 auto X_lcl_1d = Kokkos::subview (X_lcl, rowRange, 0);
134 auto Y_lcl_1d = Kokkos::subview (Y_lcl, rowRange, 0);
135 auto result_0d = Kokkos::subview (localResultUnmanaged, 0);
136 KokkosBlas::dot (result_0d, X_lcl_1d, Y_lcl_1d);
137 }
138 else {
139 auto X_lcl_2d = Kokkos::subview (X_lcl, rowRange, pair_type (0, X_numVecs));
140 auto Y_lcl_2d = Kokkos::subview (Y_lcl, rowRange, pair_type (0, Y_numVecs));
141 KokkosBlas::dot (localResultUnmanaged, X_lcl_2d, Y_lcl_2d);
142 }
143 }
144 else
145 {
146 auto XWhichVectors = Tpetra::getMultiVectorWhichVectors(X);
147 auto YWhichVectors = Tpetra::getMultiVectorWhichVectors(Y);
148 //Need to compute each dot individually, using 1D subviews of X_lcl and Y_lcl
149 for(size_t vec = 0; vec < numVecs; vec++) {
150 //Get the Vectors for each column of X and Y that will be dotted together.
151 //Note: "which vectors" is not populated for constant stride MVs (but it's the identity mapping)
152 size_t Xj = (numVecs == X_numVecs) ? vec : 0;
153 Xj = X.isConstantStride() ? Xj : XWhichVectors[Xj];
154 size_t Yj = (numVecs == Y_numVecs) ? vec : 0;
155 Yj = Y.isConstantStride() ? Yj : YWhichVectors[Yj];
156 auto Xcol = Kokkos::subview(X_lcl, rowRange, Xj);
157 auto Ycol = Kokkos::subview(Y_lcl, rowRange, Yj);
158 //Compute the rank-1 dot product, and place the result in an element of localResult
159 KokkosBlas::dot(Kokkos::subview(localResultUnmanaged, vec), Xcol, Ycol);
160 }
161 }
162}
163
164//Helper to avoid extra instantiations of KokkosBlas::dot and iallreduce.
165template<typename MV, typename ResultView>
166struct IdotHelper
167{
168 using dot_type = typename MV::dot_type;
169
170 //First version: use result directly
171 template<typename exec_space>
172 static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
173 const ResultView& globalResult, const MV& X, const MV& Y,
174 typename std::enable_if<Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
175 {
176 constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
177 idotLocal<MV, ResultView, runOnDevice>(globalResult, X, Y);
178 //Fence because we're giving result of device kernel to MPI
179 if(runOnDevice)
180 exec_space().fence();
181 auto comm = X.getMap()->getComm();
182 return iallreduce(globalResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
183 }
184
185 //Second version: use a temporary local result, because exec_space can't write to globalResult
186 template<typename exec_space>
187 static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
188 const ResultView& globalResult, const MV& X, const MV& Y,
189 typename std::enable_if<!Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
190 {
191 constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
192 Kokkos::View<dot_type*, typename exec_space::memory_space> localResult(Kokkos::ViewAllocateWithoutInitializing("idot:localResult"), X.getNumVectors());
193 idotLocal<MV, decltype(localResult), runOnDevice>(localResult, X, Y);
194 //Fence because we're giving result of device kernel to MPI
195 if(runOnDevice)
196 exec_space().fence();
197 auto comm = X.getMap()->getComm();
198 return iallreduce(localResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
199 }
200};
201
204template<class MV, class ResultView>
205std::shared_ptr< ::Tpetra::Details::CommRequest>
206idotImpl(const ResultView& globalResult,
207 const MV& X,
208 const MV& Y)
209{
210 static_assert(std::is_same<typename ResultView::non_const_value_type, typename MV::dot_type>::value,
211 "Tpetra::idot: result view's element type must match MV::dot_type");
212
213 // Execution space to use for dot kernel(s) is whichever has access to up-to-date X.
214 if(X.need_sync_device())
215 {
216 //run on host.
217 return IdotHelper<MV, ResultView>::template run<Kokkos::DefaultHostExecutionSpace>(globalResult, X, Y);
218 }
219 else
220 {
221 //run on device.
222 return IdotHelper<MV, ResultView>::template run<typename MV::execution_space>(globalResult, X, Y);
223 }
224}
225} // namespace Details
226
227//
228// SKIP DOWN TO HERE
229//
230
283template<class SC, class LO, class GO, class NT>
284std::shared_ptr< ::Tpetra::Details::CommRequest>
285idot (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type* resultRaw,
286 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
287 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
288{
289 using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
290 const size_t X_numVecs = X.getNumVectors ();
291 const size_t Y_numVecs = Y.getNumVectors ();
292 const size_t numVecs = (X_numVecs > Y_numVecs) ? X_numVecs : Y_numVecs;
293 Kokkos::View<dot_type*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
294 resultView(resultRaw, numVecs);
295 return Details::idotImpl(resultView, X, Y);
296}
297
360template<class SC, class LO, class GO, class NT>
361std::shared_ptr< ::Tpetra::Details::CommRequest>
362idot (const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
363 typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>& result,
364 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
365 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
366{
367 return Details::idotImpl(result, X, Y);
368}
369
411template<class SC, class LO, class GO, class NT>
412std::shared_ptr< ::Tpetra::Details::CommRequest>
413idot (const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
414 typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>& result,
415 const ::Tpetra::Vector<SC, LO, GO, NT>& X,
416 const ::Tpetra::Vector<SC, LO, GO, NT>& Y)
417{
418 using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
419 using result_device_t = typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type;
420 Kokkos::View<dot_type*, result_device_t, Kokkos::MemoryTraits<Kokkos::Unmanaged>> result1D(result.data(), 1);
421 return Details::idotImpl(result1D, X, Y);
422}
423
424} // namespace Tpetra
425
426#endif // TPETRA_DETAILS_IDOT_HPP
Declaration of Tpetra::iallreduce.
Implementation details of Tpetra.
std::shared_ptr< ::Tpetra::Details::CommRequest > idotImpl(const ResultView &globalResult, const MV &X, const MV &Y)
Internal (common) version of idot, a global dot product that uses a non-blocking MPI reduction.
void idotLocal(const ResultView &localResult, const MV &X, const MV &Y)
Compute dot product locally. Where the kernel runs controlled by runOnDevice.
Definition: Tpetra_idot.hpp:97
Namespace Tpetra contains the class and methods constituting the Tpetra library.
std::shared_ptr< ::Tpetra::Details::CommRequest > idot(typename ::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y)
Nonblocking dot product, with either Tpetra::MultiVector or Tpetra::Vector inputs,...