Tpetra parallel linear algebra Version of the Day
Tpetra_Details_Merge.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_DETAILS_MERGE_HPP
43#define TPETRA_DETAILS_MERGE_HPP
44
45#include "TpetraCore_config.h"
46#include "Teuchos_TestForException.hpp"
47#include <algorithm> // std::sort
48#include <utility> // std::pair, std::make_pair
49#include <stdexcept>
50
51namespace Tpetra {
52namespace Details {
53
72template<class OrdinalType, class IndexType>
73IndexType
74countMergeUnsortedIndices (const OrdinalType curInds[],
75 const IndexType numCurInds,
76 const OrdinalType inputInds[],
77 const IndexType numInputInds)
78{
79 IndexType mergeCount = 0;
80
81 if (numCurInds <= numInputInds) {
82 // More input than current entries, so iterate linearly over
83 // input and scan current entries repeatedly.
84 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
85 const OrdinalType inVal = inputInds[inPos];
86 for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
87 if (curInds[curPos] == inVal) {
88 ++mergeCount;
89 }
90 }
91 }
92 }
93 else { // numCurInds > numInputInds
94 // More current entries than input, so iterate linearly over
95 // current entries and scan input repeatedly.
96 for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
97 const OrdinalType curVal = curInds[curPos];
98 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
99 if (inputInds[inPos] == curVal) {
100 ++mergeCount;
101 }
102 }
103 }
104 }
105
106#ifdef HAVE_TPETRA_DEBUG
107 TEUCHOS_TEST_FOR_EXCEPTION
108 (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
109 mergeCount << " > numInputInds = " << numInputInds << ".");
110#endif // HAVE_TPETRA_DEBUG
111 return mergeCount;
112}
113
131template<class OrdinalType, class IndexType>
132IndexType
133countMergeSortedIndices (const OrdinalType curInds[],
134 const IndexType numCurInds,
135 const OrdinalType inputInds[],
136 const IndexType numInputInds)
137{
138 // Only count possible merges; don't merge yet. If the row
139 // doesn't have enough space, we want to return without side
140 // effects.
141 IndexType curPos = 0;
142 IndexType inPos = 0;
143 IndexType mergeCount = 0;
144 while (inPos < numInputInds && curPos < numCurInds) {
145 const OrdinalType inVal = inputInds[inPos];
146 const OrdinalType curVal = curInds[curPos];
147
148 if (curVal == inVal) { // can merge
149 ++mergeCount;
150 ++inPos; // go on to next input
151 } else if (curVal < inVal) {
152 ++curPos; // go on to next row entry
153 } else { // curVal > inVal
154 ++inPos; // can't merge it ever, since row entries sorted
155 }
156 }
157
158#ifdef HAVE_TPETRA_DEBUG
159 TEUCHOS_TEST_FOR_EXCEPTION
160 (inPos > numInputInds, std::logic_error, "inPos = " << inPos <<
161 " > numInputInds = " << numInputInds << ".");
162 TEUCHOS_TEST_FOR_EXCEPTION
163 (curPos > numCurInds, std::logic_error, "curPos = " << curPos <<
164 " > numCurInds = " << numCurInds << ".");
165 TEUCHOS_TEST_FOR_EXCEPTION
166 (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
167 mergeCount << " > numInputInds = " << numInputInds << ".");
168#endif // HAVE_TPETRA_DEBUG
169
170 // At this point, 2 situations are possible:
171 //
172 // 1. inPos == numInputInds: We looked at all inputs. Some
173 // (mergeCount of them) could have been merged.
174 // 2. inPos < numInputInds: We didn't get to look at all inputs.
175 // Since the inputs are sorted, we know that those inputs we
176 // didn't examine weren't mergeable.
177 //
178 // Either way, mergeCount gives the number of mergeable inputs.
179 return mergeCount;
180}
181
182
203template<class OrdinalType, class IndexType>
204std::pair<bool, IndexType>
205mergeSortedIndices (OrdinalType curInds[],
206 const IndexType midPos,
207 const IndexType endPos,
208 const OrdinalType inputInds[],
209 const IndexType numInputInds)
210{
211 // Optimize for the following cases, in decreasing order of
212 // optimization concern:
213 //
214 // a. Current row has allocated space but no entries
215 // b. All input indices already in the graph
216 //
217 // If the row has insufficient space for a merge, don't do
218 // anything! Just return an upper bound on the number of extra
219 // entries required to fit everything. This imposes extra cost,
220 // but correctly supports the count, allocate, fill, compute
221 // pattern. (If some entries were merged in and others weren't,
222 // how would you know which got merged in? CrsGraph insert is
223 // idempotent, but CrsMatrix insert does a += on the value and
224 // is therefore not idempotent.)
225 if (midPos == 0) {
226 // Current row has no entries, but may have preallocated space.
227 if (endPos >= numInputInds) {
228 // Sufficient space for new entries; copy directly.
229 for (IndexType k = 0; k < numInputInds; ++k) {
230 curInds[k] = inputInds[k];
231 }
232 std::sort (curInds, curInds + numInputInds);
233 return std::make_pair (true, numInputInds);
234 }
235 else { // not enough space
236 return std::make_pair (false, numInputInds);
237 }
238 }
239 else { // current row contains indices, requiring merge
240 // Only count possible merges; don't merge yet. If the row
241 // doesn't have enough space, we want to return without side
242 // effects.
243 const IndexType mergeCount =
244 countMergeSortedIndices<OrdinalType, IndexType> (curInds, midPos,
245 inputInds,
246 numInputInds);
247 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
248 const IndexType newRowLen = midPos + extraSpaceNeeded;
249 if (newRowLen > endPos) {
250 return std::make_pair (false, newRowLen);
251 }
252 else { // we have enough space; merge in
253 IndexType curPos = 0;
254 IndexType inPos = 0;
255 IndexType newPos = midPos;
256 while (inPos < numInputInds && curPos < midPos) {
257 const OrdinalType inVal = inputInds[inPos];
258 const OrdinalType curVal = curInds[curPos];
259
260 if (curVal == inVal) { // can merge
261 ++inPos; // merge and go on to next input
262 } else if (curVal < inVal) {
263 ++curPos; // go on to next row entry
264 } else { // curVal > inVal
265 // The input doesn't exist in the row.
266 // Copy it to the end; we'll sort it in later.
267 curInds[newPos] = inVal;
268 ++newPos;
269 ++inPos; // move on to next input
270 }
271 }
272
273 // If any inputs remain, and the current row has space for them,
274 // then copy them in. We'll sort them later.
275 for (; inPos < numInputInds && newPos < newRowLen; ++inPos, ++newPos) {
276 curInds[newPos] = inputInds[inPos];
277 }
278
279#ifdef HAVE_TPETRA_DEBUG
280 TEUCHOS_TEST_FOR_EXCEPTION
281 (newPos != newRowLen, std::logic_error, "mergeSortedIndices: newPos = "
282 << newPos << " != newRowLen = " << newRowLen << " = " << midPos <<
283 " + " << extraSpaceNeeded << ". Please report this bug to the Tpetra "
284 "developers.");
285#endif // HAVE_TPETRA_DEBUG
286
287 if (newPos != midPos) { // new entries at end; sort them in
288 // FIXME (mfh 03 Jan 2016) Rather than sorting, it would
289 // be faster (linear time) just to iterate backwards
290 // through the current entries, pushing them over to make
291 // room for unmerged input. However, I'm not so worried
292 // about the asymptotics here, because dense rows in a
293 // sparse matrix are ungood anyway.
294 std::sort (curInds, curInds + newPos);
295 }
296 return std::make_pair (true, newPos);
297 }
298 }
299}
300
301
323template<class OrdinalType, class IndexType>
324std::pair<bool, IndexType>
325mergeUnsortedIndices (OrdinalType curInds[],
326 const IndexType midPos,
327 const IndexType endPos,
328 const OrdinalType inputInds[],
329 const IndexType numInputInds)
330{
331 // Optimize for the following cases, in decreasing order of
332 // optimization concern:
333 //
334 // a. Current row has allocated space but no entries
335 // b. All input indices already in the graph
336 //
337 // If the row has insufficient space for a merge, don't do
338 // anything! Just return an upper bound on the number of extra
339 // entries required to fit everything. This imposes extra cost,
340 // but correctly supports the count, allocate, fill, compute
341 // pattern. (If some entries were merged in and others weren't,
342 // how would you know which got merged in? CrsGraph insert is
343 // idempotent, but CrsMatrix insert does a += on the value and
344 // is therefore not idempotent.)
345 if (midPos == 0) {
346 // Current row has no entries, but may have preallocated space.
347 if (endPos >= numInputInds) {
348 // Sufficient space for new entries; copy directly.
349 for (IndexType k = 0; k < numInputInds; ++k) {
350 curInds[k] = inputInds[k];
351 }
352 return std::make_pair (true, numInputInds);
353 }
354 else { // not enough space
355 return std::make_pair (false, numInputInds);
356 }
357 }
358 else { // current row contains indices, requiring merge
359 // Only count possible merges; don't merge yet. If the row
360 // doesn't have enough space, we want to return without side
361 // effects.
362 const IndexType mergeCount =
363 countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
364 inputInds,
365 numInputInds);
366 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
367 const IndexType newRowLen = midPos + extraSpaceNeeded;
368 if (newRowLen > endPos) {
369 return std::make_pair (false, newRowLen);
370 }
371 else { // we have enough space; merge in
372 // Iterate linearly over input. Scan current entries
373 // repeatedly. Add new entries at end.
374 IndexType newPos = midPos;
375 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
376 const OrdinalType inVal = inputInds[inPos];
377 bool merged = false;
378 for (IndexType curPos = 0; curPos < midPos; ++curPos) {
379 if (curInds[curPos] == inVal) {
380 merged = true;
381 }
382 }
383 if (! merged) {
384 curInds[newPos] = inVal;
385 ++newPos;
386 }
387 }
388 return std::make_pair (true, newPos);
389 }
390 }
391}
392
417template<class OrdinalType, class ValueType, class IndexType>
418std::pair<bool, IndexType>
419mergeUnsortedIndicesAndValues (OrdinalType curInds[],
420 ValueType curVals[],
421 const IndexType midPos,
422 const IndexType endPos,
423 const OrdinalType inputInds[],
424 const ValueType inputVals[],
425 const IndexType numInputInds)
426{
427 // Optimize for the following cases, in decreasing order of
428 // optimization concern:
429 //
430 // a. Current row has allocated space but no entries
431 // b. All input indices already in the graph
432 //
433 // If the row has insufficient space for a merge, don't do
434 // anything! Just return an upper bound on the number of extra
435 // entries required to fit everything. This imposes extra cost,
436 // but correctly supports the count, allocate, fill, compute
437 // pattern. (If some entries were merged in and others weren't,
438 // how would you know which got merged in? CrsGraph insert is
439 // idempotent, but CrsMatrix insert does a += on the value and
440 // is therefore not idempotent.)
441 if (midPos == 0) {
442 // Current row has no entries, but may have preallocated space.
443 if (endPos >= numInputInds) {
444 // Sufficient space for new entries; copy directly.
445 for (IndexType k = 0; k < numInputInds; ++k) {
446 curInds[k] = inputInds[k];
447 curVals[k] = inputVals[k];
448 }
449 return std::make_pair (true, numInputInds);
450 }
451 else { // not enough space
452 return std::make_pair (false, numInputInds);
453 }
454 }
455 else { // current row contains indices, requiring merge
456 // Only count possible merges; don't merge yet. If the row
457 // doesn't have enough space, we want to return without side
458 // effects.
459 const IndexType mergeCount =
460 countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
461 inputInds,
462 numInputInds);
463 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
464 const IndexType newRowLen = midPos + extraSpaceNeeded;
465 if (newRowLen > endPos) {
466 return std::make_pair (false, newRowLen);
467 }
468 else { // we have enough space; merge in
469 // Iterate linearly over input. Scan current entries
470 // repeatedly. Add new entries at end.
471 IndexType newPos = midPos;
472 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
473 const OrdinalType inInd = inputInds[inPos];
474 bool merged = false;
475 for (IndexType curPos = 0; curPos < midPos; ++curPos) {
476 if (curInds[curPos] == inInd) {
477 merged = true;
478 curVals[curPos] += inputVals[inPos];
479 }
480 }
481 if (! merged) {
482 curInds[newPos] = inInd;
483 curVals[newPos] = inputVals[inPos];
484 ++newPos;
485 }
486 }
487 return std::make_pair (true, newPos);
488 }
489 }
490}
491
492} // namespace Details
493} // namespace Tpetra
494
495#endif // TPETRA_DETAILS_MERGE_HPP
Implementation details of Tpetra.
std::pair< bool, IndexType > mergeSortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row's column indices, assuming that both the curr...
IndexType countMergeSortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row's column indices, assuming that both the curr...
IndexType countMergeUnsortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndicesAndValues(OrdinalType curInds[], ValueType curVals[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const ValueType inputVals[], const IndexType numInputInds)
Attempt to merge the input indices and values into the current row's column indices and corresponding...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.