Teuchos - Trilinos Tools Package Version of the Day
Teuchos_DefaultMpiComm.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Teuchos: Common Tools Package
5// Copyright (2004) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef TEUCHOS_MPI_COMM_HPP
43#define TEUCHOS_MPI_COMM_HPP
44
49
51
52// If MPI is not enabled, disable the contents of this file.
53#ifdef HAVE_TEUCHOS_MPI
54
55#include "Teuchos_Comm.hpp"
56#include "Teuchos_CommUtilities.hpp"
58#include "Teuchos_OpaqueWrapper.hpp"
60#include "Teuchos_SerializationTraitsHelpers.hpp"
61#include "Teuchos_Workspace.hpp"
63#include "Teuchos_as.hpp"
64#include "Teuchos_Assert.hpp"
65#include <mpi.h>
66#include <iterator>
67
68// This must be defined globally for the whole program!
69//#define TEUCHOS_MPI_COMM_DUMP
70
71#ifdef TEUCHOS_MPI_COMM_DUMP
72# include "Teuchos_VerboseObject.hpp"
73#endif
74
75namespace Teuchos {
76
/// \brief Turn an MPI error code into a string for use in messages.
///
/// Only declared here.  Within this file it is called with the
/// integer return value of an MPI function in order to build
/// exception messages.
///
/// \param err [in] Error code returned by an MPI function.
std::string mpiErrorCodeToString (const int err);
80
81namespace details {
95 void safeCommFree (MPI_Comm* comm);
96
101 int setCommErrhandler (MPI_Comm comm, MPI_Errhandler handler);
102
103} // namespace details
104
#ifdef TEUCHOS_MPI_COMM_DUMP
/// \brief Debug helper: print every entry of a buffer, one per line.
///
/// Only compiled when TEUCHOS_MPI_COMM_DUMP is defined.
///
/// \param funcName [in] Label of the calling function; printed in the
///   header line.
/// \param buffName [in] Label of the buffer; printed in the header
///   line and in front of each entry.
/// \param bytes [in] Number of entries of \c buff to print.
/// \param buff [in] The buffer to print.
template<typename Ordinal, typename T>
void dumpBuffer(
  const std::string &funcName, const std::string &buffName,
  const Ordinal bytes, const T buff[]
  )
{
  // The stream must be declared before it is used below; write to
  // the process' default verbose output stream.
  Teuchos::RCP<Teuchos::FancyOStream>
    out = Teuchos::VerboseObjectBase::getDefaultOStream();
  Teuchos::OSTab tab(out);
  *out
    << "\n" << funcName << "::" << buffName << ":\n";
  tab.incrTab();
  for( Ordinal i = 0; i < bytes; ++i ) {
    *out << buffName << "[" << i << "] = '" << buff[i] << "'\n";
  }
  *out << "\n";
}
#endif // TEUCHOS_MPI_COMM_DUMP
124
136template<class OrdinalType>
137class MpiCommStatus : public CommStatus<OrdinalType> {
138public:
139 MpiCommStatus (MPI_Status status) : status_ (status) {}
140
142 virtual ~MpiCommStatus() {}
143
145 OrdinalType getSourceRank () { return status_.MPI_SOURCE; }
146
148 OrdinalType getTag () { return status_.MPI_TAG; }
149
151 OrdinalType getError () { return status_.MPI_ERROR; }
152
153private:
155 MpiCommStatus ();
156
158 MPI_Status status_;
159};
160
164template<class OrdinalType>
165inline RCP<MpiCommStatus<OrdinalType> >
166mpiCommStatus (MPI_Status rawMpiStatus)
167{
168 return rcp (new MpiCommStatus<OrdinalType> (rawMpiStatus));
169}
170
186template<class OrdinalType>
187class MpiCommRequestBase : public CommRequest<OrdinalType> {
188public:
190 MpiCommRequestBase () :
191 rawMpiRequest_ (MPI_REQUEST_NULL)
192 {}
193
195 MpiCommRequestBase (MPI_Request rawMpiRequest) :
196 rawMpiRequest_ (rawMpiRequest)
197 {}
198
206 MPI_Request releaseRawMpiRequest()
207 {
208 MPI_Request tmp_rawMpiRequest = rawMpiRequest_;
209 rawMpiRequest_ = MPI_REQUEST_NULL;
210 return tmp_rawMpiRequest;
211 }
212
214 bool isNull() const {
215 return rawMpiRequest_ == MPI_REQUEST_NULL;
216 }
217
218 bool isReady() {
219 MPI_Status rawMpiStatus;
220 int flag = 0;
221
222 MPI_Test(&rawMpiRequest_, &flag, &rawMpiStatus);
223
224 return (flag != 0);
225 }
226
232 RCP<CommStatus<OrdinalType> > wait () {
233 MPI_Status rawMpiStatus;
234 // Whether this function satisfies the strong exception guarantee
235 // depends on whether MPI_Wait modifies its input request on error.
236 const int err = MPI_Wait (&rawMpiRequest_, &rawMpiStatus);
238 err != MPI_SUCCESS, std::runtime_error,
239 "Teuchos: MPI_Wait() failed with error \""
240 << mpiErrorCodeToString (err));
241 // MPI_Wait sets the MPI_Request to MPI_REQUEST_NULL on success.
242 return mpiCommStatus<OrdinalType> (rawMpiStatus);
243 }
244
249 RCP<CommStatus<OrdinalType> > cancel () {
250 if (rawMpiRequest_ == MPI_REQUEST_NULL) {
251 return null;
252 }
253 else {
254 int err = MPI_Cancel (&rawMpiRequest_);
256 err != MPI_SUCCESS, std::runtime_error,
257 "Teuchos: MPI_Cancel failed with the following error: "
258 << mpiErrorCodeToString (err));
259
260 // Wait on the request. If successful, MPI_Wait will set the
261 // MPI_Request to MPI_REQUEST_NULL. The returned status may
262 // still be useful; for example, one may call MPI_Test_cancelled
263 // to test an MPI_Status from a nonblocking send.
264 MPI_Status status;
265 err = MPI_Wait (&rawMpiRequest_, &status);
266 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
267 "Teuchos::MpiCommStatus::cancel: MPI_Wait failed with the following "
268 "error: " << mpiErrorCodeToString (err));
269 return mpiCommStatus<OrdinalType> (status);
270 }
271 }
272
274 virtual ~MpiCommRequestBase () {
275 if (rawMpiRequest_ != MPI_REQUEST_NULL) {
276 // We're in a destructor, so don't throw errors. However, if
277 // MPI_Cancel fails, it's probably a bad idea to call MPI_Wait.
278 const int err = MPI_Cancel (&rawMpiRequest_);
279 if (err == MPI_SUCCESS) {
280 // The MPI_Cancel succeeded. Now wait on the request. Ignore
281 // any reported error, since we can't do anything about those
282 // in the destructor (other than kill the program). If
283 // successful, MPI_Wait will set the MPI_Request to
284 // MPI_REQUEST_NULL. We ignore the returned MPI_Status, since
285 // if the user let the request fall out of scope, she must not
286 // care about the status.
287 //
288 // mfh 21 Oct 2012: The MPI standard requires completing a
289 // canceled request by calling a function like MPI_Wait,
290 // MPI_Test, or MPI_Request_free. MPI_Wait on a canceled
291 // request behaves like a local operation (it does not
292 // communicate or block waiting for communication). One could
293 // also call MPI_Request_free instead of MPI_Wait, but
294 // MPI_Request_free is intended more for persistent requests
295 // (created with functions like MPI_Recv_init).
296 (void) MPI_Wait (&rawMpiRequest_, MPI_STATUS_IGNORE);
297 }
298 }
299 }
300
301private:
303 MPI_Request rawMpiRequest_;
304};
305
321template<class OrdinalType>
322class MpiCommRequest : public MpiCommRequestBase<OrdinalType> {
323public:
325 MpiCommRequest () :
326 MpiCommRequestBase<OrdinalType> (MPI_REQUEST_NULL),
327 numBytes_ (0)
328 {}
329
331 MpiCommRequest (MPI_Request rawMpiRequest,
332 const ArrayView<char>::size_type numBytesInMessage) :
333 MpiCommRequestBase<OrdinalType> (rawMpiRequest),
334 numBytes_ (numBytesInMessage)
335 {}
336
342 ArrayView<char>::size_type numBytes () const {
343 return numBytes_;
344 }
345
347 virtual ~MpiCommRequest () {}
348
349private:
352};
353
362template<class OrdinalType>
363inline RCP<MpiCommRequest<OrdinalType> >
364mpiCommRequest (MPI_Request rawMpiRequest,
365 const ArrayView<char>::size_type numBytes)
366{
367 return rcp (new MpiCommRequest<OrdinalType> (rawMpiRequest, numBytes));
368}
369
385template<typename Ordinal>
386class MpiComm : public Comm<Ordinal> {
387public:
389
390
411 explicit MpiComm (MPI_Comm rawMpiComm);
412
427 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm);
428
446 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm,
447 const int defaultTag);
448
465 MpiComm (const MpiComm<Ordinal>& other);
466
468 RCP<const OpaqueWrapper<MPI_Comm> > getRawMpiComm () const {
469 return rawMpiComm_;
470 }
471
536 void setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler);
537
539
541
543 virtual int getRank() const;
544
546 virtual int getSize() const;
547
549 virtual void barrier() const;
550
552 virtual void broadcast(
553 const int rootRank, const Ordinal bytes, char buffer[]
554 ) const;
555
557 virtual void
558 gather (const Ordinal sendBytes, const char sendBuffer[],
559 const Ordinal recvBytes, char recvBuffer[],
560 const int root) const;
562 virtual void gatherAll(
563 const Ordinal sendBytes, const char sendBuffer[]
564 ,const Ordinal recvBytes, char recvBuffer[]
565 ) const;
567 virtual void reduceAll(
568 const ValueTypeReductionOp<Ordinal,char> &reductOp
569 ,const Ordinal bytes, const char sendBuffer[], char globalReducts[]
570 ) const;
572 virtual void scan(
573 const ValueTypeReductionOp<Ordinal,char> &reductOp
574 ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
575 ) const;
577 virtual void send(
578 const Ordinal bytes, const char sendBuffer[], const int destRank
579 ) const;
581 virtual void
582 send (const Ordinal bytes,
583 const char sendBuffer[],
584 const int destRank,
585 const int tag) const;
587 virtual void ssend(
588 const Ordinal bytes, const char sendBuffer[], const int destRank
589 ) const;
591 virtual void
592 ssend (const Ordinal bytes,
593 const char sendBuffer[],
594 const int destRank,
595 const int tag) const;
597 virtual int receive(
598 const int sourceRank, const Ordinal bytes, char recvBuffer[]
599 ) const;
601 virtual void readySend(
602 const ArrayView<const char> &sendBuffer,
603 const int destRank
604 ) const;
606 virtual void
607 readySend (const Ordinal bytes,
608 const char sendBuffer[],
609 const int destRank,
610 const int tag) const;
612 virtual RCP<CommRequest<Ordinal> > isend(
613 const ArrayView<const char> &sendBuffer,
614 const int destRank
615 ) const;
617 virtual RCP<CommRequest<Ordinal> >
618 isend (const ArrayView<const char> &sendBuffer,
619 const int destRank,
620 const int tag) const;
622 virtual RCP<CommRequest<Ordinal> > ireceive(
623 const ArrayView<char> &Buffer,
624 const int sourceRank
625 ) const;
627 virtual RCP<CommRequest<Ordinal> >
628 ireceive (const ArrayView<char> &Buffer,
629 const int sourceRank,
630 const int tag) const;
632 virtual void waitAll(
633 const ArrayView<RCP<CommRequest<Ordinal> > > &requests
634 ) const;
636 virtual void
637 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
638 const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const;
640 virtual RCP<CommStatus<Ordinal> >
641 wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const;
643 virtual RCP< Comm<Ordinal> > duplicate() const;
645 virtual RCP< Comm<Ordinal> > split(const int color, const int key) const;
647 virtual RCP< Comm<Ordinal> > createSubcommunicator(
648 const ArrayView<const int>& ranks) const;
649
651
653
655 std::string description() const;
656
658
659 // These should be private but the PGI compiler requires them be public
660
661 static int const minTag_ = 26000; // These came from Teuchos::MpiComm???
662 static int const maxTag_ = 26099; // ""
663
669 int getTag () const { return tag_; }
670
671private:
672
676 void setupMembersFromComm();
677 static int tagCounter_;
678
686 RCP<const OpaqueWrapper<MPI_Comm> > rawMpiComm_;
687
689 int rank_;
690
692 int size_;
693
701 int tag_;
702
704 RCP<const OpaqueWrapper<MPI_Errhandler> > customErrorHandler_;
705
706 void assertRank(const int rank, const std::string &rankName) const;
707
708 // Not defined and not to be called!
709 MpiComm();
710
711#ifdef TEUCHOS_MPI_COMM_DUMP
712public:
713 static bool show_dump;
714#endif // TEUCHOS_MPI_COMM_DUMP
715
716};
717
718
732template<typename Ordinal>
733RCP<MpiComm<Ordinal> >
734createMpiComm(
735 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm
736 );
737
738
752template<typename Ordinal>
753RCP<MpiComm<Ordinal> >
754createMpiComm(
755 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm,
756 const int defaultTag
757 );
758
759
787template<typename Ordinal>
788MPI_Comm
789getRawMpiComm(const Comm<Ordinal> &comm);
790
791
792// ////////////////////////
793// Implementations
794
795
796// Static members
797
798
799template<typename Ordinal>
800int MpiComm<Ordinal>::tagCounter_ = MpiComm<Ordinal>::minTag_;
801
802
803// Constructors
804
805
806template<typename Ordinal>
807MpiComm<Ordinal>::
808MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm)
809{
811 rawMpiComm.get () == NULL, std::invalid_argument,
812 "Teuchos::MpiComm constructor: The input RCP is null.");
814 *rawMpiComm == MPI_COMM_NULL, std::invalid_argument,
815 "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL.");
816
817 rawMpiComm_ = rawMpiComm;
818
819 // mfh 09 Jul 2013: Please resist the temptation to modify the given
820 // MPI communicator's error handler here. See Bug 5943. Note that
821 // an MPI communicator's default error handler is
822 // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without
823 // returning an error code from the MPI function). Users who want
824 // MPI functions instead to return an error code if they encounter
825 // an error, should set the error handler to MPI_ERRORS_RETURN. DO
826 // NOT SET THE ERROR HANDLER HERE!!! Teuchos' MPI wrappers should
827 // always check the error code returned by an MPI function,
828 // regardless of the error handler. Users who want to set the error
829 // handler on an MpiComm may call its setErrorHandler method.
830
831 setupMembersFromComm ();
832}
833
834
835template<typename Ordinal>
836MpiComm<Ordinal>::
837MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm,
838 const int defaultTag)
839{
841 rawMpiComm.get () == NULL, std::invalid_argument,
842 "Teuchos::MpiComm constructor: The input RCP is null.");
844 *rawMpiComm == MPI_COMM_NULL, std::invalid_argument,
845 "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL.");
846
847 rawMpiComm_ = rawMpiComm;
848 // Set size_ (the number of processes in the communicator).
849 int err = MPI_Comm_size (*rawMpiComm_, &size_);
850 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
851 "Teuchos::MpiComm constructor: MPI_Comm_size failed with "
852 "error \"" << mpiErrorCodeToString (err) << "\".");
853 // Set rank_ (the calling process' rank).
854 err = MPI_Comm_rank (*rawMpiComm_, &rank_);
855 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
856 "Teuchos::MpiComm constructor: MPI_Comm_rank failed with "
857 "error \"" << mpiErrorCodeToString (err) << "\".");
858 tag_ = defaultTag; // set the default message tag
859}
860
861
862template<typename Ordinal>
863MpiComm<Ordinal>::MpiComm (MPI_Comm rawMpiComm)
864{
865 TEUCHOS_TEST_FOR_EXCEPTION(rawMpiComm == MPI_COMM_NULL,
866 std::invalid_argument, "Teuchos::MpiComm constructor: The given MPI_Comm "
867 "is MPI_COMM_NULL.");
868 // We don't supply a "free" function here, since this version of the
869 // constructor makes the caller responsible for freeing rawMpiComm
870 // after use if necessary.
871 rawMpiComm_ = opaqueWrapper<MPI_Comm> (rawMpiComm);
872
873 // mfh 09 Jul 2013: Please resist the temptation to modify the given
874 // MPI communicator's error handler here. See Bug 5943. Note that
875 // an MPI communicator's default error handler is
876 // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without
877 // returning an error code from the MPI function). Users who want
878 // MPI functions instead to return an error code if they encounter
879 // an error, should set the error handler to MPI_ERRORS_RETURN. DO
880 // NOT SET THE ERROR HANDLER HERE!!! Teuchos' MPI wrappers should
881 // always check the error code returned by an MPI function,
882 // regardless of the error handler. Users who want to set the error
883 // handler on an MpiComm may call its setErrorHandler method.
884
885 setupMembersFromComm ();
886}
887
888
889template<typename Ordinal>
890MpiComm<Ordinal>::MpiComm (const MpiComm<Ordinal>& other) :
891 rawMpiComm_ (opaqueWrapper<MPI_Comm> (MPI_COMM_NULL)) // <- This will be set below
892{
893 // These are logic errors, since they violate MpiComm's invariants.
894 RCP<const OpaqueWrapper<MPI_Comm> > origCommPtr = other.getRawMpiComm ();
895 TEUCHOS_TEST_FOR_EXCEPTION(origCommPtr == null, std::logic_error,
896 "Teuchos::MpiComm copy constructor: "
897 "The input's getRawMpiComm() method returns null.");
898 MPI_Comm origComm = *origCommPtr;
899 TEUCHOS_TEST_FOR_EXCEPTION(origComm == MPI_COMM_NULL, std::logic_error,
900 "Teuchos::MpiComm copy constructor: "
901 "The input's raw MPI_Comm is MPI_COMM_NULL.");
902
903 // mfh 19 Oct 2012: Don't change the behavior of MpiComm's copy
904 // constructor for now. Later, we'll switch to the version that
905 // calls MPI_Comm_dup. For now, we just copy other's handle over.
906 // Note that the new MpiComm's tag is still different than the input
907 // MpiComm's tag. See Bug 5740.
908 if (true) {
909 rawMpiComm_ = origCommPtr;
910 }
911 else { // false (not run)
912 MPI_Comm newComm;
913 const int err = MPI_Comm_dup (origComm, &newComm);
914 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
915 "Teuchos::MpiComm copy constructor: MPI_Comm_dup failed with "
916 "the following error: " << mpiErrorCodeToString (err));
917 // No side effects until after everything has succeeded.
918 rawMpiComm_ = opaqueWrapper (newComm, details::safeCommFree);
919 }
920
921 setupMembersFromComm ();
922}
923
924
925template<typename Ordinal>
926void MpiComm<Ordinal>::setupMembersFromComm ()
927{
928 int err = MPI_Comm_size (*rawMpiComm_, &size_);
929 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
930 "Teuchos::MpiComm constructor: MPI_Comm_size failed with "
931 "error \"" << mpiErrorCodeToString (err) << "\".");
932 err = MPI_Comm_rank (*rawMpiComm_, &rank_);
933 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
934 "Teuchos::MpiComm constructor: MPI_Comm_rank failed with "
935 "error \"" << mpiErrorCodeToString (err) << "\".");
936
937 // Set the default tag to make unique across all communicators
938 if (tagCounter_ > maxTag_) {
939 tagCounter_ = minTag_;
940 }
941 tag_ = tagCounter_++;
942 // Ensure that the same tag is used on all processes.
943 //
944 // FIXME (mfh 09 Jul 2013) This would not be necessary if MpiComm
945 // were just to call MPI_Comm_dup (as every library should) when
946 // given its communicator. Of course, MPI_Comm_dup may also be
947 // implemented as a collective, and may even be more expensive than
948 // a broadcast. If we do decide to use MPI_Comm_dup, we can get rid
949 // of the broadcast below, and also get rid of tag_, tagCounter_,
950 // minTag_, and maxTag_.
951 MPI_Bcast (&tag_, 1, MPI_INT, 0, *rawMpiComm_);
952}
953
954
955template<typename Ordinal>
956void
957MpiComm<Ordinal>::
958setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler)
959{
960 if (! is_null (errHandler)) {
961 const int err = details::setCommErrhandler (*getRawMpiComm (), *errHandler);
962 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
963 "Teuchos::MpiComm: Setting the MPI_Comm's error handler failed with "
964 "error \"" << mpiErrorCodeToString (err) << "\".");
965 }
966 // Wait to set this until the end, in case setting the error handler
967 // doesn't succeed.
968 customErrorHandler_ = errHandler;
969}
970
971//
972// Overridden from Comm
973//
974
975template<typename Ordinal>
976int MpiComm<Ordinal>::getRank() const
977{
978 return rank_;
979}
980
981
982template<typename Ordinal>
983int MpiComm<Ordinal>::getSize() const
984{
985 return size_;
986}
987
988
989template<typename Ordinal>
990void MpiComm<Ordinal>::barrier() const
991{
992 TEUCHOS_COMM_TIME_MONITOR(
993 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::barrier()"
994 );
995 const int err = MPI_Barrier (*rawMpiComm_);
996 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
997 "Teuchos::MpiComm::barrier: MPI_Barrier failed with error \""
998 << mpiErrorCodeToString (err) << "\".");
999}
1000
1001
1002template<typename Ordinal>
1003void MpiComm<Ordinal>::broadcast(
1004 const int rootRank, const Ordinal bytes, char buffer[]
1005 ) const
1006{
1007 TEUCHOS_COMM_TIME_MONITOR(
1008 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::broadcast(...)"
1009 );
1010 const int err = MPI_Bcast (buffer, bytes, MPI_CHAR, rootRank, *rawMpiComm_);
1011 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1012 "Teuchos::MpiComm::broadcast: MPI_Bcast failed with error \""
1013 << mpiErrorCodeToString (err) << "\".");
1014}
1015
1016
1017template<typename Ordinal>
1018void MpiComm<Ordinal>::gatherAll(
1019 const Ordinal sendBytes, const char sendBuffer[],
1020 const Ordinal recvBytes, char recvBuffer[]
1021 ) const
1022{
1023 TEUCHOS_COMM_TIME_MONITOR(
1024 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gatherAll(...)"
1025 );
1026 TEUCHOS_ASSERT_EQUALITY((sendBytes*size_), recvBytes );
1027 const int err =
1028 MPI_Allgather (const_cast<char *>(sendBuffer), sendBytes, MPI_CHAR,
1029 recvBuffer, sendBytes, MPI_CHAR, *rawMpiComm_);
1030 // NOTE: 'sendBytes' is being sent above for the MPI arg recvcount (which is
1031 // very confusing in the MPI documentation) for MPI_Allgether(...).
1032
1033 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1034 "Teuchos::MpiComm::gatherAll: MPI_Allgather failed with error \""
1035 << mpiErrorCodeToString (err) << "\".");
1036}
1037
1038
1039template<typename Ordinal>
1040void
1041MpiComm<Ordinal>::gather (const Ordinal sendBytes,
1042 const char sendBuffer[],
1043 const Ordinal recvBytes,
1044 char recvBuffer[],
1045 const int root) const
1046{
1047 (void) recvBytes; // silence compile warning for "unused parameter"
1048
1049 TEUCHOS_COMM_TIME_MONITOR(
1050 "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gather(...)"
1051 );
1052 const int err =
1053 MPI_Gather (const_cast<char *> (sendBuffer), sendBytes, MPI_CHAR,
1054 recvBuffer, sendBytes, MPI_CHAR, root, *rawMpiComm_);
1055 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1056 "Teuchos::MpiComm::gather: MPI_Gather failed with error \""
1057 << mpiErrorCodeToString (err) << "\".");
1058}
1059
1060
1061template<typename Ordinal>
1062void
1063MpiComm<Ordinal>::
1064reduceAll (const ValueTypeReductionOp<Ordinal,char> &reductOp,
1065 const Ordinal bytes,
1066 const char sendBuffer[],
1067 char globalReducts[]) const
1068{
1069 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::reduceAll(...)" );
1070 int err = MPI_SUCCESS;
1071
1072 Details::MpiReductionOp<Ordinal> opWrap (reductOp);
1073 MPI_Op op = Details::setMpiReductionOp (opWrap);
1074
1075 // FIXME (mfh 23 Nov 2014) Ross decided to mash every type into
1076 // char. This can cause correctness issues if we're actually doing
1077 // a reduction over, say, double. Thus, he creates a custom
1078 // MPI_Datatype here that represents a contiguous block of char, so
1079 // that MPI doesn't split up the reduction type and thus do the sum
1080 // wrong. It's a hack but it works.
1081
1082 MPI_Datatype char_block;
1083 err = MPI_Type_contiguous (bytes, MPI_CHAR, &char_block);
1085 err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1086 "MPI_Type_contiguous failed with error \"" << mpiErrorCodeToString (err)
1087 << "\".");
1088 err = MPI_Type_commit (&char_block);
1090 err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1091 "MPI_Type_commit failed with error \"" << mpiErrorCodeToString (err)
1092 << "\".");
1093
1094 if (sendBuffer == globalReducts) {
1095 // NOTE (mfh 31 May 2017) This is only safe if the communicator is
1096 // NOT an intercomm. The usual case is that communicators are
1097 // intracomms.
1098 err = MPI_Allreduce (MPI_IN_PLACE, globalReducts, 1,
1099 char_block, op, *rawMpiComm_);
1100 }
1101 else {
1102 err = MPI_Allreduce (const_cast<char*> (sendBuffer), globalReducts, 1,
1103 char_block, op, *rawMpiComm_);
1104 }
1105 if (err != MPI_SUCCESS) {
1106 // Don't throw until we release the type resources we allocated
1107 // above. If freeing fails for some reason, let the memory leak
1108 // go; we already have more serious problems if MPI_Allreduce
1109 // doesn't work.
1110 (void) MPI_Type_free (&char_block);
1112 true, std::runtime_error, "Teuchos::reduceAll (MPI, custom op): "
1113 "MPI_Allreduce failed with error \"" << mpiErrorCodeToString (err)
1114 << "\".");
1115 }
1116 err = MPI_Type_free (&char_block);
1118 err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1119 "MPI_Type_free failed with error \"" << mpiErrorCodeToString (err)
1120 << "\".");
1121}
1122
1123
1124template<typename Ordinal>
1125void MpiComm<Ordinal>::scan(
1126 const ValueTypeReductionOp<Ordinal,char> &reductOp
1127 ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
1128 ) const
1129{
1130 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::scan(...)" );
1131
1132 Details::MpiReductionOp<Ordinal> opWrap (reductOp);
1133 MPI_Op op = Details::setMpiReductionOp (opWrap);
1134 const int err =
1135 MPI_Scan (const_cast<char*> (sendBuffer), scanReducts, bytes, MPI_CHAR,
1136 op, *rawMpiComm_);
1137 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1138 "Teuchos::MpiComm::scan: MPI_Scan() failed with error \""
1139 << mpiErrorCodeToString (err) << "\".");
1140}
1141
1142
1143template<typename Ordinal>
1144void
1145MpiComm<Ordinal>::send (const Ordinal bytes,
1146 const char sendBuffer[],
1147 const int destRank) const
1148{
1149 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" );
1150
1151#ifdef TEUCHOS_MPI_COMM_DUMP
1152 if(show_dump) {
1153 dumpBuffer<Ordinal,char>(
1154 "Teuchos::MpiComm<Ordinal>::send(...)"
1155 ,"sendBuffer", bytes, sendBuffer
1156 );
1157 }
1158#endif // TEUCHOS_MPI_COMM_DUMP
1159
1160 const int err = MPI_Send (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1161 destRank, tag_, *rawMpiComm_);
1162 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1163 "Teuchos::MpiComm::send: MPI_Send() failed with error \""
1164 << mpiErrorCodeToString (err) << "\".");
1165}
1166
1167
1168template<typename Ordinal>
1169void
1170MpiComm<Ordinal>::send (const Ordinal bytes,
1171 const char sendBuffer[],
1172 const int destRank,
1173 const int tag) const
1174{
1175 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" );
1176 const int err = MPI_Send (const_cast<char*> (sendBuffer), bytes, MPI_CHAR,
1177 destRank, tag, *rawMpiComm_);
1178 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1179 "Teuchos::MpiComm::send: MPI_Send() failed with error \""
1180 << mpiErrorCodeToString (err) << "\".");
1181}
1182
1183
1184template<typename Ordinal>
1185void
1186MpiComm<Ordinal>::ssend (const Ordinal bytes,
1187 const char sendBuffer[],
1188 const int destRank) const
1189{
1190 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" );
1191
1192#ifdef TEUCHOS_MPI_COMM_DUMP
1193 if(show_dump) {
1194 dumpBuffer<Ordinal,char>(
1195 "Teuchos::MpiComm<Ordinal>::send(...)"
1196 ,"sendBuffer", bytes, sendBuffer
1197 );
1198 }
1199#endif // TEUCHOS_MPI_COMM_DUMP
1200
1201 const int err = MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1202 destRank, tag_, *rawMpiComm_);
1203 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1204 "Teuchos::MpiComm::send: MPI_Ssend() failed with error \""
1205 << mpiErrorCodeToString (err) << "\".");
1206}
1207
1208template<typename Ordinal>
1209void
1210MpiComm<Ordinal>::ssend (const Ordinal bytes,
1211 const char sendBuffer[],
1212 const int destRank,
1213 const int tag) const
1214{
1215 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" );
1216 const int err =
1217 MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1218 destRank, tag, *rawMpiComm_);
1219 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1220 "Teuchos::MpiComm::send: MPI_Ssend() failed with error \""
1221 << mpiErrorCodeToString (err) << "\".");
1222}
1223
1224template<typename Ordinal>
1225void MpiComm<Ordinal>::readySend(
1226 const ArrayView<const char> &sendBuffer,
1227 const int destRank
1228 ) const
1229{
1230 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" );
1231
1232#ifdef TEUCHOS_MPI_COMM_DUMP
1233 if(show_dump) {
1234 dumpBuffer<Ordinal,char>(
1235 "Teuchos::MpiComm<Ordinal>::readySend(...)"
1236 ,"sendBuffer", bytes, sendBuffer
1237 );
1238 }
1239#endif // TEUCHOS_MPI_COMM_DUMP
1240
1241 const int err =
1242 MPI_Rsend (const_cast<char*>(sendBuffer.getRawPtr()), static_cast<int>(sendBuffer.size()),
1243 MPI_CHAR, destRank, tag_, *rawMpiComm_);
1244 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1245 "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \""
1246 << mpiErrorCodeToString (err) << "\".");
1247}
1248
1249
1250template<typename Ordinal>
1251void MpiComm<Ordinal>::
1252readySend (const Ordinal bytes,
1253 const char sendBuffer[],
1254 const int destRank,
1255 const int tag) const
1256{
1257 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" );
1258 const int err =
1259 MPI_Rsend (const_cast<char*> (sendBuffer), bytes,
1260 MPI_CHAR, destRank, tag, *rawMpiComm_);
1261 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1262 "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \""
1263 << mpiErrorCodeToString (err) << "\".");
1264}
1265
1266
1267template<typename Ordinal>
1268int
1269MpiComm<Ordinal>::receive (const int sourceRank,
1270 const Ordinal bytes,
1271 char recvBuffer[]) const
1272{
1273 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::receive(...)" );
1274
1275 // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1276 // will take an incoming message from any process, as long as the
1277 // tag matches.
1278 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1279
1280 MPI_Status status;
1281 const int err = MPI_Recv (recvBuffer, bytes, MPI_CHAR, theSrcRank, tag_,
1282 *rawMpiComm_, &status);
1283 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1284 "Teuchos::MpiComm::receive: MPI_Recv() failed with error \""
1285 << mpiErrorCodeToString (err) << "\".");
1286
1287#ifdef TEUCHOS_MPI_COMM_DUMP
1288 if (show_dump) {
1289 dumpBuffer<Ordinal,char> ("Teuchos::MpiComm<Ordinal>::receive(...)",
1290 "recvBuffer", bytes, recvBuffer);
1291 }
1292#endif // TEUCHOS_MPI_COMM_DUMP
1293
1294 // Returning the source rank is useful in the MPI_ANY_SOURCE case.
1295 return status.MPI_SOURCE;
1296}
1297
1298
1299template<typename Ordinal>
1300RCP<CommRequest<Ordinal> >
1301MpiComm<Ordinal>::isend (const ArrayView<const char> &sendBuffer,
1302 const int destRank) const
1303{
1304 using Teuchos::as;
1305 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" );
1306
1307 MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1308 const int err =
1309 MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()),
1310 as<Ordinal> (sendBuffer.size ()), MPI_CHAR,
1311 destRank, tag_, *rawMpiComm_, &rawMpiRequest);
1312 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1313 "Teuchos::MpiComm::isend: MPI_Isend() failed with error \""
1314 << mpiErrorCodeToString (err) << "\".");
1315
1316 return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ());
1317}
1318
1319
1320template<typename Ordinal>
1321RCP<CommRequest<Ordinal> >
1322MpiComm<Ordinal>::
1323isend (const ArrayView<const char> &sendBuffer,
1324 const int destRank,
1325 const int tag) const
1326{
1327 using Teuchos::as;
1328 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" );
1329
1330 MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1331 const int err =
1332 MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()),
1333 as<Ordinal> (sendBuffer.size ()), MPI_CHAR,
1334 destRank, tag, *rawMpiComm_, &rawMpiRequest);
1335 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1336 "Teuchos::MpiComm::isend: MPI_Isend() failed with error \""
1337 << mpiErrorCodeToString (err) << "\".");
1338
1339 return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ());
1340}
1341
1342
1343template<typename Ordinal>
1344RCP<CommRequest<Ordinal> >
1345MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer,
1346 const int sourceRank) const
1347{
1348 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" );
1349
1350 // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1351 // will take an incoming message from any process, as long as the
1352 // tag matches.
1353 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1354
1355 MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1356 const int err =
1357 MPI_Irecv (const_cast<char*>(recvBuffer.getRawPtr()), recvBuffer.size(),
1358 MPI_CHAR, theSrcRank, tag_, *rawMpiComm_, &rawMpiRequest);
1359 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1360 "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \""
1361 << mpiErrorCodeToString (err) << "\".");
1362
1363 return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size());
1364}
1365
1366template<typename Ordinal>
1367RCP<CommRequest<Ordinal> >
1368MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer,
1369 const int sourceRank,
1370 const int tag) const
1371{
1372 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" );
1373
1374 // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1375 // will take an incoming message from any process, as long as the
1376 // tag matches.
1377 const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1378
1379 MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1380 const int err =
1381 MPI_Irecv (const_cast<char*> (recvBuffer.getRawPtr ()), recvBuffer.size (),
1382 MPI_CHAR, theSrcRank, tag, *rawMpiComm_, &rawMpiRequest);
1383 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1384 "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \""
1385 << mpiErrorCodeToString (err) << "\".");
1386
1387 return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size ());
1388}
1389
1390namespace {
1391 // Called by the two-argument MpiComm::waitAll() variant.
1392 template<typename Ordinal>
1393 void
1394 waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
1395 const ArrayView<MPI_Status>& rawMpiStatuses)
1396 {
1397 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1398 const size_type count = requests.size();
1399 // waitAllImpl() is not meant to be called by users, so it's a bug
1400 // for the two views to have different lengths.
1401 TEUCHOS_TEST_FOR_EXCEPTION(rawMpiStatuses.size() != count,
1402 std::logic_error, "Teuchos::MpiComm's waitAllImpl: rawMpiStatus.size() = "
1403 << rawMpiStatuses.size() << " != requests.size() = " << requests.size()
1404 << ". Please report this bug to the Tpetra developers.");
1405 if (count == 0) {
1406 return; // No requests on which to wait
1407 }
1408
1409 // MpiComm wraps MPI and can't expose any MPI structs or opaque
1410 // objects. Thus, we have to unpack requests into a separate array.
1411 // If that's too slow, then your code should just call into MPI
1412 // directly.
1413 //
1414 // Pull out the raw MPI requests from the wrapped requests.
1415 // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL, but
1416 // we keep track just to inform the user.
1417 bool someNullRequests = false;
1418 Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL);
1419 for (int i = 0; i < count; ++i) {
1420 RCP<CommRequest<Ordinal> > request = requests[i];
1421 if (! is_null (request)) {
1422 RCP<MpiCommRequestBase<Ordinal> > mpiRequest =
1423 rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request);
1424 // releaseRawMpiRequest() sets the MpiCommRequest's raw
1425 // MPI_Request to MPI_REQUEST_NULL. This makes waitAll() not
1426 // satisfy the strong exception guarantee. That's OK because
1427 // MPI_Waitall() doesn't promise that it satisfies the strong
1428 // exception guarantee, and we would rather conservatively
1429 // invalidate the handles than leave dangling requests around
1430 // and risk users trying to wait on the same request twice.
1431 rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest();
1432 }
1433 else { // Null requests map to MPI_REQUEST_NULL
1434 rawMpiRequests[i] = MPI_REQUEST_NULL;
1435 someNullRequests = true;
1436 }
1437 }
1438
1439 // This is the part where we've finally peeled off the wrapper and
1440 // we can now interact with MPI directly.
1441 //
1442 // One option in the one-argument version of waitAll() is to ignore
1443 // the statuses completely. MPI lets you pass in the named constant
1444 // MPI_STATUSES_IGNORE for the MPI_Status array output argument in
1445 // MPI_Waitall(), which would tell MPI not to bother with the
1446 // statuses. However, we want the statuses because we can use them
1447 // for detailed error diagnostics in case something goes wrong.
1448 const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(),
1449 rawMpiStatuses.getRawPtr());
1450
1451 // In MPI_Waitall(), an error indicates that one or more requests
1452 // failed. In that case, there could be requests that completed
1453 // (their MPI_Status' error field is MPI_SUCCESS), and other
1454 // requests that have not completed yet but have not necessarily
1455 // failed (MPI_PENDING). We make no attempt here to wait on the
1456 // pending requests. It doesn't make sense for us to do so, because
1457 // in general Teuchos::Comm doesn't attempt to provide robust
1458 // recovery from failed messages.
1459 if (err != MPI_SUCCESS) {
1460 if (err == MPI_ERR_IN_STATUS) {
1461 //
1462 // When MPI_Waitall returns MPI_ERR_IN_STATUS (a standard error
1463 // class), it's telling us to check the error codes in the
1464 // returned statuses. In that case, we do so and generate a
1465 // detailed exception message.
1466 //
1467 // Figure out which of the requests failed.
1468 Array<std::pair<size_type, int> > errorLocationsAndCodes;
1469 for (size_type k = 0; k < rawMpiStatuses.size(); ++k) {
1470 const int curErr = rawMpiStatuses[k].MPI_ERROR;
1471 if (curErr != MPI_SUCCESS) {
1472 errorLocationsAndCodes.push_back (std::make_pair (k, curErr));
1473 }
1474 }
1475 const size_type numErrs = errorLocationsAndCodes.size();
1476 if (numErrs > 0) {
1477 // There was at least one error. Assemble a detailed
1478 // exception message reporting which requests failed,
1479 // their error codes, and their source
1480 std::ostringstream os;
1481 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1482 << mpiErrorCodeToString (err) << "\". Of the " << count
1483 << " total request" << (count != 1 ? "s" : "") << ", " << numErrs
1484 << " failed. Here are the indices of the failed requests, and the "
1485 "error codes extracted from their returned MPI_Status objects:"
1486 << std::endl;
1487 for (size_type k = 0; k < numErrs; ++k) {
1488 const size_type errInd = errorLocationsAndCodes[k].first;
1489 os << "Request " << errInd << ": MPI_ERROR = "
1490 << mpiErrorCodeToString (rawMpiStatuses[errInd].MPI_ERROR)
1491 << std::endl;
1492 }
1493 if (someNullRequests) {
1494 os << " On input to MPI_Waitall, there was at least one MPI_"
1495 "Request that was MPI_REQUEST_NULL. MPI_Waitall should not "
1496 "normally fail in that case, but we thought we should let you know "
1497 "regardless.";
1498 }
1499 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1500 }
1501 // If there were no actual errors in the returned statuses,
1502 // well, then I guess everything is OK. Just keep going.
1503 }
1504 else {
1505 std::ostringstream os;
1506 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1507 << mpiErrorCodeToString (err) << "\".";
1508 if (someNullRequests) {
1509 os << " On input to MPI_Waitall, there was at least one MPI_Request "
1510 "that was MPI_REQUEST_NULL. MPI_Waitall should not normally fail in "
1511 "that case, but we thought we should let you know regardless.";
1512 }
1513 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1514 }
1515 }
1516
1517 // Invalidate the input array of requests by setting all entries
1518 // to null.
1519 std::fill (requests.begin(), requests.end(), null);
1520 }
1521
1522
1523
1524 // Called by the one-argument MpiComm::waitAll() variant.
1525 template<typename Ordinal>
1526 void
1527 waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests)
1528 {
1529 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1530 const size_type count = requests.size ();
1531 if (count == 0) {
1532 return; // No requests on which to wait
1533 }
1534
1535 // MpiComm wraps MPI and can't expose any MPI structs or opaque
1536 // objects. Thus, we have to unpack requests into a separate
1537 // array. If that's too slow, then your code should just call
1538 // into MPI directly.
1539 //
1540 // Pull out the raw MPI requests from the wrapped requests.
1541 // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL,
1542 // but we keep track just to inform the user.
1543 bool someNullRequests = false;
1544 Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL);
1545 for (int i = 0; i < count; ++i) {
1546 RCP<CommRequest<Ordinal> > request = requests[i];
1547 if (! request.is_null ()) {
1548 RCP<MpiCommRequestBase<Ordinal> > mpiRequest =
1549 rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request);
1550 // releaseRawMpiRequest() sets the MpiCommRequest's raw
1551 // MPI_Request to MPI_REQUEST_NULL. This makes waitAll() not
1552 // satisfy the strong exception guarantee. That's OK because
1553 // MPI_Waitall() doesn't promise that it satisfies the strong
1554 // exception guarantee, and we would rather conservatively
1555 // invalidate the handles than leave dangling requests around
1556 // and risk users trying to wait on the same request twice.
1557 rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest ();
1558 }
1559 else { // Null requests map to MPI_REQUEST_NULL
1560 rawMpiRequests[i] = MPI_REQUEST_NULL;
1561 someNullRequests = true;
1562 }
1563 }
1564
1565 // This is the part where we've finally peeled off the wrapper and
1566 // we can now interact with MPI directly.
1567 //
1568 // MPI lets us pass in the named constant MPI_STATUSES_IGNORE for
1569 // the MPI_Status array output argument in MPI_Waitall(), which
1570 // tells MPI not to bother writing out the statuses.
1571 const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(),
1572 MPI_STATUSES_IGNORE);
1573
1574 // In MPI_Waitall(), an error indicates that one or more requests
1575 // failed. In that case, there could be requests that completed
1576 // (their MPI_Status' error field is MPI_SUCCESS), and other
1577 // requests that have not completed yet but have not necessarily
1578 // failed (MPI_PENDING). We make no attempt here to wait on the
1579 // pending requests. It doesn't make sense for us to do so,
1580 // because in general Teuchos::Comm doesn't attempt to provide
1581 // robust recovery from failed messages.
1582 if (err != MPI_SUCCESS) {
1583 std::ostringstream os;
1584 os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1585 << mpiErrorCodeToString (err) << "\".";
1586 if (someNullRequests) {
1587 os << std::endl << "On input to MPI_Waitall, there was at least one "
1588 "MPI_Request that was MPI_REQUEST_NULL. MPI_Waitall should not "
1589 "normally fail in that case, but we thought we should let you know "
1590 "regardless.";
1591 }
1592 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1593 }
1594
1595 // Invalidate the input array of requests by setting all entries
1596 // to null. We delay this until the end, since some
1597 // implementations of CommRequest might hold the only reference to
1598 // the communication buffer, and we don't want that to go away
1599 // until we've waited on the communication operation.
1600 std::fill (requests.begin(), requests.end(), null);
1601 }
1602
1603} // namespace (anonymous)
1604
1605
1606
1607template<typename Ordinal>
1608void
1609MpiComm<Ordinal>::
1610waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests) const
1611{
1612 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests)" );
1613 // Call the one-argument version of waitAllImpl, to avoid overhead
1614 // of handling statuses (which the user didn't want anyway).
1615 waitAllImpl<Ordinal> (requests);
1616}
1617
1618
1619template<typename Ordinal>
1620void
1621MpiComm<Ordinal>::
1622waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
1623 const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const
1624{
1625 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests, statuses)" );
1626
1627 typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1628 const size_type count = requests.size();
1629
1630 TEUCHOS_TEST_FOR_EXCEPTION(count != statuses.size(),
1631 std::invalid_argument, "Teuchos::MpiComm::waitAll: requests.size() = "
1632 << count << " != statuses.size() = " << statuses.size() << ".");
1633
1634 Array<MPI_Status> rawMpiStatuses (count);
1635 waitAllImpl<Ordinal> (requests, rawMpiStatuses());
1636
1637 // Repackage the raw MPI_Status structs into the wrappers.
1638 for (size_type i = 0; i < count; ++i) {
1639 statuses[i] = mpiCommStatus<Ordinal> (rawMpiStatuses[i]);
1640 }
1641}
1642
1643
1644template<typename Ordinal>
1645RCP<CommStatus<Ordinal> >
1646MpiComm<Ordinal>::wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const
1647{
1648 TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::wait(...)" );
1649
1650 if (is_null (*request)) {
1651 return null; // Nothing to wait on ...
1652 }
1653 else {
1654 RCP<CommStatus<Ordinal> > status = (*request)->wait ();
1655 // mfh 22 Oct 2012: The unit tests expect waiting on the
1656 // CommRequest to invalidate it by setting it to null.
1657 *request = null;
1658 return status;
1659 }
1660}
1661
1662template<typename Ordinal>
1663RCP< Comm<Ordinal> >
1664MpiComm<Ordinal>::duplicate() const
1665{
1666 MPI_Comm origRawComm = *rawMpiComm_;
1667 MPI_Comm newRawComm = MPI_COMM_NULL;
1668 const int err = MPI_Comm_dup (origRawComm, &newRawComm);
1669 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, "Teuchos"
1670 "::MpiComm::duplicate: MPI_Comm_dup failed with the following error: "
1671 << mpiErrorCodeToString (err));
1672
1673 // Wrap the raw communicator, and pass the (const) wrapped
1674 // communicator to MpiComm's constructor. We created the raw comm,
1675 // so we have to supply a function that frees it after use.
1676 RCP<OpaqueWrapper<MPI_Comm> > wrapped =
1677 opaqueWrapper<MPI_Comm> (newRawComm, details::safeCommFree);
1678 // Since newComm's raw MPI_Comm is the result of an MPI_Comm_dup,
1679 // its messages cannot collide with those of any other MpiComm.
1680 // This means we can assign its tag without an MPI_Bcast.
1681 RCP<MpiComm<Ordinal> > newComm =
1682 rcp (new MpiComm<Ordinal> (wrapped.getConst (), minTag_));
1683 return rcp_implicit_cast<Comm<Ordinal> > (newComm);
1684}
1685
1686
1687template<typename Ordinal>
1688RCP< Comm<Ordinal> >
1689MpiComm<Ordinal>::split(const int color, const int key) const
1690{
1691 MPI_Comm newComm;
1692 const int splitReturn =
1693 MPI_Comm_split (*rawMpiComm_,
1694 color < 0 ? MPI_UNDEFINED : color,
1695 key,
1696 &newComm);
1698 splitReturn != MPI_SUCCESS,
1699 std::logic_error,
1700 "Teuchos::MpiComm::split: Failed to create communicator with color "
1701 << color << "and key " << key << ". MPI_Comm_split failed with error \""
1702 << mpiErrorCodeToString (splitReturn) << "\".");
1703 if (newComm == MPI_COMM_NULL) {
1704 return RCP< Comm<Ordinal> >();
1705 } else {
1706 RCP<const OpaqueWrapper<MPI_Comm> > wrapped =
1707 opaqueWrapper<MPI_Comm> (newComm, details::safeCommFree);
1708 // Since newComm's raw MPI_Comm is the result of an
1709 // MPI_Comm_split, its messages cannot collide with those of any
1710 // other MpiComm. This means we can assign its tag without an
1711 // MPI_Bcast.
1712 return rcp (new MpiComm<Ordinal> (wrapped, minTag_));
1713 }
1714}
1715
1716
1717template<typename Ordinal>
1718RCP< Comm<Ordinal> >
1719MpiComm<Ordinal>::createSubcommunicator(const ArrayView<const int> &ranks) const
1720{
1721 int err = MPI_SUCCESS; // For error codes returned by MPI functions
1722
1723 // Get the group that this communicator is in.
1724 MPI_Group thisGroup;
1725 err = MPI_Comm_group (*rawMpiComm_, &thisGroup);
1726 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1727 "Failed to obtain the current communicator's group. "
1728 "MPI_Comm_group failed with error \""
1729 << mpiErrorCodeToString (err) << "\".");
1730
1731 // Create a new group with the specified members.
1732 MPI_Group newGroup;
1733 // It's rude to cast away const, but MPI functions demand it.
1734 //
1735 // NOTE (mfh 14 Aug 2012) Please don't ask for &ranks[0] unless you
1736 // know that ranks.size() > 0. That's why I'm using getRawPtr().
1737 err = MPI_Group_incl (thisGroup, ranks.size(),
1738 const_cast<int*> (ranks.getRawPtr ()), &newGroup);
1739 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1740 "Failed to create subgroup. MPI_Group_incl failed with error \""
1741 << mpiErrorCodeToString (err) << "\".");
1742
1743 // Create a new communicator from the new group.
1744 MPI_Comm newComm;
1745 try {
1746 err = MPI_Comm_create (*rawMpiComm_, newGroup, &newComm);
1747 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1748 "Failed to create subcommunicator. MPI_Comm_create failed with error \""
1749 << mpiErrorCodeToString (err) << "\".");
1750 } catch (...) {
1751 // Attempt to free the new group before rethrowing. If
1752 // successful, this will prevent a memory leak due to the "lost"
1753 // group that was allocated successfully above. Since we're
1754 // throwing std::logic_error anyway, we can only promise
1755 // best-effort recovery; thus, we don't check the error code.
1756 (void) MPI_Group_free (&newGroup);
1757 (void) MPI_Group_free (&thisGroup);
1758 throw;
1759 }
1760
1761 // We don't need the group any more, so free it.
1762 err = MPI_Group_free (&newGroup);
1763 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1764 "Failed to free subgroup. MPI_Group_free failed with error \""
1765 << mpiErrorCodeToString (err) << "\".");
1766 err = MPI_Group_free (&thisGroup);
1767 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1768 "Failed to free subgroup. MPI_Group_free failed with error \""
1769 << mpiErrorCodeToString (err) << "\".");
1770
1771 if (newComm == MPI_COMM_NULL) {
1772 return RCP<Comm<Ordinal> > ();
1773 } else {
1774 using Teuchos::details::safeCommFree;
1775 typedef OpaqueWrapper<MPI_Comm> ow_type;
1776 RCP<const ow_type> wrapper =
1777 rcp_implicit_cast<const ow_type> (opaqueWrapper (newComm, safeCommFree));
1778 // Since newComm's raw MPI_Comm is the result of an
1779 // MPI_Comm_create, its messages cannot collide with those of any
1780 // other MpiComm. This means we can assign its tag without an
1781 // MPI_Bcast.
1782 return rcp (new MpiComm<Ordinal> (wrapper, minTag_));
1783 }
1784}
1785
1786
1787// Overridden from Describable
1788
1789
1790template<typename Ordinal>
1791std::string MpiComm<Ordinal>::description() const
1792{
1793 std::ostringstream oss;
1794 oss
1795 << typeName(*this)
1796 << "{"
1797 << "size="<<size_
1798 << ",rank="<<rank_
1799 << ",rawMpiComm="<<static_cast<MPI_Comm>(*rawMpiComm_)
1800 <<"}";
1801 return oss.str();
1802}
1803
1804
#ifdef TEUCHOS_MPI_COMM_DUMP
// Definition of the static flag that gates the dumpBuffer() calls;
// it starts out false, so buffer dumps are off until a caller sets it.
template<typename Ordinal>
bool MpiComm<Ordinal>::show_dump = false;
#endif
1809
1810
1811// private
1812
1813
1814template<typename Ordinal>
1815void MpiComm<Ordinal>::assertRank(const int rank, const std::string &rankName) const
1816{
1818 ! ( 0 <= rank && rank < size_ ), std::logic_error
1819 ,"Error, "<<rankName<<" = " << rank << " is not < 0 or is not"
1820 " in the range [0,"<<size_-1<<"]!"
1821 );
1822}
1823
1824
1825} // namespace Teuchos
1826
1827
1828template<typename Ordinal>
1830Teuchos::createMpiComm(
1831 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm
1832 )
1833{
1834 if( rawMpiComm.get()!=NULL && *rawMpiComm != MPI_COMM_NULL )
1835 return rcp(new MpiComm<Ordinal>(rawMpiComm));
1836 return Teuchos::null;
1837}
1838
1839
1840template<typename Ordinal>
1842Teuchos::createMpiComm(
1843 const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm,
1844 const int defaultTag
1845 )
1846{
1847 if( rawMpiComm.get()!=NULL && *rawMpiComm != MPI_COMM_NULL )
1848 return rcp(new MpiComm<Ordinal>(rawMpiComm, defaultTag));
1849 return Teuchos::null;
1850}
1851
1852
1853template<typename Ordinal>
1854MPI_Comm
1855Teuchos::getRawMpiComm(const Comm<Ordinal> &comm)
1856{
1857 return *(
1858 dyn_cast<const MpiComm<Ordinal> >(comm).getRawMpiComm()
1859 );
1860}
1861
1862
1863#endif // HAVE_TEUCHOS_MPI
1864#endif // TEUCHOS_MPI_COMM_HPP
1865
Teuchos header file which uses auto-configuration information to include necessary C++ headers.
Implementation detail of Teuchos' MPI wrapper.
Defines basic traits for the ordinal field type.
Defines basic traits returning the name of a type in a portable and readable way.
Definition of Teuchos::as, for conversions between types.
bool is_null(const ArrayRCP< T > &p)
Returns true if p.get()==NULL.
Ordinal size_type
Type representing the number of elements in an ArrayRCP or view thereof.
int rank(const Comm< Ordinal > &comm)
Get the process rank.
RCP< OpaqueWrapper< Opaque > > opaqueWrapper(Opaque opaque)
Create a new OpaqueWrapper object without a free function.
Smart reference counting pointer class for automatic garbage collection.
RCP< T > rcp(const boost::shared_ptr< T > &sptr)
Conversion function that takes in a boost::shared_ptr object and spits out a Teuchos::RCP object.
static RCP< FancyOStream > getDefaultOStream()
Get the default output stream object.
Tabbing class for helping to create formatted, indented output for a basic_FancyOStream object.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
This macro checks that two numbers are equal and if not then throws an exception with a good error ...
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
std::string typeName(const T &t)
Template function for returning the concrete type name of a passed-in object.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
void ssend(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of ssend() that takes a tag (and restores the correct order of arguments).
void readySend(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of readySend() that accepts a message tag.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
RCP< CommRequest< Ordinal > > ireceive(const ArrayRCP< Packet > &recvBuffer, const int sourceRank, const int tag, const Comm< Ordinal > &comm)
Variant of ireceive that takes a tag argument (and restores the correct order of arguments).
void send(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of send() that takes a tag (and restores the correct order of arguments).
Teuchos implementation details.