5 #ifndef GKO_PUBLIC_CORE_BASE_MPI_HPP_
6 #define GKO_PUBLIC_CORE_BASE_MPI_HPP_
10 #include <type_traits>
13 #include <ginkgo/config.hpp>
14 #include <ginkgo/core/base/exception.hpp>
15 #include <ginkgo/core/base/exception_helpers.hpp>
16 #include <ginkgo/core/base/executor.hpp>
17 #include <ginkgo/core/base/half.hpp>
18 #include <ginkgo/core/base/types.hpp>
19 #include <ginkgo/core/base/utils_helper.hpp>
29 namespace experimental {
44 #if GINKGO_HAVE_GPU_AWARE_MPI
// Helper macro: emits a type_impl<input_type> specialization whose static
// get_type() member returns the MPI datatype handle given as mpi_type.
62 #define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \
64 struct type_impl<input_type> { \
65 static MPI_Datatype get_type() { return mpi_type; } \
// Built-in registrations: each invocation maps one C++ type to the MPI
// datatype handle that type_impl<T>::get_type() returns for it.
80 GKO_REGISTER_MPI_TYPE(
char, MPI_CHAR);
81 GKO_REGISTER_MPI_TYPE(
unsigned char, MPI_UNSIGNED_CHAR);
82 GKO_REGISTER_MPI_TYPE(
unsigned, MPI_UNSIGNED);
83 GKO_REGISTER_MPI_TYPE(
int, MPI_INT);
84 GKO_REGISTER_MPI_TYPE(
unsigned short, MPI_UNSIGNED_SHORT);
85 GKO_REGISTER_MPI_TYPE(
unsigned long, MPI_UNSIGNED_LONG);
86 GKO_REGISTER_MPI_TYPE(
long, MPI_LONG);
87 GKO_REGISTER_MPI_TYPE(
long long, MPI_LONG_LONG_INT);
88 GKO_REGISTER_MPI_TYPE(
unsigned long long, MPI_UNSIGNED_LONG_LONG);
89 GKO_REGISTER_MPI_TYPE(
float, MPI_FLOAT);
90 GKO_REGISTER_MPI_TYPE(
double, MPI_DOUBLE);
91 GKO_REGISTER_MPI_TYPE(
long double, MPI_LONG_DOUBLE);
92 #if GINKGO_ENABLE_HALF
// NOTE(review): half is registered as MPI_UNSIGNED_SHORT and
// std::complex<half> as MPI_FLOAT, i.e. not as MPI floating-point/complex
// types. Presumably these serve as same-sized bit containers for pure data
// movement; an arithmetic MPI_Op (e.g. MPI_SUM) applied to them would operate
// on the container type, not on half values -- confirm against callers.
96 GKO_REGISTER_MPI_TYPE(
half, MPI_UNSIGNED_SHORT);
97 GKO_REGISTER_MPI_TYPE(std::complex<half>, MPI_FLOAT);
98 #endif // GINKGO_ENABLE_HALF
99 GKO_REGISTER_MPI_TYPE(std::complex<float>, MPI_C_FLOAT_COMPLEX);
100 GKO_REGISTER_MPI_TYPE(std::complex<double>, MPI_C_DOUBLE_COMPLEX);
119 GKO_ASSERT_NO_MPI_ERRORS(MPI_Type_contiguous(count, old_type, &type_));
120 GKO_ASSERT_NO_MPI_ERRORS(MPI_Type_commit(&type_));
145 *
this = std::move(other);
157 if (
this != &other) {
158 this->type_ = std::exchange(other.type_, MPI_DATATYPE_NULL);
168 if (type_ != MPI_DATATYPE_NULL) {
169 MPI_Type_free(&type_);
/**
 * Accessor for the wrapped datatype.
 *
 * @return the MPI_Datatype handle stored in type_ (returned by value).
 */
178 MPI_Datatype
get()
const {
return type_; }
190 serialized = MPI_THREAD_SERIALIZED,
191 funneled = MPI_THREAD_FUNNELED,
192 single = MPI_THREAD_SINGLE,
193 multiple = MPI_THREAD_MULTIPLE
/**
 * Static query built on MPI_Finalized.
 *
 * The visible part of the body passes &flag to MPI_Finalized and routes the
 * return code through GKO_ASSERT_NO_MPI_ERRORS.
 */
208 static bool is_finalized()
211 GKO_ASSERT_NO_MPI_ERRORS(MPI_Finalized(&flag));
/**
 * Static query built on MPI_Initialized.
 *
 * The visible part of the body passes &flag to MPI_Initialized and routes the
 * return code through GKO_ASSERT_NO_MPI_ERRORS.
 */
215 static bool is_initialized()
218 GKO_ASSERT_NO_MPI_ERRORS(MPI_Initialized(&flag));
238 const thread_type thread_t = thread_type::serialized)
240 this->required_thread_support_ = static_cast<int>(thread_t);
241 GKO_ASSERT_NO_MPI_ERRORS(
242 MPI_Init_thread(&argc, &argv, this->required_thread_support_,
243 &(this->provided_thread_support_)));
257 int required_thread_support_;
258 int provided_thread_support_;
// Pointer type this deleter operates on.
271 using pointer = MPI_Comm*;
/**
 * Releases the communicator that comm points to.
 *
 * Asserts that *comm is not MPI_COMM_NULL and then calls MPI_Comm_free on it;
 * the return code is checked with GKO_ASSERT_NO_MPI_ERRORS.
 *
 * NOTE(review): no `delete comm` is visible in this excerpt, although the
 * communicator constructors pair this deleter with `new MPI_Comm(...)` --
 * confirm the pointer storage itself is released in the part not shown.
 *
 * @param comm  pointer to the communicator handle to free.
 */
272 void operator()(pointer comm)
const
274 GKO_ASSERT(*comm != MPI_COMM_NULL);
275 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_free(comm));
/**
 * Accessor for the wrapped status object.
 *
 * @return a mutable pointer to the status_ member, suitable for passing to
 *         MPI routines that fill in an MPI_Status.
 */
298 MPI_Status*
get() {
return &this->status_; }
310 template <
typename T>
344 this->req_ = std::exchange(o.req_, MPI_REQUEST_NULL);
351 if (req_ != MPI_REQUEST_NULL) {
352 if (MPI_Request_free(&req_) != MPI_SUCCESS) {
/**
 * Accessor for the wrapped request handle.
 *
 * @return a mutable pointer to the req_ member, suitable for passing to MPI
 *         routines that write an MPI_Request.
 */
364 MPI_Request*
get() {
return &this->req_; }
375 GKO_ASSERT_NO_MPI_ERRORS(MPI_Wait(&req_,
status.
get()));
/**
 * Waits on every request in req, one after another in index order.
 *
 * Each iteration calls request::wait() on req[i] and appends the result to
 * the local status vector.
 *
 * NOTE(review): stat could reserve req.size() entries up front to avoid
 * reallocations while appending.
 *
 * @param req  the requests to wait on.
 */
392 inline std::vector<status>
wait_all(std::vector<request>& req)
394 std::vector<status> stat;
395 for (std::size_t i = 0; i < req.size(); ++i) {
396 stat.emplace_back(req[i].wait());
429 : comm_(), force_host_buffer_(force_host_buffer)
431 this->comm_.reset(
new MPI_Comm(comm));
445 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split(comm, color, key, &comm_out));
446 this->comm_.reset(
new MPI_Comm(comm_out), comm_deleter{});
460 GKO_ASSERT_NO_MPI_ERRORS(
461 MPI_Comm_split(comm.
get(), color, key, &comm_out));
462 this->comm_.reset(
new MPI_Comm(comm_out), comm_deleter{});
/**
 * Accessor for the underlying communicator handle.
 *
 * Dereferences the pointer held by comm_, so comm_ must not be empty.
 *
 * @return a const reference to the stored MPI_Comm.
 */
470 const MPI_Comm&
get()
const {
return *(this->comm_.get()); }
/**
 * @return the value of the force_host_buffer_ flag this communicator was
 *         constructed with.
 */
472 bool force_host_buffer()
const {
return force_host_buffer_; }
/**
 * @return the result of get_num_ranks(), which queries MPI_Comm_size on this
 *         communicator.
 */
479 int size()
const {
return get_num_ranks(); }
/**
 * @return the result of get_my_rank(), which queries MPI_Comm_rank on this
 *         communicator.
 */
486 int rank()
const {
return get_my_rank(); }
502 return compare(rhs.get());
518 GKO_ASSERT_NO_MPI_ERRORS(MPI_Barrier(this->
get()));
/**
 * Send wrapper on this communicator.
 *
 * Takes a typed send buffer, an element count, the destination rank and a
 * message tag, and forwards destination_rank, send_tag and this->get() to the
 * underlying MPI call, whose return code is checked with
 * GKO_ASSERT_NO_MPI_ERRORS.
 *
 * NOTE(review): the MPI routine name is not visible in this excerpt;
 * presumably MPI_Send, since the function returns void -- confirm.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param send_buffer  buffer to send from.
 * @param send_count  number of elements to send.
 * @param destination_rank  rank to send to.
 * @param send_tag  message tag.
 */
534 template <
typename SendType>
535 void send(std::shared_ptr<const Executor> exec,
const SendType* send_buffer,
536 const int send_count,
const int destination_rank,
537 const int send_tag)
const
// Held for the rest of the scope, i.e. across the MPI call below.
539 auto guard = exec->get_scoped_device_id_guard();
540 GKO_ASSERT_NO_MPI_ERRORS(
542 destination_rank, send_tag, this->
get()));
561 template <
typename SendType>
563 const SendType* send_buffer,
const int send_count,
564 const int destination_rank,
const int send_tag)
const
566 auto guard = exec->get_scoped_device_id_guard();
568 GKO_ASSERT_NO_MPI_ERRORS(
570 destination_rank, send_tag, this->
get(), req.
get()));
/**
 * Receive wrapper on this communicator.
 *
 * Forwards source_rank, recv_tag, this->get() and the pointer obtained from a
 * status object (st.get()) to the underlying MPI call, whose return code is
 * checked with GKO_ASSERT_NO_MPI_ERRORS.
 *
 * NOTE(review): the MPI routine name is not visible in this excerpt;
 * presumably MPI_Recv, since a status (not a request) is returned -- confirm.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param recv_buffer  buffer to receive into.
 * @param recv_count  number of elements to receive.
 * @param source_rank  rank to receive from.
 * @param recv_tag  message tag.
 *
 * @return a status object (declared return type).
 */
589 template <
typename RecvType>
590 status recv(std::shared_ptr<const Executor> exec, RecvType* recv_buffer,
591 const int recv_count,
const int source_rank,
592 const int recv_tag)
const
// Held for the rest of the scope, i.e. across the MPI call below.
594 auto guard = exec->get_scoped_device_id_guard();
596 GKO_ASSERT_NO_MPI_ERRORS(
598 source_rank, recv_tag, this->
get(), st.
get()));
617 template <
typename RecvType>
619 const int recv_count,
const int source_rank,
620 const int recv_tag)
const
622 auto guard = exec->get_scoped_device_id_guard();
624 GKO_ASSERT_NO_MPI_ERRORS(
626 source_rank, recv_tag, this->
get(), req.
get()));
/**
 * Broadcast wrapper: calls MPI_Bcast on this communicator.
 *
 * buffer and count are passed straight to MPI_Bcast together with root_rank
 * and this->get(); the return code is checked with GKO_ASSERT_NO_MPI_ERRORS.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param buffer  buffer passed to MPI_Bcast (source on the root, destination
 *                elsewhere, per MPI_Bcast semantics).
 * @param count  number of elements.
 * @param root_rank  rank that broadcasts.
 */
642 template <
typename BroadcastType>
643 void broadcast(std::shared_ptr<const Executor> exec, BroadcastType* buffer,
644 int count,
int root_rank)
const
// Held for the rest of the scope, i.e. across the MPI call below.
646 auto guard = exec->get_scoped_device_id_guard();
647 GKO_ASSERT_NO_MPI_ERRORS(MPI_Bcast(buffer, count,
649 root_rank, this->
get()));
667 template <
typename BroadcastType>
669 BroadcastType* buffer,
int count,
int root_rank)
const
671 auto guard = exec->get_scoped_device_id_guard();
673 GKO_ASSERT_NO_MPI_ERRORS(
675 root_rank, this->
get(), req.
get()));
/**
 * Reduction wrapper: calls MPI_Reduce on this communicator.
 *
 * send_buffer, recv_buffer and count are passed straight to MPI_Reduce along
 * with operation, root_rank and this->get(); the return code is checked with
 * GKO_ASSERT_NO_MPI_ERRORS.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param send_buffer  local input values.
 * @param recv_buffer  output buffer (significant on the root, per MPI_Reduce).
 * @param count  number of elements.
 * @param operation  MPI reduction operation.
 * @param root_rank  rank that receives the result.
 */
693 template <
typename ReduceType>
694 void reduce(std::shared_ptr<const Executor> exec,
695 const ReduceType* send_buffer, ReduceType* recv_buffer,
696 int count, MPI_Op operation,
int root_rank)
const
// Held for the rest of the scope, i.e. across the MPI call below.
698 auto guard = exec->get_scoped_device_id_guard();
699 GKO_ASSERT_NO_MPI_ERRORS(MPI_Reduce(send_buffer, recv_buffer, count,
701 operation, root_rank, this->
get()));
720 template <
typename ReduceType>
722 const ReduceType* send_buffer, ReduceType* recv_buffer,
723 int count, MPI_Op operation,
int root_rank)
const
725 auto guard = exec->get_scoped_device_id_guard();
727 GKO_ASSERT_NO_MPI_ERRORS(MPI_Ireduce(
729 operation, root_rank, this->
get(), req.
get()));
746 template <
typename ReduceType>
748 ReduceType* recv_buffer,
int count, MPI_Op operation)
const
750 auto guard = exec->get_scoped_device_id_guard();
751 GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(
753 operation, this->
get()));
771 template <
typename ReduceType>
773 ReduceType* recv_buffer,
int count,
774 MPI_Op operation)
const
776 auto guard = exec->get_scoped_device_id_guard();
778 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(
780 operation, this->
get(), req.
get()));
798 template <
typename ReduceType>
800 const ReduceType* send_buffer, ReduceType* recv_buffer,
801 int count, MPI_Op operation)
const
803 auto guard = exec->get_scoped_device_id_guard();
804 GKO_ASSERT_NO_MPI_ERRORS(MPI_Allreduce(
806 operation, this->
get()));
825 template <
typename ReduceType>
827 const ReduceType* send_buffer, ReduceType* recv_buffer,
828 int count, MPI_Op operation)
const
830 auto guard = exec->get_scoped_device_id_guard();
832 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallreduce(
834 operation, this->
get(), req.
get()));
854 template <
typename SendType,
typename RecvType>
855 void gather(std::shared_ptr<const Executor> exec,
856 const SendType* send_buffer,
const int send_count,
857 RecvType* recv_buffer,
const int recv_count,
860 auto guard = exec->get_scoped_device_id_guard();
861 GKO_ASSERT_NO_MPI_ERRORS(
864 root_rank, this->
get()));
886 template <
typename SendType,
typename RecvType>
888 const SendType* send_buffer,
const int send_count,
889 RecvType* recv_buffer,
const int recv_count,
892 auto guard = exec->get_scoped_device_id_guard();
894 GKO_ASSERT_NO_MPI_ERRORS(MPI_Igather(
/**
 * Variable-count gather wrapper: calls MPI_Gatherv.
 *
 * recv_buffer, recv_counts and displacements are forwarded to MPI_Gatherv;
 * the return code is checked with GKO_ASSERT_NO_MPI_ERRORS.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param send_buffer  local data to contribute.
 * @param send_count  number of elements contributed by this rank.
 * @param recv_buffer  buffer receiving the gathered data.
 * @param recv_counts  per-rank receive counts.
 * @param displacements  per-rank offsets into recv_buffer.
 * @param root_rank  rank that gathers.
 */
919 template <
typename SendType,
typename RecvType>
920 void gather_v(std::shared_ptr<const Executor> exec,
921 const SendType* send_buffer,
const int send_count,
922 RecvType* recv_buffer,
const int* recv_counts,
923 const int* displacements,
int root_rank)
const
// Held for the rest of the scope, i.e. across the MPI call below.
925 auto guard = exec->get_scoped_device_id_guard();
926 GKO_ASSERT_NO_MPI_ERRORS(MPI_Gatherv(
928 recv_buffer, recv_counts, displacements,
952 template <
typename SendType,
typename RecvType>
954 const SendType* send_buffer,
const int send_count,
955 RecvType* recv_buffer,
const int* recv_counts,
956 const int* displacements,
int root_rank)
const
958 auto guard = exec->get_scoped_device_id_guard();
960 GKO_ASSERT_NO_MPI_ERRORS(MPI_Igatherv(
962 recv_buffer, recv_counts, displacements,
983 template <
typename SendType,
typename RecvType>
985 const SendType* send_buffer,
const int send_count,
986 RecvType* recv_buffer,
const int recv_count)
const
988 auto guard = exec->get_scoped_device_id_guard();
989 GKO_ASSERT_NO_MPI_ERRORS(MPI_Allgather(
1013 template <
typename SendType,
typename RecvType>
1015 const SendType* send_buffer,
const int send_count,
1016 RecvType* recv_buffer,
const int recv_count)
const
1018 auto guard = exec->get_scoped_device_id_guard();
1020 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iallgather(
1023 this->
get(), req.
get()));
/**
 * Scatter wrapper: calls MPI_Scatter.
 *
 * The return code is checked with GKO_ASSERT_NO_MPI_ERRORS; the argument list
 * of the MPI call is not visible in this excerpt.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param send_buffer  data to distribute.
 * @param send_count  send-side element count.
 * @param recv_buffer  buffer receiving this rank's portion.
 * @param recv_count  receive-side element count.
 * @param root_rank  rank that scatters.
 */
1042 template <
typename SendType,
typename RecvType>
1043 void scatter(std::shared_ptr<const Executor> exec,
1044 const SendType* send_buffer,
const int send_count,
1045 RecvType* recv_buffer,
const int recv_count,
1046 int root_rank)
const
// Held for the rest of the scope, i.e. across the MPI call below.
1048 auto guard = exec->get_scoped_device_id_guard();
1049 GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatter(
1073 template <
typename SendType,
typename RecvType>
1075 const SendType* send_buffer,
const int send_count,
1076 RecvType* recv_buffer,
const int recv_count,
1077 int root_rank)
const
1079 auto guard = exec->get_scoped_device_id_guard();
1081 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscatter(
1084 this->
get(), req.
get()));
1106 template <
typename SendType,
typename RecvType>
1108 const SendType* send_buffer,
const int* send_counts,
1109 const int* displacements, RecvType* recv_buffer,
1110 const int recv_count,
int root_rank)
const
1112 auto guard = exec->get_scoped_device_id_guard();
1113 GKO_ASSERT_NO_MPI_ERRORS(MPI_Scatterv(
1114 send_buffer, send_counts, displacements,
1139 template <
typename SendType,
typename RecvType>
1141 const SendType* send_buffer,
const int* send_counts,
1142 const int* displacements, RecvType* recv_buffer,
1143 const int recv_count,
int root_rank)
const
1145 auto guard = exec->get_scoped_device_id_guard();
1147 GKO_ASSERT_NO_MPI_ERRORS(
1148 MPI_Iscatterv(send_buffer, send_counts, displacements,
1151 root_rank, this->
get(), req.
get()));
/**
 * Single-buffer all-to-all wrapper: calls MPI_Alltoall.
 *
 * Only a receive buffer is taken, so this is presumably the in-place form
 * (MPI_IN_PLACE as send buffer) -- the argument list is not visible in this
 * excerpt; confirm. The return code is checked with GKO_ASSERT_NO_MPI_ERRORS.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param recv_buffer  buffer passed to MPI_Alltoall.
 * @param recv_count  element count passed along with recv_buffer.
 */
1171 template <
typename RecvType>
1172 void all_to_all(std::shared_ptr<const Executor> exec, RecvType* recv_buffer,
1173 const int recv_count)
const
// Held for the rest of the scope, i.e. across the MPI call below.
1175 auto guard = exec->get_scoped_device_id_guard();
1176 GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(
1200 template <
typename RecvType>
1202 RecvType* recv_buffer,
const int recv_count)
const
1204 auto guard = exec->get_scoped_device_id_guard();
1206 GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(
1209 this->
get(), req.
get()));
1229 template <
typename SendType,
typename RecvType>
1231 const SendType* send_buffer,
const int send_count,
1232 RecvType* recv_buffer,
const int recv_count)
const
1234 auto guard = exec->get_scoped_device_id_guard();
1235 GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoall(
1259 template <
typename SendType,
typename RecvType>
1261 const SendType* send_buffer,
const int send_count,
1262 RecvType* recv_buffer,
const int recv_count)
const
1264 auto guard = exec->get_scoped_device_id_guard();
1266 GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoall(
1269 this->
get(), req.
get()));
1292 template <
typename SendType,
typename RecvType>
1294 const SendType* send_buffer,
const int* send_counts,
1295 const int* send_offsets, RecvType* recv_buffer,
1296 const int* recv_counts,
const int* recv_offsets)
const
1298 this->
all_to_all_v(std::move(exec), send_buffer, send_counts,
1300 recv_buffer, recv_counts, recv_offsets,
1320 const void* send_buffer,
const int* send_counts,
1321 const int* send_offsets, MPI_Datatype send_type,
1322 void* recv_buffer,
const int* recv_counts,
1323 const int* recv_offsets, MPI_Datatype recv_type)
const
1325 auto guard = exec->get_scoped_device_id_guard();
1326 GKO_ASSERT_NO_MPI_ERRORS(MPI_Alltoallv(
1327 send_buffer, send_counts, send_offsets, send_type, recv_buffer,
1328 recv_counts, recv_offsets, recv_type, this->
get()));
1351 const void* send_buffer,
const int* send_counts,
1352 const int* send_offsets, MPI_Datatype send_type,
1353 void* recv_buffer,
const int* recv_counts,
1354 const int* recv_offsets,
1355 MPI_Datatype recv_type)
const
1357 auto guard = exec->get_scoped_device_id_guard();
1359 GKO_ASSERT_NO_MPI_ERRORS(MPI_Ialltoallv(
1360 send_buffer, send_counts, send_offsets, send_type, recv_buffer,
1361 recv_counts, recv_offsets, recv_type, this->
get(), req.
get()));
1385 template <
typename SendType,
typename RecvType>
1387 const SendType* send_buffer,
const int* send_counts,
1388 const int* send_offsets, RecvType* recv_buffer,
1389 const int* recv_counts,
1390 const int* recv_offsets)
const
1393 std::move(exec), send_buffer, send_counts, send_offsets,
/**
 * Scan wrapper: calls MPI_Scan on this communicator.
 *
 * send_buffer, recv_buffer and count are passed straight to MPI_Scan along
 * with operation and this->get(); the return code is checked with
 * GKO_ASSERT_NO_MPI_ERRORS.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param send_buffer  local input values.
 * @param recv_buffer  buffer receiving the scan result.
 * @param count  number of elements.
 * @param operation  MPI reduction operation used for the scan.
 */
1412 template <
typename ScanType>
1413 void scan(std::shared_ptr<const Executor> exec,
const ScanType* send_buffer,
1414 ScanType* recv_buffer,
int count, MPI_Op operation)
const
// Held for the rest of the scope, i.e. across the MPI call below.
1416 auto guard = exec->get_scoped_device_id_guard();
1417 GKO_ASSERT_NO_MPI_ERRORS(MPI_Scan(send_buffer, recv_buffer, count,
1419 operation, this->
get()));
1438 template <
typename ScanType>
1440 const ScanType* send_buffer, ScanType* recv_buffer,
1441 int count, MPI_Op operation)
const
1443 auto guard = exec->get_scoped_device_id_guard();
1445 GKO_ASSERT_NO_MPI_ERRORS(MPI_Iscan(send_buffer, recv_buffer, count,
1447 operation, this->
get(), req.
get()));
// Shared pointer to the MPI_Comm handle; get() dereferences it.
1452 std::shared_ptr<MPI_Comm> comm_;
// Flag set from the constructor argument and reported by force_host_buffer().
1453 bool force_host_buffer_;
/**
 * Queries MPI_Comm_rank on this communicator, writing into my_rank; the
 * return code is checked with GKO_ASSERT_NO_MPI_ERRORS.
 */
1455 int get_my_rank()
const
1458 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(
get(), &my_rank));
/**
 * Determines this process's rank within a shared-memory sub-communicator.
 *
 * Splits this communicator with MPI_Comm_split_type(MPI_COMM_TYPE_SHARED)
 * using key 0 and MPI_INFO_NULL, queries MPI_Comm_rank on the resulting
 * communicator, and frees that temporary communicator again.
 *
 * NOTE(review): the return code of MPI_Comm_free is not checked here, unlike
 * the other MPI calls; and if the MPI_Comm_rank check exits early (depends on
 * how GKO_ASSERT_NO_MPI_ERRORS reports errors), local_comm would not be
 * freed -- confirm whether that matters.
 */
1462 int get_node_local_rank()
const
1464 MPI_Comm local_comm;
1466 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_split_type(
1467 this->
get(), MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local_comm));
1468 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_rank(local_comm, &
rank));
// Temporary communicator is only needed for the rank query above.
1469 MPI_Comm_free(&local_comm);
/**
 * Queries MPI_Comm_size on this communicator, writing into size; the return
 * code is checked with GKO_ASSERT_NO_MPI_ERRORS.
 */
1473 int get_num_ranks()
const
1476 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_size(this->
get(), &size));
/**
 * Compares this communicator's handle with other via MPI_Comm_compare.
 *
 * @param other  the MPI communicator to compare against.
 *
 * @return true only if MPI_Comm_compare reports MPI_IDENT; any other result
 *         (e.g. MPI_CONGRUENT or MPI_SIMILAR) yields false.
 */
1480 bool compare(
const MPI_Comm& other)
const
1483 GKO_ASSERT_NO_MPI_ERRORS(MPI_Comm_compare(
get(), other, &flag));
1484 return flag == MPI_IDENT;
1494 const communicator& comm);
1513 template <
typename ValueType>
// Selects how the window constructor creates the MPI window: allocate ->
// MPI_Win_allocate, create -> MPI_Win_create, dynamic_create ->
// MPI_Win_create_dynamic.
1519 enum class create_type { allocate = 1, create = 2, dynamic_create = 3 };
/**
 * Move constructor: takes over other's window handle and leaves other holding
 * MPI_WIN_NULL.
 *
 * @param other  the window to move from.
 */
1541 window(
window&& other) : window_{std::exchange(other.window_, MPI_WIN_NULL)}
1552 window_ = std::exchange(other.window_, MPI_WIN_NULL);
/**
 * Creates an MPI window over num_elems elements of ValueType.
 *
 * Depending on c_type this calls MPI_Win_create, MPI_Win_create_dynamic or
 * MPI_Win_allocate on comm; any other value ends in GKO_NOT_IMPLEMENTED.
 * All MPI return codes are checked with GKO_ASSERT_NO_MPI_ERRORS.
 *
 * NOTE(review): MPI_Win_allocate expects the address of a pointer variable as
 * its baseptr argument, while `base` itself is passed here -- confirm this is
 * intended.
 *
 * @param exec  executor whose scoped device-id guard is held during creation.
 * @param base  base pointer handed to the MPI window routines.
 * @param num_elems  number of ValueType elements in the window.
 * @param comm  communicator the window is created on.
 * @param disp_unit  displacement unit in bytes (defaults to sizeof(ValueType)).
 * @param input_info  MPI info object (defaults to MPI_INFO_NULL).
 */
1567 window(std::shared_ptr<const Executor> exec, ValueType* base,
int num_elems,
1568 const communicator& comm,
const int disp_unit =
sizeof(ValueType),
1569 MPI_Info input_info = MPI_INFO_NULL,
1572 auto guard = exec->get_scoped_device_id_guard();
// Keep the byte count in the full-width type of the product (std::size_t)
// instead of `unsigned`, which would truncate windows of 4 GiB or more before
// the value is converted to the MPI_Aint size parameter below.
1573 const auto size = num_elems *
sizeof(ValueType);
1574 if (c_type == create_type::create) {
1575 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_create(
1576 base, size, disp_unit, input_info, comm.
get(), &this->window_));
1577 }
else if (c_type == create_type::dynamic_create) {
1578 GKO_ASSERT_NO_MPI_ERRORS(
1579 MPI_Win_create_dynamic(input_info, comm.
get(), &this->window_));
1580 }
else if (c_type == create_type::allocate) {
1581 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_allocate(
1582 size, disp_unit, input_info, comm.
get(), base, &this->window_));
1584 GKO_NOT_IMPLEMENTED;
1603 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_fence(assert, this->window_));
1615 int assert = 0)
const
1617 if (lock_t == lock_type::shared) {
1618 GKO_ASSERT_NO_MPI_ERRORS(
1619 MPI_Win_lock(MPI_LOCK_SHARED, rank, assert, this->window_));
1620 }
else if (lock_t == lock_type::exclusive) {
1621 GKO_ASSERT_NO_MPI_ERRORS(
1622 MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, assert, this->window_));
1624 GKO_NOT_IMPLEMENTED;
1636 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock(rank, this->window_));
1647 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_lock_all(assert, this->window_));
1656 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_unlock_all(this->window_));
1667 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush(rank, this->window_));
1678 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local(rank, this->window_));
1687 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_all(this->window_));
1696 GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_flush_local_all(this->window_));
/**
 * Calls MPI_Win_sync on the wrapped window; the return code is checked with
 * GKO_ASSERT_NO_MPI_ERRORS.
 */
1702 void sync()
const { GKO_ASSERT_NO_MPI_ERRORS(MPI_Win_sync(this->window_)); }
1709 if (this->window_ && this->window_ != MPI_WIN_NULL) {
1710 MPI_Win_free(&this->window_);
1724 template <
typename PutType>
1725 void put(std::shared_ptr<const Executor> exec,
const PutType* origin_buffer,
1726 const int origin_count,
const int target_rank,
1727 const unsigned int target_disp,
const int target_count)
const
1729 auto guard = exec->get_scoped_device_id_guard();
1730 GKO_ASSERT_NO_MPI_ERRORS(
1732 target_rank, target_disp, target_count,
1748 template <
typename PutType>
1750 const PutType* origin_buffer,
const int origin_count,
1751 const int target_rank,
const unsigned int target_disp,
1752 const int target_count)
const
1754 auto guard = exec->get_scoped_device_id_guard();
1756 GKO_ASSERT_NO_MPI_ERRORS(MPI_Rput(
1758 target_rank, target_disp, target_count,
1774 template <
typename PutType>
1776 const PutType* origin_buffer,
const int origin_count,
1777 const int target_rank,
const unsigned int target_disp,
1778 const int target_count, MPI_Op operation)
const
1780 auto guard = exec->get_scoped_device_id_guard();
1781 GKO_ASSERT_NO_MPI_ERRORS(MPI_Accumulate(
1783 target_rank, target_disp, target_count,
1800 template <
typename PutType>
1802 const PutType* origin_buffer,
const int origin_count,
1803 const int target_rank,
const unsigned int target_disp,
1804 const int target_count, MPI_Op operation)
const
1806 auto guard = exec->get_scoped_device_id_guard();
1808 GKO_ASSERT_NO_MPI_ERRORS(MPI_Raccumulate(
1810 target_rank, target_disp, target_count,
1826 template <
typename GetType>
1827 void get(std::shared_ptr<const Executor> exec, GetType* origin_buffer,
1828 const int origin_count,
const int target_rank,
1829 const unsigned int target_disp,
const int target_count)
const
1831 auto guard = exec->get_scoped_device_id_guard();
1832 GKO_ASSERT_NO_MPI_ERRORS(
1834 target_rank, target_disp, target_count,
/**
 * Request-based get wrapper: calls MPI_Rget.
 *
 * target_rank, target_disp and target_count are forwarded to MPI_Rget; the
 * return code is checked with GKO_ASSERT_NO_MPI_ERRORS.
 *
 * NOTE(review): target_disp is an `unsigned int`, whereas MPI takes target
 * displacements as MPI_Aint; displacements beyond the unsigned int range
 * cannot be expressed through this interface -- confirm that is acceptable.
 *
 * @param exec  executor whose scoped device-id guard is held during the call.
 * @param origin_buffer  local buffer to read into.
 * @param origin_count  number of elements in the local buffer.
 * @param target_rank  rank whose window is read.
 * @param target_disp  displacement into the target window.
 * @param target_count  number of elements read from the target.
 *
 * @return a request object (declared return type).
 */
1850 template <
typename GetType>
1851 request r_get(std::shared_ptr<const Executor> exec, GetType* origin_buffer,
1852 const int origin_count,
const int target_rank,
1853 const unsigned int target_disp,
const int target_count)
const
// Held for the rest of the scope, i.e. across the MPI call below.
1855 auto guard = exec->get_scoped_device_id_guard();
1857 GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget(
1859 target_rank, target_disp, target_count,
1877 template <
typename GetType>
1879 GetType* origin_buffer,
const int origin_count,
1880 GetType* result_buffer,
const int result_count,
1881 const int target_rank,
const unsigned int target_disp,
1882 const int target_count, MPI_Op operation)
const
1884 auto guard = exec->get_scoped_device_id_guard();
1885 GKO_ASSERT_NO_MPI_ERRORS(MPI_Get_accumulate(
1888 target_rank, target_disp, target_count,
1907 template <
typename GetType>
1909 GetType* origin_buffer,
const int origin_count,
1910 GetType* result_buffer,
const int result_count,
1911 const int target_rank,
1912 const unsigned int target_disp,
1913 const int target_count, MPI_Op operation)
const
1915 auto guard = exec->get_scoped_device_id_guard();
1917 GKO_ASSERT_NO_MPI_ERRORS(MPI_Rget_accumulate(
1920 target_rank, target_disp, target_count,
1936 template <
typename GetType>
1938 GetType* origin_buffer, GetType* result_buffer,
1939 const int target_rank,
const unsigned int target_disp,
1940 MPI_Op operation)
const
1942 auto guard = exec->get_scoped_device_id_guard();
1943 GKO_ASSERT_NO_MPI_ERRORS(MPI_Fetch_and_op(
1945 target_rank, target_disp, operation, this->
get_window()));
1958 #endif // GKO_HAVE_MPI
1961 #endif // GKO_PUBLIC_CORE_BASE_MPI_HPP_