Ginkgo  Generated from pipelines/2216270019 branch based on develop. Ginkgo version 1.12.0
A numerical linear algebra library targeting many-core architectures
vector.hpp
1 // SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
2 //
3 // SPDX-License-Identifier: BSD-3-Clause
4 
5 #ifndef GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_
6 #define GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_
7 
8 
9 #include <ginkgo/config.hpp>
10 
11 
12 #if GINKGO_BUILD_MPI
13 
14 
15 #include <ginkgo/core/base/dense_cache.hpp>
16 #include <ginkgo/core/base/lin_op.hpp>
17 #include <ginkgo/core/base/mpi.hpp>
18 #include <ginkgo/core/distributed/base.hpp>
19 #include <ginkgo/core/matrix/dense.hpp>
20 
21 
22 namespace gko {
23 namespace experimental {
24 namespace distributed {
25 namespace detail {
26 
27 
// Forward declaration only; the definition is not part of this header.
// Vector declares detail::VectorCache<ValueType> as a friend further below.
28 template <typename ValueType>
29 class VectorCache;
30 
31 
32 } // namespace detail
33 
34 
// Forward declaration only; the definition is not part of this header.
// Partition appears here solely as a pointer parameter type of the
// Vector::read_distributed overloads.
35 template <typename LocalIndexType, typename GlobalIndexType>
36 class Partition;
37 
38 
// A distributed (multi-)vector LinOp. Each instance stores a local part as a
// gko::matrix::Dense<ValueType> (member local_) and derives from
// DistributedBase. It is convertible to the Vector types of the next
// precision(s); how many depends on GINKGO_ENABLE_HALF / GINKGO_ENABLE_BFLOAT16.
66 template <typename ValueType = double>
67 class Vector
68  : public EnableLinOp<Vector<ValueType>>,
69  public ConvertibleTo<Vector<next_precision<ValueType>>>,
70 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
71  public ConvertibleTo<Vector<next_precision<ValueType, 2>>>,
72 #endif
73 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
74  public ConvertibleTo<Vector<next_precision<ValueType, 3>>>,
75 #endif
76  public EnableAbsoluteComputation<remove_complex<Vector<ValueType>>>,
77  public DistributedBase {
78  friend class EnablePolymorphicObject<Vector, LinOp>;
79  friend class Vector<to_complex<ValueType>>;
80  friend class Vector<remove_complex<ValueType>>;
81  friend class Vector<previous_precision<ValueType>>;
82  friend class detail::VectorCache<ValueType>;
83  GKO_ASSERT_SUPPORTED_VALUE_TYPE;
84 
85 public:
    // Keep the ConvertibleTo overloads visible next to the overrides declared
    // in this class.
88  using ConvertibleTo<Vector<next_precision<ValueType>>>::convert_to;
89  using ConvertibleTo<Vector<next_precision<ValueType>>>::move_to;
90 
91  using value_type = ValueType;
92  using absolute_type = remove_complex<Vector>;
93  using real_type = absolute_type;
94  using complex_type = Vector<to_complex<value_type>>;
95  using local_vector_type = gko::matrix::Dense<value_type>;
96 
    // Creates a distributed Vector with the same size and stride as another
    // Vector.
103  static std::unique_ptr<Vector> create_with_config_of(
104  ptr_param<const Vector> other);
105 
106 
    // Creates an empty Vector with the same type as another Vector, but on a
    // different executor.
118  static std::unique_ptr<Vector> create_with_type_of(
119  ptr_param<const Vector> other, std::shared_ptr<const Executor> exec);
120 
    // Overload that additionally receives a global size, a local size and a
    // stride (presumably the dimensions of the created Vector -- TODO confirm).
133  static std::unique_ptr<Vector> create_with_type_of(
134  ptr_param<const Vector> other, std::shared_ptr<const Executor> exec,
135  const dim<2>& global_size, const dim<2>& local_size, size_type stride);
136 
    // Reads a vector from the device_matrix_data structure and a global row
    // partition. One overload per supported (local, global) index type pair.
151  void read_distributed(const device_matrix_data<ValueType, int64>& data,
152  ptr_param<const Partition<int64, int64>> partition);
153 
154  void read_distributed(const device_matrix_data<ValueType, int64>& data,
155  ptr_param<const Partition<int32, int64>> partition);
156 
157  void read_distributed(const device_matrix_data<ValueType, int32>& data,
158  ptr_param<const Partition<int32, int32>> partition);
159 
    // Same set of overloads, taking matrix_data instead of device_matrix_data.
169  void read_distributed(const matrix_data<ValueType, int64>& data,
170  ptr_param<const Partition<int64, int64>> partition);
171 
172  void read_distributed(const matrix_data<ValueType, int64>& data,
173  ptr_param<const Partition<int32, int64>> partition);
174 
175  void read_distributed(const matrix_data<ValueType, int32>& data,
176  ptr_param<const Partition<int32, int32>> partition);
177 
    // ConvertibleTo overrides targeting the next-precision Vector type.
178  void convert_to(Vector<next_precision<ValueType>>* result) const override;
179 
180  void move_to(Vector<next_precision<ValueType>>* result) override;
181 
    // Conversions to the precision two steps ahead; only compiled when half
    // or bfloat16 support is enabled.
182 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
183  friend class Vector<previous_precision<ValueType, 2>>;
184  using ConvertibleTo<Vector<next_precision<ValueType, 2>>>::convert_to;
185  using ConvertibleTo<Vector<next_precision<ValueType, 2>>>::move_to;
186 
187  void convert_to(
188  Vector<next_precision<ValueType, 2>>* result) const override;
189 
190  void move_to(Vector<next_precision<ValueType, 2>>* result) override;
191 #endif
192 
    // Conversions to the precision three steps ahead; only compiled when both
    // half and bfloat16 support are enabled.
193 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
194  friend class Vector<previous_precision<ValueType, 3>>;
195  using ConvertibleTo<Vector<next_precision<ValueType, 3>>>::convert_to;
196  using ConvertibleTo<Vector<next_precision<ValueType, 3>>>::move_to;
197 
198  void convert_to(
199  Vector<next_precision<ValueType, 3>>* result) const override;
200 
201  void move_to(Vector<next_precision<ValueType, 3>>* result) override;
202 #endif
203 
    // Gets the AbsoluteLinOp.
204  std::unique_ptr<absolute_type> compute_absolute() const override;
205 
    // Computes the absolute value in place on each element.
206  void compute_absolute_inplace() override;
207 
    // Creates a complex copy of the original vectors.
212  std::unique_ptr<complex_type> make_complex() const;
213 
    // Overload taking a caller-provided result object instead of returning a
    // new one (presumably filled with the complex copy -- TODO confirm).
219  void make_complex(ptr_param<complex_type> result) const;
220 
    // Creates new real vectors and extracts the real part of the original
    // vectors into that.
225  std::unique_ptr<real_type> get_real() const;
226 
    // Overload taking a caller-provided result object.
230  void get_real(ptr_param<real_type> result) const;
231 
    // Creates new real vectors and extracts the imaginary part of the original
    // vectors into that.
236  std::unique_ptr<real_type> get_imag() const;
237 
    // Overload taking a caller-provided result object.
242  void get_imag(ptr_param<real_type> result) const;
243 
    // Fills the distributed vectors with a given value.
249  void fill(ValueType value);
250 
    // Scales the vectors with a scalar (aka: BLAS scal).
260  void scale(ptr_param<const LinOp> alpha);
261 
    // Scales the vectors with the inverse of a scalar.
271  void inv_scale(ptr_param<const LinOp> alpha);
272 
    // Adds b scaled by alpha to the vectors (aka: BLAS axpy).
282  void add_scaled(ptr_param<const LinOp> alpha, ptr_param<const LinOp> b);
283 
    // Subtracts b scaled by alpha from the vectors (aka: BLAS axpy).
292  void sub_scaled(ptr_param<const LinOp> alpha, ptr_param<const LinOp> b);
293 
    // NOTE(review): for each reduction below, the second overload takes an
    // extra array<char>& tmp; it looks like caller-provided temporary
    // storage -- confirm against the implementation.

    // Computes the column-wise dot product of this (multi-)vector and b using
    // a global reduction.
303  void compute_dot(ptr_param<const LinOp> b, ptr_param<LinOp> result) const;
304 
317  void compute_dot(ptr_param<const LinOp> b, ptr_param<LinOp> result,
318  array<char>& tmp) const;
319 
    // Computes the column-wise dot product of this (multi-)vector and conj(b)
    // using a global reduction.
329  void compute_conj_dot(ptr_param<const LinOp> b,
330  ptr_param<LinOp> result) const;
331 
344  void compute_conj_dot(ptr_param<const LinOp> b, ptr_param<LinOp> result,
345  array<char>& tmp) const;
346 
    // Computes the square of the column-wise Euclidean (L^2) norm of this
    // (multi-)vector using a global reduction.
355  void compute_squared_norm2(ptr_param<LinOp> result) const;
356 
368  void compute_squared_norm2(ptr_param<LinOp> result, array<char>& tmp) const;
369 
    // Computes the Euclidean (L^2) norm of this (multi-)vector using a global
    // reduction.
378  void compute_norm2(ptr_param<LinOp> result) const;
379 
391  void compute_norm2(ptr_param<LinOp> result, array<char>& tmp) const;
392 
    // Computes the column-wise (L^1) norm of this (multi-)vector.
400  void compute_norm1(ptr_param<LinOp> result) const;
401 
413  void compute_norm1(ptr_param<LinOp> result, array<char>& tmp) const;
414 
    // Computes the column-wise mean of this (multi-)vector using a global
    // reduction.
423  void compute_mean(ptr_param<LinOp> result) const;
424 
436  void compute_mean(ptr_param<LinOp> result, array<char>& tmp) const;
437 
    // Returns a single element of the multi-vector, addressed by (row, col).
    // The non-const overload returns a reference, the const one a copy.
448  value_type& at_local(size_type row, size_type col) noexcept;
449 
453  value_type at_local(size_type row, size_type col) const noexcept;
454 
    // Single-index variants of at_local (presumably a linearized index into
    // the local part -- TODO confirm the exact indexing convention).
469  ValueType& at_local(size_type idx) noexcept;
470 
474  ValueType at_local(size_type idx) const noexcept;
475 
    // Returns a pointer to the array of local values of the multi-vector.
481  value_type* get_local_values();
482 
    // Const-pointer variant of get_local_values().
490  const value_type* get_const_local_values() const;
491 
    // Direct (read) access to the underlying local vector of type
    // local_vector_type.
497  const local_vector_type* get_local_vector() const;
498 
    // Creates a real view of the (potentially) complex original multi-vector.
506  std::unique_ptr<const real_type> create_real_view() const;
507 
511  std::unique_ptr<real_type> create_real_view();
512 
    // Creates a view of a submatrix of this vector.
522  std::unique_ptr<Vector> create_submatrix(local_span rows,
523  local_span columns,
524  dim<2> global_size);
525 
    // Forwards to the stride of the local Dense vector.
526  size_type get_stride() const noexcept { return local_.get_stride(); }
527 
    // Creates an empty distributed vector with a specified size (and stride).
539  static std::unique_ptr<Vector> create(std::shared_ptr<const Executor> exec,
540  mpi::communicator comm,
541  dim<2> global_size, dim<2> local_size,
542  size_type stride);
543 
    // Same without an explicit stride; both sizes default to empty dim<2>{}.
555  static std::unique_ptr<Vector> create(std::shared_ptr<const Executor> exec,
556  mpi::communicator comm,
557  dim<2> global_size = {},
558  dim<2> local_size = {});
559 
    // Creates a distributed vector from an existing local vector, whose
    // ownership is passed in, together with an explicit global size.
577  static std::unique_ptr<Vector> create(
578  std::shared_ptr<const Executor> exec, mpi::communicator comm,
579  dim<2> global_size, std::unique_ptr<local_vector_type> local_vector);
580 
    // Same without an explicit global size (how the global size is then
    // determined is not visible in this header).
599  static std::unique_ptr<Vector> create(
600  std::shared_ptr<const Executor> exec, mpi::communicator comm,
601  std::unique_ptr<local_vector_type> local_vector);
602 
    // Creates a constant (immutable) distributed Vector from a constant local
    // vector.
615  static std::unique_ptr<const Vector> create_const(
616  std::shared_ptr<const Executor> exec, mpi::communicator comm,
617  dim<2> global_size,
618  std::unique_ptr<const local_vector_type> local_vector);
619 
    // Same without an explicit global size.
632  static std::unique_ptr<const Vector> create_const(
633  std::shared_ptr<const Executor> exec, mpi::communicator comm,
634  std::unique_ptr<const local_vector_type> local_vector);
635 
636 protected:
    // Constructors; their parameter lists mirror the public create()
    // overloads above.
637  Vector(std::shared_ptr<const Executor> exec, mpi::communicator comm,
638  dim<2> global_size, dim<2> local_size, size_type stride);
639 
640  explicit Vector(std::shared_ptr<const Executor> exec,
641  mpi::communicator comm, dim<2> global_size = {},
642  dim<2> local_size = {});
643 
644  Vector(std::shared_ptr<const Executor> exec, mpi::communicator comm,
645  dim<2> global_size, std::unique_ptr<local_vector_type> local_vector);
646 
647  Vector(std::shared_ptr<const Executor> exec, mpi::communicator comm,
648  std::unique_ptr<local_vector_type> local_vector);
649 
    // Takes a new global and a new local size.
650  void resize(dim<2> global_size, dim<2> local_size);
651 
    // Index-type-generic helper; presumably the shared implementation behind
    // the public read_distributed overloads -- TODO confirm.
652  template <typename LocalIndexType, typename GlobalIndexType>
653  void read_distributed_impl(
654  const device_matrix_data<ValueType, GlobalIndexType>& data,
655  const Partition<LocalIndexType, GlobalIndexType>* partition);
656 
    // LinOp apply hooks (two-operand and four-operand forms).
657  void apply_impl(const LinOp*, LinOp*) const override;
658 
659  void apply_impl(const LinOp*, const LinOp*, const LinOp*,
660  LinOp*) const override;
661 
    // Virtual customization points; by name they back the public
    // create_with_config_of, create_with_type_of and create_submatrix
    // functions -- TODO confirm.
668  virtual std::unique_ptr<Vector> create_with_same_config() const;
669 
682  virtual std::unique_ptr<Vector> create_with_type_of_impl(
683  std::shared_ptr<const Executor> exec, const dim<2>& global_size,
684  const dim<2>& local_size, size_type stride) const;
685 
689  virtual std::unique_ptr<Vector> create_submatrix_impl(local_span rows,
690  local_span columns,
691  dim<2> global_size);
692 
693 private:
    // Local part of the distributed vector.
694  local_vector_type local_;
    // Cached dense buffers; by name they hold host-side reduction and norm
    // results -- TODO confirm.
695  ::gko::detail::DenseCache<ValueType> host_reduction_buffer_;
696  ::gko::detail::DenseCache<remove_complex<ValueType>> host_norm_buffer_;
697 };
698 
699 
700 } // namespace distributed
701 } // namespace experimental
702 
703 
704 namespace detail {
705 
706 
// Primary template: declared only, not defined here. This header provides the
// specialization for experimental::distributed::Vector.
707 template <typename TargetType>
708 struct conversion_target_helper;
709 
710 
// Specialization of conversion_target_helper for distributed Vectors.
// Every create_empty overload builds a new target Vector through
// target_type::create, passing the source's executor and communicator;
// no sizes are passed, so create's default (empty) sizes are used.
720 template <typename ValueType>
721 struct conversion_target_helper<experimental::distributed::Vector<ValueType>> {
722  using target_type = experimental::distributed::Vector<ValueType>;
    // Vector type of the previous precision.
723  using source_type =
724  experimental::distributed::Vector<previous_precision<ValueType>>;
725 
    // Creates an empty target from a source of the previous precision.
726  static std::unique_ptr<target_type> create_empty(const source_type* source)
727  {
728  return target_type::create(source->get_executor(),
729  source->get_communicator());
730  }
731 
732  // Allow create_empty for a source of the same type as the target.
733  // For distributed case, next<next<V>> will be V in the candidate list.
734  // TODO: decide whether to add this or add a condition to the list
735  static std::unique_ptr<target_type> create_empty(const target_type* source)
736  {
737  return target_type::create(source->get_executor(),
738  source->get_communicator());
739  }
740 
    // Source two precision steps back; only compiled when half or bfloat16
    // support is enabled.
741 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
742  using snd_source_type =
743  experimental::distributed::Vector<previous_precision<ValueType, 2>>;
744 
745  static std::unique_ptr<target_type> create_empty(
746  const snd_source_type* source)
747  {
748  return target_type::create(source->get_executor(),
749  source->get_communicator());
750  }
751 #endif
    // Source three precision steps back; only compiled when both half and
    // bfloat16 support are enabled.
752 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
753  using trd_source_type =
754  experimental::distributed::Vector<previous_precision<ValueType, 3>>;
755 
756  static std::unique_ptr<target_type> create_empty(
757  const trd_source_type* source)
758  {
759  return target_type::create(source->get_executor(),
760  source->get_communicator());
761  }
762 #endif
763 };
764 
765 
766 } // namespace detail
767 } // namespace gko
768 
769 
770 #endif // GINKGO_BUILD_MPI
771 
772 
773 #endif // GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_
gko::experimental::distributed::Vector::create_submatrix
std::unique_ptr< Vector > create_submatrix(local_span rows, local_span columns, dim< 2 > global_size)
Creates a view of a submatrix of this vector.
gko::EnablePolymorphicAssignment< ConcreteLinOp >::move_to
void move_to(result_type *result) override
Definition: polymorphic_object.hpp:751
gko::EnablePolymorphicAssignment< ConcreteLinOp >::convert_to
void convert_to(result_type *result) const override
Definition: polymorphic_object.hpp:749
gko::experimental::distributed::Vector::at_local
value_type & at_local(size_type row, size_type col) noexcept
Returns a single element of the multi-vector.
gko::matrix::Dense< value_type >
gko::experimental::distributed::Vector::make_complex
std::unique_ptr< complex_type > make_complex() const
Creates a complex copy of the original vectors.
gko::experimental::distributed::Vector::create_with_type_of
static std::unique_ptr< Vector > create_with_type_of(ptr_param< const Vector > other, std::shared_ptr< const Executor > exec)
Creates an empty Vector with the same type as another Vector, but on a different executor.
gko::experimental::distributed::Vector::compute_squared_norm2
void compute_squared_norm2(ptr_param< LinOp > result) const
Computes the square of the column-wise Euclidean (L^2) norm of this (multi-)vector using a global reduction.
gko::size_type
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:90
gko::experimental::distributed::Vector::create
static std::unique_ptr< Vector > create(std::shared_ptr< const Executor > exec, mpi::communicator comm, dim< 2 > global_size, dim< 2 > local_size, size_type stride)
Creates an empty distributed vector with a specified size.
gko::experimental::distributed::Vector::read_distributed
void read_distributed(const device_matrix_data< ValueType, int64 > &data, ptr_param< const Partition< int64, int64 >> partition)
Reads a vector from the device_matrix_data structure and a global row partition.
gko::experimental::distributed::Vector::create_real_view
std::unique_ptr< const real_type > create_real_view() const
Create a real view of the (potentially) complex original multi-vector.
gko::experimental::distributed::Vector::compute_norm1
void compute_norm1(ptr_param< LinOp > result) const
Computes the column-wise (L^1) norm of this (multi-)vector.
gko::experimental::distributed::Vector::get_real
std::unique_ptr< real_type > get_real() const
Creates new real vectors and extracts the real part of the original vectors into that.
gko::experimental::distributed::Vector::get_local_values
value_type * get_local_values()
Returns a pointer to the array of local values of the multi-vector.
gko::matrix::Dense::get_stride
size_type get_stride() const noexcept
Returns the stride of the matrix.
Definition: dense.hpp:880
gko::experimental::distributed::Vector::get_local_vector
const local_vector_type * get_local_vector() const
Direct (read) access to the underlying local vector of type local_vector_type.
gko
The Ginkgo namespace.
Definition: abstract_factory.hpp:20
gko::experimental::distributed::Vector::fill
void fill(ValueType value)
Fill the distributed vectors with a given value.
gko::experimental::distributed::Vector::compute_mean
void compute_mean(ptr_param< LinOp > result) const
Computes the column-wise mean of this (multi-)vector using a global reduction.
gko::experimental::distributed::Vector::create_with_config_of
static std::unique_ptr< Vector > create_with_config_of(ptr_param< const Vector > other)
Creates a distributed Vector with the same size and stride as another Vector.
gko::experimental::distributed::Vector::compute_absolute
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
gko::experimental::distributed::Vector::add_scaled
void add_scaled(ptr_param< const LinOp > alpha, ptr_param< const LinOp > b)
Adds b scaled by alpha to the vectors (aka: BLAS axpy).
gko::experimental::distributed::Vector::compute_dot
void compute_dot(ptr_param< const LinOp > b, ptr_param< LinOp > result) const
Computes the column-wise dot product of this (multi-)vector and b using a global reduction.
gko::experimental::distributed::Vector::compute_conj_dot
void compute_conj_dot(ptr_param< const LinOp > b, ptr_param< LinOp > result) const
Computes the column-wise dot product of this (multi-)vector and conj(b) using a global reduction.
gko::previous_precision
typename detail::find_precision_impl< T, -step >::type previous_precision
Obtains the type reached by moving `step` positions backwards from T in the singly-linked list of precisions (which accounts for bfloat16/half).
Definition: math.hpp:473
gko::experimental::distributed::Vector::compute_absolute_inplace
void compute_absolute_inplace() override
Compute absolute inplace on each element.
gko::experimental::distributed::Vector::create_const
static std::unique_ptr< const Vector > create_const(std::shared_ptr< const Executor > exec, mpi::communicator comm, dim< 2 > global_size, std::unique_ptr< const local_vector_type > local_vector)
Creates a constant (immutable) distributed Vector from a constant local vector.
gko::experimental::distributed::Vector::get_imag
std::unique_ptr< real_type > get_imag() const
Creates new real vectors and extracts the imaginary part of the original vectors into that.
gko::experimental::distributed::Vector::compute_norm2
void compute_norm2(ptr_param< LinOp > result) const
Computes the Euclidean (L^2) norm of this (multi-)vector using a global reduction.
gko::experimental::distributed::Vector::get_const_local_values
const value_type * get_const_local_values() const
Returns a pointer to the array of local values of the multi-vector.
gko::remove_complex
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition: math.hpp:264
gko::experimental::distributed::Vector::inv_scale
void inv_scale(ptr_param< const LinOp > alpha)
Scales the vectors with the inverse of a scalar.
gko::experimental::distributed::Vector::sub_scaled
void sub_scaled(ptr_param< const LinOp > alpha, ptr_param< const LinOp > b)
Subtracts b scaled by alpha from the vectors (aka: BLAS axpy).
gko::LinOp::LinOp
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
gko::to_complex
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition: math.hpp:283
gko::experimental::distributed::Vector::scale
void scale(ptr_param< const LinOp > alpha)
Scales the vectors with a scalar (aka: BLAS scal).