// doc/develop/matrix_8hpp_source.html

// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors

//

// SPDX-License-Identifier: BSD-3-Clause


#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_MATRIX_HPP_

#define GKO_PUBLIC_CORE_DISTRIBUTED_MATRIX_HPP_


#include <ginkgo/config.hpp>


#if GINKGO_BUILD_MPI


#include <cstddef>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <ginkgo/core/base/dense_cache.hpp>

#include <ginkgo/core/base/lin_op.hpp>

#include <ginkgo/core/base/mpi.hpp>

#include <ginkgo/core/base/std_extensions.hpp>

#include <ginkgo/core/distributed/base.hpp>

#include <ginkgo/core/distributed/index_map.hpp>


namespace gko {

namespace matrix {


// Forward declaration of gko::matrix::Csr. Only the name is introduced here;
// the definition is not part of this header.
template <typename ValueType, typename IndexType>

class Csr;


}  // namespace matrix


namespace multigrid {


// Forward declaration of gko::multigrid::Pgm. The name is needed further
// down, where the distributed Matrix declares Pgm<ValueType, LocalIndexType>
// as a friend; the definition is not part of this header.
template <typename ValueType, typename IndexType>

class Pgm;


}  // namespace multigrid


namespace detail {


/**
 * Detection trait answering: can an object of type `BuilderType` be asked to
 * `create<ValueType, IndexType>(std::shared_ptr<const Executor>)`?
 *
 * This primary template is selected whenever the expression probed by the
 * partial specialization below is ill-formed, and reports `false`.
 *
 * @tparam BuilderType  the candidate builder type
 * @tparam ValueType  first template argument handed to `create`
 * @tparam IndexType  second template argument handed to `create`
 * @tparam Enable  SFINAE slot, always left at its default `void`
 */
template <typename BuilderType, typename ValueType, typename IndexType,
          typename Enable = void>
struct is_matrix_type_builder : std::false_type {};


/**
 * Partial specialization that is only viable if calling the member template
 * `create<ValueType, IndexType>` on a `BuilderType` rvalue with an executor
 * pointer is a well-formed expression; it reports `true`.
 */
template <typename BuilderType, typename ValueType, typename IndexType>
struct is_matrix_type_builder<
    BuilderType, ValueType, IndexType,
    xstd::void_t<decltype(
        std::declval<BuilderType>().template create<ValueType, IndexType>(
            std::declval<std::shared_ptr<const Executor>>()))>>
    : std::true_type {};


/**
 * Deferred factory for a two-parameter matrix class template.
 *
 * The value and index type of the matrix are not fixed when the builder is
 * constructed; they are supplied later through create(). Any additional
 * arguments for `MatrixType<ValueType, IndexType>::create` are kept in
 * `create_args` and appended after the executor.
 *
 * @tparam MatrixType  class template taking (value type, index type)
 * @tparam CreateArgs  types of the stored extra arguments; these may be
 *                     reference types, in which case only a reference is held
 */
template <template <typename, typename> class MatrixType,
          typename... CreateArgs>
struct MatrixTypeBuilderFromValueAndIndex {
    /**
     * Unpacks the stored tuple by index and calls
     * `MatrixType<ValueType, IndexType>::create(exec, args...)`.
     *
     * The stored arguments are passed as lvalues, so the builder can be
     * used more than once.
     *
     * @param exec  the executor handed to the matrix factory
     * @return whatever `MatrixType<ValueType, IndexType>::create` returns
     */
    template <typename ValueType, typename IndexType, std::size_t... Indices>
    auto create_impl(std::shared_ptr<const Executor> exec,
                     std::index_sequence<Indices...>)
    {
        using matrix_type = MatrixType<ValueType, IndexType>;
        return matrix_type::create(exec, std::get<Indices>(create_args)...);
    }


    /**
     * Instantiates the matrix type for the requested value and index type.
     *
     * @param exec  the executor handed to the matrix factory
     * @return the result of create_impl for all stored arguments
     */
    template <typename ValueType, typename IndexType>
    auto create(std::shared_ptr<const Executor> exec)
    {
        // with c++17 we could use std::apply
        return create_impl<ValueType, IndexType>(
            std::move(exec), std::index_sequence_for<CreateArgs...>{});
    }


    /** Extra arguments appended after the executor in the create call. */
    std::tuple<CreateArgs...> create_args;
};


}  // namespace detail


/**
 * Creates a builder that defers the choice of value and index type of
 * `MatrixType` and remembers the extra arguments for its `create` function.
 *
 * Storage follows the value category of each argument:
 * - an lvalue argument deduces `Args` as an lvalue reference, so the builder
 *   only holds a reference; the caller has to keep the referenced object
 *   alive for as long as the builder is used;
 * - an rvalue argument is stored by value. It is perfectly forwarded into
 *   the tuple, i.e. moved instead of copied, which also makes move-only
 *   argument types usable.
 *
 * @tparam MatrixType  class template taking (value type, index type)
 * @tparam Args  deduced types of the extra creation arguments
 *
 * @param create_args  extra arguments that are passed after the executor to
 *                     `MatrixType<ValueType, IndexType>::create`
 *
 * @return a detail::MatrixTypeBuilderFromValueAndIndex<MatrixType, Args...>
 */
template <template <typename, typename> class MatrixType, typename... Args>
auto with_matrix_type(Args&&... create_args)
{
    // Named parameters are lvalues; without std::forward every rvalue
    // argument would be copied into the tuple.
    return detail::MatrixTypeBuilderFromValueAndIndex<MatrixType, Args...>{
        std::forward_as_tuple(std::forward<Args>(create_args)...)};
}


namespace experimental {

namespace distributed {


/**
 * Selects how matrix entries are assembled across ranks; used as the last
 * parameter of the distributed read functions.
 *
 * NOTE(review): the descriptions of the enumerators are inferred from their
 * names - confirm against the implementation.
 */
enum class assembly_mode {
    /** Presumably: entries owned by other ranks are communicated to them. */
    communicate,
    /** Presumably: only entries owned by the calling rank are considered. */
    local_only
};


// Forward declaration; this header only uses Partition behind
// std::shared_ptr in the parameter lists of the distributed read functions.
template <typename LocalIndexType, typename GlobalIndexType>

class Partition;

// Forward declaration of the distributed vector; used below for the
// global_vector_type alias of the distributed matrix.
template <typename ValueType>

class Vector;


/**
 * Distributed linear operator that is stored as two LinOp blocks, a local
 * one and a non-local one, together with an index_map and the sizes,
 * offsets and buffers of an MPI exchange.
 *
 * NOTE(review): presumably an apply multiplies the local block with the
 * locally owned part of the vector and the non-local block with the entries
 * received from other ranks - the implementation is not in this header,
 * confirm there.
 *
 * The matrix can be converted to the Matrix instantiation with
 * next_precision<ValueType> values (and, if GINKGO_ENABLE_HALF is set, to the
 * one after that).
 *
 * @tparam ValueType  value type, exposed as value_type
 * @tparam LocalIndexType  exposed as local_index_type
 * @tparam GlobalIndexType  exposed as global_index_type and index_type
 */
template <typename ValueType = default_precision,

          typename LocalIndexType = int32, typename GlobalIndexType = int64>

class Matrix

    : public EnableLinOp<Matrix<ValueType, LocalIndexType, GlobalIndexType>>,

      public ConvertibleTo<

          Matrix<next_precision<ValueType>, LocalIndexType, GlobalIndexType>>,

#if GINKGO_ENABLE_HALF

      public ConvertibleTo<Matrix<next_precision<next_precision<ValueType>>,

                                  LocalIndexType, GlobalIndexType>>,

#endif

      public DistributedBase {

    // Friends: the polymorphic-object base, the Matrix instantiation of the
    // previous precision, and multigrid::Pgm.
    friend class EnablePolymorphicObject<Matrix, LinOp>;

    friend class Matrix<previous_precision<ValueType>, LocalIndexType,

                        GlobalIndexType>;

    friend class multigrid::Pgm<ValueType, LocalIndexType>;


public:

    // Public type aliases. Note that index_type is the *global* index type.
    using value_type = ValueType;

    using index_type = GlobalIndexType;

    using local_index_type = LocalIndexType;

    using global_index_type = GlobalIndexType;

    // Distributed vector type (parameter type of col_scale / row_scale) and
    // its local vector type (parameter type of communicate).
    using global_vector_type =

        gko::experimental::distributed::Vector<ValueType>;

    using local_vector_type = typename global_vector_type::local_vector_type;


    // Re-expose the base-class overloads; the convert_to / move_to overrides
    // declared in this class would otherwise hide them.
    using EnableLinOp<Matrix>::convert_to;

    using EnableLinOp<Matrix>::move_to;

    using ConvertibleTo<Matrix<next_precision<ValueType>, LocalIndexType,

                               GlobalIndexType>>::convert_to;

    using ConvertibleTo<Matrix<next_precision<ValueType>, LocalIndexType,

                               GlobalIndexType>>::move_to;


    /**
     * Converts this matrix into a Matrix with next_precision<value_type>
     * values. This object is not modified (const member).
     *
     * @param result  the matrix that receives the converted data
     */
    void convert_to(Matrix<next_precision<value_type>, local_index_type,

                           global_index_type>* result) const override;


    /**
     * Same target type as convert_to, but as a non-const operation.
     * NOTE(review): presumably leaves this matrix in a moved-from state -
     * confirm in the implementation.
     *
     * @param result  the matrix that receives the data
     */
    void move_to(Matrix<next_precision<value_type>, local_index_type,

                        global_index_type>* result) override;

#if GINKGO_ENABLE_HALF

    // Only with GINKGO_ENABLE_HALF: the same friend / using / override set
    // for the precision that is two next_precision steps away.
    friend class Matrix<previous_precision<previous_precision<ValueType>>,

                        LocalIndexType, GlobalIndexType>;

    using ConvertibleTo<

        Matrix<next_precision<next_precision<value_type>>, local_index_type,

               global_index_type>>::convert_to;

    using ConvertibleTo<Matrix<next_precision<next_precision<value_type>>,

                               local_index_type, global_index_type>>::move_to;


    /**
     * Converts this matrix into a Matrix whose value type is
     * next_precision applied twice to value_type.
     *
     * @param result  the matrix that receives the converted data
     */
    void convert_to(

        Matrix<next_precision<next_precision<value_type>>, local_index_type,

               global_index_type>* result) const override;


    /**
     * Non-const counterpart of the conversion above.
     *
     * @param result  the matrix that receives the data
     */
    void move_to(Matrix<next_precision<next_precision<value_type>>,

                        local_index_type, global_index_type>* result) override;


#endif


    /**
     * Reads the matrix from device_matrix_data with globally indexed
     * entries, using a single partition.
     * NOTE(review): presumably the same partition is applied to rows and
     * columns - confirm in the implementation.
     *
     * @param data  the matrix entries, indexed with global_index_type
     * @param partition  the partition used for the distribution
     * @param assembly_type  assembly mode, defaults to local_only
     */
    void read_distributed(

        const device_matrix_data<value_type, global_index_type>& data,

        std::shared_ptr<const Partition<local_index_type, global_index_type>>

            partition,

        assembly_mode assembly_type = assembly_mode::local_only);


    /**
     * Same as the single-partition overload above, but reads from
     * matrix_data instead of device_matrix_data.
     *
     * @param data  the matrix entries, indexed with global_index_type
     * @param partition  the partition used for the distribution
     * @param assembly_type  assembly mode, defaults to local_only
     */
    void read_distributed(

        const matrix_data<value_type, global_index_type>& data,

        std::shared_ptr<const Partition<local_index_type, global_index_type>>

            partition,

        assembly_mode assembly_type = assembly_mode::local_only);


    /**
     * Reads the matrix from device_matrix_data using separate partitions
     * for rows and columns.
     *
     * @param data  the matrix entries, indexed with global_index_type
     * @param row_partition  the partition of the rows
     * @param col_partition  the partition of the columns
     * @param assembly_type  assembly mode, defaults to local_only
     */
    void read_distributed(

        const device_matrix_data<value_type, global_index_type>& data,

        std::shared_ptr<const Partition<local_index_type, global_index_type>>

            row_partition,

        std::shared_ptr<const Partition<local_index_type, global_index_type>>

            col_partition,

        assembly_mode assembly_type = assembly_mode::local_only);


    /**
     * Same as the row/column-partition overload above, but reads from
     * matrix_data instead of device_matrix_data.
     *
     * @param data  the matrix entries, indexed with global_index_type
     * @param row_partition  the partition of the rows
     * @param col_partition  the partition of the columns
     * @param assembly_type  assembly mode, defaults to local_only
     */
    void read_distributed(

        const matrix_data<value_type, global_index_type>& data,

        std::shared_ptr<const Partition<local_index_type, global_index_type>>

            row_partition,

        std::shared_ptr<const Partition<local_index_type, global_index_type>>

            col_partition,

        assembly_mode assembly_type = assembly_mode::local_only);


    /**
     * @return a shared, read-only handle to the stored local block
     */
    std::shared_ptr<const LinOp> get_local_matrix() const { return local_mtx_; }


    /**
     * @return a shared, read-only handle to the stored non-local block
     */
    std::shared_ptr<const LinOp> get_non_local_matrix() const

    {

        return non_local_mtx_;

    }


    // Copy / move construction and assignment are user-declared and defined
    // out of line. Only the move constructor is declared noexcept.
    Matrix(const Matrix& other);


    Matrix(Matrix&& other) noexcept;


    Matrix& operator=(const Matrix& other);


    Matrix& operator=(Matrix&& other);


    /**
     * Creates a distributed matrix from an executor and a communicator only.
     * NOTE(review): presumably the result is empty - confirm.
     *
     * @param exec  the executor of the matrix
     * @param comm  the MPI communicator of the matrix
     * @return a unique pointer to the new matrix
     */
    static std::unique_ptr<Matrix> create(std::shared_ptr<const Executor> exec,

                                          mpi::communicator comm);


    /**
     * Creates a distributed matrix from a matrix type builder, e.g. the
     * object returned by with_matrix_type.
     *
     * Only participates in overload resolution if
     * is_matrix_type_builder<MatrixType, ValueType, LocalIndexType> is true.
     * The builder is instantiated for (ValueType, LocalIndexType) on exec and
     * the resulting object is passed on as matrix template to another create
     * overload.
     *
     * @param exec  the executor of the matrix
     * @param comm  the MPI communicator of the matrix
     * @param matrix_template  the builder for the matrix template
     * @return a unique pointer to the new matrix
     */
    template <typename MatrixType,

              typename = std::enable_if_t<gko::detail::is_matrix_type_builder<

                  MatrixType, ValueType, LocalIndexType>::value>>

    static std::unique_ptr<Matrix> create(std::shared_ptr<const Executor> exec,

                                          mpi::communicator comm,

                                          MatrixType matrix_template)

    {

        return create(

            exec, comm,

            matrix_template.template create<ValueType, LocalIndexType>(exec));

    }


    /**
     * Like the single-builder overload, but with separate builders for the
     * local and the non-local matrix template. Both builders have to satisfy
     * is_matrix_type_builder for (ValueType, LocalIndexType).
     *
     * @param exec  the executor of the matrix
     * @param comm  the MPI communicator of the matrix
     * @param local_matrix_template  builder for the local matrix template
     * @param non_local_matrix_template  builder for the non-local matrix
     *                                   template
     * @return a unique pointer to the new matrix
     */
    template <typename LocalMatrixType, typename NonLocalMatrixType,

              typename = std::enable_if_t<

                  gko::detail::is_matrix_type_builder<

                      LocalMatrixType, ValueType, LocalIndexType>::value &&

                  gko::detail::is_matrix_type_builder<

                      NonLocalMatrixType, ValueType, LocalIndexType>::value>>

    static std::unique_ptr<Matrix> create(

        std::shared_ptr<const Executor> exec, mpi::communicator comm,

        LocalMatrixType local_matrix_template,

        NonLocalMatrixType non_local_matrix_template)

    {

        return create(

            exec, comm,

            local_matrix_template.template create<ValueType, LocalIndexType>(

                exec),

            non_local_matrix_template

                .template create<ValueType, LocalIndexType>(exec));

    }


    /**
     * Creates a distributed matrix from a single LinOp template.
     * NOTE(review): presumably the template fixes the concrete type of both
     * the local and the non-local block - confirm.
     *
     * @param exec  the executor of the matrix
     * @param comm  the MPI communicator of the matrix
     * @param matrix_template  the LinOp used as template
     * @return a unique pointer to the new matrix
     */
    static std::unique_ptr<Matrix> create(

        std::shared_ptr<const Executor> exec, mpi::communicator comm,

        ptr_param<const LinOp> matrix_template);


    /**
     * Creates a distributed matrix from separate LinOp templates for the
     * local and the non-local block.
     *
     * @param exec  the executor of the matrix
     * @param comm  the MPI communicator of the matrix
     * @param local_matrix_template  the LinOp template for the local block
     * @param non_local_matrix_template  the LinOp template for the non-local
     *                                   block
     * @return a unique pointer to the new matrix
     */
    static std::unique_ptr<Matrix> create(

        std::shared_ptr<const Executor> exec, mpi::communicator comm,

        ptr_param<const LinOp> local_matrix_template,

        ptr_param<const LinOp> non_local_matrix_template);


    /**
     * Creates a distributed matrix from an existing local operator only.
     * NOTE(review): presumably no non-local block is set up in this case -
     * confirm.
     *
     * @param exec  the executor of the matrix
     * @param comm  the MPI communicator of the matrix
     * @param size  the size passed for the matrix (presumably the global
     *              size - confirm)
     * @param local_linop  the local operator
     * @return a unique pointer to the new matrix
     */
    static std::unique_ptr<Matrix> create(std::shared_ptr<const Executor> exec,

                                          mpi::communicator comm, dim<2> size,

                                          std::shared_ptr<LinOp> local_linop);


    /**
     * Creates a distributed matrix from existing local and non-local
     * operators plus explicit receive sizes, offsets and gather indices.
     *
     * Deprecated: the attribute message asks to use the overload taking an
     * index_map instead.
     *
     * @param exec  the executor of the matrix
     * @param comm  the MPI communicator of the matrix
     * @param size  the size passed for the matrix
     * @param local_linop  the local operator
     * @param non_local_linop  the non-local operator
     * @param recv_sizes  receive sizes (presumably one entry per rank)
     * @param recv_offsets  receive offsets (presumably one entry per rank,
     *                      possibly plus a sentinel - confirm)
     * @param recv_gather_idxs  gather indices in local_index_type
     * @return a unique pointer to the new matrix
     */
    [[deprecated(

        "Please use the overload with an index_map instead.")]] static std::

        unique_ptr<Matrix>

        create(std::shared_ptr<const Executor> exec, mpi::communicator comm,

               dim<2> size, std::shared_ptr<LinOp> local_linop,

               std::shared_ptr<LinOp> non_local_linop,

               std::vector<comm_index_type> recv_sizes,

               std::vector<comm_index_type> recv_offsets,

               array<local_index_type> recv_gather_idxs);


    /**
     * Creates a distributed matrix from an index_map and existing local and
     * non-local operators. This is the replacement named by the deprecated
     * overload above.
     *
     * @param exec  the executor of the matrix
     * @param comm  the MPI communicator of the matrix
     * @param imap  the index map
     * @param local_linop  the local operator
     * @param non_local_linop  the non-local operator
     * @return a unique pointer to the new matrix
     */
    static std::unique_ptr<Matrix> create(

        std::shared_ptr<const Executor> exec, mpi::communicator comm,

        index_map<local_index_type, global_index_type> imap,

        std::shared_ptr<LinOp> local_linop,

        std::shared_ptr<LinOp> non_local_linop);


    /**
     * Scales the columns of the matrix with the given distributed vector.
     * NOTE(review): exact requirements on the vector (size, distribution)
     * are defined by the implementation - confirm there.
     *
     * @param scaling_factors  the scaling factors
     */
    void col_scale(ptr_param<const global_vector_type> scaling_factors);


    /**
     * Scales the rows of the matrix with the given distributed vector.
     * NOTE(review): exact requirements on the vector (size, distribution)
     * are defined by the implementation - confirm there.
     *
     * @param scaling_factors  the scaling factors
     */
    void row_scale(ptr_param<const global_vector_type> scaling_factors);


protected:

    // The constructors are protected; their parameter lists mirror public
    // create overloads, which are the way to obtain instances.
    explicit Matrix(std::shared_ptr<const Executor> exec,

                    mpi::communicator comm);


    explicit Matrix(std::shared_ptr<const Executor> exec,

                    mpi::communicator comm,

                    ptr_param<const LinOp> local_matrix_template,

                    ptr_param<const LinOp> non_local_matrix_template);


    explicit Matrix(std::shared_ptr<const Executor> exec,

                    mpi::communicator comm, dim<2> size,

                    std::shared_ptr<LinOp> local_linop);


    explicit Matrix(std::shared_ptr<const Executor> exec,

                    mpi::communicator comm,

                    index_map<local_index_type, global_index_type> imap,

                    std::shared_ptr<LinOp> local_linop,

                    std::shared_ptr<LinOp> non_local_linop);


    /**
     * Starts the exchange for the given local vector and returns the
     * associated mpi::request.
     * NOTE(review): presumably non-blocking, with the caller waiting on the
     * returned request before using the received data - confirm.
     *
     * @param local_b  the local vector whose entries are communicated
     * @return the request object of the communication
     */
    mpi::request communicate(const local_vector_type* local_b) const;


    // LinOp apply hooks. NOTE(review): presumably x = A * b and
    // x = alpha * A * b + beta * x respectively - confirm.
    void apply_impl(const LinOp* b, LinOp* x) const override;


    void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,

                    LinOp* x) const override;


private:

    // Index map of this matrix (same type as the imap parameter of create).
    index_map<local_index_type, global_index_type> imap_;

    // Offsets and sizes of the send / receive side of the exchange.
    // NOTE(review): presumably one entry per rank - confirm.
    std::vector<comm_index_type> send_offsets_;

    std::vector<comm_index_type> send_sizes_;

    std::vector<comm_index_type> recv_offsets_;

    std::vector<comm_index_type> recv_sizes_;

    // NOTE(review): presumably the local indices of the vector entries that
    // are gathered into the send buffer - confirm.
    array<local_index_type> gather_idxs_;

    // Cached dense objects. NOTE(review): one_scalar_ presumably holds the
    // value one; the host_* buffers are presumably used when the exchange
    // has to be staged through host memory - confirm.
    gko::detail::DenseCache<value_type> one_scalar_;

    gko::detail::DenseCache<value_type> host_send_buffer_;

    gko::detail::DenseCache<value_type> host_recv_buffer_;

    gko::detail::DenseCache<value_type> send_buffer_;

    gko::detail::DenseCache<value_type> recv_buffer_;

    // The two blocks returned by get_local_matrix() and
    // get_non_local_matrix().
    std::shared_ptr<LinOp> local_mtx_;

    std::shared_ptr<LinOp> non_local_mtx_;

};


}  // namespace distributed

}  // namespace experimental

}  // namespace gko


#endif


#endif  // GKO_PUBLIC_CORE_DISTRIBUTED_MATRIX_HPP_