doc/develop/dense_8hpp_source.html

// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors

//

// SPDX-License-Identifier: BSD-3-Clause


#ifndef GKO_PUBLIC_CORE_MATRIX_DENSE_HPP_

#define GKO_PUBLIC_CORE_MATRIX_DENSE_HPP_


#include <initializer_list>

#include <type_traits>


#include <ginkgo/core/base/array.hpp>

#include <ginkgo/core/base/exception_helpers.hpp>

#include <ginkgo/core/base/executor.hpp>

#include <ginkgo/core/base/lin_op.hpp>

#include <ginkgo/core/base/range_accessors.hpp>

#include <ginkgo/core/base/types.hpp>

#include <ginkgo/core/base/utils.hpp>

#include <ginkgo/core/matrix/permutation.hpp>

#include <ginkgo/core/matrix/scaled_permutation.hpp>


namespace gko {

namespace experimental {

namespace distributed {


// Forward declaration only; matrix::Dense names this class template as a
// friend, so it has to be declared before Dense is defined.
template <typename ValueType>

class Vector;


namespace detail {


// Forward declaration only; matrix::Dense names this class template as a
// friend, so it has to be declared before Dense is defined.
template <typename ValueType>

class VectorCache;


}  // namespace detail

}  // namespace distributed

}  // namespace experimental


namespace matrix {


// Forward declarations of the matrix formats that Dense refers to in its
// ConvertibleTo base classes, friend declarations and member signatures.
// Only the names are needed here; the definitions are not part of this file.
template <typename ValueType, typename IndexType>

class Coo;


template <typename ValueType, typename IndexType>

class Csr;


// Used as the result type of Dense::extract_diagonal().
template <typename ValueType>

class Diagonal;


template <typename ValueType, typename IndexType>

class Ell;


template <typename ValueType, typename IndexType>

class Fbcsr;


template <typename ValueType, typename IndexType>

class Hybrid;


template <typename ValueType, typename IndexType>

class Sellp;


template <typename ValueType, typename IndexType>

class SparsityCsr;


/**
 * Dense matrix whose values are kept in a single `array<value_type>`
 * together with a stride: the entry (row, col) is located at offset
 * `row * stride + col` of that array (see linearize_index()).
 *
 * Through its base classes the type can be converted to a Dense of other
 * precisions and to the Coo, Csr, Ell, Fbcsr, Hybrid, Sellp and SparsityCsr
 * formats (each with int32 and int64 indices), can be read from / written to
 * matrix data, transposed, permuted, and have its diagonal extracted.
 *
 * @tparam ValueType  type of the stored values; defaults to
 *                    default_precision
 */
template <typename ValueType = default_precision>

class Dense

    : public EnableLinOp<Dense<ValueType>>,

      public ConvertibleTo<Dense<next_precision<ValueType>>>,

// One more precision conversion target if at least one of the two optional
// value types is enabled ...
#if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16

      public ConvertibleTo<Dense<next_precision<ValueType, 2>>>,

#endif

// ... and yet another one if both of them are enabled.
#if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16

      public ConvertibleTo<Dense<next_precision<ValueType, 3>>>,

#endif

      // Conversions to the other matrix formats, for both index types.
      public ConvertibleTo<Coo<ValueType, int32>>,

      public ConvertibleTo<Coo<ValueType, int64>>,

      public ConvertibleTo<Csr<ValueType, int32>>,

      public ConvertibleTo<Csr<ValueType, int64>>,

      public ConvertibleTo<Ell<ValueType, int32>>,

      public ConvertibleTo<Ell<ValueType, int64>>,

      public ConvertibleTo<Fbcsr<ValueType, int32>>,

      public ConvertibleTo<Fbcsr<ValueType, int64>>,

      public ConvertibleTo<Hybrid<ValueType, int32>>,

      public ConvertibleTo<Hybrid<ValueType, int64>>,

      public ConvertibleTo<Sellp<ValueType, int32>>,

      public ConvertibleTo<Sellp<ValueType, int64>>,

      public ConvertibleTo<SparsityCsr<ValueType, int32>>,

      public ConvertibleTo<SparsityCsr<ValueType, int64>>,

      public DiagonalExtractable<ValueType>,

      // Reading from / writing to matrix data, for both index types.
      public ReadableFromMatrixData<ValueType, int32>,

      public ReadableFromMatrixData<ValueType, int64>,

      public WritableToMatrixData<ValueType, int32>,

      public WritableToMatrixData<ValueType, int64>,

      public Transposable,

      public Permutable<int32>,

      public Permutable<int64>,

      public EnableAbsoluteComputation<remove_complex<Dense<ValueType>>>,

      public ScaledIdentityAddable {

    friend class EnablePolymorphicObject<Dense, LinOp>;

    friend class Coo<ValueType, int32>;

    friend class Coo<ValueType, int64>;

    friend class Csr<ValueType, int32>;

    friend class Csr<ValueType, int64>;

    friend class Diagonal<ValueType>;

    friend class Ell<ValueType, int32>;

    friend class Ell<ValueType, int64>;

    friend class Fbcsr<ValueType, int32>;

    friend class Fbcsr<ValueType, int64>;

    friend class Hybrid<ValueType, int32>;

    friend class Hybrid<ValueType, int64>;

    friend class Sellp<ValueType, int32>;

    friend class Sellp<ValueType, int64>;

    friend class SparsityCsr<ValueType, int32>;

    friend class SparsityCsr<ValueType, int64>;

    friend class Dense<to_complex<ValueType>>;

    friend class experimental::distributed::Vector<ValueType>;

    friend class experimental::distributed::detail::VectorCache<ValueType>;

    GKO_ASSERT_SUPPORTED_VALUE_TYPE;


public:

    using EnableLinOp<Dense>::convert_to;

    using EnableLinOp<Dense>::move_to;

    using ConvertibleTo<Dense<next_precision<ValueType>>>::convert_to;

    using ConvertibleTo<Dense<next_precision<ValueType>>>::move_to;

    using ConvertibleTo<Coo<ValueType, int32>>::convert_to;

    using ConvertibleTo<Coo<ValueType, int32>>::move_to;

    using ConvertibleTo<Coo<ValueType, int64>>::convert_to;

    using ConvertibleTo<Coo<ValueType, int64>>::move_to;

    using ConvertibleTo<Csr<ValueType, int32>>::convert_to;

    using ConvertibleTo<Csr<ValueType, int32>>::move_to;

    using ConvertibleTo<Csr<ValueType, int64>>::convert_to;

    using ConvertibleTo<Csr<ValueType, int64>>::move_to;

    using ConvertibleTo<Ell<ValueType, int32>>::convert_to;

    using ConvertibleTo<Ell<ValueType, int32>>::move_to;

    using ConvertibleTo<Ell<ValueType, int64>>::convert_to;

    using ConvertibleTo<Ell<ValueType, int64>>::move_to;

    using ConvertibleTo<Fbcsr<ValueType, int32>>::convert_to;

    using ConvertibleTo<Fbcsr<ValueType, int32>>::move_to;

    using ConvertibleTo<Fbcsr<ValueType, int64>>::convert_to;

    using ConvertibleTo<Fbcsr<ValueType, int64>>::move_to;

    using ConvertibleTo<Hybrid<ValueType, int32>>::convert_to;

    using ConvertibleTo<Hybrid<ValueType, int32>>::move_to;

    using ConvertibleTo<Hybrid<ValueType, int64>>::convert_to;

    using ConvertibleTo<Hybrid<ValueType, int64>>::move_to;

    using ConvertibleTo<Sellp<ValueType, int32>>::convert_to;

    using ConvertibleTo<Sellp<ValueType, int32>>::move_to;

    using ConvertibleTo<Sellp<ValueType, int64>>::convert_to;

    using ConvertibleTo<Sellp<ValueType, int64>>::move_to;

    using ConvertibleTo<SparsityCsr<ValueType, int32>>::convert_to;

    using ConvertibleTo<SparsityCsr<ValueType, int32>>::move_to;

    using ConvertibleTo<SparsityCsr<ValueType, int64>>::convert_to;

    using ConvertibleTo<SparsityCsr<ValueType, int64>>::move_to;

    using ReadableFromMatrixData<ValueType, int32>::read;

    using ReadableFromMatrixData<ValueType, int64>::read;


    using value_type = ValueType;

    using index_type = int64;

    using transposed_type = Dense<ValueType>;

    using mat_data = matrix_data<ValueType, int64>;

    using mat_data32 = matrix_data<ValueType, int32>;

    using device_mat_data = device_matrix_data<ValueType, int64>;

    using device_mat_data32 = device_matrix_data<ValueType, int32>;

    using absolute_type = remove_complex<Dense>;

    using real_type = absolute_type;

    using complex_type = to_complex<Dense>;


    using row_major_range = gko::range<gko::accessor::row_major<ValueType, 2>>;


    /**
     * Creates a Dense matrix by delegating to the virtual
     * create_with_same_config() of `other`.
     *
     * @param other  the matrix whose configuration is replicated
     *
     * @return the matrix returned by `other`'s create_with_same_config()
     */
    static std::unique_ptr<Dense> create_with_config_of(
        ptr_param<const Dense> other)
    {
        // `other` is dereferenced explicitly instead of going through
        // operator `->`: the arrow form currently results in a compile error
        // with CUDA 10.1.
        const Dense& source = *other;
        return source.create_with_same_config();
    }


    /**
     * Creates a Dense matrix by delegating to the virtual
     * create_with_type_of_impl() of `other`, using the number of columns of
     * `size` as the stride.
     *
     * @param other  the matrix whose create_with_type_of_impl() is invoked
     * @param exec  the executor passed on to the implementation
     * @param size  the size passed on to the implementation (defaults to an
     *              empty dim<2>)
     *
     * @return the matrix returned by create_with_type_of_impl()
     */
    static std::unique_ptr<Dense> create_with_type_of(
        ptr_param<const Dense> other, std::shared_ptr<const Executor> exec,
        const dim<2>& size = dim<2>{})
    {
        // Explicit dereference instead of operator `->`, for the same
        // CUDA 10.1 compatibility reason as in create_with_config_of().
        const Dense& source = *other;
        const size_type stride = size[1];
        return source.create_with_type_of_impl(exec, size, stride);
    }


    /**
     * Creates a Dense matrix by delegating to the virtual
     * create_with_type_of_impl() of `other` with an explicit stride.
     *
     * @param other  the matrix whose create_with_type_of_impl() is invoked
     * @param exec  the executor passed on to the implementation
     * @param size  the size passed on to the implementation
     * @param stride  the stride passed on to the implementation
     *
     * @return the matrix returned by create_with_type_of_impl()
     */
    static std::unique_ptr<Dense> create_with_type_of(
        ptr_param<const Dense> other, std::shared_ptr<const Executor> exec,
        const dim<2>& size, size_type stride)
    {
        // Explicit dereference instead of operator `->`, for the same
        // CUDA 10.1 compatibility reason as in create_with_config_of().
        const Dense& source = *other;
        return source.create_with_type_of_impl(exec, size, stride);
    }


    /**
     * Overload of create_with_type_of() that additionally accepts a local
     * size. The local size is not used by this overload; everything else is
     * passed on to the virtual create_with_type_of_impl() of `other`.
     *
     * @param other  the matrix whose create_with_type_of_impl() is invoked
     * @param exec  the executor passed on to the implementation
     * @param size  the size passed on to the implementation
     * @param local_size  unused
     * @param stride  the stride passed on to the implementation
     *
     * @return the matrix returned by create_with_type_of_impl()
     */
    static std::unique_ptr<Dense> create_with_type_of(
        ptr_param<const Dense> other, std::shared_ptr<const Executor> exec,
        const dim<2>& size, const dim<2>& local_size, size_type stride)
    {
        // Explicit dereference instead of operator `->`, for the same
        // CUDA 10.1 compatibility reason as in create_with_config_of().
        const Dense& source = *other;
        return source.create_with_type_of_impl(exec, size, stride);
    }


    /**
     * Creates a view of `other` by delegating to its virtual
     * create_view_of_impl().
     *
     * @param other  the matrix to create the view of
     *
     * @return the matrix returned by `other`'s create_view_of_impl()
     */
    static std::unique_ptr<Dense> create_view_of(ptr_param<Dense> other)
    {
        Dense& target = *other;
        return target.create_view_of_impl();
    }


    /**
     * Creates an immutable view of `other` by delegating to its virtual
     * create_const_view_of_impl().
     *
     * @param other  the matrix to create the view of
     *
     * @return the matrix returned by `other`'s create_const_view_of_impl()
     */
    static std::unique_ptr<const Dense> create_const_view_of(
        ptr_param<const Dense> other)
    {
        const Dense& source = *other;
        return source.create_const_view_of_impl();
    }


    friend class Dense<previous_precision<ValueType>>;


    void convert_to(Dense<next_precision<ValueType>>* result) const override;


    void move_to(Dense<next_precision<ValueType>>* result) override;


#if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16

    friend class Dense<previous_precision<ValueType, 2>>;

    using ConvertibleTo<Dense<next_precision<ValueType, 2>>>::convert_to;

    using ConvertibleTo<Dense<next_precision<ValueType, 2>>>::move_to;


    void convert_to(Dense<next_precision<ValueType, 2>>* result) const override;


    void move_to(Dense<next_precision<ValueType, 2>>* result) override;

#endif


#if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16

    friend class Dense<previous_precision<ValueType, 3>>;

    using ConvertibleTo<Dense<next_precision<ValueType, 3>>>::convert_to;

    using ConvertibleTo<Dense<next_precision<ValueType, 3>>>::move_to;


    void convert_to(Dense<next_precision<ValueType, 3>>* result) const override;


    void move_to(Dense<next_precision<ValueType, 3>>* result) override;

#endif


    void convert_to(Coo<ValueType, int32>* result) const override;


    void move_to(Coo<ValueType, int32>* result) override;


    void convert_to(Coo<ValueType, int64>* result) const override;


    void move_to(Coo<ValueType, int64>* result) override;


    void convert_to(Csr<ValueType, int32>* result) const override;


    void move_to(Csr<ValueType, int32>* result) override;


    void convert_to(Csr<ValueType, int64>* result) const override;


    void move_to(Csr<ValueType, int64>* result) override;


    void convert_to(Ell<ValueType, int32>* result) const override;


    void move_to(Ell<ValueType, int32>* result) override;


    void convert_to(Ell<ValueType, int64>* result) const override;


    void move_to(Ell<ValueType, int64>* result) override;


    void convert_to(Fbcsr<ValueType, int32>* result) const override;


    void move_to(Fbcsr<ValueType, int32>* result) override;


    void convert_to(Fbcsr<ValueType, int64>* result) const override;


    void move_to(Fbcsr<ValueType, int64>* result) override;


    void convert_to(Hybrid<ValueType, int32>* result) const override;


    void move_to(Hybrid<ValueType, int32>* result) override;


    void convert_to(Hybrid<ValueType, int64>* result) const override;


    void move_to(Hybrid<ValueType, int64>* result) override;


    void convert_to(Sellp<ValueType, int32>* result) const override;


    void move_to(Sellp<ValueType, int32>* result) override;


    void convert_to(Sellp<ValueType, int64>* result) const override;


    void move_to(Sellp<ValueType, int64>* result) override;


    void convert_to(SparsityCsr<ValueType, int32>* result) const override;


    void move_to(SparsityCsr<ValueType, int32>* result) override;


    void convert_to(SparsityCsr<ValueType, int64>* result) const override;


    void move_to(SparsityCsr<ValueType, int64>* result) override;


    void read(const mat_data& data) override;


    void read(const mat_data32& data) override;


    void read(const device_mat_data& data) override;


    void read(const device_mat_data32& data) override;


    void read(device_mat_data&& data) override;


    void read(device_mat_data32&& data) override;


    void write(mat_data& data) const override;


    void write(mat_data32& data) const override;


    std::unique_ptr<LinOp> transpose() const override;


    std::unique_ptr<LinOp> conj_transpose() const override;


    void transpose(ptr_param<Dense> output) const;


    void conj_transpose(ptr_param<Dense> output) const;


    void fill(const ValueType value);


    std::unique_ptr<Dense> permute(

        ptr_param<const Permutation<int32>> permutation,

        permute_mode mode = permute_mode::symmetric) const;


    std::unique_ptr<Dense> permute(

        ptr_param<const Permutation<int64>> permutation,

        permute_mode mode = permute_mode::symmetric) const;


    void permute(ptr_param<const Permutation<int32>> permutation,

                 ptr_param<Dense> output, permute_mode mode) const;


    void permute(ptr_param<const Permutation<int64>> permutation,

                 ptr_param<Dense> output, permute_mode mode) const;


    std::unique_ptr<Dense> permute(

        ptr_param<const Permutation<int32>> row_permutation,

        ptr_param<const Permutation<int32>> column_permutation,

        bool invert = false) const;


    std::unique_ptr<Dense> permute(

        ptr_param<const Permutation<int64>> row_permutation,

        ptr_param<const Permutation<int64>> column_permutation,

        bool invert = false) const;


    void permute(ptr_param<const Permutation<int32>> row_permutation,

                 ptr_param<const Permutation<int32>> column_permutation,

                 ptr_param<Dense> output, bool invert = false) const;


    void permute(ptr_param<const Permutation<int64>> row_permutation,

                 ptr_param<const Permutation<int64>> column_permutation,

                 ptr_param<Dense> output, bool invert = false) const;


    std::unique_ptr<Dense> scale_permute(

        ptr_param<const ScaledPermutation<value_type, int32>> permutation,

        permute_mode mode = permute_mode::symmetric) const;


    std::unique_ptr<Dense> scale_permute(

        ptr_param<const ScaledPermutation<value_type, int64>> permutation,

        permute_mode mode = permute_mode::symmetric) const;


    void scale_permute(

        ptr_param<const ScaledPermutation<value_type, int32>> permutation,

        ptr_param<Dense> output, permute_mode mode) const;


    void scale_permute(

        ptr_param<const ScaledPermutation<value_type, int64>> permutation,

        ptr_param<Dense> output, permute_mode mode) const;


    std::unique_ptr<Dense> scale_permute(

        ptr_param<const ScaledPermutation<value_type, int32>> row_permutation,

        ptr_param<const ScaledPermutation<value_type, int32>>

            column_permutation,

        bool invert = false) const;


    std::unique_ptr<Dense> scale_permute(

        ptr_param<const ScaledPermutation<value_type, int64>> row_permutation,

        ptr_param<const ScaledPermutation<value_type, int64>>

            column_permutation,

        bool invert = false) const;


    void scale_permute(

        ptr_param<const ScaledPermutation<value_type, int32>> row_permutation,

        ptr_param<const ScaledPermutation<value_type, int32>>

            column_permutation,

        ptr_param<Dense> output, bool invert = false) const;


    void scale_permute(

        ptr_param<const ScaledPermutation<value_type, int64>> row_permutation,

        ptr_param<const ScaledPermutation<value_type, int64>>

            column_permutation,

        ptr_param<Dense> output, bool invert = false) const;


    std::unique_ptr<LinOp> permute(

        const array<int32>* permutation_indices) const override;


    std::unique_ptr<LinOp> permute(

        const array<int64>* permutation_indices) const override;


    void permute(const array<int32>* permutation_indices,

                 ptr_param<Dense> output) const;


    void permute(const array<int64>* permutation_indices,

                 ptr_param<Dense> output) const;


    std::unique_ptr<LinOp> inverse_permute(

        const array<int32>* permutation_indices) const override;


    std::unique_ptr<LinOp> inverse_permute(

        const array<int64>* permutation_indices) const override;


    void inverse_permute(const array<int32>* permutation_indices,

                         ptr_param<Dense> output) const;


    void inverse_permute(const array<int64>* permutation_indices,

                         ptr_param<Dense> output) const;


    std::unique_ptr<LinOp> row_permute(

        const array<int32>* permutation_indices) const override;


    std::unique_ptr<LinOp> row_permute(

        const array<int64>* permutation_indices) const override;


    void row_permute(const array<int32>* permutation_indices,

                     ptr_param<Dense> output) const;


    void row_permute(const array<int64>* permutation_indices,

                     ptr_param<Dense> output) const;


    std::unique_ptr<Dense> row_gather(const array<int32>* gather_indices) const;


    std::unique_ptr<Dense> row_gather(const array<int64>* gather_indices) const;


    void row_gather(const array<int32>* gather_indices,

                    ptr_param<LinOp> row_collection) const;


    void row_gather(const array<int64>* gather_indices,

                    ptr_param<LinOp> row_collection) const;


    void row_gather(ptr_param<const LinOp> alpha,

                    const array<int32>* gather_indices,

                    ptr_param<const LinOp> beta,

                    ptr_param<LinOp> row_collection) const;


    void row_gather(ptr_param<const LinOp> alpha,

                    const array<int64>* gather_indices,

                    ptr_param<const LinOp> beta,

                    ptr_param<LinOp> row_collection) const;


    std::unique_ptr<LinOp> column_permute(

        const array<int32>* permutation_indices) const override;


    std::unique_ptr<LinOp> column_permute(

        const array<int64>* permutation_indices) const override;


    void column_permute(const array<int32>* permutation_indices,

                        ptr_param<Dense> output) const;


    void column_permute(const array<int64>* permutation_indices,

                        ptr_param<Dense> output) const;


    std::unique_ptr<LinOp> inverse_row_permute(

        const array<int32>* permutation_indices) const override;


    std::unique_ptr<LinOp> inverse_row_permute(

        const array<int64>* permutation_indices) const override;


    void inverse_row_permute(const array<int32>* permutation_indices,

                             ptr_param<Dense> output) const;


    void inverse_row_permute(const array<int64>* permutation_indices,

                             ptr_param<Dense> output) const;


    std::unique_ptr<LinOp> inverse_column_permute(

        const array<int32>* permutation_indices) const override;


    std::unique_ptr<LinOp> inverse_column_permute(

        const array<int64>* permutation_indices) const override;


    void inverse_column_permute(const array<int32>* permutation_indices,

                                ptr_param<Dense> output) const;


    void inverse_column_permute(const array<int64>* permutation_indices,

                                ptr_param<Dense> output) const;


    std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;


    void extract_diagonal(ptr_param<Diagonal<ValueType>> output) const;


    std::unique_ptr<absolute_type> compute_absolute() const override;


    void compute_absolute(ptr_param<absolute_type> output) const;


    void compute_absolute_inplace() override;


    std::unique_ptr<complex_type> make_complex() const;


    void make_complex(ptr_param<complex_type> result) const;


    std::unique_ptr<real_type> get_real() const;


    void get_real(ptr_param<real_type> result) const;


    std::unique_ptr<real_type> get_imag() const;


    void get_imag(ptr_param<real_type> result) const;


    /**
     * Gives mutable access to the stored values.
     *
     * @return the pointer returned by `get_data()` of the value array
     */
    value_type* get_values() noexcept
    {
        value_type* const data = values_.get_data();
        return data;
    }


    /**
     * Gives read-only access to the stored values.
     *
     * @return the pointer returned by `get_const_data()` of the value array
     */
    const value_type* get_const_values() const noexcept
    {
        const value_type* const data = values_.get_const_data();
        return data;
    }


    /**
     * Returns the stride of the matrix, i.e. the factor the row index is
     * multiplied with when computing the offset of an entry (see
     * linearize_index()).
     *
     * @return the stored stride
     */
    size_type get_stride() const noexcept
    {
        return stride_;
    }


    /**
     * Returns the number of elements held by the value array.
     *
     * @return the size reported by the value array
     */
    size_type get_num_stored_elements() const noexcept
    {
        const size_type num_stored = values_.get_size();
        return num_stored;
    }


    /**
     * Returns a reference to the entry at the given row and column.
     *
     * No bounds checking is performed; the offset into the value array is
     * computed by linearize_index(row, col).
     *
     * @param row  the row of the requested entry
     * @param col  the column of the requested entry
     *
     * @return mutable reference to the entry
     */
    value_type& at(size_type row, size_type col) noexcept
    {
        const size_type offset = linearize_index(row, col);
        value_type* const data = values_.get_data();
        return data[offset];
    }


    /**
     * Returns a copy of the entry at the given row and column.
     *
     * No bounds checking is performed; the offset into the value array is
     * computed by linearize_index(row, col).
     *
     * @param row  the row of the requested entry
     * @param col  the column of the requested entry
     *
     * @return the value of the entry
     */
    value_type at(size_type row, size_type col) const noexcept
    {
        const size_type offset = linearize_index(row, col);
        const value_type* const data = values_.get_const_data();
        return data[offset];
    }


    /**
     * Returns a reference to the entry addressed by a single linear index.
     *
     * The index is translated into an offset of the value array by
     * linearize_index(idx), which splits it into a row and a column using
     * the number of columns of the matrix. No bounds checking is performed.
     *
     * @param idx  the linear index of the requested entry
     *
     * @return mutable reference to the entry
     */
    ValueType& at(size_type idx) noexcept
    {
        const size_type offset = linearize_index(idx);
        ValueType* const data = values_.get_data();
        return data[offset];
    }


    /**
     * Returns a copy of the entry addressed by a single linear index.
     *
     * The index is translated into an offset of the value array by
     * linearize_index(idx), which splits it into a row and a column using
     * the number of columns of the matrix. No bounds checking is performed.
     *
     * @param idx  the linear index of the requested entry
     *
     * @return the value of the entry
     */
    ValueType at(size_type idx) const noexcept
    {
        const size_type offset = linearize_index(idx);
        const ValueType* const data = values_.get_const_data();
        return data[offset];
    }


    void scale(ptr_param<const LinOp> alpha);


    void inv_scale(ptr_param<const LinOp> alpha);


    void add_scaled(ptr_param<const LinOp> alpha, ptr_param<const LinOp> b);


    void sub_scaled(ptr_param<const LinOp> alpha, ptr_param<const LinOp> b);


    void compute_dot(ptr_param<const LinOp> b, ptr_param<LinOp> result) const;


    void compute_dot(ptr_param<const LinOp> b, ptr_param<LinOp> result,

                     array<char>& tmp) const;


    void compute_conj_dot(ptr_param<const LinOp> b,

                          ptr_param<LinOp> result) const;


    void compute_conj_dot(ptr_param<const LinOp> b, ptr_param<LinOp> result,

                          array<char>& tmp) const;


    void compute_norm2(ptr_param<LinOp> result) const;


    void compute_norm2(ptr_param<LinOp> result, array<char>& tmp) const;


    void compute_norm1(ptr_param<LinOp> result) const;


    void compute_norm1(ptr_param<LinOp> result, array<char>& tmp) const;


    void compute_squared_norm2(ptr_param<LinOp> result) const;


    void compute_squared_norm2(ptr_param<LinOp> result, array<char>& tmp) const;


    void compute_mean(ptr_param<LinOp> result) const;


    void compute_mean(ptr_param<LinOp> result, array<char>& tmp) const;


    /**
     * Creates a submatrix for the given row and column spans with an
     * explicit stride by delegating to the virtual create_submatrix_impl().
     *
     * @param rows  the row span
     * @param columns  the column span
     * @param stride  the stride passed on to the implementation
     *
     * @return the matrix returned by create_submatrix_impl()
     */
    std::unique_ptr<Dense> create_submatrix(const span& rows,
                                            const span& columns,
                                            const size_type stride)
    {
        auto submatrix = this->create_submatrix_impl(rows, columns, stride);
        return submatrix;
    }


    std::unique_ptr<Dense> create_submatrix(const span& rows,

                                            const span& columns)

    {

        return create_submatrix(rows, columns, this->get_stride());

    }


    /**
     * Creates a submatrix for the given local row and column spans.
     *
     * @param rows  the row span
     * @param columns  the column span
     * @param size  expected size of the submatrix; it is only compared
     *              against the size implied by the two spans and does not
     *              otherwise influence the result
     *
     * @return the submatrix created from the spans and the stride of this
     *         matrix
     */
    std::unique_ptr<Dense> create_submatrix(const local_span& rows,

                                            const local_span& columns,

                                            dim<2> size)

    {

        // Size implied by the lengths of the two spans.
        dim<2> deduced_size{rows.length(), columns.length()};

        // Sanity check of the caller-provided size against the deduced one.
        GKO_ASSERT_EQUAL_DIMENSIONS(deduced_size, size);

        // Forward the spans together with the stride of this matrix.
        return create_submatrix(rows, columns, this->get_stride());

    }


    std::unique_ptr<real_type> create_real_view();


    std::unique_ptr<const real_type> create_real_view() const;


    static std::unique_ptr<Dense> create(std::shared_ptr<const Executor> exec,

                                         const dim<2>& size = {},

                                         size_type stride = 0);


    static std::unique_ptr<Dense> create(std::shared_ptr<const Executor> exec,

                                         const dim<2>& size,

                                         array<value_type> values,

                                         size_type stride);


    /**
     * Deprecated overload of create() that accepts the values as an
     * initializer list.
     *
     * The list is used to construct an `array<value_type>` on `exec`, which
     * is then forwarded together with the remaining arguments to the
     * create() overload taking an array.
     *
     * @tparam InputValueType  element type of the initializer list
     *
     * @param exec  the executor used for the array and passed on to create()
     * @param size  the size passed on to create()
     * @param values  the values the array is constructed from
     * @param stride  the stride passed on to create()
     *
     * @return the matrix returned by the array-based create() overload
     */
    template <typename InputValueType>
    GKO_DEPRECATED(
        // Adjacent literals are concatenated verbatim, so the separating
        // space has to be part of one of them.
        "explicitly construct the gko::array argument instead of passing an "
        "initializer list")
    static std::unique_ptr<Dense> create(
        std::shared_ptr<const Executor> exec, const dim<2>& size,
        std::initializer_list<InputValueType> values, size_type stride)
    {
        return create(exec, size, array<value_type>{exec, std::move(values)},
                      stride);
    }


    static std::unique_ptr<const Dense> create_const(

        std::shared_ptr<const Executor> exec, const dim<2>& size,

        gko::detail::const_array_view<ValueType>&& values, size_type stride);


    Dense& operator=(const Dense&);


    Dense& operator=(Dense&&);


    Dense(const Dense&);


    Dense(Dense&&);


protected:

    Dense(std::shared_ptr<const Executor> exec, const dim<2>& size = {},

          size_type stride = 0);


    Dense(std::shared_ptr<const Executor> exec, const dim<2>& size,

          array<value_type> values, size_type stride);


    virtual std::unique_ptr<Dense> create_with_same_config() const

    {

        return Dense::create(this->get_executor(), this->get_size(),

                             this->get_stride());

    }


    /**
     * Creates a Dense matrix from the given executor, size and stride.
     * Virtual, so that derived classes can override it; it is the hook
     * invoked by the static create_with_type_of() overloads.
     *
     * @param exec  the executor passed on to Dense::create()
     * @param size  the size passed on to Dense::create()
     * @param stride  the stride passed on to Dense::create()
     *
     * @return the matrix returned by Dense::create()
     */
    virtual std::unique_ptr<Dense> create_with_type_of_impl(
        std::shared_ptr<const Executor> exec, const dim<2>& size,
        size_type stride) const
    {
        auto created = Dense::create(exec, size, stride);
        return created;
    }


    /**
     * Creates a Dense matrix with the executor, size and stride of this
     * matrix whose values are an array view of this matrix's values.
     * Virtual, so that derived classes can override it; it is the hook
     * invoked by create_view_of().
     *
     * @return the matrix returned by Dense::create()
     */
    virtual std::unique_ptr<Dense> create_view_of_impl()
    {
        auto exec = this->get_executor();
        const auto num_stored = this->get_num_stored_elements();
        auto* const data = this->get_values();
        return Dense::create(exec, this->get_size(),
                             gko::make_array_view(exec, num_stored, data),
                             this->get_stride());
    }


    /**
     * Creates an immutable Dense matrix with the executor, size and stride
     * of this matrix whose values are a const array view of this matrix's
     * values. Virtual, so that derived classes can override it; it is the
     * hook invoked by create_const_view_of().
     *
     * @return the matrix returned by Dense::create_const()
     */
    virtual std::unique_ptr<const Dense> create_const_view_of_impl() const
    {
        auto exec = this->get_executor();
        const auto num_stored = this->get_num_stored_elements();
        const auto* const data = this->get_const_values();
        return Dense::create_const(
            exec, this->get_size(),
            gko::make_const_array_view(exec, num_stored, data),
            this->get_stride());
    }


    template <typename IndexType>

    void convert_impl(Coo<ValueType, IndexType>* result) const;


    template <typename IndexType>

    void convert_impl(Csr<ValueType, IndexType>* result) const;


    template <typename IndexType>

    void convert_impl(Ell<ValueType, IndexType>* result) const;


    template <typename IndexType>

    void convert_impl(Fbcsr<ValueType, IndexType>* result) const;


    template <typename IndexType>

    void convert_impl(Hybrid<ValueType, IndexType>* result) const;


    template <typename IndexType>

    void convert_impl(Sellp<ValueType, IndexType>* result) const;


    template <typename IndexType>

    void convert_impl(SparsityCsr<ValueType, IndexType>* result) const;


    virtual void scale_impl(const LinOp* alpha);


    virtual void inv_scale_impl(const LinOp* alpha);


    virtual void add_scaled_impl(const LinOp* alpha, const LinOp* b);


    virtual void sub_scaled_impl(const LinOp* alpha, const LinOp* b);


    virtual void compute_dot_impl(const LinOp* b, LinOp* result) const;


    virtual void compute_conj_dot_impl(const LinOp* b, LinOp* result) const;


    virtual void compute_norm2_impl(LinOp* result) const;


    virtual void compute_norm1_impl(LinOp* result) const;


    virtual void compute_squared_norm2_impl(LinOp* result) const;


    virtual void compute_mean_impl(LinOp* result) const;


    void resize(gko::dim<2> new_size);


    virtual std::unique_ptr<Dense> create_submatrix_impl(

        const span& rows, const span& columns, const size_type stride);


    void apply_impl(const LinOp* b, LinOp* x) const override;


    void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,

                    LinOp* x) const override;


    /**
     * Maps a (row, column) pair to the offset of the corresponding entry in
     * the value array.
     *
     * @param row  the row of the entry
     * @param col  the column of the entry
     *
     * @return `row * stride + col`
     */
    size_type linearize_index(size_type row, size_type col) const noexcept
    {
        const size_type row_offset = row * stride_;
        return row_offset + col;
    }


    /**
     * Maps a single linear index to the offset of the corresponding entry in
     * the value array.
     *
     * The index is split into a row (quotient) and a column (remainder) with
     * respect to the number of columns of the matrix; the pair is then
     * passed to the two-argument linearize_index().
     *
     * @param idx  the linear index of the entry
     *
     * @return the offset of the entry in the value array
     */
    size_type linearize_index(size_type idx) const noexcept
    {
        const auto num_cols = this->get_size()[1];
        const auto row = idx / num_cols;
        const auto col = idx % num_cols;
        return linearize_index(row, col);
    }


    template <typename IndexType>

    void permute_impl(const Permutation<IndexType>* permutation,

                      permute_mode mode, Dense* output) const;


    template <typename IndexType>

    void permute_impl(const Permutation<IndexType>* row_permutation,

                      const Permutation<IndexType>* col_permutation,

                      bool invert, Dense* output) const;


    template <typename IndexType>

    void scale_permute_impl(

        const ScaledPermutation<ValueType, IndexType>* permutation,

        permute_mode mode, Dense* output) const;


    template <typename IndexType>

    void scale_permute_impl(

        const ScaledPermutation<ValueType, IndexType>* row_permutation,

        const ScaledPermutation<ValueType, IndexType>* column_permutation,

        bool invert, Dense* output) const;


    template <typename OutputType, typename IndexType>

    void row_gather_impl(const array<IndexType>* row_idxs,

                         Dense<OutputType>* row_collection) const;


    template <typename OutputType, typename IndexType>

    void row_gather_impl(const Dense<ValueType>* alpha,

                         const array<IndexType>* row_idxs,

                         const Dense<ValueType>* beta,

                         Dense<OutputType>* row_collection) const;


private:

    // Factor the row index is multiplied with when locating an entry:
    // linearize_index() maps (row, col) to row * stride_ + col.
    size_type stride_;

    // Storage of the matrix values; exposed through get_values() and
    // get_const_values().
    array<value_type> values_;


    void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;

};


}  // namespace matrix


namespace detail {


template <typename ValueType>

struct temporary_clone_helper<matrix::Dense<ValueType>> {

    static std::unique_ptr<matrix::Dense<ValueType>> create(

        std::shared_ptr<const Executor> exec, matrix::Dense<ValueType>* ptr,

        bool copy_data)

    {

        if (copy_data) {

            return gko::clone(std::move(exec), ptr);

        } else {

            return matrix::Dense<ValueType>::create(exec, ptr->get_size());

        }

    }

};


}  // namespace detail


template <typename VecPtr>

std::unique_ptr<matrix::Dense<typename detail::pointee<VecPtr>::value_type>>

make_dense_view(VecPtr&& vector)

{

    using value_type = typename detail::pointee<VecPtr>::value_type;

    return matrix::Dense<value_type>::create_view_of(vector);

}


template <typename VecPtr>

std::unique_ptr<

    const matrix::Dense<typename detail::pointee<VecPtr>::value_type>>

make_const_dense_view(VecPtr&& vector)

{

    using value_type = typename detail::pointee<VecPtr>::value_type;

    return matrix::Dense<value_type>::create_const_view_of(vector);

}


/**
 * Creates a matrix of type `Matrix` from a list of values interpreted as a
 * single column, using an explicit stride for the intermediate Dense matrix.
 *
 * The values are first written into a Dense matrix of size
 * `vals.size() x 1` created on the master executor of `exec`; that matrix is
 * then moved into a `Matrix` created on `exec`.
 *
 * @tparam Matrix  the type of the matrix to create
 * @tparam TArgs  types of the additional arguments for `Matrix::create`
 *
 * @param stride  the stride of the intermediate Dense matrix
 * @param vals  the values, one per row
 * @param exec  the executor the resulting matrix is created on
 * @param create_args  additional arguments forwarded to `Matrix::create`
 *
 * @return the newly created matrix
 */
template <typename Matrix, typename... TArgs>
std::unique_ptr<Matrix> initialize(
    size_type stride, std::initializer_list<typename Matrix::value_type> vals,
    std::shared_ptr<const Executor> exec, TArgs&&... create_args)
{
    using dense = matrix::Dense<typename Matrix::value_type>;
    const size_type num_rows = vals.size();
    auto host_vec =
        dense::create(exec->get_master(), dim<2>{num_rows, 1}, stride);
    // Walk the list with an iterator while counting the row index.
    auto value_it = vals.begin();
    for (size_type row = 0; row < num_rows; ++row, ++value_it) {
        host_vec->at(row) = *value_it;
    }
    auto result = Matrix::create(exec, std::forward<TArgs>(create_args)...);
    host_vec->move_to(result);
    return result;
}


template <typename Matrix, typename... TArgs>

std::unique_ptr<Matrix> initialize(

    std::initializer_list<typename Matrix::value_type> vals,

    std::shared_ptr<const Executor> exec, TArgs&&... create_args)

{

    return initialize<Matrix>(1, vals, std::move(exec),

                              std::forward<TArgs>(create_args)...);

}


/**
 * Creates a matrix of type `Matrix` from a list of rows, using an explicit
 * stride for the intermediate Dense matrix.
 *
 * The number of rows is the number of inner lists; the number of columns is
 * the length of the first inner list (or 1 if there are no rows). The values
 * are first written into a Dense matrix of that size created on the master
 * executor of `exec`; that matrix is then moved into a `Matrix` created on
 * `exec`.
 *
 * @tparam Matrix  the type of the matrix to create
 * @tparam TArgs  types of the additional arguments for `Matrix::create`
 *
 * @param stride  the stride of the intermediate Dense matrix
 * @param vals  the values, one inner list per row
 * @param exec  the executor the resulting matrix is created on
 * @param create_args  additional arguments forwarded to `Matrix::create`
 *
 * @return the newly created matrix
 */
template <typename Matrix, typename... TArgs>
std::unique_ptr<Matrix> initialize(
    size_type stride,
    std::initializer_list<std::initializer_list<typename Matrix::value_type>>
        vals,
    std::shared_ptr<const Executor> exec, TArgs&&... create_args)
{
    using dense = matrix::Dense<typename Matrix::value_type>;
    const size_type num_rows = vals.size();
    // The first row determines the number of columns.
    const size_type num_cols = num_rows > 0 ? vals.begin()->size() : 1;
    auto host_mtx =
        dense::create(exec->get_master(), dim<2>{num_rows, num_cols}, stride);
    size_type ridx = 0;
    for (auto row_it = vals.begin(); row_it != vals.end(); ++row_it, ++ridx) {
        size_type cidx = 0;
        for (auto elem_it = row_it->begin(); elem_it != row_it->end();
             ++elem_it, ++cidx) {
            host_mtx->at(ridx, cidx) = *elem_it;
        }
    }
    auto result = Matrix::create(exec, std::forward<TArgs>(create_args)...);
    host_mtx->move_to(result);
    return result;
}


template <typename Matrix, typename... TArgs>

std::unique_ptr<Matrix> initialize(

    std::initializer_list<std::initializer_list<typename Matrix::value_type>>

        vals,

    std::shared_ptr<const Executor> exec, TArgs&&... create_args)

{

    return initialize<Matrix>(vals.size() > 0 ? begin(vals)->size() : 0, vals,

                              std::move(exec),

                              std::forward<TArgs>(create_args)...);

}


}  // namespace gko


#endif  // GKO_PUBLIC_CORE_MATRIX_DENSE_HPP_