doc/v1.0.0/csr_8hpp_source.html

 /*******************************<GINKGO LICENSE>******************************
 Copyright (c) 2017-2019, the Ginkgo authors
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:

 1. Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.

 3. Neither the name of the copyright holder nor the names of its
 contributors may be used to endorse or promote products derived from
 this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/

 #ifndef GKO_CORE_MATRIX_CSR_HPP_
 #define GKO_CORE_MATRIX_CSR_HPP_


 #include <ginkgo/core/base/array.hpp>
 #include <ginkgo/core/base/lin_op.hpp>


 namespace gko {
 namespace matrix {


 // Forward declarations of the matrix formats that Csr declares conversions
 // to (via ConvertibleTo) and grants friendship to.

 // Dense: matrix format which explicitly stores all values of the matrix.
 template <typename ValueType>
 class Dense;


 // Coo: coordinate matrix format.
 template <typename ValueType, typename IndexType>
 class Coo;

 // Ell: ELL matrix format.
 template <typename ValueType, typename IndexType>
 class Ell;


 // Sellp: SELL-P matrix format (similar to ELL).
 template <typename ValueType, typename IndexType>
 class Sellp;


 /**
  * CSR is a matrix format which stores only the nonzero coefficients by
  * compressing each row of the matrix.
  *
  * The class is a LinOp that can be converted to Dense, Coo, Sellp and Ell,
  * read from and written to a matrix_data structure, and transposed.
  * Besides the values, column indexes and row pointers it stores an `srow`
  * array whose size and contents are determined by a strategy object
  * (see strategy_type).
  *
  * @tparam ValueType  type of the stored values
  * @tparam IndexType  type of the stored column indexes, row pointers and
  *                    srow entries
  */
 template <typename ValueType = default_precision, typename IndexType = int32>
 class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
             public EnableCreateMethod<Csr<ValueType, IndexType>>,
             public ConvertibleTo<Dense<ValueType>>,
             public ConvertibleTo<Coo<ValueType, IndexType>>,
             public ConvertibleTo<Sellp<ValueType, IndexType>>,
             public ConvertibleTo<Ell<ValueType, IndexType>>,
             public ReadableFromMatrixData<ValueType, IndexType>,
             public WritableToMatrixData<ValueType, IndexType>,
             public Transposable {
     // The constructors are protected; these mixins need access to them.
     friend class EnableCreateMethod<Csr>;
     friend class EnablePolymorphicObject<Csr, LinOp>;
     // The other matrix formats get access to the internals of Csr.
     friend class Coo<ValueType, IndexType>;
     friend class Dense<ValueType>;
     friend class Sellp<ValueType, IndexType>;
     friend class Ell<ValueType, IndexType>;

 public:
     // Keep the convert_to / move_to overloads of EnableLinOp visible next
     // to the format-specific overloads declared in this class.
     using EnableLinOp<Csr>::convert_to;
     using EnableLinOp<Csr>::move_to;

     using value_type = ValueType;
     using index_type = IndexType;
     using mat_data = matrix_data<ValueType, IndexType>;

     // Declared up front so that strategy_type can befriend it.
     class automatical;

     /**
      * Abstract base class of the strategies that can be attached to a Csr
      * matrix.
      *
      * A strategy carries a name and provides two hooks: clac_size() reports
      * how many srow entries are needed for a given number of nonzeros, and
      * process() fills the srow array from the row pointers.
      */
     class strategy_type {
         // automatical renames itself (via set_name) after it picked a
         // concrete strategy.
         friend class automatical;

     public:
         /**
          * Creates a strategy with the given name.
          *
          * @param name  the name of the strategy
          */
         explicit strategy_type(std::string name) : name_(std::move(name)) {}

         /**
          * Virtual destructor: strategies are handled through pointers to
          * this base class.
          */
         virtual ~strategy_type() = default;

         // Declaring the destructor would otherwise suppress the implicit
         // move operations; keep all copy/move operations available exactly
         // as before.
         strategy_type(const strategy_type &) = default;
         strategy_type(strategy_type &&) = default;
         strategy_type &operator=(const strategy_type &) = default;
         strategy_type &operator=(strategy_type &&) = default;

         /**
          * Returns the name of the strategy.
          *
          * @return a copy of the current name
          */
         std::string get_name() const { return name_; }

         /**
          * Computes the srow array for the given row pointers.
          *
          * @param mtx_row_ptrs  the row pointers of the matrix
          * @param mtx_srow  the srow array to fill
          */
         virtual void process(const Array<index_type> &mtx_row_ptrs,
                              Array<index_type> *mtx_srow) = 0;

         /**
          * Computes the number of srow entries required by the strategy.
          *
          * NOTE: the name is a historical misspelling of "calc_size"; it is
          * part of the public interface and therefore kept.
          *
          * @param nnz  the number of stored elements of the matrix
          *
          * @return the required number of srow entries
          */
         virtual int64_t clac_size(const int64_t nnz) = 0;

     protected:
         /**
          * Replaces the name of the strategy.
          *
          * @param name  the new name
          */
         void set_name(std::string name) { name_ = std::move(name); }

     private:
         std::string name_;
     };

     class classical : public strategy_type {
     public:
         classical() : strategy_type("classical") {}

         void process(const Array<index_type> &mtx_row_ptrs,
                      Array<index_type> *mtx_srow)
         {}

         int64_t clac_size(const int64_t nnz) { return 0; }
     };

     class merge_path : public strategy_type {
     public:
         merge_path() : strategy_type("merge_path") {}

         void process(const Array<index_type> &mtx_row_ptrs,
                      Array<index_type> *mtx_srow)
         {}

         int64_t clac_size(const int64_t nnz) { return 0; }
     };

     class cusparse : public strategy_type {
     public:
         cusparse() : strategy_type("cusparse") {}

         void process(const Array<index_type> &mtx_row_ptrs,
                      Array<index_type> *mtx_srow)
         {}

         int64_t clac_size(const int64_t nnz) { return 0; }
     };

     class load_balance : public strategy_type {
     public:
         load_balance()
             : load_balance(std::move(
                   gko::CudaExecutor::create(0, gko::OmpExecutor::create())))
         {}

         load_balance(std::shared_ptr<const CudaExecutor> exec)
             : load_balance(exec->get_num_warps())
         {}

         load_balance(int64_t nwarps)
             : strategy_type("load_balance"), nwarps_(nwarps)
         {}

         void process(const Array<index_type> &mtx_row_ptrs,
                      Array<index_type> *mtx_srow)
         {
             constexpr uint32 warp_size = 32;
             auto nwarps = mtx_srow->get_num_elems();

             if (nwarps > 0) {
                 auto exec = mtx_srow->get_executor()->get_master();
                 Array<index_type> srow_host(exec);
                 srow_host = *mtx_srow;
                 auto srow = srow_host.get_data();
                 Array<index_type> row_ptrs_host(exec);
                 row_ptrs_host = mtx_row_ptrs;
                 auto row_ptrs = row_ptrs_host.get_const_data();
                 for (size_type i = 0; i < nwarps; i++) {
                     srow[i] = 0;
                 }
                 auto num_rows = mtx_row_ptrs.get_num_elems() - 1;
                 auto num_elems = row_ptrs[num_rows];
                 for (size_type i = 0; i < num_rows; i++) {
                     auto bucket =
                         ceildiv((ceildiv(row_ptrs[i + 1], warp_size) * nwarps),
                                 ceildiv(num_elems, warp_size));
                     if (bucket < nwarps) {
                         srow[bucket]++;
                     }
                 }
                 // find starting row for thread i
                 for (size_type i = 1; i < nwarps; i++) {
                     srow[i] += srow[i - 1];
                 }
                 *mtx_srow = srow_host;
             }
         }

         int64_t clac_size(const int64_t nnz)
         {
             constexpr uint32 warp_size = 32;
             int multiple = 8;
             if (nnz >= 2000000) {
                 multiple = 128;
             } else if (nnz >= 200000) {
                 multiple = 32;
             }
             auto nwarps = nwarps_ * multiple;
             return min(ceildiv(nnz, warp_size), static_cast<int64_t>(nwarps));
         }

     private:
         int64_t nwarps_;
     };

     class automatical : public strategy_type {
     public:
         automatical()
             : automatical(std::move(
                   gko::CudaExecutor::create(0, gko::OmpExecutor::create())))
         {}

         automatical(std::shared_ptr<const CudaExecutor> exec)
             : automatical(exec->get_num_warps())
         {}

         automatical(int64_t nwarps)
             : strategy_type("automatical"), nwarps_(nwarps)
         {}

         void process(const Array<index_type> &mtx_row_ptrs,
                      Array<index_type> *mtx_srow)
         {
             // if the number of stored elements is larger than 1e6 or
             // the maximum number of stored elements per row is larger than
             // 64, use load_balance otherwise use classical
             const auto num_rows = mtx_row_ptrs.get_num_elems() - 1;
             Array<index_type> host_row_ptrs(
                 mtx_row_ptrs.get_executor()->get_master());
             host_row_ptrs = mtx_row_ptrs;
             const auto row_val = host_row_ptrs.get_const_data();
             if (row_val[num_rows] > static_cast<index_type>(1e6)) {
                 std::make_shared<load_balance>(nwarps_)->process(host_row_ptrs,
                                                                  mtx_srow);
                 this->set_name("load_balance");
             } else {
                 index_type maxnum = 0;
                 for (index_type i = 1; i < num_rows + 1; i++) {
                     maxnum = max(maxnum, row_val[i] - row_val[i - 1]);
                 }
                 if (maxnum > 64) {
                     std::make_shared<load_balance>(nwarps_)->process(
                         host_row_ptrs, mtx_srow);
                     this->set_name("load_balance");
                 } else {
                     std::make_shared<classical>()->process(host_row_ptrs,
                                                            mtx_srow);
                     this->set_name("classical");
                 }
             }
         }

         int64_t clac_size(const int64_t nnz)
         {
             return std::make_shared<load_balance>(nwarps_)->clac_size(nnz);
         }

     private:
         int64_t nwarps_;
     };

     // Conversions to the other matrix formats (ConvertibleTo interface).
     // Each format has a copying (convert_to, const) and a moving (move_to)
     // variant; the definitions are not part of this header.

     void convert_to(Dense<ValueType> *other) const override;

     void move_to(Dense<ValueType> *other) override;

     void convert_to(Coo<ValueType, IndexType> *result) const override;

     void move_to(Coo<ValueType, IndexType> *result) override;

     void convert_to(Sellp<ValueType, IndexType> *result) const override;

     void move_to(Sellp<ValueType, IndexType> *result) override;

     void convert_to(Ell<ValueType, IndexType> *result) const override;

     void move_to(Ell<ValueType, IndexType> *result) override;

     // Reads a matrix from a matrix_data structure.
     void read(const mat_data &data) override;

     // Writes a matrix to a matrix_data structure.
     void write(mat_data &data) const override;

     // Returns a LinOp representing the transpose of this matrix.
     std::unique_ptr<LinOp> transpose() const override;

     // Returns a LinOp representing the conjugate transpose of this matrix.
     std::unique_ptr<LinOp> conj_transpose() const override;

     /**
      * Returns the values of the matrix.
      *
      * @return pointer to the data of the values array
      */
     value_type *get_values() noexcept { return values_.get_data(); }

     /**
      * Returns the values of the matrix (read-only variant).
      *
      * @return constant pointer to the data of the values array
      */
     const value_type *get_const_values() const noexcept
     {
         return values_.get_const_data();
     }

     /**
      * Returns the column indexes of the matrix.
      *
      * @return pointer to the data of the column index array
      */
     index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); }

     /**
      * Returns the column indexes of the matrix (read-only variant).
      *
      * @return constant pointer to the data of the column index array
      */
     const index_type *get_const_col_idxs() const noexcept
     {
         return col_idxs_.get_const_data();
     }

     /**
      * Returns the row pointers of the matrix.
      *
      * @return pointer to the data of the row pointer array
      */
     index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); }

     /**
      * Returns the row pointers of the matrix (read-only variant).
      *
      * @return constant pointer to the data of the row pointer array
      */
     const index_type *get_const_row_ptrs() const noexcept
     {
         return row_ptrs_.get_const_data();
     }

     /**
      * Returns the starting rows.
      *
      * @return pointer to the data of the srow array
      */
     index_type *get_srow() noexcept { return srow_.get_data(); }

     /**
      * Returns the starting rows (read-only variant).
      *
      * @return constant pointer to the data of the srow array
      */
     const index_type *get_const_srow() const noexcept
     {
         return srow_.get_const_data();
     }

     /**
      * Returns the number of stored srow elements (involved warps).
      *
      * @return the number of elements of the srow array
      */
     size_type get_num_srow_elements() const noexcept
     {
         return srow_.get_num_elems();
     }

     /**
      * Returns the number of elements explicitly stored in the matrix.
      *
      * @return the number of elements of the values array
      */
     size_type get_num_stored_elements() const noexcept
     {
         return values_.get_num_elems();
     }

     /**
      * Returns the strategy.
      *
      * @return a shared pointer to the strategy object stored in the matrix
      */
     std::shared_ptr<strategy_type> get_strategy() const noexcept
     {
         return strategy_;
     }

 protected:
     /**
      * Creates an empty CSR matrix with the given strategy.
      *
      * Delegates to the size-based constructor with a default-constructed
      * dimension and a value-initialized (`{}`) number of nonzeros.
      *
      * @param exec  Executor associated with the matrix
      * @param strategy  the strategy of the matrix
      */
     Csr(std::shared_ptr<const Executor> exec,
         std::shared_ptr<strategy_type> strategy)
         : Csr(std::move(exec), dim<2>{}, {}, std::move(strategy))
     {}

     /**
      * Creates a CSR matrix of the given size with storage for
      * `num_nonzeros` elements. The srow array is sized according to
      * `strategy->clac_size(num_nonzeros)`.
      *
      * @param exec  Executor associated with the matrix
      * @param size  size of the matrix
      * @param num_nonzeros  number of elements to allocate storage for
      * @param strategy  the strategy of the matrix (defaults to cusparse);
      *                  it is dereferenced here, so it must not be null
      */
     Csr(std::shared_ptr<const Executor> exec, const dim<2> &size = dim<2>{},
         size_type num_nonzeros = {},
         std::shared_ptr<strategy_type> strategy = std::make_shared<cusparse>())
         : EnableLinOp<Csr>(exec, size),
           values_(exec, num_nonzeros),
           col_idxs_(exec, num_nonzeros),
           // avoid allocation for empty matrix: the row pointer array gets
           // size[0] + 1 entries, or none at all if there are no rows
           row_ptrs_(exec, size[0] + (size[0] > 0)),
           // `strategy` is still valid here: srow_ is declared (and thus
           // initialized) before strategy_, which moves from it
           srow_(exec, strategy->clac_size(num_nonzeros)),
           strategy_(std::move(strategy))
     {}

     /**
      * Creates a CSR matrix from already allocated (and initialized) value,
      * column index and row pointer arrays.
      *
      * The arrays are forwarded to the Array constructors together with
      * `exec`. Afterwards the sizes are checked, the srow array is sized
      * according to the strategy and filled via make_srow().
      *
      * @tparam ValuesArray  type of `values` array
      * @tparam ColIdxsArray  type of `col_idxs` array
      * @tparam RowPtrsArray  type of `row_ptrs` array
      *
      * @param exec  Executor associated with the matrix
      * @param size  size of the matrix
      * @param values  array of matrix values
      * @param col_idxs  array of column indexes
      * @param row_ptrs  array of row pointers
      * @param strategy  the strategy of the matrix (defaults to cusparse)
      *
      * NOTE(review): GKO_ENSURE_IN_BOUNDS presumably throws if the first
      * argument is not smaller than the second; confirm against its
      * definition.
      */
     template <typename ValuesArray, typename ColIdxsArray,
               typename RowPtrsArray>
     Csr(std::shared_ptr<const Executor> exec, const dim<2> &size,
         ValuesArray &&values, ColIdxsArray &&col_idxs, RowPtrsArray &&row_ptrs,
         std::shared_ptr<strategy_type> strategy = std::make_shared<cusparse>())
         : EnableLinOp<Csr>(exec, size),
           values_{exec, std::forward<ValuesArray>(values)},
           col_idxs_{exec, std::forward<ColIdxsArray>(col_idxs)},
           row_ptrs_{exec, std::forward<RowPtrsArray>(row_ptrs)},
           srow_(exec),
           strategy_(std::move(strategy))
     {
         const auto num_values = values_.get_num_elems();
         // Every value needs a column index. Skip the check for an empty
         // value array: `num_values - 1` would wrap around to the largest
         // size_type and make the check fail for every empty matrix.
         if (num_values > 0) {
             GKO_ENSURE_IN_BOUNDS(num_values - 1, col_idxs_.get_num_elems());
         }
         // There has to be (at least) one more row pointer than rows.
         GKO_ENSURE_IN_BOUNDS(this->get_size()[0], row_ptrs_.get_num_elems());
         srow_.resize_and_reset(strategy_->clac_size(num_values));
         this->make_srow();
     }

     // Overrides of the LinOp apply hooks; the definitions are not part of
     // this header.
     void apply_impl(const LinOp *b, LinOp *x) const override;

     void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta,
                     LinOp *x) const override;

     /**
      * Computes srow by letting the strategy process the row pointers.
      * It should be run after the row pointers were changed.
      */
     void make_srow() { strategy_->process(row_ptrs_, &srow_); }

 private:
     // NOTE: the constructors rely on this declaration order (srow_ is
     // initialized from the strategy before strategy_ takes ownership of it).
     Array<value_type> values_;    // stored values
     Array<index_type> col_idxs_;  // column index of every stored value
     Array<index_type> row_ptrs_;  // row pointers
     Array<index_type> srow_;      // starting rows, filled by the strategy
     std::shared_ptr<strategy_type> strategy_;  // strategy used for srow_
 };


 }  // namespace matrix
 }  // namespace gko


 #endif  // GKO_CORE_MATRIX_CSR_HPP_
gko::ceildiv
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition: math.hpp:280

gko::matrix::Csr::get_const_srow
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition: csr.hpp:374

gko::dim< 2 >

gko::EnableCreateMethod
This mixin implements a static create() method on ConcreteType that dynamically allocates the memory...
Definition: polymorphic_object.hpp:576

gko::matrix::Csr::get_const_row_ptrs
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:355

gko::Array::get_num_elems
size_type get_num_elems() const noexcept
Returns the number of elements in the Array.
Definition: array.hpp:388

gko::matrix::Csr::cusparse
Definition: csr.hpp:145

gko::matrix::Csr::get_values
value_type * get_values() noexcept
Returns the values of the matrix.
Definition: csr.hpp:308

gko::matrix::Csr::get_srow
index_type * get_srow() noexcept
Returns the starting rows.
Definition: csr.hpp:365

gko::CudaExecutor::create
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master)
Creates a new CudaExecutor.

gko::matrix::Ell
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition: csr.hpp:53

gko::ConvertibleTo
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition: polymorphic_object.hpp:380

gko::matrix::Csr::get_col_idxs
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:327

gko::EnablePolymorphicObject
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition: polymorphic_object.hpp:505

gko::uint32
std::uint32_t uint32
32-bit unsigned integral type.
Definition: types.hpp:134

gko::size_type
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:94

gko::matrix::Csr::strategy_type
Definition: csr.hpp:103

gko::Array::get_executor
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition: array.hpp:413

gko::Array::get_const_data
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the Array...
Definition: array.hpp:406

gko
The Ginkgo namespace.
Definition: abstract_factory.hpp:45

gko::matrix::Csr
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: coo.hpp:51

gko::OmpExecutor::create
static std::shared_ptr< OmpExecutor > create()
Creates a new OmpExecutor.
Definition: executor.hpp:735

gko::matrix::Csr::get_row_ptrs
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:346

gko::matrix::Csr::automatical
Definition: csr.hpp:223

gko::min
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition: math.hpp:393

gko::matrix::Csr::transpose
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.

gko::matrix::Sellp
SELL-P is a matrix format similar to ELL format.
Definition: csr.hpp:57

gko::matrix::Dense
Dense is a matrix format which explicitly stores all values of the matrix.
Definition: coo.hpp:55

gko::LinOp
Definition: lin_op.hpp:134

gko::EnableLinOp
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition: lin_op.hpp:509

gko::matrix::Csr::write
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.

gko::LinOp::get_size
const dim< 2 > & get_size() const noexcept
Returns the size of the operator.
Definition: lin_op.hpp:221

gko::matrix::Csr::get_strategy
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition: csr.hpp:403

gko::matrix::Coo
COO stores a matrix in the coordinate matrix format.
Definition: coo.hpp:73

gko::Array< index_type >

gko::matrix::Csr::get_num_stored_elements
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition: csr.hpp:394

gko::matrix::Csr::classical
Definition: csr.hpp:123

gko::matrix::Csr::get_const_col_idxs
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:336

gko::WritableToMatrixData
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition: lin_op.hpp:446

gko::matrix::Csr::conj_transpose
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.

gko::matrix::Csr::merge_path
Definition: csr.hpp:134

gko::matrix::Csr::get_num_srow_elements
size_type get_num_srow_elements() const noexcept
Returns the number of stored srow elements (involved warps).
Definition: csr.hpp:384

gko::matrix::Csr::load_balance
Definition: csr.hpp:156

gko::Array::get_data
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the Array. ...
Definition: array.hpp:397

gko::matrix::Csr::get_const_values
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition: csr.hpp:317

gko::Transposable
Linear operators which support transposition should implement the Transposable interface.
Definition: lin_op.hpp:398

gko::max
constexpr T max(const T &x, const T &y)
Returns the larger of the arguments.
Definition: math.hpp:373

gko::matrix_data
This structure is used as an intermediate data type to store a sparse matrix.
Definition: matrix_data.hpp:102

gko::ReadableFromMatrixData
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition: lin_op.hpp:426

gko::matrix::Csr::read
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.