33 #ifndef GKO_CORE_MATRIX_CSR_HPP_ 34 #define GKO_CORE_MATRIX_CSR_HPP_ 37 #include <ginkgo/core/base/array.hpp> 38 #include <ginkgo/core/base/lin_op.hpp> 45 template <
typename ValueType>
49 template <
typename ValueType,
typename IndexType>
52 template <
typename ValueType,
typename IndexType>
56 template <
typename ValueType,
typename IndexType>
76 template <
typename ValueType = default_precision,
typename IndexType =
int32>
// Friend declarations: the Coo, Dense, Sellp and Ell matrix classes are
// granted access to the non-public members of this class.
88 friend class Coo<ValueType, IndexType>;
89 friend class Dense<ValueType>;
90 friend class Sellp<ValueType, IndexType>;
91 friend class Ell<ValueType, IndexType>;
// Member aliases that expose the two template parameters by name.
97 using value_type = ValueType;
98 using index_type = IndexType;
109 std::string get_name() {
return name_; }
/**
 * Pure virtual hook that every concrete strategy has to implement.
 *
 * The Csr constructors use the returned value as the number of elements of
 * the srow array.
 *
 * @param nnz  the number of stored elements of the matrix (the constructors
 *             pass the length of the values array)
 *
 * @return the number of srow elements to allocate
 */
114 virtual int64_t clac_size(
const int64_t nnz) = 0;
117 void set_name(std::string name) { name_ = name; }
/**
 * Returns the number of srow elements this strategy asks for.
 *
 * The Csr constructors size the srow array with this value; this
 * implementation requests none, independent of the input.
 *
 * @param nnz  the number of stored elements of the matrix (not used here)
 *
 * @return always 0
 */
int64_t clac_size(const int64_t nnz) { return 0; }
/**
 * Returns the number of srow elements this strategy asks for.
 *
 * The Csr constructors size the srow array with this value; this
 * implementation requests none, independent of the input.
 *
 * @param nnz  the number of stored elements of the matrix (not used here)
 *
 * @return always 0
 */
int64_t clac_size(const int64_t nnz) { return 0; }
/**
 * Returns the number of srow elements this strategy asks for.
 *
 * The Csr constructors size the srow array with this value; this
 * implementation requests none, independent of the input.
 *
 * @param nnz  the number of stored elements of the matrix (not used here)
 *
 * @return always 0
 */
int64_t clac_size(const int64_t nnz) { return 0; }
174 constexpr
uint32 warp_size = 32;
180 srow_host = *mtx_srow;
183 row_ptrs_host = mtx_row_ptrs;
189 auto num_elems = row_ptrs[num_rows];
190 for (
size_type i = 0; i < num_rows; i++) {
193 ceildiv(num_elems, warp_size));
194 if (bucket < nwarps) {
200 srow[i] += srow[i - 1];
202 *mtx_srow = srow_host;
206 int64_t clac_size(
const int64_t nnz)
208 constexpr
uint32 warp_size = 32;
210 if (nnz >= 2000000) {
212 }
else if (nnz >= 200000) {
215 auto nwarps = nwarps_ * multiple;
216 return min(
ceildiv(nnz, warp_size), static_cast<int64_t>(nwarps));
230 automatical(std::shared_ptr<const CudaExecutor> exec)
247 host_row_ptrs = mtx_row_ptrs;
249 if (row_val[num_rows] > static_cast<index_type>(1e6)) {
250 std::make_shared<load_balance>(nwarps_)->process(host_row_ptrs,
252 this->set_name(
"load_balance");
254 index_type maxnum = 0;
255 for (index_type i = 1; i < num_rows + 1; i++) {
256 maxnum =
max(maxnum, row_val[i] - row_val[i - 1]);
259 std::make_shared<load_balance>(nwarps_)->process(
260 host_row_ptrs, mtx_srow);
261 this->set_name(
"load_balance");
263 std::make_shared<classical>()->process(host_row_ptrs,
265 this->set_name(
"classical");
270 int64_t clac_size(
const int64_t nnz)
272 return std::make_shared<load_balance>(nwarps_)->clac_size(nnz);
/**
 * Returns a LinOp representing the transpose of this object.
 *
 * Declared with `override`, i.e. it implements a virtual of a base class;
 * only the declaration is visible here.
 *
 * @return an owning pointer to the transposed operator
 */
299 std::unique_ptr<LinOp>
transpose()
const override;
308 value_type *
get_values() noexcept {
return values_.get_data(); }
319 return values_.get_const_data();
338 return col_idxs_.get_const_data();
357 return row_ptrs_.get_const_data();
365 index_type *
get_srow() noexcept {
return srow_.get_data(); }
376 return srow_.get_const_data();
386 return srow_.get_num_elems();
396 return values_.get_num_elems();
415 Csr(std::shared_ptr<const Executor> exec,
416 std::shared_ptr<strategy_type> strategy)
417 :
Csr(std::move(exec),
dim<2>{}, {}, std::move(strategy))
428 Csr(std::shared_ptr<const Executor> exec,
const dim<2> &size =
dim<2>{},
430 std::shared_ptr<strategy_type> strategy = std::make_shared<cusparse>())
432 values_(exec, num_nonzeros),
433 col_idxs_(exec, num_nonzeros),
435 row_ptrs_(exec, size[0] + (size[0] > 0)),
436 srow_(exec, strategy->clac_size(num_nonzeros)),
437 strategy_(std::move(strategy))
460 template <
typename ValuesArray,
typename ColIdxsArray,
461 typename RowPtrsArray>
462 Csr(std::shared_ptr<const Executor> exec,
const dim<2> &size,
463 ValuesArray &&values, ColIdxsArray &&col_idxs, RowPtrsArray &&row_ptrs,
464 std::shared_ptr<strategy_type> strategy = std::make_shared<cusparse>())
466 values_{exec, std::forward<ValuesArray>(values)},
467 col_idxs_{exec, std::forward<ColIdxsArray>(col_idxs)},
468 row_ptrs_{exec, std::forward<RowPtrsArray>(row_ptrs)},
470 strategy_(std::move(strategy))
472 GKO_ENSURE_IN_BOUNDS(values_.get_num_elems() - 1,
473 col_idxs_.get_num_elems());
474 GKO_ENSURE_IN_BOUNDS(this->
get_size()[0], row_ptrs_.get_num_elems());
475 srow_.resize_and_reset(strategy_->clac_size(values_.get_num_elems()));
/**
 * Two-operand apply hook; declared with `override`, so it implements a
 * virtual of a base class. Only the declaration is visible here.
 *
 * @param b  read-only input operator
 * @param x  operator that receives the result
 *           NOTE(review): presumably computes x = (*this) * b -- confirm
 *           against the implementation.
 */
479 void apply_impl(
const LinOp *b,
LinOp *x)
const override;
482 LinOp *x)
const override;
487 void make_srow() { strategy_->process(row_ptrs_, &srow_); }
// Shared handle to the strategy object. Its clac_size() result is used to
// size srow_, and its process() method is called by make_srow().
494 std::shared_ptr<strategy_type> strategy_;
502 #endif // GKO_CORE_MATRIX_CSR_HPP_
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition: math.hpp:280
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition: csr.hpp:374
This mixin implements a static create() method on ConcreteType that dynamically allocates the memory...
Definition: polymorphic_object.hpp:576
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:355
size_type get_num_elems() const noexcept
Returns the number of elements in the Array.
Definition: array.hpp:388
value_type * get_values() noexcept
Returns the values of the matrix.
Definition: csr.hpp:308
index_type * get_srow() noexcept
Returns the starting rows.
Definition: csr.hpp:365
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master)
Creates a new CudaExecutor.
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition: csr.hpp:53
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition: polymorphic_object.hpp:380
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:327
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition: polymorphic_object.hpp:505
std::uint32_t uint32
32-bit unsigned integral type.
Definition: types.hpp:134
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:94
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition: array.hpp:413
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the Array...
Definition: array.hpp:406
The Ginkgo namespace.
Definition: abstract_factory.hpp:45
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: coo.hpp:51
static std::shared_ptr< OmpExecutor > create()
Creates a new OmpExecutor.
Definition: executor.hpp:735
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:346
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition: math.hpp:393
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
SELL-P is a matrix format similar to ELL format.
Definition: csr.hpp:57
Dense is a matrix format which explicitly stores all values of the matrix.
Definition: coo.hpp:55
Definition: lin_op.hpp:134
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition: lin_op.hpp:509
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
const dim< 2 > & get_size() const noexcept
Returns the size of the operator.
Definition: lin_op.hpp:221
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition: csr.hpp:403
COO stores a matrix in the coordinate matrix format.
Definition: coo.hpp:73
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition: csr.hpp:394
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:336
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition: lin_op.hpp:446
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
size_type get_num_srow_elements() const noexcept
Returns the number of stored srow elements (involved warps).
Definition: csr.hpp:384
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the Array. ...
Definition: array.hpp:397
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition: csr.hpp:317
Linear operators which support transposition should implement the Transposable interface.
Definition: lin_op.hpp:398
constexpr T max(const T &x, const T &y)
Returns the larger of the arguments.
Definition: math.hpp:373
This structure is used as an intermediate data type to store a sparse matrix.
Definition: matrix_data.hpp:102
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition: lin_op.hpp:426
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.