 |
Ginkgo
Generated from pipelines/2171896597 branch based on develop. Ginkgo version 1.11.0
A numerical linear algebra library targeting many-core architectures
|
5 #ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6 #define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
9 #include <ginkgo/core/base/array.hpp>
10 #include <ginkgo/core/base/index_set.hpp>
11 #include <ginkgo/core/base/lin_op.hpp>
12 #include <ginkgo/core/base/math.hpp>
13 #include <ginkgo/core/matrix/permutation.hpp>
14 #include <ginkgo/core/matrix/scaled_permutation.hpp>
21 template <
typename ValueType>
24 template <
typename ValueType>
27 template <
typename ValueType,
typename IndexType>
30 template <
typename ValueType,
typename IndexType>
33 template <
typename ValueType,
typename IndexType>
36 template <
typename ValueType,
typename IndexType>
39 template <
typename ValueType,
typename IndexType>
42 template <
typename ValueType,
typename IndexType>
45 template <
typename ValueType,
typename IndexType>
48 template <
typename ValueType,
typename IndexType>
51 template <
typename IndexType>
58 template <
typename ValueType = default_precision,
typename IndexType =
int32>
103 template <
typename ValueType = default_precision,
typename IndexType =
int32>
105 public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
106 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
107 public ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>,
109 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
110 public ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>,
125 remove_complex<Csr<ValueType, IndexType>>>,
128 friend class Coo<ValueType, IndexType>;
129 friend class Dense<ValueType>;
131 friend class Ell<ValueType, IndexType>;
132 friend class Hybrid<ValueType, IndexType>;
133 friend class Sellp<ValueType, IndexType>;
135 friend class Fbcsr<ValueType, IndexType>;
136 friend class CsrBuilder<ValueType, IndexType>;
138 GKO_ASSERT_SUPPORTED_VALUE_AND_INDEX_TYPE;
161 using value_type = ValueType;
162 using index_type = IndexType;
212 virtual int64_t
clac_size(
const int64_t nnz) = 0;
218 virtual std::shared_ptr<strategy_type>
copy() = 0;
221 void set_name(std::string name) { name_ = name; }
243 auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
245 const bool is_mtx_on_host{host_mtx_exec ==
247 const index_type* row_ptrs{};
248 if (is_mtx_on_host) {
251 row_ptrs_host = mtx_row_ptrs;
254 auto num_rows = mtx_row_ptrs.
get_size() - 1;
255 max_length_per_row_ = 0;
256 for (
size_type i = 0; i < num_rows; i++) {
257 max_length_per_row_ = std::max(max_length_per_row_,
258 row_ptrs[i + 1] - row_ptrs[i]);
262 int64_t
clac_size(
const int64_t nnz)
override {
return 0; }
264 index_type get_max_length_per_row() const noexcept
266 return max_length_per_row_;
269 std::shared_ptr<strategy_type>
copy()
override
271 return std::make_shared<classical>();
275 index_type max_length_per_row_;
294 int64_t
clac_size(
const int64_t nnz)
override {
return 0; }
296 std::shared_ptr<strategy_type>
copy()
override
298 return std::make_shared<merge_path>();
319 int64_t
clac_size(
const int64_t nnz)
override {
return 0; }
321 std::shared_ptr<strategy_type>
copy()
override
323 return std::make_shared<cusparse>();
343 int64_t
clac_size(
const int64_t nnz)
override {
return 0; }
345 std::shared_ptr<strategy_type>
copy()
override
347 return std::make_shared<sparselib>();
373 :
load_balance(exec->get_num_warps(), exec->get_warp_size())
382 :
load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
393 :
load_balance(exec->get_num_subgroups(), 32, false,
"intel")
408 bool cuda_strategy =
true,
409 std::string strategy_name =
"none")
412 warp_size_(warp_size),
413 cuda_strategy_(cuda_strategy),
414 strategy_name_(strategy_name)
423 auto host_srow_exec = mtx_srow->
get_executor()->get_master();
424 auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
425 const bool is_srow_on_host{host_srow_exec ==
427 const bool is_mtx_on_host{host_mtx_exec ==
431 const index_type* row_ptrs{};
433 if (is_srow_on_host) {
436 srow_host = *mtx_srow;
439 if (is_mtx_on_host) {
442 row_ptrs_host = mtx_row_ptrs;
448 const auto num_rows = mtx_row_ptrs.
get_size() - 1;
449 const auto num_elems = row_ptrs[num_rows];
450 const auto bucket_divider =
451 num_elems > 0 ?
ceildiv(num_elems, warp_size_) : 1;
452 for (
size_type i = 0; i < num_rows; i++) {
456 if (bucket < nwarps) {
462 srow[i] += srow[i - 1];
464 if (!is_srow_on_host) {
465 *mtx_srow = srow_host;
472 if (warp_size_ > 0) {
474 if (nnz >= static_cast<int64_t>(2e8)) {
476 }
else if (nnz >= static_cast<int64_t>(2e7)) {
478 }
else if (nnz >= static_cast<int64_t>(2e6)) {
480 }
else if (nnz >= static_cast<int64_t>(2e5)) {
483 if (strategy_name_ ==
"intel") {
485 if (nnz >= static_cast<int64_t>(2e8)) {
487 }
else if (nnz >= static_cast<int64_t>(2e7)) {
491 #if GINKGO_HIP_PLATFORM_HCC
492 if (!cuda_strategy_) {
494 if (nnz >= static_cast<int64_t>(1e7)) {
496 }
else if (nnz >= static_cast<int64_t>(1e6)) {
500 #endif // GINKGO_HIP_PLATFORM_HCC
502 auto nwarps = nwarps_ * multiple;
509 std::shared_ptr<strategy_type>
copy()
override
511 return std::make_shared<load_balance>(
512 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
519 std::string strategy_name_;
526 const index_type nvidia_row_len_limit = 1024;
529 const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
532 const index_type amd_row_len_limit = 768;
535 const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
538 const index_type intel_row_len_limit = 25600;
541 const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
561 :
automatical(exec->get_num_warps(), exec->get_warp_size())
570 :
automatical(exec->get_num_warps(), exec->get_warp_size(), false)
581 :
automatical(exec->get_num_subgroups(), 32, false,
"intel")
596 bool cuda_strategy =
true,
597 std::string strategy_name =
"none")
600 warp_size_(warp_size),
601 cuda_strategy_(cuda_strategy),
602 strategy_name_(strategy_name),
603 max_length_per_row_(0)
612 index_type nnz_limit = nvidia_nnz_limit;
613 index_type row_len_limit = nvidia_row_len_limit;
614 if (strategy_name_ ==
"intel") {
615 nnz_limit = intel_nnz_limit;
616 row_len_limit = intel_row_len_limit;
618 #if GINKGO_HIP_PLATFORM_HCC
619 if (!cuda_strategy_) {
620 nnz_limit = amd_nnz_limit;
621 row_len_limit = amd_row_len_limit;
623 #endif // GINKGO_HIP_PLATFORM_HCC
624 auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
625 const bool is_mtx_on_host{host_mtx_exec ==
628 const index_type* row_ptrs{};
629 if (is_mtx_on_host) {
632 row_ptrs_host = mtx_row_ptrs;
635 const auto num_rows = mtx_row_ptrs.
get_size() - 1;
636 if (row_ptrs[num_rows] > nnz_limit) {
638 cuda_strategy_, strategy_name_);
639 if (is_mtx_on_host) {
640 actual_strategy.
process(mtx_row_ptrs, mtx_srow);
642 actual_strategy.
process(row_ptrs_host, mtx_srow);
644 this->set_name(actual_strategy.
get_name());
646 index_type maxnum = 0;
647 for (
size_type i = 0; i < num_rows; i++) {
648 maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
650 if (maxnum > row_len_limit) {
652 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
653 if (is_mtx_on_host) {
654 actual_strategy.
process(mtx_row_ptrs, mtx_srow);
656 actual_strategy.
process(row_ptrs_host, mtx_srow);
658 this->set_name(actual_strategy.
get_name());
661 if (is_mtx_on_host) {
662 actual_strategy.
process(mtx_row_ptrs, mtx_srow);
663 max_length_per_row_ =
664 actual_strategy.get_max_length_per_row();
666 actual_strategy.
process(row_ptrs_host, mtx_srow);
667 max_length_per_row_ =
668 actual_strategy.get_max_length_per_row();
670 this->set_name(actual_strategy.
get_name());
677 return std::make_shared<load_balance>(
678 nwarps_, warp_size_, cuda_strategy_, strategy_name_)
682 index_type get_max_length_per_row() const noexcept
684 return max_length_per_row_;
687 std::shared_ptr<strategy_type>
copy()
override
689 return std::make_shared<automatical>(
690 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
697 std::string strategy_name_;
698 index_type max_length_per_row_;
708 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
720 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
760 void read(
const mat_data& data)
override;
762 void read(
const device_mat_data& data)
override;
764 void read(device_mat_data&& data)
override;
766 void write(mat_data& data)
const override;
768 std::unique_ptr<LinOp>
transpose()
const override;
806 std::unique_ptr<lookup_data>
internal;
881 std::unique_ptr<lookup_data>
internal;
969 std::unique_ptr<lookup_data>
internal;
1039 std::unique_ptr<Permutation<IndexType>> value_permutation;
1091 bool invert =
false)
const;
1138 bool invert =
false)
const;
1170 bool invert =
false)
const;
1172 std::unique_ptr<LinOp>
permute(
1207 bool is_sorted_by_column_index()
const;
1333 strategy_ = std::move(strategy->copy());
1346 GKO_ASSERT_EQUAL_DIMENSIONS(alpha,
dim<2>(1, 1));
1359 GKO_ASSERT_EQUAL_DIMENSIONS(alpha,
dim<2>(1, 1));
1371 static std::unique_ptr<Csr>
create(std::shared_ptr<const Executor> exec,
1372 std::shared_ptr<strategy_type> strategy);
1385 static std::unique_ptr<Csr>
create(
1386 std::shared_ptr<const Executor> exec,
const dim<2>& size = {},
1388 std::shared_ptr<strategy_type> strategy =
nullptr);
1409 static std::unique_ptr<Csr>
create(
1410 std::shared_ptr<const Executor> exec,
const dim<2>& size,
1411 array<value_type> values, array<index_type> col_idxs,
1412 array<index_type> row_ptrs,
1413 std::shared_ptr<strategy_type> strategy =
nullptr);
1419 template <
typename InputValueType,
typename InputColumnIndexType,
1420 typename InputRowPtrType>
1422 "explicitly construct the gko::array argument instead of passing "
1423 "initializer lists")
1425 std::shared_ptr<const
Executor> exec, const
dim<2>& size,
1426 std::initializer_list<InputValueType> values,
1427 std::initializer_list<InputColumnIndexType> col_idxs,
1428 std::initializer_list<InputRowPtrType> row_ptrs)
1451 std::shared_ptr<const Executor> exec,
const dim<2>& size,
1452 gko::detail::const_array_view<ValueType>&& values,
1453 gko::detail::const_array_view<IndexType>&& col_idxs,
1454 gko::detail::const_array_view<IndexType>&& row_ptrs,
1455 std::shared_ptr<strategy_type> strategy =
nullptr);
1485 const span& row_span,
const span& column_span)
const;
1512 Csr(std::shared_ptr<const Executor> exec,
const dim<2>& size = {},
1514 std::shared_ptr<strategy_type> strategy =
nullptr);
1516 Csr(std::shared_ptr<const Executor> exec,
const dim<2>& size,
1517 array<value_type> values, array<index_type> col_idxs,
1518 array<index_type> row_ptrs,
1519 std::shared_ptr<strategy_type> strategy =
nullptr);
1521 void apply_impl(
const LinOp* b,
LinOp* x)
const override;
1523 void apply_impl(
const LinOp* alpha,
const LinOp* b,
const LinOp* beta,
1524 LinOp* x)
const override;
1527 static std::shared_ptr<strategy_type> make_default_strategy(
1528 std::shared_ptr<const Executor> exec)
1530 auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1531 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1532 auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1533 std::shared_ptr<strategy_type> new_strategy;
1535 new_strategy = std::make_shared<automatical>(cuda_exec);
1536 }
else if (hip_exec) {
1537 new_strategy = std::make_shared<automatical>(hip_exec);
1538 }
else if (dpcpp_exec) {
1539 new_strategy = std::make_shared<automatical>(dpcpp_exec);
1541 new_strategy = std::make_shared<classical>();
1543 return new_strategy;
1547 template <
typename CsrType>
1548 void convert_strategy_helper(CsrType* result)
const
1551 std::shared_ptr<typename CsrType::strategy_type> new_strat;
1552 if (dynamic_cast<classical*>(strat)) {
1553 new_strat = std::make_shared<typename CsrType::classical>();
1554 }
else if (dynamic_cast<merge_path*>(strat)) {
1555 new_strat = std::make_shared<typename CsrType::merge_path>();
1556 }
else if (dynamic_cast<cusparse*>(strat)) {
1557 new_strat = std::make_shared<typename CsrType::cusparse>();
1558 }
else if (dynamic_cast<sparselib*>(strat)) {
1559 new_strat = std::make_shared<typename CsrType::sparselib>();
1561 auto rexec = result->get_executor();
1563 std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1564 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1566 std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1567 auto lb = dynamic_cast<load_balance*>(strat);
1571 std::make_shared<typename CsrType::load_balance>(
1574 new_strat = std::make_shared<typename CsrType::automatical>(
1577 }
else if (hip_exec) {
1580 std::make_shared<typename CsrType::load_balance>(
1583 new_strat = std::make_shared<typename CsrType::automatical>(
1586 }
else if (dpcpp_exec) {
1589 std::make_shared<typename CsrType::load_balance>(
1592 new_strat = std::make_shared<typename CsrType::automatical>(
1597 auto this_cuda_exec =
1598 std::dynamic_pointer_cast<const CudaExecutor>(
1600 auto this_hip_exec =
1601 std::dynamic_pointer_cast<const HipExecutor>(
1603 auto this_dpcpp_exec =
1604 std::dynamic_pointer_cast<const DpcppExecutor>(
1606 if (this_cuda_exec) {
1609 std::make_shared<typename CsrType::load_balance>(
1613 std::make_shared<typename CsrType::automatical>(
1616 }
else if (this_hip_exec) {
1619 std::make_shared<typename CsrType::load_balance>(
1623 std::make_shared<typename CsrType::automatical>(
1626 }
else if (this_dpcpp_exec) {
1629 std::make_shared<typename CsrType::load_balance>(
1633 std::make_shared<typename CsrType::automatical>(
1641 new_strat = std::make_shared<typename CsrType::classical>();
1645 result->set_strategy(new_strat);
1654 strategy_->process(row_ptrs_, &srow_);
1663 virtual void scale_impl(
const LinOp* alpha);
1671 virtual void inv_scale_impl(
const LinOp* alpha);
1674 std::shared_ptr<strategy_type> strategy_;
1675 array<value_type> values_;
1676 array<index_type> col_idxs_;
1677 array<index_type> row_ptrs_;
1678 array<index_type> srow_;
1680 void add_scaled_identity_impl(
const LinOp* a,
const LinOp* b)
override;
1693 template <
typename ValueType,
typename IndexType>
1694 void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1696 using load_balance =
typename Csr<ValueType, IndexType>::load_balance;
1697 using automatical =
typename Csr<ValueType, IndexType>::automatical;
1698 auto strategy = result->get_strategy();
1699 auto executor = result->get_executor();
1700 if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1702 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1703 result->set_strategy(std::make_shared<load_balance>(exec));
1704 }
else if (
auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1706 result->set_strategy(std::make_shared<load_balance>(exec));
1708 }
else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1710 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1711 result->set_strategy(std::make_shared<automatical>(exec));
1712 }
else if (
auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1714 result->set_strategy(std::make_shared<automatical>(exec));
1725 #endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition: csr.hpp:1292
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition: csr.hpp:381
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:315
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:1245
Fixed-block compressed sparse row storage matrix format.
Definition: csr.hpp:46
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: matrix.hpp:30
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:1273
sparselib()
Creates a sparselib strategy.
Definition: csr.hpp:337
std::unique_ptr< Csr > multiply(ptr_param< const Csr > other) const
Computes the sparse matrix product this * other on the executor of this matrix.
Definition: lin_op.hpp:117
std::pair< std::unique_ptr< Csr >, scale_add_reuse_info > add_scale_reuse(ptr_param< const Dense< value_type >> scale_this, ptr_param< const Dense< value_type >> scale_other, ptr_param< const Csr > mtx_other) const
Computes the sparse matrix sum scale_this * this + scale_other * mtx_other on the executor of this matr...
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type >> permutation, permute_mode mode=permute_mode::symmetric) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
Dense is a matrix format which explicitly stores all values of the matrix.
Definition: dense_cache.hpp:28
Class describing the internal lookup structures created by multiply_add_reuse to recompute a sparse m...
Definition: csr.hpp:845
std::unique_ptr< LinOp > inverse_row_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the row permutation of the inverse permuted object.
sparselib is a strategy_type which uses the sparselib csr.
Definition: csr.hpp:332
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition: csr.hpp:40
load_balance is a strategy_type which uses the load balance algorithm.
Definition: csr.hpp:354
std::pair< std::unique_ptr< Csr >, multiply_reuse_info > multiply_reuse(ptr_param< const Csr > other) const
Computes the sparse matrix product this * other on the executor of this matrix, and necessary data fo...
void update_values(ptr_param< const Csr > mtx, ptr_param< const Dense< value_type >> scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type >> scale_add, ptr_param< const Csr > mtx_add, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix product out = scale_mult * mtx * mtx_mult + scale_add * mtx_add w...
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition: csr.hpp:1343
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition: csr.hpp:569
Linear operators which support transposition should implement the Transposable interface.
Definition: lin_op.hpp:433
std::unique_ptr< LinOp > column_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the column permutation of the Permutable object.
std::string get_name()
Returns the name of strategy.
Definition: csr.hpp:194
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:262
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition: scaled_permutation.hpp:36
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:90
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
index_type * get_srow() noexcept
Returns the starting rows.
Definition: csr.hpp:1283
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:343
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > transpose_reuse() const
Computes the necessary data to update a transposed matrix from its original matrix.
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition: csr.hpp:52
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:606
permuting_reuse_info()
Creates an empty reuse info.
std::unique_ptr< LinOp > row_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the row permutation of the Permutable object.
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:269
This is the Executor subclass which represents the CUDA device.
Definition: executor.hpp:1540
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
Linear operators which support permutation should implement the Permutable interface.
Definition: lin_op.hpp:484
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition: csr.hpp:392
The Ginkgo namespace.
Definition: abstract_factory.hpp:20
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:417
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition: csr.hpp:1356
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
std::unique_ptr< Csr > multiply_add(ptr_param< const Dense< value_type >> scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type >> scale_add, ptr_param< const Csr > mtx_add) const
Computes the sparse matrix product scale_mult * this * mtx_mult + scale_add * mtx_add on the executor...
cusparse is a strategy_type which uses the sparselib csr.
Definition: csr.hpp:308
std::unique_ptr< LinOp > inverse_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the symmetric inverse row and column permutation of the Permutable objec...
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:1264
void resize_and_reset(size_type size)
Resizes the array so it is able to hold the specified number of elements.
Definition: array.hpp:622
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition: range.hpp:46
This structure is used as an intermediate data type to store a sparse matrix.
Definition: matrix_data.hpp:126
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:470
merge_path is a strategy_type which uses the merge_path algorithm.
Definition: csr.hpp:283
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type >> permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation.
An index set class represents an ordered set of intervals.
Definition: index_set.hpp:56
automatical()
Creates an automatical strategy.
Definition: csr.hpp:550
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:296
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition: csr.hpp:407
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition: lin_op.hpp:31
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition: csr.hpp:372
This class is used for function parameters in the place of raw pointers.
Definition: utils_helper.hpp:41
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition: array.hpp:687
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition: lin_op.hpp:605
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition: executor.hpp:1386
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition: lin_op.hpp:660
The rows and columns will be permuted.
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:339
cusparse()
Creates a cusparse strategy.
Definition: csr.hpp:313
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:319
merge_path()
Creates a merge_path strategy.
Definition: csr.hpp:288
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition: csr.hpp:1223
mode
The mode for the residual norm criterion.
Definition: residual_norm.hpp:37
load_balance()
Creates a load_balance strategy.
Definition: csr.hpp:362
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition: array.hpp:703
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition: csr.hpp:1312
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition: lin_op.hpp:818
A struct describing a transformation of the matrix that reorders the values of the matrix into the tr...
Definition: csr.hpp:1021
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:509
typename detail::find_precision_impl< T, step >::type next_precision
Obtains the type that is `step` moves after T in the singly-linked chain of precisions, which takes bfloat16/half into account.
Definition: math.hpp:466
classical()
Creates a classical strategy.
Definition: csr.hpp:238
strategy_type(std::string name)
Creates a strategy_type.
Definition: csr.hpp:185
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:290
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
std::unique_ptr< Csr > scale_add(ptr_param< const Dense< value_type >> scale_this, ptr_param< const Dense< value_type >> scale_other, ptr_param< const Csr > mtx_other) const
Computes the sparse matrix sum scale_this * this + scale_other * mtx_other on the executor of this matr...
typename detail::find_precision_impl< T, -step >::type previous_precision
Obtains the type that is `step` moves before T in the singly-linked chain of precisions, which takes bfloat16/half into account.
Definition: math.hpp:473
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:687
classical is a strategy_type which uses the same number of threads on each row.
Definition: csr.hpp:233
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition: csr.hpp:1321
void update_values(ptr_param< const Csr > input, ptr_param< Csr > output) const
Propagates the values from an input matrix to the transformed matrix.
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition: csr.hpp:1331
void update_values(ptr_param< const Dense< value_type >> scale1, ptr_param< const Csr > mtx1, ptr_param< const Dense< value_type >> scale2, ptr_param< const Csr > mtx2, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix sum out = scale1 * mtx1 + scale2 * mtx2 when only the values of m...
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition: csr.hpp:31
std::unique_ptr< const Dense< ValueType > > create_const_value_view() const
Creates a const Dense view of the value array of this matrix as a column vector of dimensions nnz x 1...
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition: polymorphic_object.hpp:479
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
strategy_type decides how the csr algorithm is set.
Definition: csr.hpp:176
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition: temporary_clone.hpp:208
std::pair< std::unique_ptr< Csr >, multiply_add_reuse_info > multiply_add_reuse(ptr_param< const Dense< value_type >> scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type >> scale_add, ptr_param< const Csr > mtx_add) const
Computes the sparse matrix product scale_mult * this * mtx_mult + scale_add * mtx_add on the executor...
The first step in using the Ginkgo library consists of creating an executor.
Definition: executor.hpp:615
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition: coo.hpp:32
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition: array.hpp:696
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition: permutation.hpp:42
void update_values(ptr_param< const Csr > mtx1, ptr_param< const Csr > mtx2, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix product out = mtx1 * mtx2 when only the values of mtx1 and mtx2 c...
SELL-P is a matrix format similar to ELL format.
Definition: csr.hpp:37
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition: math.hpp:750
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:321
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:1254
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition: math.hpp:614
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with Dpcpp executor.
Definition: csr.hpp:580
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:294
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition: lin_op.hpp:794
std::unique_ptr< LinOp > inverse_column_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the column permutation of the inverse permuted object.
Csr(const Csr &)
Copy-constructs a Csr matrix.
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition: csr.hpp:560
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition: polymorphic_object.hpp:243
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition: array.hpp:670
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:675
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition: csr.hpp:595
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:240
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition: math.hpp:264
void compute_absolute_inplace() override
Compute absolute inplace on each element.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type >> permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
This type is a device-side equivalent to matrix_data.
Definition: device_matrix_data.hpp:36
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition: lin_op.hpp:877
std::unique_ptr< Dense< ValueType > > create_value_view()
Creates a Dense view of the value array of this matrix as a column vector of dimensions nnz x 1.
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:345
value_type * get_values() noexcept
Returns the values of the matrix.
Definition: csr.hpp:1214
Class describing the internal lookup structures created by multiply_reuse(const Csr*) to recompute a ...
Definition: csr.hpp:777
size_type get_num_srow_elements() const noexcept
Returns the number of stored srow elements (involved warps).
Definition: csr.hpp:1302
Class describing the internal lookup structures created by scale_add_reuse to recompute a sparse matr...
Definition: csr.hpp:937
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition: math.hpp:283
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition: polymorphic_object.hpp:667
COO stores a matrix in the coordinate matrix format.
Definition: coo.hpp:50