Ginkgo 1.10.0
A numerical linear algebra library targeting many-core architectures
csr.hpp
1 // SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
2 //
3 // SPDX-License-Identifier: BSD-3-Clause
4 
5 #ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6 #define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7 
8 
9 #include <ginkgo/core/base/array.hpp>
10 #include <ginkgo/core/base/index_set.hpp>
11 #include <ginkgo/core/base/lin_op.hpp>
12 #include <ginkgo/core/base/math.hpp>
13 #include <ginkgo/core/matrix/permutation.hpp>
14 #include <ginkgo/core/matrix/scaled_permutation.hpp>
15 
16 
17 namespace gko {
18 namespace matrix {
19 
20 
21 template <typename ValueType>
22 class Dense;
23 
24 template <typename ValueType>
25 class Diagonal;
26 
27 template <typename ValueType, typename IndexType>
28 class Coo;
29 
30 template <typename ValueType, typename IndexType>
31 class Ell;
32 
33 template <typename ValueType, typename IndexType>
34 class Hybrid;
35 
36 template <typename ValueType, typename IndexType>
37 class Sellp;
38 
39 template <typename ValueType, typename IndexType>
40 class SparsityCsr;
41 
42 template <typename ValueType, typename IndexType>
43 class Csr;
44 
45 template <typename ValueType, typename IndexType>
46 class Fbcsr;
47 
48 template <typename ValueType, typename IndexType>
49 class CsrBuilder;
50 
51 template <typename IndexType>
52 class Permutation;
53 
54 
55 namespace detail {
56 
57 
58 template <typename ValueType = default_precision, typename IndexType = int32>
59 void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
60 
61 
62 } // namespace detail
63 
64 
103 template <typename ValueType = default_precision, typename IndexType = int32>
104 class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
105  public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
106 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
107  public ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>,
108 #endif
109 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
110  public ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>,
111 #endif
112  public ConvertibleTo<Dense<ValueType>>,
113  public ConvertibleTo<Coo<ValueType, IndexType>>,
114  public ConvertibleTo<Ell<ValueType, IndexType>>,
115  public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
116  public ConvertibleTo<Hybrid<ValueType, IndexType>>,
117  public ConvertibleTo<Sellp<ValueType, IndexType>>,
118  public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
119  public DiagonalExtractable<ValueType>,
120  public ReadableFromMatrixData<ValueType, IndexType>,
121  public WritableToMatrixData<ValueType, IndexType>,
122  public Transposable,
123  public Permutable<IndexType>,
124  public EnableAbsoluteComputation<
125  remove_complex<Csr<ValueType, IndexType>>>,
126  public ScaledIdentityAddable {
127  friend class EnablePolymorphicObject<Csr, LinOp>;
128  friend class Coo<ValueType, IndexType>;
129  friend class Dense<ValueType>;
130  friend class Diagonal<ValueType>;
131  friend class Ell<ValueType, IndexType>;
132  friend class Hybrid<ValueType, IndexType>;
133  friend class Sellp<ValueType, IndexType>;
134  friend class SparsityCsr<ValueType, IndexType>;
135  friend class Fbcsr<ValueType, IndexType>;
136  friend class CsrBuilder<ValueType, IndexType>;
137  friend class Csr<to_complex<ValueType>, IndexType>;
138 
139 public:
142  using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
143  using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
144  using ConvertibleTo<Dense<ValueType>>::convert_to;
145  using ConvertibleTo<Dense<ValueType>>::move_to;
146  using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
147  using ConvertibleTo<Coo<ValueType, IndexType>>::move_to;
148  using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
149  using ConvertibleTo<Ell<ValueType, IndexType>>::move_to;
159 
160  using value_type = ValueType;
161  using index_type = IndexType;
162  using transposed_type = Csr<ValueType, IndexType>;
163  using mat_data = matrix_data<ValueType, IndexType>;
164  using device_mat_data = device_matrix_data<ValueType, IndexType>;
165  using absolute_type = remove_complex<Csr>;
166 
167  class automatical;
168 
175  class strategy_type {
176  friend class automatical;
177 
178  public:
184  strategy_type(std::string name) : name_(name) {}
185 
186  virtual ~strategy_type() = default;
187 
193  std::string get_name() { return name_; }
194 
201  virtual void process(const array<index_type>& mtx_row_ptrs,
202  array<index_type>* mtx_srow) = 0;
203 
211  virtual int64_t clac_size(const int64_t nnz) = 0;
212 
217  virtual std::shared_ptr<strategy_type> copy() = 0;
218 
219  protected:
220  void set_name(std::string name) { name_ = name; }
221 
222  private:
223  std::string name_;
224  };
225 
232  class classical : public strategy_type {
233  public:
237  classical() : strategy_type("classical"), max_length_per_row_(0) {}
238 
239  void process(const array<index_type>& mtx_row_ptrs,
240  array<index_type>* mtx_srow) override
241  {
242  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
243  array<index_type> row_ptrs_host(host_mtx_exec);
244  const bool is_mtx_on_host{host_mtx_exec ==
245  mtx_row_ptrs.get_executor()};
246  const index_type* row_ptrs{};
247  if (is_mtx_on_host) {
248  row_ptrs = mtx_row_ptrs.get_const_data();
249  } else {
250  row_ptrs_host = mtx_row_ptrs;
251  row_ptrs = row_ptrs_host.get_const_data();
252  }
253  auto num_rows = mtx_row_ptrs.get_size() - 1;
254  max_length_per_row_ = 0;
255  for (size_type i = 0; i < num_rows; i++) {
256  max_length_per_row_ = std::max(max_length_per_row_,
257  row_ptrs[i + 1] - row_ptrs[i]);
258  }
259  }
260 
261  int64_t clac_size(const int64_t nnz) override { return 0; }
262 
263  index_type get_max_length_per_row() const noexcept
264  {
265  return max_length_per_row_;
266  }
267 
268  std::shared_ptr<strategy_type> copy() override
269  {
270  return std::make_shared<classical>();
271  }
272 
273  private:
274  index_type max_length_per_row_;
275  };
276 
282  class merge_path : public strategy_type {
283  public:
287  merge_path() : strategy_type("merge_path") {}
288 
289  void process(const array<index_type>& mtx_row_ptrs,
290  array<index_type>* mtx_srow) override
291  {}
292 
293  int64_t clac_size(const int64_t nnz) override { return 0; }
294 
295  std::shared_ptr<strategy_type> copy() override
296  {
297  return std::make_shared<merge_path>();
298  }
299  };
300 
307  class cusparse : public strategy_type {
308  public:
312  cusparse() : strategy_type("cusparse") {}
313 
314  void process(const array<index_type>& mtx_row_ptrs,
315  array<index_type>* mtx_srow) override
316  {}
317 
318  int64_t clac_size(const int64_t nnz) override { return 0; }
319 
320  std::shared_ptr<strategy_type> copy() override
321  {
322  return std::make_shared<cusparse>();
323  }
324  };
325 
331  class sparselib : public strategy_type {
332  public:
336  sparselib() : strategy_type("sparselib") {}
337 
338  void process(const array<index_type>& mtx_row_ptrs,
339  array<index_type>* mtx_srow) override
340  {}
341 
342  int64_t clac_size(const int64_t nnz) override { return 0; }
343 
344  std::shared_ptr<strategy_type> copy() override
345  {
346  return std::make_shared<sparselib>();
347  }
348  };
349 
353  class load_balance : public strategy_type {
354  public:
361  [[deprecated]] load_balance()
362  : load_balance(std::move(
363  gko::CudaExecutor::create(0, gko::OmpExecutor::create())))
364  {}
365 
371  load_balance(std::shared_ptr<const CudaExecutor> exec)
372  : load_balance(exec->get_num_warps(), exec->get_warp_size())
373  {}
374 
380  load_balance(std::shared_ptr<const HipExecutor> exec)
381  : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
382  {}
383 
391  load_balance(std::shared_ptr<const DpcppExecutor> exec)
392  : load_balance(exec->get_num_subgroups(), 32, false, "intel")
393  {}
394 
406  load_balance(int64_t nwarps, int warp_size = 32,
407  bool cuda_strategy = true,
408  std::string strategy_name = "none")
409  : strategy_type("load_balance"),
410  nwarps_(nwarps),
411  warp_size_(warp_size),
412  cuda_strategy_(cuda_strategy),
413  strategy_name_(strategy_name)
414  {}
415 
416  void process(const array<index_type>& mtx_row_ptrs,
417  array<index_type>* mtx_srow) override
418  {
419  auto nwarps = mtx_srow->get_size();
420 
421  if (nwarps > 0) {
422  auto host_srow_exec = mtx_srow->get_executor()->get_master();
423  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
424  const bool is_srow_on_host{host_srow_exec ==
425  mtx_srow->get_executor()};
426  const bool is_mtx_on_host{host_mtx_exec ==
427  mtx_row_ptrs.get_executor()};
428  array<index_type> row_ptrs_host(host_mtx_exec);
429  array<index_type> srow_host(host_srow_exec);
430  const index_type* row_ptrs{};
431  index_type* srow{};
432  if (is_srow_on_host) {
433  srow = mtx_srow->get_data();
434  } else {
435  srow_host = *mtx_srow;
436  srow = srow_host.get_data();
437  }
438  if (is_mtx_on_host) {
439  row_ptrs = mtx_row_ptrs.get_const_data();
440  } else {
441  row_ptrs_host = mtx_row_ptrs;
442  row_ptrs = row_ptrs_host.get_const_data();
443  }
444  for (size_type i = 0; i < nwarps; i++) {
445  srow[i] = 0;
446  }
447  const auto num_rows = mtx_row_ptrs.get_size() - 1;
448  const auto num_elems = row_ptrs[num_rows];
449  const auto bucket_divider =
450  num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
451  for (size_type i = 0; i < num_rows; i++) {
452  auto bucket =
453  ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
454  bucket_divider);
455  if (bucket < nwarps) {
456  srow[bucket]++;
457  }
458  }
459  // find starting row for thread i
460  for (size_type i = 1; i < nwarps; i++) {
461  srow[i] += srow[i - 1];
462  }
463  if (!is_srow_on_host) {
464  *mtx_srow = srow_host;
465  }
466  }
467  }
468 
469  int64_t clac_size(const int64_t nnz) override
470  {
471  if (warp_size_ > 0) {
472  int multiple = 8;
473  if (nnz >= static_cast<int64_t>(2e8)) {
474  multiple = 2048;
475  } else if (nnz >= static_cast<int64_t>(2e7)) {
476  multiple = 512;
477  } else if (nnz >= static_cast<int64_t>(2e6)) {
478  multiple = 128;
479  } else if (nnz >= static_cast<int64_t>(2e5)) {
480  multiple = 32;
481  }
482  if (strategy_name_ == "intel") {
483  multiple = 8;
484  if (nnz >= static_cast<int64_t>(2e8)) {
485  multiple = 256;
486  } else if (nnz >= static_cast<int64_t>(2e7)) {
487  multiple = 32;
488  }
489  }
490 #if GINKGO_HIP_PLATFORM_HCC
491  if (!cuda_strategy_) {
492  multiple = 8;
493  if (nnz >= static_cast<int64_t>(1e7)) {
494  multiple = 64;
495  } else if (nnz >= static_cast<int64_t>(1e6)) {
496  multiple = 16;
497  }
498  }
499 #endif // GINKGO_HIP_PLATFORM_HCC
500 
501  auto nwarps = nwarps_ * multiple;
502  return min(ceildiv(nnz, warp_size_), nwarps);
503  } else {
504  return 0;
505  }
506  }
507 
508  std::shared_ptr<strategy_type> copy() override
509  {
510  return std::make_shared<load_balance>(
511  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
512  }
513 
514  private:
515  int64_t nwarps_;
516  int warp_size_;
517  bool cuda_strategy_;
518  std::string strategy_name_;
519  };
520 
521  class automatical : public strategy_type {
522  public:
523  /* Use the load_balance strategy when the maximum number of nonzeros
524  * per row is more than 1024 on NVIDIA hardware */
525  const index_type nvidia_row_len_limit = 1024;
526  /* Use the load_balance strategy when the matrix has more than 1e6
527  * stored elements on NVIDIA hardware */
528  const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
529  /* Use the load_balance strategy when the maximum number of nonzeros
530  * per row is more than 768 on AMD hardware */
531  const index_type amd_row_len_limit = 768;
532  /* Use the load_balance strategy when the matrix has more than 1e8
533  * stored elements on AMD hardware */
534  const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
535  /* Use the load_balance strategy when the maximum number of nonzeros
536  * per row is more than 25600 on Intel hardware */
537  const index_type intel_row_len_limit = 25600;
538  /* Use the load_balance strategy when the matrix has more than 3e8
539  * stored elements on Intel hardware */
540  const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
541 
542  public:
549  [[deprecated]] automatical()
550  : automatical(std::move(
551  gko::CudaExecutor::create(0, gko::OmpExecutor::create())))
552  {}
553 
559  automatical(std::shared_ptr<const CudaExecutor> exec)
560  : automatical(exec->get_num_warps(), exec->get_warp_size())
561  {}
562 
568  automatical(std::shared_ptr<const HipExecutor> exec)
569  : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
570  {}
571 
579  automatical(std::shared_ptr<const DpcppExecutor> exec)
580  : automatical(exec->get_num_subgroups(), 32, false, "intel")
581  {}
582 
594  automatical(int64_t nwarps, int warp_size = 32,
595  bool cuda_strategy = true,
596  std::string strategy_name = "none")
597  : strategy_type("automatical"),
598  nwarps_(nwarps),
599  warp_size_(warp_size),
600  cuda_strategy_(cuda_strategy),
601  strategy_name_(strategy_name),
602  max_length_per_row_(0)
603  {}
604 
605  void process(const array<index_type>& mtx_row_ptrs,
606  array<index_type>* mtx_srow) override
607  {
608  // if the number of stored elements is larger than <nnz_limit> or
609  // the maximum number of stored elements per row is larger than
610  // <row_len_limit>, use load_balance; otherwise use classical
611  index_type nnz_limit = nvidia_nnz_limit;
612  index_type row_len_limit = nvidia_row_len_limit;
613  if (strategy_name_ == "intel") {
614  nnz_limit = intel_nnz_limit;
615  row_len_limit = intel_row_len_limit;
616  }
617 #if GINKGO_HIP_PLATFORM_HCC
618  if (!cuda_strategy_) {
619  nnz_limit = amd_nnz_limit;
620  row_len_limit = amd_row_len_limit;
621  }
622 #endif // GINKGO_HIP_PLATFORM_HCC
623  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
624  const bool is_mtx_on_host{host_mtx_exec ==
625  mtx_row_ptrs.get_executor()};
626  array<index_type> row_ptrs_host(host_mtx_exec);
627  const index_type* row_ptrs{};
628  if (is_mtx_on_host) {
629  row_ptrs = mtx_row_ptrs.get_const_data();
630  } else {
631  row_ptrs_host = mtx_row_ptrs;
632  row_ptrs = row_ptrs_host.get_const_data();
633  }
634  const auto num_rows = mtx_row_ptrs.get_size() - 1;
635  if (row_ptrs[num_rows] > nnz_limit) {
636  load_balance actual_strategy(nwarps_, warp_size_,
637  cuda_strategy_, strategy_name_);
638  if (is_mtx_on_host) {
639  actual_strategy.process(mtx_row_ptrs, mtx_srow);
640  } else {
641  actual_strategy.process(row_ptrs_host, mtx_srow);
642  }
643  this->set_name(actual_strategy.get_name());
644  } else {
645  index_type maxnum = 0;
646  for (size_type i = 0; i < num_rows; i++) {
647  maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
648  }
649  if (maxnum > row_len_limit) {
650  load_balance actual_strategy(
651  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
652  if (is_mtx_on_host) {
653  actual_strategy.process(mtx_row_ptrs, mtx_srow);
654  } else {
655  actual_strategy.process(row_ptrs_host, mtx_srow);
656  }
657  this->set_name(actual_strategy.get_name());
658  } else {
659  classical actual_strategy;
660  if (is_mtx_on_host) {
661  actual_strategy.process(mtx_row_ptrs, mtx_srow);
662  max_length_per_row_ =
663  actual_strategy.get_max_length_per_row();
664  } else {
665  actual_strategy.process(row_ptrs_host, mtx_srow);
666  max_length_per_row_ =
667  actual_strategy.get_max_length_per_row();
668  }
669  this->set_name(actual_strategy.get_name());
670  }
671  }
672  }
673 
674  int64_t clac_size(const int64_t nnz) override
675  {
676  return std::make_shared<load_balance>(
677  nwarps_, warp_size_, cuda_strategy_, strategy_name_)
678  ->clac_size(nnz);
679  }
680 
681  index_type get_max_length_per_row() const noexcept
682  {
683  return max_length_per_row_;
684  }
685 
686  std::shared_ptr<strategy_type> copy() override
687  {
688  return std::make_shared<automatical>(
689  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
690  }
691 
692  private:
693  int64_t nwarps_;
694  int warp_size_;
695  bool cuda_strategy_;
696  std::string strategy_name_;
697  index_type max_length_per_row_;
698  };
699 
700  friend class Csr<previous_precision<ValueType>, IndexType>;
701 
702  void convert_to(
703  Csr<next_precision<ValueType>, IndexType>* result) const override;
704 
705  void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
706 
707 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
708  friend class Csr<previous_precision<ValueType, 2>, IndexType>;
709  using ConvertibleTo<
710  Csr<next_precision<ValueType, 2>, IndexType>>::convert_to;
711  using ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>::move_to;
712 
713  void convert_to(
714  Csr<next_precision<ValueType, 2>, IndexType>* result) const override;
715 
716  void move_to(Csr<next_precision<ValueType, 2>, IndexType>* result) override;
717 #endif
718 
719 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
720  friend class Csr<previous_precision<ValueType, 3>, IndexType>;
721  using ConvertibleTo<
722  Csr<next_precision<ValueType, 3>, IndexType>>::convert_to;
723  using ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>::move_to;
724 
725  void convert_to(
726  Csr<next_precision<ValueType, 3>, IndexType>* result) const override;
727 
728  void move_to(Csr<next_precision<ValueType, 3>, IndexType>* result) override;
729 #endif
730 
731  void convert_to(Dense<ValueType>* other) const override;
732 
733  void move_to(Dense<ValueType>* other) override;
734 
735  void convert_to(Coo<ValueType, IndexType>* result) const override;
736 
737  void move_to(Coo<ValueType, IndexType>* result) override;
738 
739  void convert_to(Ell<ValueType, IndexType>* result) const override;
740 
741  void move_to(Ell<ValueType, IndexType>* result) override;
742 
743  void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
744 
745  void move_to(Fbcsr<ValueType, IndexType>* result) override;
746 
747  void convert_to(Hybrid<ValueType, IndexType>* result) const override;
748 
749  void move_to(Hybrid<ValueType, IndexType>* result) override;
750 
751  void convert_to(Sellp<ValueType, IndexType>* result) const override;
752 
753  void move_to(Sellp<ValueType, IndexType>* result) override;
754 
755  void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
756 
757  void move_to(SparsityCsr<ValueType, IndexType>* result) override;
758 
759  void read(const mat_data& data) override;
760 
761  void read(const device_mat_data& data) override;
762 
763  void read(device_mat_data&& data) override;
764 
765  void write(mat_data& data) const override;
766 
767  std::unique_ptr<LinOp> transpose() const override;
768 
769  std::unique_ptr<LinOp> conj_transpose() const override;
770 
775  struct permuting_reuse_info {
777  explicit permuting_reuse_info();
778 
780  permuting_reuse_info(
781  std::unique_ptr<Permutation<index_type>> value_permutation);
782 
790  void update_values(ptr_param<const Csr> input,
791  ptr_param<Csr> output) const;
792 
793  std::unique_ptr<Permutation<IndexType>> value_permutation;
794  };
795 
807  std::pair<std::unique_ptr<Csr>, permuting_reuse_info> transpose_reuse()
808  const;
809 
824  std::unique_ptr<Csr> permute(
825  ptr_param<const Permutation<index_type>> permutation,
826  permute_mode mode = permute_mode::symmetric) const;
827 
841  std::unique_ptr<Csr> permute(
842  ptr_param<const Permutation<index_type>> row_permutation,
843  ptr_param<const Permutation<index_type>> column_permutation,
844  bool invert = false) const;
845 
866  std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
867  ptr_param<const Permutation<index_type>> permutation,
868  permute_mode mode = permute_mode::symmetric) const;
869 
888  std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
889  ptr_param<const Permutation<index_type>> row_permutation,
890  ptr_param<const Permutation<index_type>> column_permutation,
891  bool invert = false) const;
892 
902  std::unique_ptr<Csr> scale_permute(
903  ptr_param<const ScaledPermutation<value_type, index_type>> permutation,
904  permute_mode mode = permute_mode::symmetric) const;
905 
918  std::unique_ptr<Csr> scale_permute(
919  ptr_param<const ScaledPermutation<value_type, index_type>>
920  row_permutation,
921  ptr_param<const ScaledPermutation<value_type, index_type>>
922  column_permutation,
923  bool invert = false) const;
924 
925  std::unique_ptr<LinOp> permute(
926  const array<IndexType>* permutation_indices) const override;
927 
928  std::unique_ptr<LinOp> inverse_permute(
929  const array<IndexType>* inverse_permutation_indices) const override;
930 
931  std::unique_ptr<LinOp> row_permute(
932  const array<IndexType>* permutation_indices) const override;
933 
934  std::unique_ptr<LinOp> column_permute(
935  const array<IndexType>* permutation_indices) const override;
936 
937  std::unique_ptr<LinOp> inverse_row_permute(
938  const array<IndexType>* inverse_permutation_indices) const override;
939 
940  std::unique_ptr<LinOp> inverse_column_permute(
941  const array<IndexType>* inverse_permutation_indices) const override;
942 
943  std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
944 
945  std::unique_ptr<absolute_type> compute_absolute() const override;
946 
947  void compute_absolute_inplace() override;
948 
952  void sort_by_column_index();
953 
954  /*
955  * Tests if all row entry pairs (value, col_idx) are sorted by column index
956  *
957  * @returns True if all row entry pairs (value, col_idx) are sorted by
958  * column index
959  */
960  bool is_sorted_by_column_index() const;
961 
967  value_type* get_values() noexcept { return values_.get_data(); }
968 
976  const value_type* get_const_values() const noexcept
977  {
978  return values_.get_const_data();
979  }
980 
985  std::unique_ptr<Dense<ValueType>> create_value_view();
986 
991  std::unique_ptr<const Dense<ValueType>> create_const_value_view() const;
992 
998  index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }
999 
1007  const index_type* get_const_col_idxs() const noexcept
1008  {
1009  return col_idxs_.get_const_data();
1010  }
1011 
1017  index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
1018 
1026  const index_type* get_const_row_ptrs() const noexcept
1027  {
1028  return row_ptrs_.get_const_data();
1029  }
1030 
1036  index_type* get_srow() noexcept { return srow_.get_data(); }
1037 
1045  const index_type* get_const_srow() const noexcept
1046  {
1047  return srow_.get_const_data();
1048  }
1049 
1055  size_type get_num_srow_elements() const noexcept
1056  {
1057  return srow_.get_size();
1058  }
1059 
1065  size_type get_num_stored_elements() const noexcept
1066  {
1067  return values_.get_size();
1068  }
1069 
1074  std::shared_ptr<strategy_type> get_strategy() const noexcept
1075  {
1076  return strategy_;
1077  }
1078 
1084  void set_strategy(std::shared_ptr<strategy_type> strategy)
1085  {
1086  strategy_ = std::move(strategy->copy());
1087  this->make_srow();
1088  }
1089 
1096  void scale(ptr_param<const LinOp> alpha)
1097  {
1098  auto exec = this->get_executor();
1099  GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1100  this->scale_impl(make_temporary_clone(exec, alpha).get());
1101  }
1102 
1109  void inv_scale(ptr_param<const LinOp> alpha)
1110  {
1111  auto exec = this->get_executor();
1112  GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1113  this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
1114  }
1115 
1124  static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
1125  std::shared_ptr<strategy_type> strategy);
1126 
1138  static std::unique_ptr<Csr> create(
1139  std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1140  size_type num_nonzeros = {},
1141  std::shared_ptr<strategy_type> strategy = nullptr);
1142 
1162  static std::unique_ptr<Csr> create(
1163  std::shared_ptr<const Executor> exec, const dim<2>& size,
1164  array<value_type> values, array<index_type> col_idxs,
1165  array<index_type> row_ptrs,
1166  std::shared_ptr<strategy_type> strategy = nullptr);
1167 
1172  template <typename InputValueType, typename InputColumnIndexType,
1173  typename InputRowPtrType>
1174  GKO_DEPRECATED(
1175  "explicitly construct the gko::array argument instead of passing "
1176  "initializer lists")
1177  static std::unique_ptr<Csr> create(
1178  std::shared_ptr<const Executor> exec, const dim<2>& size,
1179  std::initializer_list<InputValueType> values,
1180  std::initializer_list<InputColumnIndexType> col_idxs,
1181  std::initializer_list<InputRowPtrType> row_ptrs)
1182  {
1183  return create(exec, size, array<value_type>{exec, std::move(values)},
1184  array<index_type>{exec, std::move(col_idxs)},
1185  array<index_type>{exec, std::move(row_ptrs)});
1186  }
1187 
1203  static std::unique_ptr<const Csr> create_const(
1204  std::shared_ptr<const Executor> exec, const dim<2>& size,
1205  gko::detail::const_array_view<ValueType>&& values,
1206  gko::detail::const_array_view<IndexType>&& col_idxs,
1207  gko::detail::const_array_view<IndexType>&& row_ptrs,
1208  std::shared_ptr<strategy_type> strategy = nullptr);
1209 
1222  std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1223  const index_set<IndexType>& row_index_set,
1224  const index_set<IndexType>& column_index_set) const;
1225 
1237  std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1238  const span& row_span, const span& column_span) const;
1239 
1243  Csr& operator=(const Csr&);
1244 
1250  Csr& operator=(Csr&&);
1251 
1255  Csr(const Csr&);
1256 
1262  Csr(Csr&&);
1263 
1264 protected:
1265  Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1266  size_type num_nonzeros = {},
1267  std::shared_ptr<strategy_type> strategy = nullptr);
1268 
1269  Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
1270  array<value_type> values, array<index_type> col_idxs,
1271  array<index_type> row_ptrs,
1272  std::shared_ptr<strategy_type> strategy = nullptr);
1273 
1274  void apply_impl(const LinOp* b, LinOp* x) const override;
1275 
1276  void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
1277  LinOp* x) const override;
1278 
1279  // TODO: This provides some more sane settings. Please fix this!
1280  static std::shared_ptr<strategy_type> make_default_strategy(
1281  std::shared_ptr<const Executor> exec)
1282  {
1283  auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1284  auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1285  auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1286  std::shared_ptr<strategy_type> new_strategy;
1287  if (cuda_exec) {
1288  new_strategy = std::make_shared<automatical>(cuda_exec);
1289  } else if (hip_exec) {
1290  new_strategy = std::make_shared<automatical>(hip_exec);
1291  } else if (dpcpp_exec) {
1292  new_strategy = std::make_shared<automatical>(dpcpp_exec);
1293  } else {
1294  new_strategy = std::make_shared<classical>();
1295  }
1296  return new_strategy;
1297  }
1298 
1299  // TODO clean this up as soon as we improve strategy_type
1300  template <typename CsrType>
1301  void convert_strategy_helper(CsrType* result) const
1302  {
1303  auto strat = this->get_strategy().get();
1304  std::shared_ptr<typename CsrType::strategy_type> new_strat;
1305  if (dynamic_cast<classical*>(strat)) {
1306  new_strat = std::make_shared<typename CsrType::classical>();
1307  } else if (dynamic_cast<merge_path*>(strat)) {
1308  new_strat = std::make_shared<typename CsrType::merge_path>();
1309  } else if (dynamic_cast<cusparse*>(strat)) {
1310  new_strat = std::make_shared<typename CsrType::cusparse>();
1311  } else if (dynamic_cast<sparselib*>(strat)) {
1312  new_strat = std::make_shared<typename CsrType::sparselib>();
1313  } else {
1314  auto rexec = result->get_executor();
1315  auto cuda_exec =
1316  std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1317  auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1318  auto dpcpp_exec =
1319  std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1320  auto lb = dynamic_cast<load_balance*>(strat);
1321  if (cuda_exec) {
1322  if (lb) {
1323  new_strat =
1324  std::make_shared<typename CsrType::load_balance>(
1325  cuda_exec);
1326  } else {
1327  new_strat = std::make_shared<typename CsrType::automatical>(
1328  cuda_exec);
1329  }
1330  } else if (hip_exec) {
1331  if (lb) {
1332  new_strat =
1333  std::make_shared<typename CsrType::load_balance>(
1334  hip_exec);
1335  } else {
1336  new_strat = std::make_shared<typename CsrType::automatical>(
1337  hip_exec);
1338  }
1339  } else if (dpcpp_exec) {
1340  if (lb) {
1341  new_strat =
1342  std::make_shared<typename CsrType::load_balance>(
1343  dpcpp_exec);
1344  } else {
1345  new_strat = std::make_shared<typename CsrType::automatical>(
1346  dpcpp_exec);
1347  }
1348  } else {
1349  // Try to preserve this executor's configuration
1350  auto this_cuda_exec =
1351  std::dynamic_pointer_cast<const CudaExecutor>(
1352  this->get_executor());
1353  auto this_hip_exec =
1354  std::dynamic_pointer_cast<const HipExecutor>(
1355  this->get_executor());
1356  auto this_dpcpp_exec =
1357  std::dynamic_pointer_cast<const DpcppExecutor>(
1358  this->get_executor());
1359  if (this_cuda_exec) {
1360  if (lb) {
1361  new_strat =
1362  std::make_shared<typename CsrType::load_balance>(
1363  this_cuda_exec);
1364  } else {
1365  new_strat =
1366  std::make_shared<typename CsrType::automatical>(
1367  this_cuda_exec);
1368  }
1369  } else if (this_hip_exec) {
1370  if (lb) {
1371  new_strat =
1372  std::make_shared<typename CsrType::load_balance>(
1373  this_hip_exec);
1374  } else {
1375  new_strat =
1376  std::make_shared<typename CsrType::automatical>(
1377  this_hip_exec);
1378  }
1379  } else if (this_dpcpp_exec) {
1380  if (lb) {
1381  new_strat =
1382  std::make_shared<typename CsrType::load_balance>(
1383  this_dpcpp_exec);
1384  } else {
1385  new_strat =
1386  std::make_shared<typename CsrType::automatical>(
1387  this_dpcpp_exec);
1388  }
1389  } else {
1390  // FIXME: this changes strategies.
1391  // We had a load_balance or automatical strategy from a
1392  // non-HIP, non-CUDA executor and are moving to another
1393  // non-HIP, non-CUDA executor.
1394  new_strat = std::make_shared<typename CsrType::classical>();
1395  }
1396  }
1397  }
1398  result->set_strategy(new_strat);
1399  }
1400 
1404  void make_srow()
1405  {
1406  srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
1407  strategy_->process(row_ptrs_, &srow_);
1408  }
1409 
1416  virtual void scale_impl(const LinOp* alpha);
1417 
1424  virtual void inv_scale_impl(const LinOp* alpha);
1425 
1426 private:
1427  std::shared_ptr<strategy_type> strategy_;
1428  array<value_type> values_;
1429  array<index_type> col_idxs_;
1430  array<index_type> row_ptrs_;
1431  array<index_type> srow_;
1432 
1433  void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
1434 };
1435 
1436 
1437 namespace detail {
1438 
1439 
1446 template <typename ValueType, typename IndexType>
1447 void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1448 {
1449  using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1450  using automatical = typename Csr<ValueType, IndexType>::automatical;
1451  auto strategy = result->get_strategy();
1452  auto executor = result->get_executor();
1453  if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1454  if (auto exec =
1455  std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1456  result->set_strategy(std::make_shared<load_balance>(exec));
1457  } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1458  executor)) {
1459  result->set_strategy(std::make_shared<load_balance>(exec));
1460  }
1461  } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1462  if (auto exec =
1463  std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1464  result->set_strategy(std::make_shared<automatical>(exec));
1465  } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1466  executor)) {
1467  result->set_strategy(std::make_shared<automatical>(exec));
1468  }
1469  }
1470 }
1471 
1472 
1473 } // namespace detail
1474 } // namespace matrix
1475 } // namespace gko
1476 
1477 
1478 #endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
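
Below is a minimal usage sketch, not part of csr.hpp itself. It assumes the usual Ginkgo entry points declared elsewhere in the library (gko::ReferenceExecutor, gko::matrix_data, gko::matrix::Dense, gko::initialize and LinOp::apply) and shows how a Csr matrix is typically created with an explicit SpMV strategy, filled from matrix_data, and applied to a dense vector; calling set_strategy() afterwards recomputes the internal srow helper array.

#include <memory>

#include <ginkgo/ginkgo.hpp>

int main()
{
    // Sequential reference executor; any other executor works the same way.
    auto exec = gko::ReferenceExecutor::create();

    // Assemble a small 3x3 matrix from (row, column, value) triplets.
    gko::matrix_data<double, int> data(
        gko::dim<2>{3, 3},
        {{0, 0, 2.0}, {0, 2, -1.0}, {1, 1, 3.0}, {2, 0, -1.0}, {2, 2, 4.0}});

    // Create an empty Csr matrix with the classical strategy and read the data.
    using csr = gko::matrix::Csr<double, int>;
    auto mtx = csr::create(exec, std::make_shared<csr::classical>());
    mtx->read(data);

    // SpMV: x = A * b with dense column vectors.
    auto b = gko::initialize<gko::matrix::Dense<double>>({1.0, 2.0, 3.0}, exec);
    auto x = gko::matrix::Dense<double>::create(exec, gko::dim<2>{3, 1});
    mtx->apply(b, x);

    // Switching the strategy rebuilds srow (see make_srow) before the next apply.
    mtx->set_strategy(std::make_shared<csr::merge_path>());
    mtx->apply(b, x);
}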