// Ginkgo: generated from pipelines/2171896597 (branch based on develop), Ginkgo version 1.11.0
// A numerical linear algebra library targeting many-core architectures
// csr.hpp
1 // SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
2 //
3 // SPDX-License-Identifier: BSD-3-Clause
4 
5 #ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6 #define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7 
8 
9 #include <ginkgo/core/base/array.hpp>
10 #include <ginkgo/core/base/index_set.hpp>
11 #include <ginkgo/core/base/lin_op.hpp>
12 #include <ginkgo/core/base/math.hpp>
13 #include <ginkgo/core/matrix/permutation.hpp>
14 #include <ginkgo/core/matrix/scaled_permutation.hpp>
15 
16 
17 namespace gko {
18 namespace matrix {
19 
20 
21 template <typename ValueType>
22 class Dense;
23 
24 template <typename ValueType>
25 class Diagonal;
26 
27 template <typename ValueType, typename IndexType>
28 class Coo;
29 
30 template <typename ValueType, typename IndexType>
31 class Ell;
32 
33 template <typename ValueType, typename IndexType>
34 class Hybrid;
35 
36 template <typename ValueType, typename IndexType>
37 class Sellp;
38 
39 template <typename ValueType, typename IndexType>
41 
42 template <typename ValueType, typename IndexType>
43 class Csr;
44 
45 template <typename ValueType, typename IndexType>
46 class Fbcsr;
47 
48 template <typename ValueType, typename IndexType>
49 class CsrBuilder;
50 
51 template <typename IndexType>
53 
54 
55 namespace detail {
56 
57 
58 template <typename ValueType = default_precision, typename IndexType = int32>
59 void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
60 
61 
62 } // namespace detail
63 
64 
103 template <typename ValueType = default_precision, typename IndexType = int32>
104 class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
105  public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
106 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
107  public ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>,
108 #endif
109 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
110  public ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>,
111 #endif
112  public ConvertibleTo<Dense<ValueType>>,
113  public ConvertibleTo<Coo<ValueType, IndexType>>,
114  public ConvertibleTo<Ell<ValueType, IndexType>>,
115  public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
116  public ConvertibleTo<Hybrid<ValueType, IndexType>>,
117  public ConvertibleTo<Sellp<ValueType, IndexType>>,
118  public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
119  public DiagonalExtractable<ValueType>,
120  public ReadableFromMatrixData<ValueType, IndexType>,
121  public WritableToMatrixData<ValueType, IndexType>,
122  public Transposable,
123  public Permutable<IndexType>,
125  remove_complex<Csr<ValueType, IndexType>>>,
126  public ScaledIdentityAddable {
127  friend class EnablePolymorphicObject<Csr, LinOp>;
128  friend class Coo<ValueType, IndexType>;
129  friend class Dense<ValueType>;
130  friend class Diagonal<ValueType>;
131  friend class Ell<ValueType, IndexType>;
132  friend class Hybrid<ValueType, IndexType>;
133  friend class Sellp<ValueType, IndexType>;
134  friend class SparsityCsr<ValueType, IndexType>;
135  friend class Fbcsr<ValueType, IndexType>;
136  friend class CsrBuilder<ValueType, IndexType>;
137  friend class Csr<to_complex<ValueType>, IndexType>;
138  GKO_ASSERT_SUPPORTED_VALUE_AND_INDEX_TYPE;
139 
140 public:
143  using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
144  using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
145  using ConvertibleTo<Dense<ValueType>>::convert_to;
146  using ConvertibleTo<Dense<ValueType>>::move_to;
147  using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
149  using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
160 
161  using value_type = ValueType;
162  using index_type = IndexType;
163  using transposed_type = Csr<ValueType, IndexType>;
164  using mat_data = matrix_data<ValueType, IndexType>;
165  using device_mat_data = device_matrix_data<ValueType, IndexType>;
166  using absolute_type = remove_complex<Csr>;
167 
168  class automatical;
169 
177  friend class automatical;
178 
179  public:
185  strategy_type(std::string name) : name_(name) {}
186 
187  virtual ~strategy_type() = default;
188 
194  std::string get_name() { return name_; }
195 
202  virtual void process(const array<index_type>& mtx_row_ptrs,
203  array<index_type>* mtx_srow) = 0;
204 
212  virtual int64_t clac_size(const int64_t nnz) = 0;
213 
218  virtual std::shared_ptr<strategy_type> copy() = 0;
219 
220  protected:
221  void set_name(std::string name) { name_ = name; }
222 
223  private:
224  std::string name_;
225  };
226 
233  class classical : public strategy_type {
234  public:
238  classical() : strategy_type("classical"), max_length_per_row_(0) {}
239 
240  void process(const array<index_type>& mtx_row_ptrs,
241  array<index_type>* mtx_srow) override
242  {
243  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
244  array<index_type> row_ptrs_host(host_mtx_exec);
245  const bool is_mtx_on_host{host_mtx_exec ==
246  mtx_row_ptrs.get_executor()};
247  const index_type* row_ptrs{};
248  if (is_mtx_on_host) {
249  row_ptrs = mtx_row_ptrs.get_const_data();
250  } else {
251  row_ptrs_host = mtx_row_ptrs;
252  row_ptrs = row_ptrs_host.get_const_data();
253  }
254  auto num_rows = mtx_row_ptrs.get_size() - 1;
255  max_length_per_row_ = 0;
256  for (size_type i = 0; i < num_rows; i++) {
257  max_length_per_row_ = std::max(max_length_per_row_,
258  row_ptrs[i + 1] - row_ptrs[i]);
259  }
260  }
261 
262  int64_t clac_size(const int64_t nnz) override { return 0; }
263 
264  index_type get_max_length_per_row() const noexcept
265  {
266  return max_length_per_row_;
267  }
268 
269  std::shared_ptr<strategy_type> copy() override
270  {
271  return std::make_shared<classical>();
272  }
273 
274  private:
275  index_type max_length_per_row_;
276  };
277 
283  class merge_path : public strategy_type {
284  public:
288  merge_path() : strategy_type("merge_path") {}
289 
290  void process(const array<index_type>& mtx_row_ptrs,
291  array<index_type>* mtx_srow) override
292  {}
293 
294  int64_t clac_size(const int64_t nnz) override { return 0; }
295 
296  std::shared_ptr<strategy_type> copy() override
297  {
298  return std::make_shared<merge_path>();
299  }
300  };
301 
308  class cusparse : public strategy_type {
309  public:
313  cusparse() : strategy_type("cusparse") {}
314 
315  void process(const array<index_type>& mtx_row_ptrs,
316  array<index_type>* mtx_srow) override
317  {}
318 
319  int64_t clac_size(const int64_t nnz) override { return 0; }
320 
321  std::shared_ptr<strategy_type> copy() override
322  {
323  return std::make_shared<cusparse>();
324  }
325  };
326 
332  class sparselib : public strategy_type {
333  public:
337  sparselib() : strategy_type("sparselib") {}
338 
339  void process(const array<index_type>& mtx_row_ptrs,
340  array<index_type>* mtx_srow) override
341  {}
342 
343  int64_t clac_size(const int64_t nnz) override { return 0; }
344 
345  std::shared_ptr<strategy_type> copy() override
346  {
347  return std::make_shared<sparselib>();
348  }
349  };
350 
354  class load_balance : public strategy_type {
355  public:
362  [[deprecated]] load_balance()
363  : load_balance(std::move(
365  {}
366 
372  load_balance(std::shared_ptr<const CudaExecutor> exec)
373  : load_balance(exec->get_num_warps(), exec->get_warp_size())
374  {}
375 
381  load_balance(std::shared_ptr<const HipExecutor> exec)
382  : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
383  {}
384 
392  load_balance(std::shared_ptr<const DpcppExecutor> exec)
393  : load_balance(exec->get_num_subgroups(), 32, false, "intel")
394  {}
395 
407  load_balance(int64_t nwarps, int warp_size = 32,
408  bool cuda_strategy = true,
409  std::string strategy_name = "none")
410  : strategy_type("load_balance"),
411  nwarps_(nwarps),
412  warp_size_(warp_size),
413  cuda_strategy_(cuda_strategy),
414  strategy_name_(strategy_name)
415  {}
416 
417  void process(const array<index_type>& mtx_row_ptrs,
418  array<index_type>* mtx_srow) override
419  {
420  auto nwarps = mtx_srow->get_size();
421 
422  if (nwarps > 0) {
423  auto host_srow_exec = mtx_srow->get_executor()->get_master();
424  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
425  const bool is_srow_on_host{host_srow_exec ==
426  mtx_srow->get_executor()};
427  const bool is_mtx_on_host{host_mtx_exec ==
428  mtx_row_ptrs.get_executor()};
429  array<index_type> row_ptrs_host(host_mtx_exec);
430  array<index_type> srow_host(host_srow_exec);
431  const index_type* row_ptrs{};
432  index_type* srow{};
433  if (is_srow_on_host) {
434  srow = mtx_srow->get_data();
435  } else {
436  srow_host = *mtx_srow;
437  srow = srow_host.get_data();
438  }
439  if (is_mtx_on_host) {
440  row_ptrs = mtx_row_ptrs.get_const_data();
441  } else {
442  row_ptrs_host = mtx_row_ptrs;
443  row_ptrs = row_ptrs_host.get_const_data();
444  }
445  for (size_type i = 0; i < nwarps; i++) {
446  srow[i] = 0;
447  }
448  const auto num_rows = mtx_row_ptrs.get_size() - 1;
449  const auto num_elems = row_ptrs[num_rows];
450  const auto bucket_divider =
451  num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
452  for (size_type i = 0; i < num_rows; i++) {
453  auto bucket =
454  ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
455  bucket_divider);
456  if (bucket < nwarps) {
457  srow[bucket]++;
458  }
459  }
460  // find starting row for thread i
461  for (size_type i = 1; i < nwarps; i++) {
462  srow[i] += srow[i - 1];
463  }
464  if (!is_srow_on_host) {
465  *mtx_srow = srow_host;
466  }
467  }
468  }
469 
470  int64_t clac_size(const int64_t nnz) override
471  {
472  if (warp_size_ > 0) {
473  int multiple = 8;
474  if (nnz >= static_cast<int64_t>(2e8)) {
475  multiple = 2048;
476  } else if (nnz >= static_cast<int64_t>(2e7)) {
477  multiple = 512;
478  } else if (nnz >= static_cast<int64_t>(2e6)) {
479  multiple = 128;
480  } else if (nnz >= static_cast<int64_t>(2e5)) {
481  multiple = 32;
482  }
483  if (strategy_name_ == "intel") {
484  multiple = 8;
485  if (nnz >= static_cast<int64_t>(2e8)) {
486  multiple = 256;
487  } else if (nnz >= static_cast<int64_t>(2e7)) {
488  multiple = 32;
489  }
490  }
491 #if GINKGO_HIP_PLATFORM_HCC
492  if (!cuda_strategy_) {
493  multiple = 8;
494  if (nnz >= static_cast<int64_t>(1e7)) {
495  multiple = 64;
496  } else if (nnz >= static_cast<int64_t>(1e6)) {
497  multiple = 16;
498  }
499  }
500 #endif // GINKGO_HIP_PLATFORM_HCC
501 
502  auto nwarps = nwarps_ * multiple;
503  return min(ceildiv(nnz, warp_size_), nwarps);
504  } else {
505  return 0;
506  }
507  }
508 
509  std::shared_ptr<strategy_type> copy() override
510  {
511  return std::make_shared<load_balance>(
512  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
513  }
514 
515  private:
516  int64_t nwarps_;
517  int warp_size_;
518  bool cuda_strategy_;
519  std::string strategy_name_;
520  };
521 
522  class automatical : public strategy_type {
523  public:
524  /* Use imbalance strategy when the maximum number of nonzero per row is
525  * more than 1024 on NVIDIA hardware */
526  const index_type nvidia_row_len_limit = 1024;
527  /* Use imbalance strategy when the matrix has more more than 1e6 on
528  * NVIDIA hardware */
529  const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
530  /* Use imbalance strategy when the maximum number of nonzero per row is
531  * more than 768 on AMD hardware */
532  const index_type amd_row_len_limit = 768;
533  /* Use imbalance strategy when the matrix has more more than 1e8 on AMD
534  * hardware */
535  const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
536  /* Use imbalance strategy when the maximum number of nonzero per row is
537  * more than 25600 on Intel hardware */
538  const index_type intel_row_len_limit = 25600;
539  /* Use imbalance strategy when the matrix has more more than 3e8 on
540  * Intel hardware */
541  const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
542 
543  public:
550  [[deprecated]] automatical()
551  : automatical(std::move(
553  {}
554 
560  automatical(std::shared_ptr<const CudaExecutor> exec)
561  : automatical(exec->get_num_warps(), exec->get_warp_size())
562  {}
563 
569  automatical(std::shared_ptr<const HipExecutor> exec)
570  : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
571  {}
572 
580  automatical(std::shared_ptr<const DpcppExecutor> exec)
581  : automatical(exec->get_num_subgroups(), 32, false, "intel")
582  {}
583 
595  automatical(int64_t nwarps, int warp_size = 32,
596  bool cuda_strategy = true,
597  std::string strategy_name = "none")
598  : strategy_type("automatical"),
599  nwarps_(nwarps),
600  warp_size_(warp_size),
601  cuda_strategy_(cuda_strategy),
602  strategy_name_(strategy_name),
603  max_length_per_row_(0)
604  {}
605 
606  void process(const array<index_type>& mtx_row_ptrs,
607  array<index_type>* mtx_srow) override
608  {
609  // if the number of stored elements is larger than <nnz_limit> or
610  // the maximum number of stored elements per row is larger than
611  // <row_len_limit>, use load_balance otherwise use classical
612  index_type nnz_limit = nvidia_nnz_limit;
613  index_type row_len_limit = nvidia_row_len_limit;
614  if (strategy_name_ == "intel") {
615  nnz_limit = intel_nnz_limit;
616  row_len_limit = intel_row_len_limit;
617  }
618 #if GINKGO_HIP_PLATFORM_HCC
619  if (!cuda_strategy_) {
620  nnz_limit = amd_nnz_limit;
621  row_len_limit = amd_row_len_limit;
622  }
623 #endif // GINKGO_HIP_PLATFORM_HCC
624  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
625  const bool is_mtx_on_host{host_mtx_exec ==
626  mtx_row_ptrs.get_executor()};
627  array<index_type> row_ptrs_host(host_mtx_exec);
628  const index_type* row_ptrs{};
629  if (is_mtx_on_host) {
630  row_ptrs = mtx_row_ptrs.get_const_data();
631  } else {
632  row_ptrs_host = mtx_row_ptrs;
633  row_ptrs = row_ptrs_host.get_const_data();
634  }
635  const auto num_rows = mtx_row_ptrs.get_size() - 1;
636  if (row_ptrs[num_rows] > nnz_limit) {
637  load_balance actual_strategy(nwarps_, warp_size_,
638  cuda_strategy_, strategy_name_);
639  if (is_mtx_on_host) {
640  actual_strategy.process(mtx_row_ptrs, mtx_srow);
641  } else {
642  actual_strategy.process(row_ptrs_host, mtx_srow);
643  }
644  this->set_name(actual_strategy.get_name());
645  } else {
646  index_type maxnum = 0;
647  for (size_type i = 0; i < num_rows; i++) {
648  maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
649  }
650  if (maxnum > row_len_limit) {
651  load_balance actual_strategy(
652  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
653  if (is_mtx_on_host) {
654  actual_strategy.process(mtx_row_ptrs, mtx_srow);
655  } else {
656  actual_strategy.process(row_ptrs_host, mtx_srow);
657  }
658  this->set_name(actual_strategy.get_name());
659  } else {
660  classical actual_strategy;
661  if (is_mtx_on_host) {
662  actual_strategy.process(mtx_row_ptrs, mtx_srow);
663  max_length_per_row_ =
664  actual_strategy.get_max_length_per_row();
665  } else {
666  actual_strategy.process(row_ptrs_host, mtx_srow);
667  max_length_per_row_ =
668  actual_strategy.get_max_length_per_row();
669  }
670  this->set_name(actual_strategy.get_name());
671  }
672  }
673  }
674 
675  int64_t clac_size(const int64_t nnz) override
676  {
677  return std::make_shared<load_balance>(
678  nwarps_, warp_size_, cuda_strategy_, strategy_name_)
679  ->clac_size(nnz);
680  }
681 
682  index_type get_max_length_per_row() const noexcept
683  {
684  return max_length_per_row_;
685  }
686 
687  std::shared_ptr<strategy_type> copy() override
688  {
689  return std::make_shared<automatical>(
690  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
691  }
692 
693  private:
694  int64_t nwarps_;
695  int warp_size_;
696  bool cuda_strategy_;
697  std::string strategy_name_;
698  index_type max_length_per_row_;
699  };
700 
701  friend class Csr<previous_precision<ValueType>, IndexType>;
702 
703  void convert_to(
704  Csr<next_precision<ValueType>, IndexType>* result) const override;
705 
706  void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
707 
708 #if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
709  friend class Csr<previous_precision<ValueType, 2>, IndexType>;
710  using ConvertibleTo<
711  Csr<next_precision<ValueType, 2>, IndexType>>::convert_to;
712  using ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>::move_to;
713 
714  void convert_to(
715  Csr<next_precision<ValueType, 2>, IndexType>* result) const override;
716 
717  void move_to(Csr<next_precision<ValueType, 2>, IndexType>* result) override;
718 #endif
719 
720 #if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
721  friend class Csr<previous_precision<ValueType, 3>, IndexType>;
722  using ConvertibleTo<
723  Csr<next_precision<ValueType, 3>, IndexType>>::convert_to;
724  using ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>::move_to;
725 
726  void convert_to(
727  Csr<next_precision<ValueType, 3>, IndexType>* result) const override;
728 
729  void move_to(Csr<next_precision<ValueType, 3>, IndexType>* result) override;
730 #endif
731 
732  void convert_to(Dense<ValueType>* other) const override;
733 
734  void move_to(Dense<ValueType>* other) override;
735 
736  void convert_to(Coo<ValueType, IndexType>* result) const override;
737 
738  void move_to(Coo<ValueType, IndexType>* result) override;
739 
740  void convert_to(Ell<ValueType, IndexType>* result) const override;
741 
742  void move_to(Ell<ValueType, IndexType>* result) override;
743 
744  void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
745 
746  void move_to(Fbcsr<ValueType, IndexType>* result) override;
747 
748  void convert_to(Hybrid<ValueType, IndexType>* result) const override;
749 
750  void move_to(Hybrid<ValueType, IndexType>* result) override;
751 
752  void convert_to(Sellp<ValueType, IndexType>* result) const override;
753 
754  void move_to(Sellp<ValueType, IndexType>* result) override;
755 
756  void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
757 
758  void move_to(SparsityCsr<ValueType, IndexType>* result) override;
759 
760  void read(const mat_data& data) override;
761 
762  void read(const device_mat_data& data) override;
763 
764  void read(device_mat_data&& data) override;
765 
766  void write(mat_data& data) const override;
767 
768  std::unique_ptr<LinOp> transpose() const override;
769 
770  std::unique_ptr<LinOp> conj_transpose() const override;
771 
778  friend class Csr;
779 
780  public:
781  explicit multiply_reuse_info();
782 
784 
785  multiply_reuse_info(const multiply_reuse_info&) = delete;
786 
788 
789  multiply_reuse_info& operator=(const multiply_reuse_info&) = delete;
790 
791  multiply_reuse_info& operator=(multiply_reuse_info&&) noexcept;
792 
799  ptr_param<Csr> out) const;
800 
801  private:
802  struct lookup_data;
803 
804  explicit multiply_reuse_info(std::unique_ptr<lookup_data> data);
805 
806  std::unique_ptr<lookup_data> internal;
807  };
808 
819  std::unique_ptr<Csr> multiply(ptr_param<const Csr> other) const;
820 
837  std::pair<std::unique_ptr<Csr>, multiply_reuse_info> multiply_reuse(
838  ptr_param<const Csr> other) const;
839 
846  friend class Csr;
847 
848  public:
849  explicit multiply_add_reuse_info();
850 
852 
854 
856 
858  delete;
859 
860  multiply_add_reuse_info& operator=(multiply_add_reuse_info&&) noexcept;
861 
870  ptr_param<const Dense<value_type>> scale_mult,
871  ptr_param<const Csr> mtx_mult,
873  ptr_param<const Csr> mtx_add,
874  ptr_param<Csr> out) const;
875 
876  private:
877  struct lookup_data;
878 
879  explicit multiply_add_reuse_info(std::unique_ptr<lookup_data> data);
880 
881  std::unique_ptr<lookup_data> internal;
882  };
883 
899  std::unique_ptr<Csr> multiply_add(
900  ptr_param<const Dense<value_type>> scale_mult,
901  ptr_param<const Csr> mtx_mult,
903  ptr_param<const Csr> mtx_add) const;
904 
926  std::pair<std::unique_ptr<Csr>, multiply_add_reuse_info> multiply_add_reuse(
927  ptr_param<const Dense<value_type>> scale_mult,
928  ptr_param<const Csr> mtx_mult,
930  ptr_param<const Csr> mtx_add) const;
931 
938  friend class Csr;
939 
940  public:
941  explicit scale_add_reuse_info();
942 
944 
946 
948 
949  scale_add_reuse_info& operator=(const scale_add_reuse_info&) = delete;
950 
951  scale_add_reuse_info& operator=(scale_add_reuse_info&&) noexcept;
952 
959  void update_values(ptr_param<const Dense<value_type>> scale1,
961  ptr_param<const Dense<value_type>> scale2,
962  ptr_param<const Csr> mtx2, ptr_param<Csr> out) const;
963 
964  private:
965  struct lookup_data;
966 
967  explicit scale_add_reuse_info(std::unique_ptr<lookup_data> data);
968 
969  std::unique_ptr<lookup_data> internal;
970  };
971 
986  std::unique_ptr<Csr> scale_add(
987  ptr_param<const Dense<value_type>> scale_this,
988  ptr_param<const Dense<value_type>> scale_other,
989  ptr_param<const Csr> mtx_other) const;
990 
1012  std::pair<std::unique_ptr<Csr>, scale_add_reuse_info> add_scale_reuse(
1013  ptr_param<const Dense<value_type>> scale_this,
1014  ptr_param<const Dense<value_type>> scale_other,
1015  ptr_param<const Csr> mtx_other) const;
1016 
1023  explicit permuting_reuse_info();
1024 
1026  explicit permuting_reuse_info(
1027  std::unique_ptr<Permutation<index_type>> value_permutation);
1028 
1037  ptr_param<Csr> output) const;
1038 
1039  std::unique_ptr<Permutation<IndexType>> value_permutation;
1040  };
1041 
1054  std::pair<std::unique_ptr<Csr>, permuting_reuse_info> transpose_reuse()
1055  const;
1056 
1071  std::unique_ptr<Csr> permute(
1072  ptr_param<const Permutation<index_type>> permutation,
1074 
1088  std::unique_ptr<Csr> permute(
1089  ptr_param<const Permutation<index_type>> row_permutation,
1090  ptr_param<const Permutation<index_type>> column_permutation,
1091  bool invert = false) const;
1092 
1113  std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
1114  ptr_param<const Permutation<index_type>> permutation,
1116 
1135  std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
1136  ptr_param<const Permutation<index_type>> row_permutation,
1137  ptr_param<const Permutation<index_type>> column_permutation,
1138  bool invert = false) const;
1139 
1149  std::unique_ptr<Csr> scale_permute(
1152 
1165  std::unique_ptr<Csr> scale_permute(
1167  row_permutation,
1169  column_permutation,
1170  bool invert = false) const;
1171 
1172  std::unique_ptr<LinOp> permute(
1173  const array<IndexType>* permutation_indices) const override;
1174 
1175  std::unique_ptr<LinOp> inverse_permute(
1176  const array<IndexType>* inverse_permutation_indices) const override;
1177 
1178  std::unique_ptr<LinOp> row_permute(
1179  const array<IndexType>* permutation_indices) const override;
1180 
1181  std::unique_ptr<LinOp> column_permute(
1182  const array<IndexType>* permutation_indices) const override;
1183 
1184  std::unique_ptr<LinOp> inverse_row_permute(
1185  const array<IndexType>* inverse_permutation_indices) const override;
1186 
1187  std::unique_ptr<LinOp> inverse_column_permute(
1188  const array<IndexType>* inverse_permutation_indices) const override;
1189 
1190  std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
1191 
1192  std::unique_ptr<absolute_type> compute_absolute() const override;
1193 
1194  void compute_absolute_inplace() override;
1195 
1199  void sort_by_column_index();
1200 
1201  /*
1202  * Tests if all row entry pairs (value, col_idx) are sorted by column index
1203  *
1204  * @returns True if all row entry pairs (value, col_idx) are sorted by
1205  * column index
1206  */
1207  bool is_sorted_by_column_index() const;
1208 
1214  value_type* get_values() noexcept { return values_.get_data(); }
1215 
1223  const value_type* get_const_values() const noexcept
1224  {
1225  return values_.get_const_data();
1226  }
1227 
1232  std::unique_ptr<Dense<ValueType>> create_value_view();
1233 
1238  std::unique_ptr<const Dense<ValueType>> create_const_value_view() const;
1239 
1245  index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }
1246 
1254  const index_type* get_const_col_idxs() const noexcept
1255  {
1256  return col_idxs_.get_const_data();
1257  }
1258 
1264  index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
1265 
1273  const index_type* get_const_row_ptrs() const noexcept
1274  {
1275  return row_ptrs_.get_const_data();
1276  }
1277 
1283  index_type* get_srow() noexcept { return srow_.get_data(); }
1284 
1292  const index_type* get_const_srow() const noexcept
1293  {
1294  return srow_.get_const_data();
1295  }
1296 
1303  {
1304  return srow_.get_size();
1305  }
1306 
1313  {
1314  return values_.get_size();
1315  }
1316 
1321  std::shared_ptr<strategy_type> get_strategy() const noexcept
1322  {
1323  return strategy_;
1324  }
1325 
1331  void set_strategy(std::shared_ptr<strategy_type> strategy)
1332  {
1333  strategy_ = std::move(strategy->copy());
1334  this->make_srow();
1335  }
1336 
1344  {
1345  auto exec = this->get_executor();
1346  GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1347  this->scale_impl(make_temporary_clone(exec, alpha).get());
1348  }
1349 
1357  {
1358  auto exec = this->get_executor();
1359  GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1360  this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
1361  }
1362 
1371  static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
1372  std::shared_ptr<strategy_type> strategy);
1373 
1385  static std::unique_ptr<Csr> create(
1386  std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1387  size_type num_nonzeros = {},
1388  std::shared_ptr<strategy_type> strategy = nullptr);
1389 
1409  static std::unique_ptr<Csr> create(
1410  std::shared_ptr<const Executor> exec, const dim<2>& size,
1411  array<value_type> values, array<index_type> col_idxs,
1412  array<index_type> row_ptrs,
1413  std::shared_ptr<strategy_type> strategy = nullptr);
1414 
1419  template <typename InputValueType, typename InputColumnIndexType,
1420  typename InputRowPtrType>
1421  GKO_DEPRECATED(
1422  "explicitly construct the gko::array argument instead of passing "
1423  "initializer lists")
1424  static std::unique_ptr<Csr> create(
1425  std::shared_ptr<const Executor> exec, const dim<2>& size,
1426  std::initializer_list<InputValueType> values,
1427  std::initializer_list<InputColumnIndexType> col_idxs,
1428  std::initializer_list<InputRowPtrType> row_ptrs)
1429  {
1430  return create(exec, size, array<value_type>{exec, std::move(values)},
1431  array<index_type>{exec, std::move(col_idxs)},
1432  array<index_type>{exec, std::move(row_ptrs)});
1433  }
1434 
1450  static std::unique_ptr<const Csr> create_const(
1451  std::shared_ptr<const Executor> exec, const dim<2>& size,
1452  gko::detail::const_array_view<ValueType>&& values,
1453  gko::detail::const_array_view<IndexType>&& col_idxs,
1454  gko::detail::const_array_view<IndexType>&& row_ptrs,
1455  std::shared_ptr<strategy_type> strategy = nullptr);
1456 
1469  std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1470  const index_set<IndexType>& row_index_set,
1471  const index_set<IndexType>& column_index_set) const;
1472 
1484  std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1485  const span& row_span, const span& column_span) const;
1486 
1490  Csr& operator=(const Csr&);
1491 
1497  Csr& operator=(Csr&&);
1498 
1502  Csr(const Csr&);
1503 
1509  Csr(Csr&&);
1510 
1511 protected:
1512  Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1513  size_type num_nonzeros = {},
1514  std::shared_ptr<strategy_type> strategy = nullptr);
1515 
1516  Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
1517  array<value_type> values, array<index_type> col_idxs,
1518  array<index_type> row_ptrs,
1519  std::shared_ptr<strategy_type> strategy = nullptr);
1520 
1521  void apply_impl(const LinOp* b, LinOp* x) const override;
1522 
1523  void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
1524  LinOp* x) const override;
1525 
1526  // TODO: This provides some more sane settings. Please fix this!
1527  static std::shared_ptr<strategy_type> make_default_strategy(
1528  std::shared_ptr<const Executor> exec)
1529  {
1530  auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1531  auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1532  auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1533  std::shared_ptr<strategy_type> new_strategy;
1534  if (cuda_exec) {
1535  new_strategy = std::make_shared<automatical>(cuda_exec);
1536  } else if (hip_exec) {
1537  new_strategy = std::make_shared<automatical>(hip_exec);
1538  } else if (dpcpp_exec) {
1539  new_strategy = std::make_shared<automatical>(dpcpp_exec);
1540  } else {
1541  new_strategy = std::make_shared<classical>();
1542  }
1543  return new_strategy;
1544  }
1545 
1546  // TODO clean this up as soon as we improve strategy_type
1547  template <typename CsrType>
1548  void convert_strategy_helper(CsrType* result) const
1549  {
1550  auto strat = this->get_strategy().get();
1551  std::shared_ptr<typename CsrType::strategy_type> new_strat;
1552  if (dynamic_cast<classical*>(strat)) {
1553  new_strat = std::make_shared<typename CsrType::classical>();
1554  } else if (dynamic_cast<merge_path*>(strat)) {
1555  new_strat = std::make_shared<typename CsrType::merge_path>();
1556  } else if (dynamic_cast<cusparse*>(strat)) {
1557  new_strat = std::make_shared<typename CsrType::cusparse>();
1558  } else if (dynamic_cast<sparselib*>(strat)) {
1559  new_strat = std::make_shared<typename CsrType::sparselib>();
1560  } else {
1561  auto rexec = result->get_executor();
1562  auto cuda_exec =
1563  std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1564  auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1565  auto dpcpp_exec =
1566  std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1567  auto lb = dynamic_cast<load_balance*>(strat);
1568  if (cuda_exec) {
1569  if (lb) {
1570  new_strat =
1571  std::make_shared<typename CsrType::load_balance>(
1572  cuda_exec);
1573  } else {
1574  new_strat = std::make_shared<typename CsrType::automatical>(
1575  cuda_exec);
1576  }
1577  } else if (hip_exec) {
1578  if (lb) {
1579  new_strat =
1580  std::make_shared<typename CsrType::load_balance>(
1581  hip_exec);
1582  } else {
1583  new_strat = std::make_shared<typename CsrType::automatical>(
1584  hip_exec);
1585  }
1586  } else if (dpcpp_exec) {
1587  if (lb) {
1588  new_strat =
1589  std::make_shared<typename CsrType::load_balance>(
1590  dpcpp_exec);
1591  } else {
1592  new_strat = std::make_shared<typename CsrType::automatical>(
1593  dpcpp_exec);
1594  }
1595  } else {
1596  // Try to preserve this executor's configuration
1597  auto this_cuda_exec =
1598  std::dynamic_pointer_cast<const CudaExecutor>(
1599  this->get_executor());
1600  auto this_hip_exec =
1601  std::dynamic_pointer_cast<const HipExecutor>(
1602  this->get_executor());
1603  auto this_dpcpp_exec =
1604  std::dynamic_pointer_cast<const DpcppExecutor>(
1605  this->get_executor());
1606  if (this_cuda_exec) {
1607  if (lb) {
1608  new_strat =
1609  std::make_shared<typename CsrType::load_balance>(
1610  this_cuda_exec);
1611  } else {
1612  new_strat =
1613  std::make_shared<typename CsrType::automatical>(
1614  this_cuda_exec);
1615  }
1616  } else if (this_hip_exec) {
1617  if (lb) {
1618  new_strat =
1619  std::make_shared<typename CsrType::load_balance>(
1620  this_hip_exec);
1621  } else {
1622  new_strat =
1623  std::make_shared<typename CsrType::automatical>(
1624  this_hip_exec);
1625  }
1626  } else if (this_dpcpp_exec) {
1627  if (lb) {
1628  new_strat =
1629  std::make_shared<typename CsrType::load_balance>(
1630  this_dpcpp_exec);
1631  } else {
1632  new_strat =
1633  std::make_shared<typename CsrType::automatical>(
1634  this_dpcpp_exec);
1635  }
1636  } else {
1637  // FIXME: this changes strategies.
1638  // We had a load balance or automatical strategy from a non
1639  // HIP or Cuda executor and are moving to a non HIP or Cuda
1640  // executor.
1641  new_strat = std::make_shared<typename CsrType::classical>();
1642  }
1643  }
1644  }
1645  result->set_strategy(new_strat);
1646  }
1647 
1651  void make_srow()
1652  {
1653  srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
1654  strategy_->process(row_ptrs_, &srow_);
1655  }
1656 
1663  virtual void scale_impl(const LinOp* alpha);
1664 
1671  virtual void inv_scale_impl(const LinOp* alpha);
1672 
1673 private:
1674  std::shared_ptr<strategy_type> strategy_;
1675  array<value_type> values_;
1676  array<index_type> col_idxs_;
1677  array<index_type> row_ptrs_;
1678  array<index_type> srow_;
1679 
1680  void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
1681 };
1682 
1683 
1684 namespace detail {
1685 
1686 
1693 template <typename ValueType, typename IndexType>
1694 void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1695 {
1696  using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1697  using automatical = typename Csr<ValueType, IndexType>::automatical;
1698  auto strategy = result->get_strategy();
1699  auto executor = result->get_executor();
1700  if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1701  if (auto exec =
1702  std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1703  result->set_strategy(std::make_shared<load_balance>(exec));
1704  } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1705  executor)) {
1706  result->set_strategy(std::make_shared<load_balance>(exec));
1707  }
1708  } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1709  if (auto exec =
1710  std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1711  result->set_strategy(std::make_shared<automatical>(exec));
1712  } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1713  executor)) {
1714  result->set_strategy(std::make_shared<automatical>(exec));
1715  }
1716  }
1717 }
1718 
1719 
1720 } // namespace detail
1721 } // namespace matrix
1722 } // namespace gko
1723 
1724 
1725 #endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
gko::matrix::Csr::automatical
Definition: csr.hpp:522
gko::matrix::Csr::get_const_srow
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition: csr.hpp:1292
gko::matrix::Csr::load_balance::load_balance
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition: csr.hpp:381
gko::matrix::Csr::operator=
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
gko::matrix::Csr::cusparse::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:315
gko::matrix::Csr::get_col_idxs
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:1245
gko::matrix::Fbcsr
Fixed-block compressed sparse row storage matrix format.
Definition: csr.hpp:46
gko::matrix::Csr
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: matrix.hpp:30
gko::matrix::Csr::get_const_row_ptrs
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:1273
gko::matrix::Csr::sparselib::sparselib
sparselib()
Creates a sparselib strategy.
Definition: csr.hpp:337
gko::matrix::Csr::multiply
std::unique_ptr< Csr > multiply(ptr_param< const Csr > other) const
Computes the sparse matrix product this * other on the executor of this matrix.
gko::LinOp
Definition: lin_op.hpp:117
gko::matrix::Csr::add_scale_reuse
std::pair< std::unique_ptr< Csr >, scale_add_reuse_info > add_scale_reuse(ptr_param< const Dense< value_type >> scale_this, ptr_param< const Dense< value_type >> scale_other, ptr_param< const Csr > mtx_other) const
Computes the sparse matrix sum scale_this * this + scale_other * mtx_add on the executor of this matr...
gko::matrix::Csr::permute_reuse
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type >> permutation, permute_mode mode=permute_mode::symmetric) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
gko::matrix::Dense
Dense is a matrix format which explicitly stores all values of the matrix.
Definition: dense_cache.hpp:28
gko::matrix::Csr::multiply_add_reuse_info
Class describing the internal lookup structures created by multiply_add_reuse to recompute a sparse m...
Definition: csr.hpp:845
gko::matrix::CsrBuilder
Definition: csr.hpp:49
gko::matrix::Csr::inverse_row_permute
std::unique_ptr< LinOp > inverse_row_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the row permutation of the inverse permuted object.
gko::matrix::Csr::sparselib
sparselib is a strategy_type which uses the sparselib csr.
Definition: csr.hpp:332
gko::DiagonalExtractable
The diagonal of a LinOp implementing this interface can be extracted.
Definition: lin_op.hpp:743
gko::matrix::SparsityCsr
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition: csr.hpp:40
gko::matrix::Csr::load_balance
load_balance is a strategy_type which uses the load balance algorithm.
Definition: csr.hpp:354
gko::matrix::Csr::multiply_reuse
std::pair< std::unique_ptr< Csr >, multiply_reuse_info > multiply_reuse(ptr_param< const Csr > other) const
Computes the sparse matrix product this * other on the executor of this matrix, and necessary data fo...
gko::matrix::Csr::multiply_add_reuse_info::update_values
void update_values(ptr_param< const Csr > mtx, ptr_param< const Dense< value_type >> scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type >> scale_add, ptr_param< const Csr > mtx_add, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix product out = scale_mult * mtx * mtx_mult + scale_add * mtx_add w...
gko::matrix::Csr::scale
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition: csr.hpp:1343
gko::matrix::Csr::automatical::automatical
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition: csr.hpp:569
gko::Transposable
Linear operators which support transposition should implement the Transposable interface.
Definition: lin_op.hpp:433
gko::matrix::Csr::column_permute
std::unique_ptr< LinOp > column_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the column permutation of the Permutable object.
gko::matrix::Csr::strategy_type::get_name
std::string get_name()
Returns the name of strategy.
Definition: csr.hpp:194
gko::matrix::Csr::classical::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:262
gko::matrix::ScaledPermutation
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition: scaled_permutation.hpp:36
gko::size_type
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:90
gko::matrix::Csr::strategy_type::copy
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
gko::matrix::Csr::get_srow
index_type * get_srow() noexcept
Returns the starting rows.
Definition: csr.hpp:1283
gko::matrix::Csr::sparselib::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:343
gko::matrix::Csr::transpose_reuse
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > transpose_reuse() const
Computes the necessary data to update a transposed matrix from its original matrix.
gko::matrix::Permutation
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition: csr.hpp:52
gko::matrix::Csr::automatical::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:606
gko::matrix::Csr::permuting_reuse_info::permuting_reuse_info
permuting_reuse_info()
Creates an empty reuse info.
gko::matrix::Csr::row_permute
std::unique_ptr< LinOp > row_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the row permutation of the Permutable object.
gko::matrix::Csr::classical::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:269
gko::CudaExecutor
This is the Executor subclass which represents the CUDA device.
Definition: executor.hpp:1540
gko::matrix::Csr::strategy_type::process
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
gko::Permutable
Linear operators which support permutation should implement the Permutable interface.
Definition: lin_op.hpp:484
gko::matrix::Csr::transpose
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
gko::matrix::Csr::load_balance::load_balance
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition: csr.hpp:392
gko
The Ginkgo namespace.
Definition: abstract_factory.hpp:20
gko::matrix::Csr::load_balance::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:417
gko::matrix::Csr::inv_scale
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition: csr.hpp:1356
gko::matrix::Csr::extract_diagonal
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
gko::array< index_type >
gko::matrix::Csr::multiply_add
std::unique_ptr< Csr > multiply_add(ptr_param< const Dense< value_type >> scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type >> scale_add, ptr_param< const Csr > mtx_add) const
Computes the sparse matrix product scale_mult * this * mtx_mult + scale_add * mtx_add on the executor...
gko::matrix::Csr::cusparse
cusparse is a strategy_type which uses the sparselib csr.
Definition: csr.hpp:308
gko::matrix::Csr::inverse_permute
std::unique_ptr< LinOp > inverse_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the symmetric inverse row and column permutation of the Permutable objec...
gko::matrix::Csr::get_row_ptrs
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:1264
gko::array::resize_and_reset
void resize_and_reset(size_type size)
Resizes the array so it is able to hold the specified number of elements.
Definition: array.hpp:622
gko::span
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition: range.hpp:46
gko::dim< 2 >
gko::matrix_data
This structure is used as an intermediate data type to store a sparse matrix.
Definition: matrix_data.hpp:126
gko::matrix::Csr::load_balance::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:470
gko::matrix::Csr::merge_path
merge_path is a strategy_type which uses the merge_path algorithm.
Definition: csr.hpp:283
gko::matrix::Csr::permute
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type >> permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation .
gko::index_set
An index set class represents an ordered set of intervals.
Definition: index_set.hpp:56
gko::matrix::Csr::automatical::automatical
automatical()
Creates an automatical strategy.
Definition: csr.hpp:550
gko::matrix::Csr::merge_path::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:296
gko::matrix::Csr::load_balance::load_balance
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition: csr.hpp:407
gko::matrix::Diagonal
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition: lin_op.hpp:31
gko::matrix::Csr::strategy_type::clac_size
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
gko::matrix::Csr::load_balance::load_balance
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition: csr.hpp:372
gko::ptr_param
This class is used for function parameters in the place of raw pointers.
Definition: utils_helper.hpp:41
gko::array::get_data
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition: array.hpp:687
gko::ReadableFromMatrixData
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition: lin_op.hpp:605
gko::OmpExecutor
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition: executor.hpp:1386
gko::matrix::Csr::conj_transpose
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
gko::WritableToMatrixData
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition: lin_op.hpp:660
gko::matrix::permute_mode::symmetric
The rows and columns will be permuted.
gko::matrix::Csr::sparselib::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:339
gko::matrix::Csr::cusparse::cusparse
cusparse()
Creates a cusparse strategy.
Definition: csr.hpp:313
gko::matrix::Csr::cusparse::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:319
gko::matrix::Csr::merge_path::merge_path
merge_path()
Creates a merge_path strategy.
Definition: csr.hpp:288
gko::matrix::Csr::get_const_values
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition: csr.hpp:1223
gko::stop::mode
mode
The mode for the residual norm criterion.
Definition: residual_norm.hpp:37
gko::matrix::Csr::load_balance::load_balance
load_balance()
Creates a load_balance strategy.
Definition: csr.hpp:362
gko::array::get_executor
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition: array.hpp:703
gko::matrix::Csr::get_num_stored_elements
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition: csr.hpp:1312
gko::matrix::Csr::create_submatrix
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
gko::ScaledIdentityAddable
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition: lin_op.hpp:818
gko::matrix::Csr::permuting_reuse_info
A struct describing a transformation of the matrix that reorders the values of the matrix into the tr...
Definition: csr.hpp:1021
gko::matrix::Csr::load_balance::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:509
gko::next_precision
typename detail::find_precision_impl< T, step >::type next_precision
Obtains the next type of T in the singly-linked precision list, taking bfloat16/half into account.
Definition: math.hpp:466
gko::matrix::Csr::classical::classical
classical()
Creates a classical strategy.
Definition: csr.hpp:238
gko::matrix::Csr::strategy_type::strategy_type
strategy_type(std::string name)
Creates a strategy_type.
Definition: csr.hpp:185
gko::matrix::Csr::sort_by_column_index
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
gko::matrix::Csr::merge_path::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:290
gko::matrix::Csr::create_const
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
gko::matrix::Csr::scale_add
std::unique_ptr< Csr > scale_add(ptr_param< const Dense< value_type >> scale_this, ptr_param< const Dense< value_type >> scale_other, ptr_param< const Csr > mtx_other) const
Computes the sparse matrix sum scale_this * this + scale_other * mtx_add on the executor of this matr...
gko::previous_precision
typename detail::find_precision_impl< T, -step >::type previous_precision
Obtains the previous type of T in the singly-linked precision list, taking bfloat16/half into account.
Definition: math.hpp:473
gko::matrix::Csr::automatical::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:687
gko::matrix::Csr::classical
classical is a strategy_type which uses the same number of threads on each row.
Definition: csr.hpp:233
gko::matrix::Csr::get_strategy
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition: csr.hpp:1321
gko::matrix::Csr::permuting_reuse_info::update_values
void update_values(ptr_param< const Csr > input, ptr_param< Csr > output) const
Propagates the values from an input matrix to the transformed matrix.
gko::matrix::Csr::set_strategy
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition: csr.hpp:1331
gko::matrix::Csr::scale_add_reuse_info::update_values
void update_values(ptr_param< const Dense< value_type >> scale1, ptr_param< const Csr > mtx1, ptr_param< const Dense< value_type >> scale2, ptr_param< const Csr > mtx2, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix sum out = scale1 * mtx1 + scale2 * mtx2 when only the values of m...
gko::matrix::Ell
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition: csr.hpp:31
gko::matrix::Csr::create_const_value_view
std::unique_ptr< const Dense< ValueType > > create_const_value_view() const
Creates a const Dense view of the value array of this matrix as a column vector of dimensions nnz x 1...
gko::ConvertibleTo
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition: polymorphic_object.hpp:479
gko::matrix::Csr::compute_absolute
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
gko::matrix::Csr::strategy_type
strategy_type is to decide how to set the csr algorithm.
Definition: csr.hpp:176
gko::make_temporary_clone
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition: temporary_clone.hpp:208
gko::matrix::Csr::multiply_add_reuse
std::pair< std::unique_ptr< Csr >, multiply_add_reuse_info > multiply_add_reuse(ptr_param< const Dense< value_type >> scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type >> scale_add, ptr_param< const Csr > mtx_add) const
Computes the sparse matrix product scale_mult * this * mtx_mult + scale_add * mtx_add on the executor...
gko::Executor
The first step in using the Ginkgo library consists of creating an executor.
Definition: executor.hpp:615
gko::matrix::Hybrid
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition: coo.hpp:32
gko::array::get_const_data
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition: array.hpp:696
gko::matrix::Csr::write
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
gko::matrix::permute_mode
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition: permutation.hpp:42
gko::matrix::Csr::multiply_reuse_info::update_values
void update_values(ptr_param< const Csr > mtx1, ptr_param< const Csr > mtx2, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix product out = mtx1 * mtx2 when only the values of mtx1 and mtx2 c...
gko::matrix::Sellp
SELL-P is a matrix format similar to ELL format.
Definition: csr.hpp:37
gko::min
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition: math.hpp:750
gko::matrix::Csr::cusparse::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:321
gko::matrix::Csr::get_const_col_idxs
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:1254
gko::ceildiv
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition: math.hpp:614
gko::matrix::Csr::automatical::automatical
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with Dpcpp executor.
Definition: csr.hpp:580
gko::matrix::Csr::merge_path::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:294
gko::EnableAbsoluteComputation
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition: lin_op.hpp:794
gko::matrix::Csr::inverse_column_permute
std::unique_ptr< LinOp > inverse_column_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the row permutation of the inverse permuted object.
gko::matrix::Csr::Csr
Csr(const Csr &)
Copy-constructs a Csr matrix.
gko::matrix::Csr::automatical::automatical
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition: csr.hpp:560
gko::PolymorphicObject::get_executor
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition: polymorphic_object.hpp:243
gko::array::get_size
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition: array.hpp:670
gko::matrix::Csr::automatical::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:675
gko::matrix::Csr::automatical::automatical
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition: csr.hpp:595
gko::matrix::Csr::classical::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:240
gko::remove_complex
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition: math.hpp:264
gko::matrix::Csr::compute_absolute_inplace
void compute_absolute_inplace() override
Compute absolute inplace on each element.
gko::matrix::Csr::scale_permute
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type >> permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
gko::device_matrix_data
This type is a device-side equivalent to matrix_data.
Definition: device_matrix_data.hpp:36
gko::matrix::Csr::read
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.
gko::matrix::Csr::create
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
gko::EnableLinOp
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition: lin_op.hpp:877
gko::matrix::Csr::create_value_view
std::unique_ptr< Dense< ValueType > > create_value_view()
Creates a Dense view of the value array of this matrix as a column vector of dimensions nnz x 1.
gko::matrix::Csr::sparselib::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:345
gko::matrix::Csr::get_values
value_type * get_values() noexcept
Returns the values of the matrix.
Definition: csr.hpp:1214
gko::matrix::Csr::multiply_reuse_info
Class describing the internal lookup structures created by multiply_reuse(const Csr*) to recompute a ...
Definition: csr.hpp:777
gko::matrix::Csr::get_num_srow_elements
size_type get_num_srow_elements() const noexcept
Returns the number of the srow stored elements (involved warps)
Definition: csr.hpp:1302
gko::matrix::Csr::scale_add_reuse_info
Class describing the internal lookup structures created by scale_add_reuse to recompute a sparse matr...
Definition: csr.hpp:937
gko::LinOp::LinOp
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
gko::to_complex
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition: math.hpp:283
gko::EnablePolymorphicObject
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition: polymorphic_object.hpp:667
gko::matrix::Coo
COO stores a matrix in the coordinate matrix format.
Definition: coo.hpp:50