Ginkgo  Generated from pipelines/1589998975 branch based on develop. Ginkgo version 1.10.0
A numerical linear algebra library targeting many-core architectures
csr.hpp
1 // SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
2 //
3 // SPDX-License-Identifier: BSD-3-Clause
4 
5 #ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6 #define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7 
8 
9 #include <ginkgo/core/base/array.hpp>
10 #include <ginkgo/core/base/index_set.hpp>
11 #include <ginkgo/core/base/lin_op.hpp>
12 #include <ginkgo/core/base/math.hpp>
13 #include <ginkgo/core/matrix/permutation.hpp>
14 #include <ginkgo/core/matrix/scaled_permutation.hpp>
15 
16 
17 namespace gko {
18 namespace matrix {
19 
20 
21 template <typename ValueType>
22 class Dense;
23 
24 template <typename ValueType>
25 class Diagonal;
26 
27 template <typename ValueType, typename IndexType>
28 class Coo;
29 
30 template <typename ValueType, typename IndexType>
31 class Ell;
32 
33 template <typename ValueType, typename IndexType>
34 class Hybrid;
35 
36 template <typename ValueType, typename IndexType>
37 class Sellp;
38 
template <typename ValueType, typename IndexType>
class SparsityCsr;
42 template <typename ValueType, typename IndexType>
43 class Csr;
44 
45 template <typename ValueType, typename IndexType>
46 class Fbcsr;
47 
48 template <typename ValueType, typename IndexType>
49 class CsrBuilder;
50 
51 
52 namespace detail {
53 
54 
// Rebuilds an executor-appropriate strategy (load_balance / automatical)
// for the given Csr matrix; the definition appears at the bottom of this
// file.
55 template <typename ValueType = default_precision, typename IndexType = int32>
56 void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
57 
58 
59 } // namespace detail
60 
61 
100 template <typename ValueType = default_precision, typename IndexType = int32>
101 class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
102  public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
103 #if GINKGO_ENABLE_HALF
104  public ConvertibleTo<
105  Csr<next_precision<next_precision<ValueType>>, IndexType>>,
106 #endif
107  public ConvertibleTo<Dense<ValueType>>,
108  public ConvertibleTo<Coo<ValueType, IndexType>>,
109  public ConvertibleTo<Ell<ValueType, IndexType>>,
110  public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
111  public ConvertibleTo<Hybrid<ValueType, IndexType>>,
112  public ConvertibleTo<Sellp<ValueType, IndexType>>,
113  public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
114  public DiagonalExtractable<ValueType>,
115  public ReadableFromMatrixData<ValueType, IndexType>,
116  public WritableToMatrixData<ValueType, IndexType>,
117  public Transposable,
118  public Permutable<IndexType>,
120  remove_complex<Csr<ValueType, IndexType>>>,
121  public ScaledIdentityAddable {
122  friend class EnablePolymorphicObject<Csr, LinOp>;
123  friend class Coo<ValueType, IndexType>;
124  friend class Dense<ValueType>;
125  friend class Diagonal<ValueType>;
126  friend class Ell<ValueType, IndexType>;
127  friend class Hybrid<ValueType, IndexType>;
128  friend class Sellp<ValueType, IndexType>;
129  friend class SparsityCsr<ValueType, IndexType>;
130  friend class Fbcsr<ValueType, IndexType>;
131  friend class CsrBuilder<ValueType, IndexType>;
132  friend class Csr<to_complex<ValueType>, IndexType>;
133 
134 public:
137  using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
138  using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
139  using ConvertibleTo<Dense<ValueType>>::convert_to;
140  using ConvertibleTo<Dense<ValueType>>::move_to;
141  using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
143  using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
154 
155  using value_type = ValueType;
156  using index_type = IndexType;
157  using transposed_type = Csr<ValueType, IndexType>;
158  using mat_data = matrix_data<ValueType, IndexType>;
159  using device_mat_data = device_matrix_data<ValueType, IndexType>;
160  using absolute_type = remove_complex<Csr>;
161 
162  class automatical;
163 
// NOTE(review): the opening of the nested strategy_type class (and its
// doc comments) was lost in extraction between original lines 163 and
// 171 — the members below belong to the abstract base class of all CSR
// kernel strategies. TODO: confirm against the upstream header.
171  friend class automatical;
172 
173  public:
// Creates a strategy with the given kernel/strategy name.
179  strategy_type(std::string name) : name_(name) {}
180 
181  virtual ~strategy_type() = default;
182 
// Returns the name of the strategy.
188  std::string get_name() { return name_; }
189 
// Computes the srow (starting-rows) helper array from the row pointers.
196  virtual void process(const array<index_type>& mtx_row_ptrs,
197  array<index_type>* mtx_srow) = 0;
198 
// Computes the required srow size for the given number of nonzeros.
// NOTE(review): "clac_size" looks like a typo of "calc_size", but it is
// part of the public interface — renaming here would break every
// derived strategy and external override.
206  virtual int64_t clac_size(const int64_t nnz) = 0;
207 
// Creates a copy of this strategy.
212  virtual std::shared_ptr<strategy_type> copy() = 0;
213 
214  protected:
// Allows derived strategies (e.g. automatical) to rename themselves.
215  void set_name(std::string name) { name_ = name; }
216 
217  private:
218  std::string name_;
219  };
220 
227  class classical : public strategy_type {
228  public:
232  classical() : strategy_type("classical"), max_length_per_row_(0) {}
233 
234  void process(const array<index_type>& mtx_row_ptrs,
235  array<index_type>* mtx_srow) override
236  {
237  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
238  array<index_type> row_ptrs_host(host_mtx_exec);
239  const bool is_mtx_on_host{host_mtx_exec ==
240  mtx_row_ptrs.get_executor()};
241  const index_type* row_ptrs{};
242  if (is_mtx_on_host) {
243  row_ptrs = mtx_row_ptrs.get_const_data();
244  } else {
245  row_ptrs_host = mtx_row_ptrs;
246  row_ptrs = row_ptrs_host.get_const_data();
247  }
248  auto num_rows = mtx_row_ptrs.get_size() - 1;
249  max_length_per_row_ = 0;
250  for (size_type i = 0; i < num_rows; i++) {
251  max_length_per_row_ = std::max(max_length_per_row_,
252  row_ptrs[i + 1] - row_ptrs[i]);
253  }
254  }
255 
256  int64_t clac_size(const int64_t nnz) override { return 0; }
257 
258  index_type get_max_length_per_row() const noexcept
259  {
260  return max_length_per_row_;
261  }
262 
263  std::shared_ptr<strategy_type> copy() override
264  {
265  return std::make_shared<classical>();
266  }
267 
268  private:
269  index_type max_length_per_row_;
270  };
271 
277  class merge_path : public strategy_type {
278  public:
282  merge_path() : strategy_type("merge_path") {}
283 
284  void process(const array<index_type>& mtx_row_ptrs,
285  array<index_type>* mtx_srow) override
286  {}
287 
288  int64_t clac_size(const int64_t nnz) override { return 0; }
289 
290  std::shared_ptr<strategy_type> copy() override
291  {
292  return std::make_shared<merge_path>();
293  }
294  };
295 
302  class cusparse : public strategy_type {
303  public:
307  cusparse() : strategy_type("cusparse") {}
308 
309  void process(const array<index_type>& mtx_row_ptrs,
310  array<index_type>* mtx_srow) override
311  {}
312 
313  int64_t clac_size(const int64_t nnz) override { return 0; }
314 
315  std::shared_ptr<strategy_type> copy() override
316  {
317  return std::make_shared<cusparse>();
318  }
319  };
320 
326  class sparselib : public strategy_type {
327  public:
331  sparselib() : strategy_type("sparselib") {}
332 
333  void process(const array<index_type>& mtx_row_ptrs,
334  array<index_type>* mtx_srow) override
335  {}
336 
337  int64_t clac_size(const int64_t nnz) override { return 0; }
338 
339  std::shared_ptr<strategy_type> copy() override
340  {
341  return std::make_shared<sparselib>();
342  }
343  };
344 
348  class load_balance : public strategy_type {
349  public:
356  [[deprecated]] load_balance()
357  : load_balance(std::move(
359  {}
360 
366  load_balance(std::shared_ptr<const CudaExecutor> exec)
367  : load_balance(exec->get_num_warps(), exec->get_warp_size())
368  {}
369 
375  load_balance(std::shared_ptr<const HipExecutor> exec)
376  : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
377  {}
378 
386  load_balance(std::shared_ptr<const DpcppExecutor> exec)
387  : load_balance(exec->get_num_subgroups(), 32, false, "intel")
388  {}
389 
401  load_balance(int64_t nwarps, int warp_size = 32,
402  bool cuda_strategy = true,
403  std::string strategy_name = "none")
404  : strategy_type("load_balance"),
405  nwarps_(nwarps),
406  warp_size_(warp_size),
407  cuda_strategy_(cuda_strategy),
408  strategy_name_(strategy_name)
409  {}
410 
411  void process(const array<index_type>& mtx_row_ptrs,
412  array<index_type>* mtx_srow) override
413  {
414  auto nwarps = mtx_srow->get_size();
415 
416  if (nwarps > 0) {
417  auto host_srow_exec = mtx_srow->get_executor()->get_master();
418  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
419  const bool is_srow_on_host{host_srow_exec ==
420  mtx_srow->get_executor()};
421  const bool is_mtx_on_host{host_mtx_exec ==
422  mtx_row_ptrs.get_executor()};
423  array<index_type> row_ptrs_host(host_mtx_exec);
424  array<index_type> srow_host(host_srow_exec);
425  const index_type* row_ptrs{};
426  index_type* srow{};
427  if (is_srow_on_host) {
428  srow = mtx_srow->get_data();
429  } else {
430  srow_host = *mtx_srow;
431  srow = srow_host.get_data();
432  }
433  if (is_mtx_on_host) {
434  row_ptrs = mtx_row_ptrs.get_const_data();
435  } else {
436  row_ptrs_host = mtx_row_ptrs;
437  row_ptrs = row_ptrs_host.get_const_data();
438  }
439  for (size_type i = 0; i < nwarps; i++) {
440  srow[i] = 0;
441  }
442  const auto num_rows = mtx_row_ptrs.get_size() - 1;
443  const auto num_elems = row_ptrs[num_rows];
444  const auto bucket_divider =
445  num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
446  for (size_type i = 0; i < num_rows; i++) {
447  auto bucket =
448  ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
449  bucket_divider);
450  if (bucket < nwarps) {
451  srow[bucket]++;
452  }
453  }
454  // find starting row for thread i
455  for (size_type i = 1; i < nwarps; i++) {
456  srow[i] += srow[i - 1];
457  }
458  if (!is_srow_on_host) {
459  *mtx_srow = srow_host;
460  }
461  }
462  }
463 
464  int64_t clac_size(const int64_t nnz) override
465  {
466  if (warp_size_ > 0) {
467  int multiple = 8;
468  if (nnz >= static_cast<int64_t>(2e8)) {
469  multiple = 2048;
470  } else if (nnz >= static_cast<int64_t>(2e7)) {
471  multiple = 512;
472  } else if (nnz >= static_cast<int64_t>(2e6)) {
473  multiple = 128;
474  } else if (nnz >= static_cast<int64_t>(2e5)) {
475  multiple = 32;
476  }
477  if (strategy_name_ == "intel") {
478  multiple = 8;
479  if (nnz >= static_cast<int64_t>(2e8)) {
480  multiple = 256;
481  } else if (nnz >= static_cast<int64_t>(2e7)) {
482  multiple = 32;
483  }
484  }
485 #if GINKGO_HIP_PLATFORM_HCC
486  if (!cuda_strategy_) {
487  multiple = 8;
488  if (nnz >= static_cast<int64_t>(1e7)) {
489  multiple = 64;
490  } else if (nnz >= static_cast<int64_t>(1e6)) {
491  multiple = 16;
492  }
493  }
494 #endif // GINKGO_HIP_PLATFORM_HCC
495 
496  auto nwarps = nwarps_ * multiple;
497  return min(ceildiv(nnz, warp_size_), nwarps);
498  } else {
499  return 0;
500  }
501  }
502 
503  std::shared_ptr<strategy_type> copy() override
504  {
505  return std::make_shared<load_balance>(
506  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
507  }
508 
509  private:
510  int64_t nwarps_;
511  int warp_size_;
512  bool cuda_strategy_;
513  std::string strategy_name_;
514  };
515 
516  class automatical : public strategy_type {
517  public:
518  /* Use imbalance strategy when the maximum number of nonzero per row is
519  * more than 1024 on NVIDIA hardware */
520  const index_type nvidia_row_len_limit = 1024;
521  /* Use imbalance strategy when the matrix has more more than 1e6 on
522  * NVIDIA hardware */
523  const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
524  /* Use imbalance strategy when the maximum number of nonzero per row is
525  * more than 768 on AMD hardware */
526  const index_type amd_row_len_limit = 768;
527  /* Use imbalance strategy when the matrix has more more than 1e8 on AMD
528  * hardware */
529  const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
530  /* Use imbalance strategy when the maximum number of nonzero per row is
531  * more than 25600 on Intel hardware */
532  const index_type intel_row_len_limit = 25600;
533  /* Use imbalance strategy when the matrix has more more than 3e8 on
534  * Intel hardware */
535  const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
536 
537  public:
544  [[deprecated]] automatical()
545  : automatical(std::move(
547  {}
548 
554  automatical(std::shared_ptr<const CudaExecutor> exec)
555  : automatical(exec->get_num_warps(), exec->get_warp_size())
556  {}
557 
563  automatical(std::shared_ptr<const HipExecutor> exec)
564  : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
565  {}
566 
574  automatical(std::shared_ptr<const DpcppExecutor> exec)
575  : automatical(exec->get_num_subgroups(), 32, false, "intel")
576  {}
577 
589  automatical(int64_t nwarps, int warp_size = 32,
590  bool cuda_strategy = true,
591  std::string strategy_name = "none")
592  : strategy_type("automatical"),
593  nwarps_(nwarps),
594  warp_size_(warp_size),
595  cuda_strategy_(cuda_strategy),
596  strategy_name_(strategy_name),
597  max_length_per_row_(0)
598  {}
599 
600  void process(const array<index_type>& mtx_row_ptrs,
601  array<index_type>* mtx_srow) override
602  {
603  // if the number of stored elements is larger than <nnz_limit> or
604  // the maximum number of stored elements per row is larger than
605  // <row_len_limit>, use load_balance otherwise use classical
606  index_type nnz_limit = nvidia_nnz_limit;
607  index_type row_len_limit = nvidia_row_len_limit;
608  if (strategy_name_ == "intel") {
609  nnz_limit = intel_nnz_limit;
610  row_len_limit = intel_row_len_limit;
611  }
612 #if GINKGO_HIP_PLATFORM_HCC
613  if (!cuda_strategy_) {
614  nnz_limit = amd_nnz_limit;
615  row_len_limit = amd_row_len_limit;
616  }
617 #endif // GINKGO_HIP_PLATFORM_HCC
618  auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
619  const bool is_mtx_on_host{host_mtx_exec ==
620  mtx_row_ptrs.get_executor()};
621  array<index_type> row_ptrs_host(host_mtx_exec);
622  const index_type* row_ptrs{};
623  if (is_mtx_on_host) {
624  row_ptrs = mtx_row_ptrs.get_const_data();
625  } else {
626  row_ptrs_host = mtx_row_ptrs;
627  row_ptrs = row_ptrs_host.get_const_data();
628  }
629  const auto num_rows = mtx_row_ptrs.get_size() - 1;
630  if (row_ptrs[num_rows] > nnz_limit) {
631  load_balance actual_strategy(nwarps_, warp_size_,
632  cuda_strategy_, strategy_name_);
633  if (is_mtx_on_host) {
634  actual_strategy.process(mtx_row_ptrs, mtx_srow);
635  } else {
636  actual_strategy.process(row_ptrs_host, mtx_srow);
637  }
638  this->set_name(actual_strategy.get_name());
639  } else {
640  index_type maxnum = 0;
641  for (size_type i = 0; i < num_rows; i++) {
642  maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
643  }
644  if (maxnum > row_len_limit) {
645  load_balance actual_strategy(
646  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
647  if (is_mtx_on_host) {
648  actual_strategy.process(mtx_row_ptrs, mtx_srow);
649  } else {
650  actual_strategy.process(row_ptrs_host, mtx_srow);
651  }
652  this->set_name(actual_strategy.get_name());
653  } else {
654  classical actual_strategy;
655  if (is_mtx_on_host) {
656  actual_strategy.process(mtx_row_ptrs, mtx_srow);
657  max_length_per_row_ =
658  actual_strategy.get_max_length_per_row();
659  } else {
660  actual_strategy.process(row_ptrs_host, mtx_srow);
661  max_length_per_row_ =
662  actual_strategy.get_max_length_per_row();
663  }
664  this->set_name(actual_strategy.get_name());
665  }
666  }
667  }
668 
669  int64_t clac_size(const int64_t nnz) override
670  {
671  return std::make_shared<load_balance>(
672  nwarps_, warp_size_, cuda_strategy_, strategy_name_)
673  ->clac_size(nnz);
674  }
675 
676  index_type get_max_length_per_row() const noexcept
677  {
678  return max_length_per_row_;
679  }
680 
681  std::shared_ptr<strategy_type> copy() override
682  {
683  return std::make_shared<automatical>(
684  nwarps_, warp_size_, cuda_strategy_, strategy_name_);
685  }
686 
687  private:
688  int64_t nwarps_;
689  int warp_size_;
690  bool cuda_strategy_;
691  std::string strategy_name_;
692  index_type max_length_per_row_;
693  };
694 
695  friend class Csr<previous_precision<ValueType>, IndexType>;
696 
697  void convert_to(
698  Csr<next_precision<ValueType>, IndexType>* result) const override;
699 
700  void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
701 
702 #if GINKGO_ENABLE_HALF
703  friend class Csr<previous_precision<previous_precision<ValueType>>,
704  IndexType>;
705  using ConvertibleTo<
706  Csr<next_precision<next_precision<ValueType>>, IndexType>>::convert_to;
707  using ConvertibleTo<
708  Csr<next_precision<next_precision<ValueType>>, IndexType>>::move_to;
709 
710  void convert_to(Csr<next_precision<next_precision<ValueType>>, IndexType>*
711  result) const override;
712 
713  void move_to(Csr<next_precision<next_precision<ValueType>>, IndexType>*
714  result) override;
715 #endif
716 
717  void convert_to(Dense<ValueType>* other) const override;
718 
719  void move_to(Dense<ValueType>* other) override;
720 
721  void convert_to(Coo<ValueType, IndexType>* result) const override;
722 
723  void move_to(Coo<ValueType, IndexType>* result) override;
724 
725  void convert_to(Ell<ValueType, IndexType>* result) const override;
726 
727  void move_to(Ell<ValueType, IndexType>* result) override;
728 
729  void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
730 
731  void move_to(Fbcsr<ValueType, IndexType>* result) override;
732 
733  void convert_to(Hybrid<ValueType, IndexType>* result) const override;
734 
735  void move_to(Hybrid<ValueType, IndexType>* result) override;
736 
737  void convert_to(Sellp<ValueType, IndexType>* result) const override;
738 
739  void move_to(Sellp<ValueType, IndexType>* result) override;
740 
741  void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
742 
743  void move_to(SparsityCsr<ValueType, IndexType>* result) override;
744 
745  void read(const mat_data& data) override;
746 
747  void read(const device_mat_data& data) override;
748 
749  void read(device_mat_data&& data) override;
750 
751  void write(mat_data& data) const override;
752 
753  std::unique_ptr<LinOp> transpose() const override;
754 
755  std::unique_ptr<LinOp> conj_transpose() const override;
756 
771  std::unique_ptr<Csr> permute(
772  ptr_param<const Permutation<index_type>> permutation,
774 
788  std::unique_ptr<Csr> permute(
789  ptr_param<const Permutation<index_type>> row_permutation,
790  ptr_param<const Permutation<index_type>> column_permutation,
791  bool invert = false) const;
792 
802  std::unique_ptr<Csr> scale_permute(
805 
818  std::unique_ptr<Csr> scale_permute(
820  row_permutation,
822  column_permutation,
823  bool invert = false) const;
824 
825  std::unique_ptr<LinOp> permute(
826  const array<IndexType>* permutation_indices) const override;
827 
828  std::unique_ptr<LinOp> inverse_permute(
829  const array<IndexType>* inverse_permutation_indices) const override;
830 
831  std::unique_ptr<LinOp> row_permute(
832  const array<IndexType>* permutation_indices) const override;
833 
834  std::unique_ptr<LinOp> column_permute(
835  const array<IndexType>* permutation_indices) const override;
836 
837  std::unique_ptr<LinOp> inverse_row_permute(
838  const array<IndexType>* inverse_permutation_indices) const override;
839 
840  std::unique_ptr<LinOp> inverse_column_permute(
841  const array<IndexType>* inverse_permutation_indices) const override;
842 
843  std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
844 
845  std::unique_ptr<absolute_type> compute_absolute() const override;
846 
847  void compute_absolute_inplace() override;
848 
852  void sort_by_column_index();
853 
854  /*
855  * Tests if all row entry pairs (value, col_idx) are sorted by column index
856  *
857  * @returns True if all row entry pairs (value, col_idx) are sorted by
858  * column index
859  */
860  bool is_sorted_by_column_index() const;
861 
867  value_type* get_values() noexcept { return values_.get_data(); }
868 
876  const value_type* get_const_values() const noexcept
877  {
878  return values_.get_const_data();
879  }
880 
886  index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }
887 
895  const index_type* get_const_col_idxs() const noexcept
896  {
897  return col_idxs_.get_const_data();
898  }
899 
905  index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
906 
914  const index_type* get_const_row_ptrs() const noexcept
915  {
916  return row_ptrs_.get_const_data();
917  }
918 
924  index_type* get_srow() noexcept { return srow_.get_data(); }
925 
933  const index_type* get_const_srow() const noexcept
934  {
935  return srow_.get_const_data();
936  }
937 
944  {
945  return srow_.get_size();
946  }
947 
954  {
955  return values_.get_size();
956  }
957 
962  std::shared_ptr<strategy_type> get_strategy() const noexcept
963  {
964  return strategy_;
965  }
966 
972  void set_strategy(std::shared_ptr<strategy_type> strategy)
973  {
974  strategy_ = std::move(strategy->copy());
975  this->make_srow();
976  }
977 
985  {
986  auto exec = this->get_executor();
987  GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
988  this->scale_impl(make_temporary_clone(exec, alpha).get());
989  }
990 
998  {
999  auto exec = this->get_executor();
1000  GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1001  this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
1002  }
1003 
1012  static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
1013  std::shared_ptr<strategy_type> strategy);
1014 
1026  static std::unique_ptr<Csr> create(
1027  std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1028  size_type num_nonzeros = {},
1029  std::shared_ptr<strategy_type> strategy = nullptr);
1030 
1050  static std::unique_ptr<Csr> create(
1051  std::shared_ptr<const Executor> exec, const dim<2>& size,
1052  array<value_type> values, array<index_type> col_idxs,
1053  array<index_type> row_ptrs,
1054  std::shared_ptr<strategy_type> strategy = nullptr);
1055 
1060  template <typename InputValueType, typename InputColumnIndexType,
1061  typename InputRowPtrType>
1062  GKO_DEPRECATED(
1063  "explicitly construct the gko::array argument instead of passing "
1064  "initializer lists")
1065  static std::unique_ptr<Csr> create(
1066  std::shared_ptr<const Executor> exec, const dim<2>& size,
1067  std::initializer_list<InputValueType> values,
1068  std::initializer_list<InputColumnIndexType> col_idxs,
1069  std::initializer_list<InputRowPtrType> row_ptrs)
1070  {
1071  return create(exec, size, array<value_type>{exec, std::move(values)},
1072  array<index_type>{exec, std::move(col_idxs)},
1073  array<index_type>{exec, std::move(row_ptrs)});
1074  }
1075 
1091  static std::unique_ptr<const Csr> create_const(
1092  std::shared_ptr<const Executor> exec, const dim<2>& size,
1093  gko::detail::const_array_view<ValueType>&& values,
1094  gko::detail::const_array_view<IndexType>&& col_idxs,
1095  gko::detail::const_array_view<IndexType>&& row_ptrs,
1096  std::shared_ptr<strategy_type> strategy = nullptr);
1097 
1110  std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1111  const index_set<IndexType>& row_index_set,
1112  const index_set<IndexType>& column_index_set) const;
1113 
1125  std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1126  const span& row_span, const span& column_span) const;
1127 
1131  Csr& operator=(const Csr&);
1132 
1138  Csr& operator=(Csr&&);
1139 
1143  Csr(const Csr&);
1144 
1150  Csr(Csr&&);
1151 
1152 protected:
1153  Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1154  size_type num_nonzeros = {},
1155  std::shared_ptr<strategy_type> strategy = nullptr);
1156 
1157  Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
1158  array<value_type> values, array<index_type> col_idxs,
1159  array<index_type> row_ptrs,
1160  std::shared_ptr<strategy_type> strategy = nullptr);
1161 
1162  void apply_impl(const LinOp* b, LinOp* x) const override;
1163 
1164  void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
1165  LinOp* x) const override;
1166 
1167  // TODO: This provides some more sane settings. Please fix this!
1168  static std::shared_ptr<strategy_type> make_default_strategy(
1169  std::shared_ptr<const Executor> exec)
1170  {
1171  auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1172  auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1173  auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1174  std::shared_ptr<strategy_type> new_strategy;
1175  if (cuda_exec) {
1176  new_strategy = std::make_shared<automatical>(cuda_exec);
1177  } else if (hip_exec) {
1178  new_strategy = std::make_shared<automatical>(hip_exec);
1179  } else if (dpcpp_exec) {
1180  new_strategy = std::make_shared<automatical>(dpcpp_exec);
1181  } else {
1182  new_strategy = std::make_shared<classical>();
1183  }
1184  return new_strategy;
1185  }
1186 
1187  // TODO clean this up as soon as we improve strategy_type
1188  template <typename CsrType>
1189  void convert_strategy_helper(CsrType* result) const
1190  {
1191  auto strat = this->get_strategy().get();
1192  std::shared_ptr<typename CsrType::strategy_type> new_strat;
1193  if (dynamic_cast<classical*>(strat)) {
1194  new_strat = std::make_shared<typename CsrType::classical>();
1195  } else if (dynamic_cast<merge_path*>(strat)) {
1196  new_strat = std::make_shared<typename CsrType::merge_path>();
1197  } else if (dynamic_cast<cusparse*>(strat)) {
1198  new_strat = std::make_shared<typename CsrType::cusparse>();
1199  } else if (dynamic_cast<sparselib*>(strat)) {
1200  new_strat = std::make_shared<typename CsrType::sparselib>();
1201  } else {
1202  auto rexec = result->get_executor();
1203  auto cuda_exec =
1204  std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1205  auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1206  auto dpcpp_exec =
1207  std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1208  auto lb = dynamic_cast<load_balance*>(strat);
1209  if (cuda_exec) {
1210  if (lb) {
1211  new_strat =
1212  std::make_shared<typename CsrType::load_balance>(
1213  cuda_exec);
1214  } else {
1215  new_strat = std::make_shared<typename CsrType::automatical>(
1216  cuda_exec);
1217  }
1218  } else if (hip_exec) {
1219  if (lb) {
1220  new_strat =
1221  std::make_shared<typename CsrType::load_balance>(
1222  hip_exec);
1223  } else {
1224  new_strat = std::make_shared<typename CsrType::automatical>(
1225  hip_exec);
1226  }
1227  } else if (dpcpp_exec) {
1228  if (lb) {
1229  new_strat =
1230  std::make_shared<typename CsrType::load_balance>(
1231  dpcpp_exec);
1232  } else {
1233  new_strat = std::make_shared<typename CsrType::automatical>(
1234  dpcpp_exec);
1235  }
1236  } else {
1237  // Try to preserve this executor's configuration
1238  auto this_cuda_exec =
1239  std::dynamic_pointer_cast<const CudaExecutor>(
1240  this->get_executor());
1241  auto this_hip_exec =
1242  std::dynamic_pointer_cast<const HipExecutor>(
1243  this->get_executor());
1244  auto this_dpcpp_exec =
1245  std::dynamic_pointer_cast<const DpcppExecutor>(
1246  this->get_executor());
1247  if (this_cuda_exec) {
1248  if (lb) {
1249  new_strat =
1250  std::make_shared<typename CsrType::load_balance>(
1251  this_cuda_exec);
1252  } else {
1253  new_strat =
1254  std::make_shared<typename CsrType::automatical>(
1255  this_cuda_exec);
1256  }
1257  } else if (this_hip_exec) {
1258  if (lb) {
1259  new_strat =
1260  std::make_shared<typename CsrType::load_balance>(
1261  this_hip_exec);
1262  } else {
1263  new_strat =
1264  std::make_shared<typename CsrType::automatical>(
1265  this_hip_exec);
1266  }
1267  } else if (this_dpcpp_exec) {
1268  if (lb) {
1269  new_strat =
1270  std::make_shared<typename CsrType::load_balance>(
1271  this_dpcpp_exec);
1272  } else {
1273  new_strat =
1274  std::make_shared<typename CsrType::automatical>(
1275  this_dpcpp_exec);
1276  }
1277  } else {
1278  // FIXME: this changes strategies.
1279  // We had a load balance or automatical strategy from a non
1280  // HIP or Cuda executor and are moving to a non HIP or Cuda
1281  // executor.
1282  new_strat = std::make_shared<typename CsrType::classical>();
1283  }
1284  }
1285  }
1286  result->set_strategy(new_strat);
1287  }
1288 
1292  void make_srow()
1293  {
1294  srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
1295  strategy_->process(row_ptrs_, &srow_);
1296  }
1297 
1304  virtual void scale_impl(const LinOp* alpha);
1305 
1312  virtual void inv_scale_impl(const LinOp* alpha);
1313 
1314 private:
1315  std::shared_ptr<strategy_type> strategy_;
1316  array<value_type> values_;
1317  array<index_type> col_idxs_;
1318  array<index_type> row_ptrs_;
1319  array<index_type> srow_;
1320 
1321  void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
1322 };
1323 
1324 
1325 namespace detail {
1326 
1327 
1334 template <typename ValueType, typename IndexType>
1335 void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1336 {
1337  using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1338  using automatical = typename Csr<ValueType, IndexType>::automatical;
1339  auto strategy = result->get_strategy();
1340  auto executor = result->get_executor();
1341  if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1342  if (auto exec =
1343  std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1344  result->set_strategy(std::make_shared<load_balance>(exec));
1345  } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1346  executor)) {
1347  result->set_strategy(std::make_shared<load_balance>(exec));
1348  }
1349  } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1350  if (auto exec =
1351  std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1352  result->set_strategy(std::make_shared<automatical>(exec));
1353  } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1354  executor)) {
1355  result->set_strategy(std::make_shared<automatical>(exec));
1356  }
1357  }
1358 }
1359 
1360 
1361 } // namespace detail
1362 } // namespace matrix
1363 } // namespace gko
1364 
1365 
1366 #endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
gko::matrix::Csr::automatical
Definition: csr.hpp:516
gko::matrix::Csr::get_const_srow
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition: csr.hpp:933
gko::matrix::Csr::load_balance::load_balance
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition: csr.hpp:375
gko::matrix::Csr::operator=
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
gko::matrix::Csr::cusparse::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:309
gko::matrix::Csr::get_col_idxs
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:886
gko::matrix::Fbcsr
Fixed-block compressed sparse row storage matrix format.
Definition: csr.hpp:46
gko::matrix::Csr
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: matrix.hpp:28
gko::matrix::Csr::get_const_row_ptrs
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:914
gko::matrix::Csr::sparselib::sparselib
sparselib()
Creates a sparselib strategy.
Definition: csr.hpp:331
gko::LinOp
Definition: lin_op.hpp:117
gko::matrix::Dense
Dense is a matrix format which explicitly stores all values of the matrix.
Definition: dense_cache.hpp:19
gko::matrix::CsrBuilder
Definition: csr.hpp:49
gko::matrix::Csr::inverse_row_permute
std::unique_ptr< LinOp > inverse_row_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the row permutation of the inverse permuted object.
gko::matrix::Csr::sparselib
sparselib is a strategy_type which uses the sparselib csr.
Definition: csr.hpp:326
gko::DiagonalExtractable
The diagonal of a LinOp implementing this interface can be extracted.
Definition: lin_op.hpp:743
gko::matrix::SparsityCsr
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition: csr.hpp:40
gko::matrix::Csr::load_balance
load_balance is a strategy_type which uses the load balance algorithm.
Definition: csr.hpp:348
gko::matrix::Csr::scale
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition: csr.hpp:984
gko::matrix::Csr::automatical::automatical
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition: csr.hpp:563
gko::Transposable
Linear operators which support transposition should implement the Transposable interface.
Definition: lin_op.hpp:433
gko::matrix::Csr::column_permute
std::unique_ptr< LinOp > column_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the column permutation of the Permutable object.
gko::matrix::Csr::strategy_type::get_name
std::string get_name()
Returns the name of strategy.
Definition: csr.hpp:188
gko::matrix::Csr::classical::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:256
gko::matrix::ScaledPermutation
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition: scaled_permutation.hpp:36
gko::size_type
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:89
gko::matrix::Csr::strategy_type::copy
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
gko::matrix::Csr::get_srow
index_type * get_srow() noexcept
Returns the starting rows.
Definition: csr.hpp:924
gko::matrix::Csr::sparselib::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:337
gko::matrix::Permutation
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition: permutation.hpp:111
gko::matrix::Csr::automatical::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:600
gko::matrix::Csr::row_permute
std::unique_ptr< LinOp > row_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the row permutation of the Permutable object.
gko::matrix::Csr::classical::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:263
gko::CudaExecutor
This is the Executor subclass which represents the CUDA device.
Definition: executor.hpp:1540
gko::matrix::Csr::strategy_type::process
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
gko::Permutable
Linear operators which support permutation should implement the Permutable interface.
Definition: lin_op.hpp:484
gko::matrix::Csr::transpose
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
gko::matrix::Csr::load_balance::load_balance
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition: csr.hpp:386
gko
The Ginkgo namespace.
Definition: abstract_factory.hpp:20
gko::matrix::Csr::load_balance::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:411
gko::matrix::Csr::inv_scale
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition: csr.hpp:997
gko::matrix::Csr::extract_diagonal
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
gko::array< index_type >
gko::matrix::Csr::cusparse
cusparse is a strategy_type which uses the sparselib csr.
Definition: csr.hpp:302
gko::matrix::Csr::inverse_permute
std::unique_ptr< LinOp > inverse_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the symmetric inverse row and column permutation of the Permutable objec...
gko::matrix::Csr::get_row_ptrs
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:905
gko::array::resize_and_reset
void resize_and_reset(size_type size)
Resizes the array so it is able to hold the specified number of elements.
Definition: array.hpp:622
gko::span
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition: range.hpp:46
gko::dim< 2 >
gko::matrix_data
This structure is used as an intermediate data type to store a sparse matrix.
Definition: matrix_data.hpp:126
gko::matrix::Csr::load_balance::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:464
gko::matrix::Csr::merge_path
merge_path is a strategy_type which uses the merge_path algorithm.
Definition: csr.hpp:277
gko::matrix::Csr::permute
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type >> permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation .
gko::index_set
An index set class represents an ordered set of intervals.
Definition: index_set.hpp:56
gko::matrix::Csr::automatical::automatical
automatical()
Creates an automatical strategy.
Definition: csr.hpp:544
gko::matrix::Csr::merge_path::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:290
gko::matrix::Csr::load_balance::load_balance
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition: csr.hpp:401
gko::matrix::Diagonal
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition: lin_op.hpp:31
gko::matrix::Csr::strategy_type::clac_size
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
gko::matrix::Csr::load_balance::load_balance
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition: csr.hpp:366
gko::ptr_param
This class is used for function parameters in the place of raw pointers.
Definition: utils_helper.hpp:41
gko::array::get_data
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition: array.hpp:673
gko::ReadableFromMatrixData
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition: lin_op.hpp:605
gko::OmpExecutor
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition: executor.hpp:1386
gko::matrix::Csr::conj_transpose
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
gko::WritableToMatrixData
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition: lin_op.hpp:660
gko::matrix::permute_mode::symmetric
The rows and columns will be permuted.
gko::matrix::Csr::sparselib::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:333
gko::matrix::Csr::cusparse::cusparse
cusparse()
Creates a cusparse strategy.
Definition: csr.hpp:307
gko::matrix::Csr::cusparse::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:313
gko::matrix::Csr::merge_path::merge_path
merge_path()
Creates a merge_path strategy.
Definition: csr.hpp:282
gko::matrix::Csr::get_const_values
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition: csr.hpp:876
gko::stop::mode
mode
The mode for the residual norm criterion.
Definition: residual_norm.hpp:38
gko::matrix::Csr::load_balance::load_balance
load_balance()
Creates a load_balance strategy.
Definition: csr.hpp:356
gko::array::get_executor
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition: array.hpp:689
gko::matrix::Csr::get_num_stored_elements
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition: csr.hpp:953
gko::matrix::Csr::create_submatrix
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
gko::ScaledIdentityAddable
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition: lin_op.hpp:818
gko::matrix::Csr::load_balance::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:503
gko::matrix::Csr::classical::classical
classical()
Creates a classical strategy.
Definition: csr.hpp:232
gko::matrix::Csr::strategy_type::strategy_type
strategy_type(std::string name)
Creates a strategy_type.
Definition: csr.hpp:179
gko::matrix::Csr::sort_by_column_index
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
gko::matrix::Csr::merge_path::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:284
gko::next_precision
next_precision_base< T > next_precision
Obtains the next type in the singly-linked precision list with half.
Definition: math.hpp:445
gko::matrix::Csr::create_const
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
gko::matrix::Csr::automatical::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:681
gko::matrix::Csr::classical
classical is a strategy_type which uses the same number of threads on each row.
Definition: csr.hpp:227
gko::matrix::Csr::get_strategy
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition: csr.hpp:962
gko::matrix::Csr::set_strategy
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition: csr.hpp:972
gko::matrix::Ell
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition: csr.hpp:31
gko::ConvertibleTo
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition: polymorphic_object.hpp:479
gko::matrix::Csr::compute_absolute
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
gko::matrix::Csr::strategy_type
strategy_type is to decide how to set the csr algorithm.
Definition: csr.hpp:170
gko::make_temporary_clone
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition: temporary_clone.hpp:208
gko::Executor
The first step in using the Ginkgo library consists of creating an executor.
Definition: executor.hpp:615
gko::matrix::Hybrid
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition: coo.hpp:32
gko::array::get_const_data
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition: array.hpp:682
gko::matrix::Csr::write
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
gko::matrix::permute_mode
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition: permutation.hpp:42
gko::matrix::Sellp
SELL-P is a matrix format similar to ELL format.
Definition: csr.hpp:37
gko::min
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition: math.hpp:719
gko::matrix::Csr::cusparse::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:315
gko::matrix::Csr::get_const_col_idxs
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:895
gko::ceildiv
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition: math.hpp:590
gko::matrix::Csr::automatical::automatical
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with Dpcpp executor.
Definition: csr.hpp:574
gko::matrix::Csr::merge_path::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:288
gko::EnableAbsoluteComputation
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition: lin_op.hpp:794
gko::matrix::Csr::inverse_column_permute
std::unique_ptr< LinOp > inverse_column_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the row permutation of the inverse permuted object.
gko::matrix::Csr::Csr
Csr(const Csr &)
Copy-constructs a Csr matrix.
gko::matrix::Csr::automatical::automatical
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition: csr.hpp:554
gko::PolymorphicObject::get_executor
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition: polymorphic_object.hpp:243
gko::array::get_size
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition: array.hpp:656
gko::matrix::Csr::automatical::clac_size
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition: csr.hpp:669
gko::matrix::Csr::automatical::automatical
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition: csr.hpp:589
gko::matrix::Csr::classical::process
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition: csr.hpp:234
gko::remove_complex
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition: math.hpp:260
gko::matrix::Csr::compute_absolute_inplace
void compute_absolute_inplace() override
Compute absolute inplace on each element.
gko::matrix::Csr::scale_permute
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type >> permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
gko::device_matrix_data
This type is a device-side equivalent to matrix_data.
Definition: device_matrix_data.hpp:36
gko::matrix::Csr::read
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.
gko::matrix::Csr::create
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
gko::EnableLinOp
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition: lin_op.hpp:877
gko::matrix::Csr::sparselib::copy
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition: csr.hpp:339
gko::matrix::Csr::get_values
value_type * get_values() noexcept
Returns the values of the matrix.
Definition: csr.hpp:867
gko::matrix::Csr::get_num_srow_elements
size_type get_num_srow_elements() const noexcept
Returns the number of the srow stored elements (involved warps)
Definition: csr.hpp:943
gko::LinOp::LinOp
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
gko::to_complex
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition: math.hpp:279
gko::EnablePolymorphicObject
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition: polymorphic_object.hpp:667
gko::matrix::Coo
COO stores a matrix in the coordinate matrix format.
Definition: coo.hpp:50