Ginkgo  Generated from tags/v1.0.0^0 branch based on master. Ginkgo version 1.0.0
A numerical linear algebra library targeting many-core architectures
csr.hpp
1 /*******************************<GINKGO LICENSE>******************************
2 Copyright (c) 2017-2019, the Ginkgo authors
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8 
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15 
16 3. Neither the name of the copyright holder nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 ******************************<GINKGO LICENSE>*******************************/
32 
33 #ifndef GKO_CORE_MATRIX_CSR_HPP_
34 #define GKO_CORE_MATRIX_CSR_HPP_
35 
36 
37 #include <ginkgo/core/base/array.hpp>
38 #include <ginkgo/core/base/lin_op.hpp>
39 
40 
41 namespace gko {
42 namespace matrix {
43 
44 
45 template <typename ValueType>
46 class Dense;
47 
48 
49 template <typename ValueType, typename IndexType>
50 class Coo;
51 
52 template <typename ValueType, typename IndexType>
53 class Ell;
54 
55 
56 template <typename ValueType, typename IndexType>
57 class Sellp;
58 
59 
76 template <typename ValueType = default_precision, typename IndexType = int32>
77 class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
78  public EnableCreateMethod<Csr<ValueType, IndexType>>,
79  public ConvertibleTo<Dense<ValueType>>,
80  public ConvertibleTo<Coo<ValueType, IndexType>>,
81  public ConvertibleTo<Sellp<ValueType, IndexType>>,
82  public ConvertibleTo<Ell<ValueType, IndexType>>,
83  public ReadableFromMatrixData<ValueType, IndexType>,
84  public WritableToMatrixData<ValueType, IndexType>,
85  public Transposable {
86  friend class EnableCreateMethod<Csr>;
87  friend class EnablePolymorphicObject<Csr, LinOp>;
88  friend class Coo<ValueType, IndexType>;
89  friend class Dense<ValueType>;
90  friend class Sellp<ValueType, IndexType>;
91  friend class Ell<ValueType, IndexType>;
92 
93 public:
96 
97  using value_type = ValueType;
98  using index_type = IndexType;
100 
101  class automatical;
102 
104  friend class automatical;
105 
106  public:
107  strategy_type(std::string name) : name_(name) {}
108 
109  std::string get_name() { return name_; }
110 
111  virtual void process(const Array<index_type> &mtx_row_ptrs,
112  Array<index_type> *mtx_srow) = 0;
113 
114  virtual int64_t clac_size(const int64_t nnz) = 0;
115 
116  protected:
117  void set_name(std::string name) { name_ = name; }
118 
119  private:
120  std::string name_;
121  };
122 
123  class classical : public strategy_type {
124  public:
125  classical() : strategy_type("classical") {}
126 
127  void process(const Array<index_type> &mtx_row_ptrs,
128  Array<index_type> *mtx_srow)
129  {}
130 
131  int64_t clac_size(const int64_t nnz) { return 0; }
132  };
133 
134  class merge_path : public strategy_type {
135  public:
136  merge_path() : strategy_type("merge_path") {}
137 
138  void process(const Array<index_type> &mtx_row_ptrs,
139  Array<index_type> *mtx_srow)
140  {}
141 
142  int64_t clac_size(const int64_t nnz) { return 0; }
143  };
144 
145  class cusparse : public strategy_type {
146  public:
147  cusparse() : strategy_type("cusparse") {}
148 
149  void process(const Array<index_type> &mtx_row_ptrs,
150  Array<index_type> *mtx_srow)
151  {}
152 
153  int64_t clac_size(const int64_t nnz) { return 0; }
154  };
155 
156  class load_balance : public strategy_type {
157  public:
158  load_balance()
159  : load_balance(std::move(
161  {}
162 
163  load_balance(std::shared_ptr<const CudaExecutor> exec)
164  : load_balance(exec->get_num_warps())
165  {}
166 
167  load_balance(int64_t nwarps)
168  : strategy_type("load_balance"), nwarps_(nwarps)
169  {}
170 
171  void process(const Array<index_type> &mtx_row_ptrs,
172  Array<index_type> *mtx_srow)
173  {
174  constexpr uint32 warp_size = 32;
175  auto nwarps = mtx_srow->get_num_elems();
176 
177  if (nwarps > 0) {
178  auto exec = mtx_srow->get_executor()->get_master();
179  Array<index_type> srow_host(exec);
180  srow_host = *mtx_srow;
181  auto srow = srow_host.get_data();
182  Array<index_type> row_ptrs_host(exec);
183  row_ptrs_host = mtx_row_ptrs;
184  auto row_ptrs = row_ptrs_host.get_const_data();
185  for (size_type i = 0; i < nwarps; i++) {
186  srow[i] = 0;
187  }
188  auto num_rows = mtx_row_ptrs.get_num_elems() - 1;
189  auto num_elems = row_ptrs[num_rows];
190  for (size_type i = 0; i < num_rows; i++) {
191  auto bucket =
192  ceildiv((ceildiv(row_ptrs[i + 1], warp_size) * nwarps),
193  ceildiv(num_elems, warp_size));
194  if (bucket < nwarps) {
195  srow[bucket]++;
196  }
197  }
198  // find starting row for thread i
199  for (size_type i = 1; i < nwarps; i++) {
200  srow[i] += srow[i - 1];
201  }
202  *mtx_srow = srow_host;
203  }
204  }
205 
206  int64_t clac_size(const int64_t nnz)
207  {
208  constexpr uint32 warp_size = 32;
209  int multiple = 8;
210  if (nnz >= 2000000) {
211  multiple = 128;
212  } else if (nnz >= 200000) {
213  multiple = 32;
214  }
215  auto nwarps = nwarps_ * multiple;
216  return min(ceildiv(nnz, warp_size), static_cast<int64_t>(nwarps));
217  }
218 
219  private:
220  int64_t nwarps_;
221  };
222 
223  class automatical : public strategy_type {
224  public:
225  automatical()
226  : automatical(std::move(
228  {}
229 
230  automatical(std::shared_ptr<const CudaExecutor> exec)
231  : automatical(exec->get_num_warps())
232  {}
233 
234  automatical(int64_t nwarps)
235  : strategy_type("automatical"), nwarps_(nwarps)
236  {}
237 
238  void process(const Array<index_type> &mtx_row_ptrs,
239  Array<index_type> *mtx_srow)
240  {
241  // if the number of stored elements is larger than 1e6 or
242  // the maximum number of stored elements per row is larger than
243  // 64, use load_balance otherwise use classical
244  const auto num_rows = mtx_row_ptrs.get_num_elems() - 1;
245  Array<index_type> host_row_ptrs(
246  mtx_row_ptrs.get_executor()->get_master());
247  host_row_ptrs = mtx_row_ptrs;
248  const auto row_val = host_row_ptrs.get_const_data();
249  if (row_val[num_rows] > static_cast<index_type>(1e6)) {
250  std::make_shared<load_balance>(nwarps_)->process(host_row_ptrs,
251  mtx_srow);
252  this->set_name("load_balance");
253  } else {
254  index_type maxnum = 0;
255  for (index_type i = 1; i < num_rows + 1; i++) {
256  maxnum = max(maxnum, row_val[i] - row_val[i - 1]);
257  }
258  if (maxnum > 64) {
259  std::make_shared<load_balance>(nwarps_)->process(
260  host_row_ptrs, mtx_srow);
261  this->set_name("load_balance");
262  } else {
263  std::make_shared<classical>()->process(host_row_ptrs,
264  mtx_srow);
265  this->set_name("classical");
266  }
267  }
268  }
269 
270  int64_t clac_size(const int64_t nnz)
271  {
272  return std::make_shared<load_balance>(nwarps_)->clac_size(nnz);
273  }
274 
275  private:
276  int64_t nwarps_;
277  };
278 
279  void convert_to(Dense<ValueType> *other) const override;
280 
281  void move_to(Dense<ValueType> *other) override;
282 
283  void convert_to(Coo<ValueType, IndexType> *result) const override;
284 
285  void move_to(Coo<ValueType, IndexType> *result) override;
286 
287  void convert_to(Sellp<ValueType, IndexType> *result) const override;
288 
289  void move_to(Sellp<ValueType, IndexType> *result) override;
290 
291  void convert_to(Ell<ValueType, IndexType> *result) const override;
292 
293  void move_to(Ell<ValueType, IndexType> *result) override;
294 
295  void read(const mat_data &data) override;
296 
297  void write(mat_data &data) const override;
298 
299  std::unique_ptr<LinOp> transpose() const override;
300 
301  std::unique_ptr<LinOp> conj_transpose() const override;
302 
308  value_type *get_values() noexcept { return values_.get_data(); }
309 
317  const value_type *get_const_values() const noexcept
318  {
319  return values_.get_const_data();
320  }
321 
327  index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); }
328 
336  const index_type *get_const_col_idxs() const noexcept
337  {
338  return col_idxs_.get_const_data();
339  }
340 
346  index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
347 
355  const index_type *get_const_row_ptrs() const noexcept
356  {
357  return row_ptrs_.get_const_data();
358  }
359 
365  index_type *get_srow() noexcept { return srow_.get_data(); }
366 
374  const index_type *get_const_srow() const noexcept
375  {
376  return srow_.get_const_data();
377  }
378 
385  {
386  return srow_.get_num_elems();
387  }
388 
395  {
396  return values_.get_num_elems();
397  }
398 
403  std::shared_ptr<strategy_type> get_strategy() const noexcept
404  {
405  return strategy_;
406  }
407 
408 protected:
415  Csr(std::shared_ptr<const Executor> exec,
416  std::shared_ptr<strategy_type> strategy)
417  : Csr(std::move(exec), dim<2>{}, {}, std::move(strategy))
418  {}
419 
428  Csr(std::shared_ptr<const Executor> exec, const dim<2> &size = dim<2>{},
429  size_type num_nonzeros = {},
430  std::shared_ptr<strategy_type> strategy = std::make_shared<cusparse>())
431  : EnableLinOp<Csr>(exec, size),
432  values_(exec, num_nonzeros),
433  col_idxs_(exec, num_nonzeros),
434  // avoid allocation for empty matrix
435  row_ptrs_(exec, size[0] + (size[0] > 0)),
436  srow_(exec, strategy->clac_size(num_nonzeros)),
437  strategy_(std::move(strategy))
438  {}
439 
460  template <typename ValuesArray, typename ColIdxsArray,
461  typename RowPtrsArray>
462  Csr(std::shared_ptr<const Executor> exec, const dim<2> &size,
463  ValuesArray &&values, ColIdxsArray &&col_idxs, RowPtrsArray &&row_ptrs,
464  std::shared_ptr<strategy_type> strategy = std::make_shared<cusparse>())
465  : EnableLinOp<Csr>(exec, size),
466  values_{exec, std::forward<ValuesArray>(values)},
467  col_idxs_{exec, std::forward<ColIdxsArray>(col_idxs)},
468  row_ptrs_{exec, std::forward<RowPtrsArray>(row_ptrs)},
469  srow_(exec),
470  strategy_(std::move(strategy))
471  {
472  GKO_ENSURE_IN_BOUNDS(values_.get_num_elems() - 1,
473  col_idxs_.get_num_elems());
474  GKO_ENSURE_IN_BOUNDS(this->get_size()[0], row_ptrs_.get_num_elems());
475  srow_.resize_and_reset(strategy_->clac_size(values_.get_num_elems()));
476  this->make_srow();
477  }
478 
479  void apply_impl(const LinOp *b, LinOp *x) const override;
480 
481  void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta,
482  LinOp *x) const override;
483 
487  void make_srow() { strategy_->process(row_ptrs_, &srow_); }
488 
489 private:
490  Array<value_type> values_;
491  Array<index_type> col_idxs_;
492  Array<index_type> row_ptrs_;
493  Array<index_type> srow_;
494  std::shared_ptr<strategy_type> strategy_;
495 };
496 
497 
498 } // namespace matrix
499 } // namespace gko
500 
501 
502 #endif // GKO_CORE_MATRIX_CSR_HPP_
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition: math.hpp:280
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition: csr.hpp:374
This mixin implements a static create() method on ConcreteType that dynamically allocates the memory...
Definition: polymorphic_object.hpp:576
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:355
size_type get_num_elems() const noexcept
Returns the number of elements in the Array.
Definition: array.hpp:388
Definition: csr.hpp:145
value_type * get_values() noexcept
Returns the values of the matrix.
Definition: csr.hpp:308
index_type * get_srow() noexcept
Returns the starting rows.
Definition: csr.hpp:365
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master)
Creates a new CudaExecutor.
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition: csr.hpp:53
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition: polymorphic_object.hpp:380
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:327
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition: polymorphic_object.hpp:505
std::uint32_t uint32
32-bit unsigned integral type.
Definition: types.hpp:134
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:94
Definition: csr.hpp:103
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition: array.hpp:413
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the Array...
Definition: array.hpp:406
The Ginkgo namespace.
Definition: abstract_factory.hpp:45
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: coo.hpp:51
static std::shared_ptr< OmpExecutor > create()
Creates a new OmpExecutor.
Definition: executor.hpp:735
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition: csr.hpp:346
Definition: csr.hpp:223
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition: math.hpp:393
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
SELL-P is a matrix format similar to ELL format.
Definition: csr.hpp:57
Dense is a matrix format which explicitly stores all values of the matrix.
Definition: coo.hpp:55
Definition: lin_op.hpp:134
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition: lin_op.hpp:509
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
const dim< 2 > & get_size() const noexcept
Returns the size of the operator.
Definition: lin_op.hpp:221
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition: csr.hpp:403
COO stores a matrix in the coordinate matrix format.
Definition: coo.hpp:73
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition: csr.hpp:394
Definition: csr.hpp:123
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition: csr.hpp:336
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition: lin_op.hpp:446
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
Definition: csr.hpp:134
size_type get_num_srow_elements() const noexcept
Returns the number of the srow stored elements (involved warps)
Definition: csr.hpp:384
Definition: csr.hpp:156
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the Array. ...
Definition: array.hpp:397
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition: csr.hpp:317
Linear operators which support transposition should implement the Transposable interface.
Definition: lin_op.hpp:398
constexpr T max(const T &x, const T &y)
Returns the larger of the arguments.
Definition: math.hpp:373
This structure is used as an intermediate data type to store a sparse matrix.
Definition: matrix_data.hpp:102
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition: lin_op.hpp:426
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.