Ginkgo
Generated from pipelines/1589998975 branch based on develop. Ginkgo version 1.10.0
A numerical linear algebra library targeting many-core architectures
|
This is the Executor subclass which represents the CUDA device. More...
#include <ginkgo/core/base/executor.hpp>
Public Member Functions | |
std::shared_ptr< Executor > | get_master () noexcept override |
Returns the master OmpExecutor of this Executor. More... | |
std::shared_ptr< const Executor > | get_master () const noexcept override |
Returns the master OmpExecutor of this Executor. More... | |
void | synchronize () const override |
Synchronize the operations launched on the executor with its master. | |
scoped_device_id_guard | get_scoped_device_id_guard () const override |
std::string | get_description () const override |
int | get_device_id () const noexcept |
Get the CUDA device id of the device associated with this executor. | |
int | get_num_warps_per_sm () const noexcept |
Get the number of warps per SM of this executor. | |
int | get_num_multiprocessor () const noexcept |
Get the number of multiprocessors of this executor. | |
int | get_num_warps () const noexcept |
Get the number of warps of this executor. | |
int | get_warp_size () const noexcept |
Get the warp size of this executor. | |
int | get_major_version () const noexcept |
Get the major version of compute capability. | |
int | get_minor_version () const noexcept |
Get the minor version of compute capability. | |
cublasContext * | get_cublas_handle () const |
Get the cublas handle for this executor. More... | |
cublasContext * | get_blas_handle () const |
Get the cublas handle for this executor. More... | |
cusparseContext * | get_cusparse_handle () const |
Get the cusparse handle for this executor. More... | |
cusparseContext * | get_sparselib_handle () const |
Get the cusparse handle for this executor. More... | |
std::vector< int > | get_closest_pus () const |
Get the closest PUs. More... | |
int | get_closest_numa () const |
Get the closest NUMA node. More... | |
CUstream_st * | get_stream () const |
Returns the CUDA stream used by this executor. More... | |
virtual void | run (const Operation &op) const=0 |
Runs the specified Operation using this Executor. More... | |
template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp > | |
void | run (const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const |
Runs one of the passed in functors, depending on the Executor type. More... | |
template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp > | |
void | run (std::string name, const ClosureReference &op_ref, const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const |
Runs one of the passed in functors, depending on the Executor type. More... | |
Static Public Member Functions | |
static std::shared_ptr< CudaExecutor > | create (int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr) |
Creates a new CudaExecutor. More... | |
static std::shared_ptr< CudaExecutor > | create (int device_id, std::shared_ptr< Executor > master, std::shared_ptr< CudaAllocatorBase > alloc=std::make_shared< CudaAllocator >(), CUstream_st *stream=nullptr) |
Creates a new CudaExecutor with a custom allocator and device stream. More... | |
static int | get_num_devices () |
Get the number of devices present on the system. | |
This is the Executor subclass which represents the CUDA device.
|
static |
Creates a new CudaExecutor.
device_id | the CUDA device id of this device |
master | an executor on the host that is used to invoke the device kernels |
device_reset | this option no longer has any effect. |
alloc_mode | the allocation mode that the executor should operate on. See allocation_mode for more details. |
stream | the stream to execute operations on. |
|
static |
Creates a new CudaExecutor with a custom allocator and device stream.
device_id | the CUDA device id of this device |
master | an executor on the host that is used to invoke the device kernels. |
alloc | the allocator to use for device memory allocations. |
stream | the stream to execute operations on. |
|
inline |
Get the cublas handle for this executor.
|
inline |
Get the closest NUMA node.
|
inline |
Get the closest PUs.
|
inline |
Get the cublas handle for this executor.
|
inline |
Get the cusparse handle for this executor.
|
override virtual |
Implements gko::Executor.
|
override virtual noexcept |
Returns the master OmpExecutor of this Executor.
Implements gko::Executor.
|
override virtual noexcept |
Returns the master OmpExecutor of this Executor.
Implements gko::Executor.
|
inline |
Get the cusparse handle for this executor.
|
inline |
Returns the CUDA stream used by this executor.
Can be nullptr for the default stream.
|
inline |
Runs one of the passed in functors, depending on the Executor type.
ClosureOmp | type of op_omp |
ClosureCuda | type of op_cuda |
ClosureHip | type of op_hip |
ClosureDpcpp | type of op_dpcpp |
op_omp | functor to run in case of an OmpExecutor or ReferenceExecutor |
op_cuda | functor to run in case of a CudaExecutor |
op_hip | functor to run in case of a HipExecutor |
op_dpcpp | functor to run in case of a DpcppExecutor |
virtual void gko::Executor::run |
|
inline |
Runs one of the passed in functors, depending on the Executor type.
ClosureReference | type of op_ref |
ClosureOmp | type of op_omp |
ClosureCuda | type of op_cuda |
ClosureHip | type of op_hip |
ClosureDpcpp | type of op_dpcpp |
name | the name of the operation |
op_ref | functor to run in case of a ReferenceExecutor |
op_omp | functor to run in case of an OmpExecutor |
op_cuda | functor to run in case of a CudaExecutor |
op_hip | functor to run in case of a HipExecutor |
op_dpcpp | functor to run in case of a DpcppExecutor |