This is the Executor subclass which represents the CUDA device. More...

#include <ginkgo/core/base/executor.hpp>

Inheritance diagram for gko::CudaExecutor:

Collaboration diagram for gko::CudaExecutor:

Public Member Functions
std::shared_ptr< Executor >	get_master () noexcept override
	Returns the master OmpExecutor of this Executor. More...

std::shared_ptr< const Executor >	get_master () const noexcept override
	Returns the master OmpExecutor of this Executor. More...

void	synchronize () const override
	Synchronize the operations launched on the executor with its master.

scoped_device_id_guard	get_scoped_device_id_guard () const override

std::string	get_description () const override

int	get_device_id () const noexcept
	Get the CUDA device id of the device associated with this executor.

int	get_num_warps_per_sm () const noexcept
	Get the number of warps per SM of this executor.

int	get_num_multiprocessor () const noexcept
	Get the number of multiprocessors of this executor.

int	get_num_warps () const noexcept
	Get the number of warps of this executor.

int	get_warp_size () const noexcept
	Get the warp size of this executor.

int	get_major_version () const noexcept
	Get the major version of compute capability.

int	get_minor_version () const noexcept
	Get the minor version of compute capability.

int	get_compute_capability () const noexcept
	Get the compute capability.

cublasContext *	get_cublas_handle () const
	Get the cublas handle for this executor. More...

cublasContext *	get_blas_handle () const
	Get the cublas handle for this executor. More...

cusparseContext *	get_cusparse_handle () const
	Get the cusparse handle for this executor. More...

cusparseContext *	get_sparselib_handle () const
	Get the cusparse handle for this executor. More...

std::vector< int >	get_closest_pus () const
	Get the closest PUs. More...

int	get_closest_numa () const
	Get the closest NUMA node. More...

CUstream_st *	get_stream () const
	Returns the CUDA stream used by this executor. More...

virtual void	run (const Operation &op) const=0
	Runs the specified Operation using this Executor. More...

template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void	run (const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const
	Runs one of the passed in functors, depending on the Executor type. More...

template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void	run (std::string name, const ClosureReference &op_ref, const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const
	Runs one of the passed in functors, depending on the Executor type. More...

Static Public Member Functions
static std::shared_ptr< CudaExecutor >	create (int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
	Creates a new CudaExecutor. More...

static std::shared_ptr< CudaExecutor >	create (int device_id, std::shared_ptr< Executor > master, std::shared_ptr< CudaAllocatorBase > alloc=std::make_shared< CudaAllocator >(), CUstream_st *stream=nullptr)
	Creates a new CudaExecutor with a custom allocator and device stream. More...

static int	get_num_devices ()
	Get the number of devices present on the system.

Detailed Description

This is the Executor subclass which represents the CUDA device.

Member Function Documentation

◆ create() [1/2]

static std::shared_ptr<CudaExecutor> gko::CudaExecutor::create	(	int	device_id,
		std::shared_ptr< Executor >	master,
		bool	device_reset,
		allocation_mode	alloc_mode = `default_cuda_alloc_mode`,
		CUstream_st *	stream = `nullptr`
	)

static

Creates a new CudaExecutor.

Parameters

device_id	the CUDA device id of this device
master	an executor on the host that is used to invoke the device kernels
device_reset	this option no longer has any effect.
alloc_mode	the allocation mode that the executor should operate in. See allocation_mode for more details.
stream	the stream to execute operations on.

◆ create() [2/2]

static std::shared_ptr<CudaExecutor> gko::CudaExecutor::create	(	int	device_id,
		std::shared_ptr< Executor >	master,
		std::shared_ptr< CudaAllocatorBase >	alloc = `std::make_shared< CudaAllocator >()`,
		CUstream_st *	stream = `nullptr`
	)

static

Creates a new CudaExecutor with a custom allocator and device stream.

Parameters

device_id	the CUDA device id of this device
master	an executor on the host that is used to invoke the device kernels.
alloc	the allocator to use for device memory allocations.
stream	the stream to execute operations on.

◆ get_blas_handle()

cublasContext* gko::CudaExecutor::get_blas_handle ( ) const

inline

Get the cublas handle for this executor.

Returns: the cublas handle (cublasContext*) for this executor

◆ get_closest_numa()

int gko::CudaExecutor::get_closest_numa ( ) const

inline

Get the closest NUMA node.

Returns: the NUMA node closest to this device

◆ get_closest_pus()

std::vector<int> gko::CudaExecutor::get_closest_pus ( ) const

inline

Get the closest PUs.

Returns: the array of PUs closest to this device

◆ get_cublas_handle()

cublasContext* gko::CudaExecutor::get_cublas_handle ( ) const

inline

Get the cublas handle for this executor.

Returns: the cublas handle (cublasContext*) for this executor

◆ get_cusparse_handle()

cusparseContext* gko::CudaExecutor::get_cusparse_handle ( ) const

inline

Get the cusparse handle for this executor.

Returns: the cusparse handle (cusparseContext*) for this executor

◆ get_description()

std::string gko::CudaExecutor::get_description ( ) const

override virtual

Returns: a textual representation of the executor and its device.

Implements gko::Executor.

◆ get_master() [1/2]

std::shared_ptr<const Executor> gko::CudaExecutor::get_master ( ) const

override virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns: the master OmpExecutor of this Executor.

Implements gko::Executor.

◆ get_master() [2/2]

std::shared_ptr<Executor> gko::CudaExecutor::get_master ( )

override virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns: the master OmpExecutor of this Executor.

Implements gko::Executor.

◆ get_sparselib_handle()

cusparseContext* gko::CudaExecutor::get_sparselib_handle ( ) const

inline

Get the cusparse handle for this executor.

Returns: the cusparse handle (cusparseContext*) for this executor

◆ get_stream()

CUstream_st* gko::CudaExecutor::get_stream ( ) const

inline

Returns the CUDA stream used by this executor.

Can be nullptr for the default stream.

Returns: the stream used to execute kernels and memory operations.

◆ run() [1/3]

template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >

void gko::Executor::run	(	const ClosureOmp &	op_omp,
		const ClosureCuda &	op_cuda,
		const ClosureHip &	op_hip,
		const ClosureDpcpp &	op_dpcpp
	) const

inline

Runs one of the passed in functors, depending on the Executor type.

Template Parameters

ClosureOmp	type of op_omp
ClosureCuda	type of op_cuda
ClosureHip	type of op_hip
ClosureDpcpp	type of op_dpcpp

Parameters

op_omp	functor to run in case of an OmpExecutor or ReferenceExecutor
op_cuda	functor to run in case of a CudaExecutor
op_hip	functor to run in case of a HipExecutor
op_dpcpp	functor to run in case of a DpcppExecutor

◆ run() [2/3]

virtual void gko::Executor::run ( const Operation & op ) const

Runs the specified Operation using this Executor.

Parameters

op	the operation to run

◆ run() [3/3]

template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >

void gko::Executor::run	(	std::string	name,
		const ClosureReference &	op_ref,
		const ClosureOmp &	op_omp,
		const ClosureCuda &	op_cuda,
		const ClosureHip &	op_hip,
		const ClosureDpcpp &	op_dpcpp
	) const

inline

Runs one of the passed in functors, depending on the Executor type.

Template Parameters

ClosureReference	type of op_ref
ClosureOmp	type of op_omp
ClosureCuda	type of op_cuda
ClosureHip	type of op_hip
ClosureDpcpp	type of op_dpcpp

Parameters

name	the name of the operation
op_ref	functor to run in case of a ReferenceExecutor
op_omp	functor to run in case of an OmpExecutor
op_cuda	functor to run in case of a CudaExecutor
op_hip	functor to run in case of a HipExecutor
op_dpcpp	functor to run in case of a DpcppExecutor

The documentation for this class was generated from the following file:

ginkgo/core/base/executor.hpp

Public Member Functions

Static Public Member Functions

Detailed Description

Member Function Documentation

◆ create() [1/2]

◆ create() [2/2]

◆ get_blas_handle()

◆ get_closest_numa()

◆ get_closest_pus()

◆ get_cublas_handle()

◆ get_cusparse_handle()

◆ get_description()

◆ get_master() [1/2]

◆ get_master() [2/2]

◆ get_sparselib_handle()

◆ get_stream()

◆ run() [1/3]

◆ run() [2/3]

◆ run() [3/3]