This is the Executor subclass which represents the CUDA device. More...

#include <ginkgo/core/base/executor.hpp>

Inheritance diagram for gko::CudaExecutor:

Collaboration diagram for gko::CudaExecutor:

Public Member Functions
std::shared_ptr< Executor >	get_master () noexcept override
	Returns the master OmpExecutor of this Executor. More...

std::shared_ptr< const Executor >	get_master () const noexcept override
	Returns the master OmpExecutor of this Executor. More...

void	synchronize () const override
	Synchronize the operations launched on the executor with its master.

scoped_device_id_guard	get_scoped_device_id_guard () const override

int	get_device_id () const noexcept
	Get the CUDA device id of the device associated with this executor.

int	get_num_warps_per_sm () const noexcept
	Get the number of warps per SM of this executor.

int	get_num_multiprocessor () const noexcept
	Get the number of multiprocessors of this executor.

int	get_num_warps () const noexcept
	Get the number of warps of this executor.

int	get_warp_size () const noexcept
	Get the warp size of this executor.

int	get_major_version () const noexcept
	Get the major version of compute capability.

int	get_minor_version () const noexcept
	Get the minor version of compute capability.

cublasContext *	get_cublas_handle () const
	Get the cublas handle for this executor. More...

cusparseContext *	get_cusparse_handle () const
	Get the cusparse handle for this executor. More...

std::vector< int >	get_closest_pus () const
	Get the closest PUs. More...

int	get_closest_numa () const
	Get the closest NUMA node. More...

CUstream_st *	get_stream () const
	Returns the CUDA stream used by this executor. More...

Static Public Member Functions
static std::shared_ptr< CudaExecutor >	create (int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
	Creates a new CudaExecutor. More...

static std::shared_ptr< CudaExecutor >	create (int device_id, std::shared_ptr< Executor > master, std::shared_ptr< CudaAllocatorBase > alloc=std::make_shared< CudaAllocator >(), CUstream_st *stream=nullptr)
	Creates a new CudaExecutor with a custom allocator and device stream. More...

static int	get_num_devices ()
	Get the number of devices present on the system.

Detailed Description

This is the Executor subclass which represents the CUDA device.

Member Function Documentation

◆ create() [1/2]

static std::shared_ptr<CudaExecutor> gko::CudaExecutor::create	(	int	device_id,
		std::shared_ptr< Executor >	master,
		bool	device_reset,
		allocation_mode	alloc_mode = `default_cuda_alloc_mode`,
		CUstream_st *	stream = `nullptr`
	)

static

Creates a new CudaExecutor.

Parameters

device_id	the CUDA device id of this device
master	an executor on the host that is used to invoke the device kernels
device_reset	this option no longer has any effect.
alloc_mode	the allocation mode that the executor should operate in. See allocation_mode for more details.
stream	the stream to execute operations on.

◆ create() [2/2]

static std::shared_ptr<CudaExecutor> gko::CudaExecutor::create	(	int	device_id,
		std::shared_ptr< Executor >	master,
		std::shared_ptr< CudaAllocatorBase >	alloc = `std::make_shared< CudaAllocator >()`,
		CUstream_st *	stream = `nullptr`
	)

static

Creates a new CudaExecutor with a custom allocator and device stream.

Parameters

device_id	the CUDA device id of this device
master	an executor on the host that is used to invoke the device kernels.
alloc	the allocator to use for device memory allocations.
stream	the stream to execute operations on.

◆ get_closest_numa()

int gko::CudaExecutor::get_closest_numa ( ) const

inline

Get the closest NUMA node.

Returns: the NUMA node closest to this device

◆ get_closest_pus()

std::vector<int> gko::CudaExecutor::get_closest_pus ( ) const

inline

Get the closest PUs.

Returns: the array of PUs closest to this device

◆ get_cublas_handle()

cublasContext* gko::CudaExecutor::get_cublas_handle ( ) const

inline

Get the cublas handle for this executor.

Returns: the cublas handle (cublasContext*) for this executor

◆ get_cusparse_handle()

cusparseContext* gko::CudaExecutor::get_cusparse_handle ( ) const

inline

Get the cusparse handle for this executor.

Returns: the cusparse handle (cusparseContext*) for this executor

◆ get_master() [1/2]

std::shared_ptr<const Executor> gko::CudaExecutor::get_master ( ) const

override virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns: the master OmpExecutor of this Executor.

Implements gko::Executor.

◆ get_master() [2/2]

std::shared_ptr<Executor> gko::CudaExecutor::get_master ( )

override virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns: the master OmpExecutor of this Executor.

Implements gko::Executor.

◆ get_stream()

CUstream_st* gko::CudaExecutor::get_stream ( ) const

inline

Returns the CUDA stream used by this executor.

Can be nullptr for the default stream.

Returns: the stream used to execute kernels and memory operations.

The documentation for this class was generated from the following file:

ginkgo/core/base/executor.hpp

Public Member Functions

Static Public Member Functions

Detailed Description

Member Function Documentation

◆ create() [1/2]

◆ create() [2/2]

◆ get_closest_numa()

◆ get_closest_pus()

◆ get_cublas_handle()

◆ get_cusparse_handle()

◆ get_master() [1/2]

◆ get_master() [2/2]

◆ get_stream()