![]() |
Ginkgo
Generated from pipelines/1764531644 branch based on develop. Ginkgo version 1.10.0
A numerical linear algebra library targeting many-core architectures
|
This Logger can be used to annotate the execution of Ginkgo functionality with profiler-specific ranges. More...
#include <ginkgo/core/log/profiler_hook.hpp>
Classes | |
struct | nested_summary_entry |
class | NestedSummaryWriter |
Receives the results from ProfilerHook::create_nested_summary(). More... | |
struct | summary_entry |
class | SummaryWriter |
Receives the results from ProfilerHook::create_summary(). More... | |
class | TableSummaryWriter |
Writes the results from ProfilerHook::create_summary() and ProfilerHook::create_nested_summary() to an ASCII table in Markdown format. More... | |
Public Types | |
using | hook_function = std::function< void(const char *, profile_event_category)> |
![]() | |
using | mask_type = gko::uint64 |
Public Member Functions | |
void | on_allocation_started (const gko::Executor *exec, const gko::size_type &) const override |
Executor's allocation started event. More... | |
void | on_allocation_completed (const gko::Executor *exec, const gko::size_type &, const gko::uintptr &) const override |
Executor's allocation completed event. More... | |
void | on_free_started (const gko::Executor *exec, const gko::uintptr &) const override |
Executor's free started event. More... | |
void | on_free_completed (const gko::Executor *exec, const gko::uintptr &) const override |
Executor's free completed event. More... | |
void | on_copy_started (const gko::Executor *from, const gko::Executor *to, const gko::uintptr &, const gko::uintptr &, const gko::size_type &) const override |
Executor's copy started event. More... | |
void | on_copy_completed (const gko::Executor *from, const gko::Executor *to, const gko::uintptr &, const gko::uintptr &, const gko::size_type &) const override |
Executor's copy completed event. More... | |
void | on_operation_launched (const Executor *exec, const Operation *operation) const override |
Executor's operation launched event (method run). More... | |
void | on_operation_completed (const Executor *exec, const Operation *operation) const override |
Executor's operation completed event (method run). More... | |
void | on_polymorphic_object_copy_started (const Executor *exec, const PolymorphicObject *from, const PolymorphicObject *to) const override |
PolymorphicObject's copy started event. More... | |
void | on_polymorphic_object_copy_completed (const Executor *exec, const PolymorphicObject *from, const PolymorphicObject *to) const override |
PolymorphicObject's copy completed event. More... | |
void | on_polymorphic_object_move_started (const Executor *exec, const PolymorphicObject *from, const PolymorphicObject *to) const override |
PolymorphicObject's move started event. More... | |
void | on_polymorphic_object_move_completed (const Executor *exec, const PolymorphicObject *from, const PolymorphicObject *to) const override |
PolymorphicObject's move completed event. More... | |
void | on_linop_apply_started (const LinOp *A, const LinOp *b, const LinOp *x) const override |
LinOp's apply started event. More... | |
void | on_linop_apply_completed (const LinOp *A, const LinOp *b, const LinOp *x) const override |
LinOp's apply completed event. More... | |
void | on_linop_advanced_apply_started (const LinOp *A, const LinOp *alpha, const LinOp *b, const LinOp *beta, const LinOp *x) const override |
LinOp's advanced apply started event. More... | |
void | on_linop_advanced_apply_completed (const LinOp *A, const LinOp *alpha, const LinOp *b, const LinOp *beta, const LinOp *x) const override |
LinOp's advanced apply completed event. More... | |
void | on_linop_factory_generate_started (const LinOpFactory *factory, const LinOp *input) const override |
LinOp Factory's generate started event. More... | |
void | on_linop_factory_generate_completed (const LinOpFactory *factory, const LinOp *input, const LinOp *output) const override |
LinOp Factory's generate completed event. More... | |
void | on_criterion_check_started (const stop::Criterion *criterion, const size_type &num_iterations, const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, const uint8 &stopping_id, const bool &set_finalized) const override |
stop::Criterion's check started event. More... | |
void | on_criterion_check_completed (const stop::Criterion *criterion, const size_type &num_iterations, const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, const uint8 &stopping_id, const bool &set_finalized, const array< stopping_status > *status, const bool &one_changed, const bool &all_stopped) const override |
stop::Criterion's check completed event. More... | |
void | on_criterion_check_completed (const stop::Criterion *criterion, const size_type &num_iterations, const LinOp *residual, const LinOp *residual_norm, const LinOp *implicit_sq_resnorm, const LinOp *solution, const uint8 &stopping_id, const bool &set_finalized, const array< stopping_status > *status, const bool &one_changed, const bool &all_stopped) const override |
stop::Criterion's check completed event. More... | |
void | on_iteration_complete (const LinOp *solver, const LinOp *right_hand_side, const LinOp *solution, const size_type &num_iterations, const LinOp *residual, const LinOp *residual_norm, const LinOp *implicit_sq_residual_norm, const array< stopping_status > *status, bool stopped) const override |
Register the iteration_complete event which logs every completed iteration. More... | |
void | on_iteration_complete (const LinOp *solver, const size_type &num_iterations, const LinOp *residual, const LinOp *solution, const LinOp *residual_norm) const override |
Register the iteration_complete event which logs every completed iteration. More... | |
void | on_iteration_complete (const LinOp *solver, const size_type &num_iterations, const LinOp *residual, const LinOp *solution, const LinOp *residual_norm, const LinOp *implicit_sq_residual_norm) const override |
Register the iteration_complete event which logs every completed iteration. More... | |
bool | needs_propagation () const override |
Returns true if this logger, when attached to an Executor, needs to be forwarded all events from objects on this executor. | |
void | set_object_name (ptr_param< const PolymorphicObject > obj, std::string name) |
Sets the name for an object to be profiled. More... | |
void | set_synchronization (bool synchronize) |
Should the events call executor->synchronize on operations and copy/allocation? This leads to a certain overhead, but makes the execution timeline of kernels synchronous. | |
profiling_scope_guard | user_range (const char *name) const |
Creates a scope guard for a user-defined range to be included in the profile. More... | |
![]() | |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==0 &&(0< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==1 &&(1< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==2 &&(2< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==3 &&(3< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==4 &&(4< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==5 &&(5< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==6 &&(6< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==7 &&(7< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==8 &&(8< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==9 &&(9< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==10 &&(10< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==11 &&(11< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==12 &&(12< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==13 &&(13< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==14 &&(14< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==15 &&(15< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==16 &&(16< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==17 &&(17< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==18 &&(18< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==19 &&(19< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==20 &&(20< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==21 &&(21< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==22 &&(22< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==23 &&(23< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==24 &&(24< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==25 &&(25< event_count_max)> | on (Params &&... params) const |
template<size_type Event, typename... Params> | |
std::enable_if_t< Event==26 &&(26< event_count_max)> | on (Params &&... params) const |
Static Public Member Functions | |
static std::shared_ptr< ProfilerHook > | create_tau (bool initialize=true) |
Creates a logger annotating Ginkgo events with TAU ranges via PerfStubs. More... | |
static std::shared_ptr< ProfilerHook > | create_vtune () |
Creates a logger annotating Ginkgo events with VTune ITT ranges. | |
static std::shared_ptr< ProfilerHook > | create_nvtx (uint32 color_argb=color_yellow_argb) |
Creates a logger annotating Ginkgo events with NVTX ranges for CUDA. More... | |
static std::shared_ptr< ProfilerHook > | create_roctx () |
Creates a logger annotating Ginkgo events with ROCTX ranges for HIP. | |
static std::shared_ptr< ProfilerHook > | create_for_executor (std::shared_ptr< const Executor > exec) |
Creates a logger annotating Ginkgo events with the most suitable backend for the given executor: NVTX for NSight Systems in CUDA, ROCTX for rocprof in HIP, TAU for everything else. | |
static std::shared_ptr< ProfilerHook > | create_summary (std::shared_ptr< Timer > timer=std::make_shared< CpuTimer >(), std::unique_ptr< SummaryWriter > writer=std::make_unique< TableSummaryWriter >(), bool debug_check_nesting=false) |
Creates a logger measuring the runtime of Ginkgo events and printing a summary when it is destroyed. More... | |
static std::shared_ptr< ProfilerHook > | create_nested_summary (std::shared_ptr< Timer > timer=std::make_shared< CpuTimer >(), std::unique_ptr< NestedSummaryWriter > writer=std::make_unique< TableSummaryWriter >(), bool debug_check_nesting=false) |
Creates a logger measuring the runtime of Ginkgo events in a nested fashion and printing a summary when it is destroyed. More... | |
static std::shared_ptr< ProfilerHook > | create_custom (hook_function begin, hook_function end) |
Creates a logger annotating Ginkgo events with a custom set of functions for range begin and end. | |
Static Public Attributes | |
constexpr static uint32 | color_yellow_argb = 0xFFFFCB05U |
The Ginkgo yellow background color as packed 32 bit ARGB value. | |
![]() | |
static constexpr size_type | event_count_max = sizeof(mask_type) * byte_size |
Maximum number of events (bits) with the current implementation. | |
static constexpr mask_type | all_events_mask = ~mask_type{0} |
Bitset Mask which activates all events. | |
static constexpr size_type | allocation_started { 0 } |
static constexpr mask_type | allocation_started_mask {mask_type{1} << 0 } |
static constexpr size_type | allocation_completed { 1 } |
static constexpr mask_type | allocation_completed_mask {mask_type{1} << 1 } |
static constexpr size_type | free_started { 2 } |
static constexpr mask_type | free_started_mask {mask_type{1} << 2 } |
static constexpr size_type | free_completed { 3 } |
static constexpr mask_type | free_completed_mask {mask_type{1} << 3 } |
static constexpr size_type | copy_started { 4 } |
static constexpr mask_type | copy_started_mask {mask_type{1} << 4 } |
static constexpr size_type | copy_completed { 5 } |
static constexpr mask_type | copy_completed_mask {mask_type{1} << 5 } |
static constexpr size_type | operation_launched { 6 } |
static constexpr mask_type | operation_launched_mask {mask_type{1} << 6 } |
static constexpr size_type | operation_completed { 7 } |
static constexpr mask_type | operation_completed_mask {mask_type{1} << 7 } |
static constexpr size_type | polymorphic_object_create_started { 8 } |
static constexpr mask_type | polymorphic_object_create_started_mask {mask_type{1} << 8 } |
static constexpr size_type | polymorphic_object_create_completed { 9 } |
static constexpr mask_type | polymorphic_object_create_completed_mask {mask_type{1} << 9 } |
static constexpr size_type | polymorphic_object_copy_started { 10 } |
static constexpr mask_type | polymorphic_object_copy_started_mask {mask_type{1} << 10 } |
static constexpr size_type | polymorphic_object_copy_completed { 11 } |
static constexpr mask_type | polymorphic_object_copy_completed_mask {mask_type{1} << 11 } |
static constexpr size_type | polymorphic_object_deleted { 12 } |
static constexpr mask_type | polymorphic_object_deleted_mask {mask_type{1} << 12 } |
static constexpr size_type | linop_apply_started { 13 } |
static constexpr mask_type | linop_apply_started_mask {mask_type{1} << 13 } |
static constexpr size_type | linop_apply_completed { 14 } |
static constexpr mask_type | linop_apply_completed_mask {mask_type{1} << 14 } |
static constexpr size_type | linop_advanced_apply_started { 15 } |
static constexpr mask_type | linop_advanced_apply_started_mask {mask_type{1} << 15 } |
static constexpr size_type | linop_advanced_apply_completed { 16 } |
static constexpr mask_type | linop_advanced_apply_completed_mask {mask_type{1} << 16 } |
static constexpr size_type | linop_factory_generate_started { 17 } |
static constexpr mask_type | linop_factory_generate_started_mask {mask_type{1} << 17 } |
static constexpr size_type | linop_factory_generate_completed { 18 } |
static constexpr mask_type | linop_factory_generate_completed_mask {mask_type{1} << 18 } |
static constexpr size_type | criterion_check_started { 19 } |
static constexpr mask_type | criterion_check_started_mask {mask_type{1} << 19 } |
static constexpr size_type | criterion_check_completed { 20 } |
static constexpr mask_type | criterion_check_completed_mask {mask_type{1} << 20 } |
static constexpr size_type | iteration_complete {21} |
static constexpr mask_type | iteration_complete_mask {mask_type{1} << 21} |
static constexpr size_type | polymorphic_object_move_started { 22 } |
static constexpr mask_type | polymorphic_object_move_started_mask {mask_type{1} << 22 } |
static constexpr size_type | polymorphic_object_move_completed { 23 } |
static constexpr mask_type | polymorphic_object_move_completed_mask {mask_type{1} << 23 } |
static constexpr size_type | batch_linop_factory_generate_started { 24 } |
static constexpr mask_type | batch_linop_factory_generate_started_mask {mask_type{1} << 24 } |
static constexpr size_type | batch_linop_factory_generate_completed { 25 } |
static constexpr mask_type | batch_linop_factory_generate_completed_mask {mask_type{1} << 25 } |
static constexpr size_type | batch_solver_completed {26} |
static constexpr mask_type | batch_solver_completed_mask {mask_type{1} << 26} |
static constexpr mask_type | executor_events_mask |
Bitset Mask which activates all executor events. More... | |
static constexpr mask_type | operation_events_mask |
Bitset Mask which activates all operation events. More... | |
static constexpr mask_type | polymorphic_object_events_mask |
Bitset Mask which activates all polymorphic object events. More... | |
static constexpr mask_type | linop_events_mask |
Bitset Mask which activates all linop events. More... | |
static constexpr mask_type | linop_factory_events_mask |
Bitset Mask which activates all linop factory events. More... | |
static constexpr mask_type | batch_linop_factory_events_mask |
Bitset Mask which activates all batch linop factory events. More... | |
static constexpr mask_type | criterion_events_mask |
Bitset Mask which activates all criterion events. More... | |
This Logger can be used to annotate the execution of Ginkgo functionality with profiler-specific ranges.
It currently supports TAU, VTune, NSight Systems (NVTX), rocprof (ROCTX), and custom profiler hooks.
The Logger should be attached to the Executor that is being used to run the application for a full, program-wide annotation, or to individual objects to only highlight events caused directly by them (not operations and memory allocations though).
|
static |
Creates a logger measuring the runtime of Ginkgo events in a nested fashion and printing a summary when it is destroyed.
timer | The timer used to record time points. |
writer | The NestedSummaryWriter to receive the performance results. |
debug_check_nesting | Enable this flag if the output looks like it might contain incorrect nesting. This increases the overhead slightly, but recognizes mismatching push/pop pairs on the range stack. |
set_synchronization(true)
.
|
static |
Creates a logger annotating Ginkgo events with NVTX ranges for CUDA.
color_argb | The color of the NVTX ranges in the NSight Systems output. It has to be a 32 bit packed ARGB value. |
|
static |
Creates a logger measuring the runtime of Ginkgo events and printing a summary when it is destroyed.
timer | The timer used to record time points. |
writer | The SummaryWriter to receive the performance results. |
debug_check_nesting | Enable this flag if the output looks like it might contain incorrect nesting. This increases the overhead slightly, but recognizes mismatching push/pop pairs on the range stack. |
set_synchronization(true)
.
|
static |
Creates a logger annotating Ginkgo events with TAU ranges via PerfStubs.
initialize | Should we call TAU's initialization and finalization functions, or does the application take care of it? The initialization will happen immediately, the finalization at program exit. |
|
overridevirtual |
Executor's allocation completed event.
exec | the executor used |
num_bytes | the number of bytes allocated |
location | the address at which the data was allocated |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Executor's allocation started event.
exec | the executor used |
num_bytes | the number of bytes to allocate |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Executor's copy completed event.
exec_from | the executor copied from |
exec_to | the executor copied to |
loc_from | the address at which the data was copied from |
loc_to | the address at which the data was copied to |
num_bytes | the number of bytes copied |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Executor's copy started event.
exec_from | the executor to be copied from |
exec_to | the executor to be copied to |
loc_from | the address at which the data will be copied from |
loc_to | the address at which the data will be copied to |
num_bytes | the number of bytes to be copied |
Reimplemented from gko::log::Logger.
|
overridevirtual |
stop::Criterion's check completed event.
Parameters are the Criterion, the stoppingId, the finalized boolean, the stopping status, plus the output one_changed boolean and output all_converged boolean.
criterion | the criterion used |
it | the current iteration count |
r | the residual |
tau | the residual norm |
implicit_tau_sq | the implicit residual norm squared |
x | the solution |
stopping_id | the id of the stopping criterion |
set_finalized | whether this finalizes the iteration |
status | the stopping status of the right hand sides |
one_changed | whether at least one right hand side converged or not |
all_converged | whether all right hand sides are converged |
Reimplemented from gko::log::Logger.
|
overridevirtual |
stop::Criterion's check completed event.
Parameters are the Criterion, the stoppingId, the finalized boolean, the stopping status, plus the output one_changed boolean and output all_converged boolean.
criterion | the criterion used |
it | the current iteration count |
r | the residual |
tau | the residual norm |
x | the solution |
stopping_id | the id of the stopping criterion |
set_finalized | whether this finalizes the iteration |
status | the stopping status of the right hand sides |
one_changed | whether at least one right hand side converged or not |
all_converged | whether all right hand sides are converged |
Reimplemented from gko::log::Logger.
|
overridevirtual |
stop::Criterion's check started event.
criterion | the criterion used |
it | the current iteration count |
r | the residual |
tau | the residual norm |
x | the solution |
stopping_id | the id of the stopping criterion |
set_finalized | whether this finalizes the iteration |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Executor's free completed event.
exec | the executor used |
location | the address at which the data was freed |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Executor's free started event.
exec | the executor used |
location | the address at which the data will be freed |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Register the iteration_complete
event which logs every completed iteration.
solver | the solver executing the iteration |
b | the right-hand-side vector |
x | the solution vector |
it | the current iteration count |
r | the residual (optional) |
tau | the residual norm (optional) |
implicit_tau_sq | the implicit residual norm squared (optional) |
status | the stopping status of the right hand sides (optional) |
stopped | whether all right hand sides have stopped (invalid if status is not provided) |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Register the iteration_complete
event which logs every completed iteration.
it | the current iteration count |
r | the residual |
x | the solution vector (optional) |
tau | the residual norm (optional) |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Register the iteration_complete
event which logs every completed iteration.
it | the current iteration count |
r | the residual |
x | the solution vector (optional) |
tau | the residual norm (optional) |
implicit_tau_sq | the implicit residual norm squared (optional) |
Reimplemented from gko::log::Logger.
|
overridevirtual |
LinOp's advanced apply completed event.
A | the system matrix |
alpha | scaling of the result of op(b) |
b | the input vector(s) |
beta | scaling of the input x |
x | the output vector(s) |
Reimplemented from gko::log::Logger.
|
overridevirtual |
LinOp's advanced apply started event.
A | the system matrix |
alpha | scaling of the result of op(b) |
b | the input vector(s) |
beta | scaling of the input x |
x | the output vector(s) |
Reimplemented from gko::log::Logger.
|
overridevirtual |
LinOp's apply completed event.
A | the system matrix |
b | the input vector(s) |
x | the output vector(s) |
Reimplemented from gko::log::Logger.
|
overridevirtual |
LinOp's apply started event.
A | the system matrix |
b | the input vector(s) |
x | the output vector(s) |
Reimplemented from gko::log::Logger.
|
overridevirtual |
LinOp Factory's generate completed event.
factory | the factory used |
input | the LinOp object used as input for the generation (usually a system matrix) |
output | the generated LinOp object |
Reimplemented from gko::log::Logger.
|
overridevirtual |
LinOp Factory's generate started event.
factory | the factory used |
input | the LinOp object used as input for the generation (usually a system matrix) |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Executor's operation completed event (method run).
exec | the executor used |
op | the completed operation |
Reimplemented from gko::log::Logger.
|
overridevirtual |
Executor's operation launched event (method run).
exec | the executor used |
op | the operation launched |
Reimplemented from gko::log::Logger.
|
overridevirtual |
PolymorphicObject's copy completed event.
exec | the executor used |
input | the PolymorphicObject to be copied from |
output | the PolymorphicObject to be copied to |
Reimplemented from gko::log::Logger.
|
overridevirtual |
PolymorphicObject's copy started event.
exec | the executor used |
input | the PolymorphicObject to be copied from |
output | the PolymorphicObject to be copied to |
Reimplemented from gko::log::Logger.
|
overridevirtual |
PolymorphicObject's move completed event.
exec | the executor used |
input | the PolymorphicObject to be moved from |
output | the PolymorphicObject to be moved into |
Reimplemented from gko::log::Logger.
|
overridevirtual |
PolymorphicObject's move started event.
exec | the executor used |
input | the PolymorphicObject to be moved from |
output | the PolymorphicObject to be moved into |
Reimplemented from gko::log::Logger.
void gko::log::ProfilerHook::set_object_name | ( | ptr_param< const PolymorphicObject > | obj, |
std::string | name | ||
) |
Sets the name for an object to be profiled.
Every instance of that object in the profile will be replaced by the name instead of its runtime type.
obj | the object |
name | its name |
profiling_scope_guard gko::log::ProfilerHook::user_range | ( | const char * | name | ) | const |
Creates a scope guard for a user-defined range to be included in the profile.
name | the name of the range |