The multigrid preconditioned solver example.
This example depends on preconditioned-solver.
 
  This example shows how to use the multigrid preconditioner.
In this example, we first read in a matrix from a file. The CG solver is then equipped with a multigrid preconditioner. The example reports the generation time and the runtime of the preconditioned CG solver.
 
The commented program
{"cuda",
 [] {
 }},
{"hip",
 [] {
 }},
{"dpcpp",
 [] {
         0, gko::ReferenceExecutor::create());
 }},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
Select the executor on which Ginkgo will perform the computation
const auto exec = exec_map.at(executor_string)();  
Read data
auto A = 
share(gko::read<mtx>(std::ifstream(
"data/A.mtx"), exec));
 
Create RHS as 1 and initial guess as 0
auto host_x = vec::create(exec->get_master(), 
gko::dim<2>(size, 1));
 
auto host_b = vec::create(exec->get_master(), 
gko::dim<2>(size, 1));
 
for (auto i = 0; i < size; i++) {
    host_x->at(i, 0) = 0.;
    host_b->at(i, 0) = 1.;
}
auto x = vec::create(exec);
auto b = vec::create(exec);
x->copy_from(host_x);
b->copy_from(host_b);
 Calculate initial residual by overwriting b
auto one = gko::initialize<vec>({1.0}, exec);
 
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
Copy b again, because the residual computation above overwrote it
Create multigrid factory
std::shared_ptr<gko::LinOpFactory> multigrid_gen;
multigrid_gen =
    mg::build()
        .with_mg_level(pgm::build().with_deterministic(true))
        .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
        .on(exec);
auto solver_gen =
    cg::build()
        .with_criteria(gko::stop::Iteration::build().with_max_iters(100u),
                           .with_baseline(gko::stop::mode::absolute)
                           .with_reduction_factor(tolerance))
        .with_preconditioner(multigrid_gen)
        .on(exec);
Create solver
std::chrono::nanoseconds gen_time(0);
auto gen_tic = std::chrono::steady_clock::now();
auto solver = solver_gen->generate(A);
 
exec->synchronize();
auto gen_toc = std::chrono::steady_clock::now();
gen_time +=
    std::chrono::duration_cast<std::chrono::nanoseconds>(gen_toc - gen_tic);
Add logger
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
Solve system
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
exec->synchronize();
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
Calculate residual
auto res = gko::as<vec>(logger->get_residual_norm());
 
std::cout << "Initial residual norm sqrt(r^T r): \n";
write(std::cout, initres);
 
std::cout << "Final residual norm sqrt(r^T r): \n";
Print solver statistics
    std::cout << "CG iteration count:     " << logger->get_num_iterations()
              << std::endl;
    std::cout << "CG generation time [ms]: "
              << static_cast<double>(gen_time.count()) / 1000000.0 << std::endl;
    std::cout << "CG execution time [ms]: "
              << static_cast<double>(time.count()) / 1000000.0 << std::endl;
    std::cout << "CG execution time per iteration[ms]: "
              << static_cast<double>(time.count()) / 1000000.0 /
                     logger->get_num_iterations()
              << std::endl;
}
 
Results
This is the expected output:
Initial residual norm sqrt(r^T r):
1 1
4.3589
Final residual norm sqrt(r^T r):
1 1
1.69858e-09
CG iteration count:     39
CG generation time [ms]: 2.04293
CG execution time [ms]: 22.3874
CG execution time per iteration[ms]: 0.574036
Comments about programming and debugging 
 
The plain program
 
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
 
#include <ginkgo/ginkgo.hpp>
 
 
int main(int argc, char* argv[])
{
    using ValueType = double;
    using IndexType = int;
 
 
    const auto executor_string = argc >= 2 ? argv[1] : "reference";
    std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
        exec_map{
            {"cuda",
             [] {
             }},
            {"hip",
             [] {
             }},
            {"dpcpp",
             [] {
                     0, gko::ReferenceExecutor::create());
             }},
            {"reference", [] { return gko::ReferenceExecutor::create(); }}};
 
    const auto exec = exec_map.at(executor_string)();  
 
    auto A = 
share(gko::read<mtx>(std::ifstream(
"data/A.mtx"), exec));
 
    auto host_x = vec::create(exec->get_master(), 
gko::dim<2>(size, 1));
 
    auto host_b = vec::create(exec->get_master(), 
gko::dim<2>(size, 1));
 
    for (auto i = 0; i < size; i++) {
        host_x->at(i, 0) = 0.;
        host_b->at(i, 0) = 1.;
    }
    auto x = vec::create(exec);
    auto b = vec::create(exec);
    x->copy_from(host_x);
    b->copy_from(host_b);
 
    auto one = gko::initialize<vec>({1.0}, exec);
 
    auto neg_one = gko::initialize<vec>({-1.0}, exec);
    auto initres = gko::initialize<vec>({0.0}, exec);
    A->apply(one, x, neg_one, b);
    b->compute_norm2(initres);
 
    b->copy_from(host_b);
 
    std::shared_ptr<gko::LinOpFactory> multigrid_gen;
    multigrid_gen =
        mg::build()
            .with_mg_level(pgm::build().with_deterministic(true))
            .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
            .on(exec);
    auto solver_gen =
        cg::build()
            .with_criteria(gko::stop::Iteration::build().with_max_iters(100u),
                               .with_baseline(gko::stop::mode::absolute)
                               .with_reduction_factor(tolerance))
            .with_preconditioner(multigrid_gen)
            .on(exec);
    std::chrono::nanoseconds gen_time(0);
    auto gen_tic = std::chrono::steady_clock::now();
    auto solver = solver_gen->generate(A);
 
    exec->synchronize();
    auto gen_toc = std::chrono::steady_clock::now();
    gen_time +=
        std::chrono::duration_cast<std::chrono::nanoseconds>(gen_toc - gen_tic);
 
    std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
 
    exec->synchronize();
    std::chrono::nanoseconds time(0);
    auto tic = std::chrono::steady_clock::now();
    exec->synchronize();
    auto toc = std::chrono::steady_clock::now();
    time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
 
    auto res = gko::as<vec>(logger->get_residual_norm());
 
    std::cout << "Initial residual norm sqrt(r^T r): \n";
    write(std::cout, initres);
 
    std::cout << "Final residual norm sqrt(r^T r): \n";
 
    std::cout << "CG iteration count:     " << logger->get_num_iterations()
              << std::endl;
    std::cout << "CG generation time [ms]: "
              << static_cast<double>(gen_time.count()) / 1000000.0 << std::endl;
    std::cout << "CG execution time [ms]: "
              << static_cast<double>(time.count()) / 1000000.0 << std::endl;
    std::cout << "CG execution time per iteration[ms]: "
              << static_cast<double>(time.count()) / 1000000.0 /
                     logger->get_num_iterations()
              << std::endl;
}