The ParILU convergence example..
This example depends on simple-solver.
Introduction
About the example
This example can be used to inspect the convergence behavior of parallel incomplete factorizations. *
The commented program
template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
std::cerr <<
"Error: " << err.
what() <<
'\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->
write(residual_data);
double residual_norm{};
for (
auto entry : residual_data.
nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
print usage message
if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}
generate executor based on first argument
auto exec = try_generate([&] { return executors.at(argv[1])(); });
set matrix and preconditioner name with default values
std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);
load matrix file into Csr format
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
return gko::read<gko::matrix::Csr<ValueType, IndexType>>(mtx_stream,
exec);
}));
auto factory_generator =
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto one = gko::initialize<gko::matrix::Dense<ValueType>>({1.0}, exec);
auto minus_one =
gko::initialize<gko::matrix::Dense<ValueType>>({-1.0}, exec);
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
gko::as<gko::Composition<ValueType>>(
factory->generate(mtx));
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}
Results
This is the expected output:
Usage: executable <reference|omp|cuda|hip|dpcpp> [<matrix-file>] [<parilu|parilut|paric|parict] [<
max-iterations>] [<num-repetitions>] [fill-in-limit]
When specifying an executor:
Reading data/A.mtx
1;71800;10300;8800;8200;8000;7700;7500;7500;7500;7400;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;
2;15500;9100;13500;9000;8600;8800;8700;8600;8600;8500;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
3;16500;10200;10100;10100;9900;10000;9800;9800;9900;9900;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
4;17500;11500;11200;15600;11300;11200;11400;11200;11200;11100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
5;18800;12800;12700;12600;12500;12400;12400;12400;12400;14100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
6;19200;13400;23100;15400;13200;13000;13000;13000;13100;13000;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
7;20500;14500;14400;14200;14200;14300;14200;14100;14300;14200;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
8;21600;15700;86200;16300;15700;15600;15500;15400;15500;15600;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
9;22700;17000;16700;16600;16700;16800;20400;17400;17500;17400;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
10;25500;19000;18800;18700;18700;18800;18600;18700;18600;18700;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
Comments about programming and debugging
The plain program
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <ginkgo/ginkgo.hpp>
const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
executors{
{"reference", [] { return gko::ReferenceExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp", [] {
}}};
template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
std::cerr <<
"Error: " << err.
what() <<
'\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->
write(residual_data);
double residual_norm{};
for (
auto entry : residual_data.
nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}
auto exec = try_generate([&] { return executors.at(argv[1])(); });
std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
return gko::read<gko::matrix::Csr<ValueType, IndexType>>(mtx_stream,
exec);
}));
auto factory_generator =
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto one = gko::initialize<gko::matrix::Dense<ValueType>>({1.0}, exec);
auto minus_one =
gko::initialize<gko::matrix::Dense<ValueType>>({-1.0}, exec);
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
gko::as<gko::Composition<ValueType>>(
factory->generate(mtx));
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}