opencv/samples/gpu/performance/performance.cpp
Vladislav Vinogradov fd88654b45 replaced GPU -> CUDA
2013-09-02 14:00:44 +04:00

227 lines
5.6 KiB
C++

#include <iomanip>
#include <stdexcept>
#include <string>
#include "performance.h"
using namespace std;
using namespace cv;
using namespace cv::cuda;
void TestSystem::run()
{
if (is_list_mode_)
{
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
cout << (*it)->name() << endl;
return;
}
// Run test initializers
for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
(*it)->run();
}
printHeading();
// Run tests
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
try
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
{
cout << endl << (*it)->name() << ":\n";
(*it)->run();
finishCurrentSubtest();
}
}
catch (const Exception&)
{
// Message is printed via callback
resetCurrentSubtest();
}
catch (const runtime_error& e)
{
printError(e.what());
resetCurrentSubtest();
}
}
printSummary();
}
void TestSystem::finishCurrentSubtest()
{
if (cur_subtest_is_empty_)
// There is no need to print subtest statistics
return;
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
speedup_total_ += speedup;
printMetrics(cpu_time, gpu_time, speedup);
num_subtests_called_++;
resetCurrentSubtest();
}
double TestSystem::meanTime(const vector<int64> &samples)
{
double sum = accumulate(samples.begin(), samples.end(), 0.);
if (samples.size() > 1)
return (sum - samples[0]) / (samples.size() - 1);
return sum;
}
// Prints the header row of the results table. Column widths (10, 10, 14)
// match the ones used for the metric rows.
void TestSystem::printHeading()
{
    cout << endl;

    // Left-align the column titles for the duration of this row only.
    cout.setf(ios_base::left);

    cout << TAB;
    cout << setw(10) << "CPU, ms";
    cout << setw(10) << "GPU, ms";
    cout << setw(14) << "SPEEDUP";
    cout << "DESCRIPTION\n";

    cout.unsetf(ios_base::left);
}
// Prints the average speedup over all finished subtests in fixed notation
// with three digits after the decimal point.
void TestSystem::printSummary()
{
    cout.setf(ios_base::fixed);

    cout << "\naverage GPU speedup: x";
    cout.precision(3);
    // Divide by at least one so that a run without subtests prints 0 instead
    // of dividing by zero.
    cout << speedup_total_ / std::max(1, num_subtests_called_);
    cout << endl;

    cout.unsetf(ios_base::fixed);
}
void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup)
{
cout << TAB << setiosflags(ios_base::left);
stringstream stream;
stream << cpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << gpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << speedup;
cout << setw(14) << stream.str();
cout << cur_subtest_description_.str();
cout << resetiosflags(ios_base::left) << endl;
}
void TestSystem::printError(const std::string& msg)
{
cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
}
// Allocates `mat` as a rows x cols matrix of the given type and fills it with
// uniformly distributed values bounded by `low` and `high`.
void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high)
{
    mat.create(rows, cols, type);

    // The generator is always constructed from the same seed (0).
    RNG seeded_generator(0);
    seeded_generator.fill(mat, RNG::UNIFORM, low, high);
}
// Returns `relpath` prefixed with the working directory of the test system.
// The two parts are concatenated as is; no separator is inserted here.
string abspath(const string& relpath)
{
    string full_path = TestSystem::instance().workingDir();
    full_path += relpath;
    return full_path;
}
static int cvErrorCallback(int /*status*/, const char* /*func_name*/,
const char* err_msg, const char* /*file_name*/,
int /*line*/, void* /*userdata*/)
{
TestSystem::instance().printError(err_msg);
return 0;
}
int main(int argc, const char* argv[])
{
int num_devices = getCudaEnabledDeviceCount();
if (num_devices == 0)
{
cerr << "No GPU found or the library was compiled without CUDA support";
return -1;
}
redirectError(cvErrorCallback);
const char* keys =
"{ h help | | print help message }"
"{ f filter | | filter for test }"
"{ w workdir | | set working directory }"
"{ l list | | show all tests }"
"{ d device | 0 | device id }"
"{ i iters | 10 | iteration count }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help") || !cmd.check())
{
cmd.printMessage();
cmd.printErrors();
return 0;
}
int device = cmd.get<int>("device");
if (device < 0 || device >= num_devices)
{
cerr << "Invalid device ID" << endl;
return -1;
}
DeviceInfo dev_info(device);
if (!dev_info.isCompatible())
{
cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
return -1;
}
setDevice(device);
printShortCudaDeviceInfo(device);
string filter = cmd.get<string>("filter");
string workdir = cmd.get<string>("workdir");
bool list = cmd.has("list");
int iters = cmd.get<int>("iters");
if (!filter.empty())
TestSystem::instance().setTestFilter(filter);
if (!workdir.empty())
{
if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
workdir += '/';
TestSystem::instance().setWorkingDir(workdir);
}
if (list)
TestSystem::instance().setListMode(true);
TestSystem::instance().setNumIters(iters);
cout << "\nNote: the timings for GPU don't include data transfer" << endl;
TestSystem::instance().run();
return 0;
}