mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
Merge pull request #11244 from alalek:cuda_samples_drop_performance
This commit is contained in:
commit
b6d45b9743
@ -60,4 +60,3 @@ foreach(sample_filename ${all_samples})
|
||||
ocv_target_link_libraries(${tgt} opencv_cudacodec)
|
||||
endif()
|
||||
endforeach()
|
||||
include("performance/CMakeLists.txt" OPTIONAL)
|
||||
|
@ -1,37 +0,0 @@
|
||||
# Build script for the CUDA "performance" sample: one executable assembled
# from every source and header found under performance/.
set(the_target "example_gpu_performance")

file(GLOB sources "performance/*.cpp")
file(GLOB headers "performance/*.h")

# Optional modules contribute their include directories only when the
# matching HAVE_<module> flag is set.
foreach(perf_optional_module opencv_xfeatures2d opencv_bgsegm)
  if(HAVE_${perf_optional_module})
    ocv_include_modules_recurse(${perf_optional_module})
  endif()
endforeach()

add_executable(${the_target} ${sources} ${headers})
ocv_target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS})

# Link the same optional modules once the target exists.
foreach(perf_optional_module opencv_xfeatures2d opencv_bgsegm)
  if(HAVE_${perf_optional_module})
    ocv_target_link_libraries(${the_target} ${perf_optional_module})
  endif()
endforeach()

set_target_properties(${the_target} PROPERTIES
  OUTPUT_NAME "performance_gpu"
  PROJECT_LABEL "(EXAMPLE_CUDA) performance")

if(ENABLE_SOLUTION_FOLDERS)
  set_target_properties(${the_target} PROPERTIES FOLDER "samples//gpu")
endif()

# The binary itself is installed on Windows only; the sources are always
# handed to ocv_install_example_src.
if(WIN32)
  install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/gpu" COMPONENT samples)
endif()

ocv_install_example_src("gpu/performance" performance/*.cpp performance/*.h)
|
@ -1,227 +0,0 @@
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include "performance.h"
|
||||
#include "opencv2/core/cuda.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
using namespace cv::cuda;
|
||||
|
||||
void TestSystem::run()
|
||||
{
|
||||
if (is_list_mode_)
|
||||
{
|
||||
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
|
||||
cout << (*it)->name() << endl;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Run test initializers
|
||||
for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
|
||||
{
|
||||
if ((*it)->name().find(test_filter_, 0) != string::npos)
|
||||
(*it)->run();
|
||||
}
|
||||
|
||||
printHeading();
|
||||
|
||||
// Run tests
|
||||
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
|
||||
{
|
||||
try
|
||||
{
|
||||
if ((*it)->name().find(test_filter_, 0) != string::npos)
|
||||
{
|
||||
cout << endl << (*it)->name() << ":\n";
|
||||
(*it)->run();
|
||||
finishCurrentSubtest();
|
||||
}
|
||||
}
|
||||
catch (const Exception&)
|
||||
{
|
||||
// Message is printed via callback
|
||||
resetCurrentSubtest();
|
||||
}
|
||||
catch (const runtime_error& e)
|
||||
{
|
||||
printError(e.what());
|
||||
resetCurrentSubtest();
|
||||
}
|
||||
}
|
||||
|
||||
printSummary();
|
||||
}
|
||||
|
||||
|
||||
void TestSystem::finishCurrentSubtest()
|
||||
{
|
||||
if (cur_subtest_is_empty_)
|
||||
// There is no need to print subtest statistics
|
||||
return;
|
||||
|
||||
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
|
||||
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
|
||||
|
||||
double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
|
||||
speedup_total_ += speedup;
|
||||
|
||||
printMetrics(cpu_time, gpu_time, speedup);
|
||||
|
||||
num_subtests_called_++;
|
||||
resetCurrentSubtest();
|
||||
}
|
||||
|
||||
|
||||
double TestSystem::meanTime(const vector<int64> &samples)
|
||||
{
|
||||
double sum = accumulate(samples.begin(), samples.end(), 0.);
|
||||
if (samples.size() > 1)
|
||||
return (sum - samples[0]) / (samples.size() - 1);
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
void TestSystem::printHeading()
|
||||
{
|
||||
cout << endl;
|
||||
cout << setiosflags(ios_base::left);
|
||||
cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
|
||||
<< setw(14) << "SPEEDUP"
|
||||
<< "DESCRIPTION\n";
|
||||
cout << resetiosflags(ios_base::left);
|
||||
}
|
||||
|
||||
|
||||
void TestSystem::printSummary()
|
||||
{
|
||||
cout << setiosflags(ios_base::fixed);
|
||||
cout << "\naverage GPU speedup: x"
|
||||
<< setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
|
||||
<< endl;
|
||||
cout << resetiosflags(ios_base::fixed);
|
||||
}
|
||||
|
||||
|
||||
void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup)
|
||||
{
|
||||
cout << TAB << setiosflags(ios_base::left);
|
||||
stringstream stream;
|
||||
|
||||
stream << cpu_time;
|
||||
cout << setw(10) << stream.str();
|
||||
|
||||
stream.str("");
|
||||
stream << gpu_time;
|
||||
cout << setw(10) << stream.str();
|
||||
|
||||
stream.str("");
|
||||
stream << "x" << setprecision(3) << speedup;
|
||||
cout << setw(14) << stream.str();
|
||||
|
||||
cout << cur_subtest_description_.str();
|
||||
cout << resetiosflags(ios_base::left) << endl;
|
||||
}
|
||||
|
||||
|
||||
void TestSystem::printError(const std::string& msg)
|
||||
{
|
||||
cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
|
||||
}
|
||||
|
||||
|
||||
// (Re)allocates mat as a rows x cols matrix of the given type and fills it
// with uniformly distributed values bounded by low and high.
void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high)
{
    mat.create(rows, cols, type);

    // The generator is always seeded with 0, so every call starts from the
    // same generator state.
    RNG generator(0);
    generator.fill(mat, RNG::UNIFORM, low, high);
}
|
||||
|
||||
|
||||
// Prepends the test system's working directory to relpath (plain string
// concatenation; no separator is inserted here).
string abspath(const string& relpath)
{
    const string& base_dir = TestSystem::instance().workingDir();
    return base_dir + relpath;
}
|
||||
|
||||
|
||||
static int cvErrorCallback(int /*status*/, const char* /*func_name*/,
|
||||
const char* err_msg, const char* /*file_name*/,
|
||||
int /*line*/, void* /*userdata*/)
|
||||
{
|
||||
TestSystem::instance().printError(err_msg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char* argv[])
|
||||
{
|
||||
int num_devices = getCudaEnabledDeviceCount();
|
||||
if (num_devices == 0)
|
||||
{
|
||||
cerr << "No GPU found or the library was compiled without CUDA support";
|
||||
return -1;
|
||||
}
|
||||
|
||||
redirectError(cvErrorCallback);
|
||||
|
||||
const char* keys =
|
||||
"{ h help | | print help message }"
|
||||
"{ f filter | | filter for test }"
|
||||
"{ w workdir | | set working directory }"
|
||||
"{ l list | | show all tests }"
|
||||
"{ d device | 0 | device id }"
|
||||
"{ i iters | 10 | iteration count }";
|
||||
|
||||
CommandLineParser cmd(argc, argv, keys);
|
||||
|
||||
if (cmd.has("help") || !cmd.check())
|
||||
{
|
||||
cmd.printMessage();
|
||||
cmd.printErrors();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int device = cmd.get<int>("device");
|
||||
if (device < 0 || device >= num_devices)
|
||||
{
|
||||
cerr << "Invalid device ID" << endl;
|
||||
return -1;
|
||||
}
|
||||
DeviceInfo dev_info(device);
|
||||
if (!dev_info.isCompatible())
|
||||
{
|
||||
cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
|
||||
return -1;
|
||||
}
|
||||
setDevice(device);
|
||||
printShortCudaDeviceInfo(device);
|
||||
|
||||
string filter = cmd.get<string>("filter");
|
||||
string workdir = cmd.get<string>("workdir");
|
||||
bool list = cmd.has("list");
|
||||
int iters = cmd.get<int>("iters");
|
||||
|
||||
if (!filter.empty())
|
||||
TestSystem::instance().setTestFilter(filter);
|
||||
|
||||
if (!workdir.empty())
|
||||
{
|
||||
if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
|
||||
workdir += '/';
|
||||
|
||||
TestSystem::instance().setWorkingDir(workdir);
|
||||
}
|
||||
|
||||
if (list)
|
||||
TestSystem::instance().setListMode(true);
|
||||
|
||||
TestSystem::instance().setNumIters(iters);
|
||||
|
||||
cout << "\nNote: the timings for GPU don't include data transfer" << endl;
|
||||
|
||||
TestSystem::instance().run();
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,188 +0,0 @@
|
||||
#ifndef OPENCV_CUDA_SAMPLE_PERFORMANCE_H_
|
||||
#define OPENCV_CUDA_SAMPLE_PERFORMANCE_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <opencv2/core/utility.hpp>
|
||||
|
||||
#define TAB " "
|
||||
|
||||
// Abstract named unit of work. Derived classes supply run(); the name is
// fixed at construction time.
class Runnable
{
public:
    explicit Runnable(const std::string& nameStr)
        : name_(nameStr)
    {
    }

    virtual ~Runnable()
    {
    }

    // Name given at construction.
    const std::string& name() const
    {
        return name_;
    }

    // The work itself, implemented by derived classes.
    virtual void run() = 0;

private:
    std::string name_;
};
|
||||
|
||||
|
||||
class TestSystem
|
||||
{
|
||||
public:
|
||||
static TestSystem& instance()
|
||||
{
|
||||
static TestSystem me;
|
||||
return me;
|
||||
}
|
||||
|
||||
void setWorkingDir(const std::string& val) { working_dir_ = val; }
|
||||
const std::string& workingDir() const { return working_dir_; }
|
||||
|
||||
void setTestFilter(const std::string& val) { test_filter_ = val; }
|
||||
const std::string& testFilter() const { return test_filter_; }
|
||||
|
||||
void setNumIters(int num_iters) { num_iters_ = num_iters; }
|
||||
|
||||
void addInit(Runnable* init) { inits_.push_back(init); }
|
||||
void addTest(Runnable* test) { tests_.push_back(test); }
|
||||
void run();
|
||||
|
||||
// It's public because OpenCV callback uses it
|
||||
void printError(const std::string& msg);
|
||||
|
||||
std::stringstream& startNewSubtest()
|
||||
{
|
||||
finishCurrentSubtest();
|
||||
return cur_subtest_description_;
|
||||
}
|
||||
|
||||
bool stop() const { return cur_iter_idx_ >= num_iters_; }
|
||||
|
||||
void cpuOn() { cpu_started_ = cv::getTickCount(); }
|
||||
void cpuOff()
|
||||
{
|
||||
int64 delta = cv::getTickCount() - cpu_started_;
|
||||
cpu_times_.push_back(delta);
|
||||
++cur_iter_idx_;
|
||||
}
|
||||
void cpuComplete()
|
||||
{
|
||||
cpu_elapsed_ += meanTime(cpu_times_);
|
||||
cur_subtest_is_empty_ = false;
|
||||
cur_iter_idx_ = 0;
|
||||
}
|
||||
|
||||
void gpuOn() { gpu_started_ = cv::getTickCount(); }
|
||||
void gpuOff()
|
||||
{
|
||||
int64 delta = cv::getTickCount() - gpu_started_;
|
||||
gpu_times_.push_back(delta);
|
||||
++cur_iter_idx_;
|
||||
}
|
||||
void gpuComplete()
|
||||
{
|
||||
gpu_elapsed_ += meanTime(gpu_times_);
|
||||
cur_subtest_is_empty_ = false;
|
||||
cur_iter_idx_ = 0;
|
||||
}
|
||||
|
||||
bool isListMode() const { return is_list_mode_; }
|
||||
void setListMode(bool value) { is_list_mode_ = value; }
|
||||
|
||||
private:
|
||||
TestSystem():
|
||||
cur_subtest_is_empty_(true), cpu_elapsed_(0),
|
||||
gpu_elapsed_(0), speedup_total_(0.0),
|
||||
num_subtests_called_(0), is_list_mode_(false),
|
||||
num_iters_(10), cur_iter_idx_(0)
|
||||
{
|
||||
cpu_times_.reserve(num_iters_);
|
||||
gpu_times_.reserve(num_iters_);
|
||||
}
|
||||
|
||||
void finishCurrentSubtest();
|
||||
void resetCurrentSubtest()
|
||||
{
|
||||
cpu_elapsed_ = 0;
|
||||
gpu_elapsed_ = 0;
|
||||
cur_subtest_description_.str("");
|
||||
cur_subtest_is_empty_ = true;
|
||||
cur_iter_idx_ = 0;
|
||||
cpu_times_.clear();
|
||||
gpu_times_.clear();
|
||||
}
|
||||
|
||||
double meanTime(const std::vector<int64> &samples);
|
||||
|
||||
void printHeading();
|
||||
void printSummary();
|
||||
void printMetrics(double cpu_time, double gpu_time, double speedup);
|
||||
|
||||
std::string working_dir_;
|
||||
std::string test_filter_;
|
||||
|
||||
std::vector<Runnable*> inits_;
|
||||
std::vector<Runnable*> tests_;
|
||||
|
||||
std::stringstream cur_subtest_description_;
|
||||
bool cur_subtest_is_empty_;
|
||||
|
||||
int64 cpu_started_;
|
||||
int64 gpu_started_;
|
||||
double cpu_elapsed_;
|
||||
double gpu_elapsed_;
|
||||
|
||||
double speedup_total_;
|
||||
int num_subtests_called_;
|
||||
|
||||
bool is_list_mode_;
|
||||
|
||||
int num_iters_;
|
||||
int cur_iter_idx_;
|
||||
std::vector<int64> cpu_times_;
|
||||
std::vector<int64> gpu_times_;
|
||||
};
|
||||
|
||||
|
||||
// GLOBAL_INIT(name) { ... }
//
// Declares struct name_init deriving from Runnable (named after `name`)
// whose constructor registers the object through TestSystem::addInit(),
// defines one instance of it, and opens the definition of name_init::run();
// the braces written after the macro become that function's body.
#define GLOBAL_INIT(name)                           \
    struct name##_init: Runnable {                  \
        name##_init(): Runnable(#name) {            \
            TestSystem::instance().addInit(this);   \
        }                                           \
        void run();                                 \
    } name##_init_instance;                         \
    void name##_init::run()
|
||||
|
||||
|
||||
// TEST(name) { ... }
//
// Declares struct name_test deriving from Runnable (named after `name`)
// whose constructor registers the object through TestSystem::addTest(),
// defines one instance of it, and opens the definition of name_test::run();
// the braces written after the macro become that function's body.
#define TEST(name)                                  \
    struct name##_test: Runnable {                  \
        name##_test(): Runnable(#name) {            \
            TestSystem::instance().addTest(this);   \
        }                                           \
        void run();                                 \
    } name##_test_instance;                         \
    void name##_test::run()
|
||||
|
||||
// Starts a new subtest and yields the std::stringstream that receives its
// description.
#define SUBTEST TestSystem::instance().startNewSubtest()

// CPU_ON; <statements>; CPU_OFF;
// The statements between the two macros form the body of a loop that is
// repeated until TestSystem::stop() reports true; each pass is timed with
// cpuOn()/cpuOff(), and cpuComplete() is called after the loop. The two
// macros must always be used as a pair because CPU_ON opens the brace that
// CPU_OFF closes.
#define CPU_ON                                      \
    while (!TestSystem::instance().stop()) {        \
        TestSystem::instance().cpuOn()
#define CPU_OFF                                     \
        TestSystem::instance().cpuOff();            \
    } TestSystem::instance().cpuComplete()

// CUDA_ON; <statements>; CUDA_OFF;
// Same structure as CPU_ON/CPU_OFF, using gpuOn()/gpuOff()/gpuComplete().
#define CUDA_ON                                     \
    while (!TestSystem::instance().stop()) {        \
        TestSystem::instance().gpuOn()
#define CUDA_OFF                                    \
        TestSystem::instance().gpuOff();            \
    } TestSystem::instance().gpuComplete()
|
||||
|
||||
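// Allocates mat as a rows x cols matrix of the given type and fills it with uniformly distributed random values bounded by low and high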
|
||||
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
|
||||
cv::Scalar high);
|
||||
|
||||
// Returns relpath prefixed with the test system's working directory
|
||||
std::string abspath(const std::string& relpath);
|
||||
|
||||
#endif // OPENCV_CUDA_SAMPLE_PERFORMANCE_H_
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user