Updated the GPU performance sample

This commit is contained in:
Alexey Spizhevoy 2012-02-16 13:08:22 +00:00
parent c908c50131
commit 630d874e03
3 changed files with 45 additions and 35 deletions

View File

@ -60,14 +60,10 @@ void TestSystem::finishCurrentSubtest()
// There is no need to print subtest statistics
return;
//int cpu_time = static_cast<int>(cpu_elapsed_ / getTickFrequency() * 1000.0);
//int gpu_time = static_cast<int>(gpu_elapsed_ / getTickFrequency() * 1000.0);
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
double speedup = static_cast<double>(cpu_elapsed_) /
std::max((int64)1, gpu_elapsed_);
double speedup = static_cast<double>(cpu_elapsed_) / std::max((int64)1, gpu_elapsed_);
speedup_total_ += speedup;
printMetrics(cpu_time, gpu_time, speedup);
@ -77,6 +73,15 @@ void TestSystem::finishCurrentSubtest()
}
double TestSystem::meanTime(const vector<int64> &samples)
{
double sum = accumulate(samples.begin(), samples.end(), 0.);
if (samples.size() > 1)
return (sum - samples[0]) / (samples.size() - 1);
return sum;
}
void TestSystem::printHeading()
{
cout << endl;
@ -210,7 +215,7 @@ int main(int argc, const char* argv[])
if (list)
TestSystem::instance().setListMode(true);
TestSystem::instance().setIters(iters);
TestSystem::instance().setNumIters(iters);
TestSystem::instance().run();

View File

@ -41,7 +41,7 @@ public:
void setTestFilter(const std::string& val) { test_filter_ = val; }
const std::string& testFilter() const { return test_filter_; }
void setIters(int iters) { iters_ = iters; }
void setNumIters(int num_iters) { num_iters_ = num_iters; }
void addInit(Runnable* init) { inits_.push_back(init); }
void addTest(Runnable* test) { tests_.push_back(test); }
@ -56,21 +56,20 @@ public:
return cur_subtest_description_;
}
bool stop() const { return it_ >= iters_; }
bool stop() const { return cur_iter_idx_ >= num_iters_; }
void cpuOn() { cpu_started_ = cv::getTickCount(); }
void cpuOff()
{
int64 delta = cv::getTickCount() - cpu_started_;
cpu_times_.push_back(delta);
++it_;
++cur_iter_idx_;
}
void cpuComplete()
{
double delta_mean = std::accumulate(cpu_times_.begin(), cpu_times_.end(), 0.0) / iters_;
cpu_elapsed_ += delta_mean;
cpu_elapsed_ += meanTime(cpu_times_);
cur_subtest_is_empty_ = false;
it_ = 0;
cur_iter_idx_ = 0;
}
void gpuOn() { gpu_started_ = cv::getTickCount(); }
@ -78,30 +77,28 @@ public:
{
int64 delta = cv::getTickCount() - gpu_started_;
gpu_times_.push_back(delta);
++it_;
++cur_iter_idx_;
}
void gpuComplete()
{
double delta_mean = std::accumulate(gpu_times_.begin(), gpu_times_.end(), 0.0) / iters_;
gpu_elapsed_ += delta_mean;
gpu_elapsed_ += meanTime(gpu_times_);
cur_subtest_is_empty_ = false;
it_ = 0;
cur_iter_idx_ = 0;
}
bool isListMode() const { return is_list_mode_; }
void setListMode(bool value) { is_list_mode_ = value; }
private:
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),
gpu_elapsed_(0), speedup_total_(0.0),
num_subtests_called_(0),
is_list_mode_(false)
TestSystem():
cur_subtest_is_empty_(true), cpu_elapsed_(0),
gpu_elapsed_(0), speedup_total_(0.0),
num_subtests_called_(0), is_list_mode_(false),
num_iters_(10), cur_iter_idx_(0)
{
iters_ = 10;
it_ = 0;
cpu_times_.reserve(iters_);
gpu_times_.reserve(iters_);
}
cpu_times_.reserve(num_iters_);
gpu_times_.reserve(num_iters_);
}
void finishCurrentSubtest();
void resetCurrentSubtest()
@ -110,11 +107,13 @@ private:
gpu_elapsed_ = 0;
cur_subtest_description_.str("");
cur_subtest_is_empty_ = true;
it_ = 0;
cur_iter_idx_ = 0;
cpu_times_.clear();
gpu_times_.clear();
}
double meanTime(const std::vector<int64> &samples);
void printHeading();
void printSummary();
void printMetrics(double cpu_time, double gpu_time, double speedup);
@ -136,8 +135,8 @@ private:
bool is_list_mode_;
int iters_;
int it_;
int num_iters_;
int cur_iter_idx_;
std::vector<int64> cpu_times_;
std::vector<int64> gpu_times_;
};
@ -164,13 +163,21 @@ private:
#define SUBTEST TestSystem::instance().startNewSubtest()
#define CPU_ON while (!TestSystem::instance().stop()) { TestSystem::instance().cpuOn()
#define CPU_OFF TestSystem::instance().cpuOff(); } TestSystem::instance().cpuComplete()
#define CPU_ON \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().cpuOn()
#define CPU_OFF \
TestSystem::instance().cpuOff(); \
} TestSystem::instance().cpuComplete()
#define GPU_ON while (!TestSystem::instance().stop()) { TestSystem::instance().gpuOn()
#define GPU_OFF TestSystem::instance().gpuOff(); } TestSystem::instance().gpuComplete()
#define GPU_ON \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().gpuOn()
#define GPU_OFF \
TestSystem::instance().gpuOff(); \
} TestSystem::instance().gpuComplete()
// Generates matrix
// Generates a matrix
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
cv::Scalar high);

View File

@ -1203,13 +1203,11 @@ TEST(FarnebackOpticalFlow)
calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0;
gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy;
calc(d_frame0, d_frame1, d_flowx, d_flowy);
GPU_ON;
calc(d_frame0, d_frame1, d_flowx, d_flowy);
GPU_OFF;
Mat flow;
calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
CPU_ON;
calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
CPU_OFF;