diff --git a/samples/tapi/CMakeLists.txt b/samples/tapi/CMakeLists.txt
index 4cfb5805bd..f3aa17461a 100644
--- a/samples/tapi/CMakeLists.txt
+++ b/samples/tapi/CMakeLists.txt
@@ -1,4 +1,4 @@
-SET(OPENCV_TAPI_SAMPLES_REQUIRED_DEPS opencv_core opencv_imgproc opencv_video opencv_highgui)
+SET(OPENCV_TAPI_SAMPLES_REQUIRED_DEPS opencv_core opencv_imgproc opencv_video opencv_highgui opencv_objdetect opencv_features2d opencv_calib3d opencv_nonfree opencv_flann)
 
 ocv_check_dependencies(${OPENCV_TAPI_SAMPLES_REQUIRED_DEPS})
 
diff --git a/samples/tapi/bgfg_segm.cpp b/samples/tapi/bgfg_segm.cpp
new file mode 100644
index 0000000000..2fa12bba9d
--- /dev/null
+++ b/samples/tapi/bgfg_segm.cpp
@@ -0,0 +1,122 @@
+#include <iostream>
+#include <string>
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/video.hpp"
+
+using namespace std;
+using namespace cv;
+
+#define M_MOG  1
+#define M_MOG2 2
+
+int main(int argc, const char** argv)  // Sample entry point: MOG / MOG2 background subtraction on a camera or video stream.
+{
+    CommandLineParser cmd(argc, argv,
+        "{ c camera   | false       | use camera }"
+        "{ f file     | 768x576.avi | input video file }"
+        "{ t type     | mog         | method's type (mog, mog2) }"
+        "{ h help     | false       | print help message }"
+        "{ m cpu_mode | false       | press 'm' to switch OpenCL<->CPU}");
+
+    if (cmd.has("help"))
+    {
+        cout << "Usage : bgfg_segm [options]" << endl;
+        cout << "Available options:" << endl;
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+
+    bool useCamera = cmd.has("camera");
+    string file = cmd.get<string>("file");
+    string method = cmd.get<string>("type");
+
+    if (method != "mog" && method != "mog2")  // reject anything but the two supported method names
+    {
+        cerr << "Incorrect method" << endl;
+        return EXIT_FAILURE;
+    }
+
+    int m = method == "mog" ? M_MOG : M_MOG2;  // numeric id used by the switch statements below
+
+    VideoCapture cap;
+    if (useCamera)
+        cap.open(0);  // camera index 0
+    else
+        cap.open(file);
+
+    if (!cap.isOpened())
+    {
+        cout << "can not open camera or video file" << endl;
+        return EXIT_FAILURE;
+    }
+
+    UMat frame, fgmask, fgimg;
+    cap >> frame;  // first frame: used to size fgimg and to prime the selected model
+    fgimg.create(frame.size(), frame.type());
+
+    Ptr<BackgroundSubtractorMOG> mog = createBackgroundSubtractorMOG();  // both models are created; only the one selected by m is used
+    Ptr<BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
+
+    switch (m)  // feed the first frame to the selected model before entering the loop
+    {
+    case M_MOG:
+        mog->apply(frame, fgmask, 0.01f);
+        break;
+
+    case M_MOG2:
+        mog2->apply(frame, fgmask);
+        break;
+    }
+    bool running=true;  // cleared when ESC is pressed
+    for (;;)
+    {
+        if(!running)
+            break;
+        cap >> frame;
+        if (frame.empty())  // stop when the source delivers no frame
+            break;
+
+        int64 start = getTickCount();  // the FPS figure below covers only the model update
+
+        //update the model
+        switch (m)
+        {
+        case M_MOG:
+            mog->apply(frame, fgmask, 0.01f);
+            break;
+
+        case M_MOG2:
+            mog2->apply(frame, fgmask);
+            break;
+        }
+
+        double fps = getTickFrequency() / (getTickCount() - start);
+        std::cout << "FPS : " << fps << std::endl;
+        std::cout << fgimg.size() << std::endl;  // prints the foreground image size on every frame
+        fgimg.setTo(Scalar::all(0));  // clear, then copy the frame through fgmask
+        frame.copyTo(fgimg, fgmask);
+
+        imshow("image", frame);
+        imshow("foreground mask", fgmask);
+        imshow("foreground image", fgimg);
+
+        char key = (char)waitKey(30);
+
+        switch (key)
+        {
+        case 27:  // ESC
+            running = false;
+            break;
+        case 'm':
+        case 'M':
+            ocl::setUseOpenCL(!ocl::useOpenCL());  // flip the current OpenCL on/off state
+            cout << "Switched to " << (ocl::useOpenCL() ? "OpenCL enabled" : "CPU") << " mode\n";
+            break;
+        }
+    }
+    return EXIT_SUCCESS;
+}
diff --git a/samples/tapi/clahe.cpp b/samples/tapi/clahe.cpp
new file mode 100644
index 0000000000..a28f2ab05d
--- /dev/null
+++ b/samples/tapi/clahe.cpp
@@ -0,0 +1,107 @@
+#include <iostream>
+#include "opencv2/core/core.hpp"
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+using namespace cv;
+using namespace std;
+
+Ptr<CLAHE> pFilter;
+int tilesize;
+int cliplimit;
+
+static void TSize_Callback(int pos, void* /*data*/)  // "Tile Size" trackbar handler; the signature now matches cv::TrackbarCallback
+{
+    if(pos==0)  // trackbar position 0 is mapped to a 1x1 grid
+        pFilter->setTilesGridSize(Size(1,1));
+    else
+        pFilter->setTilesGridSize(Size(tilesize,tilesize));  // tilesize is the global registered with the trackbar
+}
+
+static void Clip_Callback(int, void* /*data*/)  // "Clip Limit" trackbar handler; the signature now matches cv::TrackbarCallback
+{
+    pFilter->setClipLimit(cliplimit);  // cliplimit is the global registered with the trackbar
+}
+
+int main(int argc, char** argv)  // Sample entry point: interactive CLAHE on a still image or a camera stream.
+{
+    const char* keys =
+        "{ i input    |                    | specify input image }"
+        "{ c camera   |  0                 | specify camera id   }"
+        "{ o output   | clahe_output.jpg   | specify output save path}"
+        "{ h help     | false              | print help message }";
+
+    cv::CommandLineParser cmd(argc, argv, keys);
+    if (cmd.has("help"))
+    {
+        cout << "Usage : clahe [options]" << endl;
+        cout << "Available options:" << endl;
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+
+    string infile = cmd.get<string>("i"), outfile = cmd.get<string>("o");
+    int camid = cmd.get<int>("c");
+    VideoCapture capture;
+
+    namedWindow("CLAHE");
+    createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback);
+    createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback);
+
+    UMat frame, outframe;
+
+    int cur_clip;
+    Size cur_tilesize;
+    pFilter = createCLAHE();  // must exist before the trackbar positions are set below
+
+    cur_clip = (int)pFilter->getClipLimit();
+    cur_tilesize = pFilter->getTilesGridSize();
+    setTrackbarPos("Tile Size", "CLAHE", cur_tilesize.width);  // show the filter's initial settings on the sliders
+    setTrackbarPos("Clip Limit", "CLAHE", cur_clip);
+
+    if(infile != "")
+    {
+        imread(infile).copyTo(frame);
+        if(frame.empty())
+        {
+            cout << "error read image: " << infile << endl;
+            return EXIT_FAILURE;
+        }
+    }
+    else if(!capture.open(camid))  // without a working camera the loop below would spin forever on empty frames
+        return cout << "error open camera: " << camid << endl, EXIT_FAILURE;
+
+    cout << "\nControls:\n"
+         << "\to - save output image\n"
+         << "\tm - switch OpenCL <-> CPU mode\n"
+         << "\tESC - exit\n";
+
+    for (;;)
+    {
+        if(capture.isOpened())
+            capture.read(frame);
+        else
+            imread(infile).copyTo(frame);  // reloaded each pass because frame is converted in place below
+        if(frame.empty())
+            continue;
+
+        cvtColor(frame, frame, COLOR_BGR2GRAY);
+        pFilter->apply(frame, outframe);
+
+        imshow("CLAHE", outframe);
+
+        char key = (char)waitKey(3);
+        if(key == 'o')
+            imwrite(outfile, outframe);
+        else if(key == 27)  // ESC
+            break;
+        else if(key == 'm')
+        {
+            ocl::setUseOpenCL(!cv::ocl::useOpenCL());
+            cout << "Switched to " << (ocl::useOpenCL() ? "OpenCL enabled" : "CPU") << " mode\n";
+        }
+    }
+    return EXIT_SUCCESS;
+}
diff --git a/samples/tapi/hog.cpp b/samples/tapi/hog.cpp
new file mode 100644
index 0000000000..ee537b310e
--- /dev/null
+++ b/samples/tapi/hog.cpp
@@ -0,0 +1,364 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <stdexcept>
+#include <opencv2/core/ocl.hpp>
+#include <opencv2/core/utility.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/objdetect.hpp>
+#include <opencv2/imgproc.hpp>
+
+using namespace std;
+using namespace cv;
+
+class App  // HOG detection demo: holds the options, the tunable detector parameters and the FPS counters.
+{
+public:
+    App(CommandLineParser& cmd);  // reads the options from cmd and prints the controls and initial settings
+    void run();  // capture / detect / display loop
+    void handleKey(char key);  // applies one keyboard command to the state below
+    void hogWorkBegin();  // start of the HOG-only timing interval
+    void hogWorkEnd();  // end of the HOG-only timing interval; updates hog_work_fps
+    string hogWorkFps() const;  // hog_work_fps formatted as text
+    void workBegin();  // start of the whole-frame timing interval
+    void workEnd();  // end of the whole-frame timing interval; updates work_fps
+    string workFps() const;  // work_fps formatted as text
+    string message() const;
+
+
+// Declared helper for comparing two detection result sets (cpu_rst vs gpu_rst).
+// The earlier note here said: if the two vectors differ in size it returns the size
+// difference, otherwise (total pixel difference of the rects) / (total cpu rect pixels).
+// NOTE(review): no definition is visible in this part of the file - confirm it is still needed.
+    double checkRectSimilarity(Size sz,
+                               std::vector<Rect>& cpu_rst,
+                               std::vector<Rect>& gpu_rst);
+private:
+    App operator=(App&);  // declared private; no definition visible here
+
+    //Args args;
+    bool running;  // loop flag: set in run(), cleared by handleKey() on ESC
+    bool make_gray;  // convert frames to grayscale before detection ('g' toggles)
+    double scale;  // scale argument passed to detectMultiScale ('1'/'q' change it)
+    double resize_scale;  // divisor applied to the frame size before detection (option "s")
+    int win_width;  // detection window width; run() uses twice this value as the height
+    int win_stride_width, win_stride_height;
+    int gr_threshold;  // group threshold passed to detectMultiScale ('3'/'e')
+    int nlevels;  // copied into hog.nlevels every frame ('2'/'w')
+    double hit_threshold;  // hit threshold passed to detectMultiScale ('4'/'r')
+    bool gamma_corr;  // passed to the HOGDescriptor constructor in run()
+
+    int64 hog_work_begin;  // tick count taken by hogWorkBegin()
+    double hog_work_fps;
+    int64 work_begin;  // tick count taken by workBegin()
+    double work_fps;
+
+    string img_source;  // option "i"
+    string vdo_source;  // option "v"
+    string output;  // option "o"
+    int camera_id;  // option "c"; -1 means no camera
+    bool write_once;  // toggled by 'o'; enables writing of the output image / video
+};
+
+int main(int argc, char** argv)
+{
+    const char* options =
+        "{ h help      | false          | print help message }"
+        "{ i input     |                | specify input image}"
+        "{ c camera    | -1             | enable camera capturing }"
+        "{ v video     | 768x576.avi    | use video as input }"
+        "{ g gray      | false          | convert image to gray one or not}"
+        "{ s scale     | 1.0            | resize the image before detect}"
+        "{ o output    |                | specify output path when input is images}";
+    CommandLineParser parser(argc, argv, options);
+    if (parser.has("help"))
+    {
+        cout << "Usage : hog [options]" << endl
+             << "Available options:" << endl;
+        parser.printMessage();
+        return EXIT_SUCCESS;
+    }
+
+    // Run the demo; any exception is reported on stdout and turned into exit code 1.
+    App app(parser);
+    try {
+        app.run();
+    }
+    catch (const Exception& cv_error) {
+        cout << "error: "  << cv_error.what() << endl;
+        return 1;
+    }
+    catch (const exception& std_error) {
+        cout << "error: "  << std_error.what() << endl;
+        return 1;
+    }
+    catch (...) {
+        cout << "unknown exception" << endl;
+        return 1;
+    }
+    return EXIT_SUCCESS;
+}
+
+App::App(CommandLineParser& cmd)  // Prints the key bindings, reads the options from cmd and sets the detector defaults.
+{
+    cout << "\nControls:\n"
+         << "\tESC - exit\n"
+         << "\tm - change mode GPU <-> CPU\n"
+         << "\tg - convert image to gray or not\n"
+         << "\to - save output image once, or switch on/off video save\n"
+         << "\t1/q - increase/decrease HOG scale\n"
+         << "\t2/w - increase/decrease levels count\n"
+         << "\t3/e - increase/decrease HOG group threshold\n"
+         << "\t4/r - increase/decrease hit threshold\n"
+         << endl;
+
+    make_gray = cmd.has("gray");
+    resize_scale = cmd.get<double>("s");
+    vdo_source = cmd.get<string>("v");
+    img_source = cmd.get<string>("i");
+    output = cmd.get<string>("o");
+    camera_id = cmd.get<int>("c");
+
+    win_width = 48;
+    win_stride_width = 8;
+    win_stride_height = 8;
+    gr_threshold = 8;
+    nlevels = 13;
+    hit_threshold = 1.4;
+    scale = 1.05;
+    gamma_corr = true;
+    write_once = false;
+    hog_work_fps = work_fps = 0.0;  // run() prints work_fps before workEnd() has set it for the first time
+    cout << "Group threshold: " << gr_threshold << endl;
+    cout << "Levels number: " << nlevels << endl;
+    cout << "Win width: " << win_width << endl;
+    cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n";
+    cout << "Hit threshold: " << hit_threshold << endl;
+    cout << "Gamma correction: " << gamma_corr << endl;
+    cout << endl;
+}
+
+void App::run()  // Main loop: open the source, detect with HOG on every frame, draw, optionally record, handle keys.
+{
+    running = true;
+    VideoWriter video_writer;
+
+    Size win_size(win_width, win_width * 2);  // detection window is twice as tall as it is wide
+    Size win_stride(win_stride_width, win_stride_height);
+
+    // Create HOG descriptors and detectors here
+
+    HOGDescriptor hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
+                          HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
+    hog.setSVMDetector( HOGDescriptor::getDaimlerPeopleDetector() );
+
+    while (running)  // each pass (re)opens the source; a finished video therefore starts over
+    {
+        VideoCapture vc;
+        UMat frame;
+
+        if (vdo_source!="")  // source priority: video file, then camera, then still image
+        {
+            vc.open(vdo_source.c_str());
+            if (!vc.isOpened())
+                throw runtime_error(string("can't open video file: " + vdo_source));
+            vc >> frame;
+        }
+        else if (camera_id != -1)
+        {
+            vc.open(camera_id);
+            if (!vc.isOpened())
+            {
+                stringstream msg;
+                msg << "can't open camera: " << camera_id;
+                throw runtime_error(msg.str());
+            }
+            vc >> frame;
+        }
+        else
+        {
+            imread(img_source).copyTo(frame);
+            if (frame.empty())
+                throw runtime_error(string("can't open image file: " + img_source));
+        }
+
+        UMat img_aux, img;
+        Mat img_to_show;
+
+        // Iterate over all frames
+        while (running && !frame.empty())
+        {
+            workBegin();
+
+            // Change format of the image
+            if (make_gray) cvtColor(frame, img_aux, COLOR_BGR2GRAY );
+            else frame.copyTo(img_aux);
+
+            // Resize image only when the resize factor differs from 1 (the HOG step `scale` is unrelated)
+            if (abs(resize_scale-1.0)>0.001)
+            {
+                Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale));
+                resize(img_aux, img, sz);
+            }
+            else img = img_aux;
+            img.copyTo(img_to_show);
+            hog.nlevels = nlevels;  // pick up changes made through the '2'/'w' keys
+            vector<Rect> found;
+
+            // Perform HOG classification
+            hogWorkBegin();
+
+            hog.detectMultiScale(img.getMat(ACCESS_READ), found, hit_threshold, win_stride,
+                    Size(0, 0), scale, gr_threshold);
+            hogWorkEnd();
+
+
+            // Draw positive classified windows
+            for (size_t i = 0; i < found.size(); i++)
+            {
+                Rect r = found[i];
+                rectangle(img_to_show, r.tl(), r.br(), Scalar(0, 255, 0), 3);
+            }
+
+            putText(img_to_show, ocl::useOpenCL() ? "Mode: OpenCL" : "Mode: CPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
+            putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
+            putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
+            imshow("opencv_hog", img_to_show);
+            if (vdo_source!="" || camera_id!=-1) vc >> frame;  // a still image is simply processed again
+
+            workEnd();
+
+            if (output!="" && write_once)
+            {
+                if (img_source!="")     // write image
+                {
+                    write_once = false;
+                    imwrite(output, img_to_show);
+                }
+                else                    //write video
+                {
+                    if (!video_writer.isOpened())  // opened lazily with the size of the first written frame
+                    {
+                        video_writer.open(output, VideoWriter::fourcc('x','v','i','d'), 24,
+                                          img_to_show.size(), true);
+                        if (!video_writer.isOpened())
+                            throw std::runtime_error("can't create video writer");
+                    }
+
+                    if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
+                    else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
+
+                    video_writer << img.getMat(ACCESS_READ);
+                }
+            }
+
+            handleKey((char)waitKey(3));
+        }
+    }
+}
+
+void App::handleKey(char key)  // Applies one keyboard command; keys not listed below are ignored.
+{
+    switch (key)
+    {
+    case 27:  // ESC: leave the loops in run()
+        running = false;
+        break;
+    case 'm':
+    case 'M':
+        ocl::setUseOpenCL(!cv::ocl::useOpenCL());  // flip the current OpenCL on/off state
+        cout << "Switched to " << (ocl::useOpenCL() ? "OpenCL enabled" : "CPU") << " mode\n";
+        break;
+    case 'g':
+    case 'G':
+        make_gray = !make_gray;
+        cout << "Convert image to gray: " << (make_gray ? "YES" : "NO") << endl;
+        break;
+    case '1':
+        scale *= 1.05;  // scale moves in 5% steps in both directions
+        cout << "Scale: " << scale << endl;
+        break;
+    case 'q':
+    case 'Q':
+        scale /= 1.05;
+        cout << "Scale: " << scale << endl;
+        break;
+    case '2':
+        nlevels++;
+        cout << "Levels number: " << nlevels << endl;
+        break;
+    case 'w':
+    case 'W':
+        nlevels = max(nlevels - 1, 1);  // never below one level
+        cout << "Levels number: " << nlevels << endl;
+        break;
+    case '3':
+        gr_threshold++;
+        cout << "Group threshold: " << gr_threshold << endl;
+        break;
+    case 'e':
+    case 'E':
+        gr_threshold = max(0, gr_threshold - 1);  // never negative
+        cout << "Group threshold: " << gr_threshold << endl;
+        break;
+    case '4':
+        hit_threshold+=0.25;
+        cout << "Hit threshold: " << hit_threshold << endl;
+        break;
+    case 'r':
+    case 'R':
+        hit_threshold = max(0.0, hit_threshold - 0.25);  // never negative
+        cout << "Hit threshold: " << hit_threshold << endl;
+        break;
+    case 'c':
+    case 'C':
+        gamma_corr = !gamma_corr;  // NOTE(review): run() builds its HOGDescriptor once from gamma_corr, so this toggle may never reach the detector - confirm
+        cout << "Gamma correction: " << gamma_corr << endl;
+        break;
+    case 'o':
+    case 'O':
+        write_once = !write_once;  // run() writes output only while this flag is set
+        break;
+    }
+}
+
+
+inline void App::hogWorkBegin()  // Marks the start of the HOG-only timing interval.
+{
+    hog_work_begin = getTickCount();  // read back by hogWorkEnd()
+}
+
+inline void App::hogWorkEnd()
+{
+    // HOG-stage FPS = tick frequency / ticks elapsed since hogWorkBegin()
+    const int64 elapsed = getTickCount() - hog_work_begin;
+    hog_work_fps = getTickFrequency() / elapsed;
+}
+
+inline string App::hogWorkFps() const  // last HOG-only FPS value as text
+{
+    ostringstream formatted;
+    formatted << hog_work_fps;
+    return formatted.str();
+}
+
+inline void App::workBegin()  // Marks the start of the whole-frame timing interval.
+{
+    work_begin = getTickCount();  // read back by workEnd()
+}
+
+inline void App::workEnd()
+{
+    // whole-frame FPS = tick frequency / ticks elapsed since workBegin()
+    const int64 elapsed = getTickCount() - work_begin;
+    work_fps = getTickFrequency() / elapsed;
+}
+
+inline string App::workFps() const  // last whole-frame FPS value as text
+{
+    ostringstream formatted;
+    formatted << work_fps;
+    return formatted.str();
+}
diff --git a/samples/tapi/pyrlk_optical_flow.cpp b/samples/tapi/pyrlk_optical_flow.cpp
new file mode 100644
index 0000000000..d4b77294be
--- /dev/null
+++ b/samples/tapi/pyrlk_optical_flow.cpp
@@ -0,0 +1,230 @@
+#include <iostream>
+#include <vector>
+#include <iomanip>
+
+#include "opencv2/core/utility.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/video/video.hpp"
+
+using namespace std;
+using namespace cv;
+
+typedef unsigned char uchar;
+#define LOOP_NUM 10
+int64 work_begin = 0;
+int64 work_end = 0;
+
+static void workBegin()  // Starts one timed interval.
+{
+    work_begin = getTickCount();  // read back by workEnd()
+}
+static void workEnd()  // Closes the interval opened by workBegin().
+{
+    work_end += (getTickCount() - work_begin);  // accumulates: work_end is the total ticks over all intervals
+}
+static double getTime()  // Accumulated time of all intervals, in milliseconds.
+{
+    return work_end * 1000. / getTickFrequency();  // ticks * 1000 / (ticks per second)
+}
+
+static void drawArrows(UMat& _frame, const vector<Point2f>& prevPts, const vector<Point2f>& nextPts, const vector<uchar>& status,
+                       Scalar line_color = Scalar(0, 0, 255))
+{
+    // For every point whose status flag is set, draw an arrow built from its
+    // previous and next position; displacements shorter than 1 are skipped.
+    const int thickness = 1;
+    Mat canvas = _frame.getMat(ACCESS_WRITE);
+
+    for (size_t idx = 0; idx < prevPts.size(); ++idx)
+    {
+        if (!status[idx])
+            continue;
+
+        // integer end points of the displacement
+        Point tail = prevPts[idx];
+        Point head = nextPts[idx];
+
+        const double angle = atan2((double) tail.y - head.y, (double) tail.x - head.x);
+
+        const double length = sqrt( (double)(tail.y - head.y)*(tail.y - head.y) + (double)(tail.x - head.x)*(tail.x - head.x) );
+        if (length < 1.0)
+            continue;
+
+        // Stretch the shaft to three times the measured displacement and draw it.
+        head.x = (int) (tail.x - 3 * length * cos(angle));
+        head.y = (int) (tail.y - 3 * length * sin(angle));
+        line(canvas, tail, head, line_color, thickness);
+
+        // Two short strokes at +/-45 degrees from the shaft form the arrow
+        // tip; both end at the stretched end point.
+        const double tipAngles[2] = { angle + CV_PI / 4, angle - CV_PI / 4 };
+        for (int t = 0; t < 2; ++t)
+        {
+            tail.x = (int) (head.x + 9 * cos(tipAngles[t]));
+            tail.y = (int) (head.y + 9 * sin(tipAngles[t]));
+            line(canvas, tail, head, line_color, thickness);
+        }
+
+    }
+}
+
+
+int main(int argc, const char* argv[])  // Sample entry point: sparse PyrLK optical flow on an image pair, or on a camera / video stream.
+{
+    const char* keys =
+        "{ h help           | false           | print help message }"
+        "{ l left           |                 | specify left image }"
+        "{ r right          |                 | specify right image }"
+        "{ c camera         | 0               | enable camera capturing }"
+        "{ v video          |                 | use video as input }"
+        "{ o output         | pyrlk_output.jpg| specify output save path when input is images }"
+        "{ points           | 1000            | specify points count [GoodFeatureToTrack] }"
+        "{ min_dist         | 0               | specify minimal distance between points [GoodFeatureToTrack] }"
+        "{ m cpu_mode       | false           | run without OpenCL }";
+
+    CommandLineParser cmd(argc, argv, keys);
+
+    if (cmd.has("help"))
+    {
+        cout << "Usage: pyrlk_optical_flow [options]" << endl;
+        cout << "Available options:" << endl;
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+
+    bool defaultPicturesFail = true;
+    string fname0 = cmd.get<string>("left");
+    string fname1 = cmd.get<string>("right");
+    string vdofile = cmd.get<string>("video");
+    string outfile = cmd.get<string>("output");
+    int points = cmd.get<int>("points");
+    double minDist = cmd.get<double>("min_dist");
+    int inputName = cmd.get<int>("c");
+
+    UMat frame0;
+    imread(fname0, cv::IMREAD_GRAYSCALE).copyTo(frame0);
+    UMat frame1;
+    imread(fname1, cv::IMREAD_GRAYSCALE).copyTo(frame1);
+
+    vector<cv::Point2f> pts(points);
+    vector<cv::Point2f> nextPts(points);
+    vector<unsigned char> status(points);
+    vector<float> err;
+
+    cout << "Points count : " << points << endl << endl;
+
+    if (frame0.empty() || frame1.empty())  // no usable image pair: fall back to the video file or the camera
+    {
+        VideoCapture capture;
+        UMat frame, frameCopy;
+        UMat frame0Gray, frame1Gray;
+        UMat ptr0, ptr1;
+
+        if(vdofile.empty())
+            capture.open( inputName );
+        else
+            capture.open(vdofile.c_str());
+
+        int c = inputName ;
+        if(!capture.isOpened())
+        {
+            if(vdofile.empty())
+                cout << "Capture from CAM " << c << " didn't work" << endl;
+            else
+                cout << "Capture from file " << vdofile << " failed" <<endl;
+            if (defaultPicturesFail)
+                return EXIT_FAILURE;
+            goto nocamera;
+        }
+
+        cout << "In capture ..." << endl;
+        for(int i = 0;; i++)
+        {
+            if( !capture.read(frame) )
+                break;
+
+            if (i == 0)
+            {
+                frame.copyTo( frame0 );
+                cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+            }
+            else
+            {
+                if (i%2 == 1)  // odd frames go to frame1, even ones to frame0; ptr0 is always the older image
+                {
+                    frame.copyTo(frame1);
+                    cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame0Gray;
+                    ptr1 = frame1Gray;
+                }
+                else
+                {
+                    frame.copyTo(frame0);
+                    cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame1Gray;
+                    ptr1 = frame0Gray;
+                }
+
+
+                pts.clear();
+                goodFeaturesToTrack(ptr0, pts, points, 0.01, minDist);  // honour min_dist here as the image path does
+                if(pts.size() == 0)
+                    continue;
+                calcOpticalFlowPyrLK(ptr0, ptr1, pts, nextPts, status, err);
+
+                if (i%2 == 1)
+                    frame1.copyTo(frameCopy);
+                else
+                    frame0.copyTo(frameCopy);
+                drawArrows(frameCopy, pts, nextPts, status, Scalar(255, 0, 0));
+                imshow("PyrLK [Sparse]", frameCopy);
+            }
+            char key = (char)waitKey(10);
+
+            if (key == 27)  // ESC
+                break;
+            else if (key == 'm' || key == 'M')
+            {
+                ocl::setUseOpenCL(!cv::ocl::useOpenCL());
+                cout << "Switched to " << (ocl::useOpenCL() ? "OpenCL" : "CPU") << " mode\n";
+            }
+        }
+        capture.release();
+    }
+    else
+    {
+nocamera:
+        if (cmd.has("cpu_mode"))
+        {
+            ocl::setUseOpenCL(false);
+            std::cout << "OpenCL was disabled" << std::endl;
+        }
+        for(int i = 0; i <= LOOP_NUM; i ++)  // iteration 0 is an untimed warm-up, the remaining LOOP_NUM are timed
+        {
+            cout << "loop" << i << endl;
+            if (i > 0) workBegin();
+
+            goodFeaturesToTrack(frame0, pts, points, 0.01, minDist);
+            calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
+
+            if (i > 0)
+                workEnd();
+
+            if (i == LOOP_NUM)
+            {
+                cout << "average time (noCamera) : ";
+
+                cout << getTime() / LOOP_NUM << " ms" << endl;
+
+                drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
+                imshow("PyrLK [Sparse]", frame0);
+                imwrite(outfile, frame0);
+            }
+        }
+    }
+
+    waitKey();
+
+    return EXIT_SUCCESS;
+}
diff --git a/samples/tapi/squares.cpp b/samples/tapi/squares.cpp
new file mode 100644
index 0000000000..402702e497
--- /dev/null
+++ b/samples/tapi/squares.cpp
@@ -0,0 +1,204 @@
+// The "Square Detector" program.
+// It loads a single input image and tries to find squares in
+// that image
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include <iostream>
+#include <string.h>
+
+using namespace cv;
+using namespace std;
+
+int thresh = 50, N = 11;
+const char* wndname = "Square Detection Demo";
+
+// Returns the cosine of the angle at vertex pt0 between the
+// vectors pt0->pt1 and pt0->pt2; the 1e-10 term keeps the
+// denominator non-zero for zero-length vectors.
+static double angle( Point pt1, Point pt2, Point pt0 )
+{
+    const double ux = pt1.x - pt0.x, uy = pt1.y - pt0.y;
+    const double vx = pt2.x - pt0.x, vy = pt2.y - pt0.y;
+    const double dot = ux*vx + uy*vy;
+    const double sqLenProduct = (ux*ux + uy*uy)*(vx*vx + vy*vy);
+    return dot/sqrt(sqLenProduct + 1e-10);
+}
+
+
+// clears `squares` and fills it with the square-like 4-vertex contours detected in the image.
+// the result is returned through the `squares` output vector
+static void findSquares( const UMat& image, vector<vector<Point> >& squares )
+{
+    squares.clear();
+    UMat pyr, timg, gray0(image.size(), CV_8U), gray;
+
+    // down-scale and upscale the image to filter out the noise
+    pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
+    pyrUp(pyr, timg, image.size());
+    vector<vector<Point> > contours;
+
+    // find squares in every color plane of the image
+    for( int c = 0; c < 3; c++ )
+    {
+        int ch[] = {c, 0};  // copy channel c of timg into channel 0 of gray0
+        mixChannels(timg, gray0, ch, 1);
+
+        // try several threshold levels
+        for( int l = 0; l < N; l++ )
+        {
+            // hack: use Canny instead of zero threshold level.
+            // Canny helps to catch squares with gradient shading
+            if( l == 0 )
+            {
+                // apply Canny. Take the upper threshold from the global `thresh`
+                // and set the lower to 0 (which forces edges merging)
+                Canny(gray0, gray, 0, thresh, 5);
+                // dilate canny output to remove potential
+                // holes between edge segments
+                dilate(gray, gray, UMat(), Point(-1,-1));
+            }
+            else
+            {
+                // apply threshold if l!=0 (THRESH_BINARY):
+                //     gray(x,y) = gray0(x,y) > (l+1)*255/N ? 255 : 0
+                cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY);
+            }
+
+            // find contours and store them all as a list
+            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
+
+            vector<Point> approx;
+
+            // test each contour
+            for( size_t i = 0; i < contours.size(); i++ )
+            {
+                // approximate contour with accuracy proportional
+                // to the contour perimeter
+
+                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
+
+                // square contours should have 4 vertices after approximation
+                // relatively large area (to filter out noisy contours)
+                // and be convex.
+                // Note: absolute value of an area is used because
+                // area may be positive or negative - in accordance with the
+                // contour orientation
+                if( approx.size() == 4 &&
+                        fabs(contourArea(Mat(approx))) > 1000 &&
+                        isContourConvex(Mat(approx)) )
+                {
+                    double maxCosine = 0;
+
+                    for( int j = 2; j < 5; j++ )
+                    {
+                        // find the maximum cosine of the angle between joint edges
+                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
+                        maxCosine = MAX(maxCosine, cosine);
+                    }
+
+                    // if cosines of all angles are small
+                    // (all angles are ~90 degree) then append the quadrangle
+                    // vertices to the result vector
+                    if( maxCosine < 0.3 )
+                        squares.push_back(approx);
+                }
+            }
+        }
+    }
+}
+
+// outlines every entry of `squares` on the image as a closed polyline
+static void drawSquares( UMat& _image, const vector<vector<Point> >& squares )
+{
+    Mat canvas = _image.getMat(ACCESS_WRITE);
+    for( vector<vector<Point> >::const_iterator sq = squares.begin(); sq != squares.end(); ++sq )
+    {
+        const Point* vertices = &(*sq)[0];
+        int vertexCount = (int)sq->size();
+        polylines(canvas, &vertices, &vertexCount, 1, true, Scalar(0,255,0), 3, LINE_AA);
+    }
+}
+
+
+// returns a copy of the image with the given squares drawn on it; the input is left untouched
+static UMat drawSquaresBoth( const UMat& image,
+                            const vector<vector<Point> >& sqs)
+{
+    UMat canvas(image.size(), image.type());
+    image.copyTo(canvas);
+
+    drawSquares(canvas, sqs);
+
+    return canvas;
+}
+
+
+int main(int argc, char** argv)  // Sample entry point: times findSquares() on one image, then shows and saves the result.
+{
+    const char* keys =
+        "{ i input    | pic1.png           | specify input image }"
+        "{ o output   | squares_output.jpg | specify output save path}"
+        "{ h help     | false              | print help message }"
+        "{ m cpu_mode | false              | run without OpenCL }";
+
+    CommandLineParser cmd(argc, argv, keys);
+
+    if(cmd.has("help"))
+    {
+        cout << "Usage : squares [options]" << endl;
+        cout << "Available options:" << endl;
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+    if (cmd.has("cpu_mode"))
+    {
+        ocl::setUseOpenCL(false);
+        std::cout << "OpenCL was disabled" << std::endl;
+    }
+
+    string inputName = cmd.get<string>("i");
+    string outfile = cmd.get<string>("o");
+
+    int iterations = 10;  // number of timed runs
+    namedWindow( wndname, WINDOW_AUTOSIZE );
+    vector<vector<Point> > squares;
+
+    UMat image;
+    imread(inputName, 1).copyTo(image);
+    if( image.empty() )
+    {
+        cout << "Couldn't load " << inputName << endl;
+        cmd.printMessage();
+        return EXIT_FAILURE;
+    }
+
+    int j = iterations;
+    int64 t_cpp = 0;  // ticks accumulated over the timed runs
+    //warm-ups
+    cout << "warming up ..." << endl;
+    findSquares(image, squares);
+
+    do
+    {
+        int64 t_start = cv::getTickCount();
+        findSquares(image, squares);
+        t_cpp += cv::getTickCount() - t_start;
+
+        // progress report; it runs after the timed section, so the
+        // console output does not count towards t_cpp
+        cout << "run loop: " << j << endl;
+    }
+    while(--j);
+    cout << "average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl;
+
+    UMat result = drawSquaresBoth(image, squares);
+    imshow(wndname, result);
+    imwrite(outfile, result);
+    waitKey(0);
+
+    return EXIT_SUCCESS;
+}
diff --git a/samples/tapi/surf_matcher.cpp b/samples/tapi/surf_matcher.cpp
new file mode 100644
index 0000000000..9066bfd3fb
--- /dev/null
+++ b/samples/tapi/surf_matcher.cpp
@@ -0,0 +1,224 @@
+#include <iostream>
+#include <stdio.h>
+#include "opencv2/core/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/calib3d.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/nonfree.hpp"
+
+using namespace cv;
+
+const int LOOP_NUM = 10;          // number of timed passes in main(); one extra untimed pass runs first
+const int GOOD_PTS_MAX = 50;      // upper bound on the matches kept by drawGoodMatches()
+const float GOOD_PORTION = 0.15f; // fraction of all matches kept by drawGoodMatches()
+
+int64 work_begin = 0; // tick count recorded by workBegin()
+int64 work_end = 0;   // elapsed ticks stored by workEnd() and converted by getTime()
+
+static void workBegin()
+{
+    work_begin = cv::getTickCount(); // remember the tick count at which the measured interval starts
+}
+
+static void workEnd()
+{
+    work_end = cv::getTickCount() - work_begin; // ticks since workBegin(); replaces any earlier value
+}
+
+static double getTime()
+{
+    return 1000. * (work_end / cv::getTickFrequency()); // ticks -> seconds -> milliseconds
+}
+
+// Function-object wrapper: builds a KPDetector from `hessian` and forwards calls to it.
+template<class KPDetector>
+struct SURFDetector
+{
+    KPDetector surf;
+    SURFDetector(double hessian = 800.0) : surf(hessian) {}
+
+    template<class ImageT>
+    void operator()(const ImageT& in, const ImageT& mask, std::vector<cv::KeyPoint>& pts,
+                    ImageT& descriptors, bool useProvided = false)
+    {
+        surf(in, mask, pts, descriptors, useProvided);
+    }
+};
+
+// Thin wrapper that forwards match() to a default-constructed KPMatcher.
+template<class KPMatcher>
+struct SURFMatcher
+{
+    KPMatcher matcher;
+
+    template<class DescT>
+    void match(const DescT& in1, const DescT& in2, std::vector<cv::DMatch>& matches)
+    { matcher.match(in1, in2, matches); }
+};
+
+// Draws the strongest matches between img1 and img2 and outlines where img1 maps into img2.
+// `matches` is sorted in place; `scene_corners_` receives img1's four projected corners and
+// is left untouched when fewer than 4 pairs are kept or no homography is found.
+static Mat drawGoodMatches(
+    const Mat& img1,
+    const Mat& img2,
+    const std::vector<KeyPoint>& keypoints1,
+    const std::vector<KeyPoint>& keypoints2,
+    std::vector<DMatch>& matches,
+    std::vector<Point2f>& scene_corners_
+    )
+{
+    //-- Sort matches and keep the best GOOD_PORTION of them, GOOD_PTS_MAX at most
+    std::sort(matches.begin(), matches.end());
+    const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION));
+    const std::vector< DMatch > good_matches(matches.begin(), matches.begin() + ptsPairs);
+
+    // front()/back() are undefined on an empty vector, so only report when there are matches
+    if( !matches.empty() )
+    {
+        const double minDist = matches.front().distance;
+        const double maxDist = matches.back().distance;
+        std::cout << "\nMax distance: " << maxDist << std::endl;
+        std::cout << "Min distance: " << minDist << std::endl;
+    }
+
+    std::cout << "Calculating homography using " << ptsPairs << " point pairs." << std::endl;
+
+    // drawing the results
+    Mat img_matches;
+
+    drawMatches( img1, keypoints1, img2, keypoints2,
+                 good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
+                 std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
+
+    // A homography cannot be estimated from fewer than 4 point pairs: skip the outline
+    if( ptsPairs < 4 )
+        return img_matches;
+
+    //-- Localize the object
+    std::vector<Point2f> obj;
+    std::vector<Point2f> scene;
+
+    for( size_t i = 0; i < good_matches.size(); i++ )
+    {
+        //-- Get the keypoints from the good matches
+        obj.push_back( keypoints1[ good_matches[i].queryIdx ].pt );
+        scene.push_back( keypoints2[ good_matches[i].trainIdx ].pt );
+    }
+    //-- Get the corners from the image_1 ( the object to be "detected" )
+    std::vector<Point2f> obj_corners(4);
+    obj_corners[0] = Point(0,0);
+    obj_corners[1] = Point( img1.cols, 0 );
+    obj_corners[2] = Point( img1.cols, img1.rows );
+    obj_corners[3] = Point( 0, img1.rows );
+    std::vector<Point2f> scene_corners(4);
+
+    Mat H = findHomography( obj, scene, RANSAC );
+    if( H.empty() )
+        return img_matches;
+    perspectiveTransform( obj_corners, scene_corners, H);
+    scene_corners_ = scene_corners;
+
+    //-- Draw the mapped object outline; corners are shifted by img1's width into the image_2 part
+    const Point2f shift( (float)img1.cols, 0 );
+    for( size_t i = 0; i < scene_corners.size(); i++ )
+        line( img_matches, scene_corners[i] + shift, scene_corners[(i + 1) % scene_corners.size()] + shift,
+              Scalar( 0, 255, 0), 2, LINE_AA );
+    return img_matches;
+}
+
+////////////////////////////////////////////////////
+// SURF matching sample: detects and describes keypoints in two images, matches the
+// descriptors, then uses the cpu findHomography interface (inside drawGoodMatches).
+int main(int argc, char* argv[])
+{
+    const char* keys =
+        "{ h help     | false            | print help message  }"
+        "{ l left     | box.png          | specify left image  }"
+        "{ r right    | box_in_scene.png | specify right image }"
+        "{ o output   | SURF_output.jpg  | specify output save path }"
+        "{ m cpu_mode | false            | run without OpenCL }";
+
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.has("help"))
+    {
+        std::cout << "Usage: surf_matcher [options]" << std::endl;
+        std::cout << "Available options:" << std::endl;
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+    if (cmd.has("cpu_mode"))
+    {
+        ocl::setUseOpenCL(false);
+        std::cout << "OpenCL was disabled" << std::endl;
+    }
+
+    const std::string savePath = cmd.get<std::string>("o");
+
+    // Both inputs are read as grayscale and copied into UMat storage.
+    UMat img1, img2;
+
+    const std::string leftPath = cmd.get<std::string>("l");
+    imread(leftPath, IMREAD_GRAYSCALE).copyTo(img1);
+    if(img1.empty())
+    {
+        std::cout << "Couldn't load " << leftPath << std::endl;
+        cmd.printMessage();
+        return EXIT_FAILURE;
+    }
+
+    const std::string rightPath = cmd.get<std::string>("r");
+    imread(rightPath, IMREAD_GRAYSCALE).copyTo(img2);
+    if(img2.empty())
+    {
+        std::cout << "Couldn't load " << rightPath << std::endl;
+        cmd.printMessage();
+        return EXIT_FAILURE;
+    }
+
+    // Detection and matching outputs
+    std::vector<KeyPoint> keypoints1, keypoints2;
+    std::vector<DMatch> matches;
+
+    // NOTE(review): both UMats are still empty when getMat() is called here - confirm intent
+    UMat _descriptors1, _descriptors2;
+    Mat descriptors1 = _descriptors1.getMat(ACCESS_RW);
+    Mat descriptors2 = _descriptors2.getMat(ACCESS_RW);
+
+    // Detector and matcher instances
+    SURFDetector<SURF> detector;
+
+    SURFMatcher<BFMatcher> bfMatcher;
+
+    //-- Timed section: pass 0 is an untimed warm-up, the clock starts at pass 1
+    for (int pass = 0; pass < LOOP_NUM + 1; ++pass)
+    {
+        if (pass == 1)
+            workBegin();
+        detector(img1.getMat(ACCESS_READ), Mat(), keypoints1, descriptors1);
+        detector(img2.getMat(ACCESS_READ), Mat(), keypoints2, descriptors2);
+        bfMatcher.match(descriptors1, descriptors2, matches);
+    }
+    workEnd();
+    std::cout << "FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+    std::cout << "FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+    // getTime() covers the whole timed section in ms; divide for the per-pass average
+    const double surf_time = getTime();
+    std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+
+    // Draw the kept matches and the projected outline of the left image
+    std::vector<Point2f> sceneCorners;
+    Mat img_matches = drawGoodMatches(img1.getMat(ACCESS_READ), img2.getMat(ACCESS_READ), keypoints1, keypoints2, matches, sceneCorners);
+
+    //-- Show detected matches
+    namedWindow("surf matches", 0);
+    imshow("surf matches", img_matches);
+    imwrite(savePath, img_matches);
+
+    waitKey(0);
+    return EXIT_SUCCESS;
+}
diff --git a/samples/tapi/tvl1_optical_flow.cpp b/samples/tapi/tvl1_optical_flow.cpp
new file mode 100644
index 0000000000..436ba715c8
--- /dev/null
+++ b/samples/tapi/tvl1_optical_flow.cpp
@@ -0,0 +1,231 @@
+#include <iostream>
+#include <vector>
+#include <iomanip>
+
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/video/video.hpp"
+
+using namespace std;
+using namespace cv;
+
+typedef unsigned char uchar; // NOTE(review): not referenced anywhere in this file - confirm it is still needed
+#define LOOP_NUM 10          // number of timed iterations in the image-pair branch of main()
+int64 work_begin = 0;        // tick count recorded by workBegin()
+int64 work_end = 0;          // ticks accumulated by workEnd() and converted by getTime()
+
+static void workBegin()
+{
+    work_begin = cv::getTickCount(); // start of the interval that workEnd() will close
+}
+static void workEnd()
+{
+    work_end = work_end + (cv::getTickCount() - work_begin); // accumulate: intervals add up across calls
+}
+static double getTime()
+{
+    return (1000. * work_end) / cv::getTickFrequency(); // accumulated ticks expressed in milliseconds
+}
+
+template <typename T> inline T clamp (T x, T a, T b)
+{
+    return !(x > a) ? a : (x < b ? x : b); // a unless x is above it, otherwise x capped at b
+}
+
+template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
+{
+    x = ::clamp(x, a, b); // qualified: with `using namespace std`, a bare clamp() is ambiguous with C++17 std::clamp
+    return c + (d - c) * (x - a) / (b - a); // linear map of [a, b] onto [c, d]; callers must pass a != b
+}
+
+// Renders the flow components u and v as a CV_8UC4 image in flowField: channel 1 encodes -v,
+// channel 2 encodes u (both scaled by the largest magnitude), channel 0 is 0 and channel 3 is 255.
+static void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
+{
+    // Pass 1: largest absolute component; starts at 1.0f, so the scale is never smaller than that.
+    float maxDisplacement = 1.0f;
+    for (int y = 0; y < u.rows; ++y)
+    {
+        const float* uRow = u.ptr<float>(y);
+        const float* vRow = v.ptr<float>(y);
+        for (int x = 0; x < u.cols; ++x)
+        {
+            const float d = max(fabsf(uRow[x]), fabsf(vRow[x]));
+            maxDisplacement = d > maxDisplacement ? d : maxDisplacement;
+        }
+    }
+
+    // Pass 2: map [-maxDisplacement, maxDisplacement] onto [0, 255] for each component.
+    flowField.create(u.size(), CV_8UC4);
+    const float lo = -maxDisplacement;
+    const float hi = maxDisplacement;
+    for (int y = 0; y < flowField.rows; ++y)
+    {
+        const float* uRow = u.ptr<float>(y);
+        const float* vRow = v.ptr<float>(y);
+        Vec4b* out = flowField.ptr<Vec4b>(y);
+
+        for (int x = 0; x < flowField.cols; ++x)
+        {
+            Vec4b& px = out[x];
+            px[0] = 0;
+            px[1] = static_cast<unsigned char> (mapValue (-vRow[x], lo, hi, 0.0f, 255.0f));
+            px[2] = static_cast<unsigned char> (mapValue ( uRow[x], lo, hi, 0.0f, 255.0f));
+            px[3] = 255;
+        }
+    }
+}
+
+
+// TV-L1 dense optical flow sample. Processes a camera/video stream when the camera is
+// requested or the image pair cannot be loaded; otherwise times LOOP_NUM runs on the pair.
+int main(int argc, const char* argv[])
+{
+    const char* keys =
+        "{ h help     | false           | print help message }"
+        "{ l left     |                 | specify left image }"
+        "{ r right    |                 | specify right image }"
+        "{ o output   | tvl1_output.jpg | specify output save path }"
+        "{ c camera   | 0               | enable camera capturing }"
+        "{ m cpu_mode | false           | run without OpenCL }"
+        "{ v video    |                 | use video as input }";
+
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.has("help"))
+    {
+        cout << "Usage: tvl1_optical_flow [options]" << endl;
+        cout << "Available options:" << endl;
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+
+    string fname0 = cmd.get<string>("l");
+    string fname1 = cmd.get<string>("r");
+    string vdofile = cmd.get<string>("v");
+    string outpath = cmd.get<string>("o");
+    const bool useCPU = cmd.has("cpu_mode"); // "cpu_mode"/"m" is the declared key; "s" is not declared in keys
+    bool useCamera = cmd.get<bool>("c");
+    int inputName = cmd.get<int>("c");
+
+    if (useCPU) // honour -m in every mode, not only when processing the image pair
+    {
+        ocl::setUseOpenCL(false);
+        std::cout << "OpenCL was disabled" << std::endl;
+    }
+
+    UMat frame0, frame1;
+    imread(fname0, cv::IMREAD_GRAYSCALE).copyTo(frame0);
+    imread(fname1, cv::IMREAD_GRAYSCALE).copyTo(frame1);
+    cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
+
+    UMat flow;
+    Mat show_flow;
+    vector<UMat> flow_vec;
+    if (frame0.empty() || frame1.empty())
+        useCamera = true;
+
+    if (useCamera)
+    {
+        VideoCapture capture;
+        UMat frame;
+        UMat frame0Gray, frame1Gray;
+        UMat ptr0, ptr1;
+
+        if(vdofile.empty())
+            capture.open( inputName );
+        else
+            capture.open(vdofile.c_str());
+
+        if(!capture.isOpened())
+        {
+            if(vdofile.empty())
+                cout << "Capture from CAM " << inputName << " didn't work" << endl;
+            else
+                cout << "Capture from file " << vdofile << " failed" <<endl;
+            goto nocamera;
+        }
+
+        cout << "In capture ..." << endl;
+        for(int i = 0;; i++)
+        {
+            if( !capture.read(frame) )
+                break;
+
+            if (i == 0)
+            {
+                frame.copyTo( frame0 );
+                cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+            }
+            else
+            {
+                if (i%2 == 1)
+                {
+                    frame.copyTo(frame1);
+                    cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame0Gray;
+                    ptr1 = frame1Gray;
+                }
+                else
+                {
+                    frame.copyTo(frame0);
+                    cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame1Gray;
+                    ptr1 = frame0Gray;
+                }
+
+                alg->calc(ptr0, ptr1, flow);
+                split(flow, flow_vec);
+
+                getFlowField(flow_vec[0].getMat(ACCESS_READ), flow_vec[1].getMat(ACCESS_READ), show_flow);
+                imshow("tvl1 optical flow field", show_flow);
+            }
+
+            char key = (char)waitKey(10);
+            if (key == 27)
+                break;
+            else if (key == 'm' || key == 'M')
+            {
+                ocl::setUseOpenCL(!cv::ocl::useOpenCL());
+                cout << "Switched to " << (ocl::useOpenCL() ? "OpenCL" : "CPU") << " mode\n";
+            }
+        }
+
+        capture.release();
+    }
+    else
+    {
+nocamera:
+        // Reached through the goto with nothing loaded when the capture failed and no image pair was given.
+        if (frame0.empty() || frame1.empty())
+        {
+            cout << "Couldn't load the left/right input images" << endl;
+            return EXIT_FAILURE;
+        }
+        for(int i = 0; i <= LOOP_NUM; i ++)
+        {
+            cout << "loop" << i << endl;
+
+            if (i > 0) workBegin(); // iteration 0 is an untimed warm-up
+            alg->calc(frame0, frame1, flow);
+            split(flow, flow_vec);
+            if (i > 0) workEnd();
+
+            if (i == LOOP_NUM)
+            {
+                if (useCPU)
+                    cout << "average CPU time (noCamera) : ";
+                else
+                    cout << "average GPU time (noCamera) : ";
+                cout << getTime() / LOOP_NUM << " ms" << endl;
+                getFlowField(flow_vec[0].getMat(ACCESS_READ), flow_vec[1].getMat(ACCESS_READ), show_flow);
+                imshow("tvl1 optical flow field", show_flow);
+                imwrite(outpath, show_flow);
+            }
+        }
+    }
+
+    waitKey();
+
+    return EXIT_SUCCESS;
+}