diff --git a/modules/features2d/doc/feature_detection_and_description.rst b/modules/features2d/doc/feature_detection_and_description.rst
index c0f611713e..409fe54b7a 100644
--- a/modules/features2d/doc/feature_detection_and_description.rst
+++ b/modules/features2d/doc/feature_detection_and_description.rst
@@ -249,3 +249,54 @@ We notice that for keypoint matching applications, image content has little effe
     :param keypoints: Set of detected keypoints
     :param corrThresh: Correlation threshold.
     :param verbose: Prints pair selection informations.
+
+KAZE
+----
+.. ocv:class:: KAZE : public Feature2D
+
+Class implementing the KAZE keypoint detector and descriptor extractor, described in [ABD12]_.
+
+.. [ABD12] KAZE Features. Pablo F. Alcantarilla, Adrien Bartoli and Andrew J. Davison. In European Conference on Computer Vision (ECCV), Fiorenze, Italy, October 2012.
+
+KAZE::KAZE
+----------
+The KAZE constructor
+
+.. ocv:function:: KAZE::KAZE(bool extended, bool upright)
+
+    :param extended: Set to enable extraction of extended (128-byte) descriptor.
+    :param upright: Set to enable use of upright descriptors (non rotation-invariant).
+
+
+
+AKAZE
+-----
+.. ocv:class:: AKAZE : public Feature2D
+
+Class implementing the AKAZE keypoint detector and descriptor extractor, described in [ANB13]_. ::
+
+    class CV_EXPORTS_W AKAZE : public Feature2D
+    {
+    public:
+        /// AKAZE Descriptor Type
+        enum DESCRIPTOR_TYPE {
+            DESCRIPTOR_KAZE_UPRIGHT = 2, ///< Upright descriptors, not invariant to rotation
+            DESCRIPTOR_KAZE = 3,
+            DESCRIPTOR_MLDB_UPRIGHT = 4, ///< Upright descriptors, not invariant to rotation
+            DESCRIPTOR_MLDB = 5
+        };
+        CV_WRAP AKAZE();
+        explicit AKAZE(DESCRIPTOR_TYPE descriptor_type, int descriptor_size = 0, int descriptor_channels = 3);
+    };
+
+.. [ANB13] Fast Explicit Diffusion for Accelerated Features in Nonlinear Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien Bartoli. In British Machine Vision Conference (BMVC), Bristol, UK, September 2013.
+
+AKAZE::AKAZE
+------------
+The AKAZE constructor
+
+.. ocv:function:: AKAZE::AKAZE(DESCRIPTOR_TYPE descriptor_type, int descriptor_size = 0, int descriptor_channels = 3)
+
+    :param descriptor_type: Type of the extracted descriptor.
+    :param descriptor_size: Size of the descriptor in bits. 0 -> Full size
+    :param descriptor_channels: Number of channels in the descriptor (1, 2, 3).
diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp
index 4129705033..ed2a6e1b84 100644
--- a/modules/features2d/include/opencv2/features2d.hpp
+++ b/modules/features2d/include/opencv2/features2d.hpp
@@ -895,7 +895,87 @@ protected:
     PixelTestFn test_fn_;
 };
 
+/*!
+KAZE implementation
+*/
+class CV_EXPORTS_W KAZE : public Feature2D
+{
+public:
+    CV_WRAP KAZE();
+    CV_WRAP explicit KAZE(bool extended, bool upright);
 
+    virtual ~KAZE();
+
+    // returns the descriptor size in bytes
+    int descriptorSize() const;
+    // returns the descriptor type
+    int descriptorType() const;
+    // returns the default norm type
+    int defaultNorm() const;
+
+    AlgorithmInfo* info() const;
+
+    // Compute the KAZE features on an image
+    void operator()(InputArray image, InputArray mask, std::vector<KeyPoint>& keypoints) const;
+
+    // Compute the KAZE features and descriptors on an image
+    void operator()(InputArray image, InputArray mask, std::vector<KeyPoint>& keypoints,
+        OutputArray descriptors, bool useProvidedKeypoints = false) const;
+
+protected:
+    void detectImpl(InputArray image, std::vector<KeyPoint>& keypoints, InputArray mask) const;
+    void computeImpl(InputArray image, std::vector<KeyPoint>& keypoints, OutputArray descriptors) const;
+
+    CV_PROP int  descriptor;
+    CV_PROP bool extended;
+    CV_PROP bool upright;
+};
+
+/*!
+AKAZE implementation
+*/
+class CV_EXPORTS_W AKAZE : public Feature2D
+{
+public:
+    /// AKAZE Descriptor Type
+    enum DESCRIPTOR_TYPE {
+        DESCRIPTOR_KAZE_UPRIGHT = 2, ///< Upright descriptors, not invariant to rotation
+        DESCRIPTOR_KAZE = 3,
+        DESCRIPTOR_MLDB_UPRIGHT = 4, ///< Upright descriptors, not invariant to rotation
+        DESCRIPTOR_MLDB = 5
+    };
+
+    CV_WRAP AKAZE();
+    explicit AKAZE(DESCRIPTOR_TYPE descriptor_type, int descriptor_size = 0, int descriptor_channels = 3);
+
+    virtual ~AKAZE();
+
+    // returns the descriptor size in bytes
+    int descriptorSize() const;
+    // returns the descriptor type
+    int descriptorType() const;
+    // returns the default norm type
+    int defaultNorm() const;
+
+    // Compute the AKAZE features on an image
+    void operator()(InputArray image, InputArray mask, std::vector<KeyPoint>& keypoints) const;
+
+    // Compute the AKAZE features and descriptors on an image
+    void operator()(InputArray image, InputArray mask, std::vector<KeyPoint>& keypoints,
+        OutputArray descriptors, bool useProvidedKeypoints = false) const;
+
+    AlgorithmInfo* info() const;
+
+protected:
+
+    void computeImpl(InputArray image, std::vector<KeyPoint>& keypoints, OutputArray descriptors) const;
+    void detectImpl(InputArray image, std::vector<KeyPoint>& keypoints, InputArray mask = noArray()) const;
+
+    CV_PROP int descriptor;
+    CV_PROP int descriptor_channels;
+    CV_PROP int descriptor_size;
+
+};
 /****************************************************************************************\
 *                                      Distance                                          *
 \****************************************************************************************/
diff --git a/modules/features2d/src/akaze.cpp b/modules/features2d/src/akaze.cpp
new file mode 100644
index 0000000000..4b1eb196a2
--- /dev/null
+++ b/modules/features2d/src/akaze.cpp
@@ -0,0 +1,232 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2008, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+OpenCV wrapper of reference implementation of
+[1] Fast Explicit Diffusion for Accelerated Features in Nonlinear Scale Spaces.
+Pablo F. Alcantarilla, J. Nuevo and Adrien Bartoli.
+In British Machine Vision Conference (BMVC), Bristol, UK, September 2013
+http://www.robesafe.com/personal/pablo.alcantarilla/papers/Alcantarilla13bmvc.pdf
+@author Eugene Khvedchenya <ekhvedchenya@gmail.com>
+*/
+
+#include "precomp.hpp"
+#include "akaze/AKAZEFeatures.h"
+
+namespace cv
+{
+    AKAZE::AKAZE()
+        : descriptor(DESCRIPTOR_MLDB)
+        , descriptor_channels(3)
+        , descriptor_size(0)
+    {
+    }
+
+    AKAZE::AKAZE(DESCRIPTOR_TYPE _descriptor_type, int _descriptor_size, int _descriptor_channels)
+        : descriptor(_descriptor_type)
+        , descriptor_channels(_descriptor_channels)
+        , descriptor_size(_descriptor_size)
+    {
+
+    }
+
+    AKAZE::~AKAZE()
+    {
+
+    }
+
+    // returns the descriptor size in bytes
+    int AKAZE::descriptorSize() const
+    {
+        switch (descriptor)
+        {
+        case cv::AKAZE::DESCRIPTOR_KAZE:
+        case cv::AKAZE::DESCRIPTOR_KAZE_UPRIGHT:
+            return 64;
+
+        case cv::AKAZE::DESCRIPTOR_MLDB:
+        case cv::AKAZE::DESCRIPTOR_MLDB_UPRIGHT:
+            // We use the full length binary descriptor -> 486 bits
+            if (descriptor_size == 0)
+            {
+                int t = (6 + 36 + 120) * descriptor_channels;
+                return (int)ceil(t / 8.);
+            }
+            else
+            {
+                // We use the random bit selection length binary descriptor
+                return (int)ceil(descriptor_size / 8.);
+            }
+
+        default:
+            return -1;
+        }
+    }
+
+    // returns the descriptor type
+    int AKAZE::descriptorType() const
+    {
+        switch (descriptor)
+        {
+        case cv::AKAZE::DESCRIPTOR_KAZE:
+        case cv::AKAZE::DESCRIPTOR_KAZE_UPRIGHT:
+                return CV_32F;
+
+        case cv::AKAZE::DESCRIPTOR_MLDB:
+        case cv::AKAZE::DESCRIPTOR_MLDB_UPRIGHT:
+                return CV_8U;
+
+            default:
+                return -1;
+        }
+    }
+
+    // returns the default norm type
+    int AKAZE::defaultNorm() const
+    {
+        switch (descriptor)
+        {
+        case cv::AKAZE::DESCRIPTOR_KAZE:
+        case cv::AKAZE::DESCRIPTOR_KAZE_UPRIGHT:
+            return cv::NORM_L2;
+
+        case cv::AKAZE::DESCRIPTOR_MLDB:
+        case cv::AKAZE::DESCRIPTOR_MLDB_UPRIGHT:
+            return cv::NORM_HAMMING;
+
+        default:
+            return -1;
+        }
+    }
+
+
+    void AKAZE::operator()(InputArray image, InputArray mask,
+        std::vector<KeyPoint>& keypoints,
+        OutputArray descriptors,
+        bool useProvidedKeypoints) const
+    {
+        cv::Mat img = image.getMat();
+        if (img.type() != CV_8UC1)
+            cvtColor(image, img, COLOR_BGR2GRAY);
+
+        Mat img1_32;
+        img.convertTo(img1_32, CV_32F, 1.0 / 255.0, 0);
+
+        cv::Mat& desc = descriptors.getMatRef();
+
+        AKAZEOptions options;
+        options.descriptor = static_cast<DESCRIPTOR_TYPE>(descriptor);
+        options.descriptor_channels = descriptor_channels;
+        options.descriptor_size = descriptor_size;
+        options.img_width = img.cols;
+        options.img_height = img.rows;
+
+        AKAZEFeatures impl(options);
+        impl.Create_Nonlinear_Scale_Space(img1_32);
+
+        if (!useProvidedKeypoints)
+        {
+            impl.Feature_Detection(keypoints);
+        }
+
+        if (!mask.empty())
+        {
+            cv::KeyPointsFilter::runByPixelsMask(keypoints, mask.getMat());
+        }
+
+        impl.Compute_Descriptors(keypoints, desc);
+
+        CV_Assert((!desc.rows || desc.cols == descriptorSize()));
+        CV_Assert((!desc.rows || (desc.type() == descriptorType())));
+    }
+
+    void AKAZE::detectImpl(InputArray image, std::vector<KeyPoint>& keypoints, InputArray mask) const
+    {
+        cv::Mat img = image.getMat();
+        if (img.type() != CV_8UC1)
+            cvtColor(image, img, COLOR_BGR2GRAY);
+
+        Mat img1_32;
+        img.convertTo(img1_32, CV_32F, 1.0 / 255.0, 0);
+
+        AKAZEOptions options;
+        options.descriptor = static_cast<DESCRIPTOR_TYPE>(descriptor);
+        options.descriptor_channels = descriptor_channels;
+        options.descriptor_size = descriptor_size;
+        options.img_width = img.cols;
+        options.img_height = img.rows;
+
+        AKAZEFeatures impl(options);
+        impl.Create_Nonlinear_Scale_Space(img1_32);
+        impl.Feature_Detection(keypoints);
+
+        if (!mask.empty())
+        {
+            cv::KeyPointsFilter::runByPixelsMask(keypoints, mask.getMat());
+        }
+    }
+
+    void AKAZE::computeImpl(InputArray image, std::vector<KeyPoint>& keypoints, OutputArray descriptors) const
+    {
+        cv::Mat img = image.getMat();
+        if (img.type() != CV_8UC1)
+            cvtColor(image, img, COLOR_BGR2GRAY);
+
+        Mat img1_32;
+        img.convertTo(img1_32, CV_32F, 1.0 / 255.0, 0);
+
+        cv::Mat& desc = descriptors.getMatRef();
+
+        AKAZEOptions options;
+        options.descriptor = static_cast<DESCRIPTOR_TYPE>(descriptor);
+        options.descriptor_channels = descriptor_channels;
+        options.descriptor_size = descriptor_size;
+        options.img_width = img.cols;
+        options.img_height = img.rows;
+
+        AKAZEFeatures impl(options);
+        impl.Create_Nonlinear_Scale_Space(img1_32);
+        impl.Compute_Descriptors(keypoints, desc);
+
+        CV_Assert((!desc.rows || desc.cols == descriptorSize()));
+        CV_Assert((!desc.rows || (desc.type() == descriptorType())));
+    }
+}
\ No newline at end of file
diff --git a/modules/features2d/src/akaze/AKAZEConfig.h b/modules/features2d/src/akaze/AKAZEConfig.h
new file mode 100644
index 0000000000..1c1203f574
--- /dev/null
+++ b/modules/features2d/src/akaze/AKAZEConfig.h
@@ -0,0 +1,109 @@
+/**
+ * @file AKAZEConfig.h
+ * @brief AKAZE configuration file
+ * @date Feb 23, 2014
+ * @author Pablo F. Alcantarilla, Jesus Nuevo
+ */
+
+#pragma once
+
+/* ************************************************************************* */
+// OpenCV
+#include "precomp.hpp"
+#include <opencv2/features2d.hpp>
+
+/* ************************************************************************* */
+/// Lookup table for 2d gaussian (sigma = 2.5) where (0,0) is top left and (6,6) is bottom right
+const float gauss25[7][7] = {
+    { 0.02546481f, 0.02350698f, 0.01849125f, 0.01239505f, 0.00708017f, 0.00344629f, 0.00142946f },
+    { 0.02350698f, 0.02169968f, 0.01706957f, 0.01144208f, 0.00653582f, 0.00318132f, 0.00131956f },
+    { 0.01849125f, 0.01706957f, 0.01342740f, 0.00900066f, 0.00514126f, 0.00250252f, 0.00103800f },
+    { 0.01239505f, 0.01144208f, 0.00900066f, 0.00603332f, 0.00344629f, 0.00167749f, 0.00069579f },
+    { 0.00708017f, 0.00653582f, 0.00514126f, 0.00344629f, 0.00196855f, 0.00095820f, 0.00039744f },
+    { 0.00344629f, 0.00318132f, 0.00250252f, 0.00167749f, 0.00095820f, 0.00046640f, 0.00019346f },
+    { 0.00142946f, 0.00131956f, 0.00103800f, 0.00069579f, 0.00039744f, 0.00019346f, 0.00008024f }
+};
+
+/* ************************************************************************* */
+/// AKAZE configuration options structure
+struct AKAZEOptions {
+
+    /// AKAZE Diffusivities
+    enum DIFFUSIVITY_TYPE {
+        PM_G1 = 0,
+        PM_G2 = 1,
+        WEICKERT = 2,
+        CHARBONNIER = 3
+    };
+
+    AKAZEOptions()
+        : omax(4)
+        , nsublevels(4)
+        , img_width(0)
+        , img_height(0)
+        , soffset(1.6f)
+        , derivative_factor(1.5f)
+        , sderivatives(1.0)
+        , diffusivity(PM_G2)
+
+        , dthreshold(0.001f)
+        , min_dthreshold(0.00001f)
+
+        , descriptor(cv::AKAZE::DESCRIPTOR_MLDB)
+        , descriptor_size(0)
+        , descriptor_channels(3)
+        , descriptor_pattern_size(10)
+
+        , kcontrast(0.001f)
+        , kcontrast_percentile(0.7f)
+        , kcontrast_nbins(300)
+    {
+    }
+
+    int omax;                       ///< Maximum octave evolution of the image 2^sigma (coarsest scale sigma units)
+    int nsublevels;                 ///< Default number of sublevels per scale level
+    int img_width;                  ///< Width of the input image
+    int img_height;                 ///< Height of the input image
+    float soffset;                  ///< Base scale offset (sigma units)
+    float derivative_factor;        ///< Factor for the multiscale derivatives
+    float sderivatives;             ///< Smoothing factor for the derivatives
+    DIFFUSIVITY_TYPE diffusivity;   ///< Diffusivity type
+
+    float dthreshold;               ///< Detector response threshold to accept point
+    float min_dthreshold;           ///< Minimum detector threshold to accept a point
+
+    cv::AKAZE::DESCRIPTOR_TYPE descriptor;     ///< Type of descriptor
+    int descriptor_size;            ///< Size of the descriptor in bits. 0->Full size
+    int descriptor_channels;        ///< Number of channels in the descriptor (1, 2, 3)
+    int descriptor_pattern_size;    ///< Actual patch size is 2*pattern_size*point.scale
+
+    float kcontrast;                ///< The contrast factor parameter
+    float kcontrast_percentile;     ///< Percentile level for the contrast factor
+    int kcontrast_nbins;            ///< Number of bins for the contrast factor histogram
+};
+
+/* ************************************************************************* */
+/// AKAZE nonlinear diffusion filtering evolution
+struct TEvolution {
+
+    TEvolution() {
+        etime = 0.0f;
+        esigma = 0.0f;
+        octave = 0;
+        sublevel = 0;
+        sigma_size = 0;
+    }
+
+    cv::Mat Lx, Ly;	// First order spatial derivatives
+    cv::Mat Lxx, Lxy, Lyy;	// Second order spatial derivatives
+    cv::Mat Lflow;	// Diffusivity image
+    cv::Mat Lt;	// Evolution image
+    cv::Mat Lsmooth; // Smoothed image
+    cv::Mat Lstep; // Evolution step update
+    cv::Mat Ldet; // Detector response
+    float etime;	// Evolution time
+    float esigma;	// Evolution sigma. For linear diffusion t = sigma^2 / 2
+    size_t octave;	// Image octave
+    size_t sublevel;	// Image sublevel in each octave
+    size_t sigma_size;	// Integer sigma. For computing the feature detector responses
+};
\ No newline at end of file
diff --git a/modules/features2d/src/akaze/AKAZEFeatures.cpp b/modules/features2d/src/akaze/AKAZEFeatures.cpp
new file mode 100644
index 0000000000..e5955b21c2
--- /dev/null
+++ b/modules/features2d/src/akaze/AKAZEFeatures.cpp
@@ -0,0 +1,1941 @@
+/**
+ * @file AKAZEFeatures.cpp
+ * @brief Main class for detecting and describing binary features in an
+ * accelerated nonlinear scale space
+ * @date Sep 15, 2013
+ * @author Pablo F. Alcantarilla, Jesus Nuevo
+ */
+
+#include "AKAZEFeatures.h"
+#include "../kaze/fed.h"
+#include "../kaze/nldiffusion_functions.h"
+
+using namespace std;
+using namespace cv;
+using namespace cv::details::kaze;
+
+/* ************************************************************************* */
+/**
+ * @brief AKAZEFeatures constructor with input options
+ * @param options AKAZEFeatures configuration options
+ * @note This constructor allocates memory for the nonlinear scale space
+ */
+AKAZEFeatures::AKAZEFeatures(const AKAZEOptions& options) : options_(options) {
+
+    ncycles_ = 0;
+    reordering_ = true;
+
+    if (options_.descriptor_size > 0 && options_.descriptor >= cv::AKAZE::DESCRIPTOR_MLDB_UPRIGHT)
+    {
+        generateDescriptorSubsample(descriptorSamples_, descriptorBits_, options_.descriptor_size,
+            options_.descriptor_pattern_size, options_.descriptor_channels);
+    }
+
+    Allocate_Memory_Evolution();
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method allocates the memory for the nonlinear diffusion evolution
+ */
+void AKAZEFeatures::Allocate_Memory_Evolution(void) {
+
+    float rfactor = 0.0f;
+    int level_height = 0, level_width = 0;
+
+    // Allocate the dimension of the matrices for the evolution
+    for (int i = 0; i <= options_.omax - 1; i++) {
+        rfactor = 1.0f / pow(2.f, i);
+        level_height = (int)(options_.img_height*rfactor);
+        level_width = (int)(options_.img_width*rfactor);
+
+        // Smallest possible octave and allow one scale if the image is small
+        if ((level_width < 80 || level_height < 40) && i != 0) {
+            options_.omax = i;
+            break;
+        }
+
+        for (int j = 0; j < options_.nsublevels; j++) {
+            TEvolution step;
+            step.Lx = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.Ly = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.Lxx = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.Lxy = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.Lyy = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.Lt = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.Ldet = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.Lflow = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.Lstep = cv::Mat::zeros(level_height, level_width, CV_32F);
+            step.esigma = options_.soffset*pow(2.f, (float)(j) / (float)(options_.nsublevels) + i);
+            step.sigma_size = fRound(step.esigma);
+            step.etime = 0.5f*(step.esigma*step.esigma);
+            step.octave = i;
+            step.sublevel = j;
+            evolution_.push_back(step);
+        }
+    }
+
+    // Allocate memory for the number of cycles and time steps
+    for (size_t i = 1; i < evolution_.size(); i++) {
+        int naux = 0;
+        vector<float> tau;
+        float ttime = 0.0f;
+        ttime = evolution_[i].etime - evolution_[i - 1].etime;
+        naux = fed_tau_by_process_time(ttime, 1, 0.25f, reordering_, tau);
+        nsteps_.push_back(naux);
+        tsteps_.push_back(tau);
+        ncycles_++;
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method creates the nonlinear scale space for a given image
+ * @param img Input image for which the nonlinear scale space needs to be created
+ * @return 0 if the nonlinear scale space was created successfully, -1 otherwise
+ */
+int AKAZEFeatures::Create_Nonlinear_Scale_Space(const cv::Mat& img)
+{
+    CV_Assert(evolution_.size() > 0);
+
+    // Copy the original image to the first level of the evolution
+    img.copyTo(evolution_[0].Lt);
+    gaussian_2D_convolution(evolution_[0].Lt, evolution_[0].Lt, 0, 0, options_.soffset);
+    evolution_[0].Lt.copyTo(evolution_[0].Lsmooth);
+
+    // First compute the kcontrast factor
+    options_.kcontrast = compute_k_percentile(img, options_.kcontrast_percentile, 1.0f, options_.kcontrast_nbins, 0, 0);
+
+    // Now generate the rest of evolution levels
+    for (size_t i = 1; i < evolution_.size(); i++) {
+
+        if (evolution_[i].octave > evolution_[i - 1].octave) {
+            halfsample_image(evolution_[i - 1].Lt, evolution_[i].Lt);
+            options_.kcontrast = options_.kcontrast*0.75f;
+        }
+        else {
+            evolution_[i - 1].Lt.copyTo(evolution_[i].Lt);
+        }
+
+        gaussian_2D_convolution(evolution_[i].Lt, evolution_[i].Lsmooth, 0, 0, 1.0f);
+
+        // Compute the Gaussian derivatives Lx and Ly
+        image_derivatives_scharr(evolution_[i].Lsmooth, evolution_[i].Lx, 1, 0);
+        image_derivatives_scharr(evolution_[i].Lsmooth, evolution_[i].Ly, 0, 1);
+
+        // Compute the conductivity equation
+        switch (options_.diffusivity) {
+        case AKAZEOptions::PM_G1:
+            pm_g1(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options_.kcontrast);
+            break;
+        case AKAZEOptions::PM_G2:
+            pm_g2(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options_.kcontrast);
+            break;
+        case AKAZEOptions::WEICKERT:
+            weickert_diffusivity(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options_.kcontrast);
+            break;
+        case AKAZEOptions::CHARBONNIER:
+            charbonnier_diffusivity(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options_.kcontrast);
+            break;
+        default:
+            CV_Error(options_.diffusivity, "Diffusivity is not supported");
+            break;
+        }
+
+        // Perform FED n inner steps
+        for (int j = 0; j < nsteps_[i - 1]; j++) {
+            cv::details::kaze::nld_step_scalar(evolution_[i].Lt, evolution_[i].Lflow, evolution_[i].Lstep, tsteps_[i - 1][j]);
+        }
+    }
+
+    return 0;
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method selects interesting keypoints through the nonlinear scale space
+ * @param kpts Vector of detected keypoints
+ */
+void AKAZEFeatures::Feature_Detection(std::vector<cv::KeyPoint>& kpts)
+{
+    kpts.clear();
+
+    Compute_Determinant_Hessian_Response();
+    Find_Scale_Space_Extrema(kpts);
+    Do_Subpixel_Refinement(kpts);
+}
+
+/* ************************************************************************* */
+
+class MultiscaleDerivativesInvoker : public cv::ParallelLoopBody
+{
+public:
+    explicit MultiscaleDerivativesInvoker(std::vector<TEvolution>& ev, const AKAZEOptions& opt)
+        : evolution_(&ev)
+        , options_(opt)
+    {
+    }
+
+
+    void operator()(const cv::Range& range) const
+    {
+        std::vector<TEvolution>& evolution = *evolution_;
+
+        for (int i = range.start; i < range.end; i++)
+        {
+            float ratio = pow(2.f, (float)evolution[i].octave);
+            int sigma_size_ = fRound(evolution[i].esigma * options_.derivative_factor / ratio);
+
+            compute_scharr_derivatives(evolution[i].Lsmooth, evolution[i].Lx, 1, 0, sigma_size_);
+            compute_scharr_derivatives(evolution[i].Lsmooth, evolution[i].Ly, 0, 1, sigma_size_);
+            compute_scharr_derivatives(evolution[i].Lx, evolution[i].Lxx, 1, 0, sigma_size_);
+            compute_scharr_derivatives(evolution[i].Ly, evolution[i].Lyy, 0, 1, sigma_size_);
+            compute_scharr_derivatives(evolution[i].Lx, evolution[i].Lxy, 0, 1, sigma_size_);
+
+            evolution[i].Lx = evolution[i].Lx*((sigma_size_));
+            evolution[i].Ly = evolution[i].Ly*((sigma_size_));
+            evolution[i].Lxx = evolution[i].Lxx*((sigma_size_)*(sigma_size_));
+            evolution[i].Lxy = evolution[i].Lxy*((sigma_size_)*(sigma_size_));
+            evolution[i].Lyy = evolution[i].Lyy*((sigma_size_)*(sigma_size_));
+        }
+    }
+
+private:
+    std::vector<TEvolution>*  evolution_;
+    AKAZEOptions              options_;
+};
+
+/**
+ * @brief This method computes the multiscale derivatives for the nonlinear scale space
+ */
+void AKAZEFeatures::Compute_Multiscale_Derivatives(void)
+{
+    cv::parallel_for_(cv::Range(0, (int)evolution_.size()),
+        MultiscaleDerivativesInvoker(evolution_, options_));
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method computes the feature detector response for the nonlinear scale space
+ * @note We use the Hessian determinant as the feature detector response
+ */
+void AKAZEFeatures::Compute_Determinant_Hessian_Response(void) {
+
+    // Firstly compute the multiscale derivatives
+    Compute_Multiscale_Derivatives();
+
+    for (size_t i = 0; i < evolution_.size(); i++)
+    {
+        for (int ix = 0; ix < evolution_[i].Ldet.rows; ix++)
+        {
+            for (int jx = 0; jx < evolution_[i].Ldet.cols; jx++)
+            {
+                float lxx = *(evolution_[i].Lxx.ptr<float>(ix)+jx);
+                float lxy = *(evolution_[i].Lxy.ptr<float>(ix)+jx);
+                float lyy = *(evolution_[i].Lyy.ptr<float>(ix)+jx);
+                *(evolution_[i].Ldet.ptr<float>(ix)+jx) = (lxx*lyy - lxy*lxy);
+            }
+        }
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method finds extrema in the nonlinear scale space
+ * @param kpts Vector of detected keypoints
+ */
+void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector<cv::KeyPoint>& kpts)
+{
+
+    float value = 0.0;
+    float dist = 0.0, ratio = 0.0, smax = 0.0;
+    int npoints = 0, id_repeated = 0;
+    int sigma_size_ = 0, left_x = 0, right_x = 0, up_y = 0, down_y = 0;
+    bool is_extremum = false, is_repeated = false, is_out = false;
+    cv::KeyPoint point;
+    vector<cv::KeyPoint> kpts_aux;
+
+    // Set maximum size
+    if (options_.descriptor == cv::AKAZE::DESCRIPTOR_MLDB_UPRIGHT || options_.descriptor == cv::AKAZE::DESCRIPTOR_MLDB) {
+        smax = 10.0f*sqrtf(2.0f);
+    }
+    else if (options_.descriptor == cv::AKAZE::DESCRIPTOR_KAZE_UPRIGHT || options_.descriptor == cv::AKAZE::DESCRIPTOR_KAZE) {
+        smax = 12.0f*sqrtf(2.0f);
+    }
+
+    for (size_t i = 0; i < evolution_.size(); i++) {
+        for (int ix = 1; ix < evolution_[i].Ldet.rows - 1; ix++) {
+            for (int jx = 1; jx < evolution_[i].Ldet.cols - 1; jx++) {
+                is_extremum = false;
+                is_repeated = false;
+                is_out = false;
+                value = *(evolution_[i].Ldet.ptr<float>(ix)+jx);
+
+                // Filter the points with the detector threshold
+                if (value > options_.dthreshold && value >= options_.min_dthreshold &&
+                    value > *(evolution_[i].Ldet.ptr<float>(ix)+jx - 1) &&
+                    value > *(evolution_[i].Ldet.ptr<float>(ix)+jx + 1) &&
+                    value > *(evolution_[i].Ldet.ptr<float>(ix - 1) + jx - 1) &&
+                    value > *(evolution_[i].Ldet.ptr<float>(ix - 1) + jx) &&
+                    value > *(evolution_[i].Ldet.ptr<float>(ix - 1) + jx + 1) &&
+                    value > *(evolution_[i].Ldet.ptr<float>(ix + 1) + jx - 1) &&
+                    value > *(evolution_[i].Ldet.ptr<float>(ix + 1) + jx) &&
+                    value > *(evolution_[i].Ldet.ptr<float>(ix + 1) + jx + 1)) {
+
+                    is_extremum = true;
+                    point.response = fabs(value);
+                    point.size = evolution_[i].esigma*options_.derivative_factor;
+                    point.octave = (int)evolution_[i].octave;
+                    point.class_id = (int)i;
+                    ratio = pow(2.f, point.octave);
+                    sigma_size_ = fRound(point.size / ratio);
+                    point.pt.x = static_cast<float>(jx);
+                    point.pt.y = static_cast<float>(ix);
+
+                    // Compare response with the same and lower scale
+                    for (size_t ik = 0; ik < kpts_aux.size(); ik++) {
+
+                        if ((point.class_id - 1) == kpts_aux[ik].class_id ||
+                            point.class_id == kpts_aux[ik].class_id) {
+                            dist = sqrt(pow(point.pt.x*ratio - kpts_aux[ik].pt.x, 2) + pow(point.pt.y*ratio - kpts_aux[ik].pt.y, 2));
+                            if (dist <= point.size) {
+                                if (point.response > kpts_aux[ik].response) {
+                                    id_repeated = (int)ik;
+                                    is_repeated = true;
+                                }
+                                else {
+                                    is_extremum = false;
+                                }
+                                break;
+                            }
+                        }
+                    }
+
+                    // Check out of bounds
+                    if (is_extremum == true) {
+
+                        // Check that the point is under the image limits for the descriptor computation
+                        left_x = fRound(point.pt.x - smax*sigma_size_) - 1;
+                        right_x = fRound(point.pt.x + smax*sigma_size_) + 1;
+                        up_y = fRound(point.pt.y - smax*sigma_size_) - 1;
+                        down_y = fRound(point.pt.y + smax*sigma_size_) + 1;
+
+                        if (left_x < 0 || right_x >= evolution_[i].Ldet.cols ||
+                            up_y < 0 || down_y >= evolution_[i].Ldet.rows) {
+                            is_out = true;
+                        }
+
+                        if (is_out == false) {
+                            if (is_repeated == false) {
+                                point.pt.x *= ratio;
+                                point.pt.y *= ratio;
+                                kpts_aux.push_back(point);
+                                npoints++;
+                            }
+                            else {
+                                point.pt.x *= ratio;
+                                point.pt.y *= ratio;
+                                kpts_aux[id_repeated] = point;
+                            }
+                        } // if is_out
+                    } //if is_extremum
+                }
+            } // for jx
+        } // for ix
+    } // for i
+
+    // Now filter points with the upper scale level
+    for (size_t i = 0; i < kpts_aux.size(); i++) {
+
+        is_repeated = false;
+        const cv::KeyPoint& pt = kpts_aux[i];
+        for (size_t j = i + 1; j < kpts_aux.size(); j++) {
+
+            // Compare response with the upper scale
+            if ((pt.class_id + 1) == kpts_aux[j].class_id) {
+                dist = sqrt(pow(pt.pt.x - kpts_aux[j].pt.x, 2) + pow(pt.pt.y - kpts_aux[j].pt.y, 2));
+                if (dist <= pt.size) {
+                    if (pt.response < kpts_aux[j].response) {
+                        is_repeated = true;
+                        break;
+                    }
+                }
+            }
+        }
+
+        if (is_repeated == false)
+            kpts.push_back(pt);
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method performs subpixel refinement of the detected keypoints
+ * @param kpts Vector of detected keypoints
+ */
+void AKAZEFeatures::Do_Subpixel_Refinement(std::vector<cv::KeyPoint>& kpts)
+{
+    float Dx = 0.0, Dy = 0.0, ratio = 0.0;
+    float Dxx = 0.0, Dyy = 0.0, Dxy = 0.0;
+    int x = 0, y = 0;
+    cv::Mat A = cv::Mat::zeros(2, 2, CV_32F);
+    cv::Mat b = cv::Mat::zeros(2, 1, CV_32F);
+    cv::Mat dst = cv::Mat::zeros(2, 1, CV_32F);
+
+    for (size_t i = 0; i < kpts.size(); i++) {
+        ratio = pow(2.f, kpts[i].octave);
+        x = fRound(kpts[i].pt.x / ratio);
+        y = fRound(kpts[i].pt.y / ratio);
+
+        // Compute the gradient
+        Dx = (0.5f)*(*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y)+x + 1)
+            - *(evolution_[kpts[i].class_id].Ldet.ptr<float>(y)+x - 1));
+        Dy = (0.5f)*(*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y + 1) + x)
+            - *(evolution_[kpts[i].class_id].Ldet.ptr<float>(y - 1) + x));
+
+        // Compute the Hessian
+        Dxx = (*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y)+x + 1)
+            + *(evolution_[kpts[i].class_id].Ldet.ptr<float>(y)+x - 1)
+            - 2.0f*(*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y)+x)));
+
+        Dyy = (*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y + 1) + x)
+            + *(evolution_[kpts[i].class_id].Ldet.ptr<float>(y - 1) + x)
+            - 2.0f*(*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y)+x)));
+
+        Dxy = (0.25f)*(*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y + 1) + x + 1)
+            + (*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y - 1) + x - 1)))
+            - (0.25f)*(*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y - 1) + x + 1)
+            + (*(evolution_[kpts[i].class_id].Ldet.ptr<float>(y + 1) + x - 1)));
+
+        // Solve the linear system
+        *(A.ptr<float>(0)) = Dxx;
+        *(A.ptr<float>(1) + 1) = Dyy;
+        *(A.ptr<float>(0) + 1) = *(A.ptr<float>(1)) = Dxy;
+        *(b.ptr<float>(0)) = -Dx;
+        *(b.ptr<float>(1)) = -Dy;
+
+        cv::solve(A, b, dst, DECOMP_LU);
+
+        if (fabs(*(dst.ptr<float>(0))) <= 1.0f && fabs(*(dst.ptr<float>(1))) <= 1.0f) {
+            kpts[i].pt.x = x + (*(dst.ptr<float>(0)));
+            kpts[i].pt.y = y + (*(dst.ptr<float>(1)));
+            kpts[i].pt.x *= powf(2.f, (float)evolution_[kpts[i].class_id].octave);
+            kpts[i].pt.y *= powf(2.f, (float)evolution_[kpts[i].class_id].octave);
+            kpts[i].angle = 0.0;
+
+            // In OpenCV the size of a keypoint its the diameter
+            kpts[i].size *= 2.0f;
+        }
+        // Delete the point since its not stable
+        else {
+            kpts.erase(kpts.begin() + i);
+            i--;
+        }
+    }
+}
+
+/* ************************************************************************* */
+
+class SURF_Descriptor_Upright_64_Invoker : public cv::ParallelLoopBody
+{
+public:
+    SURF_Descriptor_Upright_64_Invoker(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc, std::vector<TEvolution>& evolution)
+        : keypoints_(&kpts)
+        , descriptors_(&desc)
+        , evolution_(&evolution)
+    {
+    }
+
+    void operator() (const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++)
+        {
+            Get_SURF_Descriptor_Upright_64((*keypoints_)[i], descriptors_->ptr<float>(i));
+        }
+    }
+
+    void Get_SURF_Descriptor_Upright_64(const cv::KeyPoint& kpt, float* desc) const;
+
+private:
+    std::vector<cv::KeyPoint>* keypoints_;
+    cv::Mat*                   descriptors_;
+    std::vector<TEvolution>*   evolution_;
+};
+
+class SURF_Descriptor_64_Invoker : public cv::ParallelLoopBody
+{
+public:
+    SURF_Descriptor_64_Invoker(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc, std::vector<TEvolution>& evolution)
+        : keypoints_(&kpts)
+        , descriptors_(&desc)
+        , evolution_(&evolution)
+    {
+    }
+
+    void operator()(const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++)
+        {
+            AKAZEFeatures::Compute_Main_Orientation((*keypoints_)[i], *evolution_);
+            Get_SURF_Descriptor_64((*keypoints_)[i], descriptors_->ptr<float>(i));
+        }
+    }
+
+    void Get_SURF_Descriptor_64(const cv::KeyPoint& kpt, float* desc) const;
+
+private:
+    std::vector<cv::KeyPoint>* keypoints_;
+    cv::Mat*                   descriptors_;
+    std::vector<TEvolution>*   evolution_;
+};
+
+class MSURF_Upright_Descriptor_64_Invoker : public cv::ParallelLoopBody
+{
+public:
+    MSURF_Upright_Descriptor_64_Invoker(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc, std::vector<TEvolution>& evolution)
+        : keypoints_(&kpts)
+        , descriptors_(&desc)
+        , evolution_(&evolution)
+    {
+    }
+
+    void operator()(const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++)
+        {
+            Get_MSURF_Upright_Descriptor_64((*keypoints_)[i], descriptors_->ptr<float>(i));
+        }
+    }
+
+    void Get_MSURF_Upright_Descriptor_64(const cv::KeyPoint& kpt, float* desc) const;
+
+private:
+    std::vector<cv::KeyPoint>* keypoints_;
+    cv::Mat*                   descriptors_;
+    std::vector<TEvolution>*   evolution_;
+};
+
+class MSURF_Descriptor_64_Invoker : public cv::ParallelLoopBody
+{
+public:
+    MSURF_Descriptor_64_Invoker(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc, std::vector<TEvolution>& evolution)
+        : keypoints_(&kpts)
+        , descriptors_(&desc)
+        , evolution_(&evolution)
+    {
+    }
+
+    void operator() (const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++)
+        {
+            AKAZEFeatures::Compute_Main_Orientation((*keypoints_)[i], *evolution_);
+            Get_MSURF_Descriptor_64((*keypoints_)[i], descriptors_->ptr<float>(i));
+        }
+    }
+
+    void Get_MSURF_Descriptor_64(const cv::KeyPoint& kpt, float* desc) const;
+
+private:
+    std::vector<cv::KeyPoint>* keypoints_;
+    cv::Mat*                   descriptors_;
+    std::vector<TEvolution>*   evolution_;
+};
+
+class Upright_MLDB_Full_Descriptor_Invoker : public cv::ParallelLoopBody
+{
+public:
+    Upright_MLDB_Full_Descriptor_Invoker(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc, std::vector<TEvolution>& evolution, AKAZEOptions& options)
+        : keypoints_(&kpts)
+        , descriptors_(&desc)
+        , evolution_(&evolution)
+        , options_(&options)
+    {
+    }
+
+    void operator() (const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++)
+        {
+            Get_Upright_MLDB_Full_Descriptor((*keypoints_)[i], descriptors_->ptr<unsigned char>(i));
+        }
+    }
+
+    void Get_Upright_MLDB_Full_Descriptor(const cv::KeyPoint& kpt, unsigned char* desc) const;
+
+private:
+    std::vector<cv::KeyPoint>* keypoints_;
+    cv::Mat*                   descriptors_;
+    std::vector<TEvolution>*   evolution_;
+    AKAZEOptions*              options_;
+};
+
+class Upright_MLDB_Descriptor_Subset_Invoker : public cv::ParallelLoopBody
+{
+public:
+    Upright_MLDB_Descriptor_Subset_Invoker(std::vector<cv::KeyPoint>& kpts,
+        cv::Mat& desc,
+        std::vector<TEvolution>& evolution,
+        AKAZEOptions& options,
+        cv::Mat descriptorSamples,
+        cv::Mat descriptorBits)
+        : keypoints_(&kpts)
+        , descriptors_(&desc)
+        , evolution_(&evolution)
+        , options_(&options)
+        , descriptorSamples_(descriptorSamples)
+        , descriptorBits_(descriptorBits)
+    {
+    }
+
+    void operator() (const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++)
+        {
+            Get_Upright_MLDB_Descriptor_Subset((*keypoints_)[i], descriptors_->ptr<unsigned char>(i));
+        }
+    }
+
+    void Get_Upright_MLDB_Descriptor_Subset(const cv::KeyPoint& kpt, unsigned char* desc) const;
+
+private:
+    std::vector<cv::KeyPoint>* keypoints_;
+    cv::Mat*                   descriptors_;
+    std::vector<TEvolution>*   evolution_;
+    AKAZEOptions*              options_;
+
+    cv::Mat descriptorSamples_;  // List of positions in the grids to sample LDB bits from.
+    cv::Mat descriptorBits_;
+};
+
+class MLDB_Full_Descriptor_Invoker : public cv::ParallelLoopBody
+{
+public:
+    MLDB_Full_Descriptor_Invoker(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc, std::vector<TEvolution>& evolution, AKAZEOptions& options)
+        : keypoints_(&kpts)
+        , descriptors_(&desc)
+        , evolution_(&evolution)
+        , options_(&options)
+    {
+    }
+
+    void operator() (const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++)
+        {
+            AKAZEFeatures::Compute_Main_Orientation((*keypoints_)[i], *evolution_);
+            Get_MLDB_Full_Descriptor((*keypoints_)[i], descriptors_->ptr<unsigned char>(i));
+        }
+    }
+
+    void Get_MLDB_Full_Descriptor(const cv::KeyPoint& kpt, unsigned char* desc) const;
+
+private:
+    std::vector<cv::KeyPoint>* keypoints_;
+    cv::Mat*                   descriptors_;
+    std::vector<TEvolution>*   evolution_;
+    AKAZEOptions*              options_;
+};
+
+class MLDB_Descriptor_Subset_Invoker : public cv::ParallelLoopBody
+{
+public:
+    MLDB_Descriptor_Subset_Invoker(std::vector<cv::KeyPoint>& kpts,
+        cv::Mat& desc,
+        std::vector<TEvolution>& evolution,
+        AKAZEOptions& options,
+        cv::Mat descriptorSamples,
+        cv::Mat descriptorBits)
+        : keypoints_(&kpts)
+        , descriptors_(&desc)
+        , evolution_(&evolution)
+        , options_(&options)
+        , descriptorSamples_(descriptorSamples)
+        , descriptorBits_(descriptorBits)
+    {
+    }
+
+    void operator() (const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++)
+        {
+            AKAZEFeatures::Compute_Main_Orientation((*keypoints_)[i], *evolution_);
+            Get_MLDB_Descriptor_Subset((*keypoints_)[i], descriptors_->ptr<unsigned char>(i));
+        }
+    }
+
+    void Get_MLDB_Descriptor_Subset(const cv::KeyPoint& kpt, unsigned char* desc) const;
+
+private:
+    std::vector<cv::KeyPoint>* keypoints_;
+    cv::Mat*                   descriptors_;
+    std::vector<TEvolution>*   evolution_;
+    AKAZEOptions*              options_;
+
+    cv::Mat descriptorSamples_;  // List of positions in the grids to sample LDB bits from.
+    cv::Mat descriptorBits_;
+};
+
+/**
+ * @brief This method  computes the set of descriptors through the nonlinear scale space
+ * @param kpts Vector of detected keypoints
+ * @param desc Matrix to store the descriptors
+ */
+void AKAZEFeatures::Compute_Descriptors(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc)
+{
+    // Allocate memory for the matrix with the descriptors
+    if (options_.descriptor < cv::AKAZE::DESCRIPTOR_MLDB_UPRIGHT) {
+        desc = cv::Mat::zeros((int)kpts.size(), 64, CV_32FC1);
+    }
+    else {
+        // We use the full length binary descriptor -> 486 bits
+        if (options_.descriptor_size == 0) {
+            int t = (6 + 36 + 120)*options_.descriptor_channels;
+            desc = cv::Mat::zeros((int)kpts.size(), (int)ceil(t / 8.), CV_8UC1);
+        }
+        else {
+            // We use the random bit selection length binary descriptor
+            desc = cv::Mat::zeros((int)kpts.size(), (int)ceil(options_.descriptor_size / 8.), CV_8UC1);
+        }
+    }
+
+    switch (options_.descriptor)
+    {
+    case cv::AKAZE::DESCRIPTOR_KAZE_UPRIGHT: // Upright descriptors, not invariant to rotation
+    {
+        cv::parallel_for_(cv::Range(0, (int)kpts.size()), MSURF_Upright_Descriptor_64_Invoker(kpts, desc, evolution_));
+    }
+        break;
+    case cv::AKAZE::DESCRIPTOR_KAZE:
+    {
+        cv::parallel_for_(cv::Range(0, (int)kpts.size()), MSURF_Descriptor_64_Invoker(kpts, desc, evolution_));
+    }
+        break;
+    case cv::AKAZE::DESCRIPTOR_MLDB_UPRIGHT: // Upright descriptors, not invariant to rotation
+    {
+        if (options_.descriptor_size == 0)
+            cv::parallel_for_(cv::Range(0, (int)kpts.size()), Upright_MLDB_Full_Descriptor_Invoker(kpts, desc, evolution_, options_));
+        else
+            cv::parallel_for_(cv::Range(0, (int)kpts.size()), Upright_MLDB_Descriptor_Subset_Invoker(kpts, desc, evolution_, options_, descriptorSamples_, descriptorBits_));
+    }
+        break;
+    case cv::AKAZE::DESCRIPTOR_MLDB:
+    {
+        if (options_.descriptor_size == 0)
+            cv::parallel_for_(cv::Range(0, (int)kpts.size()), MLDB_Full_Descriptor_Invoker(kpts, desc, evolution_, options_));
+        else
+            cv::parallel_for_(cv::Range(0, (int)kpts.size()), MLDB_Descriptor_Subset_Invoker(kpts, desc, evolution_, options_, descriptorSamples_, descriptorBits_));
+    }
+        break;
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method computes the main orientation for a given keypoint
+ * @param kpt Input keypoint
+ * @note The orientation is computed using a similar approach as described in the
+ * original SURF method. See Bay et al., Speeded Up Robust Features, ECCV 2006
+ */
+void AKAZEFeatures::Compute_Main_Orientation(cv::KeyPoint& kpt, const std::vector<TEvolution>& evolution_) {
+
+    int ix = 0, iy = 0, idx = 0, s = 0, level = 0;
+    float xf = 0.0, yf = 0.0, gweight = 0.0, ratio = 0.0;
+    std::vector<float> resX(109), resY(109), Ang(109);
+    const int id[] = { 6, 5, 4, 3, 2, 1, 0, 1, 2, 3, 4, 5, 6 };
+
+    // Variables for computing the dominant direction
+    float sumX = 0.0, sumY = 0.0, max = 0.0, ang1 = 0.0, ang2 = 0.0;
+
+    // Get the information from the keypoint
+    level = kpt.class_id;
+    ratio = (float)(1 << evolution_[level].octave);
+    s = fRound(0.5f*kpt.size / ratio);
+    xf = kpt.pt.x / ratio;
+    yf = kpt.pt.y / ratio;
+
+    // Calculate derivatives responses for points within radius of 6*scale
+    for (int i = -6; i <= 6; ++i) {
+        for (int j = -6; j <= 6; ++j) {
+            if (i*i + j*j < 36) {
+                iy = fRound(yf + j*s);
+                ix = fRound(xf + i*s);
+
+                gweight = gauss25[id[i + 6]][id[j + 6]];
+                resX[idx] = gweight*(*(evolution_[level].Lx.ptr<float>(iy)+ix));
+                resY[idx] = gweight*(*(evolution_[level].Ly.ptr<float>(iy)+ix));
+
+                Ang[idx] = get_angle(resX[idx], resY[idx]);
+                ++idx;
+            }
+        }
+    }
+
+    // Loop slides pi/3 window around feature point
+    for (ang1 = 0; ang1 < (float)(2.0 * CV_PI); ang1 += 0.15f) {
+        ang2 = (ang1 + (float)(CV_PI / 3.0) >(float)(2.0*CV_PI) ? ang1 - (float)(5.0*CV_PI / 3.0) : ang1 + (float)(CV_PI / 3.0));
+        sumX = sumY = 0.f;
+
+        for (size_t k = 0; k < Ang.size(); ++k) {
+            // Get angle from the x-axis of the sample point
+            const float & ang = Ang[k];
+
+            // Determine whether the point is within the window
+            if (ang1 < ang2 && ang1 < ang && ang < ang2) {
+                sumX += resX[k];
+                sumY += resY[k];
+            }
+            else if (ang2 < ang1 &&
+                ((ang > 0 && ang < ang2) || (ang > ang1 && ang < 2.0f*CV_PI))) {
+                sumX += resX[k];
+                sumY += resY[k];
+            }
+        }
+
+        // if the vector produced from this window is longer than all
+        // previous vectors then this forms the new dominant direction
+        if (sumX*sumX + sumY*sumY > max) {
+            // store largest orientation
+            max = sumX*sumX + sumY*sumY;
+            kpt.angle = get_angle(sumX, sumY);
+        }
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method computes the upright descriptor (not rotation invariant) of
+ * the provided keypoint
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ * @note Rectangular grid of 24 s x 24 s. Descriptor Length 64. The descriptor is inspired
+ * from Agrawal et al., CenSurE: Center Surround Extremas for Realtime Feature Detection and Matching,
+ * ECCV 2008
+ */
+void MSURF_Upright_Descriptor_64_Invoker::Get_MSURF_Upright_Descriptor_64(const cv::KeyPoint& kpt, float *desc) const {
+
+    float dx = 0.0, dy = 0.0, mdx = 0.0, mdy = 0.0, gauss_s1 = 0.0, gauss_s2 = 0.0;
+    float rx = 0.0, ry = 0.0, len = 0.0, xf = 0.0, yf = 0.0, ys = 0.0, xs = 0.0;
+    float sample_x = 0.0, sample_y = 0.0;
+    int x1 = 0, y1 = 0, sample_step = 0, pattern_size = 0;
+    int x2 = 0, y2 = 0, kx = 0, ky = 0, i = 0, j = 0, dcount = 0;
+    float fx = 0.0, fy = 0.0, ratio = 0.0, res1 = 0.0, res2 = 0.0, res3 = 0.0, res4 = 0.0;
+    int scale = 0, dsize = 0, level = 0;
+
+    // Subregion centers for the 4x4 gaussian weighting
+    float cx = -0.5f, cy = 0.5f;
+
+    const std::vector<TEvolution>& evolution = *evolution_;
+
+    // Set the descriptor size and the sample and pattern sizes
+    dsize = 64;
+    sample_step = 5;
+    pattern_size = 12;
+
+    // Get the information from the keypoint
+    ratio = (float)(1 << kpt.octave);
+    scale = fRound(0.5f*kpt.size / ratio);
+    level = kpt.class_id;
+    yf = kpt.pt.y / ratio;
+    xf = kpt.pt.x / ratio;
+
+    i = -8;
+
+    // Calculate descriptor for this interest point
+    // Area of size 24 s x 24 s
+    while (i < pattern_size) {
+        j = -8;
+        i = i - 4;
+
+        cx += 1.0f;
+        cy = -0.5f;
+
+        while (j < pattern_size) {
+            dx = dy = mdx = mdy = 0.0;
+            cy += 1.0f;
+            j = j - 4;
+
+            ky = i + sample_step;
+            kx = j + sample_step;
+
+            ys = yf + (ky*scale);
+            xs = xf + (kx*scale);
+
+            for (int k = i; k < i + 9; k++) {
+                for (int l = j; l < j + 9; l++) {
+                    sample_y = k*scale + yf;
+                    sample_x = l*scale + xf;
+
+                    //Get the gaussian weighted x and y responses
+                    gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.50f*scale);
+
+                    y1 = (int)(sample_y - .5);
+                    x1 = (int)(sample_x - .5);
+
+                    y2 = (int)(sample_y + .5);
+                    x2 = (int)(sample_x + .5);
+
+                    fx = sample_x - x1;
+                    fy = sample_y - y1;
+
+                    res1 = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    res2 = *(evolution[level].Lx.ptr<float>(y1)+x2);
+                    res3 = *(evolution[level].Lx.ptr<float>(y2)+x1);
+                    res4 = *(evolution[level].Lx.ptr<float>(y2)+x2);
+                    rx = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    res1 = *(evolution[level].Ly.ptr<float>(y1)+x1);
+                    res2 = *(evolution[level].Ly.ptr<float>(y1)+x2);
+                    res3 = *(evolution[level].Ly.ptr<float>(y2)+x1);
+                    res4 = *(evolution[level].Ly.ptr<float>(y2)+x2);
+                    ry = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    rx = gauss_s1*rx;
+                    ry = gauss_s1*ry;
+
+                    // Sum the derivatives to the cumulative descriptor
+                    dx += rx;
+                    dy += ry;
+                    mdx += fabs(rx);
+                    mdy += fabs(ry);
+                }
+            }
+
+            // Add the values to the descriptor vector
+            gauss_s2 = gaussian(cx - 2.0f, cy - 2.0f, 1.5f);
+
+            desc[dcount++] = dx*gauss_s2;
+            desc[dcount++] = dy*gauss_s2;
+            desc[dcount++] = mdx*gauss_s2;
+            desc[dcount++] = mdy*gauss_s2;
+
+            len += (dx*dx + dy*dy + mdx*mdx + mdy*mdy)*gauss_s2*gauss_s2;
+
+            j += 9;
+        }
+
+        i += 9;
+    }
+
+    // convert to unit vector
+    len = sqrt(len);
+
+    for (i = 0; i < dsize; i++) {
+        desc[i] /= len;
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method computes the descriptor of the provided keypoint given the
+ * main orientation of the keypoint
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ * @note Rectangular grid of 24 s x 24 s. Descriptor Length 64. The descriptor is inspired
+ * from Agrawal et al., CenSurE: Center Surround Extremas for Realtime Feature Detection and Matching,
+ * ECCV 2008
+ */
+void MSURF_Descriptor_64_Invoker::Get_MSURF_Descriptor_64(const cv::KeyPoint& kpt, float *desc) const {
+
+    float dx = 0.0, dy = 0.0, mdx = 0.0, mdy = 0.0, gauss_s1 = 0.0, gauss_s2 = 0.0;
+    float rx = 0.0, ry = 0.0, rrx = 0.0, rry = 0.0, len = 0.0, xf = 0.0, yf = 0.0, ys = 0.0, xs = 0.0;
+    float sample_x = 0.0, sample_y = 0.0, co = 0.0, si = 0.0, angle = 0.0;
+    float fx = 0.0, fy = 0.0, ratio = 0.0, res1 = 0.0, res2 = 0.0, res3 = 0.0, res4 = 0.0;
+    int x1 = 0, y1 = 0, x2 = 0, y2 = 0, sample_step = 0, pattern_size = 0;
+    int kx = 0, ky = 0, i = 0, j = 0, dcount = 0;
+    int scale = 0, dsize = 0, level = 0;
+
+    // Subregion centers for the 4x4 gaussian weighting
+    float cx = -0.5f, cy = 0.5f;
+
+    const std::vector<TEvolution>& evolution = *evolution_;
+
+    // Set the descriptor size and the sample and pattern sizes
+    dsize = 64;
+    sample_step = 5;
+    pattern_size = 12;
+
+    // Get the information from the keypoint
+    ratio = (float)(1 << kpt.octave);
+    scale = fRound(0.5f*kpt.size / ratio);
+    angle = kpt.angle;
+    level = kpt.class_id;
+    yf = kpt.pt.y / ratio;
+    xf = kpt.pt.x / ratio;
+    co = cos(angle);
+    si = sin(angle);
+
+    i = -8;
+
+    // Calculate descriptor for this interest point
+    // Area of size 24 s x 24 s
+    while (i < pattern_size) {
+        j = -8;
+        i = i - 4;
+
+        cx += 1.0f;
+        cy = -0.5f;
+
+        while (j < pattern_size) {
+            dx = dy = mdx = mdy = 0.0;
+            cy += 1.0f;
+            j = j - 4;
+
+            ky = i + sample_step;
+            kx = j + sample_step;
+
+            xs = xf + (-kx*scale*si + ky*scale*co);
+            ys = yf + (kx*scale*co + ky*scale*si);
+
+            for (int k = i; k < i + 9; ++k) {
+                for (int l = j; l < j + 9; ++l) {
+                    // Get coords of sample point on the rotated axis
+                    sample_y = yf + (l*scale*co + k*scale*si);
+                    sample_x = xf + (-l*scale*si + k*scale*co);
+
+                    // Get the gaussian weighted x and y responses
+                    gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.5f*scale);
+
+                    y1 = fRound(sample_y - 0.5f);
+                    x1 = fRound(sample_x - 0.5f);
+
+                    y2 = fRound(sample_y + 0.5f);
+                    x2 = fRound(sample_x + 0.5f);
+
+                    fx = sample_x - x1;
+                    fy = sample_y - y1;
+
+                    res1 = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    res2 = *(evolution[level].Lx.ptr<float>(y1)+x2);
+                    res3 = *(evolution[level].Lx.ptr<float>(y2)+x1);
+                    res4 = *(evolution[level].Lx.ptr<float>(y2)+x2);
+                    rx = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    res1 = *(evolution[level].Ly.ptr<float>(y1)+x1);
+                    res2 = *(evolution[level].Ly.ptr<float>(y1)+x2);
+                    res3 = *(evolution[level].Ly.ptr<float>(y2)+x1);
+                    res4 = *(evolution[level].Ly.ptr<float>(y2)+x2);
+                    ry = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    // Get the x and y derivatives on the rotated axis
+                    rry = gauss_s1*(rx*co + ry*si);
+                    rrx = gauss_s1*(-rx*si + ry*co);
+
+                    // Sum the derivatives to the cumulative descriptor
+                    dx += rrx;
+                    dy += rry;
+                    mdx += fabs(rrx);
+                    mdy += fabs(rry);
+                }
+            }
+
+            // Add the values to the descriptor vector
+            gauss_s2 = gaussian(cx - 2.0f, cy - 2.0f, 1.5f);
+            desc[dcount++] = dx*gauss_s2;
+            desc[dcount++] = dy*gauss_s2;
+            desc[dcount++] = mdx*gauss_s2;
+            desc[dcount++] = mdy*gauss_s2;
+
+            len += (dx*dx + dy*dy + mdx*mdx + mdy*mdy)*gauss_s2*gauss_s2;
+
+            j += 9;
+        }
+
+        i += 9;
+    }
+
+    // convert to unit vector
+    len = sqrt(len);
+
+    for (i = 0; i < dsize; i++) {
+        desc[i] /= len;
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method computes the rupright descriptor (not rotation invariant) of
+ * the provided keypoint
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ */
+void Upright_MLDB_Full_Descriptor_Invoker::Get_Upright_MLDB_Full_Descriptor(const cv::KeyPoint& kpt, unsigned char *desc) const {
+
+    float di = 0.0, dx = 0.0, dy = 0.0;
+    float ri = 0.0, rx = 0.0, ry = 0.0, xf = 0.0, yf = 0.0;
+    float sample_x = 0.0, sample_y = 0.0, ratio = 0.0;
+    int x1 = 0, y1 = 0, sample_step = 0, pattern_size = 0;
+    int level = 0, nsamples = 0, scale = 0;
+    int dcount1 = 0, dcount2 = 0;
+
+    const AKAZEOptions & options = *options_;
+    const std::vector<TEvolution>& evolution = *evolution_;
+
+    // Matrices for the M-LDB descriptor
+    cv::Mat values_1 = cv::Mat::zeros(4, options.descriptor_channels, CV_32FC1);
+    cv::Mat values_2 = cv::Mat::zeros(9, options.descriptor_channels, CV_32FC1);
+    cv::Mat values_3 = cv::Mat::zeros(16, options.descriptor_channels, CV_32FC1);
+
+    // Get the information from the keypoint
+    ratio = (float)(1 << kpt.octave);
+    scale = fRound(0.5f*kpt.size / ratio);
+    level = kpt.class_id;
+    yf = kpt.pt.y / ratio;
+    xf = kpt.pt.x / ratio;
+
+    // First 2x2 grid
+    pattern_size = options_->descriptor_pattern_size;
+    sample_step = pattern_size;
+
+    for (int i = -pattern_size; i < pattern_size; i += sample_step) {
+        for (int j = -pattern_size; j < pattern_size; j += sample_step) {
+            di = dx = dy = 0.0;
+            nsamples = 0;
+
+            for (int k = i; k < i + sample_step; k++) {
+                for (int l = j; l < j + sample_step; l++) {
+
+                    // Get the coordinates of the sample point
+                    sample_y = yf + l*scale;
+                    sample_x = xf + k*scale;
+
+                    y1 = fRound(sample_y);
+                    x1 = fRound(sample_x);
+
+                    ri = *(evolution[level].Lt.ptr<float>(y1)+x1);
+                    rx = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    ry = *(evolution[level].Ly.ptr<float>(y1)+x1);
+
+                    di += ri;
+                    dx += rx;
+                    dy += ry;
+                    nsamples++;
+                }
+            }
+
+            di /= nsamples;
+            dx /= nsamples;
+            dy /= nsamples;
+
+            *(values_1.ptr<float>(dcount2)) = di;
+            *(values_1.ptr<float>(dcount2)+1) = dx;
+            *(values_1.ptr<float>(dcount2)+2) = dy;
+            dcount2++;
+        }
+    }
+
+    // Do binary comparison first level
+    for (int i = 0; i < 4; i++) {
+        for (int j = i + 1; j < 4; j++) {
+            if (*(values_1.ptr<float>(i)) > *(values_1.ptr<float>(j))) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+
+            if (*(values_1.ptr<float>(i)+1) > *(values_1.ptr<float>(j)+1)) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+
+            if (*(values_1.ptr<float>(i)+2) > *(values_1.ptr<float>(j)+2)) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+        }
+    }
+
+    // Second 3x3 grid
+    sample_step = static_cast<int>(ceil(pattern_size*2. / 3.));
+    dcount2 = 0;
+
+    for (int i = -pattern_size; i < pattern_size; i += sample_step) {
+        for (int j = -pattern_size; j < pattern_size; j += sample_step) {
+            di = dx = dy = 0.0;
+            nsamples = 0;
+
+            for (int k = i; k < i + sample_step; k++) {
+                for (int l = j; l < j + sample_step; l++) {
+
+                    // Get the coordinates of the sample point
+                    sample_y = yf + l*scale;
+                    sample_x = xf + k*scale;
+
+                    y1 = fRound(sample_y);
+                    x1 = fRound(sample_x);
+
+                    ri = *(evolution[level].Lt.ptr<float>(y1)+x1);
+                    rx = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    ry = *(evolution[level].Ly.ptr<float>(y1)+x1);
+
+                    di += ri;
+                    dx += rx;
+                    dy += ry;
+                    nsamples++;
+                }
+            }
+
+            di /= nsamples;
+            dx /= nsamples;
+            dy /= nsamples;
+
+            *(values_2.ptr<float>(dcount2)) = di;
+            *(values_2.ptr<float>(dcount2)+1) = dx;
+            *(values_2.ptr<float>(dcount2)+2) = dy;
+            dcount2++;
+        }
+    }
+
+    //Do binary comparison second level
+    dcount2 = 0;
+    for (int i = 0; i < 9; i++) {
+        for (int j = i + 1; j < 9; j++) {
+            if (*(values_2.ptr<float>(i)) > *(values_2.ptr<float>(j))) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+
+            if (*(values_2.ptr<float>(i)+1) > *(values_2.ptr<float>(j)+1)) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+
+            if (*(values_2.ptr<float>(i)+2) > *(values_2.ptr<float>(j)+2)) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+        }
+    }
+
+    // Third 4x4 grid
+    sample_step = pattern_size / 2;
+    dcount2 = 0;
+
+    for (int i = -pattern_size; i < pattern_size; i += sample_step) {
+        for (int j = -pattern_size; j < pattern_size; j += sample_step) {
+            di = dx = dy = 0.0;
+            nsamples = 0;
+
+            for (int k = i; k < i + sample_step; k++) {
+                for (int l = j; l < j + sample_step; l++) {
+
+                    // Get the coordinates of the sample point
+                    sample_y = yf + l*scale;
+                    sample_x = xf + k*scale;
+
+                    y1 = fRound(sample_y);
+                    x1 = fRound(sample_x);
+
+                    ri = *(evolution[level].Lt.ptr<float>(y1)+x1);
+                    rx = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    ry = *(evolution[level].Ly.ptr<float>(y1)+x1);
+
+                    di += ri;
+                    dx += rx;
+                    dy += ry;
+                    nsamples++;
+                }
+            }
+
+            di /= nsamples;
+            dx /= nsamples;
+            dy /= nsamples;
+
+            *(values_3.ptr<float>(dcount2)) = di;
+            *(values_3.ptr<float>(dcount2)+1) = dx;
+            *(values_3.ptr<float>(dcount2)+2) = dy;
+            dcount2++;
+        }
+    }
+
+    //Do binary comparison third level
+    dcount2 = 0;
+    for (int i = 0; i < 16; i++) {
+        for (int j = i + 1; j < 16; j++) {
+            if (*(values_3.ptr<float>(i)) > *(values_3.ptr<float>(j))) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+
+            if (*(values_3.ptr<float>(i)+1) > *(values_3.ptr<float>(j)+1)) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+
+            if (*(values_3.ptr<float>(i)+2) > *(values_3.ptr<float>(j)+2)) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+        }
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method computes the descriptor of the provided keypoint given the
+ * main orientation of the keypoint
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ */
+void MLDB_Full_Descriptor_Invoker::Get_MLDB_Full_Descriptor(const cv::KeyPoint& kpt, unsigned char *desc) const {
+
+    float di = 0.0, dx = 0.0, dy = 0.0, ratio = 0.0;
+    float ri = 0.0, rx = 0.0, ry = 0.0, rrx = 0.0, rry = 0.0, xf = 0.0, yf = 0.0;
+    float sample_x = 0.0, sample_y = 0.0, co = 0.0, si = 0.0, angle = 0.0;
+    int x1 = 0, y1 = 0, sample_step = 0, pattern_size = 0;
+    int level = 0, nsamples = 0, scale = 0;
+    int dcount1 = 0, dcount2 = 0;
+
+    const AKAZEOptions & options = *options_;
+    const std::vector<TEvolution>& evolution = *evolution_;
+
+    // Matrices for the M-LDB descriptor
+    cv::Mat values_1 = cv::Mat::zeros(4, options.descriptor_channels, CV_32FC1);
+    cv::Mat values_2 = cv::Mat::zeros(9, options.descriptor_channels, CV_32FC1);
+    cv::Mat values_3 = cv::Mat::zeros(16, options.descriptor_channels, CV_32FC1);
+
+    // Get the information from the keypoint
+    ratio = (float)(1 << kpt.octave);
+    scale = fRound(0.5f*kpt.size / ratio);
+    angle = kpt.angle;
+    level = kpt.class_id;
+    yf = kpt.pt.y / ratio;
+    xf = kpt.pt.x / ratio;
+    co = cos(angle);
+    si = sin(angle);
+
+    // First 2x2 grid
+    pattern_size = options.descriptor_pattern_size;
+    sample_step = pattern_size;
+
+    for (int i = -pattern_size; i < pattern_size; i += sample_step) {
+        for (int j = -pattern_size; j < pattern_size; j += sample_step) {
+
+            di = dx = dy = 0.0;
+            nsamples = 0;
+
+            for (float k = (float)i; k < i + sample_step; k++) {
+                for (float l = (float)j; l < j + sample_step; l++) {
+
+                    // Get the coordinates of the sample point
+                    sample_y = yf + (l*scale*co + k*scale*si);
+                    sample_x = xf + (-l*scale*si + k*scale*co);
+
+                    y1 = fRound(sample_y);
+                    x1 = fRound(sample_x);
+
+                    ri = *(evolution[level].Lt.ptr<float>(y1)+x1);
+                    rx = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    ry = *(evolution[level].Ly.ptr<float>(y1)+x1);
+
+                    di += ri;
+
+                    if (options.descriptor_channels == 2) {
+                        dx += sqrtf(rx*rx + ry*ry);
+                    }
+                    else if (options.descriptor_channels == 3) {
+                        // Get the x and y derivatives on the rotated axis
+                        rry = rx*co + ry*si;
+                        rrx = -rx*si + ry*co;
+                        dx += rrx;
+                        dy += rry;
+                    }
+
+                    nsamples++;
+                }
+            }
+
+            di /= nsamples;
+            dx /= nsamples;
+            dy /= nsamples;
+
+            *(values_1.ptr<float>(dcount2)) = di;
+            if (options.descriptor_channels > 1) {
+                *(values_1.ptr<float>(dcount2)+1) = dx;
+            }
+
+            if (options.descriptor_channels > 2) {
+                *(values_1.ptr<float>(dcount2)+2) = dy;
+            }
+
+            dcount2++;
+        }
+    }
+
+    // Do binary comparison first level
+    for (int i = 0; i < 4; i++) {
+        for (int j = i + 1; j < 4; j++) {
+            if (*(values_1.ptr<float>(i)) > *(values_1.ptr<float>(j))) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+        }
+    }
+
+    if (options.descriptor_channels > 1) {
+        for (int i = 0; i < 4; i++) {
+            for (int j = i + 1; j < 4; j++) {
+                if (*(values_1.ptr<float>(i)+1) > *(values_1.ptr<float>(j)+1)) {
+                    desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+                }
+
+                dcount1++;
+            }
+        }
+    }
+
+    if (options.descriptor_channels > 2) {
+        for (int i = 0; i < 4; i++) {
+            for (int j = i + 1; j < 4; j++) {
+                if (*(values_1.ptr<float>(i)+2) > *(values_1.ptr<float>(j)+2)) {
+                    desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+                }
+                dcount1++;
+            }
+        }
+    }
+
+    // Second 3x3 grid
+    sample_step = static_cast<int>(ceil(pattern_size*2. / 3.));
+    dcount2 = 0;
+
+    for (int i = -pattern_size; i < pattern_size; i += sample_step) {
+        for (int j = -pattern_size; j < pattern_size; j += sample_step) {
+
+            di = dx = dy = 0.0;
+            nsamples = 0;
+
+            for (int k = i; k < i + sample_step; k++) {
+                for (int l = j; l < j + sample_step; l++) {
+
+                    // Get the coordinates of the sample point
+                    sample_y = yf + (l*scale*co + k*scale*si);
+                    sample_x = xf + (-l*scale*si + k*scale*co);
+
+                    y1 = fRound(sample_y);
+                    x1 = fRound(sample_x);
+
+                    ri = *(evolution[level].Lt.ptr<float>(y1)+x1);
+                    rx = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    ry = *(evolution[level].Ly.ptr<float>(y1)+x1);
+                    di += ri;
+
+                    if (options.descriptor_channels == 2) {
+                        dx += sqrtf(rx*rx + ry*ry);
+                    }
+                    else if (options.descriptor_channels == 3) {
+                        // Get the x and y derivatives on the rotated axis
+                        rry = rx*co + ry*si;
+                        rrx = -rx*si + ry*co;
+                        dx += rrx;
+                        dy += rry;
+                    }
+
+                    nsamples++;
+                }
+            }
+
+            di /= nsamples;
+            dx /= nsamples;
+            dy /= nsamples;
+
+            *(values_2.ptr<float>(dcount2)) = di;
+            if (options.descriptor_channels > 1) {
+                *(values_2.ptr<float>(dcount2)+1) = dx;
+            }
+
+            if (options.descriptor_channels > 2) {
+                *(values_2.ptr<float>(dcount2)+2) = dy;
+            }
+
+            dcount2++;
+        }
+    }
+
+    // Do binary comparison second level
+    for (int i = 0; i < 9; i++) {
+        for (int j = i + 1; j < 9; j++) {
+            if (*(values_2.ptr<float>(i)) > *(values_2.ptr<float>(j))) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+        }
+    }
+
+    if (options.descriptor_channels > 1) {
+        for (int i = 0; i < 9; i++) {
+            for (int j = i + 1; j < 9; j++) {
+                if (*(values_2.ptr<float>(i)+1) > *(values_2.ptr<float>(j)+1)) {
+                    desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+                }
+                dcount1++;
+            }
+        }
+    }
+
+    if (options.descriptor_channels > 2) {
+        for (int i = 0; i < 9; i++) {
+            for (int j = i + 1; j < 9; j++) {
+                if (*(values_2.ptr<float>(i)+2) > *(values_2.ptr<float>(j)+2)) {
+                    desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+                }
+                dcount1++;
+            }
+        }
+    }
+
+    // Third 4x4 grid
+    sample_step = pattern_size / 2;
+    dcount2 = 0;
+
+    for (int i = -pattern_size; i < pattern_size; i += sample_step) {
+        for (int j = -pattern_size; j < pattern_size; j += sample_step) {
+            di = dx = dy = 0.0;
+            nsamples = 0;
+
+            for (int k = i; k < i + sample_step; k++) {
+                for (int l = j; l < j + sample_step; l++) {
+
+                    // Get the coordinates of the sample point
+                    sample_y = yf + (l*scale*co + k*scale*si);
+                    sample_x = xf + (-l*scale*si + k*scale*co);
+
+                    y1 = fRound(sample_y);
+                    x1 = fRound(sample_x);
+
+                    ri = *(evolution[level].Lt.ptr<float>(y1)+x1);
+                    rx = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    ry = *(evolution[level].Ly.ptr<float>(y1)+x1);
+                    di += ri;
+
+                    if (options.descriptor_channels == 2) {
+                        dx += sqrtf(rx*rx + ry*ry);
+                    }
+                    else if (options.descriptor_channels == 3) {
+                        // Get the x and y derivatives on the rotated axis
+                        rry = rx*co + ry*si;
+                        rrx = -rx*si + ry*co;
+                        dx += rrx;
+                        dy += rry;
+                    }
+
+                    nsamples++;
+                }
+            }
+
+            di /= nsamples;
+            dx /= nsamples;
+            dy /= nsamples;
+
+            *(values_3.ptr<float>(dcount2)) = di;
+            if (options.descriptor_channels > 1)
+                *(values_3.ptr<float>(dcount2)+1) = dx;
+
+            if (options.descriptor_channels > 2)
+                *(values_3.ptr<float>(dcount2)+2) = dy;
+
+            dcount2++;
+        }
+    }
+
+    // Do binary comparison third level
+    for (int i = 0; i < 16; i++) {
+        for (int j = i + 1; j < 16; j++) {
+            if (*(values_3.ptr<float>(i)) > *(values_3.ptr<float>(j))) {
+                desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+            }
+            dcount1++;
+        }
+    }
+
+    if (options.descriptor_channels > 1) {
+        for (int i = 0; i < 16; i++) {
+            for (int j = i + 1; j < 16; j++) {
+                if (*(values_3.ptr<float>(i)+1) > *(values_3.ptr<float>(j)+1)) {
+                    desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+                }
+                dcount1++;
+            }
+        }
+    }
+
+    if (options.descriptor_channels > 2) {
+        for (int i = 0; i < 16; i++) {
+            for (int j = i + 1; j < 16; j++) {
+                if (*(values_3.ptr<float>(i)+2) > *(values_3.ptr<float>(j)+2)) {
+                    desc[dcount1 / 8] |= (1 << (dcount1 % 8));
+                }
+                dcount1++;
+            }
+        }
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method computes the M-LDB descriptor of the provided keypoint given the
+ * main orientation of the keypoint. The descriptor is computed based on a subset of
+ * the bits of the whole descriptor
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ */
+void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const cv::KeyPoint& kpt, unsigned char *desc) const {
+
+    float di = 0.f, dx = 0.f, dy = 0.f;
+    float rx = 0.f, ry = 0.f;
+    float sample_x = 0.f, sample_y = 0.f;
+    int x1 = 0, y1 = 0;
+
+    const AKAZEOptions & options = *options_;
+    const std::vector<TEvolution>& evolution = *evolution_;
+
+    // Get the information from the keypoint
+    float ratio = (float)(1 << kpt.octave);
+    int scale = fRound(0.5f*kpt.size / ratio);
+    float angle = kpt.angle;
+    int level = kpt.class_id;
+    float yf = kpt.pt.y / ratio;
+    float xf = kpt.pt.x / ratio;
+    float co = cos(angle);
+    float si = sin(angle);
+
+    // Allocate memory for the matrix of values
+    cv::Mat values = cv::Mat_<float>::zeros((4 + 9 + 16)*options.descriptor_channels, 1);
+
+    // Sample everything, but only do the comparisons
+    vector<int> steps(3);
+    steps.at(0) = options.descriptor_pattern_size;
+    steps.at(1) = (int)ceil(2.f*options.descriptor_pattern_size / 3.f);
+    steps.at(2) = options.descriptor_pattern_size / 2;
+
+    for (int i = 0; i < descriptorSamples_.rows; i++) {
+        const int *coords = descriptorSamples_.ptr<int>(i);
+        int sample_step = steps.at(coords[0]);
+        di = 0.0f;
+        dx = 0.0f;
+        dy = 0.0f;
+
+        for (int k = coords[1]; k < coords[1] + sample_step; k++) {
+            for (int l = coords[2]; l < coords[2] + sample_step; l++) {
+
+                // Get the coordinates of the sample point
+                sample_y = yf + (l*scale*co + k*scale*si);
+                sample_x = xf + (-l*scale*si + k*scale*co);
+
+                y1 = fRound(sample_y);
+                x1 = fRound(sample_x);
+
+                di += *(evolution[level].Lt.ptr<float>(y1)+x1);
+
+                if (options.descriptor_channels > 1) {
+                    rx = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    ry = *(evolution[level].Ly.ptr<float>(y1)+x1);
+
+                    if (options.descriptor_channels == 2) {
+                        dx += sqrtf(rx*rx + ry*ry);
+                    }
+                    else if (options.descriptor_channels == 3) {
+                        // Get the x and y derivatives on the rotated axis
+                        dx += rx*co + ry*si;
+                        dy += -rx*si + ry*co;
+                    }
+                }
+            }
+        }
+
+        *(values.ptr<float>(options.descriptor_channels*i)) = di;
+
+        if (options.descriptor_channels == 2) {
+            *(values.ptr<float>(options.descriptor_channels*i + 1)) = dx;
+        }
+        else if (options.descriptor_channels == 3) {
+            *(values.ptr<float>(options.descriptor_channels*i + 1)) = dx;
+            *(values.ptr<float>(options.descriptor_channels*i + 2)) = dy;
+        }
+    }
+
+    // Do the comparisons
+    const float *vals = values.ptr<float>(0);
+    const int *comps = descriptorBits_.ptr<int>(0);
+
+    for (int i = 0; i<descriptorBits_.rows; i++) {
+        if (vals[comps[2 * i]] > vals[comps[2 * i + 1]]) {
+            desc[i / 8] |= (1 << (i % 8));
+        }
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This method computes the upright (not rotation invariant) M-LDB descriptor
+ * of the provided keypoint given the main orientation of the keypoint.
+ * The descriptor is computed based on a subset of the bits of the whole descriptor
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ */
+void Upright_MLDB_Descriptor_Subset_Invoker::Get_Upright_MLDB_Descriptor_Subset(const cv::KeyPoint& kpt, unsigned char *desc) const {
+
+    float di = 0.0f, dx = 0.0f, dy = 0.0f;
+    float rx = 0.0f, ry = 0.0f;
+    float sample_x = 0.0f, sample_y = 0.0f;
+    int x1 = 0, y1 = 0;
+
+    const AKAZEOptions & options = *options_;
+    const std::vector<TEvolution>& evolution = *evolution_;
+
+    // Get the information from the keypoint
+    float ratio = (float)(1 << kpt.octave);
+    int scale = fRound(0.5f*kpt.size / ratio);
+    int level = kpt.class_id;
+    float yf = kpt.pt.y / ratio;
+    float xf = kpt.pt.x / ratio;
+
+    // Allocate memory for the matrix of values
+    Mat values = cv::Mat_<float>::zeros((4 + 9 + 16)*options.descriptor_channels, 1);
+
+    vector<int> steps(3);
+    steps.at(0) = options.descriptor_pattern_size;
+    steps.at(1) = static_cast<int>(ceil(2.f*options.descriptor_pattern_size / 3.f));
+    steps.at(2) = options.descriptor_pattern_size / 2;
+
+    for (int i = 0; i < descriptorSamples_.rows; i++) {
+        const int *coords = descriptorSamples_.ptr<int>(i);
+        int sample_step = steps.at(coords[0]);
+        di = 0.0f, dx = 0.0f, dy = 0.0f;
+
+        for (int k = coords[1]; k < coords[1] + sample_step; k++) {
+            for (int l = coords[2]; l < coords[2] + sample_step; l++) {
+
+                // Get the coordinates of the sample point
+                sample_y = yf + l*scale;
+                sample_x = xf + k*scale;
+
+                y1 = fRound(sample_y);
+                x1 = fRound(sample_x);
+                di += *(evolution[level].Lt.ptr<float>(y1)+x1);
+
+                if (options.descriptor_channels > 1) {
+                    rx = *(evolution[level].Lx.ptr<float>(y1)+x1);
+                    ry = *(evolution[level].Ly.ptr<float>(y1)+x1);
+
+                    if (options.descriptor_channels == 2) {
+                        dx += sqrtf(rx*rx + ry*ry);
+                    }
+                    else if (options.descriptor_channels == 3) {
+                        dx += rx;
+                        dy += ry;
+                    }
+                }
+            }
+        }
+
+        *(values.ptr<float>(options.descriptor_channels*i)) = di;
+
+        if (options.descriptor_channels == 2) {
+            *(values.ptr<float>(options.descriptor_channels*i + 1)) = dx;
+        }
+        else if (options.descriptor_channels == 3) {
+            *(values.ptr<float>(options.descriptor_channels*i + 1)) = dx;
+            *(values.ptr<float>(options.descriptor_channels*i + 2)) = dy;
+        }
+    }
+
+    // Do the comparisons
+    const float *vals = values.ptr<float>(0);
+    const int *comps = descriptorBits_.ptr<int>(0);
+
+    for (int i = 0; i<descriptorBits_.rows; i++) {
+        if (vals[comps[2 * i]] > vals[comps[2 * i + 1]]) {
+            desc[i / 8] |= (1 << (i % 8));
+        }
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This function computes a (quasi-random) list of bits to be taken
+ * from the full descriptor. To speed the extraction, the function creates
+ * a list of the samples that are involved in generating at least a bit (sampleList)
+ * and a list of the comparisons between those samples (comparisons)
+ * @param sampleList
+ * @param comparisons The matrix with the binary comparisons
+ * @param nbits The number of bits of the descriptor
+ * @param pattern_size The pattern size for the binary descriptor
+ * @param nchannels Number of channels to consider in the descriptor (1-3)
+ * @note The function keeps the 18 bits (3-channels by 6 comparisons) of the
+ * coarser grid, since it provides the most robust estimations
+ */
+void generateDescriptorSubsample(cv::Mat& sampleList, cv::Mat& comparisons, int nbits,
+    int pattern_size, int nchannels) {
+
+    int ssz = 0;
+    for (int i = 0; i < 3; i++) {
+        int gz = (i + 2)*(i + 2);
+        ssz += gz*(gz - 1) / 2;
+    }
+    ssz *= nchannels;
+
+    CV_Assert(nbits <= ssz); // Descriptor size can't be bigger than full descriptor
+
+    // Since the full descriptor is usually under 10k elements, we pick
+    // the selection from the full matrix.  We take as many samples per
+    // pick as the number of channels. For every pick, we
+    // take the two samples involved and put them in the sampling list
+
+    Mat_<int> fullM(ssz / nchannels, 5);
+    for (int i = 0, c = 0; i < 3; i++) {
+        int gdiv = i + 2; //grid divisions, per row
+        int gsz = gdiv*gdiv;
+        int psz = (int)ceil(2.f*pattern_size / (float)gdiv);
+
+        for (int j = 0; j < gsz; j++) {
+            for (int k = j + 1; k < gsz; k++, c++) {
+                fullM(c, 0) = i;
+                fullM(c, 1) = psz*(j % gdiv) - pattern_size;
+                fullM(c, 2) = psz*(j / gdiv) - pattern_size;
+                fullM(c, 3) = psz*(k % gdiv) - pattern_size;
+                fullM(c, 4) = psz*(k / gdiv) - pattern_size;
+            }
+        }
+    }
+
+    srand(1024);
+    Mat_<int> comps = Mat_<int>(nchannels * (int)ceil(nbits / (float)nchannels), 2);
+    comps = 1000;
+
+    // Select some samples. A sample includes all channels
+    int count = 0;
+    int npicks = (int)ceil(nbits / (float)nchannels);
+    Mat_<int> samples(29, 3);
+    Mat_<int> fullcopy = fullM.clone();
+    samples = -1;
+
+    for (int i = 0; i < npicks; i++) {
+        int k = rand() % (fullM.rows - i);
+        if (i < 6) {
+            // Force use of the coarser grid values and comparisons
+            k = i;
+        }
+
+        bool n = true;
+
+        for (int j = 0; j < count; j++) {
+            if (samples(j, 0) == fullcopy(k, 0) && samples(j, 1) == fullcopy(k, 1) && samples(j, 2) == fullcopy(k, 2)) {
+                n = false;
+                comps(i*nchannels, 0) = nchannels*j;
+                comps(i*nchannels + 1, 0) = nchannels*j + 1;
+                comps(i*nchannels + 2, 0) = nchannels*j + 2;
+                break;
+            }
+        }
+
+        if (n) {
+            samples(count, 0) = fullcopy(k, 0);
+            samples(count, 1) = fullcopy(k, 1);
+            samples(count, 2) = fullcopy(k, 2);
+            comps(i*nchannels, 0) = nchannels*count;
+            comps(i*nchannels + 1, 0) = nchannels*count + 1;
+            comps(i*nchannels + 2, 0) = nchannels*count + 2;
+            count++;
+        }
+
+        n = true;
+        for (int j = 0; j < count; j++) {
+            if (samples(j, 0) == fullcopy(k, 0) && samples(j, 1) == fullcopy(k, 3) && samples(j, 2) == fullcopy(k, 4)) {
+                n = false;
+                comps(i*nchannels, 1) = nchannels*j;
+                comps(i*nchannels + 1, 1) = nchannels*j + 1;
+                comps(i*nchannels + 2, 1) = nchannels*j + 2;
+                break;
+            }
+        }
+
+        if (n) {
+            samples(count, 0) = fullcopy(k, 0);
+            samples(count, 1) = fullcopy(k, 3);
+            samples(count, 2) = fullcopy(k, 4);
+            comps(i*nchannels, 1) = nchannels*count;
+            comps(i*nchannels + 1, 1) = nchannels*count + 1;
+            comps(i*nchannels + 2, 1) = nchannels*count + 2;
+            count++;
+        }
+
+        Mat tmp = fullcopy.row(k);
+        fullcopy.row(fullcopy.rows - i - 1).copyTo(tmp);
+    }
+
+    sampleList = samples.rowRange(0, count).clone();
+    comparisons = comps.rowRange(0, nbits).clone();
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This function computes the angle from the vector given by (X Y). From 0 to 2*Pi
+ */
+inline float get_angle(float x, float y) {
+
+    if (x >= 0 && y >= 0) {
+        return atanf(y / x);
+    }
+
+    if (x < 0 && y >= 0) {
+        return static_cast<float>(CV_PI)-atanf(-y / x);
+    }
+
+    if (x < 0 && y < 0) {
+        return static_cast<float>(CV_PI)+atanf(y / x);
+    }
+
+    if (x >= 0 && y < 0) {
+        return static_cast<float>(2.0 * CV_PI) - atanf(-y / x);
+    }
+
+    return 0;
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This function computes the value of a 2D Gaussian function
+ * @param x X Position
+ * @param y Y Position
+ * @param sig Standard Deviation
+ */
+inline float gaussian(float x, float y, float sigma) {
+    return expf(-(x*x + y*y) / (2.0f*sigma*sigma));
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This function checks descriptor limits
+ * @param x X Position
+ * @param y Y Position
+ * @param width Image width
+ * @param height Image height
+ */
+inline void check_descriptor_limits(int &x, int &y, int width, int height) {
+
+    if (x < 0) {
+        x = 0;
+    }
+
+    if (y < 0) {
+        y = 0;
+    }
+
+    if (x > width - 1) {
+        x = width - 1;
+    }
+
+    if (y > height - 1) {
+        y = height - 1;
+    }
+}
+
+/* ************************************************************************* */
+/**
+ * @brief This funtion rounds float to nearest integer
+ * @param flt Input float
+ * @return dst Nearest integer
+ */
+inline int fRound(float flt) {
+    return (int)(flt + 0.5f);
+}
\ No newline at end of file
diff --git a/modules/features2d/src/akaze/AKAZEFeatures.h b/modules/features2d/src/akaze/AKAZEFeatures.h
new file mode 100644
index 0000000000..302ef0d06d
--- /dev/null
+++ b/modules/features2d/src/akaze/AKAZEFeatures.h
@@ -0,0 +1,65 @@
+/**
+ * @file AKAZE.h
+ * @brief Main class for detecting and computing binary descriptors in an
+ * accelerated nonlinear scale space
+ * @date Mar 27, 2013
+ * @author Pablo F. Alcantarilla, Jesus Nuevo
+ */
+
+#pragma once
+
+/* ************************************************************************* */
+// Includes
+#include "precomp.hpp"
+#include "AKAZEConfig.h"
+
+/* ************************************************************************* */
+// AKAZE Class Declaration
+class AKAZEFeatures {
+
+private:
+
+    AKAZEOptions options_;                ///< Configuration options for AKAZE
+    std::vector<TEvolution> evolution_;	///< Vector of nonlinear diffusion evolution
+
+    /// FED parameters
+    int ncycles_;                  ///< Number of cycles
+    bool reordering_;              ///< Flag for reordering time steps
+    std::vector<std::vector<float > > tsteps_;  ///< Vector of FED dynamic time steps
+    std::vector<int> nsteps_;      ///< Vector of number of steps per cycle
+
+    /// Matrices for the M-LDB descriptor computation
+    cv::Mat descriptorSamples_;  // List of positions in the grids to sample LDB bits from.
+    cv::Mat descriptorBits_;
+    cv::Mat bitMask_;
+
+public:
+
+    /// Constructor with input arguments
+    AKAZEFeatures(const AKAZEOptions& options);
+
+    /// Scale Space methods
+    void Allocate_Memory_Evolution();
+    int Create_Nonlinear_Scale_Space(const cv::Mat& img);
+    void Feature_Detection(std::vector<cv::KeyPoint>& kpts);
+    void Compute_Determinant_Hessian_Response(void);
+    void Compute_Multiscale_Derivatives(void);
+    void Find_Scale_Space_Extrema(std::vector<cv::KeyPoint>& kpts);
+    void Do_Subpixel_Refinement(std::vector<cv::KeyPoint>& kpts);
+
+    // Feature description methods
+    void Compute_Descriptors(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc);
+
+    static void Compute_Main_Orientation(cv::KeyPoint& kpt, const std::vector<TEvolution>& evolution_);
+};
+
+/* ************************************************************************* */
+// Inline functions
+
+// Inline functions
+void generateDescriptorSubsample(cv::Mat& sampleList, cv::Mat& comparisons,
+    int nbits, int pattern_size, int nchannels);
+float get_angle(float x, float y);
+float gaussian(float x, float y, float sigma);
+void check_descriptor_limits(int& x, int& y, int width, int height);
+int fRound(float flt);
diff --git a/modules/features2d/src/features2d_init.cpp b/modules/features2d/src/features2d_init.cpp
index 889c5b64ca..eb7145697b 100644
--- a/modules/features2d/src/features2d_init.cpp
+++ b/modules/features2d/src/features2d_init.cpp
@@ -125,6 +125,21 @@ CV_INIT_ALGORITHM(GFTTDetector, "Feature2D.GFTT",
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+CV_INIT_ALGORITHM(KAZE, "Feature2D.KAZE",
+                  obj.info()->addParam(obj, "upright", obj.upright);
+                  obj.info()->addParam(obj, "extended", obj.extended))
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+CV_INIT_ALGORITHM(AKAZE, "Feature2D.AKAZE",
+                  obj.info()->addParam(obj, "descriptor_channels", obj.descriptor_channels);
+                  obj.info()->addParam(obj, "descriptor", obj.descriptor);
+                  obj.info()->addParam(obj, "descriptor_size", obj.descriptor_size))
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
 CV_INIT_ALGORITHM(SimpleBlobDetector, "Feature2D.SimpleBlob",
                   obj.info()->addParam(obj, "thresholdStep",    obj.params.thresholdStep);
                   obj.info()->addParam(obj, "minThreshold",     obj.params.minThreshold);
@@ -202,11 +217,13 @@ bool cv::initModule_features2d(void)
     all &= !FREAK_info_auto.name().empty();
     all &= !ORB_info_auto.name().empty();
     all &= !GFTTDetector_info_auto.name().empty();
-    all &= !HarrisDetector_info_auto.name().empty();
+    all &= !KAZE_info_auto.name().empty();
+    all &= !AKAZE_info_auto.name().empty();
+        all &= !HarrisDetector_info_auto.name().empty();
     all &= !DenseFeatureDetector_info_auto.name().empty();
     all &= !GridAdaptedFeatureDetector_info_auto.name().empty();
     all &= !BFMatcher_info_auto.name().empty();
     all &= !FlannBasedMatcher_info_auto.name().empty();
 
     return all;
-}
+}
\ No newline at end of file
diff --git a/modules/features2d/src/kaze.cpp b/modules/features2d/src/kaze.cpp
new file mode 100644
index 0000000000..88fb999d5f
--- /dev/null
+++ b/modules/features2d/src/kaze.cpp
@@ -0,0 +1,183 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2008, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+OpenCV wrapper of reference implementation of
+[1] KAZE Features. Pablo F. Alcantarilla, Adrien Bartoli and Andrew J. Davison.
+In European Conference on Computer Vision (ECCV), Fiorenze, Italy, October 2012
+http://www.robesafe.com/personal/pablo.alcantarilla/papers/Alcantarilla12eccv.pdf
+@author Eugene Khvedchenya <ekhvedchenya@gmail.com>
+*/
+
+#include "precomp.hpp"
+#include "kaze/KAZEFeatures.h"
+
+namespace cv
+{
+    KAZE::KAZE()
+        : extended(false)
+        , upright(false)
+    {
+    }
+
+    KAZE::KAZE(bool _extended, bool _upright)
+        : extended(_extended)
+        , upright(_upright)
+    {
+
+    }
+    KAZE::~KAZE()
+    {
+
+    }
+
+    // returns the descriptor size in bytes
+    int KAZE::descriptorSize() const
+    {
+        return extended ? 128 : 64;
+    }
+
+    // returns the descriptor type
+    int KAZE::descriptorType() const
+    {
+        return CV_32F;
+    }
+
+    // returns the default norm type
+    int KAZE::defaultNorm() const
+    {
+        return NORM_L2;
+    }
+
+    void KAZE::operator()(InputArray image, InputArray mask, std::vector<KeyPoint>& keypoints) const
+    {
+        detectImpl(image, keypoints, mask);
+    }
+
+    void KAZE::operator()(InputArray image, InputArray mask,
+        std::vector<KeyPoint>& keypoints,
+        OutputArray descriptors,
+        bool useProvidedKeypoints) const
+    {
+        cv::Mat img = image.getMat();
+        if (img.type() != CV_8UC1)
+            cvtColor(image, img, COLOR_BGR2GRAY);
+
+        Mat img1_32;
+        img.convertTo(img1_32, CV_32F, 1.0 / 255.0, 0);
+
+        cv::Mat& desc = descriptors.getMatRef();
+
+        KAZEOptions options;
+        options.img_width = img.cols;
+        options.img_height = img.rows;
+        options.extended = extended;
+        options.upright = upright;
+
+        KAZEFeatures impl(options);
+        impl.Create_Nonlinear_Scale_Space(img1_32);
+
+        if (!useProvidedKeypoints)
+        {
+            impl.Feature_Detection(keypoints);
+        }
+
+        if (!mask.empty())
+        {
+            cv::KeyPointsFilter::runByPixelsMask(keypoints, mask.getMat());
+        }
+
+        impl.Feature_Description(keypoints, desc);
+
+        CV_Assert((!desc.rows || desc.cols == descriptorSize()));
+        CV_Assert((!desc.rows || (desc.type() == descriptorType())));
+    }
+
+    void KAZE::detectImpl(InputArray image, std::vector<KeyPoint>& keypoints, InputArray mask) const
+    {
+        Mat img = image.getMat();
+        if (img.type() != CV_8UC1)
+            cvtColor(image, img, COLOR_BGR2GRAY);
+
+        Mat img1_32;
+        img.convertTo(img1_32, CV_32F, 1.0 / 255.0, 0);
+
+        KAZEOptions options;
+        options.img_width = img.cols;
+        options.img_height = img.rows;
+        options.extended = extended;
+        options.upright = upright;
+
+        KAZEFeatures impl(options);
+        impl.Create_Nonlinear_Scale_Space(img1_32);
+        impl.Feature_Detection(keypoints);
+
+        if (!mask.empty())
+        {
+            cv::KeyPointsFilter::runByPixelsMask(keypoints, mask.getMat());
+        }
+    }
+
+    void KAZE::computeImpl(InputArray image, std::vector<KeyPoint>& keypoints, OutputArray descriptors) const
+    {
+        cv::Mat img = image.getMat();
+        if (img.type() != CV_8UC1)
+            cvtColor(image, img, COLOR_BGR2GRAY);
+
+        Mat img1_32;
+        img.convertTo(img1_32, CV_32F, 1.0 / 255.0, 0);
+
+        cv::Mat& desc = descriptors.getMatRef();
+
+        KAZEOptions options;
+        options.img_width = img.cols;
+        options.img_height = img.rows;
+        options.extended = extended;
+        options.upright = upright;
+
+        KAZEFeatures impl(options);
+        impl.Create_Nonlinear_Scale_Space(img1_32);
+        impl.Feature_Description(keypoints, desc);
+
+        CV_Assert((!desc.rows || desc.cols == descriptorSize()));
+        CV_Assert((!desc.rows || (desc.type() == descriptorType())));
+    }
+}
\ No newline at end of file
diff --git a/modules/features2d/src/kaze/KAZEConfig.h b/modules/features2d/src/kaze/KAZEConfig.h
new file mode 100644
index 0000000000..988e247372
--- /dev/null
+++ b/modules/features2d/src/kaze/KAZEConfig.h
@@ -0,0 +1,83 @@
+/**
+ * @file KAZEConfig.h
+ * @brief Configuration file
+ * @date Dec 27, 2011
+ * @author Pablo F. Alcantarilla
+ */
+
+#pragma once
+
+// OpenCV Includes
+#include "precomp.hpp"
+#include <opencv2/features2d.hpp>
+
+//*************************************************************************************
+
+struct KAZEOptions {
+
+    enum DIFFUSIVITY_TYPE {
+        PM_G1 = 0,
+        PM_G2 = 1,
+        WEICKERT = 2
+    };
+
+    KAZEOptions()
+        : diffusivity(PM_G2)
+
+        , soffset(1.60f)
+        , omax(4)
+        , nsublevels(4)
+        , img_width(0)
+        , img_height(0)
+        , sderivatives(1.0f)
+        , dthreshold(0.001f)
+        , kcontrast(0.01f)
+        , kcontrast_percentille(0.7f)
+        , kcontrast_bins(300)
+
+        , use_fed(true)
+        , upright(false)
+        , extended(false)
+
+        , use_clipping_normalilzation(false)
+        , clipping_normalization_ratio(1.6f)
+        , clipping_normalization_niter(5)
+    {
+    }
+
+    DIFFUSIVITY_TYPE diffusivity;
+
+    float soffset;
+    int omax;
+    int nsublevels;
+    int img_width;
+    int img_height;
+    float sderivatives;
+    float dthreshold;
+    float kcontrast;
+    float kcontrast_percentille;
+    int  kcontrast_bins;
+
+    bool use_fed;
+    bool upright;
+    bool extended;
+
+    bool  use_clipping_normalilzation;
+    float clipping_normalization_ratio;
+    int   clipping_normalization_niter;
+};
+
+struct TEvolution {
+    cv::Mat Lx, Ly;	// First order spatial derivatives
+    cv::Mat Lxx, Lxy, Lyy;	// Second order spatial derivatives
+    cv::Mat Lflow;	// Diffusivity image
+    cv::Mat Lt;	// Evolution image
+    cv::Mat Lsmooth; // Smoothed image
+    cv::Mat Lstep; // Evolution step update
+    cv::Mat Ldet; // Detector response
+    float etime;	// Evolution time
+    float esigma;	// Evolution sigma. For linear diffusion t = sigma^2 / 2
+    float octave;	// Image octave
+    float sublevel;	// Image sublevel in each octave
+    int sigma_size;	// Integer esigma. For computing the feature detector responses
+};
diff --git a/modules/features2d/src/kaze/KAZEFeatures.cpp b/modules/features2d/src/kaze/KAZEFeatures.cpp
new file mode 100644
index 0000000000..634f68da80
--- /dev/null
+++ b/modules/features2d/src/kaze/KAZEFeatures.cpp
@@ -0,0 +1,1532 @@
+
+//=============================================================================
+//
+// KAZE.cpp
+// Author: Pablo F. Alcantarilla
+// Institution: University d'Auvergne
+// Address: Clermont Ferrand, France
+// Date: 21/01/2012
+// Email: pablofdezalc@gmail.com
+//
+// KAZE Features Copyright 2012, Pablo F. Alcantarilla
+// All Rights Reserved
+// See LICENSE for the license information
+//=============================================================================
+
+/**
+ * @file KAZEFeatures.cpp
+ * @brief Main class for detecting and describing features in a nonlinear
+ * scale space
+ * @date Jan 21, 2012
+ * @author Pablo F. Alcantarilla
+ */
+
+#include "KAZEFeatures.h"
+
+// Namespaces
+using namespace std;
+using namespace cv;
+using namespace cv::details::kaze;
+
+//*******************************************************************************
+//*******************************************************************************
+
+/**
+ * @brief KAZE constructor with input options
+ * @param options KAZE configuration options
+ * @note The constructor allocates memory for the nonlinear scale space
+ */
+KAZEFeatures::KAZEFeatures(KAZEOptions& _options)
+    : options(_options)
+{
+    ncycles_ = 0;
+    reordering_ = true;
+
+    // Now allocate memory for the evolution
+    Allocate_Memory_Evolution();
+}
+
+//*******************************************************************************
+//*******************************************************************************
+
+/**
+ * @brief This method allocates the memory for the nonlinear diffusion evolution
+ */
+void KAZEFeatures::Allocate_Memory_Evolution(void) {
+
+    // Allocate the dimension of the matrices for the evolution
+    for (int i = 0; i <= options.omax - 1; i++) {
+        for (int j = 0; j <= options.nsublevels - 1; j++) {
+
+            TEvolution aux;
+            aux.Lx = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Ly = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Lxx = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Lxy = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Lyy = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Lflow = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Lt = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Lsmooth = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Lstep = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.Ldet = cv::Mat::zeros(options.img_height, options.img_width, CV_32F);
+            aux.esigma = options.soffset*pow((float)2.0f, (float)(j) / (float)(options.nsublevels)+i);
+            aux.etime = 0.5f*(aux.esigma*aux.esigma);
+            aux.sigma_size = fRound(aux.esigma);
+            aux.octave = (float)i;
+            aux.sublevel = (float)j;
+            evolution_.push_back(aux);
+        }
+    }
+
+    // Allocate memory for the FED number of cycles and time steps
+    if (options.use_fed) {
+        for (size_t i = 1; i < evolution_.size(); i++) {
+            int naux = 0;
+            vector<float> tau;
+            float ttime = 0.0;
+            ttime = evolution_[i].etime - evolution_[i - 1].etime;
+            naux = fed_tau_by_process_time(ttime, 1, 0.25f, reordering_, tau);
+            nsteps_.push_back(naux);
+            tsteps_.push_back(tau);
+            ncycles_++;
+        }
+    }
+    else {
+        // Allocate memory for the auxiliary variables that are used in the AOS scheme
+        Ltx_ = Mat::zeros(options.img_width, options.img_height, CV_32F); // TODO? IS IT A BUG???
+        Lty_ = Mat::zeros(options.img_height, options.img_width, CV_32F);
+        px_ = Mat::zeros(options.img_height, options.img_width, CV_32F);
+        py_ = Mat::zeros(options.img_height, options.img_width, CV_32F);
+        ax_ = Mat::zeros(options.img_height, options.img_width, CV_32F);
+        ay_ = Mat::zeros(options.img_height, options.img_width, CV_32F);
+        bx_ = Mat::zeros(options.img_height - 1, options.img_width, CV_32F);
+        by_ = Mat::zeros(options.img_height - 1, options.img_width, CV_32F);
+        qr_ = Mat::zeros(options.img_height - 1, options.img_width, CV_32F);
+        qc_ = Mat::zeros(options.img_height, options.img_width - 1, CV_32F);
+    }
+
+}
+
+//*******************************************************************************
+//*******************************************************************************
+
+/**
+ * @brief This method creates the nonlinear scale space for a given image
+ * @param img Input image for which the nonlinear scale space needs to be created
+ * @return 0 if the nonlinear scale space was created successfully. -1 otherwise
+ */
+int KAZEFeatures::Create_Nonlinear_Scale_Space(const cv::Mat &img)
+{
+    CV_Assert(evolution_.size() > 0);
+
+    // Copy the original image to the first level of the evolution
+    img.copyTo(evolution_[0].Lt);
+    gaussian_2D_convolution(evolution_[0].Lt, evolution_[0].Lt, 0, 0, options.soffset);
+    gaussian_2D_convolution(evolution_[0].Lt, evolution_[0].Lsmooth, 0, 0, options.sderivatives);
+
+    // Firstly compute the kcontrast factor
+    Compute_KContrast(evolution_[0].Lt, options.kcontrast_percentille);
+
+    // Now generate the rest of evolution levels
+    for (size_t i = 1; i < evolution_.size(); i++) {
+
+        evolution_[i - 1].Lt.copyTo(evolution_[i].Lt);
+        gaussian_2D_convolution(evolution_[i - 1].Lt, evolution_[i].Lsmooth, 0, 0, options.sderivatives);
+
+        // Compute the Gaussian derivatives Lx and Ly
+        Scharr(evolution_[i].Lsmooth, evolution_[i].Lx, CV_32F, 1, 0, 1, 0, BORDER_DEFAULT);
+        Scharr(evolution_[i].Lsmooth, evolution_[i].Ly, CV_32F, 0, 1, 1, 0, BORDER_DEFAULT);
+
+        // Compute the conductivity equation
+        if (options.diffusivity == KAZEOptions::PM_G1) {
+            pm_g1(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options.kcontrast);
+        }
+        else if (options.diffusivity == KAZEOptions::PM_G2) {
+            pm_g2(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options.kcontrast);
+        }
+        else if (options.diffusivity == KAZEOptions::WEICKERT) {
+            weickert_diffusivity(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options.kcontrast);
+        }
+
+        // Perform FED n inner steps
+        if (options.use_fed) {
+            for (int j = 0; j < nsteps_[i - 1]; j++) {
+                nld_step_scalar(evolution_[i].Lt, evolution_[i].Lflow, evolution_[i].Lstep, tsteps_[i - 1][j]);
+            }
+        }
+        else {
+            // Perform the evolution step with AOS
+            AOS_Step_Scalar(evolution_[i].Lt, evolution_[i - 1].Lt, evolution_[i].Lflow,
+                evolution_[i].etime - evolution_[i - 1].etime);
+        }
+    }
+
+    return 0;
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method computes the k contrast factor
+ * @param img Input image
+ * @param kpercentile Percentile of the gradient histogram
+ */
+void KAZEFeatures::Compute_KContrast(const cv::Mat &img, const float &kpercentile)
+{
+    options.kcontrast = compute_k_percentile(img, kpercentile, options.sderivatives, options.kcontrast_bins, 0, 0);
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method computes the multiscale derivatives for the nonlinear scale space
+ */
+void KAZEFeatures::Compute_Multiscale_Derivatives(void)
+{
+    // TODO: use cv::parallel_for_
+    for (size_t i = 0; i < evolution_.size(); i++)
+    {
+        // Compute multiscale derivatives for the detector
+        compute_scharr_derivatives(evolution_[i].Lsmooth, evolution_[i].Lx, 1, 0, evolution_[i].sigma_size);
+        compute_scharr_derivatives(evolution_[i].Lsmooth, evolution_[i].Ly, 0, 1, evolution_[i].sigma_size);
+        compute_scharr_derivatives(evolution_[i].Lx, evolution_[i].Lxx, 1, 0, evolution_[i].sigma_size);
+        compute_scharr_derivatives(evolution_[i].Ly, evolution_[i].Lyy, 0, 1, evolution_[i].sigma_size);
+        compute_scharr_derivatives(evolution_[i].Lx, evolution_[i].Lxy, 0, 1, evolution_[i].sigma_size);
+
+        evolution_[i].Lx = evolution_[i].Lx*((evolution_[i].sigma_size));
+        evolution_[i].Ly = evolution_[i].Ly*((evolution_[i].sigma_size));
+        evolution_[i].Lxx = evolution_[i].Lxx*((evolution_[i].sigma_size)*(evolution_[i].sigma_size));
+        evolution_[i].Lxy = evolution_[i].Lxy*((evolution_[i].sigma_size)*(evolution_[i].sigma_size));
+        evolution_[i].Lyy = evolution_[i].Lyy*((evolution_[i].sigma_size)*(evolution_[i].sigma_size));
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method computes the feature detector response for the nonlinear scale space
+ * @note We use the Hessian determinant as feature detector
+ */
+void KAZEFeatures::Compute_Detector_Response(void)
+{
+    float lxx = 0.0, lxy = 0.0, lyy = 0.0;
+
+    // Firstly compute the multiscale derivatives
+    Compute_Multiscale_Derivatives();
+
+    for (size_t i = 0; i < evolution_.size(); i++)
+    {
+        for (int ix = 0; ix < options.img_height; ix++)
+        {
+            for (int jx = 0; jx < options.img_width; jx++)
+            {
+                lxx = *(evolution_[i].Lxx.ptr<float>(ix)+jx);
+                lxy = *(evolution_[i].Lxy.ptr<float>(ix)+jx);
+                lyy = *(evolution_[i].Lyy.ptr<float>(ix)+jx);
+                *(evolution_[i].Ldet.ptr<float>(ix)+jx) = (lxx*lyy - lxy*lxy);
+            }
+        }
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method selects interesting keypoints through the nonlinear scale space
+ * @param kpts Vector of keypoints
+ */
+void KAZEFeatures::Feature_Detection(std::vector<cv::KeyPoint>& kpts)
+{
+    kpts.clear();
+
+    // Firstly compute the detector response for each pixel and scale level
+    Compute_Detector_Response();
+
+    // Find scale space extrema
+    Determinant_Hessian_Parallel(kpts);
+
+    // Perform some subpixel refinement
+    Do_Subpixel_Refinement(kpts);
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method performs the detection of keypoints by using the normalized
+ * score of the Hessian determinant through the nonlinear scale space
+ * @param kpts Vector of keypoints
+ * @note We compute features for each of the nonlinear scale space level in a different processing thread
+ */
+void KAZEFeatures::Determinant_Hessian_Parallel(std::vector<cv::KeyPoint>& kpts)
+{
+    int level = 0;
+    float dist = 0.0, smax = 3.0;
+    int npoints = 0, id_repeated = 0;
+    int left_x = 0, right_x = 0, up_y = 0, down_y = 0;
+    bool is_extremum = false, is_repeated = false, is_out = false;
+
+    // Delete the memory of the vector of keypoints vectors
+    // In case we use the same kaze object for multiple images
+    for (size_t i = 0; i < kpts_par_.size(); i++) {
+        vector<KeyPoint>().swap(kpts_par_[i]);
+    }
+    kpts_par_.clear();
+    vector<KeyPoint> aux;
+
+    // Allocate memory for the vector of vectors
+    for (size_t i = 1; i < evolution_.size() - 1; i++) {
+        kpts_par_.push_back(aux);
+    }
+
+    // TODO: Use cv::parallel_for_
+    for (int i = 1; i < (int)evolution_.size() - 1; i++) {
+        Find_Extremum_Threading(i);
+    }
+
+    // Now fill the vector of keypoints!!!
+    for (int i = 0; i < (int)kpts_par_.size(); i++) {
+        for (int j = 0; j < (int)kpts_par_[i].size(); j++) {
+            level = i + 1;
+            is_extremum = true;
+            is_repeated = false;
+            is_out = false;
+
+            // Check in case we have the same point as maxima in previous evolution levels
+            for (int ik = 0; ik < (int)kpts.size(); ik++) {
+                if (kpts[ik].class_id == level || kpts[ik].class_id == level + 1 || kpts[ik].class_id == level - 1) {
+                    dist = pow(kpts_par_[i][j].pt.x - kpts[ik].pt.x, 2) + pow(kpts_par_[i][j].pt.y - kpts[ik].pt.y, 2);
+
+                    if (dist < evolution_[level].sigma_size*evolution_[level].sigma_size) {
+                        if (kpts_par_[i][j].response > kpts[ik].response) {
+                            id_repeated = ik;
+                            is_repeated = true;
+                        }
+                        else {
+                            is_extremum = false;
+                        }
+
+                        break;
+                    }
+                }
+            }
+
+            if (is_extremum == true) {
+                // Check that the point is under the image limits for the descriptor computation
+                left_x = fRound(kpts_par_[i][j].pt.x - smax*kpts_par_[i][j].size);
+                right_x = fRound(kpts_par_[i][j].pt.x + smax*kpts_par_[i][j].size);
+                up_y = fRound(kpts_par_[i][j].pt.y - smax*kpts_par_[i][j].size);
+                down_y = fRound(kpts_par_[i][j].pt.y + smax*kpts_par_[i][j].size);
+
+                if (left_x < 0 || right_x >= evolution_[level].Ldet.cols ||
+                    up_y < 0 || down_y >= evolution_[level].Ldet.rows) {
+                    is_out = true;
+                }
+
+                is_out = false;
+
+                if (is_out == false) {
+                    if (is_repeated == false) {
+                        kpts.push_back(kpts_par_[i][j]);
+                        npoints++;
+                    }
+                    else {
+                        kpts[id_repeated] = kpts_par_[i][j];
+                    }
+                }
+            }
+        }
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method is called by the thread which is responsible of finding extrema
+ * at a given nonlinear scale level
+ * @param level Index in the nonlinear scale space evolution
+ */
+void KAZEFeatures::Find_Extremum_Threading(const int& level) {
+
+    float value = 0.0;
+    bool is_extremum = false;
+
+    for (int ix = 1; ix < options.img_height - 1; ix++) {
+        for (int jx = 1; jx < options.img_width - 1; jx++) {
+
+            is_extremum = false;
+            value = *(evolution_[level].Ldet.ptr<float>(ix)+jx);
+
+            // Filter the points with the detector threshold
+            if (value > options.dthreshold) {
+                if (value >= *(evolution_[level].Ldet.ptr<float>(ix)+jx - 1)) {
+                    // First check on the same scale
+                    if (check_maximum_neighbourhood(evolution_[level].Ldet, 1, value, ix, jx, 1)) {
+                        // Now check on the lower scale
+                        if (check_maximum_neighbourhood(evolution_[level - 1].Ldet, 1, value, ix, jx, 0)) {
+                            // Now check on the upper scale
+                            if (check_maximum_neighbourhood(evolution_[level + 1].Ldet, 1, value, ix, jx, 0)) {
+                                is_extremum = true;
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Add the point of interest!!
+            if (is_extremum == true) {
+                KeyPoint point;
+                point.pt.x = (float)jx;
+                point.pt.y = (float)ix;
+                point.response = fabs(value);
+                point.size = evolution_[level].esigma;
+                point.octave = (int)evolution_[level].octave;
+                point.class_id = level;
+
+                // We use the angle field for the sublevel value
+                // Then, we will replace this angle field with the main orientation
+                point.angle = evolution_[level].sublevel;
+                kpts_par_[level - 1].push_back(point);
+            }
+        }
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method performs subpixel refinement of the detected keypoints
+ * @param kpts Vector of detected keypoints
+ */
+void KAZEFeatures::Do_Subpixel_Refinement(std::vector<cv::KeyPoint> &kpts) {
+
+    int step = 1;
+    int x = 0, y = 0;
+    float Dx = 0.0, Dy = 0.0, Ds = 0.0, dsc = 0.0;
+    float Dxx = 0.0, Dyy = 0.0, Dss = 0.0, Dxy = 0.0, Dxs = 0.0, Dys = 0.0;
+    Mat A = Mat::zeros(3, 3, CV_32F);
+    Mat b = Mat::zeros(3, 1, CV_32F);
+    Mat dst = Mat::zeros(3, 1, CV_32F);
+
+    vector<KeyPoint> kpts_(kpts);
+
+    for (size_t i = 0; i < kpts_.size(); i++) {
+
+        x = static_cast<int>(kpts_[i].pt.x);
+        y = static_cast<int>(kpts_[i].pt.y);
+
+        // Compute the gradient
+        Dx = (1.0f / (2.0f*step))*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y)+x + step)
+            - *(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y)+x - step));
+        Dy = (1.0f / (2.0f*step))*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y + step) + x)
+            - *(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y - step) + x));
+        Ds = 0.5f*(*(evolution_[kpts_[i].class_id + 1].Ldet.ptr<float>(y)+x)
+            - *(evolution_[kpts_[i].class_id - 1].Ldet.ptr<float>(y)+x));
+
+        // Compute the Hessian
+        Dxx = (1.0f / (step*step))*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y)+x + step)
+            + *(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y)+x - step)
+            - 2.0f*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y)+x)));
+
+        Dyy = (1.0f / (step*step))*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y + step) + x)
+            + *(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y - step) + x)
+            - 2.0f*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y)+x)));
+
+        Dss = *(evolution_[kpts_[i].class_id + 1].Ldet.ptr<float>(y)+x)
+            + *(evolution_[kpts_[i].class_id - 1].Ldet.ptr<float>(y)+x)
+            - 2.0f*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y)+x));
+
+        Dxy = (1.0f / (4.0f*step))*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y + step) + x + step)
+            + (*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y - step) + x - step)))
+            - (1.0f / (4.0f*step))*(*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y - step) + x + step)
+            + (*(evolution_[kpts_[i].class_id].Ldet.ptr<float>(y + step) + x - step)));
+
+        Dxs = (1.0f / (4.0f*step))*(*(evolution_[kpts_[i].class_id + 1].Ldet.ptr<float>(y)+x + step)
+            + (*(evolution_[kpts_[i].class_id - 1].Ldet.ptr<float>(y)+x - step)))
+            - (1.0f / (4.0f*step))*(*(evolution_[kpts_[i].class_id + 1].Ldet.ptr<float>(y)+x - step)
+            + (*(evolution_[kpts_[i].class_id - 1].Ldet.ptr<float>(y)+x + step)));
+
+        Dys = (1.0f / (4.0f*step))*(*(evolution_[kpts_[i].class_id + 1].Ldet.ptr<float>(y + step) + x)
+            + (*(evolution_[kpts_[i].class_id - 1].Ldet.ptr<float>(y - step) + x)))
+            - (1.0f / (4.0f*step))*(*(evolution_[kpts_[i].class_id + 1].Ldet.ptr<float>(y - step) + x)
+            + (*(evolution_[kpts_[i].class_id - 1].Ldet.ptr<float>(y + step) + x)));
+
+        // Solve the linear system
+        *(A.ptr<float>(0)) = Dxx;
+        *(A.ptr<float>(1) + 1) = Dyy;
+        *(A.ptr<float>(2) + 2) = Dss;
+
+        *(A.ptr<float>(0) + 1) = *(A.ptr<float>(1)) = Dxy;
+        *(A.ptr<float>(0) + 2) = *(A.ptr<float>(2)) = Dxs;
+        *(A.ptr<float>(1) + 2) = *(A.ptr<float>(2) + 1) = Dys;
+
+        *(b.ptr<float>(0)) = -Dx;
+        *(b.ptr<float>(1)) = -Dy;
+        *(b.ptr<float>(2)) = -Ds;
+
+        solve(A, b, dst, DECOMP_LU);
+
+        if (fabs(*(dst.ptr<float>(0))) <= 1.0f && fabs(*(dst.ptr<float>(1))) <= 1.0f && fabs(*(dst.ptr<float>(2))) <= 1.0f) {
+            kpts_[i].pt.x += *(dst.ptr<float>(0));
+            kpts_[i].pt.y += *(dst.ptr<float>(1));
+            dsc = kpts_[i].octave + (kpts_[i].angle + *(dst.ptr<float>(2))) / ((float)(options.nsublevels));
+
+            // In OpenCV the size of a keypoint is the diameter!!
+            kpts_[i].size = 2.0f*options.soffset*pow((float)2.0f, dsc);
+            kpts_[i].angle = 0.0;
+        }
+        // Set the points to be deleted after the for loop
+        else {
+            kpts_[i].response = -1;
+        }
+    }
+
+    // Clear the vector of keypoints
+    kpts.clear();
+
+    for (size_t i = 0; i < kpts_.size(); i++) {
+        if (kpts_[i].response != -1) {
+            kpts.push_back(kpts_[i]);
+        }
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+class KAZE_Descriptor_Invoker : public cv::ParallelLoopBody
+{
+public:
+    KAZE_Descriptor_Invoker(std::vector<cv::KeyPoint> &kpts, cv::Mat &desc, std::vector<TEvolution>& evolution, const KAZEOptions& _options)
+        : _kpts(&kpts)
+        , _desc(&desc)
+        , _evolution(&evolution)
+        , options(_options)
+    {
+    }
+
+    virtual ~KAZE_Descriptor_Invoker()
+    {
+    }
+
+    void operator() (const cv::Range& range) const
+    {
+        std::vector<cv::KeyPoint> &kpts      = *_kpts;
+        cv::Mat                   &desc      = *_desc;
+        std::vector<TEvolution>   &evolution = *_evolution;
+
+        for (int i = range.start; i < range.end; i++)
+        {
+            kpts[i].angle = 0.0;
+            if (options.upright)
+            {
+                kpts[i].angle = 0.0;
+                if (options.extended)
+                    Get_KAZE_Upright_Descriptor_128(kpts[i], desc.ptr<float>((int)i));
+                else
+                    Get_KAZE_Upright_Descriptor_64(kpts[i], desc.ptr<float>((int)i));
+            }
+            else
+            {
+                KAZEFeatures::Compute_Main_Orientation(kpts[i], evolution, options);
+
+                if (options.extended)
+                    Get_KAZE_Descriptor_128(kpts[i], desc.ptr<float>((int)i));
+                else
+                    Get_KAZE_Descriptor_64(kpts[i], desc.ptr<float>((int)i));
+            }
+        }
+    }
+private:
+    void Get_KAZE_Upright_Descriptor_64(const cv::KeyPoint& kpt, float* desc) const;
+    void Get_KAZE_Descriptor_64(const cv::KeyPoint& kpt, float* desc) const;
+    void Get_KAZE_Upright_Descriptor_128(const cv::KeyPoint& kpt, float* desc) const;
+    void Get_KAZE_Descriptor_128(const cv::KeyPoint& kpt, float *desc) const;
+
+    std::vector<cv::KeyPoint> * _kpts;
+    cv::Mat                   * _desc;
+    std::vector<TEvolution>   * _evolution;
+    KAZEOptions                 options;
+};
+
+/**
+ * @brief This method  computes the set of descriptors through the nonlinear scale space
+ * @param kpts Vector of keypoints
+ * @param desc Matrix with the feature descriptors
+ */
+void KAZEFeatures::Feature_Description(std::vector<cv::KeyPoint> &kpts, cv::Mat &desc)
+{
+    // Allocate memory for the matrix of descriptors
+    if (options.extended == true) {
+        desc = Mat::zeros((int)kpts.size(), 128, CV_32FC1);
+    }
+    else {
+        desc = Mat::zeros((int)kpts.size(), 64, CV_32FC1);
+    }
+
+    cv::parallel_for_(cv::Range(0, (int)kpts.size()), KAZE_Descriptor_Invoker(kpts, desc, evolution_, options));
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method computes the main orientation for a given keypoint
+ * @param kpt Input keypoint
+ * @note The orientation is computed using a similar approach as described in the
+ * original SURF method. See Bay et al., Speeded Up Robust Features, ECCV 2006
+ */
+void KAZEFeatures::Compute_Main_Orientation(cv::KeyPoint &kpt, const std::vector<TEvolution>& evolution_, const KAZEOptions& options)
+{
+    int ix = 0, iy = 0, idx = 0, s = 0, level = 0;
+    float xf = 0.0, yf = 0.0, gweight = 0.0;
+    vector<float> resX(109), resY(109), Ang(109);
+
+    // Variables for computing the dominant direction
+    float sumX = 0.0, sumY = 0.0, max = 0.0, ang1 = 0.0, ang2 = 0.0;
+
+    // Get the information from the keypoint
+    xf = kpt.pt.x;
+    yf = kpt.pt.y;
+    level = kpt.class_id;
+    s = fRound(kpt.size / 2.0f);
+
+    // Calculate derivatives responses for points within radius of 6*scale
+    for (int i = -6; i <= 6; ++i) {
+        for (int j = -6; j <= 6; ++j) {
+            if (i*i + j*j < 36) {
+                iy = fRound(yf + j*s);
+                ix = fRound(xf + i*s);
+
+                if (iy >= 0 && iy < options.img_height && ix >= 0 && ix < options.img_width) {
+                    gweight = gaussian(iy - yf, ix - xf, 2.5f*s);
+                    resX[idx] = gweight*(*(evolution_[level].Lx.ptr<float>(iy)+ix));
+                    resY[idx] = gweight*(*(evolution_[level].Ly.ptr<float>(iy)+ix));
+                }
+                else {
+                    resX[idx] = 0.0;
+                    resY[idx] = 0.0;
+                }
+
+                Ang[idx] = getAngle(resX[idx], resY[idx]);
+                ++idx;
+            }
+        }
+    }
+
+    // Loop slides pi/3 window around feature point
+    for (ang1 = 0; ang1 < 2.0f*CV_PI; ang1 += 0.15f) {
+        ang2 = (ang1 + (float)(CV_PI / 3.0) > (float)(2.0*CV_PI) ? ang1 - (float)(5.0*CV_PI / 3.0) : ang1 + (float)(CV_PI / 3.0));
+        sumX = sumY = 0.f;
+
+        for (size_t k = 0; k < Ang.size(); ++k) {
+            // Get angle from the x-axis of the sample point
+            const float & ang = Ang[k];
+
+            // Determine whether the point is within the window
+            if (ang1 < ang2 && ang1 < ang && ang < ang2) {
+                sumX += resX[k];
+                sumY += resY[k];
+            }
+            else if (ang2 < ang1 &&
+                ((ang > 0 && ang < ang2) || (ang > ang1 && ang < (float)(2.0*CV_PI)))) {
+                sumX += resX[k];
+                sumY += resY[k];
+            }
+        }
+
+        // if the vector produced from this window is longer than all
+        // previous vectors then this forms the new dominant direction
+        if (sumX*sumX + sumY*sumY > max) {
+            // store largest orientation
+            max = sumX*sumX + sumY*sumY;
+            kpt.angle = getAngle(sumX, sumY);
+        }
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method computes the upright descriptor (not rotation invariant) of
+ * the provided keypoint
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ * @note Rectangular grid of 24 s x 24 s. Descriptor Length 64. The descriptor is inspired
+ * from Agrawal et al., CenSurE: Center Surround Extremas for Realtime Feature Detection and Matching,
+ * ECCV 2008
+ */
+void KAZE_Descriptor_Invoker::Get_KAZE_Upright_Descriptor_64(const cv::KeyPoint &kpt, float *desc) const
+{
+    float dx = 0.0, dy = 0.0, mdx = 0.0, mdy = 0.0, gauss_s1 = 0.0, gauss_s2 = 0.0;
+    float rx = 0.0, ry = 0.0, len = 0.0, xf = 0.0, yf = 0.0, ys = 0.0, xs = 0.0;
+    float sample_x = 0.0, sample_y = 0.0;
+    int x1 = 0, y1 = 0, sample_step = 0, pattern_size = 0;
+    int x2 = 0, y2 = 0, kx = 0, ky = 0, i = 0, j = 0, dcount = 0;
+    float fx = 0.0, fy = 0.0, res1 = 0.0, res2 = 0.0, res3 = 0.0, res4 = 0.0;
+    int dsize = 0, scale = 0, level = 0;
+
+    std::vector<TEvolution>& evolution_ = *_evolution;
+
+    // Subregion centers for the 4x4 gaussian weighting
+    float cx = -0.5f, cy = 0.5f;
+
+    // Set the descriptor size and the sample and pattern sizes
+    dsize = 64;
+    sample_step = 5;
+    pattern_size = 12;
+
+    // Get the information from the keypoint
+    yf = kpt.pt.y;
+    xf = kpt.pt.x;
+    scale = fRound(kpt.size / 2.0f);
+    level = kpt.class_id;
+
+    i = -8;
+
+    // Calculate descriptor for this interest point
+    // Area of size 24 s x 24 s
+    while (i < pattern_size) {
+        j = -8;
+        i = i - 4;
+
+        cx += 1.0f;
+        cy = -0.5f;
+
+        while (j < pattern_size) {
+
+            dx = dy = mdx = mdy = 0.0;
+            cy += 1.0f;
+            j = j - 4;
+
+            ky = i + sample_step;
+            kx = j + sample_step;
+
+            ys = yf + (ky*scale);
+            xs = xf + (kx*scale);
+
+            for (int k = i; k < i + 9; k++) {
+                for (int l = j; l < j + 9; l++) {
+
+                    sample_y = k*scale + yf;
+                    sample_x = l*scale + xf;
+
+                    //Get the gaussian weighted x and y responses
+                    gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.5f*scale);
+
+                    y1 = (int)(sample_y - 0.5f);
+                    x1 = (int)(sample_x - 0.5f);
+
+                    checkDescriptorLimits(x1, y1, options.img_width, options.img_height);
+
+                    y2 = (int)(sample_y + 0.5f);
+                    x2 = (int)(sample_x + 0.5f);
+
+                    checkDescriptorLimits(x2, y2, options.img_width, options.img_height);
+
+                    fx = sample_x - x1;
+                    fy = sample_y - y1;
+
+                    res1 = *(evolution_[level].Lx.ptr<float>(y1)+x1);
+                    res2 = *(evolution_[level].Lx.ptr<float>(y1)+x2);
+                    res3 = *(evolution_[level].Lx.ptr<float>(y2)+x1);
+                    res4 = *(evolution_[level].Lx.ptr<float>(y2)+x2);
+                    rx = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    res1 = *(evolution_[level].Ly.ptr<float>(y1)+x1);
+                    res2 = *(evolution_[level].Ly.ptr<float>(y1)+x2);
+                    res3 = *(evolution_[level].Ly.ptr<float>(y2)+x1);
+                    res4 = *(evolution_[level].Ly.ptr<float>(y2)+x2);
+                    ry = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    rx = gauss_s1*rx;
+                    ry = gauss_s1*ry;
+
+                    // Sum the derivatives to the cumulative descriptor
+                    dx += rx;
+                    dy += ry;
+                    mdx += fabs(rx);
+                    mdy += fabs(ry);
+                }
+            }
+
+            // Add the values to the descriptor vector
+            gauss_s2 = gaussian(cx - 2.0f, cy - 2.0f, 1.5f);
+
+            desc[dcount++] = dx*gauss_s2;
+            desc[dcount++] = dy*gauss_s2;
+            desc[dcount++] = mdx*gauss_s2;
+            desc[dcount++] = mdy*gauss_s2;
+
+            len += (dx*dx + dy*dy + mdx*mdx + mdy*mdy)*gauss_s2*gauss_s2;
+
+            j += 9;
+        }
+
+        i += 9;
+    }
+
+    // convert to unit vector
+    len = sqrt(len);
+
+    for (i = 0; i < dsize; i++) {
+        desc[i] /= len;
+    }
+
+    if (options.use_clipping_normalilzation) {
+        clippingDescriptor(desc, dsize, options.clipping_normalization_niter, options.clipping_normalization_ratio);
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method computes the descriptor of the provided keypoint given the
+ * main orientation of the keypoint
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ * @note Rectangular grid of 24 s x 24 s. Descriptor Length 64. The descriptor is inspired
+ * from Agrawal et al., CenSurE: Center Surround Extremas for Realtime Feature Detection and Matching,
+ * ECCV 2008
+ */
+void KAZE_Descriptor_Invoker::Get_KAZE_Descriptor_64(const cv::KeyPoint &kpt, float *desc) const
+{
+    float dx = 0.0, dy = 0.0, mdx = 0.0, mdy = 0.0, gauss_s1 = 0.0, gauss_s2 = 0.0;
+    float rx = 0.0, ry = 0.0, rrx = 0.0, rry = 0.0, len = 0.0, xf = 0.0, yf = 0.0, ys = 0.0, xs = 0.0;
+    float sample_x = 0.0, sample_y = 0.0, co = 0.0, si = 0.0, angle = 0.0;
+    float fx = 0.0, fy = 0.0, res1 = 0.0, res2 = 0.0, res3 = 0.0, res4 = 0.0;
+    int x1 = 0, y1 = 0, x2 = 0, y2 = 0, sample_step = 0, pattern_size = 0;
+    int kx = 0, ky = 0, i = 0, j = 0, dcount = 0;
+    int dsize = 0, scale = 0, level = 0;
+
+    std::vector<TEvolution>& evolution_ = *_evolution;
+
+    // Subregion centers for the 4x4 gaussian weighting
+    float cx = -0.5f, cy = 0.5f;
+
+    // Set the descriptor size and the sample and pattern sizes
+    dsize = 64;
+    sample_step = 5;
+    pattern_size = 12;
+
+    // Get the information from the keypoint
+    yf = kpt.pt.y;
+    xf = kpt.pt.x;
+    scale = fRound(kpt.size / 2.0f);
+    angle = kpt.angle;
+    level = kpt.class_id;
+    co = cos(angle);
+    si = sin(angle);
+
+    i = -8;
+
+    // Calculate descriptor for this interest point
+    // Area of size 24 s x 24 s
+    while (i < pattern_size) {
+
+        j = -8;
+        i = i - 4;
+
+        cx += 1.0f;
+        cy = -0.5f;
+
+        while (j < pattern_size) {
+
+            dx = dy = mdx = mdy = 0.0;
+            cy += 1.0f;
+            j = j - 4;
+
+            ky = i + sample_step;
+            kx = j + sample_step;
+
+            xs = xf + (-kx*scale*si + ky*scale*co);
+            ys = yf + (kx*scale*co + ky*scale*si);
+
+            for (int k = i; k < i + 9; ++k) {
+                for (int l = j; l < j + 9; ++l) {
+
+                    // Get coords of sample point on the rotated axis
+                    sample_y = yf + (l*scale*co + k*scale*si);
+                    sample_x = xf + (-l*scale*si + k*scale*co);
+
+                    // Get the gaussian weighted x and y responses
+                    gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.5f*scale);
+                    y1 = fRound(sample_y - 0.5f);
+                    x1 = fRound(sample_x - 0.5f);
+
+                    checkDescriptorLimits(x1, y1, options.img_width, options.img_height);
+
+                    y2 = (int)(sample_y + 0.5f);
+                    x2 = (int)(sample_x + 0.5f);
+
+                    checkDescriptorLimits(x2, y2, options.img_width, options.img_height);
+
+                    fx = sample_x - x1;
+                    fy = sample_y - y1;
+
+                    res1 = *(evolution_[level].Lx.ptr<float>(y1)+x1);
+                    res2 = *(evolution_[level].Lx.ptr<float>(y1)+x2);
+                    res3 = *(evolution_[level].Lx.ptr<float>(y2)+x1);
+                    res4 = *(evolution_[level].Lx.ptr<float>(y2)+x2);
+                    rx = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    res1 = *(evolution_[level].Ly.ptr<float>(y1)+x1);
+                    res2 = *(evolution_[level].Ly.ptr<float>(y1)+x2);
+                    res3 = *(evolution_[level].Ly.ptr<float>(y2)+x1);
+                    res4 = *(evolution_[level].Ly.ptr<float>(y2)+x2);
+                    ry = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    // Get the x and y derivatives on the rotated axis
+                    rry = gauss_s1*(rx*co + ry*si);
+                    rrx = gauss_s1*(-rx*si + ry*co);
+
+                    // Sum the derivatives to the cumulative descriptor
+                    dx += rrx;
+                    dy += rry;
+                    mdx += fabs(rrx);
+                    mdy += fabs(rry);
+                }
+            }
+
+            // Add the values to the descriptor vector
+            gauss_s2 = gaussian(cx - 2.0f, cy - 2.0f, 1.5f);
+            desc[dcount++] = dx*gauss_s2;
+            desc[dcount++] = dy*gauss_s2;
+            desc[dcount++] = mdx*gauss_s2;
+            desc[dcount++] = mdy*gauss_s2;
+            len += (dx*dx + dy*dy + mdx*mdx + mdy*mdy)*gauss_s2*gauss_s2;
+            j += 9;
+        }
+        i += 9;
+    }
+
+    // convert to unit vector
+    len = sqrt(len);
+
+    for (i = 0; i < dsize; i++) {
+        desc[i] /= len;
+    }
+
+    if (options.use_clipping_normalilzation) {
+        clippingDescriptor(desc, dsize, options.clipping_normalization_niter, options.clipping_normalization_ratio);
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method computes the extended upright descriptor (not rotation invariant) of
+ * the provided keypoint
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ * @note Rectangular grid of 24 s x 24 s. Descriptor Length 128. The descriptor is inspired
+ * from Agrawal et al., CenSurE: Center Surround Extremas for Realtime Feature Detection and Matching,
+ * ECCV 2008
+ */
+void KAZE_Descriptor_Invoker::Get_KAZE_Upright_Descriptor_128(const cv::KeyPoint &kpt, float *desc) const
+{
+    float gauss_s1 = 0.0, gauss_s2 = 0.0;
+    float rx = 0.0, ry = 0.0, len = 0.0, xf = 0.0, yf = 0.0, ys = 0.0, xs = 0.0;
+    float sample_x = 0.0, sample_y = 0.0;
+    int x1 = 0, y1 = 0, sample_step = 0, pattern_size = 0;
+    int x2 = 0, y2 = 0, kx = 0, ky = 0, i = 0, j = 0, dcount = 0;
+    float fx = 0.0, fy = 0.0, res1 = 0.0, res2 = 0.0, res3 = 0.0, res4 = 0.0;
+    float dxp = 0.0, dyp = 0.0, mdxp = 0.0, mdyp = 0.0;
+    float dxn = 0.0, dyn = 0.0, mdxn = 0.0, mdyn = 0.0;
+    int dsize = 0, scale = 0, level = 0;
+
+    // Subregion centers for the 4x4 gaussian weighting
+    float cx = -0.5f, cy = 0.5f;
+
+    std::vector<TEvolution>& evolution_ = *_evolution;
+
+    // Set the descriptor size and the sample and pattern sizes
+    dsize = 128;
+    sample_step = 5;
+    pattern_size = 12;
+
+    // Get the information from the keypoint
+    yf = kpt.pt.y;
+    xf = kpt.pt.x;
+    scale = fRound(kpt.size / 2.0f);
+    level = kpt.class_id;
+
+    i = -8;
+
+    // Calculate descriptor for this interest point
+    // Area of size 24 s x 24 s
+    while (i < pattern_size) {
+
+        j = -8;
+        i = i - 4;
+
+        cx += 1.0f;
+        cy = -0.5f;
+
+        while (j < pattern_size) {
+
+            dxp = dxn = mdxp = mdxn = 0.0;
+            dyp = dyn = mdyp = mdyn = 0.0;
+
+            cy += 1.0f;
+            j = j - 4;
+
+            ky = i + sample_step;
+            kx = j + sample_step;
+
+            ys = yf + (ky*scale);
+            xs = xf + (kx*scale);
+
+            for (int k = i; k < i + 9; k++) {
+                for (int l = j; l < j + 9; l++) {
+
+                    sample_y = k*scale + yf;
+                    sample_x = l*scale + xf;
+
+                    //Get the gaussian weighted x and y responses
+                    gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.5f*scale);
+
+                    y1 = (int)(sample_y - 0.5f);
+                    x1 = (int)(sample_x - 0.5f);
+
+                    checkDescriptorLimits(x1, y1, options.img_width, options.img_height);
+
+                    y2 = (int)(sample_y + 0.5f);
+                    x2 = (int)(sample_x + 0.5f);
+
+                    checkDescriptorLimits(x2, y2, options.img_width, options.img_height);
+
+                    fx = sample_x - x1;
+                    fy = sample_y - y1;
+
+                    res1 = *(evolution_[level].Lx.ptr<float>(y1)+x1);
+                    res2 = *(evolution_[level].Lx.ptr<float>(y1)+x2);
+                    res3 = *(evolution_[level].Lx.ptr<float>(y2)+x1);
+                    res4 = *(evolution_[level].Lx.ptr<float>(y2)+x2);
+                    rx = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    res1 = *(evolution_[level].Ly.ptr<float>(y1)+x1);
+                    res2 = *(evolution_[level].Ly.ptr<float>(y1)+x2);
+                    res3 = *(evolution_[level].Ly.ptr<float>(y2)+x1);
+                    res4 = *(evolution_[level].Ly.ptr<float>(y2)+x2);
+                    ry = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    rx = gauss_s1*rx;
+                    ry = gauss_s1*ry;
+
+                    // Sum the derivatives to the cumulative descriptor
+                    if (ry >= 0.0) {
+                        dxp += rx;
+                        mdxp += fabs(rx);
+                    }
+                    else {
+                        dxn += rx;
+                        mdxn += fabs(rx);
+                    }
+
+                    if (rx >= 0.0) {
+                        dyp += ry;
+                        mdyp += fabs(ry);
+                    }
+                    else {
+                        dyn += ry;
+                        mdyn += fabs(ry);
+                    }
+                }
+            }
+
+            // Add the values to the descriptor vector
+            gauss_s2 = gaussian(cx - 2.0f, cy - 2.0f, 1.5f);
+
+            desc[dcount++] = dxp*gauss_s2;
+            desc[dcount++] = dxn*gauss_s2;
+            desc[dcount++] = mdxp*gauss_s2;
+            desc[dcount++] = mdxn*gauss_s2;
+            desc[dcount++] = dyp*gauss_s2;
+            desc[dcount++] = dyn*gauss_s2;
+            desc[dcount++] = mdyp*gauss_s2;
+            desc[dcount++] = mdyn*gauss_s2;
+
+            // Store the current length^2 of the vector
+            len += (dxp*dxp + dxn*dxn + mdxp*mdxp + mdxn*mdxn +
+                dyp*dyp + dyn*dyn + mdyp*mdyp + mdyn*mdyn)*gauss_s2*gauss_s2;
+
+            j += 9;
+        }
+
+        i += 9;
+    }
+
+    // convert to unit vector
+    len = sqrt(len);
+
+    for (i = 0; i < dsize; i++) {
+        desc[i] /= len;
+    }
+
+    if (options.use_clipping_normalilzation) {
+        clippingDescriptor(desc, dsize, options.clipping_normalization_niter, options.clipping_normalization_ratio);
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method computes the extended G-SURF descriptor of the provided keypoint
+ * given the main orientation of the keypoint
+ * @param kpt Input keypoint
+ * @param desc Descriptor vector
+ * @note Rectangular grid of 24 s x 24 s. Descriptor Length 128. The descriptor is inspired
+ * from Agrawal et al., CenSurE: Center Surround Extremas for Realtime Feature Detection and Matching,
+ * ECCV 2008
+ */
+void KAZE_Descriptor_Invoker::Get_KAZE_Descriptor_128(const cv::KeyPoint &kpt, float *desc) const
+{
+    float gauss_s1 = 0.0, gauss_s2 = 0.0;
+    float rx = 0.0, ry = 0.0, rrx = 0.0, rry = 0.0, len = 0.0, xf = 0.0, yf = 0.0, ys = 0.0, xs = 0.0;
+    float sample_x = 0.0, sample_y = 0.0, co = 0.0, si = 0.0, angle = 0.0;
+    float fx = 0.0, fy = 0.0, res1 = 0.0, res2 = 0.0, res3 = 0.0, res4 = 0.0;
+    float dxp = 0.0, dyp = 0.0, mdxp = 0.0, mdyp = 0.0;
+    float dxn = 0.0, dyn = 0.0, mdxn = 0.0, mdyn = 0.0;
+    int x1 = 0, y1 = 0, x2 = 0, y2 = 0, sample_step = 0, pattern_size = 0;
+    int kx = 0, ky = 0, i = 0, j = 0, dcount = 0;
+    int dsize = 0, scale = 0, level = 0;
+
+    std::vector<TEvolution>& evolution_ = *_evolution;
+
+    // Subregion centers for the 4x4 gaussian weighting
+    float cx = -0.5f, cy = 0.5f;
+
+    // Set the descriptor size and the sample and pattern sizes
+    dsize = 128;
+    sample_step = 5;
+    pattern_size = 12;
+
+    // Get the information from the keypoint
+    yf = kpt.pt.y;
+    xf = kpt.pt.x;
+    scale = fRound(kpt.size / 2.0f);
+    angle = kpt.angle;
+    level = kpt.class_id;
+    co = cos(angle);
+    si = sin(angle);
+
+    i = -8;
+
+    // Calculate descriptor for this interest point
+    // Area of size 24 s x 24 s
+    while (i < pattern_size) {
+
+        j = -8;
+        i = i - 4;
+
+        cx += 1.0f;
+        cy = -0.5f;
+
+        while (j < pattern_size) {
+
+            dxp = dxn = mdxp = mdxn = 0.0;
+            dyp = dyn = mdyp = mdyn = 0.0;
+
+            cy += 1.0f;
+            j = j - 4;
+
+            ky = i + sample_step;
+            kx = j + sample_step;
+
+            xs = xf + (-kx*scale*si + ky*scale*co);
+            ys = yf + (kx*scale*co + ky*scale*si);
+
+            for (int k = i; k < i + 9; ++k) {
+                for (int l = j; l < j + 9; ++l) {
+
+                    // Get coords of sample point on the rotated axis
+                    sample_y = yf + (l*scale*co + k*scale*si);
+                    sample_x = xf + (-l*scale*si + k*scale*co);
+
+                    // Get the gaussian weighted x and y responses
+                    gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.5f*scale);
+
+                    y1 = fRound(sample_y - 0.5f);
+                    x1 = fRound(sample_x - 0.5f);
+
+                    checkDescriptorLimits(x1, y1, options.img_width, options.img_height);
+
+                    y2 = (int)(sample_y + 0.5f);
+                    x2 = (int)(sample_x + 0.5f);
+
+                    checkDescriptorLimits(x2, y2, options.img_width, options.img_height);
+
+                    fx = sample_x - x1;
+                    fy = sample_y - y1;
+
+                    res1 = *(evolution_[level].Lx.ptr<float>(y1)+x1);
+                    res2 = *(evolution_[level].Lx.ptr<float>(y1)+x2);
+                    res3 = *(evolution_[level].Lx.ptr<float>(y2)+x1);
+                    res4 = *(evolution_[level].Lx.ptr<float>(y2)+x2);
+                    rx = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    res1 = *(evolution_[level].Ly.ptr<float>(y1)+x1);
+                    res2 = *(evolution_[level].Ly.ptr<float>(y1)+x2);
+                    res3 = *(evolution_[level].Ly.ptr<float>(y2)+x1);
+                    res4 = *(evolution_[level].Ly.ptr<float>(y2)+x2);
+                    ry = (1.0f - fx)*(1.0f - fy)*res1 + fx*(1.0f - fy)*res2 + (1.0f - fx)*fy*res3 + fx*fy*res4;
+
+                    // Get the x and y derivatives on the rotated axis
+                    rry = gauss_s1*(rx*co + ry*si);
+                    rrx = gauss_s1*(-rx*si + ry*co);
+
+                    // Sum the derivatives to the cumulative descriptor
+                    if (rry >= 0.0) {
+                        dxp += rrx;
+                        mdxp += fabs(rrx);
+                    }
+                    else {
+                        dxn += rrx;
+                        mdxn += fabs(rrx);
+                    }
+
+                    if (rrx >= 0.0) {
+                        dyp += rry;
+                        mdyp += fabs(rry);
+                    }
+                    else {
+                        dyn += rry;
+                        mdyn += fabs(rry);
+                    }
+                }
+            }
+
+            // Add the values to the descriptor vector
+            gauss_s2 = gaussian(cx - 2.0f, cy - 2.0f, 1.5f);
+
+            desc[dcount++] = dxp*gauss_s2;
+            desc[dcount++] = dxn*gauss_s2;
+            desc[dcount++] = mdxp*gauss_s2;
+            desc[dcount++] = mdxn*gauss_s2;
+            desc[dcount++] = dyp*gauss_s2;
+            desc[dcount++] = dyn*gauss_s2;
+            desc[dcount++] = mdyp*gauss_s2;
+            desc[dcount++] = mdyn*gauss_s2;
+
+            // Store the current length^2 of the vector
+            len += (dxp*dxp + dxn*dxn + mdxp*mdxp + mdxn*mdxn +
+                dyp*dyp + dyn*dyn + mdyp*mdyp + mdyn*mdyn)*gauss_s2*gauss_s2;
+
+            j += 9;
+        }
+
+        i += 9;
+    }
+
+    // convert to unit vector
+    len = sqrt(len);
+
+    for (i = 0; i < dsize; i++) {
+        desc[i] /= len;
+    }
+
+    if (options.use_clipping_normalilzation) {
+        clippingDescriptor(desc, dsize, options.clipping_normalization_niter, options.clipping_normalization_ratio);
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method performs a scalar non-linear diffusion step using AOS schemes
+ * @param Ld Image at a given evolution step
+ * @param Ldprev Image at a previous evolution step
+ * @param c Conductivity image
+ * @param stepsize Stepsize for the nonlinear diffusion evolution
+ * @note If c is constant, the diffusion will be linear
+ * If c is a matrix of the same size as Ld, the diffusion will be nonlinear
+ * The stepsize can be arbitrarily large
+ */
+void KAZEFeatures::AOS_Step_Scalar(cv::Mat &Ld, const cv::Mat &Ldprev, const cv::Mat &c, const float& stepsize) {
+
+    AOS_Rows(Ldprev, c, stepsize);
+    AOS_Columns(Ldprev, c, stepsize);
+
+    Ld = 0.5f*(Lty_ + Ltx_.t());
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method performs performs 1D-AOS for the image rows
+ * @param Ldprev Image at a previous evolution step
+ * @param c Conductivity image
+ * @param stepsize Stepsize for the nonlinear diffusion evolution
+ */
+void KAZEFeatures::AOS_Rows(const cv::Mat &Ldprev, const cv::Mat &c, const float& stepsize) {
+
+    // Operate on rows
+    for (int i = 0; i < qr_.rows; i++) {
+        for (int j = 0; j < qr_.cols; j++) {
+            *(qr_.ptr<float>(i)+j) = *(c.ptr<float>(i)+j) + *(c.ptr<float>(i + 1) + j);
+        }
+    }
+
+    for (int j = 0; j < py_.cols; j++) {
+        *(py_.ptr<float>(0) + j) = *(qr_.ptr<float>(0) + j);
+    }
+
+    for (int j = 0; j < py_.cols; j++) {
+        *(py_.ptr<float>(py_.rows - 1) + j) = *(qr_.ptr<float>(qr_.rows - 1) + j);
+    }
+
+    for (int i = 1; i < py_.rows - 1; i++) {
+        for (int j = 0; j < py_.cols; j++) {
+            *(py_.ptr<float>(i)+j) = *(qr_.ptr<float>(i - 1) + j) + *(qr_.ptr<float>(i)+j);
+        }
+    }
+
+    // a = 1 + t.*p; (p is -1*p)
+    // b = -t.*q;
+    ay_ = 1.0f + stepsize*py_; // p is -1*p
+    by_ = -stepsize*qr_;
+
+    // Do Thomas algorithm to solve the linear system of equations
+    Thomas(ay_, by_, Ldprev, Lty_);
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method performs performs 1D-AOS for the image columns
+ * @param Ldprev Image at a previous evolution step
+ * @param c Conductivity image
+ * @param stepsize Stepsize for the nonlinear diffusion evolution
+ */
+void KAZEFeatures::AOS_Columns(const cv::Mat &Ldprev, const cv::Mat &c, const float& stepsize) {
+
+    // Operate on columns
+    for (int j = 0; j < qc_.cols; j++) {
+        for (int i = 0; i < qc_.rows; i++) {
+            *(qc_.ptr<float>(i)+j) = *(c.ptr<float>(i)+j) + *(c.ptr<float>(i)+j + 1);
+        }
+    }
+
+    for (int i = 0; i < px_.rows; i++) {
+        *(px_.ptr<float>(i)) = *(qc_.ptr<float>(i));
+    }
+
+    for (int i = 0; i < px_.rows; i++) {
+        *(px_.ptr<float>(i)+px_.cols - 1) = *(qc_.ptr<float>(i)+qc_.cols - 1);
+    }
+
+    for (int j = 1; j < px_.cols - 1; j++) {
+        for (int i = 0; i < px_.rows; i++) {
+            *(px_.ptr<float>(i)+j) = *(qc_.ptr<float>(i)+j - 1) + *(qc_.ptr<float>(i)+j);
+        }
+    }
+
+    // a = 1 + t.*p';
+    ax_ = 1.0f + stepsize*px_.t();
+
+    // b = -t.*q';
+    bx_ = -stepsize*qc_.t();
+
+    // But take care since we need to transpose the solution!!
+    Mat Ldprevt = Ldprev.t();
+
+    // Do Thomas algorithm to solve the linear system of equations
+    Thomas(ax_, bx_, Ldprevt, Ltx_);
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This method does the Thomas algorithm for solving a tridiagonal linear system
+ * @note The matrix A must be strictly diagonally dominant for a stable solution
+ */
+void KAZEFeatures::Thomas(const cv::Mat &a, const cv::Mat &b, const cv::Mat &Ld, cv::Mat &x) {
+
+    // Auxiliary variables
+    int n = a.rows;
+    Mat m = cv::Mat::zeros(a.rows, a.cols, CV_32F);
+    Mat l = cv::Mat::zeros(b.rows, b.cols, CV_32F);
+    Mat y = cv::Mat::zeros(Ld.rows, Ld.cols, CV_32F);
+
+    /** A*x = d;																		   	   */
+    /**	/ a1 b1  0  0 0  ...    0 \  / x1 \ = / d1 \										   */
+    /**	| c1 a2 b2  0 0  ...    0 |  | x2 | = | d2 |										   */
+    /**	|  0 c2 a3 b3 0  ...    0 |  | x3 | = | d3 |										   */
+    /**	|  :  :  :  : 0  ...    0 |  |  : | = |  : |										   */
+    /**	|  :  :  :  : 0  cn-1  an |  | xn | = | dn |										   */
+
+    /** 1. LU decomposition
+     / L = / 1				 \		U = / m1 r1			   \
+     /     | l1 1 			 |	        |    m2 r2		   |
+     /     |    l2 1          |			|		m3 r3	   |
+     /	  |     : : :        |			|       :  :  :	   |
+     /	  \           ln-1 1 /			\				mn /	*/
+
+    for (int j = 0; j < m.cols; j++) {
+        *(m.ptr<float>(0) + j) = *(a.ptr<float>(0) + j);
+    }
+
+    for (int j = 0; j < y.cols; j++) {
+        *(y.ptr<float>(0) + j) = *(Ld.ptr<float>(0) + j);
+    }
+
+    // 1. Forward substitution L*y = d for y
+    for (int k = 1; k < n; k++) {
+        for (int j = 0; j < l.cols; j++) {
+            *(l.ptr<float>(k - 1) + j) = *(b.ptr<float>(k - 1) + j) / *(m.ptr<float>(k - 1) + j);
+        }
+
+        for (int j = 0; j < m.cols; j++) {
+            *(m.ptr<float>(k)+j) = *(a.ptr<float>(k)+j) - *(l.ptr<float>(k - 1) + j)*(*(b.ptr<float>(k - 1) + j));
+        }
+
+        for (int j = 0; j < y.cols; j++) {
+            *(y.ptr<float>(k)+j) = *(Ld.ptr<float>(k)+j) - *(l.ptr<float>(k - 1) + j)*(*(y.ptr<float>(k - 1) + j));
+        }
+    }
+
+    // 2. Backward substitution U*x = y
+    for (int j = 0; j < y.cols; j++) {
+        *(x.ptr<float>(n - 1) + j) = (*(y.ptr<float>(n - 1) + j)) / (*(m.ptr<float>(n - 1) + j));
+    }
+
+    for (int i = n - 2; i >= 0; i--) {
+        for (int j = 0; j < x.cols; j++) {
+            *(x.ptr<float>(i)+j) = (*(y.ptr<float>(i)+j) - (*(b.ptr<float>(i)+j))*(*(x.ptr<float>(i + 1) + j))) / (*(m.ptr<float>(i)+j));
+        }
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This function computes the angle from the vector given by (X Y). From 0 to 2*Pi
+ */
+inline float getAngle(const float& x, const float& y) {
+
+    if (x >= 0 && y >= 0)
+    {
+        return atan(y / x);
+    }
+
+    if (x < 0 && y >= 0) {
+        return (float)CV_PI - atan(-y / x);
+    }
+
+    if (x < 0 && y < 0) {
+        return (float)CV_PI + atan(y / x);
+    }
+
+    if (x >= 0 && y < 0) {
+        return 2.0f * (float)CV_PI - atan(-y / x);
+    }
+
+    return 0;
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This function performs descriptor clipping
+ * @param desc_ Pointer to the descriptor vector
+ * @param dsize Size of the descriptor vector
+ * @param iter Number of iterations
+ * @param ratio Clipping ratio
+ */
+inline void clippingDescriptor(float *desc, const int& dsize, const int& niter, const float& ratio) {
+
+    float cratio = ratio / sqrtf(static_cast<float>(dsize));
+    float len = 0.0;
+
+    for (int i = 0; i < niter; i++) {
+        len = 0.0;
+        for (int j = 0; j < dsize; j++) {
+            if (desc[j] > cratio) {
+                desc[j] = cratio;
+            }
+            else if (desc[j] < -cratio) {
+                desc[j] = -cratio;
+            }
+            len += desc[j] * desc[j];
+        }
+
+        // Normalize again
+        len = sqrt(len);
+
+        for (int j = 0; j < dsize; j++) {
+            desc[j] = desc[j] / len;
+        }
+    }
+}
+
+//**************************************************************************************
+//**************************************************************************************
+
+/**
+ * @brief This function computes the value of a 2D Gaussian function
+ * @param x X Position
+ * @param y Y Position
+ * @param sig Standard Deviation
+ */
+inline float gaussian(const float& x, const float& y, const float& sig) {
+    return exp(-(x*x + y*y) / (2.0f*sig*sig));
+}
+
+//**************************************************************************************
+//**************************************************************************************
+
+/**
+ * @brief This function checks descriptor limits
+ * @param x X Position
+ * @param y Y Position
+ * @param width Image width
+ * @param height Image height
+ */
+inline void checkDescriptorLimits(int &x, int &y, const int& width, const int& height) {
+
+    if (x < 0) {
+        x = 0;
+    }
+
+    if (y < 0) {
+        y = 0;
+    }
+
+    if (x > width - 1) {
+        x = width - 1;
+    }
+
+    if (y > height - 1) {
+        y = height - 1;
+    }
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This funtion rounds float to nearest integer
+ * @param flt Input float
+ * @return dst Nearest integer
+ */
+inline int fRound(const float& flt)
+{
+    return (int)(flt + 0.5f);
+}
diff --git a/modules/features2d/src/kaze/KAZEFeatures.h b/modules/features2d/src/kaze/KAZEFeatures.h
new file mode 100644
index 0000000000..81509c47d9
--- /dev/null
+++ b/modules/features2d/src/kaze/KAZEFeatures.h
@@ -0,0 +1,90 @@
+
+/**
+ * @file KAZE.h
+ * @brief Main program for detecting and computing descriptors in a nonlinear
+ * scale space
+ * @date Jan 21, 2012
+ * @author Pablo F. Alcantarilla
+ */
+
+#ifndef KAZE_H_
+#define KAZE_H_
+
+//*************************************************************************************
+//*************************************************************************************
+
+// Includes
+#include "KAZEConfig.h"
+#include "nldiffusion_functions.h"
+#include "fed.h"
+
+//*************************************************************************************
+//*************************************************************************************
+
+// KAZE Class Declaration
+class KAZEFeatures {
+
+private:
+
+    KAZEOptions options;
+
+    // Parameters of the Nonlinear diffusion class
+    std::vector<TEvolution> evolution_;	// Vector of nonlinear diffusion evolution
+
+    // Vector of keypoint vectors for finding extrema in multiple threads
+    std::vector<std::vector<cv::KeyPoint> > kpts_par_;
+
+    // FED parameters
+    int ncycles_;                  // Number of cycles
+    bool reordering_;              // Flag for reordering time steps
+    std::vector<std::vector<float > > tsteps_;  // Vector of FED dynamic time steps
+    std::vector<int> nsteps_;      // Vector of number of steps per cycle
+
+    // Some auxiliary variables used in the AOS step
+    cv::Mat Ltx_, Lty_, px_, py_, ax_, ay_, bx_, by_, qr_, qc_;
+
+public:
+
+    // Constructor
+    KAZEFeatures(KAZEOptions& options);
+
+    // Public methods for KAZE interface
+    void Allocate_Memory_Evolution(void);
+    int Create_Nonlinear_Scale_Space(const cv::Mat& img);
+    void Feature_Detection(std::vector<cv::KeyPoint>& kpts);
+    void Feature_Description(std::vector<cv::KeyPoint>& kpts, cv::Mat& desc);
+
+    static void Compute_Main_Orientation(cv::KeyPoint& kpt, const std::vector<TEvolution>& evolution_, const KAZEOptions& options);
+
+private:
+
+    // Feature Detection Methods
+    void Compute_KContrast(const cv::Mat& img, const float& kper);
+    void Compute_Multiscale_Derivatives(void);
+    void Compute_Detector_Response(void);
+    void Determinant_Hessian_Parallel(std::vector<cv::KeyPoint>& kpts);
+    void Find_Extremum_Threading(const int& level);
+    void Do_Subpixel_Refinement(std::vector<cv::KeyPoint>& kpts);
+
+    // AOS Methods
+    void AOS_Step_Scalar(cv::Mat &Ld, const cv::Mat &Ldprev, const cv::Mat &c, const float& stepsize);
+    void AOS_Rows(const cv::Mat &Ldprev, const cv::Mat &c, const float& stepsize);
+    void AOS_Columns(const cv::Mat &Ldprev, const cv::Mat &c, const float& stepsize);
+    void Thomas(const cv::Mat &a, const cv::Mat &b, const cv::Mat &Ld, cv::Mat &x);
+
+};
+
+//*************************************************************************************
+//*************************************************************************************
+
+// Inline functions
+float getAngle(const float& x, const float& y);
+float gaussian(const float& x, const float& y, const float& sig);
+void checkDescriptorLimits(int &x, int &y, const int& width, const int& height);
+void clippingDescriptor(float *desc, const int& dsize, const int& niter, const float& ratio);
+int fRound(const float& flt);
+
+//*************************************************************************************
+//*************************************************************************************
+
+#endif // KAZE_H_
diff --git a/modules/features2d/src/kaze/fed.cpp b/modules/features2d/src/kaze/fed.cpp
new file mode 100644
index 0000000000..7c2588559d
--- /dev/null
+++ b/modules/features2d/src/kaze/fed.cpp
@@ -0,0 +1,192 @@
+//=============================================================================
+//
+// fed.cpp
+// Authors: Pablo F. Alcantarilla (1), Jesus Nuevo (2)
+// Institutions: Georgia Institute of Technology (1)
+//               TrueVision Solutions (2)
+// Date: 15/09/2013
+// Email: pablofdezalc@gmail.com
+//
+// AKAZE Features Copyright 2013, Pablo F. Alcantarilla, Jesus Nuevo
+// All Rights Reserved
+// See LICENSE for the license information
+//=============================================================================
+
+/**
+ * @file fed.cpp
+ * @brief Functions for performing Fast Explicit Diffusion and building the
+ * nonlinear scale space
+ * @date Sep 15, 2013
+ * @author Pablo F. Alcantarilla, Jesus Nuevo
+ * @note This code is derived from FED/FJ library from Grewenig et al.,
+ * The FED/FJ library allows solving more advanced problems
+ * Please look at the following papers for more information about FED:
+ * [1] S. Grewenig, J. Weickert, C. Schroers, A. Bruhn. Cyclic Schemes for
+ * PDE-Based Image Analysis. Technical Report No. 327, Department of Mathematics,
+ * Saarland University, Saarbrücken, Germany, March 2013
+ * [2] S. Grewenig, J. Weickert, A. Bruhn. From box filtering to fast explicit diffusion.
+ * DAGM, 2010
+ *
+*/
+#include "precomp.hpp"
+#include "fed.h"
+
+using namespace std;
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This function allocates an array of the least number of time steps such
+ * that a certain stopping time for the whole process can be obtained and fills
+ * it with the respective FED time step sizes for one cycle
+ * The function returns the number of time steps per cycle or 0 on failure
+ * @param T Desired process stopping time
+ * @param M Desired number of cycles
+ * @param tau_max Stability limit for the explicit scheme
+ * @param reordering Reordering flag
+ * @param tau The vector with the dynamic step sizes
+ */
+int fed_tau_by_process_time(const float& T, const int& M, const float& tau_max,
+                            const bool& reordering, std::vector<float>& tau) {
+  // All cycles have the same fraction of the stopping time
+  return fed_tau_by_cycle_time(T/(float)M,tau_max,reordering,tau);
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This function allocates an array of the least number of time steps such
+ * that a certain stopping time for the whole process can be obtained and fills it
+ * it with the respective FED time step sizes for one cycle
+ * The function returns the number of time steps per cycle or 0 on failure
+ * @param t Desired cycle stopping time
+ * @param tau_max Stability limit for the explicit scheme
+ * @param reordering Reordering flag
+ * @param tau The vector with the dynamic step sizes
+ */
+int fed_tau_by_cycle_time(const float& t, const float& tau_max,
+                          const bool& reordering, std::vector<float> &tau) {
+  int n = 0;          // Number of time steps
+  float scale = 0.0;  // Ratio of t we search to maximal t
+
+  // Compute necessary number of time steps
+  n = (int)(ceilf(sqrtf(3.0f*t/tau_max+0.25f)-0.5f-1.0e-8f)+ 0.5f);
+  scale = 3.0f*t/(tau_max*(float)(n*(n+1)));
+
+  // Call internal FED time step creation routine
+  return fed_tau_internal(n,scale,tau_max,reordering,tau);
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This function allocates an array of time steps and fills it with FED
+ * time step sizes
+ * The function returns the number of time steps per cycle or 0 on failure
+ * @param n Number of internal steps
+ * @param scale Ratio of t we search to maximal t
+ * @param tau_max Stability limit for the explicit scheme
+ * @param reordering Reordering flag
+ * @param tau The vector with the dynamic step sizes
+ */
+int fed_tau_internal(const int& n, const float& scale, const float& tau_max,
+                     const bool& reordering, std::vector<float> &tau) {
+  float c = 0.0, d = 0.0;     // Time savers
+  vector<float> tauh;    // Helper vector for unsorted taus
+
+  if (n <= 0) {
+    return 0;
+  }
+
+  // Allocate memory for the time step size
+  tau = vector<float>(n);
+
+  if (reordering) {
+    tauh = vector<float>(n);
+  }
+
+  // Compute time saver
+  c = 1.0f / (4.0f * (float)n + 2.0f);
+  d = scale * tau_max / 2.0f;
+
+  // Set up originally ordered tau vector
+  for (int k = 0; k < n; ++k) {
+    float h = cosf((float)CV_PI * (2.0f * (float)k + 1.0f) * c);
+
+    if (reordering) {
+      tauh[k] = d / (h * h);
+    }
+    else {
+      tau[k] = d / (h * h);
+    }
+  }
+
+  // Permute list of time steps according to chosen reordering function
+  int kappa = 0, prime = 0;
+
+  if (reordering == true) {
+    // Choose kappa cycle with k = n/2
+    // This is a heuristic. We can use Leja ordering instead!!
+    kappa = n / 2;
+
+    // Get modulus for permutation
+    prime = n + 1;
+
+    while (!fed_is_prime_internal(prime)) {
+      prime++;
+    }
+
+    // Perform permutation
+    for (int k = 0, l = 0; l < n; ++k, ++l) {
+      int index = 0;
+      while ((index = ((k+1)*kappa) % prime - 1) >= n) {
+        k++;
+      }
+
+      tau[l] = tauh[index];
+    }
+  }
+
+  return n;
+}
+
+//*************************************************************************************
+//*************************************************************************************
+
+/**
+ * @brief This function checks if a number is prime or not
+ * @param number Number to check if it is prime or not
+ * @return true if the number is prime
+ */
+bool fed_is_prime_internal(const int& number) {
+  bool is_prime = false;
+
+  if (number <= 1) {
+    return false;
+  }
+  else if (number == 1 || number == 2 || number == 3 || number == 5 || number == 7) {
+    return true;
+  }
+  else if ((number % 2) == 0 || (number % 3) == 0 || (number % 5) == 0 || (number % 7) == 0) {
+    return false;
+  }
+  else {
+    is_prime = true;
+    int upperLimit = (int)sqrt(1.0f + number);
+    int divisor = 11;
+
+    while (divisor <= upperLimit ) {
+      if (number % divisor == 0)
+      {
+        is_prime = false;
+      }
+
+      divisor +=2;
+    }
+
+    return is_prime;
+  }
+}
diff --git a/modules/features2d/src/kaze/fed.h b/modules/features2d/src/kaze/fed.h
new file mode 100644
index 0000000000..c313b8134d
--- /dev/null
+++ b/modules/features2d/src/kaze/fed.h
@@ -0,0 +1,25 @@
+#ifndef FED_H
+#define FED_H
+
+//******************************************************************************
+//******************************************************************************
+
+// Includes
+#include <vector>
+
+//*************************************************************************************
+//*************************************************************************************
+
+// Declaration of functions
+int fed_tau_by_process_time(const float& T, const int& M, const float& tau_max,
+                            const bool& reordering, std::vector<float>& tau);
+int fed_tau_by_cycle_time(const float& t, const float& tau_max,
+                          const bool& reordering, std::vector<float> &tau) ;
+int fed_tau_internal(const int& n, const float& scale, const float& tau_max,
+                     const bool& reordering, std::vector<float> &tau);
+bool fed_is_prime_internal(const int& number);
+
+//*************************************************************************************
+//*************************************************************************************
+
+#endif // FED_H
diff --git a/modules/features2d/src/kaze/nldiffusion_functions.cpp b/modules/features2d/src/kaze/nldiffusion_functions.cpp
new file mode 100644
index 0000000000..ea7cd8141a
--- /dev/null
+++ b/modules/features2d/src/kaze/nldiffusion_functions.cpp
@@ -0,0 +1,437 @@
+//=============================================================================
+//
+// nldiffusion_functions.cpp
+// Author: Pablo F. Alcantarilla
+// Institution: University d'Auvergne
+// Address: Clermont Ferrand, France
+// Date: 27/12/2011
+// Email: pablofdezalc@gmail.com
+//
+// KAZE Features Copyright 2012, Pablo F. Alcantarilla
+// All Rights Reserved
+// See LICENSE for the license information
+//=============================================================================
+
+/**
+ * @file nldiffusion_functions.cpp
+ * @brief Functions for non-linear diffusion applications:
+ * 2D Gaussian Derivatives
+ * Perona and Malik conductivity equations
+ * Perona and Malik evolution
+ * @date Dec 27, 2011
+ * @author Pablo F. Alcantarilla
+ */
+
+#include "nldiffusion_functions.h"
+
+// Namespaces
+using namespace std;
+using namespace cv;
+
+/* ************************************************************************* */
+
+namespace cv {
+    namespace details {
+        namespace kaze {
+
+            /* ************************************************************************* */
+            /**
+             * @brief This function smoothes an image with a Gaussian kernel
+             * @param src Input image
+             * @param dst Output image
+             * @param ksize_x Kernel size in X-direction (horizontal)
+             * @param ksize_y Kernel size in Y-direction (vertical)
+             * @param sigma Kernel standard deviation
+             */
+            void gaussian_2D_convolution(const cv::Mat& src, cv::Mat& dst, int ksize_x, int ksize_y, float sigma) {
+
+                int ksize_x_ = 0, ksize_y_ = 0;
+
+                // Compute an appropriate kernel size according to the specified sigma
+                if (sigma > ksize_x || sigma > ksize_y || ksize_x == 0 || ksize_y == 0) {
+                    ksize_x_ = (int)ceil(2.0f*(1.0f + (sigma - 0.8f) / (0.3f)));
+                    ksize_y_ = ksize_x_;
+                }
+
+                // The kernel size must be and odd number
+                if ((ksize_x_ % 2) == 0) {
+                    ksize_x_ += 1;
+                }
+
+                if ((ksize_y_ % 2) == 0) {
+                    ksize_y_ += 1;
+                }
+
+                // Perform the Gaussian Smoothing with border replication
+                GaussianBlur(src, dst, Size(ksize_x_, ksize_y_), sigma, sigma, BORDER_REPLICATE);
+            }
+
+            /* ************************************************************************* */
+            /**
+             * @brief This function computes image derivatives with Scharr kernel
+             * @param src Input image
+             * @param dst Output image
+             * @param xorder Derivative order in X-direction (horizontal)
+             * @param yorder Derivative order in Y-direction (vertical)
+             * @note Scharr operator approximates better rotation invariance than
+             * other stencils such as Sobel. See Weickert and Scharr,
+             * A Scheme for Coherence-Enhancing Diffusion Filtering with Optimized Rotation Invariance,
+             * Journal of Visual Communication and Image Representation 2002
+             */
+            void image_derivatives_scharr(const cv::Mat& src, cv::Mat& dst, int xorder, int yorder) {
+                Scharr(src, dst, CV_32F, xorder, yorder, 1.0, 0, BORDER_DEFAULT);
+            }
+
+            /* ************************************************************************* */
+            /**
+             * @brief This function computes the Perona and Malik conductivity coefficient g1
+             * g1 = exp(-|dL|^2/k^2)
+             * @param Lx First order image derivative in X-direction (horizontal)
+             * @param Ly First order image derivative in Y-direction (vertical)
+             * @param dst Output image
+             * @param k Contrast factor parameter
+             */
+            void pm_g1(const cv::Mat& Lx, const cv::Mat& Ly, cv::Mat& dst, float k) {
+                cv::exp(-(Lx.mul(Lx) + Ly.mul(Ly)) / (k*k), dst);
+            }
+
+            /* ************************************************************************* */
+            /**
+             * @brief This function computes the Perona and Malik conductivity coefficient g2
+             * g2 = 1 / (1 + dL^2 / k^2)
+             * @param Lx First order image derivative in X-direction (horizontal)
+             * @param Ly First order image derivative in Y-direction (vertical)
+             * @param dst Output image
+             * @param k Contrast factor parameter
+             */
+            void pm_g2(const cv::Mat &Lx, const cv::Mat& Ly, cv::Mat& dst, float k) {
+                dst = 1.0f / (1.0f + (Lx.mul(Lx) + Ly.mul(Ly)) / (k*k));
+            }
+
+            /* ************************************************************************* */
+            /**
+             * @brief This function computes Weickert conductivity coefficient gw
+             * @param Lx First order image derivative in X-direction (horizontal)
+             * @param Ly First order image derivative in Y-direction (vertical)
+             * @param dst Output image
+             * @param k Contrast factor parameter
+             * @note For more information check the following paper: J. Weickert
+             * Applications of nonlinear diffusion in image processing and computer vision,
+             * Proceedings of Algorithmy 2000
+             */
+            void weickert_diffusivity(const cv::Mat& Lx, const cv::Mat& Ly, cv::Mat& dst, float k) {
+                Mat modg;
+                cv::pow((Lx.mul(Lx) + Ly.mul(Ly)) / (k*k), 4, modg);
+                cv::exp(-3.315f / modg, dst);
+                dst = 1.0f - dst;
+            }
+
+            /* ************************************************************************* */
+            /**
+            * @brief This function computes Charbonnier conductivity coefficient gc
+            * gc = 1 / sqrt(1 + dL^2 / k^2)
+            * @param Lx First order image derivative in X-direction (horizontal)
+            * @param Ly First order image derivative in Y-direction (vertical)
+            * @param dst Output image
+            * @param k Contrast factor parameter
+            * @note For more information check the following paper: J. Weickert
+            * Applications of nonlinear diffusion in image processing and computer vision,
+            * Proceedings of Algorithmy 2000
+            */
+            void charbonnier_diffusivity(const cv::Mat& Lx, const cv::Mat& Ly, cv::Mat& dst, float k) {
+                Mat den;
+                cv::sqrt(1.0f + (Lx.mul(Lx) + Ly.mul(Ly)) / (k*k), den);
+                dst = 1.0f / den;
+            }
+
+            /* ************************************************************************* */
+            /**
+             * @brief This function computes a good empirical value for the k contrast factor
+             * given an input image, the percentile (0-1), the gradient scale and the number of
+             * bins in the histogram
+             * @param img Input image
+             * @param perc Percentile of the image gradient histogram (0-1)
+             * @param gscale Scale for computing the image gradient histogram
+             * @param nbins Number of histogram bins
+             * @param ksize_x Kernel size in X-direction (horizontal) for the Gaussian smoothing kernel
+             * @param ksize_y Kernel size in Y-direction (vertical) for the Gaussian smoothing kernel
+             * @return k contrast factor
+             */
+            float compute_k_percentile(const cv::Mat& img, float perc, float gscale, int nbins, int ksize_x, int ksize_y) {
+
+                int nbin = 0, nelements = 0, nthreshold = 0, k = 0;
+                float kperc = 0.0, modg = 0.0, lx = 0.0, ly = 0.0;
+                float npoints = 0.0;
+                float hmax = 0.0;
+
+                // Create the array for the histogram
+                std::vector<int> hist(nbins, 0);
+
+                // Create the matrices
+                Mat gaussian = Mat::zeros(img.rows, img.cols, CV_32F);
+                Mat Lx = Mat::zeros(img.rows, img.cols, CV_32F);
+                Mat Ly = Mat::zeros(img.rows, img.cols, CV_32F);
+
+                // Perform the Gaussian convolution
+                gaussian_2D_convolution(img, gaussian, ksize_x, ksize_y, gscale);
+
+                // Compute the Gaussian derivatives Lx and Ly
+                Scharr(gaussian, Lx, CV_32F, 1, 0, 1, 0, cv::BORDER_DEFAULT);
+                Scharr(gaussian, Ly, CV_32F, 0, 1, 1, 0, cv::BORDER_DEFAULT);
+
+                // Skip the borders for computing the histogram
+                for (int i = 1; i < gaussian.rows - 1; i++) {
+                    for (int j = 1; j < gaussian.cols - 1; j++) {
+                        lx = *(Lx.ptr<float>(i)+j);
+                        ly = *(Ly.ptr<float>(i)+j);
+                        modg = sqrt(lx*lx + ly*ly);
+
+                        // Get the maximum
+                        if (modg > hmax) {
+                            hmax = modg;
+                        }
+                    }
+                }
+
+                // Skip the borders for computing the histogram
+                for (int i = 1; i < gaussian.rows - 1; i++) {
+                    for (int j = 1; j < gaussian.cols - 1; j++) {
+                        lx = *(Lx.ptr<float>(i)+j);
+                        ly = *(Ly.ptr<float>(i)+j);
+                        modg = sqrt(lx*lx + ly*ly);
+
+                        // Find the correspondent bin
+                        if (modg != 0.0) {
+                            nbin = (int)floor(nbins*(modg / hmax));
+
+                            if (nbin == nbins) {
+                                nbin--;
+                            }
+
+                            hist[nbin]++;
+                            npoints++;
+                        }
+                    }
+                }
+
+                // Now find the perc of the histogram percentile
+                nthreshold = (int)(npoints*perc);
+
+                for (k = 0; nelements < nthreshold && k < nbins; k++) {
+                    nelements = nelements + hist[k];
+                }
+
+                if (nelements < nthreshold)  {
+                    kperc = 0.03f;
+                }
+                else {
+                    kperc = hmax*((float)(k) / (float)nbins);
+                }
+
+                return kperc;
+            }
+
+            /* ************************************************************************* */
+            /**
+             * @brief This function computes Scharr image derivatives
+             * @param src Input image
+             * @param dst Output image
+             * @param xorder Derivative order in X-direction (horizontal)
+             * @param yorder Derivative order in Y-direction (vertical)
+             * @param scale Scale factor for the derivative size
+             */
+            void compute_scharr_derivatives(const cv::Mat& src, cv::Mat& dst, int xorder, int yorder, int scale) {
+                Mat kx, ky;
+                compute_derivative_kernels(kx, ky, xorder, yorder, scale);
+                sepFilter2D(src, dst, CV_32F, kx, ky);
+            }
+
+            /* ************************************************************************* */
+            /**
+             * @brief Compute derivative kernels for sizes different than 3
+             * @param _kx Horizontal kernel values
+             * @param _ky Vertical kernel values
+             * @param dx Derivative order in X-direction (horizontal)
+             * @param dy Derivative order in Y-direction (vertical)
+             * @param scale_ Scale factor or derivative size
+             */
+            void compute_derivative_kernels(cv::OutputArray _kx, cv::OutputArray _ky, int dx, int dy, int scale) {
+
+                int ksize = 3 + 2 * (scale - 1);
+
+                // The standard Scharr kernel
+                if (scale == 1) {
+                    getDerivKernels(_kx, _ky, dx, dy, 0, true, CV_32F);
+                    return;
+                }
+
+                _kx.create(ksize, 1, CV_32F, -1, true);
+                _ky.create(ksize, 1, CV_32F, -1, true);
+                Mat kx = _kx.getMat();
+                Mat ky = _ky.getMat();
+
+                float w = 10.0f / 3.0f;
+                float norm = 1.0f / (2.0f*scale*(w + 2.0f));
+
+                for (int k = 0; k < 2; k++) {
+                    Mat* kernel = k == 0 ? &kx : &ky;
+                    int order = k == 0 ? dx : dy;
+                    std::vector<float> kerI(ksize, 0.0f);
+
+                    if (order == 0) {
+                        kerI[0] = norm, kerI[ksize / 2] = w*norm, kerI[ksize - 1] = norm;
+                    }
+                    else if (order == 1) {
+                        kerI[0] = -1, kerI[ksize / 2] = 0, kerI[ksize - 1] = 1;
+                    }
+
+                    Mat temp(kernel->rows, kernel->cols, CV_32F, &kerI[0]);
+                    temp.copyTo(*kernel);
+                }
+            }
+
+            class Nld_Step_Scalar_Invoker : public cv::ParallelLoopBody
+            {
+            public:
+                Nld_Step_Scalar_Invoker(cv::Mat& Ld, const cv::Mat& c, cv::Mat& Lstep, float _stepsize)
+                    : _Ld(&Ld)
+                    , _c(&c)
+                    , _Lstep(&Lstep)
+                    , stepsize(_stepsize)
+                {
+                }
+
+                virtual ~Nld_Step_Scalar_Invoker()
+                {
+
+                }
+
+                void operator()(const cv::Range& range) const
+                {
+                    cv::Mat& Ld = *_Ld;
+                    const cv::Mat& c = *_c;
+                    cv::Mat& Lstep = *_Lstep;
+
+                    for (int i = range.start; i < range.end; i++)
+                    {
+                        for (int j = 1; j < Lstep.cols - 1; j++)
+                        {
+                            float xpos = ((*(c.ptr<float>(i)+j)) + (*(c.ptr<float>(i)+j + 1)))*((*(Ld.ptr<float>(i)+j + 1)) - (*(Ld.ptr<float>(i)+j)));
+                            float xneg = ((*(c.ptr<float>(i)+j - 1)) + (*(c.ptr<float>(i)+j)))*((*(Ld.ptr<float>(i)+j)) - (*(Ld.ptr<float>(i)+j - 1)));
+                            float ypos = ((*(c.ptr<float>(i)+j)) + (*(c.ptr<float>(i + 1) + j)))*((*(Ld.ptr<float>(i + 1) + j)) - (*(Ld.ptr<float>(i)+j)));
+                            float yneg = ((*(c.ptr<float>(i - 1) + j)) + (*(c.ptr<float>(i)+j)))*((*(Ld.ptr<float>(i)+j)) - (*(Ld.ptr<float>(i - 1) + j)));
+                            *(Lstep.ptr<float>(i)+j) = 0.5f*stepsize*(xpos - xneg + ypos - yneg);
+                        }
+                    }
+                }
+
+            private:
+                cv::Mat * _Ld;
+                const cv::Mat * _c;
+                cv::Mat * _Lstep;
+                float stepsize;
+            };
+
+            /* ************************************************************************* */
+            /**
+            * @brief This function performs a scalar non-linear diffusion step
+            * @param Ld2 Output image in the evolution
+            * @param c Conductivity image
+            * @param Lstep Previous image in the evolution
+            * @param stepsize The step size in time units
+            * @note Forward Euler Scheme 3x3 stencil
+            * The function c is a scalar value that depends on the gradient norm
+            * dL_by_ds = d(c dL_by_dx)_by_dx + d(c dL_by_dy)_by_dy
+            */
+            void nld_step_scalar(cv::Mat& Ld, const cv::Mat& c, cv::Mat& Lstep, float stepsize) {
+
+                cv::parallel_for_(cv::Range(1, Lstep.rows - 1), Nld_Step_Scalar_Invoker(Ld, c, Lstep, stepsize));
+
+                for (int j = 1; j < Lstep.cols - 1; j++) {
+                    float xpos = ((*(c.ptr<float>(0) + j)) + (*(c.ptr<float>(0) + j + 1)))*((*(Ld.ptr<float>(0) + j + 1)) - (*(Ld.ptr<float>(0) + j)));
+                    float xneg = ((*(c.ptr<float>(0) + j - 1)) + (*(c.ptr<float>(0) + j)))*((*(Ld.ptr<float>(0) + j)) - (*(Ld.ptr<float>(0) + j - 1)));
+                    float ypos = ((*(c.ptr<float>(0) + j)) + (*(c.ptr<float>(1) + j)))*((*(Ld.ptr<float>(1) + j)) - (*(Ld.ptr<float>(0) + j)));
+                    *(Lstep.ptr<float>(0) + j) = 0.5f*stepsize*(xpos - xneg + ypos);
+                }
+
+                for (int j = 1; j < Lstep.cols - 1; j++) {
+                    float xpos = ((*(c.ptr<float>(Lstep.rows - 1) + j)) + (*(c.ptr<float>(Lstep.rows - 1) + j + 1)))*((*(Ld.ptr<float>(Lstep.rows - 1) + j + 1)) - (*(Ld.ptr<float>(Lstep.rows - 1) + j)));
+                    float xneg = ((*(c.ptr<float>(Lstep.rows - 1) + j - 1)) + (*(c.ptr<float>(Lstep.rows - 1) + j)))*((*(Ld.ptr<float>(Lstep.rows - 1) + j)) - (*(Ld.ptr<float>(Lstep.rows - 1) + j - 1)));
+                    float ypos = ((*(c.ptr<float>(Lstep.rows - 1) + j)) + (*(c.ptr<float>(Lstep.rows - 1) + j)))*((*(Ld.ptr<float>(Lstep.rows - 1) + j)) - (*(Ld.ptr<float>(Lstep.rows - 1) + j)));
+                    float yneg = ((*(c.ptr<float>(Lstep.rows - 2) + j)) + (*(c.ptr<float>(Lstep.rows - 1) + j)))*((*(Ld.ptr<float>(Lstep.rows - 1) + j)) - (*(Ld.ptr<float>(Lstep.rows - 2) + j)));
+                    *(Lstep.ptr<float>(Lstep.rows - 1) + j) = 0.5f*stepsize*(xpos - xneg + ypos - yneg);
+                }
+
+                for (int i = 1; i < Lstep.rows - 1; i++) {
+                    float xpos = ((*(c.ptr<float>(i))) + (*(c.ptr<float>(i)+1)))*((*(Ld.ptr<float>(i)+1)) - (*(Ld.ptr<float>(i))));
+                    float xneg = ((*(c.ptr<float>(i))) + (*(c.ptr<float>(i))))*((*(Ld.ptr<float>(i))) - (*(Ld.ptr<float>(i))));
+                    float ypos = ((*(c.ptr<float>(i))) + (*(c.ptr<float>(i + 1))))*((*(Ld.ptr<float>(i + 1))) - (*(Ld.ptr<float>(i))));
+                    float yneg = ((*(c.ptr<float>(i - 1))) + (*(c.ptr<float>(i))))*((*(Ld.ptr<float>(i))) - (*(Ld.ptr<float>(i - 1))));
+                    *(Lstep.ptr<float>(i)) = 0.5f*stepsize*(xpos - xneg + ypos - yneg);
+                }
+
+                for (int i = 1; i < Lstep.rows - 1; i++) {
+                    float xneg = ((*(c.ptr<float>(i)+Lstep.cols - 2)) + (*(c.ptr<float>(i)+Lstep.cols - 1)))*((*(Ld.ptr<float>(i)+Lstep.cols - 1)) - (*(Ld.ptr<float>(i)+Lstep.cols - 2)));
+                    float ypos = ((*(c.ptr<float>(i)+Lstep.cols - 1)) + (*(c.ptr<float>(i + 1) + Lstep.cols - 1)))*((*(Ld.ptr<float>(i + 1) + Lstep.cols - 1)) - (*(Ld.ptr<float>(i)+Lstep.cols - 1)));
+                    float yneg = ((*(c.ptr<float>(i - 1) + Lstep.cols - 1)) + (*(c.ptr<float>(i)+Lstep.cols - 1)))*((*(Ld.ptr<float>(i)+Lstep.cols - 1)) - (*(Ld.ptr<float>(i - 1) + Lstep.cols - 1)));
+                    *(Lstep.ptr<float>(i)+Lstep.cols - 1) = 0.5f*stepsize*(-xneg + ypos - yneg);
+                }
+
+                Ld = Ld + Lstep;
+            }
+
+            /* ************************************************************************* */
+            /**
+            * @brief This function downsamples the input image using OpenCV resize
+            * @param img Input image to be downsampled
+            * @param dst Output image with half of the resolution of the input image
+            */
+            void halfsample_image(const cv::Mat& src, cv::Mat& dst) {
+
+                // Make sure the destination image is of the right size
+                CV_Assert(src.cols / 2 == dst.cols);
+                CV_Assert(src.rows / 2 == dst.rows);
+                resize(src, dst, dst.size(), 0, 0, cv::INTER_AREA);
+            }
+
+            /* ************************************************************************* */
+            /**
+             * @brief This function checks if a given pixel is a maximum in a local neighbourhood
+             * @param img Input image where we will perform the maximum search
+             * @param dsize Half size of the neighbourhood
+             * @param value Response value at (x,y) position
+             * @param row Image row coordinate
+             * @param col Image column coordinate
+             * @param same_img Flag to indicate if the image value at (x,y) is in the input image
+             * @return 1->is maximum, 0->otherwise
+             */
+            bool check_maximum_neighbourhood(const cv::Mat& img, int dsize, float value, int row, int col, bool same_img) {
+
+                bool response = true;
+
+                for (int i = row - dsize; i <= row + dsize; i++) {
+                    for (int j = col - dsize; j <= col + dsize; j++) {
+                        if (i >= 0 && i < img.rows && j >= 0 && j < img.cols) {
+                            if (same_img == true) {
+                                if (i != row || j != col) {
+                                    if ((*(img.ptr<float>(i)+j)) > value) {
+                                        response = false;
+                                        return response;
+                                    }
+                                }
+                            }
+                            else {
+                                if ((*(img.ptr<float>(i)+j)) > value) {
+                                    response = false;
+                                    return response;
+                                }
+                            }
+                        }
+                    }
+                }
+
+                return response;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/features2d/src/kaze/nldiffusion_functions.h b/modules/features2d/src/kaze/nldiffusion_functions.h
new file mode 100644
index 0000000000..773f7e4619
--- /dev/null
+++ b/modules/features2d/src/kaze/nldiffusion_functions.h
@@ -0,0 +1,53 @@
+/**
+ * @file nldiffusion_functions.h
+ * @brief Functions for non-linear diffusion applications:
+ * 2D Gaussian Derivatives
+ * Perona and Malik conductivity equations
+ * Perona and Malik evolution
+ * @date Dec 27, 2011
+ * @author Pablo F. Alcantarilla
+ */
+
+#ifndef KAZE_NLDIFFUSION_FUNCTIONS_H
+#define KAZE_NLDIFFUSION_FUNCTIONS_H
+
+/* ************************************************************************* */
+// Includes
+#include "precomp.hpp"
+
+/* ************************************************************************* */
+// Declaration of functions
+
+namespace cv {
+    namespace details {
+        namespace kaze {
+
+            // Gaussian 2D convolution
+            void gaussian_2D_convolution(const cv::Mat& src, cv::Mat& dst, int ksize_x, int ksize_y, float sigma);
+
+            // Diffusivity functions
+            void pm_g1(const cv::Mat& Lx, const cv::Mat& Ly, cv::Mat& dst, float k);
+            void pm_g2(const cv::Mat& Lx, const cv::Mat& Ly, cv::Mat& dst, float k);
+            void weickert_diffusivity(const cv::Mat& Lx, const cv::Mat& Ly, cv::Mat& dst, float k);
+            void charbonnier_diffusivity(const cv::Mat& Lx, const cv::Mat& Ly, cv::Mat& dst, float k);
+
+            float compute_k_percentile(const cv::Mat& img, float perc, float gscale, int nbins, int ksize_x, int ksize_y);
+
+            // Image derivatives
+            void compute_scharr_derivatives(const cv::Mat& src, cv::Mat& dst, int xorder, int yorder, int scale);
+            void compute_derivative_kernels(cv::OutputArray _kx, cv::OutputArray _ky, int dx, int dy, int scale);
+            void image_derivatives_scharr(const cv::Mat& src, cv::Mat& dst, int xorder, int yorder);
+
+            // Nonlinear diffusion filtering scalar step
+            void nld_step_scalar(cv::Mat& Ld, const cv::Mat& c, cv::Mat& Lstep, float stepsize);
+
+            // For non-maxima suppresion
+            bool check_maximum_neighbourhood(const cv::Mat& img, int dsize, float value, int row, int col, bool same_img);
+
+            // Image downsampling
+            void halfsample_image(const cv::Mat& src, cv::Mat& dst);
+        }
+    }
+}
+
+#endif
diff --git a/modules/features2d/test/test_keypoints.cpp b/modules/features2d/test/test_keypoints.cpp
index e15d4fa17f..2a7f24ed49 100644
--- a/modules/features2d/test/test_keypoints.cpp
+++ b/modules/features2d/test/test_keypoints.cpp
@@ -166,3 +166,18 @@ TEST(Features2d_Detector_Keypoints_Dense, validation)
     CV_FeatureDetectorKeypointsTest test(Algorithm::create<FeatureDetector>("Feature2D.Dense"));
     test.safe_run();
 }
+
+TEST(Features2d_Detector_Keypoints_KAZE, validation)
+{
+    CV_FeatureDetectorKeypointsTest test(Algorithm::create<FeatureDetector>("Feature2D.KAZE"));
+    test.safe_run();
+}
+
+TEST(Features2d_Detector_Keypoints_AKAZE, validation)
+{
+    CV_FeatureDetectorKeypointsTest test_kaze(cv::Ptr<FeatureDetector>(new cv::AKAZE(cv::AKAZE::DESCRIPTOR_KAZE)));
+    test_kaze.safe_run();
+
+    CV_FeatureDetectorKeypointsTest test_mldb(cv::Ptr<FeatureDetector>(new cv::AKAZE(cv::AKAZE::DESCRIPTOR_MLDB)));
+    test_mldb.safe_run();
+}
diff --git a/modules/features2d/test/test_rotation_and_scale_invariance.cpp b/modules/features2d/test/test_rotation_and_scale_invariance.cpp
index 2fe59ca7fc..07123bed13 100644
--- a/modules/features2d/test/test_rotation_and_scale_invariance.cpp
+++ b/modules/features2d/test/test_rotation_and_scale_invariance.cpp
@@ -652,6 +652,21 @@ TEST(Features2d_ScaleInvariance_Detector_BRISK, regression)
     test.safe_run();
 }
 
+TEST(Features2d_ScaleInvariance_Detector_KAZE, regression)
+{
+    DetectorScaleInvarianceTest test(Algorithm::create<FeatureDetector>("Feature2D.KAZE"),
+        0.08f,
+        0.49f);
+    test.safe_run();
+}
+
+TEST(Features2d_ScaleInvariance_Detector_AKAZE, regression)
+{
+    DetectorScaleInvarianceTest test(Algorithm::create<FeatureDetector>("Feature2D.AKAZE"),
+        0.08f,
+        0.49f);
+    test.safe_run();
+}
 //TEST(Features2d_ScaleInvariance_Detector_ORB, regression)
 //{
 //    DetectorScaleInvarianceTest test(Algorithm::create<FeatureDetector>("Feature2D.ORB"),