Merge pull request #3549 from mshabunin:sphinx-doc-remove

This commit is contained in:
Vadim Pisarevsky 2014-12-31 09:14:22 +00:00
commit 1c8493fb0d
551 changed files with 341 additions and 74482 deletions

View File

@ -473,14 +473,22 @@ include(cmake/OpenCVFindLibsPerf.cmake)
# Detect other 3rd-party libraries/tools
# ----------------------------------------------------------------------------
# --- LATEX for pdf documentation ---
unset(HAVE_DOXYGEN CACHE)
# --- Doxygen and PlantUML for documentation ---
unset(DOXYGEN_FOUND CACHE)
if(BUILD_DOCS)
include(cmake/OpenCVFindLATEX.cmake)
find_host_program(DOXYGEN_BUILD doxygen)
if (DOXYGEN_BUILD)
set(HAVE_DOXYGEN 1)
endif (DOXYGEN_BUILD)
find_package(Doxygen)
# Resolve the PlantUML jar used for UML diagrams in the Doxygen documentation.
# A PLANTUML_JAR value that is already set (e.g. -DPLANTUML_JAR=<filepath>)
# takes precedence over the PLANTUML_JAR environment variable.
if(PLANTUML_JAR)
  message(STATUS "Using PlantUML path from command line: ${PLANTUML_JAR}")
elseif(DEFINED ENV{PLANTUML_JAR})
  set(PLANTUML_JAR "$ENV{PLANTUML_JAR}")
  message(STATUS "Using PlantUML path from environment: ${PLANTUML_JAR}")
else()
  message(STATUS "To enable PlantUML support, set PLANTUML_JAR environment variable or pass -DPLANTUML_JAR=<filepath> option to cmake")
endif()
# PlantUML is only usable with Doxygen 1.8.8 or later; otherwise disable it.
if(PLANTUML_JAR AND DOXYGEN_VERSION VERSION_LESS 1.8.8)
  message(STATUS "You need Doxygen version 1.8.8 or later to use PlantUML")
  # Shadow with an empty normal variable instead of unset(): a plain unset()
  # only removes the normal variable, so a cache entry created by
  # -DPLANTUML_JAR=... would become visible again and PlantUML would stay
  # enabled. The user's cache entry itself is left untouched.
  set(PLANTUML_JAR "")
endif()
endif(BUILD_DOCS)
# --- Python Support ---
@ -1083,15 +1091,8 @@ endif()
if(BUILD_DOCS)
status("")
status(" Documentation:")
if(HAVE_SPHINX)
status(" Build Documentation:" PDFLATEX_COMPILER THEN YES ELSE "YES (only HTML and without math expressions)")
else()
status(" Build Documentation:" NO)
endif()
status(" Sphinx:" HAVE_SPHINX THEN "${SPHINX_BUILD} (ver ${SPHINX_VERSION})" ELSE NO)
status(" PdfLaTeX compiler:" PDFLATEX_COMPILER THEN "${PDFLATEX_COMPILER}" ELSE NO)
status(" PlantUML:" PLANTUML THEN "${PLANTUML}" ELSE NO)
status(" Doxygen:" HAVE_DOXYGEN THEN "YES (${DOXYGEN_BUILD})" ELSE NO)
status(" Doxygen:" DOXYGEN_FOUND THEN "${DOXYGEN_EXECUTABLE} (ver ${DOXYGEN_VERSION})" ELSE NO)
status(" PlantUML:" PLANTUML_JAR THEN "${PLANTUML_JAR}" ELSE NO)
endif()
# ========================== samples and tests ==========================

View File

@ -237,21 +237,3 @@ if(PYTHON2INTERP_FOUND)
set(PYTHON_DEFAULT_AVAILABLE "TRUE")
set(PYTHON_DEFAULT_EXECUTABLE "${PYTHON2_EXECUTABLE}")
endif()
unset(HAVE_SPHINX CACHE)
if(BUILD_DOCS)
find_host_program(SPHINX_BUILD sphinx-build)
find_host_program(PLANTUML plantuml)
if(SPHINX_BUILD)
execute_process(COMMAND "${SPHINX_BUILD}"
OUTPUT_QUIET
ERROR_VARIABLE SPHINX_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(SPHINX_OUTPUT MATCHES "Sphinx v([0-9][^ \n]*)")
set(SPHINX_VERSION "${CMAKE_MATCH_1}")
set(HAVE_SPHINX 1)
message(STATUS "Found Sphinx ${SPHINX_VERSION}: ${SPHINX_BUILD}")
endif()
endif()
endif(BUILD_DOCS)

View File

@ -2,7 +2,7 @@
# CMake file for OpenCV docs
#-----------------------
set(HAVE_DOC_GENERATOR BUILD_DOCS AND (HAVE_SPHINX OR HAVE_DOXYGEN))
# HAVE_DOC_GENERATOR is true only when documentation was requested AND Doxygen
# was found. The condition has to be evaluated by if(): passing
# "BUILD_DOCS AND DOXYGEN_FOUND" straight to set() stores the three-element
# list "BUILD_DOCS;AND;DOXYGEN_FOUND", which if(HAVE_DOC_GENERATOR) always
# treats as true.
if(BUILD_DOCS AND DOXYGEN_FOUND)
  set(HAVE_DOC_GENERATOR TRUE)
else()
  set(HAVE_DOC_GENERATOR FALSE)
endif()
if(HAVE_DOC_GENERATOR)
project(opencv_docs)
@ -34,120 +34,9 @@ if(HAVE_DOC_GENERATOR)
set(OPTIONAL_DOC_LIST "")
endif(HAVE_DOC_GENERATOR)
# ========= Sphinx docs =========
if(BUILD_DOCS AND HAVE_SPHINX)
if(NOT INSTALL_CREATE_DISTRIB)
list(APPEND DOC_LIST "${OpenCV_SOURCE_DIR}/doc/haartraining.htm")
endif()
# build lists of documentation files and generate table of contents for reference manual
set(DOC_FAKE_ROOT "${CMAKE_CURRENT_BINARY_DIR}/fake-root")
set(DOC_FAKE_ROOT_FILES "")
# Adds a build rule that copies a single file into the documentation fake root
# and records the copy in DOC_FAKE_ROOT_FILES of the calling scope.
#   source      - path of the file to copy
#   destination - location of the copy, relative to DOC_FAKE_ROOT
function(ocv_doc_add_file_to_fake_root source destination)
  set(fake_root_copy "${DOC_FAKE_ROOT}/${destination}")

  add_custom_command(
    OUTPUT "${fake_root_copy}"
    COMMAND "${CMAKE_COMMAND}" -E copy "${source}" "${fake_root_copy}"
    DEPENDS "${source}"
    COMMENT "Copying ${destination} to fake root..."
    VERBATIM
  )

  # Extend the caller's list with the new output so that documentation
  # targets can depend on it.
  set(collected_outputs "${DOC_FAKE_ROOT_FILES}")
  list(APPEND collected_outputs "${fake_root_copy}")
  set(DOC_FAKE_ROOT_FILES "${collected_outputs}" PARENT_SCOPE)
endfunction()
# Adds a file, or every file found recursively under a directory, to the
# documentation fake root.
#   source - path of the file or directory to add
#   ARGV1  - optional destination relative to the fake root; when omitted, the
#            path of `source` relative to OpenCV_SOURCE_DIR is used
# The updated DOC_FAKE_ROOT_FILES list is propagated to the calling scope.
function(ocv_doc_add_to_fake_root source)
  if(NOT ARGC GREATER 1)
    file(RELATIVE_PATH destination "${OpenCV_SOURCE_DIR}" "${source}")
  else()
    set(destination "${ARGV1}")
  endif()

  if(NOT IS_DIRECTORY "${source}")
    ocv_doc_add_file_to_fake_root("${source}" "${destination}")
  else()
    # Mirror the directory layout below the destination, one rule per file.
    file(GLOB_RECURSE relative_entries RELATIVE "${source}" "${source}/*")
    foreach(entry ${relative_entries})
      ocv_doc_add_file_to_fake_root("${source}/${entry}" "${destination}/${entry}")
    endforeach()
  endif()

  # The helper updated DOC_FAKE_ROOT_FILES in this scope; hand it on upwards.
  set(DOC_FAKE_ROOT_FILES "${DOC_FAKE_ROOT_FILES}" PARENT_SCOPE)
endfunction()
set(OPENCV_REFMAN_TOC "")
foreach(mod ${BASE_MODULES} ${EXTRA_MODULES})
if(EXISTS "${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc/${mod}.rst")
ocv_doc_add_to_fake_root("${OPENCV_MODULE_opencv_${mod}_LOCATION}/doc" modules/${mod}/doc)
set(OPENCV_REFMAN_TOC "${OPENCV_REFMAN_TOC} ${mod}/doc/${mod}.rst\n")
endif()
endforeach()
configure_file("${OpenCV_SOURCE_DIR}/modules/refman.rst.in" "${DOC_FAKE_ROOT}/modules/refman.rst" @ONLY)
ocv_doc_add_to_fake_root("${OpenCV_SOURCE_DIR}/index.rst")
ocv_doc_add_to_fake_root("${OpenCV_SOURCE_DIR}/doc")
ocv_doc_add_to_fake_root("${OpenCV_SOURCE_DIR}/platforms/android")
ocv_doc_add_to_fake_root("${OpenCV_SOURCE_DIR}/samples")
set(BUILD_PLANTUML "")
if(PLANTUML)
set(BUILD_PLANTUML "-tplantuml")
endif()
if(PDFLATEX_COMPILER)
add_custom_target(docs
COMMAND ${SPHINX_BUILD} ${BUILD_PLANTUML} -b latex -c "${CMAKE_CURRENT_SOURCE_DIR}" "${DOC_FAKE_ROOT}" .
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/mymath.sty ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${PYTHON_DEFAULT_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/patch_refman_latex.py" opencv2refman.tex
COMMAND ${PYTHON_DEFAULT_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/patch_refman_latex.py" opencv2manager.tex
COMMAND ${CMAKE_COMMAND} -E echo "Generating opencv2refman.pdf"
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode opencv2refman.tex
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode opencv2refman.tex
COMMAND ${CMAKE_COMMAND} -E echo "Generating opencv2manager.pdf"
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode opencv2manager.tex
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode opencv2manager.tex
COMMAND ${CMAKE_COMMAND} -E echo "Generating opencv_user.pdf"
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode opencv_user.tex
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode opencv_user.tex
COMMAND ${CMAKE_COMMAND} -E echo "Generating opencv_tutorials.pdf"
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode opencv_tutorials.tex
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode opencv_tutorials.tex
COMMAND ${CMAKE_COMMAND} -E echo "Generating opencv_cheatsheet.pdf"
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode "${CMAKE_CURRENT_SOURCE_DIR}/opencv_cheatsheet.tex"
COMMAND ${PDFLATEX_COMPILER} -interaction=batchmode "${CMAKE_CURRENT_SOURCE_DIR}/opencv_cheatsheet.tex"
DEPENDS ${DOC_FAKE_ROOT_FILES}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Generating the PDF Manuals"
)
LIST(APPEND OPTIONAL_DOC_LIST "${CMAKE_BINARY_DIR}/doc/opencv2refman.pdf" "${CMAKE_BINARY_DIR}/doc/opencv2manager.pdf"
"${CMAKE_BINARY_DIR}/doc/opencv_user.pdf" "${CMAKE_BINARY_DIR}/doc/opencv_tutorials.pdf" "${CMAKE_BINARY_DIR}/doc/opencv_cheatsheet.pdf")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(docs PROPERTIES FOLDER "documentation")
endif()
endif()
add_custom_target(html_docs
COMMAND "${SPHINX_BUILD}" ${BUILD_PLANTUML} -b html -c "${CMAKE_CURRENT_SOURCE_DIR}" "${DOC_FAKE_ROOT}" ./_html
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/mymath.sty ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${DOC_FAKE_ROOT_FILES}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Generating Online Documentation"
)
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(html_docs PROPERTIES FOLDER "documentation")
endif()
endif()
# ========= Doxygen docs =========
if(BUILD_DOCS AND HAVE_DOXYGEN)
if(BUILD_DOCS AND DOXYGEN_FOUND)
# not documented modules list
list(APPEND blacklist "ts" "java" "python2" "python3" "world")
@ -240,19 +129,31 @@ if(BUILD_DOCS AND HAVE_DOXYGEN)
set(CMAKE_DOXYGEN_MAIN_REFERENCE "${refs_main}")
set(CMAKE_DOXYGEN_EXTRA_REFERENCE "${refs_extra}")
set(CMAKE_EXTRA_BIB_FILES "${bibfile} ${paths_bib}")
# Normalise the QHP switch to the literal YES/NO substituted into GENERATE_QHP.
if(NOT CMAKE_DOXYGEN_GENERATE_QHP)
  set(CMAKE_DOXYGEN_GENERATE_QHP "NO")
else()
  set(CMAKE_DOXYGEN_GENERATE_QHP "YES")
endif()

# Extra files for the HTML output (substituted into HTML_EXTRA_FILES).
list(APPEND CMAKE_DOXYGEN_HTML_FILES
  "${CMAKE_CURRENT_SOURCE_DIR}/opencv.ico"
  "${CMAKE_CURRENT_SOURCE_DIR}/pattern.png"
  "${CMAKE_CURRENT_SOURCE_DIR}/acircles_pattern.png"
  "${CMAKE_CURRENT_SOURCE_DIR}/bodybg.png"
  "${CMAKE_CURRENT_SOURCE_DIR}/mymath.sty"
)
# Join the list entries with " \<newline>" so each file sits on its own
# continued line in the generated Doxyfile.
string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_HTML_FILES "${CMAKE_DOXYGEN_HTML_FILES}")

# Text appended to the end of the Doxyfile: the jar path when PlantUML is
# available, otherwise aliases that replace startuml/enduml blocks with a
# warning.
if(NOT PLANTUML_JAR)
  set(CMAKE_DOXYGEN_PLANTUML_SUPPORT "ALIASES += startuml{1}=\"@warning __No plantuml!__ \\n \\n @if DUMMY_PLANTUML_CODE\" enduml=\"@endif\"\n")
else()
  set(CMAKE_DOXYGEN_PLANTUML_SUPPORT "PLANTUML_JAR_PATH = ${PLANTUML_JAR}\n")
endif()
# writing file
configure_file(Doxyfile.in ${doxyfile} @ONLY)
configure_file(root.markdown.in ${rootfile} @ONLY)
configure_file(mymath.sty "${CMAKE_DOXYGEN_OUTPUT_PATH}/html/mymath.sty" @ONLY)
configure_file(mymath.sty "${CMAKE_DOXYGEN_OUTPUT_PATH}/latex/mymath.sty" @ONLY)
# TODO: do not store downloadable samples, but give github link instead
add_custom_target(doxygen
COMMAND "${CMAKE_COMMAND}" -E copy_directory "${CMAKE_SOURCE_DIR}/samples" "${CMAKE_DOXYGEN_OUTPUT_PATH}/html/samples"
COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/pattern.png" "${CMAKE_DOXYGEN_OUTPUT_PATH}/html"
COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/acircles_pattern.png" "${CMAKE_DOXYGEN_OUTPUT_PATH}/html"
COMMAND ${DOXYGEN_BUILD} ${doxyfile}
COMMAND ${DOXYGEN_EXECUTABLE} ${doxyfile}
DEPENDS ${doxyfile} ${rootfile} ${bibfile} ${deps}
)
endif()
@ -265,4 +166,8 @@ if(HAVE_DOC_GENERATOR)
foreach(f ${OPTIONAL_DOC_LIST})
install(FILES "${f}" DESTINATION "${OPENCV_DOC_INSTALL_PATH}" OPTIONAL COMPONENT docs)
endforeach()
# dummy targets
add_custom_target(docs)
add_custom_target(html_docs)
endif(HAVE_DOC_GENERATOR)

View File

@ -125,11 +125,11 @@ IGNORE_PREFIX =
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_HEADER = @CMAKE_CURRENT_SOURCE_DIR@/header.html
HTML_FOOTER = @CMAKE_CURRENT_SOURCE_DIR@/footer.html
HTML_STYLESHEET =
HTML_EXTRA_STYLESHEET =
HTML_EXTRA_FILES =
HTML_EXTRA_STYLESHEET = @CMAKE_CURRENT_SOURCE_DIR@/stylesheet.css
HTML_EXTRA_FILES = @CMAKE_DOXYGEN_HTML_FILES@
HTML_COLORSTYLE_HUE = 220
HTML_COLORSTYLE_SAT = 100
HTML_COLORSTYLE_GAMMA = 80
@ -148,10 +148,10 @@ GENERATE_CHI = NO
CHM_INDEX_ENCODING =
BINARY_TOC = NO
TOC_EXPAND = NO
GENERATE_QHP = NO
QCH_FILE =
QHP_NAMESPACE = org.doxygen.Project
QHP_VIRTUAL_FOLDER = doc
GENERATE_QHP = @CMAKE_DOXYGEN_GENERATE_QHP@
QCH_FILE = ../opencv-@OPENCV_VERSION@.qch
QHP_NAMESPACE = org.itseez.opencv.@OPENCV_VERSION@
QHP_VIRTUAL_FOLDER = opencv
QHP_CUST_FILTER_NAME =
QHP_CUST_FILTER_ATTRS =
QHP_SECT_FILTER_ATTRS =
@ -285,3 +285,4 @@ DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
@CMAKE_DOXYGEN_PLANTUML_SUPPORT@

View File

@ -1,12 +0,0 @@
/**
 * Replace the element with the given id by an <iframe> showing iframeSrc.
 *
 * The iframe is 100% wide and 511px high. Nothing happens when iframes cannot
 * be created, when no element with that id exists, or when the element has no
 * parent to replace it in.
 *
 * @param {string} elementId  id of the placeholder element to replace
 * @param {string} iframeSrc  escaped URL; it is passed through unescape()
 */
function insertIframe (elementId, iframeSrc)
{
    if (!document.createElement)
        return;

    var iframe = document.createElement('iframe');
    if (!iframe)
        return;

    // A missing or detached placeholder used to raise a TypeError here and
    // abort the rest of the page script; skip the replacement instead.
    var element = document.getElementById(elementId);
    if (!element || !element.parentNode)
        return;

    // NOTE(review): unescape() is deprecated, but it is kept because
    // decodeURIComponent() decodes some inputs differently.
    iframe.src = unescape(iframeSrc);
    iframe.width = "100%";
    iframe.height = "511px";
    element.parentNode.replaceChild(iframe, element);
}

View File

@ -1,227 +0,0 @@
{#
basic/layout.html
~~~~~~~~~~~~~~~~~
Master layout template for Sphinx themes.
:copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
#}
{%- block doctype -%}
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
{%- endblock %}
{% set script_files = script_files + [pathto("_static/insertIframe.js", 1)] %}
{%- set reldelim1 = reldelim1 is not defined and ' &raquo;' or reldelim1 %}
{%- set reldelim2 = reldelim2 is not defined and ' |' or reldelim2 %}
{%- set render_sidebar = (not embedded) and (not theme_nosidebar|tobool) and
(sidebars != []) %}
{%- set url_root = pathto('', 1) %}
{%- if url_root == '#' %}{% set url_root = '' %}{% endif %}
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-33108845-1']);
_gaq.push(['_setDomainName', 'opencv.org']);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
{%- macro relbar() %}
<div class="related">
<h3>{{ _('Navigation') }}</h3>
<ul>
{%- for rellink in rellinks %}
<li class="right" {% if loop.first %}style="margin-right: 10px"{% endif %}>
<a href="{{ pathto(rellink[0]) }}" title="{{ rellink[1]|striptags|e }}"
{{ accesskey(rellink[2]) }}>{{ rellink[3] }}</a>
{%- if not loop.first %}{{ reldelim2 }}{% endif %}</li>
{%- endfor %}
{%- block rootrellink %}
<li><a href="{{ pathto(master_doc) }}">{{ shorttitle|e }}</a>{{ reldelim1 }}</li>
{%- endblock %}
{%- for parent in parents %}
<li><a href="{{ parent.link|e }}" {% if loop.last %}{{ accesskey("U") }}{% endif %}>{{ parent.title }}</a>{{ reldelim1 }}</li>
{%- endfor %}
{%- block relbaritems %} {% endblock %}
</ul>
</div>
{%- endmacro %}
{%- macro sidebar() %}
{%- if render_sidebar %}
<div class="sphinxsidebar">
<div class="sphinxsidebarwrapper">
{%- block sidebarlogo %}
{%- if logo %}
<p class="logo"><a href="{{ pathto(master_doc) }}">
<img class="logo" src="{{ pathto('_static/' + logo, 1) }}" alt="Logo"/>
</a></p>
{%- endif %}
{%- endblock %}
{%- if sidebars == None %}
{%- block sidebarsearch %}
{%- include "searchbox.html" %}
{%- endblock %}
{%- endif %}
{%- if sidebars != None %}
{#- new style sidebar: explicitly include/exclude templates #}
{%- for sidebartemplate in sidebars %}
{%- include sidebartemplate %}
{%- endfor %}
{%- else %}
{#- old style sidebars: using blocks -- should be deprecated #}
{%- block sidebartoc %}
{%- include "localtoc.html" %}
{%- endblock %}
{%- block sidebarrel %}
{%- include "relations.html" %}
{%- endblock %}
{%- block sidebarsourcelink %}
{%- include "sourcelink.html" %}
{%- endblock %}
{%- if customsidebar %}
{%- include customsidebar %}
{%- endif %}
{%- endif %}
</div>
</div>
{%- endif %}
{%- endmacro %}
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset={{ encoding }}" />
{{ metatags }}
{%- if not embedded and docstitle %}
{%- set titlesuffix = " &mdash; "|safe + docstitle|e %}
{%- else %}
{%- set titlesuffix = "" %}
{%- endif %}
{%- block htmltitle %}
<title>{{ title|striptags|e }}{{ titlesuffix }}</title>
{%- endblock %}
<link rel="stylesheet" href="{{ pathto('_static/' + style, 1) }}" type="text/css" />
<link rel="stylesheet" href="{{ pathto('_static/pygments.css', 1) }}" type="text/css" />
{%- for cssfile in css_files %}
<link rel="stylesheet" href="{{ pathto(cssfile, 1) }}" type="text/css" />
{%- endfor %}
{%- if not embedded %}
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '{{ url_root }}',
VERSION: '{{ release|e }}',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '{{ '' if no_search_suffix else file_suffix }}',
HAS_SOURCE: {{ has_source|lower }}
};
</script>
{%- for scriptfile in script_files %}
<script type="text/javascript" src="{{ pathto(scriptfile, 1) }}"></script>
{%- endfor %}
{%- if use_opensearch %}
<link rel="search" type="application/opensearchdescription+xml"
title="{% trans docstitle=docstitle|e %}Search within {{ docstitle }}{% endtrans %}"
href="{{ pathto('_static/opensearch.xml', 1) }}"/>
{%- endif %}
{%- if favicon %}
<link rel="shortcut icon" href="{{ pathto('_static/' + favicon, 1) }}"/>
{%- endif %}
{%- endif %}
{%- block linktags %}
{%- if hasdoc('about') %}
<link rel="author" title="{{ _('About these documents') }}" href="{{ pathto('about') }}" />
{%- endif %}
{%- if hasdoc('genindex') %}
<link rel="index" title="{{ _('Index') }}" href="{{ pathto('genindex') }}" />
{%- endif %}
{%- if hasdoc('search') %}
<link rel="search" title="{{ _('Search') }}" href="{{ pathto('search') }}" />
{%- endif %}
{%- if hasdoc('copyright') %}
<link rel="copyright" title="{{ _('Copyright') }}" href="{{ pathto('copyright') }}" />
{%- endif %}
<link rel="top" title="{{ docstitle|e }}" href="{{ pathto('index') }}" />
{%- if parents %}
<link rel="up" title="{{ parents[-1].title|striptags|e }}" href="{{ parents[-1].link|e }}" />
{%- endif %}
{%- if next %}
<link rel="next" title="{{ next.title|striptags|e }}" href="{{ next.link|e }}" />
{%- endif %}
{%- if prev %}
<link rel="prev" title="{{ prev.title|striptags|e }}" href="{{ prev.link|e }}" />
{%- endif %}
{%- endblock %}
{%- block extrahead %} {% endblock %}
</head>
<body>
{%- block header %}{% endblock %}
{%- block relbar1 %}{{ relbar() }}{% endblock %}
{%- block content %}
{%- block sidebar1 %} {# possible location for sidebar #} {% endblock %}
<div class="document">
{% block document %}
<div class="documentwrapper">
{%- if not embedded %}{% if not theme_nosidebar|tobool %}
<div class="bodywrapper">
{%- endif %}{% endif %}
<div class="body">
{% block body %} {% endblock %}
</div>
<div class="feedback">
<h2>Help and Feedback</h2>
You did not find what you were looking for?
<ul>
{% if theme_lang == 'c' %}
{% endif %}
{% if theme_lang == 'cpp' %}
<li>Try the <a href="http://docs.opencv.org/opencv_cheatsheet.pdf">Cheatsheet</a>.</li>
{% endif %}
{% if theme_lang == 'py' %}
<li>Try the <a href="cookbook.html">Cookbook</a>.</li>
{% endif %}
<li>Ask a question on the <a href="http://answers.opencv.org">Q&amp;A forum</a>.</li>
<li>If you think something is missing or wrong in the documentation,
please file a <a href="http://code.opencv.org">bug report</a>.</li>
</ul>
</div>
{%- if not embedded %}{% if not theme_nosidebar|tobool %}
</div>
{%- endif %}{% endif %}
</div>
{% endblock %}
{%- block sidebar2 %}{{ sidebar() }}{% endblock %}
<div class="clearer"></div>
</div>
{%- endblock %}
{%- block relbar2 %}{{ relbar() }}{% endblock %}
{%- block footer %}
<div class="footer">
{%- if show_copyright %}
{%- if hasdoc('copyright') %}
{% trans path=pathto('copyright'), copyright=copyright|e %}&copy; <a href="{{ path }}">Copyright</a> {{ copyright }}.{% endtrans %}
{%- else %}
{% trans copyright=copyright|e %}&copy; Copyright {{ copyright }}.{% endtrans %}
{%- endif %}
{%- endif %}
{%- if last_updated %}
{% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %}
{%- endif %}
{%- if show_sphinx %}
{% trans sphinx_version=sphinx_version|e %}Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> {{ sphinx_version }}.{% endtrans %}
{%- endif %}
</div>
{%- endblock %}
</body>
</html>

View File

@ -1,21 +0,0 @@
{#
basic/searchbox.html
~~~~~~~~~~~~~~~~~~~~
Sphinx sidebar template: quick search box.
:copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
#}
{%- if pagename != "search" %}
{# Quick search form: rendered hidden, then revealed by the script below. Not emitted on the search page. #}
<div id="searchbox" style="display: none">
<form class="search" action="{{ pathto('search') }}" method="get">
<input type="text" name="q" size="18" />
<input type="submit" value="{{ _('Search') }}" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
<script type="text/javascript">$('#searchbox').show(0);</script>
{%- endif %}

View File

@ -1,390 +0,0 @@
/**
* Sphinx stylesheet -- default theme
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
@import url("basic.css");
/* -- page layout ----------------------------------------------------------- */
body {
font-family: {{ theme_bodyfont }};
font-size: 100%;
background-color: {{ theme_footerbgcolor }};
color: #000;
margin: 0;
padding: 0;
}
img.logo {
width: 150px;
}
div.document {
background-color: {{ theme_sidebarbgcolor }};
}
div.documentwrapper {
float: left;
width: 100%;
}
div.bodywrapper {
margin: 0 0 0 270px;
}
div.body {
background-color: {{ theme_bgcolor }};
color: {{ theme_textcolor }};
padding: 0 20px 30px 20px;
}
div.feedback {
background-color: {{ theme_feedbackbgcolor }};
color: {{ theme_feedbacktextcolor }};
padding: 20px 20px 30px 20px;
}
div.feedback h2 {
margin: 10px 0 10px 0;
}
div.feedback a {
color: {{ theme_feedbacklinkcolor }};
font-weight: bold;
}
{%- if theme_rightsidebar|tobool %}
div.bodywrapper {
margin: 0 230px 0 0;
}
{%- endif %}
div.footer {
color: {{ theme_footertextcolor }};
width: 100%;
padding: 9px 0 9px 0;
text-align: center;
font-size: 75%;
}
div.footer a {
color: {{ theme_footertextcolor }};
text-decoration: underline;
}
div.related {
background-color: {{ theme_relbarbgcolor }};
line-height: 30px;
color: {{ theme_relbartextcolor }};
}
div.related a {
color: {{ theme_relbarlinkcolor }};
}
div.sphinxsidebar {
word-wrap: break-word;
width: 270px;
{%- if theme_stickysidebar|tobool %}
top: 30px;
margin: 0;
position: fixed;
overflow: auto;
height: 100%;
{%- endif %}
{%- if theme_rightsidebar|tobool %}
float: right;
{%- if theme_stickysidebar|tobool %}
right: 0;
{%- endif %}
{%- endif %}
}
{%- if theme_stickysidebar|tobool %}
/* this is nice, but it leads to hidden headings when jumping
to an anchor */
/*
div.related {
position: fixed;
}
div.documentwrapper {
margin-top: 30px;
}
*/
{%- endif %}
div.sphinxsidebar h3 {
font-family: {{ theme_headfont }};
color: {{ theme_sidebartextcolor }};
font-size: 1.4em;
font-weight: normal;
margin: 0;
padding: 0;
}
div.sphinxsidebar h3 a {
color: {{ theme_sidebartextcolor }};
}
div.sphinxsidebar h4 {
font-family: {{ theme_headfont }};
color: {{ theme_sidebartextcolor }};
font-size: 1.3em;
font-weight: normal;
margin: 5px 0 0 0;
padding: 0;
}
div.sphinxsidebar p {
color: {{ theme_sidebartextcolor }};
}
div.sphinxsidebar p.topless {
margin: 5px 10px 10px 10px;
}
div.sphinxsidebar ul {
margin: 10px 0 10px 10px;
padding: 0;
color: {{ theme_sidebartextcolor }};
}
div.sphinxsidebar a {
color: {{ theme_sidebarlinkcolor }};
}
div.sphinxsidebar input {
border: 1px solid {{ theme_sidebarlinkcolor }};
font-family: sans-serif;
font-size: 1em;
}
/* -- body styles ----------------------------------------------------------- */
a {
color: {{ theme_linkcolor }};
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
div.body p, div.body dd, div.body li {
text-align: justify;
line-height: 130%;
margin-top: 1em;
margin-bottom: 1em;
}
div.toctree-wrapper li, ul.simple li {
margin:0;
}
div.body h1,
div.body h2,
div.body h3,
div.body h4,
div.body h5,
div.body h6 {
font-family: {{ theme_headfont }};
background-color: {{ theme_headbgcolor }};
font-weight: normal;
color: {{ theme_headtextcolor }};
border-bottom: 1px solid #ccc;
margin: 20px -20px 10px -20px;
padding: 3px 0 3px 10px;
}
a.toc-backref, a.toc-backref:hover {
font-family: {{ theme_headfont }};
background-color: {{ theme_headbgcolor }};
font-weight: normal;
color: {{ theme_headtextcolor }};
text-decoration: none;
}
div.body h1 { margin-top: 0; font-size: 200%; }
div.body h2 { font-size: 160%; }
div.body h3 { font-size: 140%; }
div.body h4 { font-size: 120%; }
div.body h5 { font-size: 110%; }
div.body h6 { font-size: 100%; }
a.headerlink {
color: {{ theme_headlinkcolor }};
font-size: 0.8em;
padding: 0 4px 0 4px;
text-decoration: none;
}
a.headerlink:hover {
background-color: {{ theme_headlinkcolor }};
color: white;
}
div.body p, div.body dd, div.body li {
text-align: justify;
line-height: 130%;
}
div.admonition p.admonition-title + p {
display: inline;
}
div.note {
background-color: #eee;
border: 1px solid #ccc;
}
div.seealso {
background-color: #ffc;
border: 1px solid #ff6;
}
div.topic {
background-color: #eee;
}
div.warning {
background-color: #ffe4e4;
border: 1px solid #f66;
}
p.admonition-title {
display: inline;
}
p.admonition-title:after {
content: ":";
}
pre {
padding: 5px;
background-color: {{ theme_codebgcolor }};
color: {{ theme_codetextcolor }};
line-height: 120%;
border: 1px solid #ace;
border-left: none;
border-right: none;
}
tt {
color: {{ theme_headtextcolor }};
/*background-color: #ecf0f3;*/
padding: 0 1px 0 1px;
font-size: 1.2em;
}
tt.descname {
color: {{ theme_headtextcolor }};
/*background-color: #ecf0f3;*/
padding: 0 1px 0 1px;
font-size: 1.4em;
}
div.math p {
margin-top: 10px;
margin-bottom: 10px;
}
dl.function > dt:first-child {
margin-bottom: 7px;
}
dl.cfunction > dt:first-child {
margin-bottom: 7px;
color: #8080B0;
}
dl.cfunction > dt:first-child tt.descname
{
color: #8080B0;
}
dl.pyfunction > dt:first-child {
margin-bottom: 7px;
}
dl.jfunction > dt:first-child {
margin-bottom: 7px;
}
table.field-list {
margin-top: 20px;
}
/*ul.simple {
list-style: none;
}*/
em.menuselection, em.guilabel {
font-family: {{ theme_guifont }};
}
.enumeratevisibleitemswithsquare ul {
list-style: square;
margin-bottom: 0px;
margin-left: 0px;
margin-right: 0px;
margin-top: 0px;
}
.enumeratevisibleitemswithsquare li {
margin-bottom: 0.2em;
margin-left: 0px;
margin-right: 0px;
margin-top: 0.2em;
}
.enumeratevisibleitemswithsquare p {
margin-bottom: 0pt;
margin-top: 1pt;
}
.enumeratevisibleitemswithsquare dl{
margin-bottom: 0px;
margin-left: 0px;
margin-right: 0px;
margin-top: 0px;
}
.toctableopencv
{
width: 100% ;
table-layout: fixed;
}
.toctableopencv colgroup col:first-child
{
width: 100pt !important;
max-width: 100pt !important;
min-width: 100pt !important;
}
.toctableopencv colgroup col:nth-child(2)
{
width: 100% !important;
}
div.body ul.search li {
text-align: left;
}
div.linenodiv {
min-width: 1em;
text-align: right;
}
div.sphinxsidebar #searchbox input[type="text"] {
width:auto;
}
div.sphinxsidebar #searchbox input[type="submit"] {
width:auto;
}

View File

@ -1,31 +0,0 @@
[theme]
inherit = basic
stylesheet = default.css
pygments_style = sphinx
[options]
rightsidebar = false
stickysidebar = false
footerbgcolor = #004068
footertextcolor = #ffffff
sidebarbgcolor = #006090
sidebartextcolor = #ffffff
sidebarlinkcolor = #cceeff
relbarbgcolor = #003048
relbartextcolor = #ffffff
relbarlinkcolor = #ffffff
bgcolor = #ffffff
textcolor = #000000
headbgcolor = #f2f2f2
headtextcolor = #003048
headlinkcolor = #65a136
linkcolor = #0090d9
codebgcolor = #e0f5ff
codetextcolor = #333333
feedbackbgcolor = #004068
feedbacktextcolor = #ffffff
feedbacklinkcolor = #ffffff
bodyfont = sans-serif
headfont = 'Trebuchet MS', sans-serif
guifont = "Lucida Sans","Lucida Sans Unicode","Lucida Grande",Verdana,Arial,Helvetica,sans-serif
lang = none

View File

@ -1,274 +0,0 @@
{#
basic/layout.html
~~~~~~~~~~~~~~~~~
Master layout template for Sphinx themes.
:copyright: Copyright 2007-2014 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
#}
{%- block doctype -%}
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
{%- endblock %}
{% set script_files = script_files + [pathto("_static/insertIframe.js", 1)] %}
{%- set reldelim1 = reldelim1 is not defined and ' &raquo;' or reldelim1 %}
{%- set reldelim2 = reldelim2 is not defined and ' |' or reldelim2 %}
{%- set render_sidebar = (not embedded) and (not theme_nosidebar|tobool) and
(sidebars != []) %}
{%- set url_root = pathto('', 1) %}
{# XXX necessary? #}
{%- if url_root == '#' %}{% set url_root = '' %}{% endif %}
{%- if not embedded and docstitle %}
{%- set titlesuffix = " &mdash; "|safe + docstitle|e %}
{%- else %}
{%- set titlesuffix = "" %}
{%- endif %}
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-33108845-1']);
_gaq.push(['_setDomainName', 'opencv.org']);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
{%- macro relbar() %}
<div class="related">
<h3>{{ _('Navigation') }}</h3>
<ul>
{%- for rellink in rellinks %}
<li class="right" {% if loop.first %}style="margin-right: 10px"{% endif %}>
<a href="{{ pathto(rellink[0]) }}" title="{{ rellink[1]|striptags|e }}"
{{ accesskey(rellink[2]) }}>{{ rellink[3] }}</a>
{%- if not loop.first %}{{ reldelim2 }}{% endif %}</li>
{%- endfor %}
{%- block rootrellink %}
<li><a href="{{ pathto(master_doc) }}">{{ shorttitle|e }}</a>{{ reldelim1 }}</li>
{%- endblock %}
{%- for parent in parents %}
<li><a href="{{ parent.link|e }}" {% if loop.last %}{{ accesskey("U") }}{% endif %}>{{ parent.title }}</a>{{ reldelim1 }}</li>
{%- endfor %}
{%- block relbaritems %} {% endblock %}
</ul>
</div>
{%- endmacro %}
{%- macro sidebar() %}
{%- if render_sidebar %}
<div class="sphinxsidebar">
<div class="sphinxsidebarwrapper">
{%- block sidebarlogo %}
{%- if logo %}
<p class="logo"><a href="{{ pathto(master_doc) }}">
<img class="logo" src="{{ pathto('_static/' + logo, 1) }}" alt="Logo"/>
</a></p>
{%- endif %}
{%- endblock %}
{%- if sidebars == None %}
{%- block sidebarsearch %}
{%- include "searchbox.html" %}
{%- endblock %}
{%- endif %}
{%- if sidebars != None %}
{#- new style sidebar: explicitly include/exclude templates #}
{%- for sidebartemplate in sidebars %}
{%- include sidebartemplate %}
{%- endfor %}
{%- else %}
{#- old style sidebars: using blocks -- should be deprecated #}
{%- block sidebartoc %}
{%- include "localtoc.html" %}
{%- endblock %}
{%- block sidebarrel %}
{%- include "relations.html" %}
{%- endblock %}
{%- if customsidebar %}
{%- include customsidebar %}
{%- endif %}
{%- endif %}
</div>
</div>
{%- endif %}
{%- endmacro %}
{%- macro script() %}
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '{{ url_root }}',
VERSION: '{{ release|e }}',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '{{ '' if no_search_suffix else file_suffix }}',
HAS_SOURCE: {{ has_source|lower }}
};
</script>
{%- for scriptfile in script_files %}
<script type="text/javascript" src="{{ pathto(scriptfile, 1) }}"></script>
{%- endfor %}
{%- endmacro %}
{%- macro css() %}
<link rel="stylesheet" href="{{ pathto('_static/' + style, 1) }}" type="text/css" />
<link rel="stylesheet" href="{{ pathto('_static/pygments.css', 1) }}" type="text/css" />
{%- for cssfile in css_files %}
<link rel="stylesheet" href="{{ pathto(cssfile, 1) }}" type="text/css" />
{%- endfor %}
{%- endmacro %}
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset={{ encoding }}" />
{{ metatags }}
{%- block htmltitle %}
<title>{{ title|striptags|e }}{{ titlesuffix }}</title>
{%- endblock %}
{{ css() }}
{%- if not embedded %}
{{ script() }}
{%- if use_opensearch %}
<link rel="search" type="application/opensearchdescription+xml"
title="{% trans docstitle=docstitle|e %}Search within {{ docstitle }}{% endtrans %}"
href="{{ pathto('_static/opensearch.xml', 1) }}"/>
{%- endif %}
{%- if favicon %}
<link rel="shortcut icon" href="{{ pathto('_static/' + favicon, 1) }}"/>
{%- endif %}
{%- endif %}
{%- block linktags %}
{%- if hasdoc('about') %}
<link rel="author" title="{{ _('About these documents') }}" href="{{ pathto('about') }}" />
{%- endif %}
{%- if hasdoc('genindex') %}
<link rel="index" title="{{ _('Index') }}" href="{{ pathto('genindex') }}" />
{%- endif %}
{%- if hasdoc('search') %}
<link rel="search" title="{{ _('Search') }}" href="{{ pathto('search') }}" />
{%- endif %}
{%- if hasdoc('copyright') %}
<link rel="copyright" title="{{ _('Copyright') }}" href="{{ pathto('copyright') }}" />
{%- endif %}
<link rel="top" title="{{ docstitle|e }}" href="{{ pathto('index') }}" />
{%- if parents %}
<link rel="up" title="{{ parents[-1].title|striptags|e }}" href="{{ parents[-1].link|e }}" />
{%- endif %}
{%- if next %}
<link rel="next" title="{{ next.title|striptags|e }}" href="{{ next.link|e }}" />
{%- endif %}
{%- if prev %}
<link rel="prev" title="{{ prev.title|striptags|e }}" href="{{ prev.link|e }}" />
{%- endif %}
{%- endblock %}
{%- block extrahead %}
{#- The web font is requested over https so that pages served over https are not
    hit by mixed-content blocking. #}
<link href='https://fonts.googleapis.com/css?family=Open+Sans:300,400,700'
rel='stylesheet' type='text/css' />
{#- The inline style and the sidebar scroll handler are only needed for stand-alone pages. #}
{%- if not embedded %}
<style type="text/css">
table.right { float: right; margin-left: 20px; }
table.right td { border: 1px solid #ccc; }
</style>
<script type="text/javascript">
// intelligent scrolling of the sidebar content
// NOTE(review): relies on jQuery ($) and on $u, presumably the underscore.js
// alias set up by the scripts emitted from script() -- confirm.
$(window).scroll(function() {
  var sb = $('.sphinxsidebarwrapper');
  var win = $(window);
  var sbh = sb.height();
  var offset = $('.sphinxsidebar').position()['top'];
  var wintop = win.scrollTop();
  var winbot = wintop + win.innerHeight();
  var curtop = sb.position()['top'];
  var curbot = curtop + sbh;
  // does sidebar fit in window?
  if (sbh < win.innerHeight()) {
    // yes: easy case -- always keep at the top
    sb.css('top', $u.min([$u.max([0, wintop - offset - 10]),
                          $(document).height() - sbh - 200]));
  } else {
    // no: only scroll if top/bottom edge of sidebar is at
    // top/bottom edge of window
    if (curtop > wintop && curbot > winbot) {
      sb.css('top', $u.max([wintop - offset - 10, 0]));
    } else if (curtop < wintop && curbot < winbot) {
      sb.css('top', $u.min([winbot - sbh - offset - 20,
                            $(document).height() - sbh - 200]));
    }
  }
});
</script>
{%- endif %}
{% endblock %}
</head>
<body>
{#- <body> is opened before the header, relation bar and sidebar blocks so that
    everything they render ends up inside the body element. #}
{%- block header %}{% endblock %}
{%- block relbar1 %}{{ relbar() }}{% endblock %}
{%- block sidebar1 %} {# possible location for sidebar #} {% endblock %}
{%- block sidebar2 %}{{ sidebar() }}{% endblock %}
{#- Main page area: the document body followed by a fixed "Help and Feedback" box.
    The extra "bodywrapper" div is only emitted when the sidebar is rendered. #}
{%- block content %}
<div class="document">
{%- block document %}
<div class="documentwrapper">
{%- if render_sidebar %}
<div class="bodywrapper">
{%- endif %}
<div class="body">
{% block body %} {% endblock %}
</div>
<div class="feedback">
<h2>Help and Feedback</h2>
You did not find what you were looking for?
<ul>
{# Language-specific hints, selected by the theme_lang value; the 'c' branch currently adds nothing. #}
{% if theme_lang == 'c' %}
{% endif %}
{% if theme_lang == 'cpp' %}
<li>Try the <a href="http://docs.opencv.org/opencv_cheatsheet.pdf">Cheatsheet</a>.</li>
{% endif %}
{% if theme_lang == 'py' %}
<li>Try the <a href="cookbook.html">Cookbook</a>.</li>
{% endif %}
{# The ampersand is written as an entity: the page is declared as XHTML. #}
<li>Ask a question on the <a href="http://answers.opencv.org">Q&amp;A forum</a>.</li>
<li>If you think something is missing or wrong in the documentation,
please file a <a href="http://code.opencv.org">bug report</a>.</li>
</ul>
</div>
{%- if render_sidebar %}
</div>
{%- endif %}
</div>
{%- endblock %}
<div class="clearer"></div>
</div>
{%- endblock %}
{#- Second relation bar, rendered after the page content. #}
{%- block relbar2 %}{{ relbar() }}{% endblock %}
{#- Footer: each part (copyright, last-updated stamp, Sphinx credit, page-source link)
    is controlled by its own flag. The copyright text links to the copyright
    document when one exists. #}
{%- block footer %}
<div class="footer">
{%- if show_copyright %}
{%- if hasdoc('copyright') %}
{% trans path=pathto('copyright'), copyright=copyright|e %}&copy; <a href="{{ path }}">Copyright</a> {{ copyright }}.{% endtrans %}
{%- else %}
{% trans copyright=copyright|e %}&copy; Copyright {{ copyright }}.{% endtrans %}
{%- endif %}
{%- endif %}
{%- if last_updated %}
{% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %}
{%- endif %}
{%- if show_sphinx %}
{% trans sphinx_version=sphinx_version|e %}Created using <a href="http://sphinx-doc.org/">Sphinx</a> {{ sphinx_version }}.{% endtrans %}
{%- endif %}
{%- if show_source and has_source and sourcename %}
<a href="{{ pathto('_sources/' + sourcename, true)|e }}" rel="nofollow">{{ _('Show this page source.') }}</a>
{%- endif %}
</div>
{%- endblock %}
</body>
</html>

View File

@ -1,21 +0,0 @@
{#
basic/searchbox.html
~~~~~~~~~~~~~~~~~~~~
Sphinx sidebar template: quick search box.
:copyright: Copyright 2007-2014 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
#}
{#- Not rendered on the search page itself nor for the "singlehtml" builder.
    The box is emitted with display:none and revealed by the inline script that
    follows it, so it only becomes visible when scripts run. #}
{%- if pagename != "search" and builder != "singlehtml" %}
<div id="searchbox" style="display: none">
<h3>{{ _('Quick search') }}</h3>
<form class="search" action="{{ pathto('search') }}" method="get">
<input type="text" name="q" />
<input type="submit" value="{{ _('Go') }}" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
<script type="text/javascript">$('#searchbox').show(0);</script>
{%- endif %}

View File

@ -1,466 +0,0 @@
/*
* sphinxdoc.css_t
* ~~~~~~~~~~~~~~~
*
* Sphinx stylesheet -- sphinxdoc theme.
*
* :copyright: Copyright 2007-2014 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
@import url("basic.css");
/* -- page layout ----------------------------------------------------------- */
/* The page is a centred column between 780px and 1080px wide with a border on
   both sides. */
body {
font-family: 'Open Sans', 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
'Verdana', sans-serif;
font-size: 14px;
text-align: center;
background-image: url(bodybg.png);
color: black;
padding: 0;
border-right: 1px solid #0a507a;
border-left: 1px solid #0a507a;
margin: 0 auto;
min-width: 780px;
max-width: 1080px;
}
div.document {
background-color: white;
text-align: left;
}
/* 240px are kept free on the right of the body; presumably for the
   right-floated sidebar defined below (210px wide plus padding). */
div.bodywrapper {
margin: 0 240px 0 0;
border-right: 1px solid #0a507a;
}
div.body {
margin: 0;
padding: 0.5em 20px 20px 20px;
}
/* relation bar (navigation strip) */
div.related {
font-size: 1em;
color: white;
}
div.related ul {
background-image: url(relbg.png);
text-align: left;
border-top: 1px solid #002e50;
border-bottom: 1px solid #002e50;
}
div.related li + li {
display: inline;
}
div.related ul li.right {
float: right;
margin-right: 5px;
}
div.related ul li a {
margin: 0;
padding: 0 5px 0 5px;
line-height: 1.75em;
color: #f9f9f0;
text-shadow: 0px 0px 1px rgba(0, 0, 0, 0.5);
}
div.related ul li a:hover {
color: white;
text-shadow: 0px 0px 1px rgba(255, 255, 255, 0.5);
}
div.footer {
background-image: url(footerbg.png);
color: #ccc;
text-shadow: 0 0 .2px rgba(255, 255, 255, 0.8);
padding: 3px 8px 3px 0;
clear: both;
font-size: 0.8em;
text-align: center;
}
/* sidebar */
/* The wrapper is relatively positioned with an explicit "top" so that a script
   can move it by changing "top". */
div.sphinxsidebarwrapper {
position: relative;
top: 0px;
padding: 0;
}
div.sphinxsidebar {
word-wrap: break-word;
margin: 0;
padding: 0 15px 15px 0;
width: 210px;
float: right;
font-size: 1em;
text-align: left;
}
div.sphinxsidebar .logo {
text-align: center;
}
div.sphinxsidebar .logo img {
width: 150px;
vertical-align: middle;
}
div.sphinxsidebar input {
border: 1px solid #aaa;
font-family: 'Open Sans', 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
'Verdana', sans-serif;
font-size: 1em;
}
div.sphinxsidebar #searchbox input[type="text"] {
width: 160px;
}
div.sphinxsidebar #searchbox input[type="submit"] {
width: 40px;
}
div.sphinxsidebar h3 {
font-size: 1.5em;
border-top: 1px solid #0a507a;
margin-top: 1em;
margin-bottom: 0.5em;
padding-top: 0.5em;
}
div.sphinxsidebar h4 {
font-size: 1.2em;
margin-bottom: 0;
}
/* The negative horizontal margins cancel the sidebar's 15px padding so that
   headings span its full width. */
div.sphinxsidebar h3, div.sphinxsidebar h4 {
margin-right: -15px;
margin-left: -15px;
padding-right: 14px;
padding-left: 14px;
color: #333;
font-weight: 300;
/*text-shadow: 0px 0px 0.5px rgba(0, 0, 0, 0.4);*/
}
/* the first heading of the sidebar gets no separator line */
div.sphinxsidebarwrapper > h3:first-child {
margin-top: 0.5em;
border: none;
}
div.sphinxsidebar h3 a {
color: #333;
}
div.sphinxsidebar ul {
color: #444;
margin-top: 7px;
padding: 0;
line-height: 130%;
}
div.sphinxsidebar ul ul {
margin-left: 20px;
list-style-image: url(listitem.png);
}
/* -- body styles ----------------------------------------------------------- */
p {
margin: 0.8em 0 0.5em 0;
}
a, a tt {
color: #2878a2;
}
a:hover, a tt:hover {
color: #68b8c2;
}
a tt {
border: 0;
}
/* headings share one colour scheme; sizes are set per level further down */
h1, h2, h3, h4, h5, h6 {
color: #0a507a;
background-color: #e5f5ff;
font-weight: 300;
}
h1 {
margin: 10px 0 0 0;
}
h2 {
/* "1em", not "1.em": a number may not end in a bare dot, and an invalid value
   makes the browser drop the whole margin declaration */
margin: 1em 0 0.2em 0;
padding: 0;
}
h3 {
margin: 1em 0 -0.3em 0;
}
h1 { font-size: 200%; }
h2 { font-size: 160%; }
h3 { font-size: 140%; }
h4 { font-size: 120%; }
h5 { font-size: 110%; }
h6 { font-size: 100%; }
div a, h1 a, h2 a, h3 a, h4 a, h5 a, h6 a {
text-decoration: none;
}
div.body h1 a tt, div.body h2 a tt, div.body h3 a tt,
div.body h4 a tt, div.body h5 a tt, div.body h6 a tt {
color: #0a507a !important;
font-size: inherit !important;
}
/* permalink marker next to headings */
a.headerlink {
color: #0a507a !important;
font-size: 12px;
margin-left: 6px;
padding: 0 4px 0 4px;
text-decoration: none !important;
float: right;
}
a.headerlink:hover {
background-color: #ccc;
color: white!important;
}
/* inline code */
cite, code, tt {
font-family: 'Consolas', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono',
monospace;
font-size: 14px;
min-width: 780px;
max-width: 1080px;
}
tt {
color: #003048;
padding: 1px;
}
tt.descname, tt.descclassname, tt.xref {
font-size: 12px;
}
hr {
border: 1px solid #abc;
margin: 2em;
}
/* code listings */
pre {
font-family: 'Consolas', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono',
monospace;
font-size: 13px;
letter-spacing: 0.015em;
line-height: 120%;
padding: 0.5em;
border: 1px solid #ccc;
border-radius: 2px;
background-color: #f8f8f8;
}
pre a {
color: inherit;
text-decoration: none;
}
td.linenos pre {
padding: 0.5em 0;
}
td.code pre {
max-width: 740px;
overflow: auto;
overflow-y: hidden; /* fixes display issues on Chrome browsers */
}
div.quotebar {
background-color: #f8f8f8;
max-width: 250px;
float: right;
padding: 0px 7px;
border: 1px solid #ccc;
margin-left: 1em;
}
table {
border-collapse: collapse;
margin: 0 -0.5em 0 -0.5em;
}
table td, table th {
padding: 0.2em 0.5em 0.2em 0.5em;
}
/* admonitions */
div.note {
background-color: #eee;
border: 1px solid #ccc;
}
div.seealso {
background-color: #ffc;
border: 1px solid #ff6;
}
/* Single div.topic rule: an earlier duplicate that set #f8f8f8 was always
   overridden by this one, so the rendered colour is unchanged. */
div.topic {
background-color: #eee;
}
div.warning {
background-color: #ffe4e4;
border: 1px solid #f66;
}
div.admonition ul li, div.warning ul li,
div.admonition ol li, div.warning ol li {
text-align: left;
}
/* the admonition title and the paragraph after it run on one line, joined by ":" */
div.admonition p.admonition-title + p {
display: inline;
}
p.admonition-title {
display: inline;
}
p.admonition-title:after {
content: ":";
}
/* ------------------ our styles ----------------*/
/* running text in the document body is justified */
div.body p, div.body dd, div.body li {
text-align: justify;
line-height: 130%;
margin-top: 1em;
margin-bottom: 1em;
}
div.toctree-wrapper li, ul.simple li {
margin:0;
}
/*a.toc-backref {
}*/
/* "Help and Feedback" box rendered below the document body */
div.feedback {
/*background-color: #;*/
/*color: #;*/
padding: 20px 20px 30px 20px;
border-top: 1px solid #002e50;
}
div.feedback h2 {
margin: 10px 0 10px 0;
}
div.feedback a {
/*color: #;*/
font-weight: bold;
}
div.math p {
margin-top: 10px;
margin-bottom: 10px;
}
/* function signature blocks; only the cfunction variant is recoloured */
dl.function > dt:first-child {
margin-bottom: 7px;
}
dl.cfunction > dt:first-child {
margin-bottom: 7px;
color: #8080B0;
}
dl.cfunction > dt:first-child tt.descname {
color: #8080B0;
}
dl.pyfunction > dt:first-child {
margin-bottom: 7px;
}
dl.jfunction > dt:first-child {
margin-bottom: 7px;
}
table.field-list {
margin-top: 20px;
}
em.menuselection, em.guilabel {
font-family: 'Lucida Sans', 'Lucida Sans Unicode', 'Lucida Grande', Verdana,
Arial, Helvetica, sans-serif;
}
/* compact, square-bulleted lists for content inside .enumeratevisibleitemswithsquare */
.enumeratevisibleitemswithsquare ul {
list-style: square;
margin-bottom: 0px;
margin-left: 0px;
margin-right: 0px;
margin-top: 0px;
}
.enumeratevisibleitemswithsquare li {
margin-bottom: 0.2em;
margin-left: 0px;
margin-right: 0px;
margin-top: 0.2em;
}
.enumeratevisibleitemswithsquare p {
margin-bottom: 0pt;
margin-top: 1pt;
}
.enumeratevisibleitemswithsquare dl {
margin-bottom: 0px;
margin-left: 0px;
margin-right: 0px;
margin-top: 0px;
}
/* two-column table: the first column is pinned to 100pt, the second takes the rest */
.toctableopencv {
width: 100% ;
table-layout: fixed;
}
.toctableopencv colgroup col:first-child {
width: 100pt !important;
max-width: 100pt !important;
min-width: 100pt !important;
}
.toctableopencv colgroup col:nth-child(2) {
width: 100% !important;
}
/* search results are left-aligned, overriding the justified body text above */
div.body ul.search li {
text-align: left;
}
div.linenodiv {
min-width: 1em;
text-align: right;
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 220 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 230 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 207 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 223 B

View File

@ -1,4 +0,0 @@
# Sphinx theme description.
[theme]
# theme this one is derived from
inherit = basic
# stylesheet file of the theme
stylesheet = default.css
# Pygments style name
pygments_style = sphinx

View File

Before

Width:  |  Height:  |  Size: 513 B

After

Width:  |  Height:  |  Size: 513 B

View File

@ -1,190 +0,0 @@
#!/usr/bin/env python
import sys, glob
sys.path.append("../modules/python/src2/")
import hdr_parser as hp
# Headers whose declarations are checked against the reST documentation.
# The paths are relative, so they resolve against the current directory.
opencv_hdr_list = [
    "../modules/core/include/opencv2/core.hpp",
    "../modules/ml/include/opencv2/ml.hpp",
    "../modules/imgproc/include/opencv2/imgproc.hpp",
    "../modules/calib3d/include/opencv2/calib3d.hpp",
    "../modules/features2d/include/opencv2/features2d.hpp",
    "../modules/video/include/opencv2/video/tracking.hpp",
    "../modules/video/include/opencv2/video/background_segm.hpp",
    "../modules/objdetect/include/opencv2/objdetect.hpp",
    "../modules/imgcodecs/include/opencv2/imgcodecs.hpp",
    "../modules/videoio/include/opencv2/videoio.hpp",
    "../modules/highgui/include/opencv2/highgui.hpp",
]

# Modules to check, in the order they are reported.
opencv_module_list = [
    "core", "imgproc", "calib3d", "features2d", "video",
    "objdetect", "imgcodecs", "videoio", "highgui", "ml",
]
class RSTParser(object):
    """Cross-check function declarations from the module headers against the
    function directives found in the module's reST documentation.

    Attributes set by the methods below:
      whitelist -- dotted name -> "*" or list of parsed declarations
      parser    -- hp.CppHeaderParser used for headers and reST signatures
      fmap      -- function name -> header declarations not yet matched
    """

    def __init__(self):
        self.read_whitelist()

    def read_whitelist(self):
        """Read the file listing functions and classes that need no documentation.

        Fills self.whitelist. A bare name maps to "*" (the name and everything
        nested below it is exempt); a full declaration is appended to the list
        stored under the function name (only that overload is exempt).
        If the file cannot be read the whitelist stays empty.
        """
        self.whitelist = {}
        try:
            # read everything up front so the handle is closed even on errors
            with open("check_docs_whitelist.txt", "rt") as wf:
                lines = wf.readlines()
        except IOError:
            return
        self.parser = hp.CppHeaderParser()
        for l in lines:
            # everything after '#' is a comment
            cpos = l.find("#")
            if cpos >= 0:
                l = l[:cpos]
            l = l.strip()
            if not l:
                continue
            rst_decl = None
            if "(" in l:
                # full declaration: whitelist a single overload
                l = l.replace("cv::", "")
                rst_decl = self.parser.parse_func_decl_no_wrap(l)
                fname = rst_decl[0]
            else:
                fname = l.replace("::", ".")
            # walk a, a.b, a.b.c ...; stop as soon as an enclosing name is
            # already whitelisted as a whole
            prefix = ""
            wl = []
            for c in fname.split("."):
                prefix = (prefix + "." + c).lstrip(".")
                wl = self.whitelist.get(prefix, [])
                if wl == "*":
                    break
            if wl == "*":
                continue
            if not rst_decl:
                self.whitelist[fname] = "*"
            else:
                wl.append(rst_decl)
                self.whitelist[fname] = wl

    def process_rst(self, docname):
        """Match every function directive in reST file `docname` against self.fmap.

        A directive may span several lines; lines are joined until the
        parentheses balance. A header declaration matches when it has the same
        number of arguments and the same argument types (an optional "cv::"
        prefix on the header side is ignored). Matched declarations are removed
        from self.fmap; directives without a counterpart are printed.
        """
        with open(docname, "rt") as df:
            lines = df.readlines()
        fdecl = ""
        balance = 0
        lineno = 0
        for l in lines:
            lineno += 1
            ll = l.strip()
            if balance == 0:
                if not ll.startswith(".. c:function::") and \
                   not ll.startswith(".. cpp:function::") and \
                   not ll.startswith(".. ocv:function::") and \
                   not ll.startswith(".. ocv:cfunction::"):
                    continue
                # skip the ":: " that ends the directive name
                fdecl = ll[ll.find("::") + 3:]
            elif balance > 0:
                fdecl += ll
            balance = fdecl.count("(") - fdecl.count(")")
            assert balance >= 0
            if balance > 0:
                continue
            rst_decl = self.parser.parse_func_decl_no_wrap(fdecl)
            fname = rst_decl[0]
            hdr_decls = self.fmap.get(fname, [])
            if not hdr_decls:
                fname = fname.replace("cv.", "")
                hdr_decls = self.fmap.get(fname, [])
            if not hdr_decls:
                print("Documented function %s (%s) in %s:%d is not in the headers" % (fdecl, rst_decl[0].replace(".", "::"), docname, lineno))
                continue
            # find the first header overload with identical argument types
            decl_idx = 0
            for hd in hdr_decls:
                if len(hd[3]) != len(rst_decl[3]):
                    decl_idx += 1
                    continue
                idx = 0
                for a in hd[3]:
                    if a[0] != rst_decl[3][idx][0] and a[0].replace("cv::", "") != rst_decl[3][idx][0]:
                        break
                    idx += 1
                if idx == len(hd[3]):
                    break
                decl_idx += 1
            if decl_idx < len(hdr_decls):
                # documented: drop the overload from the pending list
                self.fmap[fname] = hdr_decls[:decl_idx] + hdr_decls[decl_idx+1:]
                continue
            print("Documented function %s in %s:%d does not have a match" % (fdecl, docname, lineno))

    def decl2str(self, decl):
        """Format a parsed declaration as "<rettype> <name>(<type> <arg>, ...)"."""
        return "%s %s(%s)" % (decl[1], decl[0], ", ".join([a[0] + " " + a[1] for a in decl[3]]))

    def check_module_docs(self, name):
        """Print the functions of module `name` that are declared in its
        headers but neither documented nor whitelisted."""
        self.parser = hp.CppHeaderParser()
        decls = []
        self.fmap = {}
        # The trailing "/" keeps a module from picking up the headers of
        # another module whose name merely starts with it (video / videoio).
        module_prefix = "../modules/" + name + "/"
        for hname in opencv_hdr_list:
            if hname.startswith(module_prefix):
                decls += self.parser.parse(hname, wmode=False)
        for d in decls:
            fname = d[0]
            # only functions are tracked; class/struct/const entries are skipped
            if not fname.startswith("struct") and not fname.startswith("class") and not fname.startswith("const"):
                dlist = self.fmap.get(fname, [])
                dlist.append(d)
                self.fmap[fname] = dlist
        self.missing_docfunc_list = []
        doclist = glob.glob("../modules/" + name + "/doc/*.rst")
        for d in doclist:
            self.process_rst(d)
        # whatever is still in fmap was not matched by any directive
        print("\n\n########## The list of undocumented functions: ###########\n\n")
        misscount = 0
        fkeys = sorted(self.fmap.keys())
        for f in fkeys:
            # skip undocumented destructors
            if "~" in f:
                continue
            decls = self.fmap[f]
            prefix = ""
            wlist_decls = []
            for c in f.split("."):
                prefix = (prefix + "." + c).lstrip(".")
                wlist_decls = self.whitelist.get(prefix, [])
                if wlist_decls == "*":
                    break
            if wlist_decls == "*":
                continue
            wlist_decls = [self.decl2str(d) for d in wlist_decls]
            for d in decls:
                dstr = self.decl2str(d)
                # special hack for ML: skip old variants of the methods
                if name == "ml" and ("CvMat" in dstr):
                    continue
                if dstr not in wlist_decls:
                    misscount += 1
                    print("%s %s(%s)" % (d[1], d[0].replace(".", "::"), ", ".join([a[0] + " " + a[1] for a in d[3]])))
        print("\n\n\nundocumented functions in %s: %d" % (name, misscount))
# Script body: run the check for every module in turn, each under its own banner.
p = RSTParser()
for m in opencv_module_list:
    print("\n\n*************************** " + m + " *************************\n")
    p.check_module_docs(m)

View File

@ -1,511 +0,0 @@
#!/usr/bin/env python
import os, sys, fnmatch, re
sys.path.append("../modules/python/src2/")
sys.path.append("../modules/java/generator")
import hdr_parser as hp
import rst_parser as rp
rp.show_warnings = False
rp.show_errors = False
allmodules = rp.allmodules
# Marker stored on documentation entries / appended to signatures once they
# have been dealt with.
DOCUMENTED_MARKER = "verified"

# Numeric codes of the reported problems (printed as "error NNN").
ERROR_001_NOTACLASS        = 1
ERROR_002_NOTASTRUCT       = 2
ERROR_003_INCORRECTBASE    = 3
ERROR_004_MISSEDNAMESPACE  = 4
ERROR_005_MISSINGPYFUNC    = 5
ERROR_006_INVALIDPYOLDDOC  = 6
ERROR_007_INVALIDPYDOC     = 7
ERROR_008_CFUNCISNOTGLOBAL = 8
ERROR_009_OVERLOADNOTFOUND = 9
ERROR_010_UNKNOWNCLASS     = 10
ERROR_011_UNKNOWNFUNC      = 11

# Compare documented Python signatures against the cv2 module.
do_python_crosscheck = True

# Error codes that are never reported.
errors_disabled = [ERROR_004_MISSEDNAMESPACE]

# Documented names that are accepted even though they are not found in headers.
doc_signatures_whitelist = [
    # templates
    "Matx",
    "Vec",
    "SparseMat_",
    "Scalar_",
    "Mat_",
    "Ptr",
    "Size_",
    "Point_",
    "Rect_",
    "Point3_",
    "DataType",
    "detail::RotationWarperBase",
    "flann::Index_",
    "CalonderDescriptorExtractor",
    "cuda::PtrStepSz",
    "cuda::PtrStep",
    "cuda::PtrElemStep_",
    # black boxes
    "CvArr",
    "CvFileStorage",
    # other
    "InputArray",
    "OutputArray",
]

# Names documented as functions that are not verified against the headers.
defines = [
    "cvGraphEdgeIdx",
    "cvFree",
    "CV_Assert",
    "cvSqrt",
    "cvGetGraphVtx",
    "cvGraphVtxIdx",
    "cvCaptureFromFile",
    "cvCaptureFromCAM",
    "cvCalcBackProjectPatch",
    "cvCalcBackProject",
    "cvGetHistValue_1D",
    "cvGetHistValue_2D",
    "cvGetHistValue_3D",
    "cvGetHistValue_nD",
    "cvQueryHistValue_1D",
    "cvQueryHistValue_2D",
    "cvQueryHistValue_3D",
    "cvQueryHistValue_nD",
    # not a real function but behaves as function
    "Mat::size",
    # ugly "virtual" functions from ml module
    "CvStatModel::train",
    "CvStatModel::predict",
    # TODO:
    "cvExtractSURF",
]

# Alternative names under which a class may appear in the documentation.
synonims = {
    "StarDetector": ["StarFeatureDetector"],
    "MSER": ["MserFeatureDetector"],
    "GFTTDetector": ["GoodFeaturesToTrackDetector"],
    "cvCaptureFromFile": ["cvCreateFileCapture"],
    "cvCaptureFromCAM": ["cvCreateCameraCapture"],
    "cvCalcArrBackProjectPatch": ["cvCalcBackProjectPatch"],
    "cvCalcArrBackProject": ["cvCalcBackProject"],
    "InputArray": ["_InputArray"],
    "OutputArray": ["_OutputArray"],
}
# The Python cross-check needs the cv2 module; switch the check off when it
# cannot be imported.
if do_python_crosscheck:
    try:
        import cv2
    except ImportError:
        print("Could not load cv2")
        do_python_crosscheck = False
def get_cv2_object(name):
    """Create an instance of the cv2 class called `name`.

    A leading "cv2." and then a leading "cv." are removed from the name.
    Returns the tuple (instance, stripped name).
    """
    for ns_prefix in ("cv2.", "cv."):
        if name.startswith(ns_prefix):
            name = name[len(ns_prefix):]

    # Names that are instantiated through a specific factory call rather than
    # by calling cv2.<name>() -- presumably because they cannot be constructed
    # directly. Lambdas keep the calls lazy: only the selected one runs.
    special_factories = {
        "Algorithm": lambda: cv2.Algorithm__create("Feature2D.ORB"),
        "FeatureDetector": lambda: cv2.FeatureDetector_create("ORB"),
        "DescriptorExtractor": lambda: cv2.DescriptorExtractor_create("ORB"),
        "BackgroundSubtractor": lambda: cv2.createBackgroundSubtractorMOG(),
        "StatModel": lambda: cv2.KNearest(),
    }
    make = special_factories.get(name)
    if make is not None:
        return make(), name

    # generic case: cv2.<name>(), falling back to cv2.create<name>()
    try:
        obj = getattr(cv2, name)()
    except AttributeError:
        obj = getattr(cv2, "create" + name)()
    return obj, name
def compareSignatures(f, s):
    """Compare two parsed function declarations.

    Both arguments are indexed as [name, return type, modifiers, arguments],
    where each argument is indexed as [type, name, default value].
    "cv::" / "std::" prefixes are ignored in types and default values, and
    whitespace before a trailing "*" or "&" is ignored in argument types.
    An empty return type in `s` counts as "void"; an empty return type in `f`
    is not compared at all.

    Returns (True, "match") or (False, <reason>).
    """
    # function names
    if f[0] != s[0]:
        return False, "name mismatch"
    # return type
    stype = (s[1] or "void")
    ftype = f[1]
    stype = re.sub(r"\b(cv|std)::", "", stype)
    if ftype:
        ftype = re.sub(r"\b(cv|std)::", "", ftype)
    if ftype and ftype != stype:
        return False, "return type mismatch"
    # Qualifiers. The markers use a forward slash ("/C", "/S", "/V", "/A"),
    # the same spelling formatSignature() tests for.
    for marker, label in (("/C", "const"), ("/S", "static"), ("/V", "virtual"), ("/A", "abstract")):
        if (marker in f[2]) ^ (marker in s[2]):
            return False, label + " qualifier mismatch"
    # arguments
    if len(f[3]) != len(s[3]):
        return False, "different number of arguments"
    for idx, arg in enumerate(zip(f[3], s[3])):
        farg = arg[0]
        sarg = arg[1]
        ftype = re.sub(r"\b(cv|std)::", "", (farg[0] or ""))
        stype = re.sub(r"\b(cv|std)::", "", (sarg[0] or ""))
        ftype = re.sub(r"\s+(\*|&)$", "\\1", ftype)
        stype = re.sub(r"\s+(\*|&)$", "\\1", stype)
        if ftype != stype:
            return False, "type of argument #" + str(idx+1) + " mismatch"
        # unnamed arguments are treated as "arg<index>"
        fname = farg[1] or "arg" + str(idx)
        sname = sarg[1] or "arg" + str(idx)
        if fname != sname:
            return False, "name of argument #" + str(idx+1) + " mismatch"
        fdef = re.sub(r"\b(cv|std)::", "", (farg[2] or ""))
        sdef = re.sub(r"\b(cv|std)::", "", (sarg[2] or ""))
        if fdef != sdef:
            return False, "default value of argument #" + str(idx+1) + " mismatch"
    return True, "match"
def formatSignature(s):
    """Render a parsed declaration as a C++-looking signature string.

    `s` is indexed as [name, return type, modifiers, arguments]; each argument
    is indexed as [type, name, default value]. A leading "cv." is dropped from
    the name and the remaining dots become "::". "cv::" is removed from
    argument types and default values.
    """
    _str = ""
    if "/V" in s[2]:
        _str += "virtual "
    if "/S" in s[2]:
        _str += "static "
    if s[1]:
        _str += s[1] + " "
    else:
        # no return type: "void", unless the name looks like a constructor
        # (last two components equal, e.g. Foo.Foo)
        if not bool(re.match(r"(\w+\.)*(?P<cls>\w+)\.(?P=cls)", s[0])):
            _str += "void "
    if s[0].startswith("cv."):
        _str += s[0][3:].replace(".", "::")
    else:
        _str += s[0].replace(".", "::")
    if len(s[3]) == 0:
        _str += "()"
    else:
        _str += "( "
        for idx, arg in enumerate(s[3]):
            if idx > 0:
                _str += ", "
            argtype = re.sub(r"\bcv::", "", arg[0])
            # operate on the already stripped type, so the cv:: removal is kept
            argtype = re.sub(r"\s+(\*|&)$", "\\1", argtype)
            # array types: the "[...]" part goes after the argument name
            bidx = argtype.find('[')
            if bidx < 0:
                _str += argtype
            else:
                _str += argtype[:bidx]
            _str += " "
            if arg[1]:
                _str += arg[1]
            else:
                _str += "arg" + str(idx)
            if bidx >= 0:
                _str += argtype[bidx:]
            if arg[2]:
                _str += "=" + re.sub(r"\bcv::", "", arg[2])
        _str += " )"
    if "/C" in s[2]:
        _str += " const"
    if "/A" in s[2]:
        _str += " = 0"
    return _str
def logerror(code, message, doc = None):
    """Print "error NNN: message", prefixed with "file:line " when `doc` is given.

    Nothing is printed for codes listed in errors_disabled.
    """
    if code in errors_disabled:
        return
    location = ""
    if doc:
        # the trailing blank separates the location from the message
        location = doc["file"] + ":" + str(doc["line"]) + " "
    print(location + "error %03d: %s" % (code, message))
def process_module(module, path):
# Cross-check the reST documentation of one module against its headers.
# `module` is the module name, `path` the module directory. Headers are taken
# from <path>/include, documentation definitions from rp.RstParser. Problems
# are reported through logerror(); nothing is returned.
# NOTE(review): the leading indentation of this function is missing in this
# copy of the file, so block nesting cannot be read from the layout.
hppparser = hp.CppHeaderParser()
rstparser = rp.RstParser(hppparser)
rstparser.parse(module, path)
rst = rstparser.definitions
# collect every *.h* file below <path>/include
hdrlist = []
for root, dirs, files in os.walk(os.path.join(path, "include")):
for filename in fnmatch.filter(files, "*.h*"):
hdrlist.append(os.path.join(root, filename))
# the cuda module is additionally checked against three headers of the core module
if module == "cuda":
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda_types.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda_stream_accessor.hpp"))
decls = []
for hname in hdrlist:
if not "ts_gtest.h" in hname:
decls += hppparser.parse(hname, wmode=False)
funcs = []
# not really needed to hardcode all the namespaces. Normally all they are collected automatically
namespaces = ['cv', 'cv.cuda', 'cvflann', 'cvflann.anyimpl', 'cvflann.lsh', 'cv.flann', 'cv.linemod', 'cv.detail', 'cvtest', 'perf', 'cv.videostab']
classes = []
structs = []
# collect namespaces and classes/structs
for decl in decls:
if decl[0].startswith("const"):
pass
elif decl[0].startswith("class") or decl[0].startswith("struct"):
# first character tells the two apart: 'c' for "class", otherwise "struct"
if decl[0][0] == 'c':
classes.append(decl)
else:
structs.append(decl)
dotIdx = decl[0].rfind('.')
if dotIdx > 0:
# the text between the keyword and the last dot is the enclosing scope; it is
# recorded as a namespace unless a known class/struct name ends with it
namespace = decl[0][decl[0].find(' ')+1:dotIdx]
if not [c for c in classes if c[0].endswith(namespace)] and not [s for s in structs if s[0].endswith(namespace)]:
if namespace not in namespaces:
namespaces.append(namespace)
else:
funcs.append(decl)
clsnamespaces = []
# process classes
for cl in classes:
name = cl[0][cl[0].find(' ')+1:]
if name.find('.') < 0 and not name.startswith("Cv"):
logerror(ERROR_004_MISSEDNAMESPACE, "class " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention")
clsnamespaces.append(name)
if do_python_crosscheck and not name.startswith("cv.") and name.startswith("Cv"):
clsnamespaces.append("cv." + name[2:])
if name.startswith("cv."):
name = name[3:]
name = name.replace(".", "::")
# NOTE(review): when `name` is a key of synonims, get() returns the list stored
# in that module-level dict, so the append() below grows the shared list on
# every call.
sns = synonims.get(name, [])
sns.append(name)
for name in sns:
doc = rst.get(name)
if not doc:
#TODO: class is not documented
continue
doc[DOCUMENTED_MARKER] = True
# verify class marker
if not doc.get("isclass"):
logerror(ERROR_001_NOTACLASS, "class " + name + " is not marked as \"class\" in documentation", doc)
else:
# verify base
signature = doc.get("class", "")
signature = signature.replace(" public ", " ")
# NOTE(review): namespaceIdx is not read anywhere in this function.
namespaceIdx = signature.rfind("::")
signature = ("class " + signature).strip()
hdrsignature = ("class " + name + " " + cl[1]).replace(".", "::").replace("cv::","").strip()
if signature != hdrsignature:
logerror(ERROR_003_INCORRECTBASE, "invalid base class documentation\ndocumented: " + signature + "\nactual: " + hdrsignature, doc)
# process structs
for st in structs:
name = st[0][st[0].find(' ')+1:]
if name.find('.') < 0 and not name.startswith("Cv"):
logerror(ERROR_004_MISSEDNAMESPACE, "struct " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention")
clsnamespaces.append(name)
if name.startswith("cv."):
name = name[3:]
name = name.replace(".", "::")
doc = rst.get(name)
if not doc:
#TODO: struct is not documented
continue
doc[DOCUMENTED_MARKER] = True
# verify struct marker
if not doc.get("isstruct"):
logerror(ERROR_002_NOTASTRUCT, "struct " + name + " is not marked as \"struct\" in documentation", doc)
else:
# verify base
signature = doc.get("class", "")
signature = signature.replace(", public ", " ").replace(" public ", " ")
signature = signature.replace(", protected ", " ").replace(" protected ", " ")
signature = signature.replace(", private ", " ").replace(" private ", " ")
signature = ("struct " + signature).strip()
hdrsignature = (st[0] + " " + st[1]).replace("struct cv.", "struct ").replace(".", "::").strip()
if signature != hdrsignature:
logerror(ERROR_003_INCORRECTBASE, "invalid base struct documentation\ndocumented: " + signature + "\nactual: " + hdrsignature, doc)
# NOTE(review): prints the raw header declaration and the documentation entry;
# unlike the other diagnostics it does not go through logerror().
print st, doc
# process functions and methods
# flookup maps the full header name of a function to a list of
# (namespace, parent class, short name, declaration) tuples
flookup = {}
for fn in funcs:
name = fn[0]
parent = None
namespace = None
for cl in clsnamespaces:
if name.startswith(cl + "."):
if cl.startswith(parent or ""):
parent = cl
if parent:
name = name[len(parent) + 1:]
for nm in namespaces:
if parent.startswith(nm + "."):
if nm.startswith(namespace or ""):
namespace = nm
if namespace:
parent = parent[len(namespace) + 1:]
else:
for nm in namespaces:
if name.startswith(nm + "."):
if nm.startswith(namespace or ""):
namespace = nm
if namespace:
name = name[len(namespace) + 1:]
#print namespace, parent, name, fn[0]
if not namespace and not parent and not name.startswith("cv") and not name.startswith("icv") and not name.startswith("CV_"):
logerror(ERROR_004_MISSEDNAMESPACE, "function " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention")
else:
fdescr = (namespace, parent, name, fn)
flookup_entry = flookup.get(fn[0], [])
flookup_entry.append(fdescr)
flookup[fn[0]] = flookup_entry
# compare documented Python signatures with the docstrings of the cv2 module
if do_python_crosscheck:
# NOTE(review): x[3:] drops the first three characters of every entry, which
# fits names starting with "cv." -- entries without that prefix are cut as well.
pyclsnamespaces = ["cv." + x[3:].replace(".", "_") for x in clsnamespaces]
for name, doc in rst.iteritems():
decls = doc.get("decls")
if not decls:
continue
for signature in decls:
if signature[0] == "Python1":
pname = signature[1][:signature[1].find('(')]
try:
fn = getattr(cv2.cv, pname[3:])
# NOTE(review): assumes fn.__doc__ is a string; a missing docstring (None)
# would raise TypeError, which is not caught here -- confirm.
docstr = "cv." + fn.__doc__
except AttributeError:
logerror(ERROR_005_MISSINGPYFUNC, "could not load documented function: cv2." + pname, doc)
continue
docstring = docstr
sign = signature[1]
signature.append(DOCUMENTED_MARKER)
# convert old signature to pydoc style
if docstring.endswith("*"):
docstring = docstring[:-1]
# each substitution below is repeated until the text stops changing
s = None
while s != sign:
s = sign
sign = re.sub(r"^(.*\(.*)\(.*?\)(.*\) *->)", "\\1_\\2", sign)
s = None
while s != sign:
s = sign
sign = re.sub(r"\s*,\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", " [, \\1\\2])", sign)
sign = re.sub(r"\(\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", "([\\1\\2])", sign)
sign = re.sub(r"\)\s*->\s*", ") -> ", sign)
sign = sign.replace("-> convexHull", "-> CvSeq")
sign = sign.replace("-> lines", "-> CvSeq")
sign = sign.replace("-> boundingRects", "-> CvSeq")
sign = sign.replace("-> contours", "-> CvSeq")
sign = sign.replace("-> retval", "-> int")
sign = sign.replace("-> detectedObjects", "-> CvSeqOfCvAvgComp")
# rewrites the part of the docstring after "-> " (the return value description)
def retvalRplace(match):
m = match.group(1)
m = m.replace("CvScalar", "scalar")
m = m.replace("CvMemStorage", "memstorage")
m = m.replace("ROIplImage", "image")
m = m.replace("IplImage", "image")
m = m.replace("ROCvMat", "mat")
m = m.replace("CvMat", "mat")
m = m.replace("double", "float")
m = m.replace("CvSubdiv2DPoint", "point")
m = m.replace("CvBox2D", "Box2D")
m = m.replace("IplConvKernel", "kernel")
m = m.replace("CvHistogram", "hist")
m = m.replace("CvSize", "width,height")
m = m.replace("cvmatnd", "matND")
m = m.replace("CvSeqOfCvConvexityDefect", "convexityDefects")
mm = m.split(',')
if len(mm) > 1:
return "(" + ", ".join(mm) + ")"
else:
return m
docstring = re.sub(r"(?<=-> )(.*)$", retvalRplace, docstring)
docstring = docstring.replace("( [, ", "([")
if sign != docstring:
logerror(ERROR_006_INVALIDPYOLDDOC, "old-style documentation differs from pydoc\npydoc: " + docstring + "\nfixup: " + sign + "\ncvdoc: " + signature[1], doc)
elif signature[0] == "Python2":
# signature[1][4:] skips the first four characters of the documented signature
pname = signature[1][4:signature[1].find('(')]
cvname = "cv." + pname
parent = None
for cl in pyclsnamespaces:
if cvname.startswith(cl + "."):
if cl.startswith(parent or ""):
parent = cl
try:
if parent:
instance, clsname = get_cv2_object(parent)
fn = getattr(instance, cvname[len(parent)+1:])
docstr = fn.__doc__
docprefix = "cv2." + clsname + "."
else:
fn = getattr(cv2, pname)
docstr = fn.__doc__
docprefix = "cv2."
except AttributeError:
if parent:
logerror(ERROR_005_MISSINGPYFUNC, "could not load documented member of " + parent + " class: cv2." + pname, doc)
else:
logerror(ERROR_005_MISSINGPYFUNC, "could not load documented function cv2." + pname, doc)
signature.append(DOCUMENTED_MARKER) # stop subsequent errors
continue
# a docstring may list several variants separated by " or "
docstrings = [docprefix + s.replace("([, ", "([") for s in docstr.split(" or ")]
if not signature[1] in docstrings:
pydocs = "\npydoc: ".join(docstrings)
logerror(ERROR_007_INVALIDPYDOC, "documentation differs from pydoc\npydoc: " + pydocs + "\ncvdoc: " + signature[1], doc)
signature.append(DOCUMENTED_MARKER)
# verify C/C++ signatures
for name, doc in rst.iteritems():
decls = doc.get("decls")
if not decls:
continue
for signature in decls:
if signature[0] == "C" or signature[0] == "C++":
if "template" in (signature[2][1] or ""):
# TODO find a way to validate templates
signature.append(DOCUMENTED_MARKER)
continue
fd = flookup.get(signature[2][0])
if not fd:
# second attempt without the leading "cv."
if signature[2][0].startswith("cv."):
fd = flookup.get(signature[2][0][3:])
if not fd:
continue
else:
signature[2][0] = signature[2][0][3:]
if signature[0] == "C":
ffd = [f for f in fd if not f[0] and not f[1]] # filter out C++ stuff
if not ffd:
if fd[0][1]:
logerror(ERROR_008_CFUNCISNOTGLOBAL, "function " + fd[0][2] + " is documented as C function but is actually member of " + fd[0][1] + " class", doc)
elif fd[0][0]:
logerror(ERROR_008_CFUNCISNOTGLOBAL, "function " + fd[0][2] + " is documented as C function but is actually placed in " + fd[0][0] + " namespace", doc)
fd = ffd
error = None
for f in fd:
match, error = compareSignatures(signature[2], f[3])
if match:
signature.append(DOCUMENTED_MARKER)
break
if signature[-1] != DOCUMENTED_MARKER:
candidates = "\n\t".join([formatSignature(f[3]) for f in fd])
logerror(ERROR_009_OVERLOADNOTFOUND, signature[0] + " function " + signature[2][0].replace(".","::") + " is documented but misses in headers (" + error + ").\nDocumented as:\n\t" + signature[1] + "\nCandidates are:\n\t" + candidates, doc)
signature.append(DOCUMENTED_MARKER) # to stop subsequent error on this function
# verify that all signatures was found in the library headers
for name, doc in rst.iteritems():
# if doc.get(DOCUMENTED_MARKER, False):
# continue # this class/struct was found
if not doc.get(DOCUMENTED_MARKER, False) and (doc.get("isclass", False) or doc.get("isstruct", False)):
if name in doc_signatures_whitelist:
continue
logerror(ERROR_010_UNKNOWNCLASS, "class/struct " + name + " is mentioned in documentation but is not found in OpenCV headers", doc)
for d in doc.get("decls", []):
if d[-1] != DOCUMENTED_MARKER:
if d[0] == "C" or d[0] =="C++" or (do_python_crosscheck and d[0].startswith("Python")):
if d[0][0] == 'C':
sname = d[2][0][3:].replace(".", "::")
if sname in defines:
#TODO: need to find a way to verify #define's
continue
else:
sname = d[1][:d[1].find("(")]
prefixes = [x for x in doc_signatures_whitelist if sname.startswith(x)]
if prefixes:
# TODO: member of template class
continue
logerror(ERROR_011_UNKNOWNFUNC, d[0] + " function " + sname + " is documented but is not found in OpenCV headers. It is documented as:\n\t" + d[1], doc)
# end of process_module
if __name__ == "__main__":
if len(sys.argv) < 2:
print "Usage:\n", os.path.basename(sys.argv[0]), " <module path>"
exit(0)
modules = sys.argv[1:]
if modules[0] == "all":
modules = allmodules
for module in modules:
selfpath = os.path.dirname(os.path.abspath(sys.argv[0]))
module_path = os.path.join(selfpath, "..", "modules", module)
if not os.path.isdir(module_path):
print "Module \"" + module + "\" could not be found."
exit(1)
process_module(module, module_path)

View File

@ -1,193 +0,0 @@
# this is a list of functions, classes and methods
# that are not supposed to be documented in the near future,
# to make the output of check_docs.py script more sensible.
#
# Syntax:
# every line starting with # is a comment
# there can be empty lines
# each line includes either a class name (including all the necessary namespaces),
# or a function/method name
# or a full declaration of a function/method
# if a class name is in the whitelist, all the methods are considered "white-listed" too
# if a method/function name is listed, then all the overload variants are "white-listed".
# that is, to white list a particular overloaded variant of a function/method you need to put
# full declaration into the file
#
######################################### core #####################################
cv::Mat::MSize
cv::Mat::MStep
cv::MatConstIterator
cv::NAryMatIterator
cv::Algorithm
cv::_InputArray
cv::_OutputArray
######################################## imgproc ###################################
CvLSHOperations
cv::FilterEngine
cv::BaseFilter
cv::BaseRowFilter
cv::BaseColumnFilter
cv::Moments
###################################### features2d ##################################
cv::BOWKMeansTrainer::cluster
cv::BOWTrainer::BOWTrainer
cv::BOWTrainer::clear
cv::AdjusterAdapter::clone
cv::MSER::MSER
cv::StarDetector::StarDetector
cv::SIFT::CommonParams::CommonParams
cv::SIFT::SIFT
cv::SURF::SURF
cv::SimpleBlobDetector::Params::Params
cv::FastFeatureDetector::read
cv::MserFeatureDetector::read
cv::StarFeatureDetector::read
cv::SurfFeatureDetector::read
cv::SiftFeatureDetector::read
cv::GoodFeaturesToTrackDetector::read
cv::OrbFeatureDetector::read
cv::FastFeatureDetector::write
cv::MserFeatureDetector::write
cv::StarFeatureDetector::write
cv::SurfFeatureDetector::write
cv::SiftFeatureDetector::write
cv::GoodFeaturesToTrackDetector::write
cv::OrbFeatureDetector::write
cv::DynamicAdaptedFeatureDetector::empty
cv::GridAdaptedFeatureDetector::empty
cv::PyramidAdaptedFeatureDetector::empty
cv::BriefDescriptorExtractor::descriptorSize
cv::SurfDescriptorExtractor::descriptorSize
cv::SiftDescriptorExtractor::descriptorSize
cv::OpponentColorDescriptorExtractor::descriptorSize
cv::OrbDescriptorExtractor::descriptorSize
cv::BriefDescriptorExtractor::descriptorType
cv::SurfDescriptorExtractor::descriptorType
cv::SiftDescriptorExtractor::descriptorType
cv::OpponentColorDescriptorExtractor::descriptorType
cv::OrbDescriptorExtractor::descriptorType
cv::SurfDescriptorExtractor::read
cv::SiftDescriptorExtractor::read
cv::OpponentColorDescriptorExtractor::read
cv::OrbDescriptorExtractor::read
cv::SurfDescriptorExtractor::write
cv::SiftDescriptorExtractor::write
cv::OpponentColorDescriptorExtractor::write
cv::OrbDescriptorExtractor::write
cv::OpponentColorDescriptorExtractor::empty
cv::FlannBasedMatcher::train
cv::FlannBasedMatcher::clear
cv::FlannBasedMatcher::clone
cv::FlannBasedMatcher::isMaskSupported
cv::GenericDescriptorMatcher::GenericDescriptorMatcher
cv::VectorDescriptorMatcher::clear
cv::FernDescriptorMatcher::clear
cv::OneWayDescriptorMatcher::clear
cv::VectorDescriptorMatcher::empty
cv::FernDescriptorMatcher::empty
cv::OneWayDescriptorMatcher::empty
cv::OneWayDescriptorMatcher::read
cv::VectorDescriptorMatcher::isMaskSupported
cv::FernDescriptorMatcher::isMaskSupported
cv::OneWayDescriptorMatcher::isMaskSupported
cv::VectorDescriptorMatcher::train
cv::FernDescriptorMatcher::train
cv::OneWayDescriptorMatcher::train
cv::VectorDescriptorMatcher::read
cv::FernDescriptorMatcher::read
cv::VectorDescriptorMatcher::write
cv::FernDescriptorMatcher::write
cv::OneWayDescriptorMatcher::write
cv::FastAdjuster::good
cv::StarAdjuster::good
cv::SurfAdjuster::good
cv::FastAdjuster::tooFew
cv::StarAdjuster::tooFew
cv::SurfAdjuster::tooFew
cv::FastAdjuster::tooMany
cv::StarAdjuster::tooMany
cv::SurfAdjuster::tooMany
cv::FastAdjuster::clone
cv::StarAdjuster::clone
cv::SurfAdjuster::clone
######################################## calib3d ###################################
CvLevMarq
Mat cv::findFundamentalMat( InputArray points1, InputArray points2, OutputArray mask, int method=FM_RANSAC, double param1=3., double param2=0.99)
Mat findHomography( InputArray srcPoints, InputArray dstPoints, OutputArray mask, int method=0, double ransacReprojThreshold=3);
########################################## ml ######################################
CvBoostTree
CvForestTree
CvSVMKernel
CvSVMSolver
CvDTreeTrainData
CvERTreeTrainData
CvKNearest::CvKNearest
CvKNearest::clear
CvDTreeNode::get_num_valid
CvDTreeNode::set_num_valid
CvDTree::CvDTree
CvDTree::clear
CvDTree::read
CvDTree::write
CvEM::CvEM
CvEM::clear
CvEM::read
CvEM::write
CvSVM::CvSVM
CvSVM::clear
CvSVM::read
CvSVM::write
CvMLData::CvMLData
CvRTrees::CvRTrees
CvRTrees::clear
CvRTrees::read
CvRTrees::write
CvBoost::CvBoost
CvBoost::clear
CvBoost::read
CvBoost::write
CvGBTrees::CvGBTrees
CvGBTrees::clear
CvGBTrees::read
CvGBTrees::write
CvNormalBayesClassifier::CvNormalBayesClassifier
CvNormalBayesClassifier::clear
CvNormalBayesClassifier::read
CvNormalBayesClassifier::write
CvANN_MLP::CvANN_MLP
CvANN_MLP::clear
CvANN_MLP::read
CvANN_MLP::write
CvTrainTestSplit
cvParamLattice
cvDefaultParamLattice

View File

@ -1,428 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# opencvstd documentation build configuration file, created by
# sphinx-quickstart on Mon Feb 14 00:30:43 2011.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os, re
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.pngmath', 'sphinx.ext.ifconfig', 'sphinx.ext.todo', 'sphinx.ext.extlinks', 'ocv', 'sphinx.ext.doctest']
have_plantuml_ext = False
if tags.has('plantuml'):
try:
import sphinxcontrib.plantuml
extensions.append("sphinxcontrib.plantuml")
have_plantuml_ext = True
except ImportError:
print "No module sphinxcontrib.plantuml found, sphinx will not render UML diagrams"
doctest_test_doctest_blocks = 'block'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'OpenCV'
copyright = u'2011-2014, opencv dev team'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The values are parsed from the CV_VERSION_* #define lines of the core
# module's version header. The path is relative to the current working
# directory at the time this file is executed.
with open("../modules/core/include/opencv2/core/version.hpp", "rt") as version_header:
    version_file = version_header.read()
# Raw strings keep the regex escapes (\W, \d) intact on every Python version.
# The leading "[ \t]*" tolerates indented directives; the previous "W*" only
# matched literal 'W' characters, so any indentation made the search fail.
version_major = re.search(r"^[ \t]*#\W*define\W+CV_VERSION_MAJOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
version_minor = re.search(r"^[ \t]*#\W*define\W+CV_VERSION_MINOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
version_patch = re.search(r"^[ \t]*#\W*define\W+CV_VERSION_REVISION\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
version_status = re.search(r'^[ \t]*#\W*define\W+CV_VERSION_STATUS\W+"(.*?)"\W*$', version_file, re.MULTILINE).group(1)
# The short X.Y version.
version = version_major + '.' + version_minor
# The full version, including alpha/beta/rc tags.
release = version_major + '.' + version_minor + '.' + version_patch + version_status
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# The tutorial definitions are always skipped; the UML sources are skipped as
# well whenever the PlantUML extension is not available to render them.
exclude_patterns = ['doc/tutorials/definitions'] + ([] if have_plantuml_ext else ['**/uml/*'])
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
todo_include_todos=True
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'sphinxdoc'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
html_theme_path = ['_themes']
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
html_logo = 'opencv-logo2.png'
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'opencv'
# OpenCV docs use some custom LaTeX macros in the formula. Make sure we include the definitions
pngmath_latex_preamble = r"""
\usepackage{euler}\usepackage[usenames,dvipsnames]{color}\usepackage{amssymb}\usepackage{amsmath}\usepackage{bbm}\usepackage{colortbl}
\newcommand{\matTT}[9]{
\[
\left|\begin{array}{ccc}
#1 & #2 & #3\\
#4 & #5 & #6\\
#7 & #8 & #9
\end{array}\right|
\]
}
\newcommand{\fork}[4]{
\left\{
\begin{array}{l l}
#1 & \mbox{#2}\\
#3 & \mbox{#4}\\
\end{array} \right.}
\newcommand{\forkthree}[6]{
\left\{
\begin{array}{l l}
#1 & \mbox{#2}\\
#3 & \mbox{#4}\\
#5 & \mbox{#6}\\
\end{array} \right.}
\newcommand{\vecthree}[3]{
\begin{bmatrix}
#1\\
#2\\
#3
\end{bmatrix}
}
\newcommand{\vecthreethree}[9]{
\begin{bmatrix}
#1 & #2 & #3\\
#4 & #5 & #6\\
#7 & #8 & #9
\end{bmatrix}
}
"""
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('modules/refman', 'opencv2refman.tex', u'The OpenCV Reference Manual',
u'', 'manual'),
('doc/user_guide/user_guide', 'opencv_user.tex', u'The OpenCV User Guide',
u'', 'manual'),
('doc/tutorials/tutorials', 'opencv_tutorials.tex', u'The OpenCV Tutorials',
u'', 'manual'),
('platforms/android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
u'', 'manual'),
]
# Extra LaTeX preamble shared by every generated manual.
# This must be a raw string: the text is full of LaTeX backslash commands, and
# "\u" (as in \usepackage) is an invalid/truncated unicode escape in a regular
# Python 3 string literal. The raw form produces exactly the same characters
# as the previous non-raw Python 2 literal.
preamble = r"""
\usepackage{euler}
\usepackage[scaled=0.85]{beramono}
\usepackage{mymath}\usepackage{amssymb}\usepackage{amsmath}\usepackage{bbm}\setcounter{secnumdepth}{1}
\usepackage{colortbl}
\usepackage{enumitem}
\setlist{labelsep=1ex}
"""
# Hand the preamble to Sphinx's LaTeX builder.
latex_elements = {'preamble': preamble}
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Additional stuff for the LaTeX preamble.
#latex_preamble = ''
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'opencv', u'The OpenCV Reference Manual',
[u'admin@opencv.org'], 1)
]
# ---- External links for tutorials -----------------
extlinks = {
'basicstructures' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html#%s', None),
'oldbasicstructures' : ('http://docs.opencv.org/modules/core/doc/old_basic_structures.html#%s', None),
'readwriteimage' : ('http://docs.opencv.org/modules/imgcodecs/doc/reading_and_writing_images.html#%s', None),
'readwritevideo' : ('http://docs.opencv.org/modules/videoio/doc/reading_and_writing_video.html#%s', None),
'operationsonarrays' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html#%s', None),
'utilitysystemfunctions' : ('http://docs.opencv.org/modules/core/doc/utility_and_system_functions_and_macros.html#%s', None),
'imgprocfilter' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html#%s', None),
'svms' : ('http://docs.opencv.org/modules/ml/doc/support_vector_machines.html#%s', None),
'drawingfunc' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#%s', None),
'xmlymlpers' : ('http://docs.opencv.org/modules/core/doc/xml_yaml_persistence.html#%s', None),
'rwimg' : ('http://docs.opencv.org/modules/imgcodecs/doc/reading_and_writing_images.html#%s', None),
'hgvideo' : ('http://docs.opencv.org/modules/videoio/doc/reading_and_writing_video.html#%s', None),
'gpuinit' : ('http://docs.opencv.org/modules/gpu/doc/initalization_and_information.html#%s', None),
'gpudatastructure' : ('http://docs.opencv.org/modules/gpu/doc/data_structures.html#%s', None),
'gpuopmatrices' : ('http://docs.opencv.org/modules/gpu/doc/operations_on_matrices.html#%s', None),
'gpuperelement' : ('http://docs.opencv.org/modules/gpu/doc/per_element_operations.html#%s', None),
'gpuimgproc' : ('http://docs.opencv.org/modules/gpu/doc/image_processing.html#%s', None),
'gpumatrixreduct' : ('http://docs.opencv.org/modules/gpu/doc/matrix_reductions.html#%s', None),
'filtering' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html#%s', None),
'flann' : ('http://docs.opencv.org/modules/flann/doc/flann_fast_approximate_nearest_neighbor_search.html#%s', None ),
'calib3d' : ('http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#%s', None ),
'feature2d' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#%s', None ),
'imgproc_geometric' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html#%s', None ),
'miscellaneous_transformations' : ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#%s', None),
'user_interface' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html#%s', None),
'video' : ('http://docs.opencv.org/modules/video/doc/motion_analysis_and_object_tracking.html#%s', None),
# 'opencv_group' : ('http://answers.opencv.org/%s', None),
'opencv_qa' : ('http://answers.opencv.org/%s', None),
'how_to_contribute' : ('http://code.opencv.org/projects/opencv/wiki/How_to_contribute/%s', None),
'cvt_color' : ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=cvtcolor#cvtcolor%s', None),
'imread' : ('http://docs.opencv.org/modules/imgcodecs/doc/reading_and_writing_images.html?highlight=imread#imread%s', None),
'imwrite' : ('http://docs.opencv.org/modules/imgcodecs/doc/reading_and_writing_images.html?highlight=imwrite#imwrite%s', None),
'imshow' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=imshow#imshow%s', None),
'named_window' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=namedwindow#namedwindow%s', None),
'wait_key' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=waitkey#waitkey%s', None),
'add_weighted' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=addweighted#addweighted%s', None),
'saturate_cast' : ('http://docs.opencv.org/modules/core/doc/utility_and_system_functions_and_macros.html?highlight=saturate_cast#saturate-cast%s', None),
'mat_zeros' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=zeros#mat-zeros%s', None),
'convert_to' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html#mat-convertto%s', None),
'create_trackbar' : ('http://docs.opencv.org/modules/highgui/doc/user_interface.html?highlight=createtrackbar#createtrackbar%s', None),
'point' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html#point%s', None),
'scalar' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html#scalar%s', None),
'line' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#line%s', None),
'ellipse' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#ellipse%s', None),
'rectangle' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#rectangle%s', None),
'circle' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#circle%s', None),
'fill_poly' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#fillpoly%s', None),
'rng' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=rng#rng%s', None),
'put_text' : ('http://docs.opencv.org/modules/core/doc/drawing_functions.html#puttext%s', None),
'gaussian_blur' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=gaussianblur#gaussianblur%s', None),
'blur' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=blur#blur%s', None),
'median_blur' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=medianblur#medianblur%s', None),
'bilateral_filter' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=bilateralfilter#bilateralfilter%s', None),
'erode' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=erode#erode%s', None),
'dilate' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=dilate#dilate%s', None),
'get_structuring_element' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=getstructuringelement#getstructuringelement%s', None),
'flood_fill' : ( 'http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=floodfill#floodfill%s', None),
'morphology_ex' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=morphologyex#morphologyex%s', None),
'pyr_down' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=pyrdown#pyrdown%s', None),
'pyr_up' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=pyrup#pyrup%s', None),
'resize' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=resize#resize%s', None),
'threshold' : ('http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html?highlight=threshold#threshold%s', None),
'filter2d' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=filter2d#filter2d%s', None),
'copy_make_border' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=copymakeborder#copymakeborder%s', None),
'sobel' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=sobel#sobel%s', None),
'scharr' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=scharr#scharr%s', None),
'laplacian' : ('http://docs.opencv.org/modules/imgproc/doc/filtering.html?highlight=laplacian#laplacian%s', None),
'canny' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=canny#canny%s', None),
'copy_to' : ('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=copyto#mat-copyto%s', None),
'hough_lines' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlines#houghlines%s', None),
'hough_lines_p' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghlinesp#houghlinesp%s', None),
'hough_circles' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=houghcircles#houghcircles%s', None),
'remap' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=remap#remap%s', None),
'warp_affine' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=warpaffine#warpaffine%s' , None),
'get_rotation_matrix_2d' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=getrotationmatrix2d#getrotationmatrix2d%s', None),
'get_affine_transform' : ('http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html?highlight=getaffinetransform#getaffinetransform%s', None),
'equalize_hist' : ('http://docs.opencv.org/modules/imgproc/doc/histograms.html?highlight=equalizehist#equalizehist%s', None),
'split' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=split#split%s', None),
'calc_hist' : ('http://docs.opencv.org/modules/imgproc/doc/histograms.html?highlight=calchist#calchist%s', None),
'normalize' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=normalize#normalize%s', None),
'match_template' : ('http://docs.opencv.org/modules/imgproc/doc/object_detection.html?highlight=matchtemplate#matchtemplate%s', None),
'min_max_loc' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=minmaxloc#minmaxloc%s', None),
'mix_channels' : ( 'http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=mixchannels#mixchannels%s', None),
'calc_back_project' : ('http://docs.opencv.org/modules/imgproc/doc/histograms.html?highlight=calcbackproject#calcbackproject%s', None),
'compare_hist' : ('http://docs.opencv.org/modules/imgproc/doc/histograms.html?highlight=comparehist#comparehist%s', None),
'corner_harris' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=cornerharris#cornerharris%s', None),
'good_features_to_track' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=goodfeaturestotrack#goodfeaturestotrack%s', None),
'corner_min_eigenval' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=cornermineigenval#cornermineigenval%s', None),
'corner_eigenvals_and_vecs' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=cornereigenvalsandvecs#cornereigenvalsandvecs%s', None),
'corner_sub_pix' : ('http://docs.opencv.org/modules/imgproc/doc/feature_detection.html?highlight=cornersubpix#cornersubpix%s', None),
'find_contours' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=findcontours#findcontours%s', None),
'convex_hull' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=convexhull#convexhull%s', None),
'draw_contours' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=drawcontours#drawcontours%s', None),
'bounding_rect' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=boundingrect#boundingrect%s', None),
'min_enclosing_circle' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=minenclosingcircle#minenclosingcircle%s', None),
'min_area_rect' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=minarearect#minarearect%s', None),
'fit_ellipse' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=fitellipse#fitellipse%s', None),
'moments' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=moments#moments%s', None),
'contour_area' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=contourarea#contourarea%s', None),
'arc_length' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=arclength#arclength%s', None),
'point_polygon_test' : ('http://docs.opencv.org/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html?highlight=pointpolygontest#pointpolygontest%s', None),
'feature_detection_and_description' : ('http://docs.opencv.org/modules/features2d/doc/feature_detection_and_description.html#%s', None),
'feature_detector' : ( 'http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=featuredetector#FeatureDetector%s', None),
'feature_detector_detect' : ('http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=detect#featuredetector-detect%s', None ),
'surf_feature_detector' : ('http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_feature_detectors.html?highlight=surffeaturedetector#surffeaturedetector%s', None ),
'draw_keypoints' : ('http://docs.opencv.org/modules/features2d/doc/drawing_function_of_keypoints_and_matches.html?highlight=drawkeypoints#drawkeypoints%s', None ),
'descriptor_extractor': ( 'http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=descriptorextractor#descriptorextractor%s', None ),
'descriptor_extractor_compute' : ( 'http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=compute#descriptorextractor-compute%s', None ),
'surf_descriptor_extractor' : ( 'http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_extractors.html?highlight=surfdescriptorextractor#surfdescriptorextractor%s', None ),
'draw_matches' : ( 'http://docs.opencv.org/modules/features2d/doc/drawing_function_of_keypoints_and_matches.html?highlight=drawmatches#drawmatches%s', None ),
'find_homography' : ('http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html?highlight=findhomography#findhomography%s', None),
'perspective_transform' : ('http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=perspectivetransform#perspectivetransform%s', None ),
'flann_based_matcher' : ('http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_matchers.html?highlight=flannbasedmatcher#flannbasedmatcher%s', None),
'brute_force_matcher' : ('http://docs.opencv.org/modules/features2d/doc/common_interfaces_of_descriptor_matchers.html?highlight=bruteforcematcher#bruteforcematcher%s', None ),
'cascade_classifier' : ('http://docs.opencv.org/modules/objdetect/doc/cascade_classification.html?highlight=cascadeclassifier#cascadeclassifier%s', None ),
'cascade_classifier_load' : ('http://docs.opencv.org/modules/objdetect/doc/cascade_classification.html?highlight=load#cascadeclassifier-load%s', None ),
'cascade_classifier_detect_multiscale' : ('http://docs.opencv.org/modules/objdetect/doc/cascade_classification.html?highlight=detectmultiscale#cascadeclassifier-detectmultiscale%s', None ),
'background_subtractor' : ('http://docs.opencv.org/modules/video/doc/motion_analysis_and_object_tracking.html?highlight=backgroundsubtractor#backgroundsubtractor%s', None),
'background_subtractor_mog' : ('http://docs.opencv.org/modules/video/doc/motion_analysis_and_object_tracking.html?highlight=backgroundsubtractorMOG#backgroundsubtractormog%s', None),
'background_subtractor_mog_two' : ('http://docs.opencv.org/modules/video/doc/motion_analysis_and_object_tracking.html?highlight=backgroundsubtractorMOG2#backgroundsubtractormog2%s', None),
'video_capture' : ('http://docs.opencv.org/modules/videoio/doc/reading_and_writing_video.html?highlight=videocapture#videocapture%s', None),
'ippa_convert': ('http://docs.opencv.org/modules/core/doc/ipp_async_converters.html#%s', None),
'ptr':('http://docs.opencv.org/modules/core/doc/basic_structures.html?highlight=Ptr#Ptr%s', None)
}

21
doc/footer.html Normal file
View File

@ -0,0 +1,21 @@
<!-- HTML footer for doxygen 1.8.6-->
<!-- start footer part -->
<!--BEGIN GENERATE_TREEVIEW-->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
$navpath
<li class="footer">$generatedby
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="$relpath^doxygen.png" alt="doxygen"/></a> $doxygenversion </li>
</ul>
</div>
<!--END GENERATE_TREEVIEW-->
<!--BEGIN !GENERATE_TREEVIEW-->
<hr class="footer"/><address class="footer"><small>
$generatedby &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="$relpath^doxygen.png" alt="doxygen"/>
</a> $doxygenversion
</small></address>
<!--END !GENERATE_TREEVIEW-->
</body>
</html>

View File

@ -1,676 +0,0 @@
<html>
<head>
<meta http-equiv=Content-Type content="text/html; charset=windows-1251">
<meta name=Generator content="Microsoft Word 11 (filtered)">
<title>Object Detection Using Haar-like Features with Cascade of Boosted
Classifiers</title>
<style>
<!--
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0in;
margin-bottom:.0001pt;
text-align:justify;
font-size:12.0pt;
font-family:"Times New Roman";}
h1
{margin-top:12.0pt;
margin-right:0in;
margin-bottom:3.0pt;
margin-left:0in;
text-align:justify;
page-break-after:avoid;
font-size:16.0pt;
font-family:Arial;}
h2
{margin-top:12.0pt;
margin-right:0in;
margin-bottom:3.0pt;
margin-left:0in;
text-align:justify;
page-break-after:avoid;
font-size:14.0pt;
font-family:Arial;
font-style:italic;}
h3
{margin-top:12.0pt;
margin-right:0in;
margin-bottom:3.0pt;
margin-left:0in;
text-align:justify;
page-break-after:avoid;
font-size:13.0pt;
font-family:Arial;}
span.Typewch
{font-family:"Courier New";
font-weight:bold;}
@page Section1
{size:595.3pt 841.9pt;
margin:56.7pt 88.0pt 63.2pt 85.05pt;}
div.Section1
{page:Section1;}
/* List Definitions */
ol
{margin-bottom:0in;}
ul
{margin-bottom:0in;}
-->
</style>
</head>
<body lang=RU>
<div class=Section1>
<h1><span lang=EN-US>Rapid Object Detection With A Cascade of Boosted
Classifiers Based on Haar-like Features</span></h1>
<h2><span lang=EN-US>Introduction</span></h2>
<p class=MsoNormal><span lang=EN-US>This document describes how to train and
use a cascade of boosted classifiers for rapid object detection. A large set of
over-complete haar-like features provide the basis for the simple individual
classifiers. Examples of object detection tasks are face, eye and nose
detection, as well as logo detection. </span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>The sample detection task in this document
is logo detection, since logo detection does not require the collection of
large set of registered and carefully marked object samples. Instead we assume
that from one prototype image, a very large set of derived object examples can
be derived (</span><span class=Typewch><span lang=EN-US>createsamples</span></span><span
lang=EN-US> utility, see below).</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>A detailed description of the training/evaluation
algorithm can be found in [1] and [2].</span></p>
<h2><span lang=EN-US>Samples Creation</span></h2>
<p class=MsoNormal><span lang=EN-US>For training, a set of samples must be
collected. There are two sample types: negative samples and positive samples.
Negative samples correspond to non-object images. Positive samples correspond
to object images.</span></p>
<h3><span lang=EN-US>Negative Samples</span></h3>
<p class=MsoNormal><span lang=EN-US>Negative samples are taken from arbitrary
images. These images must not contain object representations. Negative samples
are specified via a background description file. It is a text file in which each
text line contains the filename (relative to the directory of the description
file) of a negative sample image. This file must be created manually. Note that
negative samples and negative sample images are also called background samples or
background sample images; the terms are used interchangeably in this document.</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Example of negative description file:</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Directory structure:</span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>/img</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>  img1.jpg</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>  img2.jpg</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>bg.txt</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>&nbsp;</span></span></p>
<p class=MsoNormal><span class=Typewch><span style='font-family:"Times New Roman";
font-weight:normal'>File </span></span><span class=Typewch><span lang=EN-US>bg.txt:</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>img/img1.jpg</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>img/img2.jpg</span></span></p>
<h3><span lang=EN-US>Positive Samples</span></h3>
<p class=MsoNormal><span lang=EN-US>Positive samples are created by </span><span
class=Typewch><span lang=EN-US>createsamples</span></span><span lang=EN-US>
utility. They may be created from single object image or from collection of
previously marked up images.<br>
<br>
</span></p>
<p class=MsoNormal><span lang=EN-US>The single object image may for instance
contain a company logo. Then a large set of positive samples is created from
the given object image by randomly rotating it, changing the logo color, and
placing the logo on an arbitrary background.</span></p>
<p class=MsoNormal><span lang=EN-US>The amount and range of randomness can be
controlled by command line arguments. </span></p>
<p class=MsoNormal><span lang=EN-US>Command line arguments:</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- vec &lt;vec_file_name&gt;</span></span><span
lang=EN-US> </span></p>
<p class=MsoNormal style='margin-left:17.1pt'><span lang=EN-US>name of the
output file containing the positive samples for training</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- img &lt;image_file_name&gt;</span></span><span
lang=EN-US> </span></p>
<p class=MsoNormal style='margin-left:17.1pt'><span lang=EN-US>source object
image (e.g., a company logo)</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- bg &lt;background_file_name&gt;</span></span><span
lang=EN-US> </span></p>
<p class=MsoNormal style='margin-left:17.1pt'><span lang=EN-US>background
description file; contains a list of images into which randomly distorted
versions of the object are pasted for positive sample generation</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- num &lt;number_of_samples&gt;</span></span><span
lang=EN-US> </span></p>
<p class=MsoNormal style='margin-left:17.1pt'><span lang=EN-US>number of
positive samples to generate </span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- bgcolor &lt;background_color&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
lang=EN-US>      background color (currently grayscale images are assumed); the
background color denotes the transparent color. Since there might be
compression artifacts, the amount of color tolerance can be specified by </span><span
class=Typewch><span lang=EN-US>bgthresh</span></span><span class=Typewch><span
lang=EN-US style='font-family:Arial;font-weight:normal'>. </span></span><span
lang=EN-US>All pixels between </span><span class=Typewch><span lang=EN-US>bgcolor-bgthresh</span></span><span
lang=EN-US> and </span><span class=Typewch><span lang=EN-US>bgcolor+bgthresh</span></span><span
lang=EN-US> are regarded as transparent.</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- bgthresh &lt;background_color_threshold&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- inv</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
lang=EN-US>      if specified, the colors will be inverted</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- randinv</span></span><span lang=EN-US> </span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
lang=EN-US>      if specified, the colors will be inverted randomly</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- maxidev &lt;max_intensity_deviation&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>  </span></span><span lang=EN-US>maximal
intensity deviation of foreground samples pixels</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- maxxangle &lt;max_x_rotation_angle&gt;,</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- maxyangle &lt;max_y_rotation_angle&gt;,</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- maxzangle &lt;max_z_rotation_angle&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
lang=EN-US>      maximum rotation angles in radians</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>-show</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
lang=EN-US>      if specified, each sample will be shown. Pressing Esc will
continue creation process without samples showing. Useful debugging option.</span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- w &lt;sample_width&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>  </span></span><span class=Typewch><span
lang=EN-US style='font-family:"Times New Roman";font-weight:normal'>width (in
pixels) of the output samples</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- h &lt;sample_height&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>  </span></span><span class=Typewch><span
lang=EN-US style='font-family:"Times New Roman";font-weight:normal'>height (in
pixels) of the output samples</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>&nbsp;</span></span></p>
<p class=MsoNormal><span lang=EN-US>The following procedure is used to create a
sample object instance:</span></p>
<p class=MsoNormal><span lang=EN-US>The source image is rotated randomly around
all three axes. The chosen angle is limited by</span><span class=Typewch><span
lang=EN-US> -max?angle</span></span><span lang=EN-US>. Next, pixels with
intensities in the range of </span><span class=Typewch><span lang=EN-US>[bg_color-bg_color_threshold;
bg_color+bg_color_threshold]</span></span><span lang=EN-US> are regarded as
transparent. White noise is added to the intensities of the foreground. If </span><span
class=Typewch><span lang=EN-US>inv</span></span><span lang=EN-US> key is
specified then foreground pixel intensities are inverted. If </span><span
class=Typewch><span lang=EN-US>randinv</span></span><span lang=EN-US> key is
specified then it is randomly selected whether for this sample inversion will
be applied. Finally, the obtained image is placed onto arbitrary background
from the background description file, resized to the pixel size specified by </span><span
class=Typewch><span lang=EN-US>w</span></span><span lang=EN-US> and </span><span
class=Typewch><span lang=EN-US>h</span></span><span lang=EN-US> and stored
into the file specified by the </span><span class=Typewch><span lang=EN-US>vec</span></span><span
lang=EN-US> command line parameter.</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Positive samples also may be obtained from
a collection of previously marked up images. This collection is described by
a text file similar to the background description file. Each line of this file
corresponds to one image of the collection. The first element of the line is the image file
name. It is followed by the number of object instances. The following numbers are
the coordinates of bounding rectangles (x, y, width, height).</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Example of description file:</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Directory structure:</span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>/img</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>  img1.jpg</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>  img2.jpg</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>info.dat</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>&nbsp;</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US style='font-family:
"Times New Roman";font-weight:normal'>File </span></span><span class=Typewch><span
lang=EN-US>info.dat:</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>img/img1.jpg  1  140
100 45 45</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>img/img2.jpg  2  100
200 50 50   50 30 25 25</span></span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Image </span><span class=Typewch><span
lang=EN-US>img1.jpg</span></span><span lang=EN-US> contains single object
instance with bounding rectangle (140, 100, 45, 45). Image </span><span
class=Typewch><span lang=EN-US>img2.jpg</span></span><span lang=EN-US> contains
two object instances.</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>In order to create positive samples from
such collection </span><span class=Typewch><span lang=EN-US>info</span></span><span
lang=EN-US> argument should be specified instead of </span><span class=Typewch><span
lang=EN-US>img</span></span><span class=Typewch><span style='font-family:"Times New Roman";
font-weight:normal'>:</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- info &lt;collection_file_name&gt;</span></span><span
lang=EN-US> </span></p>
<p class=MsoNormal style='margin-left:17.1pt'><span lang=EN-US>description file
of marked up images collection</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>The scheme of sample creation in this case
is as follows. The object instances are taken from images. Then they are
resized to samples size and stored in output file. No distortion is applied, so
the only affecting arguments are </span><span class=Typewch><span lang=EN-US>w</span></span><span
lang=EN-US>, </span><span class=Typewch><span lang=EN-US>-h</span></span><span
lang=EN-US>, </span><span class=Typewch><span lang=EN-US>-show</span></span><span
lang=EN-US> and </span><span class=Typewch><span lang=EN-US>num</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>.</span></span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>createsamples</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> utility may be used for examining samples stored in positive samples
file. In order to do this only </span></span><span class=Typewch><span
lang=EN-US>vec</span></span><span class=Typewch><span lang=EN-US
style='font-family:"Times New Roman";font-weight:normal'>, </span></span><span
class=Typewch><span lang=EN-US>w</span></span><span class=Typewch><span
lang=EN-US style='font-family:"Times New Roman";font-weight:normal'> and </span></span><span
class=Typewch><span lang=EN-US>h</span></span><span class=Typewch><span
lang=EN-US style='font-family:"Times New Roman";font-weight:normal'> parameters
should be specified.</span></span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Note that for training, it does not matter
how positive samples files are generated. So the </span><span class=Typewch><span
lang=EN-US>createsamples</span></span><span lang=EN-US> utility is only one way
to collect/create a vector file of positive samples.</span></p>
<h2><span lang=EN-US>Training</span></h2>
<p class=MsoNormal><span lang=EN-US>The next step after samples creation is
training of classifier. It is performed by the </span><span class=Typewch><span
lang=EN-US>haartraining</span></span><span lang=EN-US> utility.</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Command line arguments:</span><span
class=Typewch><span lang=EN-US> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- data &lt;dir_name&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      directory name in which the trained classifier is stored</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- vec &lt;vec_file_name&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      file name of positive sample file (created by </span></span><span
class=Typewch><span lang=EN-US>createsamples</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> utility or by any other means)</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- bg &lt;background_file_name&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      background description file</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- npos &lt;number_of_positive_samples&gt;,</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- nneg &lt;number_of_negative_samples&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      number of positive/negative samples used in training of each
classifier stage. Reasonable values are npos = 7000 and nneg = 3000.</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- nstages &lt;number_of_stages&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>  </span></span><span class=Typewch><span
lang=EN-US style='font-family:"Times New Roman";font-weight:normal'>number of
stages to be trained</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- nsplits &lt;number_of_splits&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      determines the weak classifier used in stage classifiers. If </span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman"'>1</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>, then a simple stump classifier is used, if </span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman"'>2</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> and more, then CART classifier with </span></span><span class=Typewch><span
lang=EN-US>number_of_splits</span></span><span class=Typewch><span lang=EN-US
style='font-family:"Times New Roman";font-weight:normal'> internal (split)
nodes is used</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- mem &lt;memory_in_MB&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      Available memory in MB for precalculation. The more memory you
have the faster the training process</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- sym (default),</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- nonsym</span></span><span class=Typewch><span
lang=EN-US style='font-family:"Times New Roman";font-weight:normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      specifies whether the object class under training has vertical
symmetry or not. Vertical symmetry speeds up training process. For instance,
frontal faces exhibit vertical symmetry</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- minhitrate &lt;min_hit_rate&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      minimal desired hit rate for each stage classifier. Overall hit
rate may be estimated as </span></span><span class=Typewch><span lang=EN-US>(min_hit_rate^number_of_stages)</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- maxfalsealarm &lt;max_false_alarm_rate&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      maximal desired false alarm rate for each stage classifier. </span></span><span
class=Typewch><span style='font-family:"Times New Roman";font-weight:normal'>Overall
false alarm rate may be estimated as</span></span><span class=Typewch><span
lang=EN-US> (max_false_alarm_rate^number_of_stages)</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- weighttrimming &lt;weight_trimming&gt;</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>  </span></span><span class=Typewch><span
lang=EN-US style='font-family:"Times New Roman";font-weight:normal'>Specifies
whether and how much weight trimming should be used. A decent choice is 0.90.</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- eqw</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- mode &lt;BASIC (default) | CORE | ALL&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      selects the type of haar features set used in training. BASIC use
only upright features, while ALL uses the full set of upright and 45 degree
rotated feature set. See [1] for more details.</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- w &lt;sample_width&gt;,</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- h &lt;sample_height&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      Size of training samples (in pixels). Must have exactly the same
values as used during training samples creation (utility </span></span><span
class=Typewch><span lang=EN-US>createsamples</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>)</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US style='font-family:
"Times New Roman";font-weight:normal'>&nbsp;</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US style='font-family:
"Times New Roman";font-weight:normal'>Note: to take advantage of multiple
processors, a compiler that supports the OpenMP 1.0 standard should be used.</span></span></p>
<h2><span lang=EN-US>Application</span></h2>
<p class=MsoNormal><span lang=EN-US>OpenCV cvHaarDetectObjects() function (in
particular haarFaceDetect demo) is used for detection.</span></p>
<h3><span lang=EN-US>Test Samples</span></h3>
<p class=MsoNormal><span lang=EN-US>In order to evaluate the performance of
trained classifier a collection of marked up images is needed. When such
collection is not available test samples may be created from single object
image by </span><span class=Typewch><span lang=EN-US>createsamples</span></span><span
lang=EN-US> utility. The scheme of test samples creation in this case is
similar to training samples creation since each test sample is a background
image into which a randomly distorted and randomly scaled instance of the
object picture is pasted at a random position. </span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>If both </span><span class=Typewch><span
lang=EN-US>img</span></span><span lang=EN-US> and </span><span class=Typewch><span
lang=EN-US>info</span></span><span lang=EN-US> arguments are specified then
test samples will be created by </span><span class=Typewch><span lang=EN-US>createsamples</span></span><span
lang=EN-US> utility. The sample image is arbitrarily distorted as
described above, then it is placed at a random location on a background image and
stored. The corresponding description line is added to the file specified by </span><span
class=Typewch><span lang=EN-US>info</span></span><span lang=EN-US> argument.</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>The </span><span class=Typewch><span
lang=EN-US>w</span></span><span lang=EN-US> and </span><span class=Typewch><span
lang=EN-US>h</span></span><span lang=EN-US> keys determine the minimal size of
placed object picture.</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>The test image file name format is as
follows:</span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US>imageOrderNumber_x_y_width_height.jpg</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>, where </span></span><span class=Typewch><span lang=EN-US>x</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>, </span></span><span class=Typewch><span lang=EN-US>y</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>, </span></span><span class=Typewch><span lang=EN-US>width</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> and </span></span><span class=Typewch><span lang=EN-US>height</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> are the coordinates of placed object bounding rectangle.</span></span></p>
<p class=MsoNormal><span class=Typewch><span lang=EN-US style='font-family:
"Times New Roman";font-weight:normal'>Note that you should use a background
images set different from the background image set used during training.</span></span></p>
<h3><span class=Typewch><span lang=EN-US style='font-family:"Times New Roman"'>Performance
Evaluation</span></span></h3>
<p class=MsoNormal><span lang=EN-US>In order to evaluate the performance of the
classifier </span><span class=Typewch><span lang=EN-US>performance</span></span><span
lang=EN-US> utility may be used. It takes a collection of marked up images,
applies the classifier and outputs the performance, i.e. number of found
objects, number of missed objects, number of false alarms and other
information.</span></p>
<p class=MsoNormal><span lang=EN-US>&nbsp;</span></p>
<p class=MsoNormal><span lang=EN-US>Command line arguments:</span><span
class=Typewch><span lang=EN-US> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- data &lt;dir_name&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      directory name in which the trained classifier is stored</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- info &lt;collection_file_name&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      file with test samples description</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- maxSizeDiff &lt;max_size_difference&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>,</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- maxPosDiff &lt;max_position_difference&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      determine the criterion of reference and detected rectangles
coincidence. Default values are 1.5 and 0.3 respectively.</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- sf &lt;scale_factor&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>,</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      detection parameter. Default value is 1.2.</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- w &lt;sample_width&gt;,</span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US>- h &lt;sample_height&gt;</span></span><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'> </span></span></p>
<p class=MsoNormal style='margin-left:17.1pt;text-indent:-17.1pt'><span
class=Typewch><span lang=EN-US style='font-family:"Times New Roman";font-weight:
normal'>      Size of training samples (in pixels). Must have exactly the same
values as used during training (utility </span></span><span class=Typewch><span
lang=EN-US>haartraining</span></span><span class=Typewch><span lang=EN-US
style='font-family:"Times New Roman";font-weight:normal'>)</span></span></p>
<h2><span lang=EN-US>References</span></h2>
<p class=MsoNormal><span lang=EN-US>[1] Rainer Lienhart and Jochen Maydt. An
Extended Set of Haar-like Features for Rapid Object Detection. Submitted to
ICIP2002.</span></p>
<p class=MsoNormal><span lang=EN-US>[2] Alexander Kuranov, Rainer Lienhart, and
Vadim Pisarevsky. An Empirical Analysis of Boosting Algorithms for Rapid
Objects With an Extended Set of Haar-like Features. Intel Technical Report
MRL-TR-July02-01, 2002.</span></p>
</div>
</body>
</html>

56
doc/header.html Normal file
View File

@ -0,0 +1,56 @@
<!-- HTML header for doxygen 1.8.6-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen $doxygenversion"/>
<!--BEGIN PROJECT_NAME--><title>$projectname: $title</title><!--END PROJECT_NAME-->
<!--BEGIN !PROJECT_NAME--><title>$title</title><!--END !PROJECT_NAME-->
<link href="$relpath^opencv.ico" rel="shortcut icon" type="image/x-icon" />
<link href="$relpath^tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="$relpath^jquery.js"></script>
<script type="text/javascript" src="$relpath^dynsections.js"></script>
$treeview
$search
$mathjax
<link href="$relpath^$stylesheet" rel="stylesheet" type="text/css" />
$extrastylesheet
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<!--BEGIN TITLEAREA-->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<!--BEGIN PROJECT_LOGO-->
<td id="projectlogo"><img alt="Logo" src="$relpath^$projectlogo"/></td>
<!--END PROJECT_LOGO-->
<!--BEGIN PROJECT_NAME-->
<td style="padding-left: 0.5em;">
<div id="projectname">$projectname
<!--BEGIN PROJECT_NUMBER-->&#160;<span id="projectnumber">$projectnumber</span><!--END PROJECT_NUMBER-->
</div>
<!--BEGIN PROJECT_BRIEF--><div id="projectbrief">$projectbrief</div><!--END PROJECT_BRIEF-->
</td>
<!--END PROJECT_NAME-->
<!--BEGIN !PROJECT_NAME-->
<!--BEGIN PROJECT_BRIEF-->
<td style="padding-left: 0.5em;">
<div id="projectbrief">$projectbrief</div>
</td>
<!--END PROJECT_BRIEF-->
<!--END !PROJECT_NAME-->
<!--BEGIN DISABLE_INDEX-->
<!--BEGIN SEARCHENGINE-->
<td>$searchbox</td>
<!--END SEARCHENGINE-->
<!--END DISABLE_INDEX-->
</tr>
</tbody>
</table>
</div>
<!--END TITLEAREA-->
<!-- end header part -->

1616
doc/ocv.py

File diff suppressed because it is too large Load Diff

View File

@ -1,620 +0,0 @@
%
% The OpenCV cheatsheet structure:
%
% opencv data structures
% point, rect
% matrix
%
% creating matrices
% from scratch
% from previously allocated data: plain arrays, vectors
% converting to/from old-style structures
%
% element access, iteration through matrix elements
%
% copying & shuffling matrix data
% copying & converting the whole matrices
% extracting matrix parts & copying them
% split, merge & mixchannels
% flip, transpose, repeat
%
% matrix & image operations:
% arithmetics & logic
% matrix multiplication, inversion, determinant, trace, SVD
% statistical functions
%
% basic image processing:
% image filtering with predefined & custom filters
% example: finding local maxima
% geometrical transformations, resize, warpaffine, perspective & remap.
% color space transformations
% histograms & back projections
% contours
%
% i/o:
% displaying images
% saving/loading to/from file (XML/YAML & image file formats)
% reading videos & camera feed, writing videos
%
% operations on point sets:
% findcontours, bounding box, convex hull, min area rect,
% transformations, to/from homogeneous coordinates
% matching point sets: homography, fundamental matrix, rigid transforms
%
% 3d:
% camera calibration, pose estimation.
% uncalibrated case
% stereo: rectification, running stereo correspondence, obtaining the depth.
%
% feature detection:
% features2d toolbox
%
% object detection:
% using a classifier running on a sliding window: cascadeclassifier + hog.
% using salient point features: features2d -> matching
%
% statistical data processing:
% clustering (k-means),
% classification + regression (SVM, boosting, k-nearest),
% compressing data (PCA)
%
\documentclass[10pt,landscape]{article}
\usepackage[usenames,dvips,pdftex]{color}
\usepackage{multicol}
\usepackage{calc}
\usepackage{ifthen}
\usepackage[pdftex]{color,graphicx}
\usepackage[landscape]{geometry}
\usepackage{hyperref}
\usepackage[T1]{fontenc}
\hypersetup{colorlinks=true, filecolor=black, linkcolor=black, urlcolor=blue, citecolor=black}
\graphicspath{{./images/}}
% This sets page margins to .5 inch if using letter paper, and to 1cm
% if using A4 paper. (This probably isn't strictly necessary.)
% If using another size paper, use default 1cm margins.
\ifthenelse{\lengthtest { \paperwidth = 11in}}
{ \geometry{top=.5in,left=.5in,right=.5in,bottom=.5in} }
{\ifthenelse{ \lengthtest{ \paperwidth = 297mm}}
{\geometry{top=1cm,left=1cm,right=1cm,bottom=1cm} }
{\geometry{top=1cm,left=1cm,right=1cm,bottom=1cm} }
}
% Turn off header and footer
% \pagestyle{empty}
% Redefine section commands to use less space
\makeatletter
\renewcommand{\section}{\@startsection{section}{1}{0mm}%
{-1ex plus -.5ex minus -.2ex}%
{0.5ex plus .2ex}%
{\normalfont\large\bfseries}}
\renewcommand{\subsection}{\@startsection{subsection}{2}{0mm}%
{-1ex plus -.5ex minus -.2ex}%
{0.5ex plus .2ex}%
{\normalfont\normalsize\bfseries}}
\renewcommand{\subsubsection}{\@startsection{subsubsection}{3}{0mm}%
{-1ex plus -.5ex minus -.2ex}%
{1ex plus .2ex}%
{\normalfont\small\bfseries}}
\makeatother
% Define BibTeX command
\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
% Don't print section numbers
\setcounter{secnumdepth}{0}
%\setlength{\parindent}{0pt}
%\setlength{\parskip}{0pt plus 0.5ex}
\newcommand{\ccode}[1]{
\begin{alltt}
#1
\end{alltt}
}
% -----------------------------------------------------------------------
\begin{document}
\raggedright
\footnotesize
\begin{multicols}{3}
% multicol parameters
% These lengths are set only within the two main columns
%\setlength{\columnseprule}{0.25pt}
\setlength{\premulticols}{1pt}
\setlength{\postmulticols}{1pt}
\setlength{\multicolsep}{1pt}
\setlength{\columnsep}{2pt}
\begin{center}
\Large{\textbf{OpenCV 2.4 Cheat Sheet (C++)}} \\
\end{center}
\newlength{\MyLen}
\settowidth{\MyLen}{\texttt{letterpaper}/\texttt{a4paper} \ }
%\section{Filesystem Concepts}
%\begin{tabular}{@{}p{\the\MyLen}%
% @{}p{\linewidth-\the\MyLen}@{}}
%\texttt{\href{http://www.ros.org/wiki/Packages}{package}} & The lowest level of ROS software organization. \\
%\texttt{\href{http://www.ros.org/wiki/Manifest}{manifest}} & Description of a ROS package. \\
%\texttt{\href{http://www.ros.org/wiki/Stack}{stack}} & Collections of ROS packages that form a higher-level library. \\
%\texttt{\href{http://www.ros.org/wiki/Stack Manifest}{stack manifest}} & Description of a ROS stack.
%\end{tabular}
\emph{The OpenCV C++ reference manual is here: \url{http://docs.opencv.org}. Use \textbf{Quick Search} to find descriptions of the particular functions and classes}
\section{Key OpenCV Classes}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Point_}{Point\_}} & Template 2D point class \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Point3_}{Point3\_}} & Template 3D point class \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Size_}{Size\_}} & Template size (width, height) class \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Vec}{Vec}} & Template short vector class \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Matx}{Matx}} & Template small matrix class \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Scalar_}{Scalar}} & 4-element vector \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Rect_}{Rect}} & Rectangle \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Range}{Range}} & Integer value range \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Mat}{Mat}} & 2D or multi-dimensional dense array (can be used to store matrices, images, histograms, feature descriptors, voxel volumes etc.)\\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#sparsemat}{SparseMat}} & Multi-dimensional sparse array \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Ptr}{Ptr}} & Template smart pointer class
\end{tabular}
\section{Matrix Basics}
\begin{tabbing}
\textbf{Cr}\=\textbf{ea}\=\textbf{te}\={} \textbf{a matrix} \\
\> \texttt{Mat image(240, 320, CV\_8UC3);} \\
\textbf{[Re]allocate a pre-declared matrix}\\
\> \texttt{image.\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-create}{create}(480, 640, CV\_8UC3);}\\
\textbf{Create a matrix initialized with a constant}\\
\> \texttt{Mat A33(3, 3, CV\_32F, Scalar(5));} \\
\> \texttt{Mat B33(3, 3, CV\_32F); B33 = Scalar(5);} \\
\> \texttt{Mat C33 = Mat::ones(3, 3, CV\_32F)*5.;} \\
\> \texttt{Mat D33 = Mat::zeros(3, 3, CV\_32F) + 5.;} \\
\textbf{Create a matrix initialized with specified values}\\
\> \texttt{double a = CV\_PI/3;} \\
\> \texttt{Mat A22 = (Mat\_<float>(2, 2) <<} \\
\> \> \texttt{cos(a), -sin(a), sin(a), cos(a));} \\
\> \texttt{float B22data[] = \{cos(a), -sin(a), sin(a), cos(a)\};} \\
\> \texttt{Mat B22 = Mat(2, 2, CV\_32F, B22data).clone();}\\
\textbf{Initialize a random matrix}\\
\> \texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#randu}{randu}(image, Scalar(0), Scalar(256)); }\textit{// uniform dist}\\
\> \texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#randn}{randn}(image, Scalar(128), Scalar(10)); }\textit{// Gaussian dist}\\
\textbf{Convert matrix to/from other structures}\\
\>\textbf{(without copying the data)}\\
\> \texttt{Mat image\_alias = image;}\\
\> \texttt{float* Idata=new float[480*640*3];}\\
\> \texttt{Mat I(480, 640, CV\_32FC3, Idata);}\\
\> \texttt{vector<Point> iptvec(10);}\\
\> \texttt{Mat iP(iptvec); }\textit{// iP -- 10x1 CV\_32SC2 matrix}\\
\> \texttt{IplImage* oldC0 = cvCreateImage(cvSize(320,240),16,1);}\\
\> \texttt{Mat newC = cvarrToMat(oldC0);}\\
\> \texttt{IplImage oldC1 = newC; CvMat oldC2 = newC;}\\
\textbf{... (with copying the data)}\\
\> \texttt{Mat newC2 = cvarrToMat(oldC0).clone();}\\
\> \texttt{vector<Point2f> ptvec = Mat\_<Point2f>(iP);}\\
\>\\
\textbf{Access matrix elements}\\
\> \texttt{A33.at<float>(i,j) = A33.at<float>(j,i)+1;}\\
\> \texttt{Mat dyImage(image.size(), image.type());}\\
\> \texttt{for(int y = 1; y < image.rows-1; y++) \{}\\
\> \> \texttt{Vec3b* prevRow = image.ptr<Vec3b>(y-1);}\\
\> \> \texttt{Vec3b* nextRow = image.ptr<Vec3b>(y+1);}\\
\> \> \texttt{for(int x = 0; x < image.cols; x++)}\\
\> \> \> \texttt{for(int c = 0; c < 3; c++)}\\
\> \> \> \texttt{ dyImage.at<Vec3b>(y,x)[c] =}\\
\> \> \> \texttt{ saturate\_cast<uchar>(}\\
\> \> \> \texttt{ nextRow[x][c] - prevRow[x][c]);}\\
\> \texttt{\} }\\
\> \texttt{Mat\_<Vec3b>::iterator it = image.begin<Vec3b>(),}\\
\> \> \texttt{itEnd = image.end<Vec3b>();}\\
\> \texttt{for(; it != itEnd; ++it)}\\
\> \> \texttt{(*it)[1] \textasciicircum{}= 255;}\\
\end{tabbing}
\section{Matrix Manipulations: Copying, Shuffling, Part Access}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-copyto}{src.copyTo(dst)}} & Copy matrix to another one \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-convertto}{src.convertTo(dst,type,scale,shift)}} & \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Scale and convert to another datatype \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-clone}{m.clone()}} & Make deep copy of a matrix \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-reshape}{m.reshape(nch,nrows)}} & Change matrix dimensions and/or number of channels without copying data \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-row}{m.row(i)}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-col}{m.col(i)}} & Take a matrix row/column \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-rowrange}{m.rowRange(Range(i1,i2))}}
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-colrange}{m.colRange(Range(j1,j2))}} & \ \ \ \ \ \ \ Take a matrix row/column span \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#mat-diag}{m.diag(i)}} & Take a matrix diagonal \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#Mat}{m(Range(i1,i2),Range(j1,j2)), m(roi)}} & \ \ \ \ \ \ \ \ \ \ \ \ \ Take a submatrix \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#repeat}{m.repeat(ny,nx)}} & Make a bigger matrix from a smaller one \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#flip}{flip(src,dst,dir)}} & Reverse the order of matrix rows and/or columns \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#split}{split(...)}} & Split multi-channel matrix into separate channels \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#merge}{merge(...)}} & Make a multi-channel matrix out of the separate channels \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#mixchannels}{mixChannels(...)}} & Generalized form of split() and merge() \\
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#randshuffle}{randShuffle(...)}} & Randomly shuffle matrix elements \\
\end{tabular}
\begin{tabbing}
Exa\=mple 1. Smooth image ROI in-place\\
\>\texttt{Mat imgroi = image(Rect(10, 20, 100, 100));}\\
\>\texttt{GaussianBlur(imgroi, imgroi, Size(5, 5), 1.2, 1.2);}\\
Example 2. Somewhere in a linear algebra algorithm \\
\>\texttt{m.row(i) += m.row(j)*alpha;}\\
Example 3. Copy image ROI to another image with conversion\\
\>\texttt{Rect r(1, 1, 10, 20);}\\
\>\texttt{Mat dstroi = dst(Rect(0,10,r.width,r.height));}\\
\>\texttt{src(r).convertTo(dstroi, dstroi.type(), 1, 0);}\\
\end{tabbing}
\section{Simple Matrix Operations}
OpenCV implements most common arithmetical, logical and
other matrix operations, such as
\begin{itemize}
\item
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#add}{add()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#subtract}{subtract()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#multiply}{multiply()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#divide}{divide()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#absdiff}{absdiff()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#bitwise-and}{bitwise\_and()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#bitwise-or}{bitwise\_or()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#bitwise-xor}{bitwise\_xor()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#max}{max()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#min}{min()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#compare}{compare()}}
-- correspondingly, addition, subtraction, element-wise multiplication ... comparison of two matrices or a matrix and a scalar.
\begin{tabbing}
Exa\=mple. \href{http://en.wikipedia.org/wiki/Alpha_compositing}{Alpha compositing} function:\\
\texttt{void alphaCompose(const Mat\& rgba1,}\\
\> \texttt{const Mat\& rgba2, Mat\& rgba\_dest)}\\
\texttt{\{ }\\
\> \texttt{Mat a1(rgba1.size(), rgba1.type()), ra1;}\\
\> \texttt{Mat a2(rgba2.size(), rgba2.type());}\\
\> \texttt{int mixch[]=\{3, 0, 3, 1, 3, 2, 3, 3\};}\\
\> \texttt{mixChannels(\&rgba1, 1, \&a1, 1, mixch, 4);}\\
\> \texttt{mixChannels(\&rgba2, 1, \&a2, 1, mixch, 4);}\\
\> \texttt{subtract(Scalar::all(255), a1, ra1);}\\
\> \texttt{bitwise\_or(a1, Scalar(0,0,0,255), a1);}\\
\> \texttt{bitwise\_or(a2, Scalar(0,0,0,255), a2);}\\
\> \texttt{multiply(a2, ra1, a2, 1./255);}\\
\> \texttt{multiply(a1, rgba1, a1, 1./255);}\\
\> \texttt{multiply(a2, rgba2, a2, 1./255);}\\
\> \texttt{add(a1, a2, rgba\_dest);}\\
\texttt{\}}
\end{tabbing}
\item
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#sum}{sum()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#mean}{mean()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#meanstddev}{meanStdDev()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#norm}{norm()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#countnonzero}{countNonZero()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#minmaxloc}{minMaxLoc()}}
-- various statistics of matrix elements.
\item
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#exp}{exp()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#log}{log()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#pow}{pow()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#sqrt}{sqrt()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#carttopolar}{cartToPolar()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#polartocart}{polarToCart()}}
-- the classical math functions.
\item
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#scaleadd}{scaleAdd()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#transpose}{transpose()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#gemm}{gemm()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#invert}{invert()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#solve}{solve()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#determinant}{determinant()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#trace}{trace()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#eigen}{eigen()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#SVD}{SVD}}
-- the algebraic functions + SVD class.
\item
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#dft}{dft()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#idft}{idft()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#dct}{dct()}},
\texttt{\href{http://docs.opencv.org/modules/core/doc/operations_on_arrays.html\#idct}{idct()}}
-- discrete Fourier and cosine transformations.
\end{itemize}
For some operations a more convenient \href{http://docs.opencv.org/modules/core/doc/basic_structures.html\#matrix-expressions}{algebraic notation} can be used, for example:
\begin{tabbing}
\texttt{Mat}\={} \texttt{delta = (J.t()*J + lambda*}\\
\>\texttt{Mat::eye(J.cols, J.cols, J.type()))}\\
\>\texttt{.inv(CV\_SVD)*(J.t()*err);}
\end{tabbing}
implements the core of the Levenberg-Marquardt optimization algorithm.
\section{Image Processing}
\subsection{Filtering}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#filter2d}{filter2D()}} & Non-separable linear filter \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#sepfilter2d}{sepFilter2D()}} & Separable linear filter \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#blur}{boxFilter()}}, \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#gaussianblur}{GaussianBlur()}},
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#medianblur}{medianBlur()}},
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#bilateralfilter}{bilateralFilter()}}
& Smooth the image with one of the linear or non-linear filters \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#sobel}{Sobel()}}, \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#scharr}{Scharr()}}
& Compute the spatial image derivatives \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#laplacian}{Laplacian()}} & Compute the Laplacian: $\Delta I = \frac{\partial ^ 2 I}{\partial x^2} + \frac{\partial ^ 2 I}{\partial y^2}$ \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#erode}{erode()}}, \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/filtering.html\#dilate}{dilate()}} & Morphological operations \\
\end{tabular}
\begin{tabbing}
Exa\=mple. Filter image in-place with a 3x3 high-pass kernel\\
\> (preserve negative responses by shifting the result by 128):\\
\texttt{filter2D(image, image, image.depth(), (Mat\_<float>(3,3)<<}\\
\> \texttt{-1, -1, -1, -1, 9, -1, -1, -1, -1), Point(1,1), 128);}\\
\end{tabbing}
\subsection{Geometrical Transformations}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#resize}{resize()}} & Resize image \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#getrectsubpix}{getRectSubPix()}} & Extract an image patch \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#warpaffine}{warpAffine()}} & Warp image affinely\\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#warpperspective}{warpPerspective()}} & Warp image perspectively\\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#remap}{remap()}} & Generic image warping\\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#convertmaps}{convertMaps()}} & Optimize maps for a faster remap() execution\\
\end{tabular}
\begin{tabbing}
Example. Decimate image by factor of $\sqrt{2}$:\\
\texttt{Mat dst; resize(src, dst, Size(), 1./sqrt(2), 1./sqrt(2));}
\end{tabbing}
\subsection{Various Image Transformations}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#cvtcolor}{cvtColor()}} & Convert image from one color space to another \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#threshold}{threshold()}}, \texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#adaptivethreshold}{adaptiveThreshold()}} & Convert grayscale image to binary image using a fixed or a variable threshold \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#floodfill}{floodFill()}} & Find a connected component using region growing algorithm\\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#integral}{integral()}} & Compute integral image \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#distancetransform}{distanceTransform()}}
& Build a distance map or discrete Voronoi diagram for a binary image. \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#watershed}{watershed()}},
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html\#grabcut}{grabCut()}}
& Marker-based image segmentation algorithms.
See the samples \texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/watershed.cpp}{watershed.cpp}} and \texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/grabcut.cpp}{grabcut.cpp}}.
\end{tabular}
\subsection{Histograms}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/histograms.html\#calchist}{calcHist()}} & Compute image(s) histogram \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/histograms.html\#calcbackproject}{calcBackProject()}} & Back-project the histogram \\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/histograms.html\#equalizehist}{equalizeHist()}} & Normalize image brightness and contrast\\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/histograms.html\#comparehist}{compareHist()}} & Compare two histograms\\
\end{tabular}
\begin{tabbing}
Example. Compute Hue-Saturation histogram of an image:\\
\texttt{Mat hsv, H;}\\
\texttt{cvtColor(image, hsv, CV\_BGR2HSV);}\\
\texttt{int planes[]=\{0, 1\}, hsize[] = \{32, 32\};}\\
\texttt{calcHist(\&hsv, 1, planes, Mat(), H, 2, hsize, 0);}\\
\end{tabbing}
\subsection{Contours}
See \texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/contours2.cpp}{contours2.cpp}} and \texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/squares.cpp}{squares.cpp}}
samples on what contours are and how to use them.
\section{Data I/O}
\href{http://docs.opencv.org/modules/core/doc/xml_yaml_persistence.html\#xml-yaml-file-storages-writing-to-a-file-storage}{XML/YAML storages} are collections (possibly nested) of scalar values, structures and heterogeneous lists.
\begin{tabbing}
\textbf{Wr}\=\textbf{iting data to YAML (or XML)}\\
\texttt{// Type of the file is determined from the extension}\\
\texttt{FileStorage fs("test.yml", FileStorage::WRITE);}\\
\texttt{fs << "i" << 5 << "r" << 3.1 << "str" << "ABCDEFGH";}\\
\texttt{fs << "mtx" << Mat::eye(3,3,CV\_32F);}\\
\texttt{fs << "mylist" << "[" << CV\_PI << "1+1" <<}\\
\>\texttt{"\{:" << "month" << 12 << "day" << 31 << "year"}\\
\>\texttt{<< 1969 << "\}" << "]";}\\
\texttt{fs << "mystruct" << "\{" << "x" << 1 << "y" << 2 <<}\\
\>\texttt{"width" << 100 << "height" << 200 << "lbp" << "[:";}\\
\texttt{const uchar arr[] = \{0, 1, 1, 0, 1, 1, 0, 1\};}\\
\texttt{fs.writeRaw("u", arr, (int)(sizeof(arr)/sizeof(arr[0])));}\\
\texttt{fs << "]" << "\}";}
\end{tabbing}
\emph{Scalars (integers, floating-point numbers, text strings), matrices, STL vectors of scalars and some other types can be written to the file storages using the \texttt{<<} operator.}
\begin{tabbing}
\textbf{Re}\=\textbf{ading the data back}\\
\texttt{// Type of the file is determined from the content}\\
\texttt{FileStorage fs("test.yml", FileStorage::READ);}\\
\texttt{int i1 = (int)fs["i"]; double r1 = (double)fs["r"];}\\
\texttt{string str1 = (string)fs["str"];}\\
\texttt{Mat M; fs["mtx"] >> M;}\\
\texttt{FileNode tl = fs["mylist"];}\\
\texttt{CV\_Assert(tl.type() == FileNode::SEQ \&\& tl.size() == 3);}\\
\texttt{double tl0 = (double)tl[0]; string tl1 = (string)tl[1];}\\
\texttt{int m = (int)tl[2]["month"], d = (int)tl[2]["day"];}\\
\texttt{int year = (int)tl[2]["year"];}\\
\texttt{FileNode tm = fs["mystruct"];}\\
\texttt{Rect r; r.x = (int)tm["x"], r.y = (int)tm["y"];}\\
\texttt{r.width = (int)tm["width"], r.height = (int)tm["height"];}\\
\texttt{int lbp\_val = 0;}\\
\texttt{FileNodeIterator it = tm["lbp"].begin();}\\
\texttt{for(int k = 0; k < 8; k++, ++it)}\\
\>\texttt{lbp\_val |= ((int)*it) << k;}\\
\end{tabbing}
\emph{Scalars are read using the corresponding FileNode's cast operators. Matrices and some other types are read using the \texttt{>>} operator. Lists can be read using a FileNodeIterator.}
\begin{tabbing}
\textbf{Wr}\=\textbf{iting and reading raster images}\\
\texttt{\href{http://docs.opencv.org/modules/imgcodecs/doc/reading_and_writing_images.html\#imwrite}{imwrite}("myimage.jpg", image);}\\
\texttt{Mat image\_color\_copy = \href{http://docs.opencv.org/modules/imgcodecs/doc/reading_and_writing_images.html\#imread}{imread}("myimage.jpg", 1);}\\
\texttt{Mat image\_grayscale\_copy = \href{http://docs.opencv.org/modules/imgcodecs/doc/reading_and_writing_images.html\#imread}{imread}("myimage.jpg", 0);}\\
\end{tabbing}
\emph{The functions can read/write images in the following formats: \textbf{BMP (.bmp), JPEG (.jpg, .jpeg), TIFF (.tif, .tiff), PNG (.png), PBM/PGM/PPM (.p?m), Sun Raster (.sr), JPEG 2000 (.jp2)}. Every format supports 8-bit, 1- or 3-channel images. Some formats (PNG, JPEG 2000) support 16 bits per channel.}
\begin{tabbing}
\textbf{Re}\=\textbf{ading video from a file or from a camera}\\
\texttt{VideoCapture cap;}\\
\texttt{if(argc > 1) cap.open(string(argv[1])); else cap.open(0);}\\
\texttt{Mat frame; namedWindow("video", 1);}\\
\texttt{for(;;) \{}\\
\>\texttt{cap >> frame; if(!frame.data) break;}\\
\>\texttt{imshow("video", frame); if(waitKey(30) >= 0) break;}\\
\texttt{\} }
\end{tabbing}
\section{Simple GUI (highgui module)}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#namedwindow}{namedWindow(winname,flags)}} & \ \ \ \ \ \ \ \ \ \ Create named highgui window \\
\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#destroywindow}{destroyWindow(winname)}} & \ \ \ Destroy the specified window \\
\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#imshow}{imshow(winname, mtx)}} & Show image in the window \\
\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#waitkey}{waitKey(delay)}} & Wait for a key press during the specified time interval (or forever). Process events while waiting. \emph{Do not forget to call this function several times a second in your code.} \\
\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#createtrackbar}{createTrackbar(...)}} & Add trackbar (slider) to the specified window \\
\texttt{\href{http://docs.opencv.org/modules/highgui/doc/user_interface.html\#setmousecallback}{setMouseCallback(...)}} & \ \ Set the callback on mouse clicks and movements in the specified window \\
\end{tabular}
See \texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/camshiftdemo.cpp}{camshiftdemo.cpp}} and other \href{https://github.com/Itseez/opencv/tree/master/samples/}{OpenCV samples} on how to use the GUI functions.
\section{Camera Calibration, Pose Estimation and Depth Estimation}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#calibratecamera}{calibrateCamera()}} & Calibrate camera from several views of a calibration pattern. \\
\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#findchessboardcorners}{findChessboardCorners()}} & \ \ \ \ \ \ Find feature points on the checkerboard calibration pattern. \\
\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#solvepnp}{solvePnP()}} & Find the object pose from the known projections of its feature points. \\
\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#stereocalibrate}{stereoCalibrate()}} & Calibrate stereo camera. \\
\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#stereorectify}{stereoRectify()}} & Compute the rectification transforms for a calibrated stereo camera.\\
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html\#initundistortrectifymap}{initUndistortRectifyMap()}} & \ \ \ \ \ \ Compute rectification map (for \texttt{remap()}) for each stereo camera head.\\
\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#StereoBM}{StereoBM}}, \texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#StereoSGBM}{StereoSGBM}} & The stereo correspondence engines to be run on rectified stereo pairs.\\
\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#reprojectimageto3d}{reprojectImageTo3D()}} & Convert disparity map to 3D point cloud.\\
\texttt{\href{http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html\#findhomography}{findHomography()}} & Find best-fit perspective transformation between two 2D point sets. \\
\end{tabular}
To calibrate a camera, you can use \texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/calibration.cpp}{calibration.cpp}} or
\texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/stereo\_calib.cpp}{stereo\_calib.cpp}} samples.
To get the disparity maps and the point clouds, use
\texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/stereo\_match.cpp}{stereo\_match.cpp}} sample.
\section{Object Detection}
\begin{tabular}{@{}p{\the\MyLen}%
@{}p{\linewidth-\the\MyLen}@{}}
\texttt{\href{http://docs.opencv.org/modules/imgproc/doc/object_detection.html\#matchtemplate}{matchTemplate}} & Compute proximity map for given template.\\
\texttt{\href{http://docs.opencv.org/modules/objdetect/doc/cascade_classification.html\#cascadeclassifier}{CascadeClassifier}} & Viola's Cascade of Boosted classifiers using Haar or LBP features. Suitable for detecting faces, facial features and some other objects without diverse textures. See \texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/c/facedetect.cpp}{facedetect.cpp}}\\
\texttt{{HOGDescriptor}} & N. Dalal's object detector using Histogram-of-Oriented-Gradients (HOG) features. Suitable for detecting people, cars and other objects with well-defined silhouettes. See \texttt{\href{https://github.com/Itseez/opencv/tree/master/samples/cpp/peopledetect.cpp}{peopledetect.cpp}}\\
\end{tabular}
%
% feature detection:
% features2d toolbox
%
% object detection:
% using a classifier running on a sliding window: cascadeclassifier + hog.
% using salient point features: features2d -> matching
%
% statistical data processing:
% clustering (k-means),
% classification + regression (SVM, boosting, k-nearest),
% compressing data (PCA)
\end{multicols}
\end{document}

View File

@ -1,17 +0,0 @@
INSTRUCTIONS TO BUILD WIN32 PACKAGES WITH CMAKE+CPACK
------------------------------------------------------
- Install NSIS.
- Generate OpenCV solutions for MSVC using CMake as usual.
- In cmake-gui:
- Mark BUILD_PACKAGE
- Mark BUILD_EXAMPLES (If examples are desired to be shipped as binaries...)
- Unmark ENABLE_OPENMP, since this feature seems to have some issues yet...
- Mark INSTALL_*_EXAMPLES
- Open the OpenCV solution and build ALL in Debug and Release.
- Build PACKAGE, from the Release configuration. An NSIS installer package will be
created with both release and debug LIBs and DLLs.
Jose Luis Blanco, 2009/JUL/29

View File

@ -1,24 +0,0 @@
#!/usr/bin/env python
# Patch a LaTeX file in place. The file path is the first command-line argument.
#
# Two textual fixes are applied line by line:
#   * every occurrence of "\code{~const}}{}" is rewritten as "}{\code{~const}}";
#   * every line starting with "\item[{Parameters}] \leavevmode" is replaced by a
#     line that opens an itemize list with empty labels.  When the original line
#     did not already open an itemize list, an "\item {}" entry is appended and a
#     matching "\end{itemize}" is inserted in front of the next line that starts
#     with "\end{description}\end{quote}".
import sys

path = sys.argv[1]

# Read the whole file before touching it; the context manager closes the handle
# even if reading fails.
with open(path, "rt") as f:
    ll = f.readlines()

patched = []
# True while an itemize list opened by this script still has to be closed.
singleparam = False

for l in ll:
    l = l.replace("\\code{~const}}{}", "}{\\code{~const}}")
    if l.startswith("\\item[{Parameters}] \\leavevmode"):
        if not l.startswith("\\item[{Parameters}] \\leavevmode\\begin{itemize}"):
            # The line does not open a list itself, so this script has to.
            singleparam = True
        l = "\\item[{Parameters}] \\leavevmode\\begin{itemize}[label=]\n"
        if singleparam:
            l += "\\item {}\n"
    elif singleparam and l.startswith("\\end{description}\\end{quote}"):
        # Close the list opened above before the description ends.
        l = "\\end{itemize}\n" + l
        singleparam = False
    patched.append(l)

# Write the result back only after every line has been processed, so the file
# is not left truncated if something goes wrong above.
with open(path, "wt") as f:
    f.writelines(patched)

View File

Before

Width:  |  Height:  |  Size: 3.6 KiB

After

Width:  |  Height:  |  Size: 3.6 KiB

View File

@ -1,112 +0,0 @@
.. _Bindings_Basics:
How OpenCV-Python Bindings Work?
************************************
Goal
=====
Learn:
* How OpenCV-Python bindings are generated?
* How to extend new OpenCV modules to Python?
How OpenCV-Python bindings are generated?
=========================================
In OpenCV, all algorithms are implemented in C++. But these algorithms can be used from different languages like Python, Java etc. This is made possible by the bindings generators. These generators create a bridge between C++ and Python which enables users to call C++ functions from Python. To get a complete picture of what is happening in background, a good knowledge of Python/C API is required. A simple example on extending C++ functions to Python can be found in official Python documentation[1]. So extending all functions in OpenCV to Python by writing their wrapper functions manually is a time-consuming task. So OpenCV does it in a more intelligent way. OpenCV generates these wrapper functions automatically from the C++ headers using some Python scripts which are located in ``modules/python/src2``. We will look into what they do.
First, ``modules/python/CMakeLists.txt`` is a CMake script which checks the modules to be extended to Python. It will automatically check all the modules to be extended and grab their header files. These header files contain the list of all classes, functions, constants etc. for those particular modules.
Second, these header files are passed to a Python script, ``modules/python/src2/gen2.py``. This is the Python bindings generator script. It calls another Python script ``modules/python/src2/hdr_parser.py``. This is the header parser script. This header parser splits the complete header file into small Python lists. So these lists contain all details about a particular function, class etc. For example, a function will be parsed to get a list containing function name, return type, input arguments, argument types etc. Final list contains details of all the functions, structs, classes etc. in that header file.
But the header parser doesn't parse all the functions/classes in the header file. The developer has to specify which functions should be exported to Python. For that, there are certain macros added to the beginning of these declarations which enable the header parser to identify functions to be parsed. These macros are added by the developer who programs the particular function. In short, the developer decides which functions should be extended to Python and which should not. Details of those macros are given in the next section.
So header parser returns a final big list of parsed functions. Our generator script (gen2.py) will create wrapper functions for all the functions/classes/enums/structs parsed by header parser (You can find these header files during compilation in the ``build/modules/python/`` folder as ``pyopencv_generated_*.h`` files). But there may be some basic OpenCV datatypes like Mat, Vec4i, Size. They need to be extended manually. For example, a Mat type should be extended to Numpy array, Size should be extended to a tuple of two integers etc. Similarly, there may be some complex structs/classes/functions etc. which need to be extended manually. All such manual wrapper functions are placed in ``modules/python/src2/pycv2.hpp``.
So now the only thing left is the compilation of these wrapper files, which gives us the **cv2** module. So when you call a function, say ``res = equalizeHist(img1,img2)`` in Python, you pass two numpy arrays and you expect another numpy array as the output. These numpy arrays are converted to ``cv::Mat`` and then the ``equalizeHist()`` function is called in C++. The final result, ``res``, will be converted back into a Numpy array. So in short, almost all operations are done in C++, which gives us almost the same speed as that of C++.
So this is the basic version of how OpenCV-Python bindings are generated.
How to extend new modules to Python?
=====================================
The header parser parses the header files based on wrapper macros added to the function declarations. Enumeration constants don't need any wrapper macros; they are wrapped automatically. But the remaining functions, classes etc. need wrapper macros.
Functions are extended using ``CV_EXPORTS_W`` macro. An example is shown below.
.. code-block:: cpp
CV_EXPORTS_W void equalizeHist( InputArray src, OutputArray dst );
Header parser can understand the input and output arguments from keywords like ``InputArray, OutputArray`` etc. But sometimes, we may need to hardcode inputs and outputs. For that, macros like ``CV_OUT, CV_IN_OUT`` etc. are used.
.. code-block:: cpp
CV_EXPORTS_W void minEnclosingCircle( InputArray points,
CV_OUT Point2f& center, CV_OUT float& radius );
For large classes also, ``CV_EXPORTS_W`` is used. To extend class methods, ``CV_WRAP`` is used. Similarly, ``CV_PROP`` is used for class fields.
.. code-block:: cpp
class CV_EXPORTS_W CLAHE : public Algorithm
{
public:
CV_WRAP virtual void apply(InputArray src, OutputArray dst) = 0;
CV_WRAP virtual void setClipLimit(double clipLimit) = 0;
CV_WRAP virtual double getClipLimit() const = 0;
}
Overloaded functions can be extended using ``CV_EXPORTS_AS``. But we need to pass a new name so that each function will be called by that name in Python. Take the case of integral function below. Three functions are available, so each one is named with a suffix in Python. Similarly ``CV_WRAP_AS`` can be used to wrap overloaded methods.
.. code-block:: cpp
//! computes the integral image
CV_EXPORTS_W void integral( InputArray src, OutputArray sum, int sdepth = -1 );
//! computes the integral image and integral for the squared image
CV_EXPORTS_AS(integral2) void integral( InputArray src, OutputArray sum,
OutputArray sqsum, int sdepth = -1, int sqdepth = -1 );
//! computes the integral image, integral for the squared image and the tilted integral image
CV_EXPORTS_AS(integral3) void integral( InputArray src, OutputArray sum,
OutputArray sqsum, OutputArray tilted,
int sdepth = -1, int sqdepth = -1 );
Small classes/structs are extended using ``CV_EXPORTS_W_SIMPLE``. These structs are passed by value to C++ functions. Examples are KeyPoint, Match etc. Their methods are extended by ``CV_WRAP`` and fields are extended by ``CV_PROP_RW``.
.. code-block:: cpp
class CV_EXPORTS_W_SIMPLE DMatch
{
public:
CV_WRAP DMatch();
CV_WRAP DMatch(int _queryIdx, int _trainIdx, float _distance);
CV_WRAP DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance);
CV_PROP_RW int queryIdx; // query descriptor index
CV_PROP_RW int trainIdx; // train descriptor index
CV_PROP_RW int imgIdx; // train image index
CV_PROP_RW float distance;
};
Some other small classes/structs can be exported using ``CV_EXPORTS_W_MAP`` where it is exported to a Python native dictionary. Moments() is an example of it.
.. code-block:: cpp
class CV_EXPORTS_W_MAP Moments
{
public:
//! spatial moments
CV_PROP_RW double m00, m10, m01, m20, m11, m02, m30, m21, m12, m03;
//! central moments
CV_PROP_RW double mu20, mu11, mu02, mu30, mu21, mu12, mu03;
//! central normalized moments
CV_PROP_RW double nu20, nu11, nu02, nu30, nu21, nu12, nu03;
};
So these are the major extension macros available in OpenCV. Typically, a developer has to put the proper macros in their appropriate positions. The rest is done by the generator scripts. Sometimes, there may be exceptional cases where the generator scripts cannot create the wrappers. Such functions need to be handled manually. But most of the time, code written according to the OpenCV coding guidelines will be automatically wrapped by the generator scripts.

View File

@ -1,36 +0,0 @@
.. _PY_Table-Of-Content-Bindings:
OpenCV-Python Bindings
--------------------------------
Here, you will learn how OpenCV-Python bindings are generated.
* :ref:`Bindings_Basics`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|bind1| Learn how OpenCV-Python bindings are generated.
=========== ======================================================
.. |bind1| image:: images/nlm_icon.jpg
:height: 90pt
:width: 90pt
.. raw:: latex
\pagebreak
.. We use a custom table of content format and as the table of content only informs Sphinx about the hierarchy of the files, no need to show it.
.. toctree::
:hidden:
../py_bindings_basics/py_bindings_basics

View File

Before

Width:  |  Height:  |  Size: 3.6 KiB

After

Width:  |  Height:  |  Size: 3.6 KiB

View File

Before

Width:  |  Height:  |  Size: 3.7 KiB

After

Width:  |  Height:  |  Size: 3.7 KiB

View File

Before

Width:  |  Height:  |  Size: 3.6 KiB

After

Width:  |  Height:  |  Size: 3.6 KiB

View File

Before

Width:  |  Height:  |  Size: 3.5 KiB

After

Width:  |  Height:  |  Size: 3.5 KiB

View File

@ -1,213 +0,0 @@
.. _calibration:
Camera Calibration
********************
Goal
=======
In this section,
* We will learn about distortions in camera, intrinsic and extrinsic parameters of camera etc.
* We will learn to find these parameters, undistort images etc.
Basics
========
Today's cheap pinhole cameras introduce a lot of distortion to images. Two major distortions are radial distortion and tangential distortion.
Due to radial distortion, straight lines will appear curved. Its effect is more as we move away from the center of image. For example, one image is shown below, where two edges of a chess board are marked with red lines. But you can see that border is not a straight line and doesn't match with the red line. All the expected straight lines are bulged out. Visit `Distortion (optics) <http://en.wikipedia.org/wiki/Distortion_%28optics%29>`_ for more details.
.. image:: images/calib_radial.jpg
:alt: Radial Distortion
:align: center
This distortion is solved as follows:
.. math::
x_{corrected} = x( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) \\
y_{corrected} = y( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6)
Similarly, another distortion is the tangential distortion, which occurs because the image-taking lens is not aligned perfectly parallel to the imaging plane. So some areas in the image may look nearer than expected. It is solved as below:
.. math::
x_{corrected} = x + [ 2p_1xy + p_2(r^2+2x^2)] \\
y_{corrected} = y + [ p_1(r^2+ 2y^2)+ 2p_2xy]
In short, we need to find five parameters, known as distortion coefficients given by:
.. math::
Distortion \; coefficients=(k_1 \hspace{10pt} k_2 \hspace{10pt} p_1 \hspace{10pt} p_2 \hspace{10pt} k_3)
In addition to this, we need to find a few more information, like intrinsic and extrinsic parameters of a camera. Intrinsic parameters are specific to a camera. It includes information like focal length (:math:`f_x,f_y`), optical centers (:math:`c_x, c_y`) etc. It is also called camera matrix. It depends on the camera only, so once calculated, it can be stored for future purposes. It is expressed as a 3x3 matrix:
.. math::
camera \; matrix = \left [ \begin{matrix} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \end{matrix} \right ]
Extrinsic parameters correspond to the rotation and translation vectors which translate the coordinates of a 3D point from the world coordinate system to the camera coordinate system.
For stereo applications, these distortions need to be corrected first. To find all these parameters, what we have to do is to provide some sample images of a well defined pattern (e.g. a chess board). We find some specific points in it (the square corners in the chess board). We know their coordinates in real world space and we know their coordinates in the image. With these data, some mathematical problem is solved in the background to get the distortion coefficients. That is the summary of the whole story. For better results, we need at least 10 test patterns.
Code
========
As mentioned above, we need at least 10 test patterns for camera calibration. OpenCV comes with some images of a chess board (see ``samples/cpp/left01.jpg -- left14.jpg``), so we will use them. For the sake of understanding, consider just one image of a chess board. The important input data needed for camera calibration are a set of 3D real world points and their corresponding 2D image points. The 2D image points are the easy part: we can find them directly from the image. (These image points are the locations where two black squares touch each other on the chess board.)
What about the 3D points from real world space? Those images are taken from a static camera and chess boards are placed at different locations and orientations. So we need to know :math:`(X,Y,Z)` values. But for simplicity, we can say chess board was kept stationary at XY plane, (so Z=0 always) and camera was moved accordingly. This consideration helps us to find only X,Y values. Now for X,Y values, we can simply pass the points as (0,0), (1,0), (2,0), ... which denotes the location of points. In this case, the results we get will be in the scale of size of chess board square. But if we know the square size, (say 30 mm), and we can pass the values as (0,0),(30,0),(60,0),..., we get the results in mm. (In this case, we don't know square size since we didn't take those images, so we pass in terms of square size).
3D points are called **object points** and 2D image points are called **image points.**
Setup
---------
So to find pattern in chess board, we use the function, **cv2.findChessboardCorners()**. We also need to pass what kind of pattern we are looking, like 8x8 grid, 5x5 grid etc. In this example, we use 7x6 grid. (Normally a chess board has 8x8 squares and 7x7 internal corners). It returns the corner points and retval which will be True if pattern is obtained. These corners will be placed in an order (from left-to-right, top-to-bottom)
.. seealso:: This function may not be able to find the required pattern in all the images. So one good option is to write the code such that, it starts the camera and check each frame for required pattern. Once pattern is obtained, find the corners and store it in a list. Also provides some interval before reading next frame so that we can adjust our chess board in different direction. Continue this process until required number of good patterns are obtained. Even in the example provided here, we are not sure out of 14 images given, how many are good. So we read all the images and take the good ones.
.. seealso:: Instead of chess board, we can use some circular grid, but then use the function **cv2.findCirclesGrid()** to find the pattern. It is said that less number of images are enough when using circular grid.
Once we find the corners, we can increase their accuracy using **cv2.cornerSubPix()**. We can also draw the pattern using **cv2.drawChessboardCorners()**. All these steps are included in below code:
::
import numpy as np
import cv2
import glob

# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((6*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)

# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.

images = glob.glob('*.jpg')

for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

    # Find the chess board corners
    ret, corners = cv2.findChessboardCorners(gray, (7,6),None)

    # If found, add object points, image points (after refining them)
    if ret == True:
        objpoints.append(objp)

        # Increase the accuracy of the detected corners before storing them
        cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)
        imgpoints.append(corners)

        # Draw and display the corners (the same array that was stored above)
        cv2.drawChessboardCorners(img, (7,6), corners,ret)
        cv2.imshow('img',img)
        cv2.waitKey(500)

cv2.destroyAllWindows()
One image with pattern drawn on it is shown below:
.. image:: images/calib_pattern.jpg
:alt: Calibration Pattern
:align: center
Calibration
------------
So now we have our object points and image points we are ready to go for calibration. For that we use the function, **cv2.calibrateCamera()**. It returns the camera matrix, distortion coefficients, rotation and translation vectors etc.
::
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1],None,None)
Undistortion
---------------
We have got what we were trying. Now we can take an image and undistort it. OpenCV comes with two methods, we will see both. But before that, we can refine the camera matrix based on a free scaling parameter using **cv2.getOptimalNewCameraMatrix()**. If the scaling parameter ``alpha=0``, it returns undistorted image with minimum unwanted pixels. So it may even remove some pixels at image corners. If ``alpha=1``, all pixels are retained with some extra black images. It also returns an image ROI which can be used to crop the result.
So we take a new image (``left12.jpg`` in this case. That is the first image in this chapter)
::
img = cv2.imread('left12.jpg')
h, w = img.shape[:2]
newcameramtx, roi=cv2.getOptimalNewCameraMatrix(mtx,dist,(w,h),1,(w,h))
1. Using **cv2.undistort()**
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This is the shortest path. Just call the function and use ROI obtained above to crop the result.
::
# undistort
dst = cv2.undistort(img, mtx, dist, None, newcameramtx)
# crop the image
x,y,w,h = roi
dst = dst[y:y+h, x:x+w]
cv2.imwrite('calibresult.png',dst)
2. Using **remapping**
^^^^^^^^^^^^^^^^^^^^^^^^^^^
This is curved path. First find a mapping function from distorted image to undistorted image. Then use the remap function.
::
# undistort
mapx,mapy = cv2.initUndistortRectifyMap(mtx,dist,None,newcameramtx,(w,h),5)
dst = cv2.remap(img,mapx,mapy,cv2.INTER_LINEAR)
# crop the image
x,y,w,h = roi
dst = dst[y:y+h, x:x+w]
cv2.imwrite('calibresult.png',dst)
Both the methods give the same result. See the result below:
.. image:: images/calib_result.jpg
:alt: Calibration Result
:align: center
You can see in the result that all the edges are straight.
Now you can store the camera matrix and distortion coefficients using write functions in Numpy (np.savez, np.savetxt etc) for future uses.
Re-projection Error
=======================
Re-projection error gives a good estimate of just how exact the found parameters are. This should be as close to zero as possible. Given the intrinsic, distortion, rotation and translation matrices, we first transform the object points to image points using **cv2.projectPoints()**. Then we calculate the absolute norm between what we got with our transformation and what the corner finding algorithm returned. To find the average error, we calculate the arithmetic mean of the errors calculated for all the calibration images.
::
mean_error = 0
for i in xrange(len(objpoints)):
imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
error = cv2.norm(imgpoints[i],imgpoints2, cv2.NORM_L2)/len(imgpoints2)
tot_error += error
print "total error: ", mean_error/len(objpoints)
Additional Resources
======================
Exercises
============
#. Try camera calibration with circular grid.

View File

@ -1,67 +0,0 @@
.. _py_depthmap:
Depth Map from Stereo Images
******************************
Goal
=======
In this session,
* We will learn to create depth map from stereo images.
Basics
===========
In last session, we saw basic concepts like epipolar constraints and other related terms. We also saw that if we have two images of same scene, we can get depth information from that in an intuitive way. Below is an image and some simple mathematical formulas which proves that intuition. (Image Courtesy :
.. image:: images/stereo_depth.jpg
:alt: Calculating depth
:align: center
The above diagram contains similar triangles. Writing their equivalent equations yields the following result:
.. math::
disparity = x - x' = \frac{Bf}{Z}
:math:`x` and :math:`x'` are the distances between the points in the image plane corresponding to the 3D scene point and their camera centers. :math:`B` is the distance between the two cameras (which we know) and :math:`f` is the focal length of the camera (already known). So in short, the above equation says that the depth of a point in a scene is inversely proportional to the difference in distance of the corresponding image points and their camera centers. So with this information, we can derive the depth of all pixels in an image.
So the task is to find corresponding matches between the two images. We have already seen how the epiline constraint makes this operation faster and more accurate. Once the matches are found, the disparity follows. Let's see how we can do it with OpenCV.
Code
========
Below code snippet shows a simple procedure to create disparity map.
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
imgL = cv2.imread('tsukuba_l.png',0)
imgR = cv2.imread('tsukuba_r.png',0)
stereo = cv2.createStereoBM(numDisparities=16, blockSize=15)
disparity = stereo.compute(imgL,imgR)
plt.imshow(disparity,'gray')
plt.show()
Below image contains the original image (left) and its disparity map (right). As you can see, result is contaminated with high degree of noise. By adjusting the values of numDisparities and blockSize, you can get a better result.
.. image:: images/disparity_map.jpg
:alt: Disparity Map
:align: center
.. note:: More details to be added
Additional Resources
=============================
Exercises
============
1. OpenCV samples contain an example of generating disparity map and its 3D reconstruction. Check ``stereo_match.py`` in OpenCV-Python samples.

View File

@ -1,158 +0,0 @@
.. _epipolar_geometry:
Epipolar Geometry
*********************
Goal
========
In this section,
* We will learn about the basics of multiview geometry
* We will see what is epipole, epipolar lines, epipolar constraint etc.
Basic Concepts
=================
When we take an image using a pin-hole camera, we lose an important piece of information, i.e. the depth of the image, or how far each point in the image is from the camera, because it is a 3D-to-2D conversion. So it is an important question whether we can find the depth information using these cameras. And the answer is to use more than one camera. Our eyes work in a similar way, where we use two cameras (two eyes), which is called stereo vision. So let's see what OpenCV provides in this field.
(*Learning OpenCV* by Gary Bradski has a lot of information in this field.)
Before going to depth images, let's first understand some basic concepts in multiview geometry. In this section we will deal with epipolar geometry. See the image below which shows a basic setup with two cameras taking the image of same scene.
.. image:: images/epipolar.jpg
:alt: Epipolar geometry
:align: center
If we are using only the left camera, we can't find the 3D point corresponding to the point :math:`x` in image because every point on the line :math:`OX` projects to the same point on the image plane. But consider the right image also. Now different points on the line :math:`OX` projects to different points (:math:`x'`) in right plane. So with these two images, we can triangulate the correct 3D point. This is the whole idea.
The projection of the different points on :math:`OX` form a line on right plane (line :math:`l'`). We call it **epiline** corresponding to the point :math:`x`. It means, to find the point :math:`x` on the right image, search along this epiline. It should be somewhere on this line (Think of it this way, to find the matching point in other image, you need not search the whole image, just search along the epiline. So it provides better performance and accuracy). This is called **Epipolar Constraint**. Similarly all points will have its corresponding epilines in the other image. The plane :math:`XOO'` is called **Epipolar Plane**.
:math:`O` and :math:`O'` are the camera centers. From the setup given above, you can see that projection of right camera :math:`O'` is seen on the left image at the point, :math:`e`. It is called the **epipole**. Epipole is the point of intersection of line through camera centers and the image planes. Similarly :math:`e'` is the epipole of the left camera. In some cases, you won't be able to locate the epipole in the image, they may be outside the image (which means, one camera doesn't see the other).
All the epilines pass through its epipole. So to find the location of epipole, we can find many epilines and find their intersection point.
So in this session, we focus on finding epipolar lines and epipoles. But to find them, we need two more ingredients, **Fundamental Matrix (F)** and **Essential Matrix (E)**. Essential Matrix contains the information about translation and rotation, which describe the location of the second camera relative to the first in global coordinates. See the image below (Image courtesy: Learning OpenCV by Gary Bradski):
.. image:: images/essential_matrix.jpg
:alt: Essential Matrix
:align: center
But we prefer measurements to be done in pixel coordinates, right? Fundamental Matrix contains the same information as Essential Matrix in addition to the information about the intrinsics of both cameras so that we can relate the two cameras in pixel coordinates. (If we are using rectified images and normalize the point by dividing by the focal lengths, :math:`F=E`). In simple words, Fundamental Matrix F, maps a point in one image to a line (epiline) in the other image. This is calculated from matching points from both the images. A minimum of 8 such points are required to find the fundamental matrix (while using 8-point algorithm). More points are preferred and use RANSAC to get a more robust result.
Code
=========
So first we need to find as many possible matches between two images to find the fundamental matrix. For this, we use SIFT descriptors with FLANN based matcher and ratio test.
::
import cv2
import numpy as np
from matplotlib import pyplot as plt
img1 = cv2.imread('myleft.jpg',0) #queryimage # left image
img2 = cv2.imread('myright.jpg',0) #trainimage # right image
sift = cv2.SIFT()
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)
# FLANN parameters
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1,des2,k=2)
good = []
pts1 = []
pts2 = []
# ratio test as per Lowe's paper
for i,(m,n) in enumerate(matches):
if m.distance < 0.8*n.distance:
good.append(m)
pts2.append(kp2[m.trainIdx].pt)
pts1.append(kp1[m.queryIdx].pt)
Now we have the list of best matches from both the images. Let's find the Fundamental Matrix.
::
pts1 = np.int32(pts1)
pts2 = np.int32(pts2)
F, mask = cv2.findFundamentalMat(pts1,pts2,cv2.FM_LMEDS)
# We select only inlier points
pts1 = pts1[mask.ravel()==1]
pts2 = pts2[mask.ravel()==1]
Next we find the epilines. Epilines corresponding to the points in first image is drawn on second image. So mentioning of correct images are important here. We get an array of lines. So we define a new function to draw these lines on the images.
::
def drawlines(img1,img2,lines,pts1,pts2):
''' img1 - image on which we draw the epilines for the points in img2
lines - corresponding epilines '''
r,c = img1.shape
img1 = cv2.cvtColor(img1,cv2.COLOR_GRAY2BGR)
img2 = cv2.cvtColor(img2,cv2.COLOR_GRAY2BGR)
for r,pt1,pt2 in zip(lines,pts1,pts2):
color = tuple(np.random.randint(0,255,3).tolist())
x0,y0 = map(int, [0, -r[2]/r[1] ])
x1,y1 = map(int, [c, -(r[2]+r[0]*c)/r[1] ])
img1 = cv2.line(img1, (x0,y0), (x1,y1), color,1)
img1 = cv2.circle(img1,tuple(pt1),5,color,-1)
img2 = cv2.circle(img2,tuple(pt2),5,color,-1)
return img1,img2
Now we find the epilines in both the images and draw them.
::
# Find epilines corresponding to points in right image (second image) and
# drawing its lines on left image
lines1 = cv2.computeCorrespondEpilines(pts2.reshape(-1,1,2), 2,F)
lines1 = lines1.reshape(-1,3)
img5,img6 = drawlines(img1,img2,lines1,pts1,pts2)
# Find epilines corresponding to points in left image (first image) and
# drawing its lines on right image
lines2 = cv2.computeCorrespondEpilines(pts1.reshape(-1,1,2), 1,F)
lines2 = lines2.reshape(-1,3)
img3,img4 = drawlines(img2,img1,lines2,pts2,pts1)
plt.subplot(121),plt.imshow(img5)
plt.subplot(122),plt.imshow(img3)
plt.show()
Below is the result we get:
.. image:: images/epiresult.jpg
:alt: Epilines
:align: center
You can see in the left image that all epilines are converging at a point outside the image at right side. That meeting point is the epipole.
For better results, images with good resolution and many non-planar points should be used.
Additional Resources
==========================
Exercises
=============
#. One important topic is the forward movement of camera. Then epipoles will be seen at the same locations in both with epilines emerging from a fixed point. `See this discussion <http://answers.opencv.org/question/17912/location-of-epipole/>`_.
#. Fundamental Matrix estimation is sensitive to quality of matches, outliers etc. It becomes worse when all selected matches lie on the same plane. `Check this discussion <http://answers.opencv.org/question/18125/epilines-not-correct/>`_.

View File

@ -1,132 +0,0 @@
.. _pose_estimation:
Pose Estimation
*********************
Goal
==========
In this section,
* We will learn to exploit calib3d module to create some 3D effects in images.
Basics
========
This is going to be a small section. During the last session on camera calibration, you have found the camera matrix, distortion coefficients etc. Given a pattern image, we can utilize the above information to calculate its pose, or how the object is situated in space, like how it is rotated, how it is displaced etc. For a planar object, we can assume Z=0, such that, the problem now becomes how camera is placed in space to see our pattern image. So, if we know how the object lies in the space, we can draw some 2D diagrams in it to simulate the 3D effect. Let's see how to do it.
Our problem is, we want to draw our 3D coordinate axis (X, Y, Z axes) on our chessboard's first corner. X axis in blue color, Y axis in green color and Z axis in red color. So in-effect, Z axis should feel like it is perpendicular to our chessboard plane.
First, let's load the camera matrix and distortion coefficients from the previous calibration result.
::
import cv2
import numpy as np
import glob
# Load previously saved data
with np.load('B.npz') as X:
mtx, dist, _, _ = [X[i] for i in ('mtx','dist','rvecs','tvecs')]
Now let's create a function, ``draw`` which takes the corners in the chessboard (obtained using **cv2.findChessboardCorners()**) and **axis points** to draw a 3D axis.
::
def draw(img, corners, imgpts):
corner = tuple(corners[0].ravel())
img = cv2.line(img, corner, tuple(imgpts[0].ravel()), (255,0,0), 5)
img = cv2.line(img, corner, tuple(imgpts[1].ravel()), (0,255,0), 5)
img = cv2.line(img, corner, tuple(imgpts[2].ravel()), (0,0,255), 5)
return img
Then as in previous case, we create termination criteria, object points (3D points of corners in chessboard) and axis points. Axis points are points in 3D space for drawing the axis. We draw axis of length 3 (units will be in terms of chess square size since we calibrated based on that size). So our X axis is drawn from (0,0,0) to (3,0,0), and likewise the Y axis from (0,0,0) to (0,3,0). For Z axis, it is drawn from (0,0,0) to (0,0,-3). Negative denotes it is drawn towards the camera.
::
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
objp = np.zeros((6*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)
axis = np.float32([[3,0,0], [0,3,0], [0,0,-3]]).reshape(-1,3)
Now, as usual, we load each image. Search for 7x6 grid. If found, we refine the corners to sub-pixel accuracy. Then to calculate the rotation and translation, we use the function, **cv2.solvePnPRansac()**. Once we have those transformation matrices, we use them to project our **axis points** to the image plane. In simple words, we find the points on image plane corresponding to each of (3,0,0),(0,3,0),(0,0,-3) in 3D space. Once we get them, we draw lines from the first corner to each of these points using our ``draw()`` function. Done !!!
::
for fname in glob.glob('left*.jpg'):
img = cv2.imread(fname)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret, corners = cv2.findChessboardCorners(gray, (7,6),None)
if ret == True:
corners2 = cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)
# Find the rotation and translation vectors.
rvecs, tvecs, inliers = cv2.solvePnPRansac(objp, corners2, mtx, dist)
# project 3D points to image plane
imgpts, jac = cv2.projectPoints(axis, rvecs, tvecs, mtx, dist)
img = draw(img,corners2,imgpts)
cv2.imshow('img',img)
k = cv2.waitKey(0) & 0xff
if k == 's':
cv2.imwrite(fname[:6]+'.png', img)
cv2.destroyAllWindows()
See some results below. Notice that each axis is 3 squares long.:
.. image:: images/pose_1.jpg
:alt: Pose Estimation
:align: center
Render a Cube
---------------
If you want to draw a cube, modify the draw() function and axis points as follows.
Modified draw() function:
::
def draw(img, corners, imgpts):
imgpts = np.int32(imgpts).reshape(-1,2)
# draw ground floor in green
img = cv2.drawContours(img, [imgpts[:4]],-1,(0,255,0),-3)
# draw pillars in blue color
for i,j in zip(range(4),range(4,8)):
img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]),(255),3)
# draw top layer in red color
img = cv2.drawContours(img, [imgpts[4:]],-1,(0,0,255),3)
return img
Modified axis points. They are the 8 corners of a cube in 3D space:
::
axis = np.float32([[0,0,0], [0,3,0], [3,3,0], [3,0,0],
[0,0,-3],[0,3,-3],[3,3,-3],[3,0,-3] ])
And look at the result below:
.. image:: images/pose_2.jpg
:alt: Pose Estimation
:align: center
If you are interested in graphics, augmented reality etc, you can use OpenGL to render more complicated figures.
Additional Resources
===========================
Exercises
===========

View File

@ -1,79 +0,0 @@
.. _PY_Table-Of-Content-Calib:
Camera Calibration and 3D Reconstruction
----------------------------------------------
* :ref:`calibration`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|calib_1| Let's find how good is our camera. Is there any distortion in images taken with it? If so how to correct it?
=========== ======================================================
.. |calib_1| image:: images/calibration_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`pose_estimation`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|calib_2| This is a small section which will help you to create some cool 3D effects with calib module.
=========== ======================================================
.. |calib_2| image:: images/pose_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`epipolar_geometry`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|calib_3| Let's understand epipolar geometry and epipolar constraint.
=========== ======================================================
.. |calib_3| image:: images/epipolar_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`py_depthmap`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|calib_4| Extract depth information from 2D images.
=========== ======================================================
.. |calib_4| image:: images/depthmap_icon.jpg
:height: 90pt
:width: 90pt
.. raw:: latex
\pagebreak
.. We use a custom table of content format and as the table of content only informs Sphinx about the hierarchy of the files, no need to show it.
.. toctree::
:hidden:
../py_calibration/py_calibration
../py_pose/py_pose
../py_epipolar_geometry/py_epipolar_geometry
../py_depthmap/py_depthmap

View File

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 2.0 KiB

View File

Before

Width:  |  Height:  |  Size: 3.1 KiB

After

Width:  |  Height:  |  Size: 3.1 KiB

View File

Before

Width:  |  Height:  |  Size: 4.1 KiB

After

Width:  |  Height:  |  Size: 4.1 KiB

View File

Before

Width:  |  Height:  |  Size: 2.9 KiB

After

Width:  |  Height:  |  Size: 2.9 KiB

View File

@ -1,181 +0,0 @@
.. _Basic_Ops:
Basic Operations on Images
******************************
Goal
=======
Learn to:
* Access pixel values and modify them
* Access image properties
* Setting Region of Image (ROI)
* Splitting and Merging images
Almost all the operations in this section are mainly related to Numpy rather than OpenCV. A good knowledge of Numpy is required to write better optimized code with OpenCV.
*( Examples will be shown in Python terminal since most of them are just single line codes )*
Accessing and Modifying pixel values
=======================================
Let's load a color image first:
::
>>> import cv2
>>> import numpy as np
>>> img = cv2.imread('messi5.jpg')
You can access a pixel value by its row and column coordinates. For BGR image, it returns an array of Blue, Green, Red values. For grayscale image, just corresponding intensity is returned.
::
>>> px = img[100,100]
>>> print px
[157 166 200]
# accessing only blue pixel
>>> blue = img[100,100,0]
>>> print blue
157
You can modify the pixel values the same way.
::
>>> img[100,100] = [255,255,255]
>>> print img[100,100]
[255 255 255]
.. warning:: Numpy is an optimized library for fast array calculations. So simply accessing each and every pixel value and modifying it will be very slow and it is discouraged.
.. note:: Above mentioned method is normally used for selecting a region of array, say first 5 rows and last 3 columns like that. For individual pixel access, Numpy array methods, ``array.item()`` and ``array.itemset()`` are considered to be better. But ``array.item()`` always returns a scalar. So if you want to access all B,G,R values, you need to call ``array.item()`` separately for all.
Better pixel accessing and editing method :
.. code-block:: python
# accessing RED value
>>> img.item(10,10,2)
59
# modifying RED value
>>> img.itemset((10,10,2),100)
>>> img.item(10,10,2)
100
Accessing Image Properties
=============================
Image properties include number of rows, columns and channels, type of image data, number of pixels etc.
Shape of image is accessed by ``img.shape``. It returns a tuple of number of rows, columns and channels (if image is color):
::
>>> print img.shape
(342, 548, 3)
.. note:: If image is grayscale, tuple returned contains only number of rows and columns. So it is a good method to check if loaded image is grayscale or color image.
Total number of pixels is accessed by ``img.size``:
::
>>> print img.size
562248
Image datatype is obtained by ``img.dtype``:
::
>>> print img.dtype
uint8
.. note:: ``img.dtype`` is very important while debugging because a large number of errors in OpenCV-Python code are caused by invalid datatype.
Image ROI
===========
Sometimes, you will have to play with certain region of images. For eye detection in images, first face detection is done all over the image and when face is obtained, we select the face region alone and search for eyes inside it instead of searching whole image. It improves accuracy (because eyes are always on faces :D ) and performance (because we search for a small area)
ROI is again obtained using Numpy indexing. Here I am selecting the ball and copying it to another region in the image:
::
>>> ball = img[280:340, 330:390]
>>> img[273:333, 100:160] = ball
Check the results below:
.. image:: images/roi.jpg
:alt: Image ROI
:align: center
Splitting and Merging Image Channels
======================================
Sometimes you will need to work separately on B,G,R channels of image. Then you need to split the BGR images to single planes. Or another time, you may need to join these individual channels to BGR image. You can do it simply by:
::
>>> b,g,r = cv2.split(img)
>>> img = cv2.merge((b,g,r))
Or
>>> b = img[:,:,0]
Suppose you want to set all the red pixels to zero; you need not split the channels like this and then set one of them to zero. You can simply use Numpy indexing, which is faster.
::
>>> img[:,:,2] = 0
.. warning:: ``cv2.split()`` is a costly operation (in terms of time). So do it only if you need it. Otherwise go for Numpy indexing.
Making Borders for Images (Padding)
====================================
If you want to create a border around the image, something like a photo frame, you can use **cv2.copyMakeBorder()** function. But it has more applications for convolution operation, zero padding etc. This function takes following arguments:
* **src** - input image
* **top**, **bottom**, **left**, **right** - border width in number of pixels in corresponding directions
* **borderType** - Flag defining what kind of border to be added. It can be following types:
* **cv2.BORDER_CONSTANT** - Adds a constant colored border. The value should be given as next argument.
* **cv2.BORDER_REFLECT** - Border will be mirror reflection of the border elements, like this : *fedcba|abcdefgh|hgfedcb*
* **cv2.BORDER_REFLECT_101** or **cv2.BORDER_DEFAULT** - Same as above, but with a slight change, like this : *gfedcb|abcdefgh|gfedcba*
* **cv2.BORDER_REPLICATE** - Last element is replicated throughout, like this: *aaaaaa|abcdefgh|hhhhhhh*
* **cv2.BORDER_WRAP** - The image is wrapped around, so the border is taken from the opposite edge, like this : *cdefgh|abcdefgh|abcdefg*
* **value** - Color of border if border type is ``cv2.BORDER_CONSTANT``
Below is a sample code demonstrating all these border types for better understanding:
::
import cv2
import numpy as np
from matplotlib import pyplot as plt
BLUE = [255,0,0]
img1 = cv2.imread('opencv_logo.png')
replicate = cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_REPLICATE)
reflect = cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_REFLECT)
reflect101 = cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_REFLECT_101)
wrap = cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_WRAP)
constant= cv2.copyMakeBorder(img1,10,10,10,10,cv2.BORDER_CONSTANT,value=BLUE)
plt.subplot(231),plt.imshow(img1,'gray'),plt.title('ORIGINAL')
plt.subplot(232),plt.imshow(replicate,'gray'),plt.title('REPLICATE')
plt.subplot(233),plt.imshow(reflect,'gray'),plt.title('REFLECT')
plt.subplot(234),plt.imshow(reflect101,'gray'),plt.title('REFLECT_101')
plt.subplot(235),plt.imshow(wrap,'gray'),plt.title('WRAP')
plt.subplot(236),plt.imshow(constant,'gray'),plt.title('CONSTANT')
plt.show()
See the result below. (Image is displayed with matplotlib. So RED and BLUE planes will be interchanged):
.. image:: images/border.jpg
:alt: Border Types
:align: center
Additional Resources
=========================
Exercises
===========

View File

@ -1,115 +0,0 @@
.. _Image_Arithmetics:
Arithmetic Operations on Images
*********************************
Goal
=====
* Learn several arithmetic operations on images like addition, subtraction, bitwise operations etc.
* You will learn these functions : **cv2.add()**, **cv2.addWeighted()** etc.
Image Addition
================
You can add two images by OpenCV function, ``cv2.add()`` or simply by numpy operation, ``res = img1 + img2``. Both images should be of same depth and type, or second image can just be a scalar value.
.. note:: There is a difference between OpenCV addition and Numpy addition. OpenCV addition is a saturated operation while Numpy addition is a modulo operation.
For example, consider below sample:
::
>>> x = np.uint8([250])
>>> y = np.uint8([10])
>>> print cv2.add(x,y) # 250+10 = 260 => 255
[[255]]
>>> print x+y # 250+10 = 260 % 256 = 4
[4]
It will be more visible when you add two images. OpenCV function will provide a better result. So it is always better to stick to OpenCV functions.
Image Blending
=================
This is also image addition, but different weights are given to images so that it gives a feeling of blending or transparency. Images are added as per the equation below:
.. math::
g(x) = (1 - \alpha)f_{0}(x) + \alpha f_{1}(x)
By varying :math:`\alpha` from :math:`0 \rightarrow 1`, you can perform a cool transition between one image to another.
Here I took two images to blend them together. First image is given a weight of 0.7 and second image is given 0.3. ``cv2.addWeighted()`` applies following equation on the image.
.. math::
dst = \alpha \cdot img1 + \beta \cdot img2 + \gamma
Here :math:`\gamma` is taken as zero.
::
img1 = cv2.imread('ml.png')
img2 = cv2.imread('opencv_logo.jpg')
dst = cv2.addWeighted(img1,0.7,img2,0.3,0)
cv2.imshow('dst',dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
Check the result below:
.. image:: images/blending.jpg
:alt: Image Blending
:align: center
Bitwise Operations
===================
This includes bitwise AND, OR, NOT and XOR operations. They will be highly useful while extracting any part of the image (as we will see in coming chapters), defining and working with non-rectangular ROI etc. Below we will see an example on how to change a particular region of an image.
I want to put OpenCV logo above an image. If I add two images, it will change color. If I blend it, I get a transparent effect. But I want it to be opaque. If it was a rectangular region, I could use ROI as we did in last chapter. But OpenCV logo is not a rectangular shape. So you can do it with bitwise operations as below:
::
# Load two images
img1 = cv2.imread('messi5.jpg')
img2 = cv2.imread('opencv_logo.png')
# I want to put logo on top-left corner, So I create a ROI
rows,cols,channels = img2.shape
roi = img1[0:rows, 0:cols ]
# Now create a mask of logo and create its inverse mask also
img2gray = cv2.cvtColor(img2,cv2.COLOR_BGR2GRAY)
ret, mask = cv2.threshold(img2gray, 10, 255, cv2.THRESH_BINARY)
mask_inv = cv2.bitwise_not(mask)
# Now black-out the area of logo in ROI
img1_bg = cv2.bitwise_and(roi,roi,mask = mask_inv)
# Take only region of logo from logo image.
img2_fg = cv2.bitwise_and(img2,img2,mask = mask)
# Put logo in ROI and modify the main image
dst = cv2.add(img1_bg,img2_fg)
img1[0:rows, 0:cols ] = dst
cv2.imshow('res',img1)
cv2.waitKey(0)
cv2.destroyAllWindows()
See the result below. Left image shows the mask we created. Right image shows the final result. For more understanding, display all the intermediate images in the above code, especially ``img1_bg`` and ``img2_fg``.
.. image:: images/overlay.jpg
:alt: Logo overlay with bitwise operations
:align: center
Additional Resources
======================
Exercises
============
#. Create a slide show of images in a folder with smooth transition between images using ``cv2.addWeighted`` function

View File

@ -1,4 +0,0 @@
.. _Mathematical_Tools:
Mathematical Tools in OpenCV
********************************

View File

@ -1,141 +0,0 @@
.. _Optimization_Techniques:
Performance Measurement and Improvement Techniques
****************************************************
Goal
======
In image processing, since you are dealing with a large number of operations per second, it is mandatory that your code not only provides the correct solution, but also provides it in the fastest manner. So in this chapter, you will learn
* To measure the performance of your code.
* Some tips to improve the performance of your code.
* You will see these functions : **cv2.getTickCount**, **cv2.getTickFrequency** etc.
Apart from OpenCV, Python also provides a module **time** which is helpful in measuring the time of execution. Another module **profile** helps to get detailed report on the code, like how much time each function in the code took, how many times the function was called etc. But, if you are using IPython, all these features are integrated in a user-friendly manner. We will see some important ones, and for more details, check links in **Additional Resources** section.
Measuring Performance with OpenCV
==================================
**cv2.getTickCount** function returns the number of clock-cycles after a reference event (like the moment machine was switched ON) to the moment this function is called. So if you call it before and after the function execution, you get number of clock-cycles used to execute a function.
**cv2.getTickFrequency** function returns the frequency of clock-cycles, or the number of clock-cycles per second. So to find the time of execution in seconds, you can do following:
::
e1 = cv2.getTickCount()
# your code execution
e2 = cv2.getTickCount()
time = (e2 - e1)/ cv2.getTickFrequency()
We will demonstrate with following example. The following example applies median filtering with kernels of odd sizes ranging from 5 to 47. (Don't worry about what the result will look like, that is not our goal):
::
img1 = cv2.imread('messi5.jpg')
e1 = cv2.getTickCount()
for i in xrange(5,49,2):
img1 = cv2.medianBlur(img1,i)
e2 = cv2.getTickCount()
t = (e2 - e1)/cv2.getTickFrequency()
print t
# Result I got is 0.521107655 seconds
.. note:: You can do the same with ``time`` module. Instead of ``cv2.getTickCount``, use ``time.time()`` function. Then take the difference of two times.
Default Optimization in OpenCV
================================
Many of the OpenCV functions are optimized using SSE2, AVX etc. It contains unoptimized code also. So if our system supports these features, we should exploit them (almost all modern day processors support them). It is enabled by default while compiling. So OpenCV runs the optimized code if it is enabled, else it runs the unoptimized code. You can use **cv2.useOptimized()** to check if it is enabled/disabled and **cv2.setUseOptimized()** to enable/disable it. Let's see a simple example.
::
# check if optimization is enabled
In [5]: cv2.useOptimized()
Out[5]: True
In [6]: %timeit res = cv2.medianBlur(img,49)
10 loops, best of 3: 34.9 ms per loop
# Disable it
In [7]: cv2.setUseOptimized(False)
In [8]: cv2.useOptimized()
Out[8]: False
In [9]: %timeit res = cv2.medianBlur(img,49)
10 loops, best of 3: 64.1 ms per loop
See, optimized median filtering is ~2x faster than unoptimized version. If you check its source, you can see median filtering is SIMD optimized. So you can use this to enable optimization at the top of your code (remember it is enabled by default).
Measuring Performance in IPython
============================================================
Sometimes you may need to compare the performance of two similar operations. IPython gives you a magic command ``%timeit`` to perform this. It runs the code several times to get more accurate results. Once again, they are suitable to measure single line codes.
For example, do you know which of the following squaring operations is faster, ``x = 5; y = x**2``, ``x = 5; y = x*x``, ``x = np.uint8([5]); y = x*x`` or ``y = np.square(x)`` ? We will find it with %timeit in IPython shell.
::
In [10]: x = 5
In [11]: %timeit y=x**2
10000000 loops, best of 3: 73 ns per loop
In [12]: %timeit y=x*x
10000000 loops, best of 3: 58.3 ns per loop
In [15]: z = np.uint8([5])
In [17]: %timeit y=z*z
1000000 loops, best of 3: 1.25 us per loop
In [19]: %timeit y=np.square(z)
1000000 loops, best of 3: 1.16 us per loop
You can see that, ``x = 5 ; y = x*x`` is fastest and it is around 20x faster compared to Numpy. If you consider the array creation also, it may reach up to 100x faster. Cool, right? *(Numpy devs are working on this issue)*
.. note:: Python scalar operations are faster than Numpy scalar operations. So for operations including one or two elements, Python scalar is better than Numpy arrays. Numpy takes advantage when size of array is a little bit bigger.
We will try one more example. This time, we will compare the performance of **cv2.countNonZero()** and **np.count_nonzero()** for same image.
::
In [35]: %timeit z = cv2.countNonZero(img)
100000 loops, best of 3: 15.8 us per loop
In [36]: %timeit z = np.count_nonzero(img)
1000 loops, best of 3: 370 us per loop
See, OpenCV function is nearly 25x faster than Numpy function.
.. note:: Normally, OpenCV functions are faster than Numpy functions. So for same operation, OpenCV functions are preferred. But, there can be exceptions, especially when Numpy works with views instead of copies.
More IPython magic commands
=============================
There are several other magic commands to measure the performance, profiling, line profiling, memory measurement etc. They all are well documented. So only links to those docs are provided here. Interested readers are recommended to try them out.
Performance Optimization Techniques
=====================================
There are several techniques and coding methods to exploit maximum performance of Python and Numpy. Only relevant ones are noted here and links are given to important sources. The main thing to be noted here is that, first try to implement the algorithm in a simple manner. Once it is working, profile it, find the bottlenecks and optimize them.
#. Avoid using loops in Python as far as possible, especially double/triple loops etc. They are inherently slow.
#. Vectorize the algorithm/code to the maximum possible extent because Numpy and OpenCV are optimized for vector operations.
#. Exploit the cache coherence.
#. Never make copies of array unless it is needed. Try to use views instead. Array copying is a costly operation.
Even after doing all these operations, if your code is still slow, or use of large loops is inevitable, use additional libraries like Cython to make it faster.
Additional Resources
======================
1. `Python Optimization Techniques <http://wiki.python.org/moin/PythonSpeed/PerformanceTips>`_
2. Scipy Lecture Notes - `Advanced Numpy <http://scipy-lectures.github.io/advanced/advanced_numpy/index.html#advanced-numpy>`_
3. `Timing and Profiling in IPython <http://pynash.org/2013/03/06/timing-and-profiling.html>`_
Exercises
============

View File

@ -1,75 +0,0 @@
.. _PY_Table-Of-Content-Core:
Core Operations
-----------------------------------------------------------
* :ref:`Basic_Ops`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|core_1| Learn to read and edit pixel values, working with image ROI and other basic operations.
=========== ======================================================
.. |core_1| image:: images/pixel_ops.jpg
:height: 90pt
:width: 90pt
* :ref:`Image_Arithmetics`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|core_2| Perform arithmetic operations on images
=========== ======================================================
.. |core_2| image:: images/image_arithmetic.jpg
:height: 90pt
:width: 90pt
* :ref:`Optimization_Techniques`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|core_4| Getting a solution is important. But getting it in the fastest way is more important. Learn to check the speed of your code, optimize the code etc.
=========== ======================================================
.. |core_4| image:: images/speed.jpg
:height: 90pt
:width: 90pt
* :ref:`Mathematical_Tools`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|core_5| Learn some of the mathematical tools provided by OpenCV like PCA, SVD etc.
=========== ======================================================
.. |core_5| image:: images/maths_tools.jpg
:height: 90pt
:width: 90pt
.. raw:: latex
\pagebreak
.. We use a custom table of content format and as the table of content only informs Sphinx about the hierarchy of the files, no need to show it.
.. toctree::
:hidden:
../py_basic_ops/py_basic_ops
../py_image_arithmetics/py_image_arithmetics
../py_optimization/py_optimization
../py_maths_tools/py_maths_tools

View File

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

Before

Width:  |  Height:  |  Size: 3.1 KiB

After

Width:  |  Height:  |  Size: 3.1 KiB

View File

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 2.8 KiB

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

Before

Width:  |  Height:  |  Size: 7.1 KiB

After

Width:  |  Height:  |  Size: 7.1 KiB

View File

Before

Width:  |  Height:  |  Size: 3.7 KiB

After

Width:  |  Height:  |  Size: 3.7 KiB

View File

Before

Width:  |  Height:  |  Size: 3.4 KiB

After

Width:  |  Height:  |  Size: 3.4 KiB

View File

Before

Width:  |  Height:  |  Size: 3.4 KiB

After

Width:  |  Height:  |  Size: 3.4 KiB

View File

@ -1,63 +0,0 @@
.. _BRIEF:
BRIEF (Binary Robust Independent Elementary Features)
***********************************************************
Goal
=======
In this chapter
* We will see the basics of BRIEF algorithm
Theory
=============
We know SIFT uses 128-dim vector for descriptors. Since it is using floating point numbers, it takes basically 512 bytes. Similarly SURF also takes minimum of 256 bytes (for 64-dim). Creating such a vector for thousands of features takes a lot of memory, which is not feasible for resource-constrained applications, especially for embedded systems. The larger the memory, the longer the time it takes for matching.
But all these dimensions may not be needed for actual matching. We can compress it using several methods like PCA, LDA etc. Even other methods like hashing using LSH (Locality Sensitive Hashing) is used to convert these SIFT descriptors in floating point numbers to binary strings. These binary strings are used to match features using Hamming distance. This provides better speed-up because finding hamming distance is just applying XOR and bit count, which are very fast in modern CPUs with SSE instructions. But here, we need to find the descriptors first, then only we can apply hashing, which doesn't solve our initial problem on memory.
BRIEF comes into picture at this moment. It provides a shortcut to find the binary strings directly without finding descriptors. It takes smoothened image patch and selects a set of :math:`n_d` (x,y) location pairs in a unique way (explained in paper). Then some pixel intensity comparisons are done on these location pairs. For example, let the first location pair be :math:`p` and :math:`q`. If :math:`I(p) < I(q)`, then its result is 1, else it is 0. This is applied for all the :math:`n_d` location pairs to get a :math:`n_d`-dimensional bitstring.
This :math:`n_d` can be 128, 256 or 512. OpenCV supports all of these, but by default, it would be 256 (OpenCV represents it in bytes. So the values will be 16, 32 and 64). So once you get this, you can use Hamming Distance to match these descriptors.
One important point is that BRIEF is a feature descriptor, it doesn't provide any method to find the features. So you will have to use any other feature detectors like SIFT, SURF etc. The paper recommends to use CenSurE which is a fast detector and BRIEF works even slightly better for CenSurE points than for SURF points.
In short, BRIEF is a faster method for feature descriptor calculation and matching. It also provides high recognition rate unless there is large in-plane rotation.
BRIEF in OpenCV
=====================
Below code shows the computation of BRIEF descriptors with the help of CenSurE detector. (CenSurE detector is called STAR detector in OpenCV)
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
img = cv2.imread('simple.jpg',0)
# Initiate STAR detector
star = cv2.FeatureDetector_create("STAR")
# Initiate BRIEF extractor
brief = cv2.DescriptorExtractor_create("BRIEF")
# find the keypoints with STAR
kp = star.detect(img,None)
# compute the descriptors with BRIEF
kp, des = brief.compute(img, kp)
print brief.getInt('bytes')
print des.shape
The function ``brief.getInt('bytes')`` gives the :math:`n_d` size used in bytes. By default it is 32. Next one is matching, which will be done in another chapter.
Additional Resources
==========================
#. Michael Calonder, Vincent Lepetit, Christoph Strecha, and Pascal Fua, "BRIEF: Binary Robust Independent Elementary Features", 11th European Conference on Computer Vision (ECCV), Heraklion, Crete. LNCS Springer, September 2010.
#. LSH (Locality Sensitive Hashing) at wikipedia.

View File

@ -1,136 +0,0 @@
.. _FAST:
FAST Algorithm for Corner Detection
*************************************
Goal
=======
In this chapter,
* We will understand the basics of FAST algorithm
* We will find corners using OpenCV functionalities for FAST algorithm.
Theory
=========
We saw several feature detectors and many of them are really good. But when looking from a real-time application point of view, they are not fast enough. One best example would be a SLAM (Simultaneous Localization and Mapping) mobile robot, which has limited computational resources.
As a solution to this, FAST (Features from Accelerated Segment Test) algorithm was proposed by Edward Rosten and Tom Drummond in their paper "Machine learning for high-speed corner detection" in 2006 (Later revised it in 2010). A basic summary of the algorithm is presented below. Refer original paper for more details (All the images are taken from original paper).
Feature Detection using FAST
------------------------------
1. Select a pixel :math:`p` in the image which is to be identified as an interest point or not. Let its intensity be :math:`I_p`.
2. Select appropriate threshold value :math:`t`.
3. Consider a circle of 16 pixels around the pixel under test. (See the image below)
.. image:: images/fast_speedtest.jpg
:alt: A corner in the image
:align: center
4. Now the pixel :math:`p` is a corner if there exists a set of :math:`n` contiguous pixels in the circle (of 16 pixels) which are all brighter than :math:`I_p + t`, or all darker than :math:`I_p - t`. (Shown as white dash lines in the above image). :math:`n` was chosen to be 12.
5. A **high-speed test** was proposed to exclude a large number of non-corners. This test examines only the four pixels at 1, 9, 5 and 13 (First 1 and 9 are tested if they are too bright or too dark. If so, then 5 and 13 are checked). If :math:`p` is a corner, then at least three of these must all be brighter than :math:`I_p + t` or darker than :math:`I_p - t`. If neither of these is the case, then :math:`p` cannot be a corner. The full segment test criterion can then be applied to the passed candidates by examining all pixels in the circle. This detector in itself exhibits high performance, but there are several weaknesses:
* It does not reject as many candidates for n < 12.
* The choice of pixels is not optimal because its efficiency depends on ordering of the questions and distribution of corner appearances.
* Results of high-speed tests are thrown away.
* Multiple features are detected adjacent to one another.
First 3 points are addressed with a machine learning approach. Last one is addressed using non-maximal suppression.
Machine Learning a Corner Detector
------------------------------------
1. Select a set of images for training (preferably from the target application domain)
2. Run FAST algorithm on every image to find feature points.
3. For every feature point, store the 16 pixels around it as a vector. Do it for all the images to get feature vector :math:`P`.
4. Each pixel (say :math:`x`) in these 16 pixels can have one of the following three states:
.. image:: images/fast_eqns.jpg
:alt: FAST equation
:align: center
5. Depending on these states, the feature vector :math:`P` is subdivided into 3 subsets, :math:`P_d`, :math:`P_s`, :math:`P_b`.
6. Define a new boolean variable, :math:`K_p`, which is true if :math:`p` is a corner and false otherwise.
7. Use the ID3 algorithm (decision tree classifier) to query each subset using the variable :math:`K_p` for the knowledge about the true class. It selects the :math:`x` which yields the most information about whether the candidate pixel is a corner, measured by the entropy of :math:`K_p`.
8. This is recursively applied to all the subsets until its entropy is zero.
9. The decision tree so created is used for fast detection in other images.
Non-maximal Suppression
---------------------------
Detecting multiple interest points in adjacent locations is another problem. It is solved by using Non-maximum Suppression.
1. Compute a score function, :math:`V` for all the detected feature points. :math:`V` is the sum of absolute difference between :math:`p` and 16 surrounding pixels values.
2. Consider two adjacent keypoints and compute their :math:`V` values.
3. Discard the one with lower :math:`V` value.
Summary
-----------
It is several times faster than other existing corner detectors.
But it is not robust to high levels of noise. It is dependent on a threshold.
FAST Feature Detector in OpenCV
==================================
It is called as any other feature detector in OpenCV. If you want, you can specify the threshold, whether non-maximum suppression to be applied or not, the neighborhood to be used etc.
For the neighborhood, three flags are defined, ``cv2.FAST_FEATURE_DETECTOR_TYPE_5_8``, ``cv2.FAST_FEATURE_DETECTOR_TYPE_7_12`` and ``cv2.FAST_FEATURE_DETECTOR_TYPE_9_16``. Below is a simple code on how to detect and draw the FAST feature points.
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
img = cv2.imread('simple.jpg',0)
# Initiate FAST object with default values
fast = cv2.FastFeatureDetector()
# find and draw the keypoints
kp = fast.detect(img,None)
img2 = cv2.drawKeypoints(img, kp, color=(255,0,0))
# Print all default params
print "Threshold: ", fast.getInt('threshold')
print "nonmaxSuppression: ", fast.getBool('nonmaxSuppression')
print "neighborhood: ", fast.getInt('type')
print "Total Keypoints with nonmaxSuppression: ", len(kp)
cv2.imwrite('fast_true.png',img2)
# Disable nonmaxSuppression
fast.setBool('nonmaxSuppression',0)
kp = fast.detect(img,None)
print "Total Keypoints without nonmaxSuppression: ", len(kp)
img3 = cv2.drawKeypoints(img, kp, color=(255,0,0))
cv2.imwrite('fast_false.png',img3)
See the results. First image shows FAST with nonmaxSuppression and second one without nonmaxSuppression:
.. image:: images/fast_kp.jpg
:alt: FAST Keypoints
:align: center
Additional Resources
=========================
#. Edward Rosten and Tom Drummond, “Machine learning for high speed corner detection” in 9th European Conference on Computer Vision, vol. 1, 2006, pp. 430-443.
#. Edward Rosten, Reid Porter, and Tom Drummond, "Faster and better: a machine learning approach to corner detection" in IEEE Trans. Pattern Analysis and Machine Intelligence, 2010, vol 32, pp. 105-119.
Exercises
============

View File

@ -1,110 +0,0 @@
.. _PY_feature_homography:
Feature Matching + Homography to find Objects
***********************************************
Goal
======
In this chapter,
* We will mix up the feature matching and findHomography from calib3d module to find known objects in a complex image.
Basics
=========
So what we did in last session? We used a queryImage, found some feature points in it, we took another trainImage, found the features in that image too and we found the best matches among them. In short, we found locations of some parts of an object in another cluttered image. This information is sufficient to find the object exactly on the trainImage.
For that, we can use a function from calib3d module, ie **cv2.findHomography()**. If we pass the set of points from both the images, it will find the perspective transformation of that object. Then we can use **cv2.perspectiveTransform()** to find the object. It needs at least four correct points to find the transformation.
We have seen that there can be some possible errors while matching which may affect the result. To solve this problem, algorithm uses RANSAC or LEAST_MEDIAN (which can be decided by the flags). So good matches which provide correct estimation are called inliers and remaining are called outliers. **cv2.findHomography()** returns a mask which specifies the inlier and outlier points.
So let's do it !!!
Code
=========
First, as usual, let's find SIFT features in images and apply the ratio test to find the best matches.
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
MIN_MATCH_COUNT = 10
img1 = cv2.imread('box.png',0) # queryImage
img2 = cv2.imread('box_in_scene.png',0) # trainImage
# Initiate SIFT detector
sift = cv2.SIFT()
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1,des2,k=2)
# store all the good matches as per Lowe's ratio test.
good = []
for m,n in matches:
if m.distance < 0.7*n.distance:
good.append(m)
Now we set a condition that at least 10 matches (defined by MIN_MATCH_COUNT) are to be there to find the object. Otherwise simply show a message saying not enough matches are present.
If enough matches are found, we extract the locations of matched keypoints in both the images. They are passed to find the perspective transformation. Once we get this 3x3 transformation matrix, we use it to transform the corners of queryImage to corresponding points in trainImage. Then we draw it.
::
if len(good)>MIN_MATCH_COUNT:
src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
matchesMask = mask.ravel().tolist()
h,w = img1.shape
pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
dst = cv2.perspectiveTransform(pts,M)
img2 = cv2.polylines(img2,[np.int32(dst)],True,255,3, cv2.LINE_AA)
else:
print "Not enough matches are found - %d/%d" % (len(good),MIN_MATCH_COUNT)
matchesMask = None
Finally we draw our inliers (if successfully found the object) or matching keypoints (if failed).
::
draw_params = dict(matchColor = (0,255,0), # draw matches in green color
singlePointColor = None,
matchesMask = matchesMask, # draw only inliers
flags = 2)
img3 = cv2.drawMatches(img1,kp1,img2,kp2,good,None,**draw_params)
plt.imshow(img3, 'gray'),plt.show()
See the result below. Object is marked in white color in cluttered image:
.. image:: images/homography_findobj.jpg
:alt: Finding object with feature homography
:align: center
Additional Resources
============================
Exercises
==================

View File

@ -1,154 +0,0 @@
.. _Harris_Corners:
Harris Corner Detection
****************************
Goal
=======
In this chapter,
* We will understand the concepts behind Harris Corner Detection.
* We will see the functions: **cv2.cornerHarris()**, **cv2.cornerSubPix()**
Theory
==========
In last chapter, we saw that corners are regions in the image with large variation in intensity in all the directions. One early attempt to find these corners was done by **Chris Harris & Mike Stephens** in their paper **A Combined Corner and Edge Detector** in 1988, so now it is called Harris Corner Detector. They took this simple idea to a mathematical form. It basically finds the difference in intensity for a displacement of :math:`(u,v)` in all directions. This is expressed as below:
.. math::
E(u,v) = \sum_{x,y} \underbrace{w(x,y)}_\text{window function} \, [\underbrace{I(x+u,y+v)}_\text{shifted intensity}-\underbrace{I(x,y)}_\text{intensity}]^2
Window function is either a rectangular window or gaussian window which gives weights to pixels underneath.
We have to maximize this function :math:`E(u,v)` for corner detection. That means, we have to maximize the second term. Applying Taylor Expansion to above equation and using some mathematical steps (please refer any standard text books you like for full derivation), we get the final equation as:
.. math::
E(u,v) \approx \begin{bmatrix} u & v \end{bmatrix} M \begin{bmatrix} u \\ v \end{bmatrix}
where
.. math::
M = \sum_{x,y} w(x,y) \begin{bmatrix}I_x I_x & I_x I_y \\
I_x I_y & I_y I_y \end{bmatrix}
Here, :math:`I_x` and :math:`I_y` are image derivatives in x and y directions respectively. (Can be easily found out using **cv2.Sobel()**).
Then comes the main part. After this, they created a score, basically an equation, which will determine if a window can contain a corner or not.
.. math::
R = det(M) - k(trace(M))^2
where
* :math:`det(M) = \lambda_1 \lambda_2`
* :math:`trace(M) = \lambda_1 + \lambda_2`
* :math:`\lambda_1` and :math:`\lambda_2` are the eigen values of M
So the values of these eigen values decide whether a region is corner, edge or flat.
* When :math:`|R|` is small, which happens when :math:`\lambda_1` and :math:`\lambda_2` are small, the region is flat.
* When :math:`R<0`, which happens when :math:`\lambda_1 >> \lambda_2` or vice versa, the region is edge.
* When :math:`R` is large, which happens when :math:`\lambda_1` and :math:`\lambda_2` are large and :math:`\lambda_1 \sim \lambda_2`, the region is a corner.
It can be represented in a nice picture as follows:
.. image:: images/harris_region.jpg
:alt: Classification of Image Points
:align: center
So the result of Harris Corner Detection is a grayscale image with these scores. Thresholding for a suitable score gives you the corners in the image. We will do it with a simple image.
Harris Corner Detector in OpenCV
====================================
OpenCV has the function **cv2.cornerHarris()** for this purpose. Its arguments are :
* **img** - Input image, it should be grayscale and float32 type.
* **blockSize** - It is the size of neighbourhood considered for corner detection
* **ksize** - Aperture parameter of Sobel derivative used.
* **k** - Harris detector free parameter in the equation.
See the example below:
::
import cv2
import numpy as np
filename = 'chessboard.jpg'
img = cv2.imread(filename)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
gray = np.float32(gray)
dst = cv2.cornerHarris(gray,2,3,0.04)
#result is dilated for marking the corners, not important
dst = cv2.dilate(dst,None)
# Threshold for an optimal value, it may vary depending on the image.
img[dst>0.01*dst.max()]=[0,0,255]
cv2.imshow('dst',img)
if cv2.waitKey(0) & 0xff == 27:
cv2.destroyAllWindows()
Below are the three results:
.. image:: images/harris_result.jpg
:alt: Harris Corner Detection
:align: center
Corner with SubPixel Accuracy
===============================
Sometimes, you may need to find the corners with maximum accuracy. OpenCV comes with a function **cv2.cornerSubPix()** which further refines the corners detected with sub-pixel accuracy. Below is an example. As usual, we need to find the harris corners first. Then we pass the centroids of these corners (There may be a bunch of pixels at a corner, we take their centroid) to refine them. Harris corners are marked in red pixels and refined corners are marked in green pixels. For this function, we have to define the criteria when to stop the iteration. We stop it after a specified number of iteration or a certain accuracy is achieved, whichever occurs first. We also need to define the size of neighbourhood it would search for corners.
::
import cv2
import numpy as np
filename = 'chessboard2.jpg'
img = cv2.imread(filename)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# find Harris corners
gray = np.float32(gray)
dst = cv2.cornerHarris(gray,2,3,0.04)
dst = cv2.dilate(dst,None)
ret, dst = cv2.threshold(dst,0.01*dst.max(),255,0)
dst = np.uint8(dst)
# find centroids
ret, labels, stats, centroids = cv2.connectedComponentsWithStats(dst)
# define the criteria to stop and refine the corners
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
corners = cv2.cornerSubPix(gray,np.float32(centroids),(5,5),(-1,-1),criteria)
# Now draw them
res = np.hstack((centroids,corners))
res = np.int0(res)
img[res[:,1],res[:,0]]=[0,0,255]
img[res[:,3],res[:,2]] = [0,255,0]
cv2.imwrite('subpixel5.png',img)
Below is the result, where some important locations are shown in zoomed window to visualize:
.. image:: images/subpixel3.png
:alt: Corner Detection with SubPixel Accuracy
:align: center
Additional Resources
======================
Exercises
============

View File

@ -1,52 +0,0 @@
.. _Features_Meaning:
Understanding Features
************************
Goal
=====
In this chapter, we will just try to understand what are features, why are they important, why corners are important etc.
Explanation
==============
Most of you will have played the jigsaw puzzle games. You get a lot of small pieces of an image, where you need to assemble them correctly to form a big real image. **The question is, how you do it?** What about projecting the same theory to a computer program so that computer can play jigsaw puzzles? If the computer can play jigsaw puzzles, why can't we give a lot of real-life images of a good natural scenery to computer and tell it to stitch all those images to a big single image? If the computer can stitch several natural images to one, what about giving a lot of pictures of a building or any structure and tell computer to create a 3D model out of it?
Well, the questions and imaginations continue. But it all depends on the most basic question: How do you play jigsaw puzzles? How do you arrange lots of scrambled image pieces into a big single image? How can you stitch a lot of natural images to a single image?
The answer is, we are looking for specific patterns or specific features which are unique, which can be easily tracked, which can be easily compared. If we go for a definition of such a feature, we may find it difficult to express it in words, but we know what are they. If some one asks you to point out one good feature which can be compared across several images, you can point out one. That is why, even small children can simply play these games. We search for these features in an image, we find them, we find the same features in other images, we align them. That's it. (In jigsaw puzzle, we look more into continuity of different images). All these abilities are present in us inherently.
So our one basic question expands to more in number, but becomes more specific. **What are these features?**. *(The answer should be understandable to a computer also.)*
Well, it is difficult to say how humans find these features. It is already programmed in our brain. But if we look deep into some pictures and search for different patterns, we will find something interesting. For example, take below image:
.. image:: images/feature_building.jpg
:alt: Understanding features
:align: center
Image is very simple. At the top of image, six small image patches are given. Question for you is to find the exact location of these patches in the original image. How many correct results you can find ?
A and B are flat surfaces, and they are spread in a lot of area. It is difficult to find the exact location of these patches.
C and D are much simpler. They are edges of the building. You can find an approximate location, but exact location is still difficult. It is because, along the edge, it is same everywhere. Normal to the edge, it is different. So edge is a much better feature compared to flat area, but not good enough (It is good in jigsaw puzzle for comparing continuity of edges).
Finally, E and F are some corners of the building. And they can be easily found out. Because at corners, wherever you move this patch, it will look different. So they can be considered as a good feature. So now we move into a simpler (and widely used) image for better understanding.
.. image:: images/feature_simple.png
:alt: Features
:align: center
Just like above, blue patch is flat area and difficult to find and track. Wherever you move the blue patch, it looks the same. For black patch, it is an edge. If you move it in vertical direction (i.e. along the gradient) it changes. Put along the edge (parallel to edge), it looks the same. And for red patch, it is a corner. Wherever you move the patch, it looks different, means it is unique. So basically, corners are considered to be good features in an image. (Not just corners, in some cases blobs are considered good features).
So now we answered our question, "what are these features?". But next question arises. How do we find them? Or how do we find the corners?. That also we answered in an intuitive way, i.e., look for the regions in images which have maximum variation when moved (by a small amount) in all regions around it. This would be projected into computer language in coming chapters. So finding these image features is called **Feature Detection**.
So we found the features in image (Assume you did it). Once you found it, you should find the same in the other images. What we do? We take a region around the feature, we explain it in our own words, like "upper part is blue sky, lower part is building region, on that building there are some glasses etc" and you search for the same area in other images. Basically, you are describing the feature. Similar way, computer also should describe the region around the feature so that it can find it in other images. So called description is called **Feature Description**. Once you have the features and its description, you can find same features in all images and align them, stitch them or do whatever you want.
So in this module, we are looking to different algorithms in OpenCV to find features, describe them, match them etc.
Additional Resources
=======================
Exercises
===========

View File

@ -1,207 +0,0 @@
.. _Matcher:
Feature Matching
*********************************************
Goal
=====
In this chapter
* We will see how to match features in one image with others.
* We will use the Brute-Force matcher and FLANN Matcher in OpenCV
Basics of Brute-Force Matcher
===================================
Brute-Force matcher is simple. It takes the descriptor of one feature in first set and is matched with all other features in second set using some distance calculation. And the closest one is returned.
For BF matcher, first we have to create the BFMatcher object using **cv2.BFMatcher()**. It takes two optional params. First one is ``normType``. It specifies the distance measurement to be used. By default, it is ``cv2.NORM_L2``. It is good for SIFT, SURF etc (``cv2.NORM_L1`` is also there). For binary string based descriptors like ORB, BRIEF, BRISK etc, ``cv2.NORM_HAMMING`` should be used, which uses Hamming distance as measurement. If ORB is using ``WTA_K == 3 or 4``, ``cv2.NORM_HAMMING2`` should be used.
Second param is boolean variable, ``crossCheck`` which is false by default. If it is true, Matcher returns only those matches with value (i,j) such that i-th descriptor in set A has j-th descriptor in set B as the best match and vice-versa. That is, the two features in both sets should match each other. It provides consistent results, and is a good alternative to ratio test proposed by D.Lowe in SIFT paper.
Once it is created, two important methods are *BFMatcher.match()* and *BFMatcher.knnMatch()*. First one returns the best match. Second method returns `k` best matches where k is specified by the user. It may be useful when we need to do additional work on that.
Like we used cv2.drawKeypoints() to draw keypoints, **cv2.drawMatches()** helps us to draw the matches. It stacks two images horizontally and draw lines from first image to second image showing best matches. There is also **cv2.drawMatchesKnn** which draws all the k best matches. If k=2, it will draw two match-lines for each keypoint. So we have to pass a mask if we want to selectively draw it.
Let's see one example for each of SIFT and ORB (Both use different distance measurements).
Brute-Force Matching with ORB Descriptors
--------------------------------------------
Here, we will see a simple example on how to match features between two images. In this case, I have a queryImage and a trainImage. We will try to find the queryImage in trainImage using feature matching. ( The images are ``/samples/c/box.png`` and ``/samples/c/box_in_scene.png``)
We are using ORB descriptors to match features. So let's start with loading images, finding descriptors etc.
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
img1 = cv2.imread('box.png',0) # queryImage
img2 = cv2.imread('box_in_scene.png',0) # trainImage
# Initiate ORB detector
orb = cv2.ORB()
# find the keypoints and descriptors with ORB
kp1, des1 = orb.detectAndCompute(img1,None)
kp2, des2 = orb.detectAndCompute(img2,None)
Next we create a BFMatcher object with distance measurement ``cv2.NORM_HAMMING`` (since we are using ORB) and ``crossCheck`` is switched on for better results. Then we use Matcher.match() method to get the best matches in two images. We sort them in ascending order of their distances so that best matches (with low distance) come to front. Then we draw only first 10 matches (Just for sake of visibility. You can increase it as you like)
::
# create BFMatcher object
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
# Match descriptors.
matches = bf.match(des1,des2)
# Sort them in the order of their distance.
matches = sorted(matches, key = lambda x:x.distance)
# Draw first 10 matches.
img3 = cv2.drawMatches(img1,kp1,img2,kp2,matches[:10], flags=2)
plt.imshow(img3),plt.show()
Below is the result I got:
.. image:: images/matcher_result1.jpg
:alt: ORB Feature Matching with Brute-Force
:align: center
What is this Matcher Object?
-----------------------------------
The result of ``matches = bf.match(des1,des2)`` line is a list of DMatch objects. This DMatch object has following attributes:
* ``DMatch.distance`` - Distance between descriptors. The lower, the better it is.
* ``DMatch.trainIdx`` - Index of the descriptor in train descriptors
* ``DMatch.queryIdx`` - Index of the descriptor in query descriptors
* ``DMatch.imgIdx`` - Index of the train image.
Brute-Force Matching with SIFT Descriptors and Ratio Test
-------------------------------------------------------------
This time, we will use ``BFMatcher.knnMatch()`` to get k best matches. In this example, we will take k=2 so that we can apply ratio test explained by D.Lowe in his paper.
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
img1 = cv2.imread('box.png',0) # queryImage
img2 = cv2.imread('box_in_scene.png',0) # trainImage
# Initiate SIFT detector
sift = cv2.SIFT()
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)
# BFMatcher with default params
bf = cv2.BFMatcher()
matches = bf.knnMatch(des1,des2, k=2)
# Apply ratio test
good = []
for m,n in matches:
if m.distance < 0.75*n.distance:
good.append([m])
# cv2.drawMatchesKnn expects list of lists as matches.
img3 = cv2.drawMatchesKnn(img1,kp1,img2,kp2,good,flags=2)
plt.imshow(img3),plt.show()
See the result below:
.. image:: images/matcher_result2.jpg
:alt: SIFT Descriptor with ratio test
:align: center
FLANN based Matcher
==========================
FLANN stands for Fast Library for Approximate Nearest Neighbors. It contains a collection of algorithms optimized for fast nearest neighbor search in large datasets and for high dimensional features. It works faster than BFMatcher for large datasets. We will see the second example with FLANN based matcher.
For FLANN based matcher, we need to pass two dictionaries which specifies the algorithm to be used, its related parameters etc. First one is IndexParams. For various algorithms, the information to be passed is explained in FLANN docs. As a summary, for algorithms like SIFT, SURF etc. you can pass following:
::
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
While using ORB, you can pass the following. The commented values are recommended as per the docs, but it didn't provide required results in some cases. Other values worked fine.:
::
index_params= dict(algorithm = FLANN_INDEX_LSH,
table_number = 6, # 12
key_size = 12, # 20
multi_probe_level = 1) #2
Second dictionary is the SearchParams. It specifies the number of times the trees in the index should be recursively traversed. Higher values gives better precision, but also takes more time. If you want to change the value, pass ``search_params = dict(checks=100)``.
With this information, we are good to go.
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
img1 = cv2.imread('box.png',0) # queryImage
img2 = cv2.imread('box_in_scene.png',0) # trainImage
# Initiate SIFT detector
sift = cv2.SIFT()
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)
# FLANN parameters
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50) # or pass empty dictionary
flann = cv2.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1,des2,k=2)
# Need to draw only good matches, so create a mask
matchesMask = [[0,0] for i in xrange(len(matches))]
# ratio test as per Lowe's paper
for i,(m,n) in enumerate(matches):
if m.distance < 0.7*n.distance:
matchesMask[i]=[1,0]
draw_params = dict(matchColor = (0,255,0),
singlePointColor = (255,0,0),
matchesMask = matchesMask,
flags = 0)
img3 = cv2.drawMatchesKnn(img1,kp1,img2,kp2,matches,None,**draw_params)
plt.imshow(img3,),plt.show()
See the result below:
.. image:: images/matcher_flann.jpg
:alt: FLANN based matching
:align: center
Additional Resources
========================
Exercises
=================

View File

@ -1,75 +0,0 @@
.. _ORB:
ORB (Oriented FAST and Rotated BRIEF)
***************************************
Goal
======
In this chapter,
* We will see the basics of ORB
Theory
==========
As an OpenCV enthusiast, the most important thing about the ORB is that it came from "OpenCV Labs". This algorithm was brought up by Ethan Rublee, Vincent Rabaud, Kurt Konolige and Gary R. Bradski in their paper **ORB: An efficient alternative to SIFT or SURF** in 2011. As the title says, it is a good alternative to SIFT and SURF in computation cost, matching performance and mainly the patents. Yes, SIFT and SURF are patented and you are supposed to pay them for its use. But ORB is not !!!
ORB is basically a fusion of FAST keypoint detector and BRIEF descriptor with many modifications to enhance the performance. First it uses FAST to find keypoints, then applies Harris corner measure to find top N points among them. It also uses pyramid to produce multiscale-features. But one problem is that, FAST doesn't compute the orientation. So what about rotation invariance? Authors came up with following modification.
It computes the intensity weighted centroid of the patch with located corner at center. The direction of the vector from this corner point to centroid gives the orientation. To improve the rotation invariance, moments are computed with x and y which should be in a circular region of radius :math:`r`, where :math:`r` is the size of the patch.
Now for descriptors, ORB use BRIEF descriptors. But we have already seen that BRIEF performs poorly with rotation. So what ORB does is to "steer" BRIEF according to the orientation of keypoints. For any feature set of :math:`n` binary tests at location
:math:`(x_i, y_i)`, define a :math:`2 \times n` matrix, :math:`S` which contains the coordinates of these pixels. Then using the orientation of patch, :math:`\theta`, its rotation matrix is found and rotates the :math:`S` to get steered(rotated) version :math:`S_\theta`.
ORB discretizes the angle to increments of :math:`2 \pi /30` (12 degrees), and constructs a lookup table of precomputed BRIEF patterns. As long as the keypoint orientation :math:`\theta` is consistent across views, the correct set of points :math:`S_\theta` will be used to compute its descriptor.
BRIEF has an important property that each bit feature has a large variance and a mean near 0.5. But once it is oriented along keypoint direction, it loses this property and becomes more distributed. High variance makes a feature more discriminative, since it responds differentially to inputs. Another desirable property is to have the tests uncorrelated, since then each test will contribute to the result. To resolve all these, ORB runs a greedy search among all possible binary tests to find the ones that have both high variance and means close to 0.5, as well as being uncorrelated. The result is called **rBRIEF**.
For descriptor matching, multi-probe LSH which improves on the traditional LSH, is used. The paper says ORB is much faster than SURF and SIFT and ORB descriptor works better than SURF. ORB is a good choice in low-power devices for panorama stitching etc.
ORB in OpenCV
================
As usual, we have to create an ORB object with the function, **cv2.ORB()** or using feature2d common interface. It has a number of optional parameters. Most useful ones are ``nFeatures`` which denotes maximum number of features to be retained (by default 500), ``scoreType`` which denotes whether Harris score or FAST score to rank the features (by default, Harris score) etc. Another parameter, ``WTA_K`` decides number of points that produce each element of the oriented BRIEF descriptor. By default it is two, ie selects two points at a time. In that case, for matching, ``NORM_HAMMING`` distance is used. If WTA_K is 3 or 4, which takes 3 or 4 points to produce BRIEF descriptor, then matching distance is defined by ``NORM_HAMMING2``.
Below is a simple code which shows the use of ORB.
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
img = cv2.imread('simple.jpg',0)
# Initiate ORB detector
orb = cv2.ORB()
# find the keypoints with ORB
kp = orb.detect(img,None)
# compute the descriptors with ORB
kp, des = orb.compute(img, kp)
# draw only keypoints location,not size and orientation
img2 = cv2.drawKeypoints(img,kp,color=(0,255,0), flags=0)
plt.imshow(img2),plt.show()
See the result below:
.. image:: images/orb_kp.jpg
:alt: ORB Keypoints
:align: center
ORB feature matching, we will do in another chapter.
Additional Resources
==========================
#. Ethan Rublee, Vincent Rabaud, Kurt Konolige, Gary R. Bradski: ORB: An efficient alternative to SIFT or SURF. ICCV 2011: 2564-2571.
Exercises
==============

View File

@ -1,77 +0,0 @@
.. _shi_tomasi:
Shi-Tomasi Corner Detector & Good Features to Track
*******************************************************
Goal
=======
In this chapter,
* We will learn about another corner detector: the Shi-Tomasi Corner Detector
* We will see the function: **cv2.goodFeaturesToTrack()**
Theory
=========
In last chapter, we saw Harris Corner Detector. Later in 1994, J. Shi and C. Tomasi made a small modification to it in their paper **Good Features to Track** which shows better results compared to Harris Corner Detector. The scoring function in Harris Corner Detector was given by:
.. math::
R = \lambda_1 \lambda_2 - k(\lambda_1+\lambda_2)^2
Instead of this, Shi-Tomasi proposed:
.. math::
R = min(\lambda_1, \lambda_2)
If it is greater than a threshold value, it is considered as a corner. If we plot it in :math:`\lambda_1 - \lambda_2` space as we did in Harris Corner Detector, we get an image as below:
.. image:: images/shitomasi_space.png
:alt: Shi-Tomasi Corner Space
:align: center
From the figure, you can see that only when :math:`\lambda_1` and :math:`\lambda_2` are above a minimum value, :math:`\lambda_{min}`, it is considered as a corner (green region).
Code
=======
OpenCV has a function, **cv2.goodFeaturesToTrack()**. It finds N strongest corners in the image by Shi-Tomasi method (or Harris Corner Detection, if you specify it). As usual, image should be a grayscale image. Then you specify number of corners you want to find. Then you specify the quality level, which is a value between 0-1, which denotes the minimum quality of corner below which everyone is rejected. Then we provide the minimum euclidean distance between corners detected.
With all this information, the function finds corners in the image. All corners below quality level are rejected. Then it sorts the remaining corners based on quality in the descending order. Then function takes first strongest corner, throws away all the nearby corners in the range of minimum distance and returns N strongest corners.
In below example, we will try to find 25 best corners:
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
img = cv2.imread('simple.jpg')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
corners = cv2.goodFeaturesToTrack(gray,25,0.01,10)
corners = np.int0(corners)
for i in corners:
x,y = i.ravel()
cv2.circle(img,(x,y),3,255,-1)
plt.imshow(img),plt.show()
See the result below:
.. image:: images/shitomasi_block1.jpg
:alt: Shi-Tomasi Corners
:align: center
This function is more appropriate for tracking. We will see that when its time comes.
Additional Resources
======================
Exercises
============

View File

@ -1,139 +0,0 @@
.. _sift_intro:
Introduction to SIFT (Scale-Invariant Feature Transform)
*************************************************************
Goal
======
In this chapter,
* We will learn about the concepts of SIFT algorithm
* We will learn to find SIFT Keypoints and Descriptors.
Theory
========
In last couple of chapters, we saw some corner detectors like Harris etc. They are rotation-invariant, which means, even if the image is rotated, we can find the same corners. It is obvious because corners remain corners in rotated image also. But what about scaling? A corner may not be a corner if the image is scaled. For example, check a simple image below. A corner in a small image within a small window is flat when it is zoomed in the same window. So Harris corner is not scale invariant.
.. image:: images/sift_scale_invariant.jpg
:alt: Scale-Invariance
:align: center
So, in 2004, **D.Lowe**, University of British Columbia, came up with a new algorithm, Scale Invariant Feature Transform (SIFT) in his paper, **Distinctive Image Features from Scale-Invariant Keypoints**, which extracts keypoints and computes their descriptors. *(This paper is easy to understand and considered to be best material available on SIFT. So this explanation is just a short summary of this paper)*.
There are mainly four steps involved in SIFT algorithm. We will see them one-by-one.
1. Scale-space Extrema Detection
--------------------------------------
From the image above, it is obvious that we can't use the same window to detect keypoints with different scale. It is OK with small corner. But to detect larger corners we need larger windows. For this, scale-space filtering is used. In it, Laplacian of Gaussian is found for the image with various :math:`\sigma` values. LoG acts as a blob detector which detects blobs in various sizes due to change in :math:`\sigma`. In short, :math:`\sigma` acts as a scaling parameter. For eg, in the above image, Gaussian kernel with low :math:`\sigma` gives high value for small corner while Gaussian kernel with high :math:`\sigma` fits well for larger corner. So, we can find the local maxima across the scale and space which gives us a list of :math:`(x,y,\sigma)` values which means there is a potential keypoint at (x,y) at :math:`\sigma` scale.
But this LoG is a little costly, so SIFT algorithm uses Difference of Gaussians which is an approximation of LoG. Difference of Gaussian is obtained as the difference of Gaussian blurring of an image with two different :math:`\sigma`, let it be :math:`\sigma` and :math:`k\sigma`. This process is done for different octaves of the image in Gaussian Pyramid. It is represented in below image:
.. image:: images/sift_dog.jpg
:alt: Difference of Gaussian
:align: center
Once these DoGs are found, images are searched for local extrema over scale and space. For eg, one pixel in an image is compared with its 8 neighbours as well as 9 pixels in next scale and 9 pixels in previous scales. If it is a local extremum, it is a potential keypoint. It basically means that keypoint is best represented in that scale. It is shown in below image:
.. image:: images/sift_local_extrema.jpg
:alt: Difference of Gaussian
:align: center
Regarding different parameters, the paper gives some empirical data which can be summarized as, number of octaves = 4, number of scale levels = 5, initial :math:`\sigma=1.6`, :math:`k=\sqrt{2}` etc as optimal values.
2. Keypoint Localization
------------------------------------
Once potential keypoints locations are found, they have to be refined to get more accurate results. They used Taylor series expansion of scale space to get more accurate location of extrema, and if the intensity at this extrema is less than a threshold value (0.03 as per the paper), it is rejected. This threshold is called **contrastThreshold** in OpenCV
DoG has higher response for edges, so edges also need to be removed. For this, a concept similar to Harris corner detector is used. They used a 2x2 Hessian matrix (H) to compute the principal curvature. We know from Harris corner detector that for edges, one eigen value is larger than the other. So here they used a simple function,
.. math::
\frac{Tr(H)^2}{Det(H)} < \frac{(r+1)^2}{r} \; \text{where} \; r = \frac{\lambda_1}{\lambda_2}; \; \lambda_1 > \lambda_2
If this ratio is greater than a threshold, called **edgeThreshold** in OpenCV, that keypoint is discarded. It is given as 10 in paper.
So it eliminates any low-contrast keypoints and edge keypoints and what remains is strong interest points.
3. Orientation Assignment
-----------------------------------
Now an orientation is assigned to each keypoint to achieve invariance to image rotation. A neighbourhood is taken around the keypoint location depending on the scale, and the gradient magnitude and direction is calculated in that region. An orientation histogram with 36 bins covering 360 degrees is created. (It is weighted by gradient magnitude and a Gaussian-weighted circular window with :math:`\sigma` equal to 1.5 times the scale of keypoint.) The highest peak in the histogram is taken and any peak above 80% of it is also considered to calculate the orientation. It creates keypoints with same location and scale, but different directions. It contributes to stability of matching.
4. Keypoint Descriptor
-----------------------------------------
Now keypoint descriptor is created. A 16x16 neighbourhood around the keypoint is taken. It is divided into 16 sub-blocks of 4x4 size. For each sub-block, 8 bin orientation histogram is created. So a total of 128 bin values are available. It is represented as a vector to form keypoint descriptor. In addition to this, several measures are taken to achieve robustness against illumination changes, rotation etc.
5. Keypoint Matching
----------------------------------------
Keypoints between two images are matched by identifying their nearest neighbours. But in some cases, the second closest-match may be very near to the first. It may happen due to noise or some other reasons. In that case, ratio of closest-distance to second-closest distance is taken. If it is greater than 0.8, they are rejected. It eliminates around 90% of false matches while discarding only 5% of correct matches, as per the paper.
So this is a summary of SIFT algorithm. For more details and understanding, reading the original paper is highly recommended. Remember one thing, this algorithm is patented. So this algorithm is included in Non-free module in OpenCV.
SIFT in OpenCV
=================
So now let's see SIFT functionalities available in OpenCV. Let's start with keypoint detection and draw them. First we have to construct a SIFT object. We can pass different parameters to it which are optional and they are well explained in docs.
::
import cv2
import numpy as np
img = cv2.imread('home.jpg')
gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
sift = cv2.SIFT()
kp = sift.detect(gray,None)
img=cv2.drawKeypoints(gray,kp)
cv2.imwrite('sift_keypoints.jpg',img)
**sift.detect()** function finds the keypoint in the images. You can pass a mask if you want to search only a part of image. Each keypoint is a special structure which has many attributes like its (x,y) coordinates, size of the meaningful neighbourhood, angle which specifies its orientation, response that specifies strength of keypoints etc.
OpenCV also provides **cv2.drawKeypoints()** function which draws the small circles on the locations of keypoints. If you pass a flag, **cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS** to it, it will draw a circle with size of keypoint and it will even show its orientation. See below example.
::
img=cv2.drawKeypoints(gray,kp,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv2.imwrite('sift_keypoints.jpg',img)
See the two results below:
.. image:: images/sift_keypoints.jpg
:alt: SIFT Keypoints
:align: center
Now to calculate the descriptor, OpenCV provides two methods.
1. Since you already found keypoints, you can call **sift.compute()** which computes the descriptors from the keypoints we have found. Eg: ``kp,des = sift.compute(gray,kp)``
2. If you didn't find keypoints, directly find keypoints and descriptors in a single step with the function, **sift.detectAndCompute()**.
We will see the second method:
::
sift = cv2.SIFT()
kp, des = sift.detectAndCompute(gray,None)
Here kp will be a list of keypoints and des is a numpy array of shape :math:`Number\_of\_Keypoints \times 128`.
So we got keypoints, descriptors etc. Now we want to see how to match keypoints in different images. That we will learn in coming chapters.
Additional Resources
=====================
Exercises
=============

View File

@ -1,145 +0,0 @@
.. _SURF:
Introduction to SURF (Speeded-Up Robust Features)
*****************************************************
Goal
======
In this chapter,
* We will see the basics of SURF
* We will see SURF functionalities in OpenCV
Theory
==========
In last chapter, we saw SIFT for keypoint detection and description. But it was comparatively slow and people needed more speeded-up version. In 2006, three people, Bay, H., Tuytelaars, T. and Van Gool, L, published another paper, "SURF: Speeded Up Robust Features" which introduced a new algorithm called SURF. As name suggests, it is a speeded-up version of SIFT.
In SIFT, Lowe approximated Laplacian of Gaussian with Difference of Gaussian for finding scale-space. SURF goes a little further and approximates LoG with Box Filter. Below image shows a demonstration of such an approximation. One big advantage of this approximation is that, convolution with box filter can be easily calculated with the help of integral images. And it can be done in parallel for different scales. Also, SURF relies on determinant of Hessian matrix for both scale and location.
.. image:: images/surf_boxfilter.jpg
:alt: Box Filter approximation of Laplacian
:align: center
For orientation assignment, SURF uses wavelet responses in horizontal and vertical direction for a neighbourhood of size 6s. Adequate Gaussian weights are also applied to it. Then they are plotted in a space as given in below image. The dominant orientation is estimated by calculating the sum of all responses within a sliding orientation window of angle 60 degrees. Interesting thing is that, wavelet response can be found out using integral images very easily at any scale. For many applications, rotation invariance is not required, so no need of finding this orientation, which speeds up the process. SURF provides such a functionality called Upright-SURF or U-SURF. It improves speed and is robust up to :math:`\pm 15^{\circ}`. OpenCV supports both, depending upon the flag, **upright**. If it is 0, orientation is calculated. If it is 1, orientation is not calculated and it is faster.
.. image:: images/surf_orientation.jpg
:alt: Orientation Assignment in SURF
:align: center
For feature description, SURF uses Wavelet responses in horizontal and vertical direction (again, use of integral images makes things easier). A neighbourhood of size 20sX20s is taken around the keypoint where s is the size. It is divided into 4x4 subregions. For each subregion, horizontal and vertical wavelet responses are taken and a vector is formed like this, :math:`v=( \sum{d_x}, \sum{d_y}, \sum{|d_x|}, \sum{|d_y|})`. This when represented as a vector gives SURF feature descriptor with total 64 dimensions. The lower the dimension, the higher the speed of computation and matching, whereas a higher dimension provides better distinctiveness of features.
For more distinctiveness, SURF feature descriptor has an extended 128 dimension version. The sums of :math:`d_x` and :math:`|d_x|` are computed separately for :math:`d_y < 0` and :math:`d_y \geq 0`. Similarly, the sums of :math:`d_y` and :math:`|d_y|` are split
up according to the sign of :math:`d_x` , thereby doubling the number of features. It doesn't add much computation complexity. OpenCV supports both by setting the value of flag **extended** with 0 and 1 for 64-dim and 128-dim respectively (default is 128-dim)
Another important improvement is the use of sign of Laplacian (trace of Hessian Matrix) for underlying interest point. It adds no computation cost since it is already computed during detection. The sign of the Laplacian distinguishes bright blobs on dark backgrounds from the reverse situation. In the matching stage, we only compare features if they have the same type of contrast (as shown in image below). This minimal information allows for faster matching, without reducing the descriptor's performance.
.. image:: images/surf_matching.jpg
:alt: Fast Indexing for Matching
:align: center
In short, SURF adds a lot of features to improve the speed in every step. Analysis shows it is 3 times faster than SIFT while performance is comparable to SIFT. SURF is good at handling images with blurring and rotation, but not good at handling viewpoint change and illumination change.
SURF in OpenCV
====================
OpenCV provides SURF functionalities just like SIFT. You initiate a SURF object with some optional conditions like 64/128-dim descriptors, Upright/Normal SURF etc. All the details are well explained in docs. Then as we did in SIFT, we can use SURF.detect(), SURF.compute() etc for finding keypoints and descriptors.
First we will see a simple demo on how to find SURF keypoints and descriptors and draw it. All examples are shown in Python terminal since it is just same as SIFT only.
::
>>> img = cv2.imread('fly.png',0)
# Create SURF object. You can specify params here or later.
# Here I set Hessian Threshold to 400
>>> surf = cv2.SURF(400)
# Find keypoints and descriptors directly
>>> kp, des = surf.detectAndCompute(img,None)
>>> len(kp)
699
699 keypoints are too many to show in a picture. We reduce it to some 50 to draw it on an image. While matching, we may need all those features, but not now. So we increase the Hessian Threshold.
::
# Check present Hessian threshold
>>> print surf.hessianThreshold
400.0
# We set it to some 50000. Remember, it is just for representing in picture.
# In actual cases, it is better to have a value 300-500
>>> surf.hessianThreshold = 50000
# Again compute keypoints and check its number.
>>> kp, des = surf.detectAndCompute(img,None)
>>> print len(kp)
47
It is less than 50. Let's draw it on the image.
::
>>> img2 = cv2.drawKeypoints(img,kp,None,(255,0,0),4)
>>> plt.imshow(img2),plt.show()
See the result below. You can see that SURF is more like a blob detector. It detects the white blobs on wings of butterfly. You can test it with other images.
.. image:: images/surf_kp1.jpg
:alt: SURF Keypoints with Orientation
:align: center
Now I want to apply U-SURF, so that it won't find the orientation.
::
# Check upright flag, if it False, set it to True
>>> print surf.upright
False
>>> surf.upright = True
# Recompute the feature points and draw it
>>> kp = surf.detect(img,None)
>>> img2 = cv2.drawKeypoints(img,kp,None,(255,0,0),4)
>>> plt.imshow(img2),plt.show()
See the results below. All the orientations are shown in same direction. It is faster than the previous one. If you are working on cases where orientation is not a problem (like panorama stitching etc.), this is better.
.. image:: images/surf_kp2.jpg
:alt: Upright-SURF
:align: center
Finally we check the descriptor size and change it to 128 if it is only 64-dim.
::
# Find size of descriptor
>>> print surf.descriptorSize()
64
# That means flag, "extended" is False.
>>> surf.extended
False
# So we make it to True to get 128-dim descriptors.
>>> surf.extended = True
>>> kp, des = surf.detectAndCompute(img,None)
>>> print surf.descriptorSize()
128
>>> print des.shape
(47, 128)
Remaining part is matching which we will do in another chapter.
Additional Resources
=======================
Exercises
==============

View File

@ -1,170 +0,0 @@
.. _PY_Table-Of-Content-Feature2D:
Feature Detection and Description
------------------------------------------
* :ref:`Features_Meaning`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_1| What are the main features in an image? How can finding those features be useful to us?
=========== ======================================================
.. |f2d_1| image:: images/features_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`Harris_Corners`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_2| Okay, Corners are good features? But how do we find them?
=========== ======================================================
.. |f2d_2| image:: images/harris_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`shi_tomasi`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_3| We will look into Shi-Tomasi corner detection
=========== ======================================================
.. |f2d_3| image:: images/shi_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`sift_intro`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_4| Harris corner detector is not good enough when scale of image changes. Lowe developed a breakthrough method to find scale-invariant features and it is called SIFT
=========== ======================================================
.. |f2d_4| image:: images/sift_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`SURF`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_5| SIFT is really good, but not fast enough, so people came up with a speeded-up version called SURF.
=========== ======================================================
.. |f2d_5| image:: images/surf_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`FAST`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_06| All the above feature detection methods are good in some way. But they are not fast enough to work in real-time applications like SLAM. There comes the FAST algorithm, which is really "FAST".
=========== ======================================================
.. |f2d_06| image:: images/fast_icon.jpg
:height: 90pt
:width: 90pt
* :ref:`BRIEF`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_07| SIFT uses a feature descriptor with 128 floating point numbers. Consider thousands of such features. It takes lots of memory and more time for matching. We can compress it to make it faster. But still we have to calculate it first. There comes BRIEF which gives the shortcut to find binary descriptors with less memory, faster matching, still higher recognition rate.
=========== ======================================================
.. |f2d_07| image:: images/brief.jpg
:height: 90pt
:width: 90pt
* :ref:`ORB`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_08| SIFT and SURF are good in what they do, but what if you have to pay a few dollars every year to use them in your applications? Yeah, they are patented!!! To solve that problem, OpenCV devs came up with a new "FREE" alternative to SIFT & SURF, and that is ORB.
=========== ======================================================
.. |f2d_08| image:: images/orb.jpg
:height: 90pt
:width: 90pt
* :ref:`Matcher`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_09| We know a great deal about feature detectors and descriptors. It is time to learn how to match different descriptors. OpenCV provides two techniques, Brute-Force matcher and FLANN based matcher.
=========== ======================================================
.. |f2d_09| image:: images/matching.jpg
:height: 90pt
:width: 90pt
* :ref:`PY_feature_homography`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|f2d_10| Now we know about feature matching. Let's mix it up with `calib3d` module to find objects in a complex image.
=========== ======================================================
.. |f2d_10| image:: images/homography_icon.jpg
:height: 90pt
:width: 90pt
.. raw:: latex
\pagebreak
.. We use a custom table of content format and as the table of content only informs Sphinx about the hierarchy of the files, no need to show it.
.. toctree::
:hidden:
../py_features_meaning/py_features_meaning
../py_features_harris/py_features_harris
../py_shi_tomasi/py_shi_tomasi
../py_sift_intro/py_sift_intro
../py_surf_intro/py_surf_intro
../py_fast/py_fast
../py_brief/py_brief
../py_orb/py_orb
../py_matcher/py_matcher
../py_feature_homography/py_feature_homography

View File

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

View File

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

View File

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

Before

Width:  |  Height:  |  Size: 2.3 KiB

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

View File

@ -1,106 +0,0 @@
.. _Drawing_Functions:
Drawing Functions in OpenCV
******************************
Goal
=====
.. container:: enumeratevisibleitemswithsquare
* Learn to draw different geometric shapes with OpenCV
* You will learn these functions : **cv2.line()**, **cv2.circle()** , **cv2.rectangle()**, **cv2.ellipse()**, **cv2.putText()** etc.
Code
=====
In all the above functions, you will see some common arguments as given below:
* img : The image where you want to draw the shapes
* color : Color of the shape. for BGR, pass it as a tuple, eg: ``(255,0,0)`` for blue. For grayscale, just pass the scalar value.
* thickness : Thickness of the line or circle etc. If **-1** is passed for closed figures like circles, it will fill the shape. *default thickness = 1*
* lineType : Type of line, whether 8-connected, anti-aliased line etc. *By default, it is 8-connected.* ``cv2.LINE_AA`` gives anti-aliased line which looks great for curves.
Drawing Line
-------------
To draw a line, you need to pass starting and ending coordinates of line. We will create a black image and draw a blue line on it from top-left to bottom-right corners.
::
import numpy as np
import cv2
# Create a black image
img = np.zeros((512,512,3), np.uint8)
# Draw a diagonal blue line with thickness of 5 px
cv2.line(img,(0,0),(511,511),(255,0,0),5)
Drawing Rectangle
-------------------
To draw a rectangle, you need top-left corner and bottom-right corner of rectangle. This time we will draw a green rectangle at the top-right corner of image.
::
cv2.rectangle(img,(384,0),(510,128),(0,255,0),3)
Drawing Circle
----------------
To draw a circle, you need its center coordinates and radius. We will draw a circle inside the rectangle drawn above.
::
cv2.circle(img,(447,63), 63, (0,0,255), -1)
Drawing Ellipse
--------------------
To draw the ellipse, we need to pass several arguments. One argument is the center location (x,y). Next argument is axes lengths (major axis length, minor axis length). ``angle`` is the angle of rotation of ellipse in anti-clockwise direction. ``startAngle`` and ``endAngle`` denotes the starting and ending of ellipse arc measured in clockwise direction from major axis. i.e. giving values 0 and 360 gives the full ellipse. For more details, check the documentation of **cv2.ellipse()**. Below example draws a half ellipse at the center of the image.
::
cv2.ellipse(img,(256,256),(100,50),0,0,180,255,-1)
Drawing Polygon
------------------
To draw a polygon, first you need coordinates of vertices. Make those points into an array of shape ``ROWSx1x2`` where ROWS are number of vertices and it should be of type ``int32``. Here we draw a small polygon with four vertices in yellow color.
::
pts = np.array([[10,5],[20,30],[70,20],[50,10]], np.int32)
pts = pts.reshape((-1,1,2))
cv2.polylines(img,[pts],True,(0,255,255))
.. Note:: If third argument is ``False``, you will get a polylines joining all the points, not a closed shape.
.. Note:: ``cv2.polylines()`` can be used to draw multiple lines. Just create a list of all the lines you want to draw and pass it to the function. All lines will be drawn individually. It is a much better and faster way to draw a group of lines than calling ``cv2.line()`` for each line.
Adding Text to Images:
------------------------
To put text in images, you need to specify the following things.
* Text data that you want to write
* Position coordinates of where you want to put it (i.e. bottom-left corner where data starts).
* Font type (Check **cv2.putText()** docs for supported fonts)
* Font Scale (specifies the size of font)
* regular things like color, thickness, lineType etc. For better look, ``lineType = cv2.LINE_AA`` is recommended.
We will write **OpenCV** on our image in white color.
::
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(img,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA)
Result
----------
So it is time to see the final result of our drawing. As you studied in previous articles, display the image to see it.
.. image:: images/drawing_result.jpg
:alt: Drawing Functions in OpenCV
:align: center
Additional Resources
========================
1. The angles used in ellipse function are not our circular angles. For more details, visit `this discussion <http://answers.opencv.org/question/14541/angles-in-ellipse-function/>`_.
Exercises
==============
#. Try to create the logo of OpenCV using drawing functions available in OpenCV.

View File

@ -1,141 +0,0 @@
.. _PY_Display_Image:
Getting Started with Images
*****************************
Goals
======
.. container:: enumeratevisibleitemswithsquare
* Here, you will learn how to read an image, how to display it and how to save it back
* You will learn these functions : **cv2.imread()**, **cv2.imshow()** , **cv2.imwrite()**
* Optionally, you will learn how to display images with Matplotlib
Using OpenCV
=============
Read an image
--------------
Use the function **cv2.imread()** to read an image. The image should be in the working directory or a full path of image should be given.
Second argument is a flag which specifies the way image should be read.
* cv2.IMREAD_COLOR : Loads a color image. Any transparency of image will be neglected. It is the default flag.
* cv2.IMREAD_GRAYSCALE : Loads image in grayscale mode
* cv2.IMREAD_UNCHANGED : Loads image as such including alpha channel
.. note:: Instead of these three flags, you can simply pass integers 1, 0 or -1 respectively.
See the code below:
::
import numpy as np
import cv2
# Load an color image in grayscale
img = cv2.imread('messi5.jpg',0)
.. warning:: Even if the image path is wrong, it won't throw any error, but ``print img`` will give you ``None``
Display an image
-----------------
Use the function **cv2.imshow()** to display an image in a window. The window automatically fits to the image size.
First argument is a window name which is a string. Second argument is our image. You can create as many windows as you wish, but with different window names.
::
cv2.imshow('image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
A screenshot of the window will look like this (in Fedora-Gnome machine):
.. image:: images/opencv_screenshot.jpg
:alt: Screenshot of Image Window in OpenCV
:align: center
**cv2.waitKey()** is a keyboard binding function. Its argument is the time in milliseconds. The function waits for specified milliseconds for any keyboard event. If you press any key in that time, the program continues. If **0** is passed, it waits indefinitely for a key stroke. It can also be set to detect specific key strokes like, if key `a` is pressed etc which we will discuss below.
.. note:: Besides binding keyboard events this function also processes many other GUI events, so you MUST use it to actually display the image.
**cv2.destroyAllWindows()** simply destroys all the windows we created. If you want to destroy any specific window, use the function **cv2.destroyWindow()** where you pass the exact window name as the argument.
.. note:: There is a special case where you can already create a window and load image to it later. In that case, you can specify whether window is resizable or not. It is done with the function **cv2.namedWindow()**. By default, the flag is ``cv2.WINDOW_AUTOSIZE``. But if you specify flag to be ``cv2.WINDOW_NORMAL``, you can resize window. It will be helpful when image is too large in dimension and adding track bar to windows.
See the code below:
::
cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.imshow('image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Write an image
---------------
Use the function **cv2.imwrite()** to save an image.
First argument is the file name, second argument is the image you want to save.
::
cv2.imwrite('messigray.png',img)
This will save the image in PNG format in the working directory.
Sum it up
---------------
Below program loads an image in grayscale, displays it, save the image if you press 's' and exit, or simply exit without saving if you press `ESC` key.
::
import numpy as np
import cv2
img = cv2.imread('messi5.jpg',0)
cv2.imshow('image',img)
k = cv2.waitKey(0)
if k == 27: # wait for ESC key to exit
cv2.destroyAllWindows()
elif k == ord('s'): # wait for 's' key to save and exit
cv2.imwrite('messigray.png',img)
cv2.destroyAllWindows()
.. warning:: If you are using a 64-bit machine, you will have to modify ``k = cv2.waitKey(0)`` line as follows : ``k = cv2.waitKey(0) & 0xFF``
Using Matplotlib
=================
Matplotlib is a plotting library for Python which gives you wide variety of plotting methods. You will see them in coming articles. Here, you will learn how to display image with Matplotlib. You can zoom images, save it etc using Matplotlib.
::
import numpy as np
import cv2
from matplotlib import pyplot as plt
img = cv2.imread('messi5.jpg',0)
plt.imshow(img, cmap = 'gray', interpolation = 'bicubic')
plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis
plt.show()
A screen-shot of the window will look like this :
.. image:: images/matplotlib_screenshot.jpg
:alt: Screenshot of Image Window in Matplotlib
:align: center
.. seealso:: Plenty of plotting options are available in Matplotlib. Please refer to Matplotlib docs for more details. Some, we will see on the way.
.. warning:: Color image loaded by OpenCV is in BGR mode. But Matplotlib displays in RGB mode. So color images will not be displayed correctly in Matplotlib if image is read with OpenCV. Please see the exercises for more details.
Additional Resources
======================
#. `Matplotlib Plotting Styles and Features <http://matplotlib.org/api/pyplot_api.html>`_
Exercises
==========
#. There is some problem when you try to load color image in OpenCV and display it in Matplotlib. Read `this discussion <http://stackoverflow.com/a/15074748/1134940>`_ and understand it.

View File

@ -1,108 +0,0 @@
.. _Mouse_Handling:
Mouse as a Paint-Brush
***********************
Goal
======
.. container:: enumeratevisibleitemswithsquare
* Learn to handle mouse events in OpenCV
* You will learn these functions : **cv2.setMouseCallback()**
Simple Demo
=============
Here, we create a simple application which draws a circle on an image wherever we double-click on it.
First we create a mouse callback function which is executed when a mouse event takes place. A mouse event can be anything related to the mouse, like left-button down, left-button up, left-button double-click etc. It gives us the coordinates (x,y) for every mouse event. With this event and location, we can do whatever we like. To list all available events, run the following code in a Python terminal:
::
>>> import cv2
>>> events = [i for i in dir(cv2) if 'EVENT' in i]
>>> print events
Creating mouse callback function has a specific format which is same everywhere. It differs only in what the function does. So our mouse callback function does one thing, it draws a circle where we double-click. So see the code below. Code is self-explanatory from comments :
::
import cv2
import numpy as np
# mouse callback function
def draw_circle(event,x,y,flags,param):
if event == cv2.EVENT_LBUTTONDBLCLK:
cv2.circle(img,(x,y),100,(255,0,0),-1)
# Create a black image, a window and bind the function to window
img = np.zeros((512,512,3), np.uint8)
cv2.namedWindow('image')
cv2.setMouseCallback('image',draw_circle)
while(1):
cv2.imshow('image',img)
if cv2.waitKey(20) & 0xFF == 27:
break
cv2.destroyAllWindows()
More Advanced Demo
===================
Now we go for a much better application. In this, we draw either rectangles or circles (depending on the mode we select) by dragging the mouse like we do in Paint application. So our mouse callback function has two parts, one to draw rectangle and other to draw the circles. This specific example will be really helpful in creating and understanding some interactive applications like object tracking, image segmentation etc.
::
import cv2
import numpy as np
drawing = False # true if mouse is pressed
mode = True # if True, draw rectangle. Press 'm' to toggle to curve
ix,iy = -1,-1
# mouse callback function
def draw_circle(event,x,y,flags,param):
global ix,iy,drawing,mode
if event == cv2.EVENT_LBUTTONDOWN:
drawing = True
ix,iy = x,y
elif event == cv2.EVENT_MOUSEMOVE:
if drawing == True:
if mode == True:
cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),-1)
else:
cv2.circle(img,(x,y),5,(0,0,255),-1)
elif event == cv2.EVENT_LBUTTONUP:
drawing = False
if mode == True:
cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),-1)
else:
cv2.circle(img,(x,y),5,(0,0,255),-1)
Next we have to bind this mouse callback function to OpenCV window. In the main loop, we should set a keyboard binding for key 'm' to toggle between rectangle and circle.
::
img = np.zeros((512,512,3), np.uint8)
cv2.namedWindow('image')
cv2.setMouseCallback('image',draw_circle)
while(1):
cv2.imshow('image',img)
k = cv2.waitKey(1) & 0xFF
if k == ord('m'):
mode = not mode
elif k == 27:
break
cv2.destroyAllWindows()
Additional Resources
========================
Exercises
==========
#. In our last example, we drew a filled rectangle. Modify the code to draw an unfilled rectangle.

View File

@ -1,89 +0,0 @@
.. _PY_Table-Of-Content-Gui:
Gui Features in OpenCV
-----------------------------------------------------------
* :ref:`PY_Display_Image`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|gui_1| Learn to load an image, display it and save it back
=========== ======================================================
.. |gui_1| image:: images/image_display.jpg
:height: 90pt
:width: 90pt
* :ref:`Display_Video`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|gui_2| Learn to play videos, capture videos from Camera and write it as a video
=========== ======================================================
.. |gui_2| image:: images/video_display.jpg
:height: 90pt
:width: 90pt
* :ref:`Drawing_Functions`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|gui_5| Learn to draw lines, rectangles, ellipses, circles etc with OpenCV
=========== ======================================================
.. |gui_5| image:: images/drawing.jpg
:height: 90pt
:width: 90pt
* :ref:`Mouse_Handling`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|gui_3| Draw stuffs with your mouse
=========== ======================================================
.. |gui_3| image:: images/mouse_drawing.jpg
:height: 90pt
:width: 90pt
* :ref:`Trackbar`
.. tabularcolumns:: m{100pt} m{300pt}
.. cssclass:: toctableopencv
=========== ======================================================
|gui_4| Create trackbar to control certain parameters
=========== ======================================================
.. |gui_4| image:: images/trackbar.jpg
:height: 90pt
:width: 90pt
.. raw:: latex
\pagebreak
.. We use a custom table of content format and as the table of content only informs Sphinx about the hierarchy of the files, no need to show it.
.. toctree::
:hidden:
../py_image_display/py_image_display
../py_video_display/py_video_display
../py_drawing_functions/py_drawing_functions
../py_mouse_handling/py_mouse_handling
../py_trackbar/py_trackbar

View File

@ -1,71 +0,0 @@
.. _Trackbar:
Trackbar as the Color Palette
********************************
Goal
=====
.. container:: enumeratevisibleitemswithsquare
* Learn to bind trackbar to OpenCV windows
* You will learn these functions : **cv2.getTrackbarPos()**, **cv2.createTrackbar()** etc.
Code Demo
==========
Here we will create a simple application which shows the color you specify. You have a window which shows the color and three trackbars to specify each of B,G,R colors. You slide the trackbar and correspondingly window color changes. By default, initial color will be set to Black.
For the cv2.createTrackbar() function, the first argument is the trackbar name, the second one is the name of the window to which it is attached, the third argument is the default value, the fourth one is the maximum value and the fifth one is the callback function which is executed every time the trackbar value changes. The callback function always has a default argument which is the trackbar position. In our case, the function does nothing, so we simply pass.
Another important application of trackbar is to use it as a button or switch. OpenCV, by default, doesn't have button functionality. So you can use trackbar to get such functionality. In our application, we have created one switch in which application works only if switch is ON, otherwise screen is always black.
::
import cv2
import numpy as np
def nothing(x):
pass
# Create a black image, a window
img = np.zeros((300,512,3), np.uint8)
cv2.namedWindow('image')
# create trackbars for color change
cv2.createTrackbar('R','image',0,255,nothing)
cv2.createTrackbar('G','image',0,255,nothing)
cv2.createTrackbar('B','image',0,255,nothing)
# create switch for ON/OFF functionality
switch = '0 : OFF \n1 : ON'
cv2.createTrackbar(switch, 'image',0,1,nothing)
while(1):
cv2.imshow('image',img)
k = cv2.waitKey(1) & 0xFF
if k == 27:
break
# get current positions of four trackbars
r = cv2.getTrackbarPos('R','image')
g = cv2.getTrackbarPos('G','image')
b = cv2.getTrackbarPos('B','image')
s = cv2.getTrackbarPos(switch,'image')
if s == 0:
img[:] = 0
else:
img[:] = [b,g,r]
cv2.destroyAllWindows()
The screenshot of the application looks like below :
.. image:: images/trackbar_screenshot.jpg
:alt: Screenshot of Image with Trackbars
:align: center
Exercises
===========
#. Create a Paint application with adjustable colors and brush radius using trackbars. For drawing, refer to the previous tutorial on mouse handling.

View File

@ -1,132 +0,0 @@
.. _Display_Video:
Getting Started with Videos
*****************************
Goal
=====
.. container:: enumeratevisibleitemswithsquare
* Learn to read video, display video and save video.
* Learn to capture from Camera and display it.
* You will learn these functions : **cv2.VideoCapture()**, **cv2.VideoWriter()**
Capture Video from Camera
===========================
Often, we have to capture live stream with camera. OpenCV provides a very simple interface to this. Let's capture a video from the camera (I am using the in-built webcam of my laptop), convert it into grayscale video and display it. Just a simple task to get started.
To capture a video, you need to create a **VideoCapture** object. Its argument can be either the device index or the name of a video file. Device index is just the number to specify which camera. Normally one camera will be connected (as in my case). So I simply pass 0 (or -1). You can select the second camera by passing 1 and so on. After that, you can capture frame-by-frame. But at the end, don't forget to release the capture.
::
import numpy as np
import cv2
cap = cv2.VideoCapture(0)
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
# Our operations on the frame come here
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# Display the resulting frame
cv2.imshow('frame',gray)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
``cap.read()`` returns a bool (True/False) together with the frame. If the frame is read correctly, it will be True. So you can check for the end of the video by checking this return value.
Sometimes, ``cap`` may not have initialized the capture. In that case, this code shows error. You can check whether it is initialized or not by the method **cap.isOpened()**. If it is True, OK. Otherwise open it using **cap.open()**.
You can also access some of the features of this video using **cap.get(propId)** method where propId is a number from 0 to 18. Each number denotes a property of the video (if it is applicable to that video) and full details can be seen here: `Property Identifier <http://docs.opencv.org/modules/highgui/doc/reading_and_writing_video.html#videocapture-get>`_. Some of these values can be modified using **cap.set(propId, value)**. Value is the new value you want.
For example, I can check the frame width and height by ``cap.get(3)`` and ``cap.get(4)``. It gives me 640x480 by default. But I want to modify it to 320x240. Just use ``ret = cap.set(3,320)`` and ``ret = cap.set(4,240)``.
.. Note:: If you are getting error, make sure camera is working fine using any other camera application (like Cheese in Linux).
Playing Video from file
========================
It is the same as capturing from a camera; just replace the camera index with a video file name. Also, while displaying the frame, use an appropriate time for ``cv2.waitKey()``. If it is too small, the video will be very fast, and if it is too high, the video will be slow (well, that is how you can display videos in slow motion). 25 milliseconds will be OK in normal cases.
::
import numpy as np
import cv2
cap = cv2.VideoCapture('vtest.avi')
while(cap.isOpened()):
ret, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
cv2.imshow('frame',gray)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
.. Note:: Make sure proper versions of ffmpeg or gstreamer are installed. Sometimes, it is a headache to work with Video Capture, mostly due to a wrong installation of ffmpeg/gstreamer.
Saving a Video
================
So we capture a video, process it frame-by-frame and we want to save that video. For images, it is very simple, just use ``cv2.imwrite()``. Here a little more work is required.
This time we create a **VideoWriter** object. We should specify the output file name (eg: output.avi). Then we should specify the **FourCC** code (details in the next paragraph). Then the number of frames per second (fps) and the frame size should be passed. The last one is the **isColor** flag. If it is True, the encoder expects color frames, otherwise it works with grayscale frames.
`FourCC <http://en.wikipedia.org/wiki/FourCC>`_ is a 4-byte code used to specify the video codec. The list of available codes can be found in `fourcc.org <http://www.fourcc.org/codecs.php>`_. It is platform dependent. The following codecs work fine for me.
* In Fedora: DIVX, XVID, MJPG, X264, WMV1, WMV2. (XVID is more preferable. MJPG results in high size video. X264 gives very small size video)
* In Windows: DIVX (More to be tested and added)
* In OSX : *(I don't have access to OSX. Can some one fill this?)*
FourCC code is passed as ``cv2.VideoWriter_fourcc('M','J','P','G')`` or ``cv2.VideoWriter_fourcc(*'MJPG')`` for MJPG.
The code below captures from a camera, flips every frame in the vertical direction and saves it.
::
import numpy as np
import cv2
cap = cv2.VideoCapture(0)
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi',fourcc, 20.0, (640,480))
while(cap.isOpened()):
ret, frame = cap.read()
if ret==True:
frame = cv2.flip(frame,0)
# write the flipped frame
out.write(frame)
cv2.imshow('frame',frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
break
# Release everything if job is finished
cap.release()
out.release()
cv2.destroyAllWindows()
Additional Resources
==========================
Exercises
=================

View File

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

Before

Width:  |  Height:  |  Size: 4.1 KiB

After

Width:  |  Height:  |  Size: 4.1 KiB

View File

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.4 KiB

View File

Before

Width:  |  Height:  |  Size: 58 KiB

After

Width:  |  Height:  |  Size: 58 KiB

View File

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

View File

Before

Width:  |  Height:  |  Size: 3.3 KiB

After

Width:  |  Height:  |  Size: 3.3 KiB

View File

Before

Width:  |  Height:  |  Size: 65 KiB

After

Width:  |  Height:  |  Size: 65 KiB

View File

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 2.8 KiB

View File

Before

Width:  |  Height:  |  Size: 2.3 KiB

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

Before

Width:  |  Height:  |  Size: 5.3 KiB

After

Width:  |  Height:  |  Size: 5.3 KiB

View File

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

View File

Before

Width:  |  Height:  |  Size: 4.9 KiB

After

Width:  |  Height:  |  Size: 4.9 KiB

Some files were not shown because too many files have changed in this diff Show More