# 2011-07-06 01:29:54 +08:00
import os , sys , re , string , glob
from string import Template
class DeclarationParser(object):
    """Collects one function declaration taken from an ``.. ocv:*function::`` line.

    A declaration may span several lines; it is considered complete once the
    numbers of "(" and ")" in the accumulated text are equal.

    Attributes:
        fdecl:   declaration text accumulated so far (directive prefix removed).
        lang:    language label derived from the directive ("C++", "C", ...).
        balance: count of "(" minus count of ")" in ``fdecl``.
    """

    # Directive prefix -> language label, checked in this order.
    _DIRECTIVES = (
        (".. ocv:function::", "C++"),
        (".. ocv:cfunction::", "C"),
        (".. ocv:pyfunction::", "Python2"),
        (".. ocv:pyoldfunction::", "Python1"),
        (".. ocv:jfunction::", "Java"),
    )

    def __init__(self, line=None):
        """Start a declaration from ``line``; with no line, create an empty (ready) parser.

        Raises AssertionError when ``line`` is not a known directive or when it
        closes more parentheses than it opens.
        """
        if line is None:
            self.fdecl = ""
            self.lang = ""
            self.balance = 0
            return
        self.lang = self.getLang(line)
        # NOTE: asserts are kept on purpose - RstParser.parse_section_safe
        # reports AssertionError as a parse error for the section.
        assert self.lang is not None
        self.fdecl = line[line.find("::") + 2:].strip()
        self.balance = self.fdecl.count("(") - self.fdecl.count(")")
        assert self.balance >= 0

    def append(self, line):
        """Append a continuation line and recompute the parentheses balance."""
        self.fdecl += line
        self.balance = self.fdecl.count("(") - self.fdecl.count(")")

    def isready(self):
        """Return True when every opened parenthesis has been closed."""
        return self.balance == 0

    def getLang(self, line):
        """Return the language label for a directive line, or None if it is not one."""
        for prefix, lang in self._DIRECTIVES:
            if line.startswith(prefix):
                return lang
        return None

    def hasDeclaration(self, line):
        """Return True when ``line`` starts with one of the known function directives."""
        return self.getLang(line) is not None
class ParamParser(object):
    """Collects one ``:param name: comment`` definition and its continuation lines.

    Attributes:
        prefix:  the run of spaces that precedes ``:param`` on the first line.
        name:    parameter name.
        comment: parameter description accumulated so far.
        active:  True while continuation lines are still being accepted.
    """

    def __init__(self, line=None):
        """Start a parameter from ``line``; with no line, create an inactive parser.

        Raises AssertionError when ``:param`` is missing or sits at column 0,
        when it is preceded by anything but spaces, or when the name is empty
        or not terminated by ":".
        """
        if line is None:
            self.prefix = ""
            self.name = ""
            self.comment = ""
            self.active = False
            return
        offset = line.find(":param")
        # NOTE: asserts are kept on purpose - RstParser.parse_section_safe
        # reports AssertionError as a parse error for the section.
        assert offset > 0
        self.prefix = line[:offset]
        assert self.prefix == " " * len(self.prefix), ":param definition should be prefixed with spaces"
        # 6 == len(":param")
        line = line[offset + 6:].lstrip()
        name_end = line.find(":")
        assert name_end > 0
        self.name = line[:name_end]
        self.comment = line[name_end + 1:].lstrip()
        self.active = True

    def append(self, line):
        """Feed the next raw line; deactivates the parser when the definition ends.

        The definition ends at the next ``:param`` line or at a non-empty line
        that does not start with this definition's prefix. Empty lines and
        lines starting with the prefix extend the comment.
        """
        assert self.active
        if self.hasDeclaration(line):
            self.active = False
        elif line.startswith(self.prefix) or not line:
            self.comment += "\n" + line.lstrip()
        else:
            self.active = False

    def hasDeclaration(self, line):
        """Return True when ``line`` (ignoring leading whitespace) starts with ``:param``."""
        return line.lstrip().startswith(":param")
class RstParser(object):
    """Extracts function/class documentation from OpenCV-style ``.rst`` files.

    Every section (a title line underlined with "-") is turned into a dict and
    stored in ``self.definitions`` keyed by its normalized name.

    Attributes:
        cpp_parser:       object providing ``parse_func_decl_no_wrap(decl)``;
                          used for "C++" and "C" declarations only.
        definitions:      section name -> parsed definition dict.
        sections_total:   number of sections seen by ``parse_section``.
        sections_parsed:  number of sections stored in ``definitions``.
        sections_skipped: number of sections skipped because of their name.
    """

    def __init__(self, cpp_parser):
        self.cpp_parser = cpp_parser
        self.definitions = {}
        self.sections_parsed = 0
        self.sections_total = 0
        self.sections_skipped = 0

    def parse(self, module_name, module_path=None):
        """Parse every ``doc/*.rst`` file below ``module_path``.

        ``module_path`` defaults to ``"../" + module_name``.
        """
        if module_path is None:
            module_path = "../" + module_name
        doclist = glob.glob(os.path.join(module_path, "doc/*.rst"))
        for doc in doclist:
            self.parse_rst_file(module_name, doc)

    def parse_section_safe(self, module_name, section_name, file_name, lineno, lines):
        """Run ``parse_section`` and report an AssertionError instead of propagating it."""
        try:
            self.parse_section(module_name, section_name, file_name, lineno, lines)
        except AssertionError as args:
            print("RST parser error: assertion in \"%s\" File: %s (line %s)" % (section_name, file_name, lineno))
            print("    Details: %s" % args)

    def parse_section(self, module_name, section_name, file_name, lineno, lines):
        """Parse the body ``lines`` of one section and register the definition.

        Sections whose name contains a space (and no "::operator") are counted
        as skipped. A definition that fails ``validate`` is printed via
        ``print_info`` and not stored.
        """
        self.sections_total += 1
        # skip sections having whitespace in name
        if section_name.find(" ") >= 0 and section_name.find("::operator") < 0:
            print("SKIPPED: \"%s\" File: %s (line %s)" % (section_name, file_name, lineno))
            self.sections_skipped += 1
            return

        func = {}
        func["name"] = section_name
        func["file"] = file_name
        func["line"] = lineno
        func["module"] = module_name

        # parse section name
        section_name = self.parse_namespace(func, section_name)
        class_separator_idx = section_name.find("::")
        if class_separator_idx > 0:
            func["class"] = section_name[:class_separator_idx]
            func["method"] = section_name[class_separator_idx + 2:]
        else:
            func["method"] = section_name

        skip_code_lines = False
        expected_brief = True
        fdecl = DeclarationParser()
        pdecl = ParamParser()

        for l in lines:
            # read tail of function/method declaration if needed
            if not fdecl.isready():
                # FIX: append the current line; the previous code appended the
                # stale `ll` left over from the preceding iteration, so a
                # multi-line declaration could never become balanced.
                fdecl.append(l.strip())
                if fdecl.isready():
                    self.add_new_fdecl(func, fdecl)
                continue

            # skip lines if line-skipping mode is activated
            if skip_code_lines:
                if not l or l.startswith(" "):
                    continue
                else:
                    skip_code_lines = False

            ll = l.strip()
            if ll == "..":  # strange construction...
                continue

            # turn on line-skipping mode for code fragments
            if ll.endswith("::"):
                skip_code_lines = True
                # FIX: drop exactly the "::" marker (plus any space before it);
                # the previous code cut three characters and lost a letter.
                ll = ll[:-2].rstrip()
            if ll.startswith(".. code-block::"):
                skip_code_lines = True
                continue

            # continue param parsing
            if pdecl.active:
                pdecl.append(l)
                if pdecl.active:
                    continue
                else:
                    self.add_new_pdecl(func, pdecl)
                    # do not continue - current line can contain next parameter definition

            # todo: parse structure members; skip them for now
            if ll.startswith(".. ocv:member::"):
                skip_code_lines = True
                continue

            # parse class & struct definitions
            if ll.startswith(".. ocv:class::"):
                func["class"] = ll[ll.find("::") + 2:].strip()
                if "method" in func:
                    del func["method"]
                func["isclass"] = True
                expected_brief = True
                continue

            if ll.startswith(".. ocv:struct::"):
                func["class"] = ll[ll.find("::") + 2:].strip()
                if "method" in func:
                    del func["method"]
                func["isstruct"] = True
                expected_brief = True
                continue

            # parse function/method definitions
            if fdecl.hasDeclaration(ll):
                fdecl = DeclarationParser(ll)
                if fdecl.isready():
                    self.add_new_fdecl(func, fdecl)
                expected_brief = False
                continue

            # parse parameters
            if pdecl.hasDeclaration(l):
                pdecl = ParamParser(l)
                expected_brief = False
                continue

            # record brief description
            if expected_brief and len(ll) == 0:
                if "brief" in func:
                    expected_brief = False
                continue

            if expected_brief:
                func["brief"] = func.get("brief", "") + "\n" + ll
                if skip_code_lines:
                    expected_brief = False  # force end brief if code block begins
                continue

            # record other lines as long description
            func["long"] = func.get("long", "") + "\n" + ll
        # endfor l in lines

        if fdecl.balance != 0:
            print("RST parser error: invalid parentheses balance in \"%s\" File: %s (line %s)" % (section_name, file_name, lineno))

        # save last parameter if needed
        if pdecl.active:
            self.add_new_pdecl(func, pdecl)

        # add definition to list
        func = self.normalize(func)
        if self.validate(func):
            self.definitions[func["name"]] = func
            self.sections_parsed += 1
            #self.print_info(func)
        elif func:
            self.print_info(func, True)

    def parse_rst_file(self, module_name, doc):
        """Split one ``.rst`` file into sections and parse each of them.

        A section starts at a non-empty line followed by a line made only of
        "-" that is at least as long as the title.
        """
        doc = os.path.abspath(doc)
        lineno = 0
        whitespace_warnings = 0
        max_whitespace_warnings = 10

        lines = []
        flineno = 0
        fname = ""
        prev_line = None

        # `with` guarantees the file is closed even if a section parser raises
        with open(doc, "rt") as df:
            for l in df:
                lineno += 1
                # handle tabs
                if l.find("\t") >= 0:
                    whitespace_warnings += 1
                    if whitespace_warnings <= max_whitespace_warnings:
                        print("RST parser warning: tab symbol instead of space is used at file %s (line %s)" % (doc, lineno))
                    l = l.replace("\t", "    ")

                # handle first line
                if prev_line is None:
                    prev_line = l.rstrip()
                    continue

                ll = l.rstrip()
                if len(prev_line) > 0 and len(ll) >= len(prev_line) and ll == "-" * len(ll):
                    # new function candidate
                    if len(lines) > 1:
                        # the last collected line is the title of the new section
                        self.parse_section_safe(module_name, fname, doc, flineno, lines[:len(lines) - 1])
                    lines = []
                    flineno = lineno - 1
                    fname = prev_line.strip()
                elif flineno > 0:
                    lines.append(ll)
                prev_line = ll

        # don't forget about the last function section in file!!!
        if len(lines) > 1:
            self.parse_section_safe(module_name, fname, doc, flineno, lines[:len(lines)])

    def parse_namespace(self, func, section_name):
        """Strip a known leading namespace from ``section_name`` and record it in ``func``.

        Returns the stripped remainder, or ``section_name`` unchanged when no
        known namespace prefix is present.
        """
        known_namespaces = ["cv", "gpu", "flann"]
        l = section_name.strip()
        for namespace in known_namespaces:
            if l.startswith(namespace + "::"):
                func["namespace"] = namespace
                return l[len(namespace) + 2:]
        return section_name

    def add_new_fdecl(self, func, decl):
        """Append a completed declaration to ``func["decls"]``.

        "C++" and "C" declarations are stored as ``(lang, text, parsed)`` where
        ``parsed`` comes from ``cpp_parser.parse_func_decl_no_wrap``; all other
        languages are stored as ``(lang, text)``.
        """
        decls = func.get("decls", [])
        if decl.lang == "C++" or decl.lang == "C":
            rst_decl = self.cpp_parser.parse_func_decl_no_wrap(decl.fdecl)
            decls.append((decl.lang, decl.fdecl, rst_decl))
        else:
            decls.append((decl.lang, decl.fdecl))
        func["decls"] = decls

    def add_new_pdecl(self, func, decl):
        """Store a parameter comment in ``func["params"]``; report redefinitions."""
        params = func.get("params", {})
        if decl.name in params:
            print("RST parser error: redefinition of parameter \"%s\" in \"%s\" File: %s (line %s)"
                  % (decl.name, func["name"], func["file"], func["line"]))
        else:
            params[decl.name] = decl.comment
        func["params"] = params

    def print_info(self, func, skipped=False):
        """Print a human-readable dump of one definition dict."""
        print("")
        if skipped:
            print("SKIPPED DEFINITION:")
        print("name:      %s" % (func.get("name", "~empty~")))
        print("file:      %s (line %s)" % (func.get("file", "~empty~"), func.get("line", "~empty~")))
        print("is class:  %s" % func.get("isclass", False))
        print("is struct: %s" % func.get("isstruct", False))
        print("module:    %s" % func.get("module", "~unknown~"))
        print("namespace: %s" % func.get("namespace", "~empty~"))
        print("class:     %s" % (func.get("class", "~empty~")))
        print("method:    %s" % (func.get("method", "~empty~")))
        print("brief:     %s" % (func.get("brief", "~empty~")))
        if "decls" in func:
            print("declarations:")
            for d in func["decls"]:
                print("     %7s: %s" % (d[0], re.sub(r"[ ]+", " ", d[1])))
        if "params" in func:
            print("parameters:")
            for name, comment in func["params"].items():
                print("%23s:   %s" % (name, comment))
        if not skipped:
            print("long:      %s" % (func.get("long", "~empty~")))
        print("")

    def validate(self, func):
        """Return True when ``func`` may be stored in ``self.definitions``.

        Rejected: definitions without declarations that are neither class nor
        struct, and names that are already documented.
        """
        if func.get("decls", None) is None:
            if not func.get("isclass", False) and not func.get("isstruct", False):
                return False
        if func["name"] in self.definitions:
            previous = self.definitions[func["name"]]
            print("RST parser error: \"%s\" from file: %s (line %s) is already documented in file: %s (line %s)"
                  % (func["name"], func["file"], func["line"], previous["file"], previous["line"]))
            return False
        #todo: validate parameter names
        return True

    def normalize(self, func):
        """Normalize all text fields of ``func`` in place and return it.

        Empty "brief"/"long" entries and empty parameter comments are dropped.
        """
        if not func:
            return func
        func["name"] = self.normalizeText(func["name"])
        if "method" in func:
            func["method"] = self.normalizeText(func["method"])
        if "class" in func:
            func["class"] = self.normalizeText(func["class"])
        if "brief" in func:
            func["brief"] = self.normalizeText(func.get("brief", None))
            if not func["brief"]:
                del func["brief"]
        if "long" in func:
            func["long"] = self.normalizeText(func.get("long", None))
            if not func["long"]:
                del func["long"]
        if "decls" in func:
            func["decls"].sort()
        if "params" in func:
            params = {}
            for name, comment in func["params"].items():
                cmt = self.normalizeText(comment)
                if cmt:
                    params[name] = cmt
            func["params"] = params

        # special case for old C functions - section name should omit "cv" prefix
        if not func.get("isclass", False) and not func.get("isstruct", False):
            self.fixOldCFunctionName(func)
        return func

    def fixOldCFunctionName(self, func):
        """Rename a section documenting only old C/Python1 functions to its ``cv``-prefixed name.

        Does nothing unless every declaration is "C" or "Python1" and at least
        one "C" declaration exists. When the parsed C name equals
        ``"cv::cv" + section name`` the section is renamed to the C function
        name; otherwise a warning is printed.
        """
        if "decls" not in func:
            return
        fname = None
        for decl in func["decls"]:
            if decl[0] != "C" and decl[0] != "Python1":
                return
            if decl[0] == "C":
                fname = decl[2][0]
        if fname is None:
            return

        fname = fname.replace(".", "::")
        if fname == "cv::cv" + func.get("name", ""):
            # FIX: strip the 4-character "cv::" namespace prefix; the previous
            # slice [2:] left a leading "::" in the stored name.
            func["name"] = fname[4:]
            func["method"] = fname[4:]
        else:
            print("RST parser warning: invalid definition of old C function \"%s\" - section name is \"%s\" instead of \"%s\". File: %s (line %s)"
                  % (fname, func["name"], fname[6:], func["file"], func["line"]))
            #self.print_info(func)

    def normalizeText(self, s):
        """Return ``s`` with RST markup and redundant whitespace removed (None stays None)."""
        if s is None:
            return s
        # normalize line endings
        s = re.sub(r"\r\n", "\n", s)
        # remove tailing ::
        s = re.sub(r"::$", "\n", s)
        # remove extra line breaks before/after _ or ,
        s = re.sub(r"\n[ ]*([_,])\n", r"\1 ", s)
        # remove extra line breaks after `
        #s = re.sub(r"`\n", "` ", s)
        # remove extra line breaks before *
        # (the template emits a literal backslash before "*"; it is removed by
        # the "unescape" step below - spelled "\\\\" so that it is also a valid
        # replacement template on Python 3.7+)
        s = re.sub(r"\n\n\*", "\n\\\\*", s)
        # remove extra line breaks before #.
        s = re.sub(r"\n\n#\.", "\n#.", s)
        # remove extra line breaks after #.
        s = re.sub(r"\n#\.\n", "\n#. ", s)
        # remove extra line breaks before `
        s = re.sub(r"\n[ ]*`", " `", s)
        # remove trailing whitespaces
        s = re.sub(r"[ ]+$", "", s)
        # remove whitespace before . (emits "\." which is unescaped below)
        s = re.sub(r"[ ]+\.", "\\\\.", s)
        # remove .. for references
        s = re.sub(r"\.\. \[", "[", s)
        # unescape
        s = re.sub(r"\\(.)", "\\1", s)
        # compress whitespace
        s = re.sub(r"[ ]+", " ", s)

        s = s.replace("**", "")
        s = s.replace("``", "\"")
        s = s.replace("`", "\"")
        s = s.replace("\"\"", "\"")
        s = s.replace(":ocv:cfunc:", "")
        s = s.replace(":math:", "")
        s = s.replace(":ocv:class:", "")
        s = s.replace(":ocv:func:", "")
        s = s.replace("]_", "]")
        s = s.strip()
        return s
if __name__ == "__main__":
    # Command-line entry point: parse the documentation of one module (or of
    # every known module when the argument is "all") and print a summary.

    # FIX: sys.argv always holds the script name, so the old check `< 1` could
    # never trigger and a missing argument crashed on sys.argv[1] below.
    if len(sys.argv) < 2:
        print("Usage:\n%s <module path>" % os.path.basename(sys.argv[0]))
        sys.exit(0)

    # hdr_parser lives next to this script, under ../python/src2
    rst_parser_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    hdr_parser_path = os.path.join(rst_parser_dir, "../python/src2")
    sys.path.append(hdr_parser_path)
    import hdr_parser

    module = sys.argv[1]

    if module != "all" and not os.path.isdir(os.path.join(rst_parser_dir, "../" + module)):
        print("Module \"" + module + "\" could not be found.")
        sys.exit(1)

    parser = RstParser(hdr_parser.CppHeaderParser())

    if module == "all":
        for m in ["androidcamera", "calib3d", "contrib", "core", "features2d", "flann", "gpu", "haartraining", "highgui", "imgproc", "java", "legacy", "ml", "objdetect", "ocl", "python", "stitching", "traincascade", "ts", "video"]:
            parser.parse(m, os.path.join(rst_parser_dir, "../" + m))
    else:
        parser.parse(module, os.path.join(rst_parser_dir, "../" + module))

    # summary
    print("")
    print("RST Parser Summary:")
    print("  Total sections:   %s" % parser.sections_total)
    print("  Skipped sections: %s" % parser.sections_skipped)
    print("  Parsed sections:  %s" % parser.sections_parsed)
    # whatever is neither parsed nor skipped failed validation or raised an assertion
    print("  Invalid sections: %s" % (parser.sections_total - parser.sections_parsed - parser.sections_skipped))

    # statistic by language
    stat = {}
    classes = 0
    structs = 0
    for name, d in parser.definitions.items():
        if d.get("isclass", False):
            classes += 1
        elif d.get("isstruct", False):
            structs += 1
        else:
            for decl in d.get("decls", []):
                stat[decl[0]] = stat.get(decl[0], 0) + 1

    print("")
    print("  classes documented:           %s" % classes)
    print("  structs documented:           %s" % structs)
    for lang in sorted(stat.items()):
        # `lang` is a (language, count) tuple and fills both placeholders
        print("  %7s functions documented: %s" % lang)
# 2011-07-06 01:29:54 +08:00