#!/usr/bin/env python3
"""
Author: Bryan Gillespie

A massively parallel gcov wrapper for generating intermediate coverage formats fast.

The goal of fastcov is to generate code coverage intermediate formats as fast as possible
(ideally < 1 second), even for large projects with hundreds of gcda objects. The intermediate
formats may then be consumed by a report generator such as lcov's genhtml, or a dedicated front
end such as coveralls.

Sample Usage:
    $ cd build_dir
    $ ./fastcov.py --zerocounters
    $ <run unit tests>
    $ ./fastcov.py --exclude /usr/include test/ --lcov -o report.info
    $ genhtml -o code_coverage report.info
"""

import re
import os
import sys
import glob
import json
import time
import argparse
import threading
import subprocess
import multiprocessing

MINIMUM_GCOV = (9, 0, 0)
MINIMUM_CHUNK_SIZE = 5

# Interesting metrics
START_TIME = time.time()
GCOVS_TOTAL = []
GCOVS_SKIPPED = []

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]
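
# For example, list(chunks([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]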

def stopwatch():
    """Return number of seconds since last time this was called"""
    global START_TIME
    end_time = time.time()
    delta = end_time - START_TIME
    START_TIME = end_time
    return delta

def parseVersionFromLine(version_str):
    """Given a string containing a dotted integer version, parse out integers and return as tuple"""
    version = re.search(r'(\d+\.\d+\.\d+)[^\.]', version_str)
    if not version:
        return (0, 0, 0)
    return tuple(map(int, version.group(1).split(".")))
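
# e.g. the first line of `gcov -v` looks something like
# "gcov (Ubuntu 8.3.0-6ubuntu1) 8.3.0", which parses to (8, 3, 0).
# The exact formatting varies by distribution.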

def getGcovVersion(gcov):
    p = subprocess.Popen([gcov, "-v"], stdout=subprocess.PIPE)
    output = p.communicate()[0].decode('UTF-8')
    p.wait()
    return parseVersionFromLine(output.split("\n")[0])

def removeFiles(files):
    for file in files:
        os.remove(file)

def getFilteredGcdaFiles(gcda_files, exclude):
    def excludeGcda(gcda):
        for ex in exclude:
            if ex in gcda:
                return False
        return True
    return list(filter(excludeGcda, gcda_files))
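
# Exclusion is simple substring matching, e.g.
# getFilteredGcdaFiles(["obj/a.gcda", "/usr/include/b.gcda"], ["/usr/include"])
# -> ["obj/a.gcda"]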

def getGcdaFiles(cwd, gcda_files):
    if not gcda_files:
        gcda_files = glob.glob(os.path.join(os.path.abspath(cwd), "**/*.gcda"), recursive=True)
    return gcda_files

def gcovWorker(cwd, gcov, files, chunk, gcov_filter_options, branch_coverage):
    gcov_args = "-it"
    if branch_coverage:
        gcov_args += "b"

    p = subprocess.Popen([gcov, gcov_args] + chunk, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    for line in iter(p.stdout.readline, b''):
        intermediate_json = json.loads(line.decode(sys.stdout.encoding))
        intermediate_json_files = processGcovs(cwd, intermediate_json["files"], gcov_filter_options)
        for f in intermediate_json_files:
            files.append(f) # thread safe, there might be a better way to do this though
        GCOVS_TOTAL.append(len(intermediate_json["files"]))
        GCOVS_SKIPPED.append(len(intermediate_json["files"]) - len(intermediate_json_files))
    p.wait()
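
# Note: with "-it", gcov >= 9 streams one JSON document per line to stdout, which is
# what gcovWorker parses above. Each document is roughly of the shape
#   {"files": [{"file": ..., "functions": [...], "lines": [...]}], ...}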

def processGcdas(cwd, gcov, jobs, gcda_files, gcov_filter_options, branch_coverage):
    chunk_size = max(MINIMUM_CHUNK_SIZE, int(len(gcda_files) / jobs) + 1)

    threads = []
    intermediate_json_files = []
    for chunk in chunks(gcda_files, chunk_size):
        t = threading.Thread(target=gcovWorker, args=(cwd, gcov, intermediate_json_files, chunk, gcov_filter_options, branch_coverage))
        threads.append(t)
        t.start()

    log("Spawned %d gcov threads, each processing at most %d gcda files" % (len(threads), chunk_size))
    for t in threads:
        t.join()

    return intermediate_json_files
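
# For example, 100 gcda files with jobs=8 gives chunk_size = max(5, int(100 / 8) + 1) = 13,
# so 8 threads are spawned, each driving one gcov process over at most 13 gcda files.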

def processGcov(cwd, gcov, files, gcov_filter_options):
    # Add absolute path
    gcov["file_abs"] = os.path.abspath(os.path.join(cwd, gcov["file"]))

    # If explicit sources were passed, check for match
    if gcov_filter_options["sources"]:
        if gcov["file_abs"] in gcov_filter_options["sources"]:
            files.append(gcov)
        return

    # Check include filter
    if gcov_filter_options["include"]:
        for inc in gcov_filter_options["include"]:
            if inc in gcov["file"]:
                files.append(gcov)
                break
        return

    # Check exclude filter
    for ex in gcov_filter_options["exclude"]:
        if ex in gcov["file"]:
            return

    files.append(gcov)

def processGcovs(cwd, gcov_files, gcov_filter_options):
    files = []
    for gcov in gcov_files:
        processGcov(cwd, gcov, files, gcov_filter_options)
    return files
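
# The two functions below emit the lcov tracefile ("info") format consumed by genhtml.
# A record per source file looks roughly like:
#   SF:/abs/path/to/source.cpp
#   FN:<start line>,<function name>
#   FNDA:<execution count>,<function name>
#   FNF:<functions found>
#   FNH:<functions hit>
#   BRDA:<line>,<block>,<branch>,<taken>
#   BRF:<branches found>
#   BRH:<branches hit>
#   DA:<line>,<execution count>
#   LF:<lines found>
#   LH:<lines hit>
#   end_of_record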

def dumpBranchCoverageToLcovInfo(f, branches):
    branch_found = 0
    branch_miss = 0
    for line_num, branch_counts in branches.items():
        for i, count in enumerate(branch_counts):
            # Branch (<line number>, <block number>, <branch number>, <taken>)
            f.write("BRDA:%s,%d,%d,%d\n" % (line_num, int(i / 2), i, count))
            branch_found += 1
            branch_miss += int(count == 0)
    f.write("BRF:%d\n" % branch_found)                 # Branches Found (individual branches, not lines)
    f.write("BRH:%d\n" % (branch_found - branch_miss)) # Branches Hit

def dumpToLcovInfo(fastcov_json, output):
    with open(output, "w") as f:
        for sf, data in fastcov_json["sources"].items():
            f.write("SF:%s\n" % sf) # Source File

            fn_miss = 0
            for function, fdata in data["functions"].items():
                f.write("FN:%d,%s\n" % (fdata["start_line"], function))        # Function Start Line
                f.write("FNDA:%d,%s\n" % (fdata["execution_count"], function)) # Function Hits
                fn_miss += int(fdata["execution_count"] == 0)
            f.write("FNF:%d\n" % len(data["functions"]))             # Functions Found
            f.write("FNH:%d\n" % (len(data["functions"]) - fn_miss)) # Functions Hit

            if data["branches"]:
                dumpBranchCoverageToLcovInfo(f, data["branches"])

            line_miss = 0
            for line_num, count in data["lines"].items():
                f.write("DA:%s,%d\n" % (line_num, count)) # Line
                line_miss += int(count == 0)
            f.write("LF:%d\n" % len(data["lines"]))               # Lines Found
            f.write("LH:%d\n" % (len(data["lines"]) - line_miss)) # Lines Hit

            f.write("end_of_record\n")

def exclMarkerWorker(fastcov_sources, chunk):
    for source in chunk:
        # If there are no covered lines, skip
        if not fastcov_sources[source]["lines"]:
            continue

        start_line = 0
        end_line = 0
        with open(source) as f:
            for i, line in enumerate(f, 1): # Start enumeration at line 1
                if "LCOV_EXCL" not in line:
                    continue

                if "LCOV_EXCL_LINE" in line:
                    if str(i) in fastcov_sources[source]["lines"]:
                        del fastcov_sources[source]["lines"][str(i)]
                    if str(i) in fastcov_sources[source]["branches"]:
                        del fastcov_sources[source]["branches"][str(i)]
                elif "LCOV_EXCL_START" in line:
                    start_line = i
                elif "LCOV_EXCL_STOP" in line:
                    end_line = i

                    if not start_line:
                        end_line = 0
                        continue

                    for key in ["lines", "branches"]:
                        for line_num in list(fastcov_sources[source][key].keys()):
                            if start_line <= int(line_num) <= end_line:
                                del fastcov_sources[source][key][line_num]

                    start_line = end_line = 0
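
# The markers honored above follow lcov's conventions, e.g. in C++ source:
#   int x = 0;  // LCOV_EXCL_LINE  (exclude just this line)
#   // LCOV_EXCL_START
#   ...everything in between is excluded...
#   // LCOV_EXCL_STOP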

def scanExclusionMarkers(fastcov_json, jobs):
    chunk_size = max(MINIMUM_CHUNK_SIZE, int(len(fastcov_json["sources"]) / jobs) + 1)

    threads = []
    for chunk in chunks(list(fastcov_json["sources"].keys()), chunk_size):
        t = threading.Thread(target=exclMarkerWorker, args=(fastcov_json["sources"], chunk))
        threads.append(t)
        t.start()

    log("Spawned %d threads, each scanning at most %d source files" % (len(threads), chunk_size))
    for t in threads:
        t.join()

def distillFunction(function_raw, functions):
    function_name = function_raw["name"]
    if function_name not in functions:
        functions[function_name] = {
            "start_line": function_raw["start_line"],
            "execution_count": function_raw["execution_count"]
        }
    else:
        functions[function_name]["execution_count"] += function_raw["execution_count"]

def distillLine(line_raw, lines, branches):
    line_number = str(line_raw["line_number"])
    if line_number not in lines:
        lines[line_number] = line_raw["count"]
    else:
        lines[line_number] += line_raw["count"]

    for i, branch in enumerate(line_raw["branches"]):
        if line_number not in branches:
            branches[line_number] = []

        # Pad the aggregated list so branch counts from multiple gcov documents
        # can be summed index-by-index
        blen = len(branches[line_number])
        glen = len(line_raw["branches"])
        if blen < glen:
            branches[line_number] += [0] * (glen - blen)

        branches[line_number][i] += branch["count"]

def distillSource(source_raw, sources):
    source_name = source_raw["file_abs"]
    if source_name not in sources:
        sources[source_name] = {
            "functions": {},
            "branches": {},
            "lines": {},
        }

    for function in source_raw["functions"]:
        distillFunction(function, sources[source_name]["functions"])

    for line in source_raw["lines"]:
        distillLine(line, sources[source_name]["lines"], sources[source_name]["branches"])

def distillReport(report_raw):
    report_json = {
        "sources": {}
    }

    for source in report_raw:
        distillSource(source, report_json["sources"])

    return report_json
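
# The distilled report has roughly this shape:
#   {"sources": {"/abs/path/foo.cpp": {
#       "functions": {"main": {"start_line": 5, "execution_count": 1}},
#       "branches": {"7": [1, 0]},
#       "lines": {"7": 1}}}}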

def dumpToJson(intermediate, output):
    with open(output, "w") as f:
        json.dump(intermediate, f)

def log(line):
    # args is set at module scope in __main__ before main() runs
    if not args.quiet:
        print("[{:.3f}s] {}".format(stopwatch(), line))

def getGcovFilterOptions(args):
    return {
        "sources": set([os.path.abspath(s) for s in args.sources]), # Make paths absolute, use set for fast lookups
        "include": args.includepost,
        "exclude": args.excludepost,
    }

def main(args):
    # Need at least gcov 9.0.0 because that's when gcov JSON output and stdout streaming were introduced
    current_gcov_version = getGcovVersion(args.gcov)
    if current_gcov_version < MINIMUM_GCOV:
        sys.stderr.write("Minimum gcov version {} required, found {}\n".format(".".join(map(str, MINIMUM_GCOV)), ".".join(map(str, current_gcov_version))))
        sys.exit(1)

    # Get list of gcda files to process
    gcda_files = getGcdaFiles(args.directory, args.gcda_files)
    log("Found {} .gcda files".format(len(gcda_files)))

    # If gcda filtering is enabled, filter them out now
    if args.excludepre:
        gcda_files = getFilteredGcdaFiles(gcda_files, args.excludepre)
        log("{} .gcda files after filtering".format(len(gcda_files)))

    # We "zero" the "counters" by simply deleting all gcda files
    if args.zerocounters:
        removeFiles(gcda_files)
        log("{} .gcda files removed".format(len(gcda_files)))
        return

    # Fire up one gcov per cpu and start processing gcdas
    gcov_filter_options = getGcovFilterOptions(args)
    intermediate_json_files = processGcdas(args.cdirectory, args.gcov, args.jobs, gcda_files, gcov_filter_options, args.branchcoverage)

    # Summarize processing results
    gcov_total = sum(GCOVS_TOTAL)
    gcov_skipped = sum(GCOVS_SKIPPED)
    log("Processed {} .gcov files ({} total, {} skipped)".format(gcov_total - gcov_skipped, gcov_total, gcov_skipped))

    # Distill all the extraneous info gcov gives us down to the core report
    fastcov_json = distillReport(intermediate_json_files)
    log("Aggregated raw gcov JSON into fastcov JSON report")

    # Dump to desired file format
    if args.lcov:
        scanExclusionMarkers(fastcov_json, args.jobs)
        log("Scanned {} source files for exclusion markers".format(len(fastcov_json["sources"])))
        dumpToLcovInfo(fastcov_json, args.output)
        log("Created lcov info file '{}'".format(args.output))
    elif args.gcov_raw:
        dumpToJson(intermediate_json_files, args.output)
        log("Created gcov raw json file '{}'".format(args.output))
    else:
        dumpToJson(fastcov_json, args.output)
        log("Created fastcov json file '{}'".format(args.output))

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='A parallel gcov wrapper for fast coverage report generation')
    parser.add_argument('-z', '--zerocounters', dest='zerocounters', action="store_true", help='Recursively delete all gcda files')

    # Enable Branch Coverage
    parser.add_argument('-b', '--branch-coverage', dest='branchcoverage', action="store_true", help='Include branch counts in the coverage report')

    # Filtering Options
    parser.add_argument('-s', '--source-files', dest='sources', nargs="+", metavar='', default=[], help='Filter: Specify exactly which source files should be included in the final report. Paths must be either absolute or relative to current directory.')
    parser.add_argument('-e', '--exclude', dest='excludepost', nargs="+", metavar='', default=[], help='Filter: Exclude source files from final report if they contain one of the provided substrings (i.e. /usr/include test/, etc.)')
    parser.add_argument('-i', '--include', dest='includepost', nargs="+", metavar='', default=[], help='Filter: Only include source files in final report that contain one of the provided substrings (i.e. src/ etc.)')
    parser.add_argument('-f', '--gcda-files', dest='gcda_files', nargs="+", metavar='', default=[], help='Filter: Specify exactly which gcda files should be processed instead of recursively searching the search directory.')
    parser.add_argument('-E', '--exclude-gcda', dest='excludepre', nargs="+", metavar='', default=[], help='Filter: Exclude gcda files from being processed via simple find matching (not regex)')

    parser.add_argument('-g', '--gcov', dest='gcov', default='gcov', help='Which gcov binary to use')

    parser.add_argument('-d', '--search-directory', dest='directory', default=".", help='Base directory to recursively search for gcda files (default: .)')
    parser.add_argument('-c', '--compiler-directory', dest='cdirectory', default=".", help='Base directory compiler was invoked from (default: .). \
This needs to be set if invoking fastcov from somewhere other than the base compiler directory.')

    parser.add_argument('-j', '--jobs', dest='jobs', type=int, default=multiprocessing.cpu_count(), help='Number of parallel gcov to spawn (default: %d).' % multiprocessing.cpu_count())
    parser.add_argument('-m', '--minimum-chunk-size', dest='minimum_chunk', type=int, default=5, help='Minimum number of files a thread should process (default: 5). \
If you have only 4 gcda files but they are monstrously huge, you could change this value to 1 so that each thread will only process 1 gcda. Otherwise fastcov will spawn only 1 thread to process all of them.')

    parser.add_argument('-l', '--lcov', dest='lcov', action="store_true", help='Output in lcov info format instead of fastcov json')
    parser.add_argument('-r', '--gcov-raw', dest='gcov_raw', action="store_true", help='Output in gcov raw json instead of fastcov json')
    parser.add_argument('-o', '--output', dest='output', default="coverage.json", help='Name of output file (default: coverage.json)')
    parser.add_argument('-q', '--quiet', dest='quiet', action="store_true", help='Suppress output to stdout')

    args = parser.parse_args()
    main(args)