# 2018-12-18 18:40:04 +08:00
# Import required modules
import cv2 as cv
import math
import argparse
############ Add argument parser for command line arguments ############
# Command-line interface for the demo.
# NOTE: option flags must start with "-" and carry no surrounding whitespace.
# A name such as " --model " is registered by argparse as a positional
# argument, which makes `required=True` raise TypeError and turns every other
# flag into a mandatory positional.
parser = argparse.ArgumentParser(
    description=' Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2) ')
# Source of frames; when omitted the default camera is used (see main()).
parser.add_argument(
    '--input',
    help=' Path to input image or video file. Skip this argument to capture frames from a camera. ')
# Serialized network passed to cv.dnn.readNet.
parser.add_argument(
    '--model', required=True,
    help=' Path to a binary .pb file of model contains trained weights. ')
# Network input size; the frame is resized to (width, height) before inference.
parser.add_argument(
    '--width', type=int, default=320,
    help=' Preprocess input image by resizing to a specific width. It should be multiple by 32. ')
parser.add_argument(
    '--height', type=int, default=320,
    help=' Preprocess input image by resizing to a specific height. It should be multiple by 32. ')
# Minimum score for a cell to produce a detection.
parser.add_argument(
    '--thr', type=float, default=0.5,
    help=' Confidence threshold. ')
# Overlap threshold handed to non-maximum suppression.
parser.add_argument(
    '--nms', type=float, default=0.4,
    help=' Non-maximum suppression threshold. ')
# Parsed once at import time; main() reads this module-level namespace.
args = parser.parse_args()
############ Utility functions ############
def _rotated_rect(col, row, d0, d1, d2, d3, theta):
    """Build one ``((cx, cy), (w, h), angle_degrees)`` tuple for a map cell.

    ``d0``..``d3`` are the values of geometry channels 0-3 at the cell and
    ``theta`` is the value of channel 4, used as an angle in radians.
    """
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    box_h = d0 + d2
    box_w = d1 + d3
    # Cell index scaled by 4.0 (presumably the network's output stride --
    # confirm against the model), then shifted along the rotated axes.
    anchor_x = col * 4.0 + cos_t * d1 + sin_t * d2
    anchor_y = row * 4.0 - sin_t * d1 + cos_t * d2
    # Two corners reached from the anchor; their midpoint is the box centre.
    corner_a = (-sin_t * box_h + anchor_x, -cos_t * box_h + anchor_y)
    corner_b = (-cos_t * box_w + anchor_x, sin_t * box_w + anchor_y)
    middle = (0.5 * (corner_a[0] + corner_b[0]),
              0.5 * (corner_a[1] + corner_b[1]))
    # The angle is negated and converted from radians to degrees.
    return (middle, (box_w, box_h), -1 * theta * 180.0 / math.pi)


def decode(scores, geometry, scoreThresh):
    """Convert raw score and geometry maps into rotated boxes.

    Args:
        scores: 4-D array of shape (1, 1, H, W) with one score per cell.
        geometry: 4-D array of shape (1, 5, H, W). Channels 0-3 feed the box
            size and position; channel 4 is used as an angle in radians.
        scoreThresh: cells whose score is below this value are skipped.

    Returns:
        A two-element list ``[detections, confidences]``. Each detection is
        ``((cx, cy), (w, h), angle_degrees)`` and ``confidences`` holds the
        matching scores as Python floats, in row-major scan order.

    Raises:
        AssertionError: if the arrays do not have the shapes described above.
    """
    # Validate the layout of both maps before reading any data.
    assert len(scores.shape) == 4, " Incorrect dimensions of scores "
    assert len(geometry.shape) == 4, " Incorrect dimensions of geometry "
    assert scores.shape[0] == 1, " Invalid dimensions of scores "
    assert geometry.shape[0] == 1, " Invalid dimensions of geometry "
    assert scores.shape[1] == 1, " Invalid dimensions of scores "
    assert geometry.shape[1] == 5, " Invalid dimensions of geometry "
    assert scores.shape[2] == geometry.shape[2], " Invalid dimensions of scores and geometry "
    assert scores.shape[3] == geometry.shape[3], " Invalid dimensions of scores and geometry "

    rows = scores.shape[2]
    cols = scores.shape[3]
    found = []
    found_scores = []

    for row in range(rows):
        # Slice out the current row of every channel once.
        row_scores = scores[0][0][row]
        row_d0 = geometry[0][0][row]
        row_d1 = geometry[0][1][row]
        row_d2 = geometry[0][2][row]
        row_d3 = geometry[0][3][row]
        row_theta = geometry[0][4][row]

        for col in range(cols):
            cell_score = row_scores[col]
            # Skip cells that do not reach the threshold.
            if cell_score < scoreThresh:
                continue
            found.append(_rotated_rect(col, row,
                                       row_d0[col], row_d1[col],
                                       row_d2[col], row_d3[col],
                                       row_theta[col]))
            found_scores.append(float(cell_score))

    return [found, found_scores]
def main():
    """Run the text detector on an image, video or camera stream and show it.

    Reads its settings from the module-level ``args`` namespace, loads the
    network with ``cv.dnn.readNet``, then for every frame: runs inference,
    decodes the outputs with ``decode``, applies rotated non-maximum
    suppression, draws the surviving boxes and the inference time, and
    displays the frame. The loop ends when a key is pressed or when no more
    frames can be read; in the latter case it waits for a key press first.
    """
    # Read and store arguments
    confThreshold = args.thr
    nmsThreshold = args.nms
    inpWidth = args.width
    inpHeight = args.height
    model = args.model

    # Load network
    net = cv.dnn.readNet(model)

    # Create a new named window
    kWinName = " EAST: An Efficient and Accurate Scene Text Detector "
    cv.namedWindow(kWinName, cv.WINDOW_NORMAL)

    # Output layers: first the score map, then the geometry map. The names
    # must match the graph exactly, so they carry no surrounding whitespace.
    outNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3",
    ]

    # Open a video file or an image file or a camera stream
    cap = cv.VideoCapture(args.input if args.input else 0)
    try:
        while cv.waitKey(1) < 0:
            # Read frame
            hasFrame, frame = cap.read()
            if not hasFrame:
                # Keep the last frame on screen until a key is pressed.
                cv.waitKey()
                break

            # Ratios that map network-input coordinates back to the frame.
            height_ = frame.shape[0]
            width_ = frame.shape[1]
            rW = width_ / float(inpWidth)
            rH = height_ / float(inpHeight)

            # Create a 4D blob from frame: scale 1.0, resized to the network
            # input size, per-channel mean subtracted, swapRB=True, crop=False.
            blob = cv.dnn.blobFromImage(
                frame, 1.0, (inpWidth, inpHeight),
                (123.68, 116.78, 103.94), True, False)

            # Run the model
            net.setInput(blob)
            outs = net.forward(outNames)
            t, _ = net.getPerfProfile()
            label = ' Inference time: %.2f ms ' % (t * 1000.0 / cv.getTickFrequency())

            # Get scores and geometry (same order as outNames)
            scores = outs[0]
            geometry = outs[1]
            [boxes, confidences] = decode(scores, geometry, confThreshold)

            # Apply NMS
            indices = cv.dnn.NMSBoxesRotated(
                boxes, confidences, confThreshold, nmsThreshold)
            for i in indices:
                # Depending on the OpenCV version each entry is either a
                # one-element array or a plain integer; accept both.
                idx = int(i[0]) if hasattr(i, "__len__") else int(i)

                # Get 4 corners of the rotated rect, scale them back to the
                # frame and convert to ints, which the drawing API expects.
                vertices = cv.boxPoints(boxes[idx])
                corners = [(int(round(vx * rW)), int(round(vy * rH)))
                           for vx, vy in vertices]

                # Connect consecutive corners, wrapping around to close the box.
                for j in range(4):
                    cv.line(frame, corners[j], corners[(j + 1) % 4],
                            (0, 255, 0), 1)

            # Put efficiency information
            cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5,
                       (0, 255, 0))

            # Display the frame
            cv.imshow(kWinName, frame)
    finally:
        # Always give the camera/file handle and the window back.
        cap.release()
        cv.destroyAllWindows()
# Run the demo only when this file is executed as a script, not on import.
# The comparison must be against exactly "__main__" for the guard to fire.
if __name__ == "__main__":
    main()