mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Add RISC-V V support (#4346)
Benchmark: convert riscv-v-spec-1.0.pdf into 111 PNG images, perform OCR on each image in sequence, and measure the total run time on banana_f3. Old: 31m16.267s; new: 16m51.155s. Co-authored-by: sunyuechi <sunyuechi@iscas.ac.cn> Co-authored-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
d7c0a05ffa
commit
16fc9d90a4
@ -199,6 +199,15 @@ libtesseract_la_LIBADD += libtesseract_neon.la
|
|||||||
noinst_LTLIBRARIES += libtesseract_neon.la
|
noinst_LTLIBRARIES += libtesseract_neon.la
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if HAVE_RVV
|
||||||
|
libtesseract_rvv_la_CXXFLAGS = $(RVV_CXXFLAGS)
|
||||||
|
libtesseract_rvv_la_CXXFLAGS += -O3
|
||||||
|
libtesseract_rvv_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||||
|
libtesseract_rvv_la_SOURCES = src/arch/intsimdmatrixrvv.cpp
|
||||||
|
libtesseract_la_LIBADD += libtesseract_rvv.la
|
||||||
|
noinst_LTLIBRARIES += libtesseract_rvv.la
|
||||||
|
endif
|
||||||
|
|
||||||
libtesseract_la_SOURCES += src/arch/intsimdmatrix.cpp
|
libtesseract_la_SOURCES += src/arch/intsimdmatrix.cpp
|
||||||
libtesseract_la_SOURCES += src/arch/simddetect.cpp
|
libtesseract_la_SOURCES += src/arch/simddetect.cpp
|
||||||
|
|
||||||
|
21
configure.ac
21
configure.ac
@ -131,6 +131,7 @@ AM_CONDITIONAL([HAVE_AVX512F], false)
|
|||||||
AM_CONDITIONAL([HAVE_FMA], false)
|
AM_CONDITIONAL([HAVE_FMA], false)
|
||||||
AM_CONDITIONAL([HAVE_SSE4_1], false)
|
AM_CONDITIONAL([HAVE_SSE4_1], false)
|
||||||
AM_CONDITIONAL([HAVE_NEON], false)
|
AM_CONDITIONAL([HAVE_NEON], false)
|
||||||
|
AM_CONDITIONAL([HAVE_RVV], false)
|
||||||
|
|
||||||
case "${host_cpu}" in
|
case "${host_cpu}" in
|
||||||
|
|
||||||
@ -188,6 +189,16 @@ case "${host_cpu}" in
|
|||||||
|
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
riscv*)
|
||||||
|
|
||||||
|
AX_CHECK_COMPILE_FLAG([-march=rv64gcv], [rvv=true], [rvv=false], [$WERROR])
|
||||||
|
AM_CONDITIONAL([HAVE_RVV], [$rvv])
|
||||||
|
if $rvv; then
|
||||||
|
AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions])
|
||||||
|
check_for_rvv=1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
*)
|
*)
|
||||||
|
|
||||||
AC_MSG_WARN([No compiler options for $host_cpu])
|
AC_MSG_WARN([No compiler options for $host_cpu])
|
||||||
@ -207,6 +218,16 @@ if test x$check_for_neon = x1; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# additional checks for RVV targets
|
||||||
|
if test x$check_for_rvv = x1; then
|
||||||
|
AC_MSG_NOTICE([checking how to detect RVV availability])
|
||||||
|
AC_CHECK_FUNCS([getauxval])
|
||||||
|
|
||||||
|
if test $ac_cv_func_getauxval = no; then
|
||||||
|
AC_MSG_WARN([RVV is available, but we don't know how to check for it. Will not be able to use RVV.])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
|
AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
|
||||||
AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)
|
AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)
|
||||||
|
|
||||||
|
@ -115,6 +115,8 @@ struct TESS_API IntSimdMatrix {
|
|||||||
static const IntSimdMatrix *intSimdMatrix;
|
static const IntSimdMatrix *intSimdMatrix;
|
||||||
// Only available with NEON.
|
// Only available with NEON.
|
||||||
static const IntSimdMatrix intSimdMatrixNEON;
|
static const IntSimdMatrix intSimdMatrixNEON;
|
||||||
|
// Only available with RVV.
|
||||||
|
static const IntSimdMatrix intSimdMatrixRVV;
|
||||||
// Only available with AVX2 / AVX / FMA / SSE.
|
// Only available with AVX2 / AVX / FMA / SSE.
|
||||||
static const IntSimdMatrix intSimdMatrixAVX2;
|
static const IntSimdMatrix intSimdMatrixAVX2;
|
||||||
static const IntSimdMatrix intSimdMatrixSSE;
|
static const IntSimdMatrix intSimdMatrixSSE;
|
||||||
|
88
src/arch/intsimdmatrixrvv.cpp
Normal file
88
src/arch/intsimdmatrixrvv.cpp
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// File: intsimdmatrixrvv.cpp
|
||||||
|
// Description: matrix-vector product for 8-bit data on rvv.
|
||||||
|
// Author: sunyuechi
|
||||||
|
//
|
||||||
|
// Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
# include "config_auto.h" // for HAVE_RVV, ...
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_RVV
|
||||||
|
# include "intsimdmatrix.h"
|
||||||
|
# include "tesstypes.h"
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
|
|
||||||
|
static int DotProduct(const int8_t *u, const int8_t *v, int num) {
|
||||||
|
int total = 0;
|
||||||
|
|
||||||
|
asm __volatile__ (
|
||||||
|
" .option arch, +v \n\t"
|
||||||
|
" vsetvli t0,zero,e32,m8,ta,ma \n\t"
|
||||||
|
" vmv.v.i v0,0 \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
" vsetvli t0,%[num],e8,m2,ta,ma \n\t"
|
||||||
|
" vle8.v v16,0(%[u]) \n\t"
|
||||||
|
" vle8.v v24,0(%[v]) \n\t"
|
||||||
|
" sub %[num],%[num],t0 \n\t"
|
||||||
|
" vwmul.vv v8,v24,v16 \n\t"
|
||||||
|
" add %[u],%[u],t0 \n\t"
|
||||||
|
" add %[v],%[v],t0 \n\t"
|
||||||
|
" vsetvli zero,zero,e16,m4,tu,ma \n\t"
|
||||||
|
" vwadd.wv v0,v0,v8 \n\t"
|
||||||
|
" bnez %[num],1b \n\t"
|
||||||
|
" vsetvli t0,zero,e32,m8,ta,ma \n\t"
|
||||||
|
" vmv.s.x v8,zero \n\t"
|
||||||
|
" vredsum.vs v0,v0,v8 \n\t"
|
||||||
|
" vmv.x.s %[total],v0 \n\t"
|
||||||
|
: [u] "+r" (u),
|
||||||
|
[v] "+r" (v),
|
||||||
|
[num] "+r" (num),
|
||||||
|
[total] "+r" (total)
|
||||||
|
:
|
||||||
|
: "cc", "memory"
|
||||||
|
);
|
||||||
|
|
||||||
|
return total;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
|
||||||
|
const int8_t *u, TFloat *v) {
|
||||||
|
int num_out = dim1;
|
||||||
|
int num_in = dim2 - 1;
|
||||||
|
for (int i = 0; i < num_out; ++i) {
|
||||||
|
const int8_t *wi_start = wi + i * dim2;
|
||||||
|
int total = DotProduct(wi_start, u, num_in);
|
||||||
|
// Add in the bias and apply scaling.
|
||||||
|
v[i] = (total + wi_start[num_in] * INT8_MAX) * scales[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Descriptor for the RVV matrix-vector product. All four size
// parameters are set to 1.
const IntSimdMatrix IntSimdMatrix::intSimdMatrixRVV = {
    matrixDotVector, // Function.
    1,               // Number of 32 bit outputs held in each register.
    1,               // Maximum number of registers that we will use to hold outputs.
    1,               // Number of 8 bit inputs in the inputs register.
    1                // Number of inputs in each weight group.
};
|
||||||
|
|
||||||
|
} // namespace tesseract.
|
||||||
|
|
||||||
|
#endif /* HAVE_RVV */
|
@ -65,6 +65,13 @@
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAVE_RVV)
|
||||||
|
# if defined(HAVE_GETAUXVAL)
|
||||||
|
# include <sys/auxv.h>
|
||||||
|
# define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
// Computes and returns the dot product of the two n-vectors u and v.
|
// Computes and returns the dot product of the two n-vectors u and v.
|
||||||
@ -89,6 +96,8 @@ bool SIMDDetect::neon_available_ = true;
|
|||||||
#elif defined(HAVE_NEON)
|
#elif defined(HAVE_NEON)
|
||||||
// If true, then Neon has been detected.
|
// If true, then Neon has been detected.
|
||||||
bool SIMDDetect::neon_available_;
|
bool SIMDDetect::neon_available_;
|
||||||
|
#elif defined(HAVE_RVV)
|
||||||
|
bool SIMDDetect::rvv_available_;
|
||||||
#else
|
#else
|
||||||
// If true, then AVX has been detected.
|
// If true, then AVX has been detected.
|
||||||
bool SIMDDetect::avx_available_;
|
bool SIMDDetect::avx_available_;
|
||||||
@ -229,6 +238,13 @@ SIMDDetect::SIMDDetect() {
|
|||||||
elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
|
elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
|
||||||
neon_available_ = hwcap & HWCAP_NEON;
|
neon_available_ = hwcap & HWCAP_NEON;
|
||||||
# endif
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAVE_RVV)
|
||||||
|
# if defined(HAVE_GETAUXVAL)
|
||||||
|
const unsigned long hwcap = getauxval(AT_HWCAP);
|
||||||
|
rvv_available_ = hwcap & HWCAP_RV('V');
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Select code for calculation of dot product based on autodetection.
|
// Select code for calculation of dot product based on autodetection.
|
||||||
@ -258,6 +274,10 @@ SIMDDetect::SIMDDetect() {
|
|||||||
} else if (neon_available_) {
|
} else if (neon_available_) {
|
||||||
// NEON detected.
|
// NEON detected.
|
||||||
SetDotProduct(DotProductNEON, &IntSimdMatrix::intSimdMatrixNEON);
|
SetDotProduct(DotProductNEON, &IntSimdMatrix::intSimdMatrixNEON);
|
||||||
|
#endif
|
||||||
|
#if defined(HAVE_RVV)
|
||||||
|
} else if (rvv_available_) {
|
||||||
|
SetDotProduct(DotProductGeneric, &IntSimdMatrix::intSimdMatrixRVV);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,6 +63,10 @@ public:
|
|||||||
static inline bool IsNEONAvailable() {
|
static inline bool IsNEONAvailable() {
|
||||||
return detector.neon_available_;
|
return detector.neon_available_;
|
||||||
}
|
}
|
||||||
|
// Returns true if RVV is available on this system.
|
||||||
|
static inline bool IsRVVAvailable() {
|
||||||
|
return detector.rvv_available_;
|
||||||
|
}
|
||||||
|
|
||||||
// Update settings after config variable was set.
|
// Update settings after config variable was set.
|
||||||
static TESS_API void Update();
|
static TESS_API void Update();
|
||||||
@ -86,6 +90,8 @@ private:
|
|||||||
static TESS_API bool sse_available_;
|
static TESS_API bool sse_available_;
|
||||||
// If true, then NEON has been detected.
|
// If true, then NEON has been detected.
|
||||||
static TESS_API bool neon_available_;
|
static TESS_API bool neon_available_;
|
||||||
|
// If true, then RVV has been detected.
|
||||||
|
static TESS_API bool rvv_available_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace tesseract
|
} // namespace tesseract
|
||||||
|
@ -112,6 +112,9 @@ static void PrintVersionInfo() {
|
|||||||
#if defined(HAVE_NEON) || defined(__aarch64__)
|
#if defined(HAVE_NEON) || defined(__aarch64__)
|
||||||
if (tesseract::SIMDDetect::IsNEONAvailable())
|
if (tesseract::SIMDDetect::IsNEONAvailable())
|
||||||
printf(" Found NEON\n");
|
printf(" Found NEON\n");
|
||||||
|
#elif defined(HAVE_RVV)
|
||||||
|
if (tesseract::SIMDDetect::IsRVVAvailable())
|
||||||
|
printf(" Found RVV\n");
|
||||||
#else
|
#else
|
||||||
if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
|
if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
|
||||||
printf(" Found AVX512BW\n");
|
printf(" Found AVX512BW\n");
|
||||||
|
Loading…
Reference in New Issue
Block a user