mirror of
synced 2025-01-22 17:43:12 +08:00
Added clapack * bring a small subset of Lapack, automatically converted to C, into OpenCV * added missing lsame_ prototype * * small fix in make_clapack script * trying to fix remaining CI problems * fixed character arrays' initializers * get rid of F2C_STR_MAX * * added back single-precision versions for QR, LU and Cholesky decompositions. It adds very little extra overhead. * added stub version of sdesdd. * uncommented calls to all the single-precision Lapack functions from opencv/core/src/hal_internal.cpp. * fixed warning from Visual Studio + cleaned f2c runtime a bit * * regenerated Lapack w/o forward declarations of intrinsic functions (such as sqrt(), r_cnjg() etc.) * at once, trailing whitespaces are removed from the generated sources, just in case * since there is no declarations of intrinsic functions anymore, we could turn some of them into inline functions * trying to eliminate the crash on ARM * fixed API and semantics of s_copy * * CLapack has been tested successfully. It's now time to restore the standard LAPACK detection procedure * removed some more trailing whitespaces * * retained only the essential stuff in CLapack * added checks to lapack calls to gracefully return "not implemented" instead of returning invalid results with "ok" status * disabled warning when building lapack * cmake: update LAPACK detection Co-authored-by: Alexander Alekhin <alexander.a.alekhin@gmail.com>
273 lines
7.7 KiB
273 lines
7.7 KiB
# Usage text for this script. It is also appended to the error message that
# is reported when the detected f2c is not the expected "for_lapack" build.
# NOTE(review): in this view the literal has no closing triple quote and
# step 2 lacks the download location - confirm both against the upstream file.
appdoc = """
This is generator of CLapack subset.
The usage:
1. Make sure you have the special version of f2c installed.
Grab it from https://github.com/vpisarev/f2c/tree/for_lapack.
2. Download fresh version of Lapack from
You may choose some specific version or the latest snapshot.
3. If necessary, edit "roots" and "banlist" variables in this script, specify the needed and unneeded functions
4. From within a working directory run
$ python3 <opencv_root>/3rdparty/clapack/make_clapack.py <lapack_root>
$ F2C=<path_to_custom_f2c> python3 <opencv_root>/3rdparty/clapack/make_clapack.py <lapack_root>
it will generate "new_clapack" directory with "include" and "src" subdirectories.
5. erase opencv/3rdparty/clapack/src and replace it with new_clapack/src.
6. copy new_clapack/include/lapack.h to opencv/3rdparty/clapack/include.
7. optionally, edit opencv/3rdparty/clapack/CMakeLists.txt and update CLAPACK_VERSION as needed.
This is it. Now build it and enjoy.
import glob, re, os, shutil, subprocess, sys
# Functions the generated subset must provide (the "needed" functions from
# the usage text). Names carry a trailing underscore here; further down the
# script drops the last character and upper-cases them.
roots = ["cgemm_", "dgemm_", "sgemm_", "zgemm_",
"dgeev_", "dgesdd_",
# The entries below are currently disabled.
#"dgesv_", "dgetrf_", "dposv_", "dpotrf_", "dgels_", "dgeqrf_",
#"sgesv_", "sgetrf_", "sposv_", "spotrf_", "sgels_", "sgeqrf_"
# NOTE(review): the closing "]" of the list above is not visible in this
# view - confirm against the upstream file.
# Functions that must not be part of the subset (the "unneeded" functions
# from the usage text); they are subtracted from the candidate set later.
banlist = ["slamch_", "slamc3_", "dlamch_", "dlamc3_", "lsame_", "xerbla_"]
# NOTE(review): no statement is visible under this `if` in this view;
# presumably it prints the usage text and stops when no argument is given.
if len(sys.argv) < 2:
# First command-line argument: root directory of the LAPACK source tree.
lapack_root = sys.argv[1]
# Directory under which the "new_clapack" output directory is created.
dst_path = "."
def error(msg):
    """Report *msg* on standard output, prefixed with ``error: ``."""
    report = "error: " + msg
    print(report)
def file2fun(fname):
    """Turn a Fortran source path into an upper-case routine name.

    The directory part is discarded, the last two characters of the file
    name (the ``.f`` suffix of the globbed sources) are dropped and the
    remainder is upper-cased, e.g. ``BLAS/SRC/dgemm.f`` -> ``DGEMM``.
    """
    base_name = os.path.basename(fname)
    stem = base_name[:-2]
    return stem.upper()
def print_graph(m):
    """Print an adjacency mapping, one ``node : a, b, ...`` line per key.

    Keys are listed in sorted order and so are the neighbours of each key.
    """
    for node, adjacent in sorted(m.items()):
        listing = ", ".join(sorted(adjacent))
        print(node + " : " + listing)
# Locations of the Fortran sources inside the LAPACK tree.
blas_path, lapack_path = (
    os.path.join(lapack_root, subdir) for subdir in ("BLAS/SRC", "SRC")
)

# Bring the configured names to the form produced by file2fun(): drop the
# last character (the trailing underscore) and upper-case the rest.
roots = [name[:-1].upper() for name in roots]
banlist = [name[:-1].upper() for name in banlist]
def fun2file(func):
    """Return the path of the Fortran source that implements *func*.

    The BLAS directory is checked first, then the LAPACK one. When neither
    contains ``<func>.f`` (lower-cased), the problem is reported through
    error() and None is returned.
    """
    source_name = func.lower() + ".f"
    candidates = [root + "/" + source_name for root in (blas_path, lapack_path)]
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    error("neither %s nor %s exist" % tuple(candidates))
    return None
# Every *.f file in the BLAS and LAPACK source directories is a candidate.
all_files = glob.glob(blas_path + "/*.f")
all_files.extend(glob.glob(lapack_path + "/*.f"))

# Candidate routine names, minus the banned ones, as a set (for membership
# tests) and as a sorted list.
all_funcs_set = {file2fun(path) for path in all_files} - set(banlist)
all_funcs = sorted(all_funcs_set)

# routine name -> set of routine names it depends on
func_deps = {}

# Splits a source line into identifier-like words.
words_regexp = re.compile(r'\w+')
# Record in func_deps[func] the set of known routines that `func` refers to.
# The routine's Fortran source is read line by line; `external_mode` tracks
# whether the current line starts with 'EXTERNAL ' (or is a '$' line seen
# while already in that mode), and the words of a line are matched against
# all_funcs_set. The routine itself is removed from its own dependency set.
# NOTE(review): several branch/loop bodies are not visible in this view (the
# `if`/`elif`/`for` headers below are followed directly by unrelated
# statements), so the exact control flow cannot be confirmed from here.
def scan_deps(func):
global func_deps
# NOTE(review): presumably an early exit for routines that were already
# visited - the statement under this `if` is not visible.
if func in func_deps:
func_deps[func] = set([]) # to avoid possibly infinite recursion
f = open(fun2file(func), 'rt')
deps = []
external_mode = False
for l in f.readlines():
# NOTE(review): lines starting with '*' are presumably Fortran comment lines
# that get skipped - the statement under this `if` is not visible.
if l.startswith('*'):
l = l.strip().upper()
if l.startswith('EXTERNAL '):
external_mode = True
# NOTE(review): the body of this `elif` (and any `else`) is not visible.
elif l.startswith('$') and external_mode:
external_mode = False
# NOTE(review): the statement under this `if` is not visible.
if not external_mode:
for w in words_regexp.findall(l):
# NOTE(review): presumably the matched word is collected into `deps`; the
# statement under this `if` is not visible.
if w in all_funcs_set:
# remove func from its dependencies
deps = set(deps).difference(set([func]))
func_deps[func] = deps
# NOTE(review): the loop body is not visible; given the "infinite recursion"
# remark above it presumably scans each dependency in turn - confirm.
for d in deps:
# Iterate over the requested root routines.
# NOTE(review): the loop body is not visible in this view (presumably a
# scan_deps(r) call that fills func_deps) - confirm against the upstream file.
for r in roots:
# Everything that ended up in func_deps is part of the subset.
selected_funcs = sorted(func_deps)
print("total files before amalgamation: %d" % len(selected_funcs))

# Reverse graph: routine -> set of routines that depend on it.
inv_deps = {name: set() for name in selected_funcs}
for user, used in func_deps.items():
    for callee in used:
        inv_deps[callee] = inv_deps[callee] | {user}

# Initially every routine lives in a file of its own.
func_home = {name: name for name in selected_funcs}
def get_home0(func, func0):
    """Follow the chain of sole users of *func* and return where it ends.

    Starting at *func*, the walk moves to the only routine that uses the
    current one (according to ``inv_deps``) for as long as such a unique
    user exists. It stops, returning the current routine, when the current
    routine has zero or several users, or when its only user is itself or
    *func0*.

    The walk is iterative and remembers the visited routines, so a cycle
    of sole users that does not pass through *func0* can no longer recurse
    without bound; in that case *func* is returned unchanged, i.e. nothing
    is merged.
    """
    visited = {func}
    current = func
    while True:
        used_by = inv_deps[current]
        if len(used_by) != 1:
            return current
        parent = next(iter(used_by))
        if parent == current or parent == func0:
            return current
        if parent in visited:
            # Cycle that never reaches func0: leave func where it is.
            return func
        visited.add(parent)
        current = parent
# First merge pass: move every routine to the end of its sole-user chain.
func_home.update((name, get_home0(name, name)) for name in selected_funcs)
# try to merge some files even more
# Runs up to 100 passes over inv_deps; `homes_changed` records whether a
# pass moved any routine to a different home.
for iters in range(100):
homes_changed = False
for (func, used_by) in inv_deps.items():
p0 = func_home[func]
n = len(used_by)
# Exactly one user: adopt that user's home when it differs from ours.
if n == 1:
p = list(used_by)[0]
p1 = func_home[p]
if p1 != p0:
func_home[func] = p1
homes_changed = True
# Several users: `phomes` is the set inspected below.
elif n > 1:
phomes = set([])
# NOTE(review): the loop body is not visible in this view; presumably it
# collects the home of each user into `phomes` - confirm.
for p in used_by:
# When `phomes` holds a single element, adopt it as the new home.
if len(phomes) == 1:
p1 = list(phomes)[0]
if p1 != p0:
func_home[func] = p1
homes_changed = True
# NOTE(review): the statement under this `if` is not visible; presumably the
# iteration stops once a pass changes nothing - confirm.
if not homes_changed:
# Group the routines by their final home: res_files maps a home name to the
# set of routines that go into that output file.
res_files = {}
for (func, h) in func_home.items():
    elems = res_files.setdefault(h, set())
    # Without this the groups stay empty and no routine is ever converted.
    elems.add(func)
print ("total files after amalgamation: %d" % len(res_files))
# Output layout: <dst_path>/new_clapack with "src" and "include" below it.
outdir = os.path.join(dst_path, "new_clapack")
outdir_src = os.path.join(outdir, "src")
outdir_inc = os.path.join(outdir, "include")
# Remove any previous output tree; failures while removing are ignored.
shutil.rmtree(outdir, ignore_errors=True)
# NOTE(review): the two `except` clauses below have no visible `try` or
# handler body in this view; presumably they guard the creation of
# outdir_src and outdir_inc - confirm against the upstream file.
except os.error:
except os.error:
# The converter executable can be overridden with the F2C environment
# variable; plain "f2c" is used otherwise.
f2c_appname = os.environ.get("F2C", "f2c")
print("f2c used: %s" % f2c_appname)

# Ask the tool for its version text and insist on the "for_lapack" build.
f2c_getver_cmd = f2c_appname + " -v"
raw_version = subprocess.check_output(f2c_getver_cmd.split(' '))
verstr = raw_version.decode("utf-8")
if not ("for_lapack" in verstr):
    error("invalid version of f2c\n" + appdoc)

# Two command lines are prepared; the second one additionally passes
# "-hdr none".
f2c_flags = "-ctypes -localconst -no-proto"
f2c_cmd0 = f2c_appname + " " + f2c_flags
f2c_cmd1 = f2c_appname + " -hdr none " + f2c_flags

# C function name -> prototype text, filled by extract_proto().
lapack_protos = {}
# Captures the identifier that directly precedes an opening parenthesis.
extract_fn_regexp = re.compile(r'.+?(\w+)\s*\(')
def extract_proto(func, csrc):
    """Collect the prototypes of the C functions defined in *csrc*.

    Every function definition header found in *csrc* (optionally preceded
    by a ``/* Subroutine */`` comment) is turned into a prototype: the
    comment and the opening brace are removed and ``;`` is appended. The
    first prototype seen for a function name is stored in the module-level
    ``lapack_protos`` dict, with ``complex`` and ``doublecomplex`` renamed
    to ``lapack_complex`` and ``lapack_doublecomplex``.

    func -- routine name; only used to name the C file in error messages.
    csrc -- C source text to scan.

    A header from which exactly one function name cannot be extracted is
    reported through error() and skipped.
    """
    cfname = func.lower() + ".c"
    regexp_str = r'\n(?:/\* Subroutine \*/\s*)?\w+\s+\w+\s*\((?:.|\n)+?\)[\s\n]*\{'
    proto_regexp = re.compile(regexp_str)
    for p in proto_regexp.findall(csrc):
        # Skip past the end of the leading comment only when there is one;
        # otherwise the header starts at the beginning of the match.
        marker = p.find("*/")
        start = marker + 2 if marker >= 0 else 0
        # p[-1] is the opening brace of the function body.
        p = p[start:-1].strip() + ";"
        fns = extract_fn_regexp.findall(p)
        if len(fns) != 1:
            error("prototype of function (%s) when analyzing %s cannot be parsed" % (p, cfname))
            continue
        fn = fns[0]
        if fn not in lapack_protos:
            p = re.sub(r'\bcomplex\b', 'lapack_complex', p)
            p = re.sub(r'\bdoublecomplex\b', 'lapack_doublecomplex', p)
            lapack_protos[fn] = p
# Convert every group of routines with f2c and store the concatenated C
# code in <outdir_src>/<home>.c.
for (filename, funcs) in sorted(res_files.items()):
    chunks = []
    # The first routine of a file is converted with the plain command line.
    f2c_cmd = f2c_cmd0
    for func in sorted(funcs):
        ffilename = fun2file(func)
        print ("running " + f2c_cmd + " on " + ffilename + " ...")
        # The Fortran source is fed to f2c on stdin; the handle is closed
        # as soon as the conversion has finished.
        with open(ffilename, 'rt') as ffile:
            delta_out = subprocess.check_output(f2c_cmd.split(' '), stdin=ffile).decode("utf-8")
        # remove trailing whitespaces
        delta_out = '\n'.join([l.rstrip() for l in delta_out.split('\n')])
        extract_proto(func, delta_out)
        chunks.append(delta_out)
        # Remaining routines of the same file use the "-hdr none" variant.
        f2c_cmd = f2c_cmd1
    out = "".join(chunks)
    outname = os.path.join(outdir_src, filename.lower() + ".c")
    # Write the generated code; previously the file was only opened.
    with open(outname, 'wt') as outfile:
        outfile.write(out)
# Text of the generated header: a fixed preamble formatted with the collected
# prototypes, sorted by function name and separated by blank lines.
# NOTE(review): the template visible in this view contains no "%s"
# placeholder and no closing #endif / brace lines, so it looks truncated -
# confirm against the upstream file before relying on it.
proto_hdr = """// this is auto-generated header for Lapack subset
#ifndef __CLAPACK_H__
#define __CLAPACK_H__
#include "cblas.h"
#ifdef __cplusplus
extern "C" {
#ifdef __cplusplus
""" % "\n\n".join([p for (n, p) in sorted(lapack_protos.items())])
# The header goes to <outdir_inc>/lapack.h.
proto_hdr_fname = os.path.join(outdir_inc, "lapack.h")
# NOTE(review): only the open() call is visible here; the write/close that
# presumably follow are outside this view.
f = open(proto_hdr_fname, 'wt')