mirror of
https://github.com/opencv/opencv.git
synced 2024-11-27 20:50:25 +08:00
Merge pull request #25387 from fengyuentau:complete-float16_t-renaming
Rename remaining float16_t for future proof #25387 Resolves comment: https://github.com/opencv/opencv/pull/25217#discussion_r1547733187. `std::float16_t` and `std::bfloat16_t` are introduced since c++23: https://en.cppreference.com/w/cpp/types/floating-point. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
2c5b296ab2
commit
197626a5bf
@ -900,7 +900,9 @@ inline hfloat hfloatFromBits(ushort w) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !defined(__OPENCV_BUILD) && !(defined __STDCPP_FLOAT16_T__) && !(defined __ARM_NEON)
|
||||
typedef hfloat float16_t;
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -494,10 +494,9 @@ void convBlockMR1_F32(int np, const float * a, const float * b, float *c, const
|
||||
void convBlock_F16(int np, const char * _a, const char * _b, char * _c, int ldc, bool init_c, int width,
|
||||
const int convMR_fp16, const int convNR_fp16)
|
||||
{
|
||||
typedef __fp16 float16_t;
|
||||
const float16_t* a = (const float16_t*)_a;
|
||||
const float16_t* b = (const float16_t*)_b;
|
||||
float16_t* c = (float16_t*)_c;
|
||||
const __fp16* a = (const __fp16*)_a;
|
||||
const __fp16* b = (const __fp16*)_b;
|
||||
__fp16* c = (__fp16*)_c;
|
||||
CV_Assert(convMR_fp16 == 8 && convNR_fp16 == 24);
|
||||
|
||||
float16x8_t c00 = vdupq_n_f16(0), c01 = c00, c02 = c00;
|
||||
@ -638,12 +637,11 @@ void convBlock_F16(int np, const char * _a, const char * _b, char * _c, int ldc,
|
||||
void convBlockMR1_F16(int np, const char* _a, const char* _b, float *c, const float _bias, bool init_c,
|
||||
const float minval, const float maxval, bool ifMinMaxAct, const int width, const int convNR_FP16)
|
||||
{
|
||||
typedef __fp16 float16_t;
|
||||
CV_Assert(convNR_FP16 == 24); // CONV_NR_FP16 = 24
|
||||
const float16_t* a = (const float16_t*)_a;
|
||||
const float16_t* b = (const float16_t*)_b;
|
||||
const __fp16* a = (const __fp16*)_a;
|
||||
const __fp16* b = (const __fp16*)_b;
|
||||
|
||||
const float16_t bias = (float16_t)_bias;
|
||||
const __fp16 bias = (__fp16)_bias;
|
||||
|
||||
float16x8_t c0 = vdupq_n_f16(bias), c1 = c0, c2 = c0;
|
||||
|
||||
|
@ -85,7 +85,7 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _outpu
|
||||
// works at FP 16.
|
||||
CONV_WINO_ATOM = CONV_WINO_ATOM_F16;
|
||||
CONV_WINO_NATOMS = CONV_WINO_NATOMS_F16;
|
||||
esz = sizeof(float16_t);
|
||||
esz = sizeof(__fp16);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -435,10 +435,9 @@ void winofunc_AtXA_8x8_F32(const float* inptr, int inpstep,
|
||||
void winofunc_accum_F16(const char* _inwptr, const char* _wptr, char* _outbuf, int Cg, int iblock,
|
||||
const int winoIblock, const int winoKblock, const int winoAtomF16, const int winoNatomF16)
|
||||
{
|
||||
typedef __fp16 float16_t;
|
||||
const float16_t* inwptr = (const float16_t*)_inwptr;
|
||||
const float16_t* wptr = (const float16_t*)_wptr;
|
||||
float16_t* outbuf = (float16_t*)_outbuf;
|
||||
const __fp16* inwptr = (const __fp16*)_inwptr;
|
||||
const __fp16* wptr = (const __fp16*)_wptr;
|
||||
__fp16* outbuf = (__fp16*)_outbuf;
|
||||
|
||||
CV_Assert(winoIblock == 6 && winoKblock == 4 && winoAtomF16 == 8);
|
||||
|
||||
@ -591,8 +590,7 @@ void winofunc_accum_F16(const char* _inwptr, const char* _wptr, char* _outbuf, i
|
||||
void winofunc_BtXB_8x8_F16(const float * inptr, int inpstep,
|
||||
char * _outptr, int Cg, const int winoIblock, const int winoAtomF16)
|
||||
{
|
||||
typedef __fp16 float16_t;
|
||||
float16_t* outptr = (float16_t*)_outptr;
|
||||
__fp16* outptr = (__fp16*)_outptr;
|
||||
float32x4_t x00 = vld1q_f32(inptr), x01 = vld1q_f32(inptr + 4);
|
||||
float32x4_t x10 = vld1q_f32(inptr + inpstep), x11 = vld1q_f32(inptr + inpstep + 4);
|
||||
float32x4_t x20 = vld1q_f32(inptr + inpstep*2), x21 = vld1q_f32(inptr + inpstep*2 + 4);
|
||||
@ -757,8 +755,7 @@ void winofunc_AtXA_8x8_F16(const char* _inptr, int inpstep,
|
||||
float * bpptr, int bpstep, float* outptr, int outstep,
|
||||
float bias, float minval, float maxval, bool ifMinMaxAct)
|
||||
{
|
||||
typedef __fp16 float16_t;
|
||||
const float16_t* inptr = (const float16_t*)_inptr;
|
||||
const __fp16* inptr = (const __fp16*)_inptr;
|
||||
|
||||
float32x4_t x00 = vcvt_f32_f16(vld1_f16(inptr)), x01 = vcvt_f32_f16(vld1_f16(inptr + 4));
|
||||
float32x4_t x10 = vcvt_f32_f16(vld1_f16(inptr + inpstep)), x11 = vcvt_f32_f16(vld1_f16(inptr + inpstep + 4));
|
||||
|
@ -26,7 +26,7 @@ void convBlockMR1_F32(int np, const float* a, const float* b, float *c, const fl
|
||||
|
||||
#ifdef CONV_ARM_FP16
|
||||
// Fast convert float 32 to float16
|
||||
static inline void _cvt32f16f(const float* src, float16_t* dst, int len)
|
||||
static inline void _cvt32f16f(const float* src, __fp16* dst, int len)
|
||||
{
|
||||
int j = 0;
|
||||
const int VECSZ = 4;
|
||||
@ -60,7 +60,7 @@ static inline void _cvt32f16f(const float* src, float16_t* dst, int len)
|
||||
vst1_f16(dst_FP16 + j, hv);
|
||||
}
|
||||
for( ; j < len; j++ )
|
||||
dst[j] = float16_t(src[j]);
|
||||
dst[j] = __fp16(src[j]);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -74,12 +74,12 @@ float* FastConv::getWeightsWino()
|
||||
return alignPtr(weightsWinoBuf.data(), VEC_ALIGN);
|
||||
}
|
||||
|
||||
float16_t* FastConv::getWeightsFP16()
|
||||
hfloat* FastConv::getWeightsFP16()
|
||||
{
|
||||
return alignPtr(weightsBuf_FP16.data(), VEC_ALIGN);
|
||||
}
|
||||
|
||||
float16_t* FastConv::getWeightsWinoFP16()
|
||||
hfloat* FastConv::getWeightsWinoFP16()
|
||||
{
|
||||
return alignPtr(weightsWinoBuf_FP16.data(), VEC_ALIGN);
|
||||
}
|
||||
@ -209,7 +209,7 @@ Ptr<FastConv> initFastConv(
|
||||
if (conv->useFP16)
|
||||
{
|
||||
conv->weightsBuf_FP16.resize(nweights + VEC_ALIGN);
|
||||
auto weightsPtr_FP16 = conv->getWeightsFP16();
|
||||
auto weightsPtr_FP16 = (__fp16*)conv->getWeightsFP16();
|
||||
|
||||
parallel_for_(Range(0, C), [&](const Range& r0){
|
||||
for(int c = r0.start; c < r0.end; c++)
|
||||
@ -269,11 +269,11 @@ Ptr<FastConv> initFastConv(
|
||||
|
||||
float* wptrWino = nullptr;
|
||||
#ifdef CONV_ARM_FP16
|
||||
float16_t* wptrWino_FP16 = nullptr;
|
||||
__fp16* wptrWino_FP16 = nullptr;
|
||||
if (conv->useFP16)
|
||||
{
|
||||
conv->weightsWinoBuf_FP16.resize(nweights + VEC_ALIGN);
|
||||
wptrWino_FP16 = conv->getWeightsWinoFP16();
|
||||
wptrWino_FP16 = (__fp16*)conv->getWeightsWinoFP16();
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -323,7 +323,7 @@ Ptr<FastConv> initFastConv(
|
||||
#ifdef CONV_ARM_FP16
|
||||
if (conv->useFP16)
|
||||
{
|
||||
float16_t* wptr = wptrWino_FP16 + (g*Kg_nblocks + ki) * Cg *CONV_WINO_KBLOCK*CONV_WINO_AREA +
|
||||
__fp16* wptr = wptrWino_FP16 + (g*Kg_nblocks + ki) * Cg *CONV_WINO_KBLOCK*CONV_WINO_AREA +
|
||||
(c*CONV_WINO_KBLOCK + dk)*CONV_WINO_ATOM_F16;
|
||||
for (int i = 0; i < CONV_WINO_NATOMS_F16; i++,
|
||||
wptr += Cg * CONV_WINO_KBLOCK * CONV_WINO_ATOM_F16)
|
||||
@ -331,7 +331,7 @@ Ptr<FastConv> initFastConv(
|
||||
CV_Assert(wptrWino_FP16 <= wptr && wptr + CONV_WINO_ATOM_F16 <= wptrWino_FP16 + nweights);
|
||||
for (int j = 0; j < CONV_WINO_ATOM_F16; j++)
|
||||
{
|
||||
wptr[j] = (float16_t)kernelTm[i * CONV_WINO_ATOM_F16 + j];
|
||||
wptr[j] = (__fp16)kernelTm[i * CONV_WINO_ATOM_F16 + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -367,12 +367,12 @@ Ptr<FastConv> initFastConv(
|
||||
int numStripsMR_FP16 = (Kg + CONV_MR_FP16 - 1) / CONV_MR_FP16;
|
||||
int Kg_aligned_FP16 = numStripsMR_FP16 * CONV_MR_FP16;
|
||||
size_t nweights_FP16 = ngroups * Kg_aligned_FP16 * DkHkWkCg;
|
||||
float16_t* weightsPtr_FP16 = nullptr;
|
||||
__fp16* weightsPtr_FP16 = nullptr;
|
||||
|
||||
if (conv->useFP16)
|
||||
{
|
||||
conv->weightsBuf_FP16.resize(nweights_FP16 + VEC_ALIGN);
|
||||
weightsPtr_FP16 = conv->getWeightsFP16();
|
||||
weightsPtr_FP16 = (__fp16*)conv->getWeightsFP16();
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -394,7 +394,7 @@ Ptr<FastConv> initFastConv(
|
||||
int startK = si * CONV_MR_FP16;
|
||||
CV_Assert(startK < Kg_aligned_FP16);
|
||||
|
||||
float16_t* packed_wptr = weightsPtr_FP16 + DkHkWkCg * (startK + g * Kg_aligned_FP16);
|
||||
__fp16* packed_wptr = weightsPtr_FP16 + DkHkWkCg * (startK + g * Kg_aligned_FP16);
|
||||
int dk = Kg - startK < CONV_MR_FP16 ? Kg - startK : CONV_MR_FP16; // check if we need zero padding.
|
||||
|
||||
int k_idx = g*Kg + startK;
|
||||
@ -405,9 +405,9 @@ Ptr<FastConv> initFastConv(
|
||||
const float* wptr = srcWeights + wstep * k_idx + c*Hk*Wk*Dk + hwd;
|
||||
int k = 0;
|
||||
for(; k < dk; k++, wptr += wstep)
|
||||
packed_wptr[k] = (float16_t)(*wptr);
|
||||
packed_wptr[k] = (__fp16)(*wptr);
|
||||
for(; k < CONV_MR_FP16; k++)
|
||||
packed_wptr[k] = (float16_t)0.f;
|
||||
packed_wptr[k] = (__fp16)0.f;
|
||||
}
|
||||
}
|
||||
}});
|
||||
@ -467,8 +467,8 @@ static inline void packData8(char*& inpbuf, float*& inptrIn, int& in_w, int& x0,
|
||||
float* inptrInC = (float* )inptrIn;
|
||||
|
||||
#ifdef CONV_ARM_FP16
|
||||
float16_t* inpbufC_FP16 = (float16_t *)inpbufC;
|
||||
if (esz == sizeof(float16_t))
|
||||
__fp16* inpbufC_FP16 = (__fp16 *)inpbufC;
|
||||
if (esz == sizeof(__fp16))
|
||||
{
|
||||
if (stride_w == 1)
|
||||
{
|
||||
@ -565,16 +565,16 @@ static inline void packData2(char *& inpbuf, float*& inptrIn, int& in_w, int& x0
|
||||
float* inptrInC = inptrIn;
|
||||
|
||||
#ifdef CONV_ARM_FP16
|
||||
float16_t* inpbufC_FP16 = (float16_t *)inpbufC;
|
||||
if (esz == sizeof(float16_t))
|
||||
__fp16* inpbufC_FP16 = (__fp16 *)inpbufC;
|
||||
if (esz == sizeof(__fp16))
|
||||
{
|
||||
for (int k = 0; k < ksize; k++)
|
||||
{
|
||||
int k1 = ofstab[k];
|
||||
float v0 = inptrInC[k1];
|
||||
float v1 = inptrInC[k1 + stride_w];
|
||||
inpbufC_FP16[k*CONV_NR_FP16] = (float16_t)v0;
|
||||
inpbufC_FP16[k*CONV_NR_FP16+1] = (float16_t)v1;
|
||||
inpbufC_FP16[k*CONV_NR_FP16] = (__fp16)v0;
|
||||
inpbufC_FP16[k*CONV_NR_FP16+1] = (__fp16)v1;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
@ -630,7 +630,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
if (useFP16)
|
||||
{
|
||||
for (int c = 0; c < Cg; c++, inptr += inp_planesize, inpbuf += CONV_NR_esz)
|
||||
_cvt32f16f(inptr, (float16_t *)inpbuf, CONV_NR);
|
||||
_cvt32f16f(inptr, (__fp16 *)inpbuf, CONV_NR);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -644,7 +644,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
{
|
||||
for (int c = 0; c < Cg; c++, inptr += inp_planesize, inpbuf += CONV_NR_esz)
|
||||
{
|
||||
_cvt32f16f(inptr, (float16_t *)inpbuf, slice_len);
|
||||
_cvt32f16f(inptr, (__fp16 *)inpbuf, slice_len);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -704,11 +704,11 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
#ifdef CONV_ARM_FP16
|
||||
if (useFP16)
|
||||
{
|
||||
float16_t* inpbufC = (float16_t *)inpbuf + s0;
|
||||
__fp16* inpbufC = (__fp16 *)inpbuf + s0;
|
||||
for (int w = w0; w < w1; w++)
|
||||
{
|
||||
int imgofs = w*dilation_w;
|
||||
inpbufC[w*CONV_NR] = (float16_t)inptrInC[imgofs];
|
||||
inpbufC[w*CONV_NR] = (__fp16)inptrInC[imgofs];
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -765,14 +765,14 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
#ifdef CONV_ARM_FP16
|
||||
if (useFP16)
|
||||
{
|
||||
float16_t* inpbufC = (float16_t *)inpbuf + s0;
|
||||
__fp16* inpbufC = (__fp16 *)inpbuf + s0;
|
||||
|
||||
for (int h = h0; h < h1; h++)
|
||||
{
|
||||
for (int w = w0; w < w1; w++)
|
||||
{
|
||||
int imgofs = h*(dilation_h*Wi) + w*dilation_w;
|
||||
inpbufC[(h*Wk + w)*CONV_NR] = (float16_t)inptrInC[imgofs];
|
||||
inpbufC[(h*Wk + w)*CONV_NR] = (__fp16)inptrInC[imgofs];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -838,7 +838,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
#ifdef CONV_ARM_FP16
|
||||
if (useFP16)
|
||||
{
|
||||
float16_t* inpbufC = (float16_t* )inpbuf + s0;
|
||||
__fp16* inpbufC = (__fp16* )inpbuf + s0;
|
||||
|
||||
for ( int d = d0; d < d1; d++)
|
||||
{
|
||||
@ -847,7 +847,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
for (int w = w0; w < w1; w++)
|
||||
{
|
||||
int imgofs = d*dilation_d*HWi + h*(dilation_h*Wi) + w*dilation_w;
|
||||
inpbufC[((d*Hk + h)*Wk + w)*CONV_NR] = (float16_t)inptrInC[imgofs];
|
||||
inpbufC[((d*Hk + h)*Wk + w)*CONV_NR] = (__fp16)inptrInC[imgofs];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -889,7 +889,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
{
|
||||
float* inpbuf_ki = (float* )inpbuf + k * CONV_NR * Cg + i;
|
||||
#ifdef CONV_ARM_FP16
|
||||
float16_t * inpbuf_ki_FP16 = (float16_t *)inpbuf + k * CONV_NR * Cg + i;
|
||||
__fp16 * inpbuf_ki_FP16 = (__fp16 *)inpbuf + k * CONV_NR * Cg + i;
|
||||
#endif
|
||||
|
||||
int zi = z0 * stride_d + dz - pad_front;
|
||||
@ -1053,7 +1053,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
if (useFP16)
|
||||
{
|
||||
for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize)
|
||||
inpbuf_ki_FP16[0] = (float16_t)(*inptr_ki);
|
||||
inpbuf_ki_FP16[0] = (__fp16)(*inptr_ki);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -1069,7 +1069,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
|
||||
if (useFP16)
|
||||
{
|
||||
for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR)
|
||||
inpbuf_ki_FP16[0] = (float16_t)0.f;
|
||||
inpbuf_ki_FP16[0] = (__fp16)0.f;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@ -1257,7 +1257,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
|
||||
// works at FP 16.
|
||||
CONV_NR = CONV_NR_FP16;
|
||||
CONV_MR = CONV_MR_FP16;
|
||||
esz = sizeof(float16_t);
|
||||
esz = sizeof(__fp16);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1511,7 +1511,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
|
||||
|
||||
char *wptr = weights + (k0_block * DkHkWkCg + c0 * CONV_MR) * esz;
|
||||
float *cptr = cbuf_task + stripe * CONV_NR;
|
||||
float16_t* cptr_f16 = (float16_t*)cbuf_task + stripe*CONV_NR;
|
||||
hfloat* cptr_f16 = (hfloat*)cbuf_task + stripe*CONV_NR;
|
||||
for (int k = k0_block; k < k1_block; k += CONV_MR,
|
||||
wptr += DkHkWkCg * CONV_MR * esz, cptr += CONV_MR * ldc, cptr_f16 += CONV_MR * ldc)
|
||||
{
|
||||
@ -1547,7 +1547,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
|
||||
|
||||
size_t outofs = ((n * ngroups + g) * Kg + k0_block) * out_planesize + zyx0;
|
||||
const float *cptr = cbuf_task;
|
||||
const float16_t *cptr_fp16 = (const float16_t *)cbuf_task;
|
||||
const hfloat *cptr_fp16 = (const hfloat *)cbuf_task;
|
||||
float *outptr = out + outofs;
|
||||
const float *pbptr = fusedAddPtr0 ? fusedAddPtr0 + outofs : 0;
|
||||
|
||||
|
@ -62,10 +62,10 @@ struct FastConv
|
||||
float* getWeights();
|
||||
float* getWeightsWino();
|
||||
|
||||
std::vector<float16_t> weightsBuf_FP16;
|
||||
std::vector<float16_t> weightsWinoBuf_FP16;
|
||||
float16_t* getWeightsFP16();
|
||||
float16_t* getWeightsWinoFP16();
|
||||
std::vector<hfloat> weightsBuf_FP16;
|
||||
std::vector<hfloat> weightsWinoBuf_FP16;
|
||||
hfloat* getWeightsFP16();
|
||||
hfloat* getWeightsWinoFP16();
|
||||
|
||||
int conv_type;
|
||||
int conv_dim; // Flag for conv1d, conv2d, or conv3d.
|
||||
|
@ -1742,12 +1742,12 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
|
||||
#endif
|
||||
const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();
|
||||
|
||||
AutoBuffer<float16_t, 16> aligned_val;
|
||||
AutoBuffer<hfloat, 16> aligned_val;
|
||||
size_t sz = tensor_proto.int32_data().size();
|
||||
aligned_val.allocate(sz);
|
||||
float16_t* bufPtr = aligned_val.data();
|
||||
hfloat* bufPtr = aligned_val.data();
|
||||
|
||||
float16_t *fp16Ptr = (float16_t *)field.data();
|
||||
hfloat *fp16Ptr = (hfloat *)field.data();
|
||||
for (int i = 0; i < sz; i++)
|
||||
{
|
||||
bufPtr[i] = fp16Ptr[i*2 + offset];
|
||||
@ -1759,11 +1759,11 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
|
||||
char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
// Aligned pointer is required.
|
||||
AutoBuffer<float16_t, 16> aligned_val;
|
||||
if (!isAligned<sizeof(float16_t)>(val))
|
||||
AutoBuffer<hfloat, 16> aligned_val;
|
||||
if (!isAligned<sizeof(hfloat)>(val))
|
||||
{
|
||||
size_t sz = tensor_proto.raw_data().size();
|
||||
aligned_val.allocate(divUp(sz, sizeof(float16_t)));
|
||||
aligned_val.allocate(divUp(sz, sizeof(hfloat)));
|
||||
memcpy(aligned_val.data(), val, sz);
|
||||
val = (char*)aligned_val.data();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user