mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 03:00:14 +08:00
Merge pull request #24274 from vrabaud:webp_1.3.2
Merge pull request #24274 from vrabaud:webp_1.3.2
Bump libwebp to 1.3.2 #24274
This is version [c1ffd9a](c1ffd9ac75)
It is 1.3.2 with a few patches that were made right after to help compilation.
No need for patches on the OpenCV side!
### Pull Request Readiness Checklist
See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request
- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
This commit is contained in:
parent
5e9191558d
commit
687fc11626
10
3rdparty/libwebp/CMakeLists.txt
vendored
10
3rdparty/libwebp/CMakeLists.txt
vendored
@ -9,8 +9,8 @@ if(ANDROID)
|
||||
ocv_include_directories(${CPUFEATURES_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
file(GLOB lib_srcs src/dec/*.c src/demux/*.c src/dsp/*.c src/enc/*.c src/mux/*.c src/utils/*.c src/webp/*.c)
|
||||
file(GLOB lib_hdrs src/dec/*.h src/demux/*.h src/dsp/*.h src/enc/*.h src/mux/*.h src/utils/*.h src/webp/*.h)
|
||||
file(GLOB lib_srcs sharpyuv/*.c src/dec/*.c src/demux/*.c src/dsp/*.c src/enc/*.c src/mux/*.c src/utils/*.c src/webp/*.c)
|
||||
file(GLOB lib_hdrs sharpyuv/*.h src/dec/*.h src/demux/*.h src/dsp/*.h src/enc/*.h src/mux/*.h src/utils/*.h src/webp/*.h)
|
||||
|
||||
# FIXIT
|
||||
if(ANDROID AND ARMEABI_V7A AND NOT NEON)
|
||||
@ -21,12 +21,6 @@ if(ANDROID AND ARMEABI_V7A AND NOT NEON)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# FIX for quant.h - requires C99 for() loops
|
||||
ocv_check_flag_support(C "-std=c99" _varname "${CMAKE_C_FLAGS}")
|
||||
if(${_varname})
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
|
||||
endif()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------
|
||||
# Define the library target:
|
||||
|
@ -1,22 +0,0 @@
|
||||
diff --git a/3rdparty/libwebp/src/dsp/msa_macro.h b/3rdparty/libwebp/src/dsp/msa_macro.h
|
||||
index de026a1d9e..a16c0bb300 100644
|
||||
--- a/3rdparty/libwebp/src/dsp/msa_macro.h
|
||||
+++ b/3rdparty/libwebp/src/dsp/msa_macro.h
|
||||
@@ -73,7 +73,7 @@
|
||||
static inline TYPE FUNC_NAME(const void* const psrc) { \
|
||||
const uint8_t* const psrc_m = (const uint8_t*)psrc; \
|
||||
TYPE val_m; \
|
||||
- asm volatile ( \
|
||||
+ __asm__ volatile ( \
|
||||
"" #INSTR " %[val_m], %[psrc_m] \n\t" \
|
||||
: [val_m] "=r" (val_m) \
|
||||
: [psrc_m] "m" (*psrc_m)); \
|
||||
@@ -86,7 +86,7 @@
|
||||
static inline void FUNC_NAME(TYPE val, void* const pdst) { \
|
||||
uint8_t* const pdst_m = (uint8_t*)pdst; \
|
||||
TYPE val_m = val; \
|
||||
- asm volatile ( \
|
||||
+ __asm__ volatile ( \
|
||||
" " #INSTR " %[val_m], %[pdst_m] \n\t" \
|
||||
: [pdst_m] "=m" (*pdst_m) \
|
||||
: [val_m] "r" (val_m)); \
|
565
3rdparty/libwebp/sharpyuv/sharpyuv.c
vendored
Normal file
565
3rdparty/libwebp/sharpyuv/sharpyuv.c
vendored
Normal file
@ -0,0 +1,565 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Sharp RGB to YUV conversion.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/webp/types.h"
|
||||
#include "sharpyuv/sharpyuv_cpu.h"
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
#include "sharpyuv/sharpyuv_gamma.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
int SharpYuvGetVersion(void) {
|
||||
return SHARPYUV_VERSION;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Sharp RGB->YUV conversion
|
||||
|
||||
static const int kNumIterations = 4;
|
||||
|
||||
#define YUV_FIX 16 // fixed-point precision for RGB->YUV
|
||||
static const int kYuvHalf = 1 << (YUV_FIX - 1);
|
||||
|
||||
// Max bit depth so that intermediate calculations fit in 16 bits.
|
||||
static const int kMaxBitDepth = 14;
|
||||
|
||||
// Returns the precision shift to use based on the input rgb_bit_depth.
|
||||
static int GetPrecisionShift(int rgb_bit_depth) {
|
||||
// Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
|
||||
// bits if needed.
|
||||
return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
|
||||
: (kMaxBitDepth - rgb_bit_depth);
|
||||
}
|
||||
|
||||
typedef int16_t fixed_t; // signed type with extra precision for UV
|
||||
typedef uint16_t fixed_y_t; // unsigned type with extra precision for W
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Clamps 'v' to the [0, 255] range and returns it as a byte.
static uint8_t clip_8b(fixed_t v) {
  if (v < 0) return 0u;
  if (v > 255) return 255u;
  return (uint8_t)v;
}
|
||||
|
||||
// Clamps 'v' to the [0, max] range and returns it as a 16-bit value.
static uint16_t clip(fixed_t v, int max) {
  if (v < 0) return 0;
  if (v > max) return (uint16_t)max;
  return (uint16_t)v;
}
|
||||
|
||||
static fixed_y_t clip_bit_depth(int y, int bit_depth) {
|
||||
const int max = (1 << bit_depth) - 1;
|
||||
return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int RGBToGray(int64_t r, int64_t g, int64_t b) {
|
||||
const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
|
||||
return (int)(luma >> YUV_FIX);
|
||||
}
|
||||
|
||||
static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
|
||||
int rgb_bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
const uint32_t A = SharpYuvGammaToLinear(a, bit_depth, transfer_type);
|
||||
const uint32_t B = SharpYuvGammaToLinear(b, bit_depth, transfer_type);
|
||||
const uint32_t C = SharpYuvGammaToLinear(c, bit_depth, transfer_type);
|
||||
const uint32_t D = SharpYuvGammaToLinear(d, bit_depth, transfer_type);
|
||||
return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth,
|
||||
transfer_type);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
|
||||
int rgb_bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
const uint32_t R =
|
||||
SharpYuvGammaToLinear(src[0 * w + i], bit_depth, transfer_type);
|
||||
const uint32_t G =
|
||||
SharpYuvGammaToLinear(src[1 * w + i], bit_depth, transfer_type);
|
||||
const uint32_t B =
|
||||
SharpYuvGammaToLinear(src[2 * w + i], bit_depth, transfer_type);
|
||||
const uint32_t Y = RGBToGray(R, G, B);
|
||||
dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth, transfer_type);
|
||||
}
|
||||
}
|
||||
|
||||
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
|
||||
fixed_t* dst, int uv_w, int rgb_bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
int i;
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int r =
|
||||
ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
|
||||
src2[0 * uv_w + 1], rgb_bit_depth, transfer_type);
|
||||
const int g =
|
||||
ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
|
||||
src2[2 * uv_w + 1], rgb_bit_depth, transfer_type);
|
||||
const int b =
|
||||
ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
|
||||
src2[4 * uv_w + 1], rgb_bit_depth, transfer_type);
|
||||
const int W = RGBToGray(r, g, b);
|
||||
dst[0 * uv_w] = (fixed_t)(r - W);
|
||||
dst[1 * uv_w] = (fixed_t)(g - W);
|
||||
dst[2 * uv_w] = (fixed_t)(b - W);
|
||||
dst += 1;
|
||||
src1 += 2;
|
||||
src2 += 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes to 'y' the gray level of each of the 'w' pixels stored in 'rgb' as
// three consecutive planes (R, G, B) of 'w' samples each. No gamma conversion
// is applied here.
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
  int x = 0;
  assert(w > 0);
  while (x < w) {
    y[x] = RGBToGray(rgb[x], rgb[w + x], rgb[2 * w + x]);
    ++x;
  }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Blends A and B with weights 3/4 and 1/4 (rounded), adds W0 and clamps the
// result to 'bit_depth' bits.
static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
  const int blended = (3 * A + B + 2) >> 2;
  const int shifted_by_luma = blended + W0;
  return clip_bit_depth(shifted_by_luma, bit_depth);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Shifts 'v' left by 'shift' bits when 'shift' is non-negative, or right by
// -shift bits when it is negative.
static WEBP_INLINE int Shift(int v, int shift) {
  if (shift < 0) {
    return v >> -shift;
  }
  return v << shift;
}
|
||||
|
||||
static void ImportOneRow(const uint8_t* const r_ptr,
|
||||
const uint8_t* const g_ptr,
|
||||
const uint8_t* const b_ptr,
|
||||
int rgb_step,
|
||||
int rgb_bit_depth,
|
||||
int pic_width,
|
||||
fixed_y_t* const dst) {
|
||||
// Convert the rgb_step from a number of bytes to a number of uint8_t or
|
||||
// uint16_t values depending the bit depth.
|
||||
const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
|
||||
int i;
|
||||
const int w = (pic_width + 1) & ~1;
|
||||
for (i = 0; i < pic_width; ++i) {
|
||||
const int off = i * step;
|
||||
const int shift = GetPrecisionShift(rgb_bit_depth);
|
||||
if (rgb_bit_depth == 8) {
|
||||
dst[i + 0 * w] = Shift(r_ptr[off], shift);
|
||||
dst[i + 1 * w] = Shift(g_ptr[off], shift);
|
||||
dst[i + 2 * w] = Shift(b_ptr[off], shift);
|
||||
} else {
|
||||
dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
|
||||
dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
|
||||
dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
|
||||
}
|
||||
}
|
||||
if (pic_width & 1) { // replicate rightmost pixel
|
||||
dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
|
||||
dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
|
||||
dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
|
||||
}
|
||||
}
|
||||
|
||||
static void InterpolateTwoRows(const fixed_y_t* const best_y,
|
||||
const fixed_t* prev_uv,
|
||||
const fixed_t* cur_uv,
|
||||
const fixed_t* next_uv,
|
||||
int w,
|
||||
fixed_y_t* out1,
|
||||
fixed_y_t* out2,
|
||||
int rgb_bit_depth) {
|
||||
const int uv_w = w >> 1;
|
||||
const int len = (w - 1) >> 1; // length to filter
|
||||
int k = 3;
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
while (k-- > 0) { // process each R/G/B segments in turn
|
||||
// special boundary case for i==0
|
||||
out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
|
||||
out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
|
||||
|
||||
SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
|
||||
bit_depth);
|
||||
SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
|
||||
bit_depth);
|
||||
|
||||
// special boundary case for i == w - 1 when w is even
|
||||
if (!(w & 1)) {
|
||||
out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
|
||||
best_y[w - 1 + 0], bit_depth);
|
||||
out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
|
||||
best_y[w - 1 + w], bit_depth);
|
||||
}
|
||||
out1 += w;
|
||||
out2 += w;
|
||||
prev_uv += uv_w;
|
||||
cur_uv += uv_w;
|
||||
next_uv += uv_w;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
|
||||
const int coeffs[4], int sfix) {
|
||||
const int srounder = 1 << (YUV_FIX + sfix - 1);
|
||||
const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
|
||||
coeffs[3] + srounder;
|
||||
return (luma >> (YUV_FIX + sfix));
|
||||
}
|
||||
|
||||
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
|
||||
uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
|
||||
int u_stride, uint8_t* v_ptr, int v_stride,
|
||||
int rgb_bit_depth,
|
||||
int yuv_bit_depth, int width, int height,
|
||||
const SharpYuvConversionMatrix* yuv_matrix) {
|
||||
int i, j;
|
||||
const fixed_t* const best_uv_base = best_uv;
|
||||
const int w = (width + 1) & ~1;
|
||||
const int h = (height + 1) & ~1;
|
||||
const int uv_w = w >> 1;
|
||||
const int uv_h = h >> 1;
|
||||
const int sfix = GetPrecisionShift(rgb_bit_depth);
|
||||
const int yuv_max = (1 << yuv_bit_depth) - 1;
|
||||
|
||||
for (best_uv = best_uv_base, j = 0; j < height; ++j) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
const int off = (i >> 1);
|
||||
const int W = best_y[i];
|
||||
const int r = best_uv[off + 0 * uv_w] + W;
|
||||
const int g = best_uv[off + 1 * uv_w] + W;
|
||||
const int b = best_uv[off + 2 * uv_w] + W;
|
||||
const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
|
||||
if (yuv_bit_depth <= 8) {
|
||||
y_ptr[i] = clip_8b(y);
|
||||
} else {
|
||||
((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
|
||||
}
|
||||
}
|
||||
best_y += w;
|
||||
best_uv += (j & 1) * 3 * uv_w;
|
||||
y_ptr += y_stride;
|
||||
}
|
||||
for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int off = i;
|
||||
// Note r, g and b values here are off by W, but a constant offset on all
|
||||
// 3 components doesn't change the value of u and v with a YCbCr matrix.
|
||||
const int r = best_uv[off + 0 * uv_w];
|
||||
const int g = best_uv[off + 1 * uv_w];
|
||||
const int b = best_uv[off + 2 * uv_w];
|
||||
const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
|
||||
const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
|
||||
if (yuv_bit_depth <= 8) {
|
||||
u_ptr[i] = clip_8b(u);
|
||||
v_ptr[i] = clip_8b(v);
|
||||
} else {
|
||||
((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
|
||||
((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
|
||||
}
|
||||
}
|
||||
best_uv += 3 * uv_w;
|
||||
u_ptr += u_stride;
|
||||
v_ptr += v_stride;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main function
|
||||
|
||||
// Allocates room for 'nmemb' elements of 'size' bytes each.
// Returns NULL if the total byte count does not fit in a uint64_t or in a
// size_t, or if malloc() itself fails. The caller owns the returned memory and
// releases it with free().
static void* SafeMalloc(uint64_t nmemb, size_t size) {
  uint64_t total_size;
  // Reject products that would wrap around in 64 bits: a wrapped value could
  // otherwise pass the size_t check below and yield an undersized buffer.
  if (size != 0 && nmemb > ~(uint64_t)0 / size) return NULL;
  total_size = nmemb * (uint64_t)size;
  if (total_size != (size_t)total_size) return NULL;
  return malloc((size_t)total_size);
}

// Allocates W * H elements of type T. W is widened to 64 bits before the
// multiplication so that the element count is not computed in 'int'.
#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((uint64_t)(W) * (H), sizeof(T)))
|
||||
|
||||
static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
|
||||
const uint8_t* b_ptr, int rgb_step, int rgb_stride,
|
||||
int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
|
||||
uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
|
||||
int v_stride, int yuv_bit_depth, int width,
|
||||
int height,
|
||||
const SharpYuvConversionMatrix* yuv_matrix,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
// we expand the right/bottom border if needed
|
||||
const int w = (width + 1) & ~1;
|
||||
const int h = (height + 1) & ~1;
|
||||
const int uv_w = w >> 1;
|
||||
const int uv_h = h >> 1;
|
||||
uint64_t prev_diff_y_sum = ~0;
|
||||
int j, iter;
|
||||
|
||||
// TODO(skal): allocate one big memory chunk. But for now, it's easier
|
||||
// for valgrind debugging to have several chunks.
|
||||
fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
|
||||
fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
|
||||
fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
|
||||
fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
|
||||
fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
|
||||
fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
|
||||
fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
|
||||
fixed_y_t* best_y = best_y_base;
|
||||
fixed_y_t* target_y = target_y_base;
|
||||
fixed_t* best_uv = best_uv_base;
|
||||
fixed_t* target_uv = target_uv_base;
|
||||
const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
|
||||
int ok;
|
||||
assert(w > 0);
|
||||
assert(h > 0);
|
||||
|
||||
if (best_y_base == NULL || best_uv_base == NULL ||
|
||||
target_y_base == NULL || target_uv_base == NULL ||
|
||||
best_rgb_y == NULL || best_rgb_uv == NULL ||
|
||||
tmp_buffer == NULL) {
|
||||
ok = 0;
|
||||
goto End;
|
||||
}
|
||||
|
||||
// Import RGB samples to W/RGB representation.
|
||||
for (j = 0; j < height; j += 2) {
|
||||
const int is_last_row = (j == height - 1);
|
||||
fixed_y_t* const src1 = tmp_buffer + 0 * w;
|
||||
fixed_y_t* const src2 = tmp_buffer + 3 * w;
|
||||
|
||||
// prepare two rows of input
|
||||
ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
|
||||
src1);
|
||||
if (!is_last_row) {
|
||||
ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
|
||||
rgb_step, rgb_bit_depth, width, src2);
|
||||
} else {
|
||||
memcpy(src2, src1, 3 * w * sizeof(*src2));
|
||||
}
|
||||
StoreGray(src1, best_y + 0, w);
|
||||
StoreGray(src2, best_y + w, w);
|
||||
|
||||
UpdateW(src1, target_y, w, rgb_bit_depth, transfer_type);
|
||||
UpdateW(src2, target_y + w, w, rgb_bit_depth, transfer_type);
|
||||
UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth, transfer_type);
|
||||
memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
|
||||
best_y += 2 * w;
|
||||
best_uv += 3 * uv_w;
|
||||
target_y += 2 * w;
|
||||
target_uv += 3 * uv_w;
|
||||
r_ptr += 2 * rgb_stride;
|
||||
g_ptr += 2 * rgb_stride;
|
||||
b_ptr += 2 * rgb_stride;
|
||||
}
|
||||
|
||||
// Iterate and resolve clipping conflicts.
|
||||
for (iter = 0; iter < kNumIterations; ++iter) {
|
||||
const fixed_t* cur_uv = best_uv_base;
|
||||
const fixed_t* prev_uv = best_uv_base;
|
||||
uint64_t diff_y_sum = 0;
|
||||
|
||||
best_y = best_y_base;
|
||||
best_uv = best_uv_base;
|
||||
target_y = target_y_base;
|
||||
target_uv = target_uv_base;
|
||||
for (j = 0; j < h; j += 2) {
|
||||
fixed_y_t* const src1 = tmp_buffer + 0 * w;
|
||||
fixed_y_t* const src2 = tmp_buffer + 3 * w;
|
||||
{
|
||||
const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
|
||||
InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
|
||||
src1, src2, rgb_bit_depth);
|
||||
prev_uv = cur_uv;
|
||||
cur_uv = next_uv;
|
||||
}
|
||||
|
||||
UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type);
|
||||
UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type);
|
||||
UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth, transfer_type);
|
||||
|
||||
// update two rows of Y and one row of RGB
|
||||
diff_y_sum +=
|
||||
SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
|
||||
rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
|
||||
SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
|
||||
|
||||
best_y += 2 * w;
|
||||
best_uv += 3 * uv_w;
|
||||
target_y += 2 * w;
|
||||
target_uv += 3 * uv_w;
|
||||
}
|
||||
// test exit condition
|
||||
if (iter > 0) {
|
||||
if (diff_y_sum < diff_y_threshold) break;
|
||||
if (diff_y_sum > prev_diff_y_sum) break;
|
||||
}
|
||||
prev_diff_y_sum = diff_y_sum;
|
||||
}
|
||||
|
||||
// final reconstruction
|
||||
ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
|
||||
u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
|
||||
width, height, yuv_matrix);
|
||||
|
||||
End:
|
||||
free(best_y_base);
|
||||
free(best_uv_base);
|
||||
free(target_y_base);
|
||||
free(target_uv_base);
|
||||
free(best_rgb_y);
|
||||
free(best_rgb_uv);
|
||||
free(tmp_buffer);
|
||||
return ok;
|
||||
}
|
||||
#undef SAFE_ALLOC
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#define LOCK_ACCESS \
|
||||
static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&sharpyuv_lock)) return
|
||||
#define UNLOCK_ACCESS_AND_RETURN \
|
||||
do { \
|
||||
(void)pthread_mutex_unlock(&sharpyuv_lock); \
|
||||
return; \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
#define LOCK_ACCESS do {} while (0)
|
||||
#define UNLOCK_ACCESS_AND_RETURN return
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Hidden exported init function.
|
||||
// By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed,
|
||||
// users can declare it as extern and call it with an alternate VP8CPUInfo
|
||||
// function.
|
||||
extern VP8CPUInfo SharpYuvGetCPUInfo;
|
||||
SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func);
|
||||
// Initializes the DSP function pointers and the gamma tables. The work is
// skipped when the CPU-info function in use has not changed since the last
// initialization. The body runs between LOCK_ACCESS and
// UNLOCK_ACCESS_AND_RETURN.
void SharpYuvInit(VP8CPUInfo cpu_info_func) {
  // Starts out holding its own address, presumably as a sentinel that differs
  // from any real CPU-info function so that the first call initializes.
  static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
      (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
  LOCK_ACCESS;
  // Only update SharpYuvGetCPUInfo when called from external code to avoid a
  // race on reading the value in SharpYuvConvert().
  if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) {
    SharpYuvGetCPUInfo = cpu_info_func;
  }
  if (sharpyuv_last_cpuinfo_used != SharpYuvGetCPUInfo) {
    SharpYuvInitDsp();
    SharpYuvInitGammaTables();

    sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo;
  }
  UNLOCK_ACCESS_AND_RETURN;
}
|
||||
|
||||
int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
|
||||
int rgb_step, int rgb_stride, int rgb_bit_depth,
|
||||
void* y_ptr, int y_stride, void* u_ptr, int u_stride,
|
||||
void* v_ptr, int v_stride, int yuv_bit_depth, int width,
|
||||
int height, const SharpYuvConversionMatrix* yuv_matrix) {
|
||||
SharpYuvOptions options;
|
||||
options.yuv_matrix = yuv_matrix;
|
||||
options.transfer_type = kSharpYuvTransferFunctionSrgb;
|
||||
return SharpYuvConvertWithOptions(
|
||||
r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride,
|
||||
u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, &options);
|
||||
}
|
||||
|
||||
int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix,
|
||||
SharpYuvOptions* options, int version) {
|
||||
const int major = (version >> 24);
|
||||
const int minor = (version >> 16) & 0xff;
|
||||
if (options == NULL || yuv_matrix == NULL ||
|
||||
(major == SHARPYUV_VERSION_MAJOR && major == 0 &&
|
||||
minor != SHARPYUV_VERSION_MINOR) ||
|
||||
(major != SHARPYUV_VERSION_MAJOR)) {
|
||||
return 0;
|
||||
}
|
||||
options->yuv_matrix = yuv_matrix;
|
||||
options->transfer_type = kSharpYuvTransferFunctionSrgb;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,
|
||||
const void* b_ptr, int rgb_step, int rgb_stride,
|
||||
int rgb_bit_depth, void* y_ptr, int y_stride,
|
||||
void* u_ptr, int u_stride, void* v_ptr,
|
||||
int v_stride, int yuv_bit_depth, int width,
|
||||
int height, const SharpYuvOptions* options) {
|
||||
const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix;
|
||||
SharpYuvTransferFunctionType transfer_type = options->transfer_type;
|
||||
SharpYuvConversionMatrix scaled_matrix;
|
||||
const int rgb_max = (1 << rgb_bit_depth) - 1;
|
||||
const int rgb_round = 1 << (rgb_bit_depth - 1);
|
||||
const int yuv_max = (1 << yuv_bit_depth) - 1;
|
||||
const int sfix = GetPrecisionShift(rgb_bit_depth);
|
||||
|
||||
if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
|
||||
r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
|
||||
u_ptr == NULL || v_ptr == NULL) {
|
||||
return 0;
|
||||
}
|
||||
if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
|
||||
rgb_bit_depth != 16) {
|
||||
return 0;
|
||||
}
|
||||
if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
|
||||
return 0;
|
||||
}
|
||||
if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) {
|
||||
// Step/stride should be even for uint16_t buffers.
|
||||
return 0;
|
||||
}
|
||||
if (yuv_bit_depth > 8 &&
|
||||
(y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
|
||||
// Stride should be even for uint16_t buffers.
|
||||
return 0;
|
||||
}
|
||||
// The address of the function pointer is used to avoid a read race.
|
||||
SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo);
|
||||
|
||||
// Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
|
||||
// rgb->yuv conversion matrix.
|
||||
if (rgb_bit_depth == yuv_bit_depth) {
|
||||
memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
|
||||
} else {
|
||||
int i;
|
||||
for (i = 0; i < 3; ++i) {
|
||||
scaled_matrix.rgb_to_y[i] =
|
||||
(yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
|
||||
scaled_matrix.rgb_to_u[i] =
|
||||
(yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
|
||||
scaled_matrix.rgb_to_v[i] =
|
||||
(yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
|
||||
}
|
||||
}
|
||||
// Also incorporate precision change scaling.
|
||||
scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
|
||||
scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
|
||||
scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
|
||||
|
||||
return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
|
||||
rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
|
||||
v_ptr, v_stride, yuv_bit_depth, width, height,
|
||||
&scaled_matrix, transfer_type);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
174
3rdparty/libwebp/sharpyuv/sharpyuv.h
vendored
Normal file
174
3rdparty/libwebp/sharpyuv/sharpyuv.h
vendored
Normal file
@ -0,0 +1,174 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Sharp RGB to YUV conversion.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef SHARPYUV_EXTERN
|
||||
#ifdef WEBP_EXTERN
|
||||
#define SHARPYUV_EXTERN WEBP_EXTERN
|
||||
#else
|
||||
// This explicitly marks library functions and allows for changing the
|
||||
// signature for e.g., Windows DLL builds.
|
||||
#if defined(__GNUC__) && __GNUC__ >= 4
|
||||
#define SHARPYUV_EXTERN extern __attribute__((visibility("default")))
|
||||
#else
|
||||
#if defined(_MSC_VER) && defined(WEBP_DLL)
|
||||
#define SHARPYUV_EXTERN __declspec(dllexport)
|
||||
#else
|
||||
#define SHARPYUV_EXTERN extern
|
||||
#endif /* _MSC_VER && WEBP_DLL */
|
||||
#endif /* __GNUC__ >= 4 */
|
||||
#endif /* WEBP_EXTERN */
|
||||
#endif /* SHARPYUV_EXTERN */
|
||||
|
||||
#ifndef SHARPYUV_INLINE
|
||||
#ifdef WEBP_INLINE
|
||||
#define SHARPYUV_INLINE WEBP_INLINE
|
||||
#else
|
||||
#ifndef _MSC_VER
|
||||
#if defined(__cplusplus) || !defined(__STRICT_ANSI__) || \
|
||||
(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
|
||||
#define SHARPYUV_INLINE inline
|
||||
#else
|
||||
#define SHARPYUV_INLINE
|
||||
#endif
|
||||
#else
|
||||
#define SHARPYUV_INLINE __forceinline
|
||||
#endif /* _MSC_VER */
|
||||
#endif /* WEBP_INLINE */
|
||||
#endif /* SHARPYUV_INLINE */
|
||||
|
||||
// SharpYUV API version following the convention from semver.org
|
||||
#define SHARPYUV_VERSION_MAJOR 0
|
||||
#define SHARPYUV_VERSION_MINOR 4
|
||||
#define SHARPYUV_VERSION_PATCH 0
|
||||
// Version as a uint32_t. The major number is the high 8 bits.
|
||||
// The minor number is the middle 8 bits. The patch number is the low 16 bits.
|
||||
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
|
||||
(((MAJOR) << 24) | ((MINOR) << 16) | (PATCH))
|
||||
#define SHARPYUV_VERSION \
|
||||
SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \
|
||||
SHARPYUV_VERSION_PATCH)
|
||||
|
||||
// Returns the library's version number, packed in hexadecimal. See
|
||||
// SHARPYUV_VERSION.
|
||||
SHARPYUV_EXTERN int SharpYuvGetVersion(void);
|
||||
|
||||
// RGB to YUV conversion matrix, in 16 bit fixed point.
|
||||
// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then y, u and v values are divided by 1<<16 and rounded.
|
||||
typedef struct {
  int rgb_to_y[4];  // r, g and b weights followed by the constant term for y
  int rgb_to_u[4];  // same layout, for u
  int rgb_to_v[4];  // same layout, for v
} SharpYuvConversionMatrix;
|
||||
|
||||
typedef struct SharpYuvOptions SharpYuvOptions;
|
||||
|
||||
// Enums for transfer functions, as defined in H.273,
|
||||
// https://www.itu.int/rec/T-REC-H.273-202107-I/en
|
||||
typedef enum SharpYuvTransferFunctionType {
  // Values 0 and 3 are reserved and 2 is unspecified, so none of them has an
  // enumerator here.
  kSharpYuvTransferFunctionBt709 = 1,
  kSharpYuvTransferFunctionBt470M = 4,
  kSharpYuvTransferFunctionBt470Bg = 5,
  kSharpYuvTransferFunctionBt601 = 6,
  kSharpYuvTransferFunctionSmpte240 = 7,
  kSharpYuvTransferFunctionLinear = 8,
  kSharpYuvTransferFunctionLog100 = 9,
  kSharpYuvTransferFunctionLog100_Sqrt10 = 10,
  kSharpYuvTransferFunctionIec61966 = 11,
  kSharpYuvTransferFunctionBt1361 = 12,
  kSharpYuvTransferFunctionSrgb = 13,
  kSharpYuvTransferFunctionBt2020_10Bit = 14,
  kSharpYuvTransferFunctionBt2020_12Bit = 15,
  kSharpYuvTransferFunctionSmpte2084 = 16,  // PQ
  kSharpYuvTransferFunctionSmpte428 = 17,
  kSharpYuvTransferFunctionHlg = 18,
  // One past the last defined value.
  kSharpYuvTransferFunctionNum
} SharpYuvTransferFunctionType;
|
||||
|
||||
// Converts RGB to YUV420 using a downsampling algorithm that minimizes
|
||||
// artefacts caused by chroma subsampling.
|
||||
// This is slower than standard downsampling (averaging of 4 UV values).
|
||||
// Assumes that the image will be upsampled using a bilinear filter. If nearest
|
||||
// neighbor is used instead, the upsampled image might look worse than with
|
||||
// standard downsampling.
|
||||
// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point
|
||||
// to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise.
|
||||
// rgb_step: distance in bytes between two horizontally adjacent pixels on the
|
||||
// r, g and b channels. If rgb_bit_depth is > 8, it should be a
|
||||
// multiple of 2.
|
||||
// rgb_stride: distance in bytes between two vertically adjacent pixels on the
|
||||
// r, g, and b channels. If rgb_bit_depth is > 8, it should be a
|
||||
// multiple of 2.
|
||||
// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16.
|
||||
// Note: 16 bit input is truncated to 14 bits before conversion to yuv.
|
||||
// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12.
|
||||
// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels. Should
|
||||
// point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers
|
||||
// otherwise.
|
||||
// y_stride, u_stride, v_stride: distance in bytes between two vertically
|
||||
// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
|
||||
// should be multiples of 2.
|
||||
// width, height: width and height of the image in pixels
|
||||
// This function calls SharpYuvConvertWithOptions with a default transfer
|
||||
// function of kSharpYuvTransferFunctionSrgb.
|
||||
SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
|
||||
const void* b_ptr, int rgb_step,
|
||||
int rgb_stride, int rgb_bit_depth,
|
||||
void* y_ptr, int y_stride, void* u_ptr,
|
||||
int u_stride, void* v_ptr, int v_stride,
|
||||
int yuv_bit_depth, int width, int height,
|
||||
const SharpYuvConversionMatrix* yuv_matrix);
|
||||
|
||||
struct SharpYuvOptions {
|
||||
// This matrix cannot be NULL and can be initialized by
|
||||
// SharpYuvComputeConversionMatrix.
|
||||
const SharpYuvConversionMatrix* yuv_matrix;
|
||||
SharpYuvTransferFunctionType transfer_type;
|
||||
};
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
SHARPYUV_EXTERN int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix*,
|
||||
SharpYuvOptions*, int);
|
||||
|
||||
// Should always be called, to initialize a fresh SharpYuvOptions
|
||||
// structure before modification. SharpYuvOptionsInit() must have succeeded
|
||||
// before using the 'options' object.
|
||||
// Inline convenience wrapper around SharpYuvOptionsInitInternal().
// Returns whatever the internal entry point returns.
static SHARPYUV_INLINE int SharpYuvOptionsInit(
|
||||
const SharpYuvConversionMatrix* yuv_matrix, SharpYuvOptions* options) {
|
||||
// Forward SHARPYUV_VERSION as seen by the including translation unit so the
// internal entry point can perform its version check.
return SharpYuvOptionsInitInternal(yuv_matrix, options, SHARPYUV_VERSION);
|
||||
}
|
||||
|
||||
SHARPYUV_EXTERN int SharpYuvConvertWithOptions(
|
||||
const void* r_ptr, const void* g_ptr, const void* b_ptr, int rgb_step,
|
||||
int rgb_stride, int rgb_bit_depth, void* y_ptr, int y_stride, void* u_ptr,
|
||||
int u_stride, void* v_ptr, int v_stride, int yuv_bit_depth, int width,
|
||||
int height, const SharpYuvOptions* options);
|
||||
|
||||
// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422
|
||||
// support (it's rarely used in practice, especially for images).
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_H_
|
14
3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c
vendored
Normal file
14
3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
#include "sharpyuv/sharpyuv_cpu.h"
|
||||
|
||||
// Include src/dsp/cpu.c to create SharpYuvGetCPUInfo from VP8GetCPUInfo. The
|
||||
// function pointer is renamed in sharpyuv_cpu.h.
|
||||
#include "src/dsp/cpu.c"
|
22
3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h
vendored
Normal file
22
3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_CPU_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_CPU_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
|
||||
// Avoid exporting SharpYuvGetCPUInfo in shared object / DLL builds.
|
||||
// SharpYuvInit() replaces the use of the function pointer.
|
||||
#undef WEBP_EXTERN
|
||||
#define WEBP_EXTERN extern
|
||||
#define VP8GetCPUInfo SharpYuvGetCPUInfo
|
||||
#include "src/dsp/cpu.h"
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_CPU_H_
|
110
3rdparty/libwebp/sharpyuv/sharpyuv_csp.c
vendored
Normal file
110
3rdparty/libwebp/sharpyuv/sharpyuv_csp.c
vendored
Normal file
@ -0,0 +1,110 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Colorspace utilities.
|
||||
|
||||
#include "sharpyuv/sharpyuv_csp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
|
||||
static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }
|
||||
|
||||
void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
SharpYuvConversionMatrix* matrix) {
|
||||
const float kr = yuv_color_space->kr;
|
||||
const float kb = yuv_color_space->kb;
|
||||
const float kg = 1.0f - kr - kb;
|
||||
const float cr = 0.5f / (1.0f - kb);
|
||||
const float cb = 0.5f / (1.0f - kr);
|
||||
|
||||
const int shift = yuv_color_space->bit_depth - 8;
|
||||
|
||||
const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
|
||||
float scale_y = 1.0f;
|
||||
float add_y = 0.0f;
|
||||
float scale_u = cr;
|
||||
float scale_v = cb;
|
||||
float add_uv = (float)(128 << shift);
|
||||
assert(yuv_color_space->bit_depth >= 8);
|
||||
|
||||
if (yuv_color_space->range == kSharpYuvRangeLimited) {
|
||||
scale_y *= (219 << shift) / denom;
|
||||
scale_u *= (224 << shift) / denom;
|
||||
scale_v *= (224 << shift) / denom;
|
||||
add_y = (float)(16 << shift);
|
||||
}
|
||||
|
||||
matrix->rgb_to_y[0] = ToFixed16(kr * scale_y);
|
||||
matrix->rgb_to_y[1] = ToFixed16(kg * scale_y);
|
||||
matrix->rgb_to_y[2] = ToFixed16(kb * scale_y);
|
||||
matrix->rgb_to_y[3] = ToFixed16(add_y);
|
||||
|
||||
matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u);
|
||||
matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u);
|
||||
matrix->rgb_to_u[2] = ToFixed16((1 - kb) * scale_u);
|
||||
matrix->rgb_to_u[3] = ToFixed16(add_uv);
|
||||
|
||||
matrix->rgb_to_v[0] = ToFixed16((1 - kr) * scale_v);
|
||||
matrix->rgb_to_v[1] = ToFixed16(-kg * scale_v);
|
||||
matrix->rgb_to_v[2] = ToFixed16(-kb * scale_v);
|
||||
matrix->rgb_to_v[3] = ToFixed16(add_uv);
|
||||
}
|
||||
|
||||
// Matrices are in YUV_FIX fixed point precision.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
|
||||
static const SharpYuvConversionMatrix kWebpMatrix = {
|
||||
{16839, 33059, 6420, 16 << 16},
|
||||
{-9719, -19081, 28800, 128 << 16},
|
||||
{28800, -24116, -4684, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
|
||||
{16829, 33039, 6416, 16 << 16},
|
||||
{-9714, -19071, 28784, 128 << 16},
|
||||
{28784, -24103, -4681, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec601FullMatrix = {
|
||||
{19595, 38470, 7471, 0},
|
||||
{-11058, -21710, 32768, 128 << 16},
|
||||
{32768, -27439, -5329, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
|
||||
{11966, 40254, 4064, 16 << 16},
|
||||
{-6596, -22189, 28784, 128 << 16},
|
||||
{28784, -26145, -2639, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec709FullMatrix = {
|
||||
{13933, 46871, 4732, 0},
|
||||
{-7509, -25259, 32768, 128 << 16},
|
||||
{32768, -29763, -3005, 128 << 16},
|
||||
};
|
||||
|
||||
const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
|
||||
SharpYuvMatrixType matrix_type) {
|
||||
switch (matrix_type) {
|
||||
case kSharpYuvMatrixWebp:
|
||||
return &kWebpMatrix;
|
||||
case kSharpYuvMatrixRec601Limited:
|
||||
return &kRec601LimitedMatrix;
|
||||
case kSharpYuvMatrixRec601Full:
|
||||
return &kRec601FullMatrix;
|
||||
case kSharpYuvMatrixRec709Limited:
|
||||
return &kRec709LimitedMatrix;
|
||||
case kSharpYuvMatrixRec709Full:
|
||||
return &kRec709FullMatrix;
|
||||
case kSharpYuvMatrixNum:
|
||||
return NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
60
3rdparty/libwebp/sharpyuv/sharpyuv_csp.h
vendored
Normal file
60
3rdparty/libwebp/sharpyuv/sharpyuv_csp.h
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Colorspace utilities.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Range of YUV values.
|
||||
typedef enum {
|
||||
kSharpYuvRangeFull, // YUV values between [0;255] (for 8 bit)
|
||||
kSharpYuvRangeLimited // Y in [16;235], UV in [16;240] (for 8 bit)
|
||||
} SharpYuvRange;
|
||||
|
||||
// Constants that define a YUV color space.
|
||||
typedef struct {
|
||||
// Kr and Kb are defined such that:
|
||||
// Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb.
|
||||
float kr;
|
||||
float kb;
|
||||
int bit_depth; // 8, 10 or 12
|
||||
SharpYuvRange range;
|
||||
} SharpYuvColorSpace;
|
||||
|
||||
// Fills in 'matrix' for the given YUVColorSpace.
|
||||
SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix(
|
||||
const SharpYuvColorSpace* yuv_color_space,
|
||||
SharpYuvConversionMatrix* matrix);
|
||||
|
||||
// Enums for precomputed conversion matrices.
|
||||
typedef enum {
|
||||
kSharpYuvMatrixWebp = 0,
|
||||
kSharpYuvMatrixRec601Limited,
|
||||
kSharpYuvMatrixRec601Full,
|
||||
kSharpYuvMatrixRec709Limited,
|
||||
kSharpYuvMatrixRec709Full,
|
||||
kSharpYuvMatrixNum
|
||||
} SharpYuvMatrixType;
|
||||
|
||||
// Returns a pointer to a matrix for one of the predefined colorspaces.
|
||||
SHARPYUV_EXTERN const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
|
||||
SharpYuvMatrixType matrix_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
104
3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c
vendored
Normal file
104
3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c
vendored
Normal file
@ -0,0 +1,104 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "sharpyuv/sharpyuv_cpu.h"
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// Clamps 'v' to the [0, max] interval and returns it as a 16-bit value.
static uint16_t clip(int v, int max) {
  if (v < 0) return 0;
  if (v > max) return (uint16_t)max;
  return (uint16_t)v;
}
|
||||
|
||||
// Adds (ref[i] - src[i]) to each dst[i], clamping the result to
// [0, (1 << bit_depth) - 1]. Returns the sum of the absolute differences
// |ref[i] - src[i]| over the 'len' samples.
static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src,
                                  uint16_t* dst, int len, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  uint64_t total = 0;
  int k = 0;
  while (k < len) {
    const int delta = ref[k] - src[k];
    dst[k] = clip((int)dst[k] + delta, max_y);
    total += (uint64_t)abs(delta);
    ++k;
  }
  return total;
}
|
||||
|
||||
// Adds the per-sample difference (ref[i] - src[i]) to dst[i] for 'len'
// samples. No clamping is applied; the sum is stored back as int16_t.
static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src,
                                int16_t* dst, int len) {
  int k = 0;
  while (k < len) {
    const int delta = ref[k] - src[k];
    dst[k] = (int16_t)(dst[k] + delta);
    ++k;
  }
}
|
||||
|
||||
// For each i in [0, len), blends the 2x2 neighborhood A[i], A[i+1], B[i],
// B[i+1] with weights 9/3/3/1 (sum 16, rounded), adds the result to
// best_y[2*i] / best_y[2*i+1] and writes the clamped sums to
// out[2*i] / out[2*i+1]. Reads len + 1 samples from both A and B.
static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len,
                                const uint16_t* best_y, uint16_t* out,
                                int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  int i;
  for (i = 0; i < len; ++i) {
    const int a0 = A[i];
    const int a1 = A[i + 1];
    const int b0 = B[i];
    const int b1 = B[i + 1];
    // Left output leans on (a0, b0), right output leans on (a1, b1).
    const int v0 = (a0 * 9 + a1 * 3 + b0 * 3 + b1 + 8) >> 4;
    const int v1 = (a1 * 9 + a0 * 3 + b1 * 3 + b0 + 8) >> 4;
    out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y);
    out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y);
  }
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len, int bit_depth);
|
||||
void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst,
|
||||
int len);
|
||||
void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth);
|
||||
|
||||
extern VP8CPUInfo SharpYuvGetCPUInfo;
|
||||
extern void InitSharpYuvSSE2(void);
|
||||
extern void InitSharpYuvNEON(void);
|
||||
|
||||
void SharpYuvInitDsp(void) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
SharpYuvUpdateY = SharpYuvUpdateY_C;
|
||||
SharpYuvUpdateRGB = SharpYuvUpdateRGB_C;
|
||||
SharpYuvFilterRow = SharpYuvFilterRow_C;
|
||||
#endif
|
||||
|
||||
if (SharpYuvGetCPUInfo != NULL) {
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (SharpYuvGetCPUInfo(kSSE2)) {
|
||||
InitSharpYuvSSE2();
|
||||
}
|
||||
#endif // WEBP_HAVE_SSE2
|
||||
}
|
||||
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(SharpYuvGetCPUInfo != NULL && SharpYuvGetCPUInfo(kNEON))) {
|
||||
InitSharpYuvNEON();
|
||||
}
|
||||
#endif // WEBP_HAVE_NEON
|
||||
|
||||
assert(SharpYuvUpdateY != NULL);
|
||||
assert(SharpYuvUpdateRGB != NULL);
|
||||
assert(SharpYuvFilterRow != NULL);
|
||||
}
|
28
3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h
vendored
Normal file
28
3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv_cpu.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len, int bit_depth);
|
||||
extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref,
|
||||
int16_t* dst, int len);
|
||||
extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth);
|
||||
|
||||
void SharpYuvInitDsp(void);
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
419
3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c
vendored
Normal file
419
3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c
vendored
Normal file
@ -0,0 +1,419 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Gamma correction utilities.
|
||||
|
||||
#include "sharpyuv/sharpyuv_gamma.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// Gamma correction compensates loss of resolution during chroma subsampling.
|
||||
// Size of pre-computed table for converting from gamma to linear.
|
||||
#define GAMMA_TO_LINEAR_TAB_BITS 10
|
||||
#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
|
||||
static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
|
||||
#define LINEAR_TO_GAMMA_TAB_BITS 9
|
||||
#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
|
||||
static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
|
||||
|
||||
static const double kGammaF = 1. / 0.45;
|
||||
#define GAMMA_TO_LINEAR_BITS 16
|
||||
|
||||
static volatile int kGammaTablesSOk = 0;
|
||||
void SharpYuvInitGammaTables(void) {
|
||||
assert(GAMMA_TO_LINEAR_BITS <= 16);
|
||||
if (!kGammaTablesSOk) {
|
||||
int v;
|
||||
const double a = 0.09929682680944;
|
||||
const double thresh = 0.018053968510807;
|
||||
const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
|
||||
// Precompute gamma to linear table.
|
||||
{
|
||||
const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
|
||||
const double a_rec = 1. / (1. + a);
|
||||
for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
|
||||
const double g = norm * v;
|
||||
double value;
|
||||
if (g <= thresh * 4.5) {
|
||||
value = g / 4.5;
|
||||
} else {
|
||||
value = pow(a_rec * (g + a), kGammaF);
|
||||
}
|
||||
kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
|
||||
}
|
||||
// to prevent small rounding errors to cause read-overflow:
|
||||
kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
|
||||
kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
|
||||
}
|
||||
// Precompute linear to gamma table.
|
||||
{
|
||||
const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
|
||||
for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
|
||||
const double g = scale * v;
|
||||
double value;
|
||||
if (g <= thresh) {
|
||||
value = 4.5 * g;
|
||||
} else {
|
||||
value = (1. + a) * pow(g, 1. / kGammaF) - a;
|
||||
}
|
||||
kLinearToGammaTabS[v] =
|
||||
(uint32_t)(final_scale * value + 0.5);
|
||||
}
|
||||
// to prevent small rounding errors to cause read-overflow:
|
||||
kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
|
||||
kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
|
||||
}
|
||||
kGammaTablesSOk = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Shifts 'v' left by 'shift' bits when 'shift' is non-negative, otherwise
// right by -shift bits.
static WEBP_INLINE int Shift(int v, int shift) {
  if (shift >= 0) {
    return v << shift;
  }
  return v >> -shift;
}
|
||||
|
||||
// Linearly interpolates between two consecutive entries of 'tab'.
//   v: input value; its top bits select the table slot, its low
//      'tab_pos_shift_right' bits are the fractional position in the slot.
//   tab_value_shift: shift applied to each table entry before interpolating
//      (left when positive, right when negative).
// Reads tab[pos] and tab[pos + 1].
static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
                                                    int tab_pos_shift_right,
                                                    int tab_value_shift) {
  const uint32_t slot = Shift(v, -tab_pos_shift_right);
  // Fractional part, with 'tab_pos_shift_right' bits of precision.
  const uint32_t frac = v - (slot << tab_pos_shift_right);
  const uint32_t lo = Shift(tab[slot + 0], tab_value_shift);
  const uint32_t hi = Shift(tab[slot + 1], tab_value_shift);
  // The table is expected to be non-decreasing, so hi >= lo.
  const uint32_t weighted = (hi - lo) * frac;
  // Rounding term: half of the fractional unit (0 when there is no fraction).
  const int half =
      (tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0;
  return lo + ((weighted + half) >> tab_pos_shift_right);
}
|
||||
|
||||
static uint32_t ToLinearSrgb(uint16_t v, int bit_depth) {
|
||||
const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
|
||||
if (shift > 0) {
|
||||
return kGammaToLinearTabS[v << shift];
|
||||
}
|
||||
return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
|
||||
}
|
||||
|
||||
static uint16_t FromLinearSrgb(uint32_t value, int bit_depth) {
|
||||
return FixedPointInterpolation(
|
||||
value, kLinearToGammaTabS,
|
||||
(GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS),
|
||||
bit_depth - GAMMA_TO_LINEAR_BITS);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define CLAMP(x, low, high) \
|
||||
(((x) < (low)) ? (low) : (((high) < (x)) ? (high) : (x)))
|
||||
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
||||
|
||||
// Rounds 'x' to the nearest integer, halves away from zero, using only the
// double-precision ceil()/floor().
static WEBP_INLINE float Roundf(float x) {
  const double rounded =
      (x < 0) ? ceil((double)(x - 0.5f)) : floor((double)(x + 0.5f));
  return (float)rounded;
}
|
||||
|
||||
// Single-precision wrapper around the double-precision pow().
static WEBP_INLINE float Powf(float base, float exp) {
  const double result = pow((double)base, (double)exp);
  return (float)result;
}
|
||||
|
||||
// Single-precision wrapper around the double-precision log10().
static WEBP_INLINE float Log10f(float x) {
  const double result = log10((double)x);
  return (float)result;
}
|
||||
|
||||
// Gamma -> linear for the BT.709-style curve: a linear segment of slope
// 1 / 4.5 near black, a 1 / 0.45 power law above it. Output is clamped to
// [0, 1].
static float ToLinear709(float gamma) {
  float linear = 1.f;  // value for gamma >= 1
  if (gamma < 0.f) {
    linear = 0.f;
  } else if (gamma < 4.5f * 0.018053968510807f) {
    linear = gamma / 4.5f;
  } else if (gamma < 1.f) {
    linear =
        Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
  }
  return linear;
}
|
||||
|
||||
// Linear -> gamma for the BT.709-style curve: slope 4.5 near black, a 0.45
// power law above it. Output is clamped to [0, 1].
static float FromLinear709(float linear) {
  float gamma = 1.f;  // value for linear >= 1
  if (linear < 0.f) {
    gamma = 0.f;
  } else if (linear < 0.018053968510807f) {
    gamma = linear * 4.5f;
  } else if (linear < 1.f) {
    gamma = 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
  }
  return gamma;
}
|
||||
|
||||
// BT.470 System M assumes a display gamma of 2.2, so decoding an encoded
// value to linear light raises it to the power 2.2. Input is clamped to
// [0, 1].
static float ToLinear470M(float gamma) {
  return Powf(CLAMP(gamma, 0.f, 1.f), 2.2f);
}

// Inverse of ToLinear470M: encodes linear light with exponent 1 / 2.2.
// Input is clamped to [0, 1].
static float FromLinear470M(float linear) {
  return Powf(CLAMP(linear, 0.f, 1.f), 1.f / 2.2f);
}
|
||||
|
||||
// BT.470 System B/G assumes a display gamma of 2.8, so decoding an encoded
// value to linear light raises it to the power 2.8. Input is clamped to
// [0, 1].
static float ToLinear470Bg(float gamma) {
  return Powf(CLAMP(gamma, 0.f, 1.f), 2.8f);
}

// Inverse of ToLinear470Bg: encodes linear light with exponent 1 / 2.8.
// Input is clamped to [0, 1].
static float FromLinear470Bg(float linear) {
  return Powf(CLAMP(linear, 0.f, 1.f), 1.f / 2.8f);
}
|
||||
|
||||
// Gamma -> linear for the SMPTE 240 curve: a linear segment of slope 1 / 4
// near black, a 1 / 0.45 power law above it. Output is clamped to [0, 1].
static float ToLinearSmpte240(float gamma) {
  float linear = 1.f;  // value for gamma >= 1
  if (gamma < 0.f) {
    linear = 0.f;
  } else if (gamma < 4.f * 0.022821585529445f) {
    linear = gamma / 4.f;
  } else if (gamma < 1.f) {
    linear =
        Powf((gamma + 0.111572195921731f) / 1.111572195921731f, 1.f / 0.45f);
  }
  return linear;
}
|
||||
|
||||
// Linear -> gamma for the SMPTE 240 curve: slope 4 near black, a 0.45 power
// law above it. Output is clamped to [0, 1].
static float FromLinearSmpte240(float linear) {
  float gamma = 1.f;  // value for linear >= 1
  if (linear < 0.f) {
    gamma = 0.f;
  } else if (linear < 0.022821585529445f) {
    gamma = linear * 4.f;
  } else if (linear < 1.f) {
    gamma = 1.111572195921731f * Powf(linear, 0.45f) - 0.111572195921731f;
  }
  return gamma;
}
|
||||
|
||||
// Logarithmic transfer (100:1 range). The encoding is
//   V = 1 + log10(L) / 2  for L in [0.01, 1],  V = 0 for L < 0.01,
// so decoding is L = 10^(2 * (V - 1)).
static float ToLinearLog100(float gamma) {
  // The function is non-bijective (every L in [0, 0.01[ encodes to 0), so
  // choose the middle of that interval.
  const float mid_interval = 0.01f / 2.f;
  return (gamma <= 0.0f) ? mid_interval
                         : Powf(10.0f, 2.f * (MIN(gamma, 1.f) - 1.0f));
}

// Encodes linear light with the 100:1 logarithmic curve; values below 0.01
// map to 0 and values above 1 are clamped to 1.
static float FromLinearLog100(float linear) {
  return (linear < 0.01f) ? 0.0f : 1.0f + Log10f(MIN(linear, 1.f)) / 2.0f;
}
|
||||
|
||||
// Logarithmic transfer (100 * sqrt(10) : 1 range). The encoding is
//   V = 1 + log10(L) / 2.5  for L in [0.00316227766, 1],  V = 0 below,
// so decoding is L = 10^(2.5 * (V - 1)).
static float ToLinearLog100Sqrt10(float gamma) {
  // The function is non-bijective (every L in [0, 0.00316227766f[ encodes to
  // 0), so choose the middle of that interval.
  const float mid_interval = 0.00316227766f / 2.f;
  return (gamma <= 0.0f) ? mid_interval
                         : Powf(10.0f, 2.5f * (MIN(gamma, 1.f) - 1.0f));
}

// Encodes linear light with the 100 * sqrt(10) : 1 logarithmic curve; values
// below 0.00316227766 map to 0 and values above 1 are clamped to 1.
static float FromLinearLog100Sqrt10(float linear) {
  return (linear < 0.00316227766f) ? 0.0f
                                   : 1.0f + Log10f(MIN(linear, 1.f)) / 2.5f;
}
|
||||
|
||||
// Gamma -> linear for the IEC 61966-2-4 curve, which extends the BT.709 curve
// symmetrically to negative values. No clamping is applied.
static float ToLinearIec61966(float gamma) {
  if (gamma <= -4.5f * 0.018053968510807f) {
    // Mirror of the positive power segment. The sign is applied outside the
    // power: a negative base with a fractional exponent would yield NaN.
    return -Powf((-gamma + 0.09929682680944f) / 1.09929682680944f,
                 1.f / 0.45f);
  } else if (gamma < 4.5f * 0.018053968510807f) {
    return gamma / 4.5f;
  }
  return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
}
|
||||
|
||||
// Linear -> gamma for the IEC 61966-2-4 curve: the BT.709-style curve with a
// mirrored power segment for negative input. No clamping is applied.
static float FromLinearIec61966(float linear) {
  float gamma;
  if (linear <= -0.018053968510807f) {
    gamma = -1.09929682680944f * Powf(-linear, 0.45f) + 0.09929682680944f;
  } else if (linear < 0.018053968510807f) {
    gamma = linear * 4.5f;
  } else {
    gamma = 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
  }
  return gamma;
}
|
||||
|
||||
// Gamma -> linear for the BT.1361 extended-gamut curve: the BT.709-style
// curve for non-negative input plus a separate power segment for negative
// input. Output is clamped to [-0.25, 1].
static float ToLinearBt1361(float gamma) {
  float linear = 1.f;  // value for gamma >= 1
  if (gamma < -0.25f) {
    linear = -0.25f;
  } else if (gamma < 0.f) {
    linear =
        Powf((gamma - 0.02482420670236f) / -0.27482420670236f, 1.f / 0.45f) /
        -4.f;
  } else if (gamma < 4.5f * 0.018053968510807f) {
    linear = gamma / 4.5f;
  } else if (gamma < 1.f) {
    linear =
        Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
  }
  return linear;
}
|
||||
|
||||
// Linear -> gamma for the BT.1361 extended-gamut curve. Output is clamped to
// [-0.25, 1].
static float FromLinearBt1361(float linear) {
  float gamma = 1.f;  // value for linear >= 1
  if (linear < -0.25f) {
    gamma = -0.25f;
  } else if (linear < 0.f) {
    gamma =
        -0.27482420670236f * Powf(-4.f * linear, 0.45f) + 0.02482420670236f;
  } else if (linear < 0.018053968510807f) {
    gamma = linear * 4.5f;
  } else if (linear < 1.f) {
    gamma = 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
  }
  return gamma;
}
|
||||
|
||||
// Gamma -> linear for the PQ (SMPTE 2084) curve. Non-positive input maps
// to 0.
static float ToLinearPq(float gamma) {
  float pow_gamma, num, den;
  if (!(gamma > 0.f)) {
    return 0.f;
  }
  pow_gamma = Powf(gamma, 32.f / 2523.f);
  num = MAX(pow_gamma - 107.f / 128.f, 0.0f);
  // Keep the denominator strictly positive to avoid dividing by zero.
  den = MAX(2413.f / 128.f - 2392.f / 128.f * pow_gamma, FLT_MIN);
  return Powf(num / den, 4096.f / 653.f);
}
|
||||
|
||||
// Linear -> gamma for the PQ (SMPTE 2084) curve. Non-positive input maps
// to 0.
static float FromLinearPq(float linear) {
  float pow_linear, num, den;
  if (!(linear > 0.f)) {
    return 0.f;
  }
  pow_linear = Powf(linear, 653.f / 4096.f);
  num = 107.f / 128.f + 2413.f / 128.f * pow_linear;
  den = 1.0f + 2392.f / 128.f * pow_linear;
  return Powf(num / den, 2523.f / 32.f);
}
|
||||
|
||||
// SMPTE 428 encodes linear light as V = (0.9165... * L)^(1 / 2.6), so
// decoding is L = V^2.6 / 0.9165.... Negative input is treated as 0.
static float ToLinearSmpte428(float gamma) {
  return Powf(MAX(gamma, 0.f), 2.6f) / 0.91655527974030934f;
}

// Inverse of ToLinearSmpte428: V = (0.9165... * L)^(1 / 2.6). Negative input
// is treated as 0.
static float FromLinearSmpte428(float linear) {
  return Powf(0.91655527974030934f * MAX(linear, 0.f), 1.f / 2.6f);
}
|
||||
|
||||
// Conversion in BT.2100 requires RGB info. Simplify to gamma correction here.
|
||||
// Gamma -> linear for HLG, simplified to a per-channel curve: a square-law
// segment up to 0.5 and an exponential segment above it, followed by a 1.2
// power. Negative input maps to 0.
static float ToLinearHlg(float gamma) {
  float scene;
  if (gamma < 0.f) {
    return 0.f;
  }
  if (gamma <= 0.5f) {
    scene = (gamma * gamma) * (1.f / 3.f);
  } else {
    scene = (expf((gamma - 0.55991073f) / 0.17883277f) + 0.28466892f) / 12.0f;
  }
  return Powf(scene, 1.2f);
}
|
||||
|
||||
// Linear -> gamma for HLG, simplified to a per-channel curve: undo the 1.2
// power first, then apply the square-root segment (up to 1 / 12) or the
// logarithmic segment above it.
static float FromLinearHlg(float linear) {
  const float scene = Powf(linear, 1.f / 1.2f);
  if (scene < 0.f) {
    return 0.f;
  }
  if (scene <= (1.f / 12.f)) {
    return sqrtf(3.f * scene);
  }
  return 0.17883277f * logf(12.f * scene - 0.28466892f) + 0.55991073f;
}
|
||||
|
||||
uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
float v_float, linear;
|
||||
if (transfer_type == kSharpYuvTransferFunctionSrgb) {
|
||||
return ToLinearSrgb(v, bit_depth);
|
||||
}
|
||||
v_float = (float)v / ((1 << bit_depth) - 1);
|
||||
switch (transfer_type) {
|
||||
case kSharpYuvTransferFunctionBt709:
|
||||
case kSharpYuvTransferFunctionBt601:
|
||||
case kSharpYuvTransferFunctionBt2020_10Bit:
|
||||
case kSharpYuvTransferFunctionBt2020_12Bit:
|
||||
linear = ToLinear709(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt470M:
|
||||
linear = ToLinear470M(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt470Bg:
|
||||
linear = ToLinear470Bg(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte240:
|
||||
linear = ToLinearSmpte240(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionLinear:
|
||||
return v;
|
||||
case kSharpYuvTransferFunctionLog100:
|
||||
linear = ToLinearLog100(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionLog100_Sqrt10:
|
||||
linear = ToLinearLog100Sqrt10(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionIec61966:
|
||||
linear = ToLinearIec61966(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt1361:
|
||||
linear = ToLinearBt1361(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte2084:
|
||||
linear = ToLinearPq(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte428:
|
||||
linear = ToLinearSmpte428(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionHlg:
|
||||
linear = ToLinearHlg(v_float);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
linear = 0;
|
||||
break;
|
||||
}
|
||||
return (uint32_t)Roundf(linear * ((1 << 16) - 1));
|
||||
}
|
||||
|
||||
uint16_t SharpYuvLinearToGamma(uint32_t v, int bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
float v_float, linear;
|
||||
if (transfer_type == kSharpYuvTransferFunctionSrgb) {
|
||||
return FromLinearSrgb(v, bit_depth);
|
||||
}
|
||||
v_float = (float)v / ((1 << 16) - 1);
|
||||
switch (transfer_type) {
|
||||
case kSharpYuvTransferFunctionBt709:
|
||||
case kSharpYuvTransferFunctionBt601:
|
||||
case kSharpYuvTransferFunctionBt2020_10Bit:
|
||||
case kSharpYuvTransferFunctionBt2020_12Bit:
|
||||
linear = FromLinear709(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt470M:
|
||||
linear = FromLinear470M(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt470Bg:
|
||||
linear = FromLinear470Bg(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte240:
|
||||
linear = FromLinearSmpte240(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionLinear:
|
||||
return v;
|
||||
case kSharpYuvTransferFunctionLog100:
|
||||
linear = FromLinearLog100(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionLog100_Sqrt10:
|
||||
linear = FromLinearLog100Sqrt10(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionIec61966:
|
||||
linear = FromLinearIec61966(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt1361:
|
||||
linear = FromLinearBt1361(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte2084:
|
||||
linear = FromLinearPq(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte428:
|
||||
linear = FromLinearSmpte428(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionHlg:
|
||||
linear = FromLinearHlg(v_float);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
linear = 0;
|
||||
break;
|
||||
}
|
||||
return (uint16_t)Roundf(linear * ((1 << bit_depth) - 1));
|
||||
}
|
38
3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h
vendored
Normal file
38
3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Gamma correction utilities.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Initializes precomputed tables. Must be called once before calling
|
||||
// SharpYuvGammaToLinear or SharpYuvLinearToGamma.
|
||||
void SharpYuvInitGammaTables(void);
|
||||
|
||||
// Converts a 'bit_depth'-bit gamma color value to a 16-bit linear value.
|
||||
uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type);
|
||||
|
||||
// Converts a 16-bit linear color value to a 'bit_depth'-bit gamma value.
|
||||
uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
181
3rdparty/libwebp/sharpyuv/sharpyuv_neon.c
vendored
Normal file
181
3rdparty/libwebp/sharpyuv/sharpyuv_neon.c
vendored
Normal file
@ -0,0 +1,181 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Clamps 'v' to the inclusive range [0, max] and returns it as a uint16_t.
static uint16_t clip_NEON(int v, int max) {
  if (v < 0) return 0;
  if (v > max) return (uint16_t)max;
  return (uint16_t)v;
}
|
||||
|
||||
// Adds the per-sample error (ref[i] - src[i]) to dst[i] in place, clamping
// each updated sample to [0, (1 << bit_depth) - 1].
// Returns the sum of |ref[i] - src[i]| over the 'len' samples.
static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
                                     uint16_t* dst, int len, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  const int16x8_t v_max = vdupq_n_s16(max_y);
  const int16x8_t v_zero = vdupq_n_s16(0);
  uint64x2_t acc = vdupq_n_u64(0);
  uint64_t total;
  int pos = 0;

  // Vector path: eight samples per iteration.
  while (pos + 8 <= len) {
    const int16x8_t v_ref = vreinterpretq_s16_u16(vld1q_u16(ref + pos));
    const int16x8_t v_src = vreinterpretq_s16_u16(vld1q_u16(src + pos));
    const int16x8_t v_dst = vreinterpretq_s16_u16(vld1q_u16(dst + pos));
    const int16x8_t v_delta = vsubq_s16(v_ref, v_src);    // diff_y
    const int16x8_t v_new = vaddq_s16(v_dst, v_delta);    // new_y
    const int16x8_t v_clamped = vmaxq_s16(vminq_s16(v_new, v_max), v_zero);
    const uint16x8_t v_abs = vreinterpretq_u16_s16(vabsq_s16(v_delta));
    vst1q_u16(dst + pos, vreinterpretq_u16_s16(v_clamped));
    // Widen pairwise (16 -> 32 -> 64 bits) while accumulating.
    acc = vpadalq_u32(acc, vpaddlq_u16(v_abs));
    pos += 8;
  }
  total = vgetq_lane_u64(acc, 0) + vgetq_lane_u64(acc, 1);

  // Scalar path for the samples left over by the vector loop.
  for (; pos < len; ++pos) {
    const int delta = ref[pos] - src[pos];
    const int updated = (int)(dst[pos]) + delta;
    dst[pos] = clip_NEON(updated, max_y);
    total += (uint64_t)(abs(delta));
  }
  return total;
}
|
||||
|
||||
// Adds the per-sample error (ref[i] - src[i]) to dst[i] in place for 'len'
// signed 16-bit samples. No clamping is applied.
static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
                                   int16_t* dst, int len) {
  int pos = 0;

  // Vector path: eight samples per iteration.
  while (pos + 8 <= len) {
    const int16x8_t v_delta =
        vsubq_s16(vld1q_s16(ref + pos), vld1q_s16(src + pos));  // diff_uv
    vst1q_s16(dst + pos, vaddq_s16(vld1q_s16(dst + pos), v_delta));
    pos += 8;
  }

  // Scalar path for the samples left over by the vector loop.
  for (; pos < len; ++pos) {
    dst[pos] = (int16_t)(dst[pos] + (ref[pos] - src[pos]));
  }
}
|
||||
|
||||
// Filters rows 'A' and 'B' into two output samples per input position, adds
// the result to 'best_y' and writes it to 'out', clamped to
// [0, (1 << bit_depth) - 1]. All vector arithmetic is done in 16-bit lanes.
// Reads A[0..len] and B[0..len] (len + 1 samples each) and 2 * len samples
// of 'best_y'; writes 2 * len samples to 'out'.
static void SharpYuvFilterRow16_NEON(const int16_t* A, const int16_t* B,
                                     int len, const uint16_t* best_y,
                                     uint16_t* out, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  const int16x8_t v_zero = vdupq_n_s16(0);
  const int16x8_t v_max = vdupq_n_s16(max_y);
  int pos = 0;

  // Vector path: eight input positions (sixteen outputs) per iteration.
  while (pos + 8 <= len) {
    const int16x8_t a_cur = vld1q_s16(A + pos + 0);
    const int16x8_t a_nxt = vld1q_s16(A + pos + 1);
    const int16x8_t b_cur = vld1q_s16(B + pos + 0);
    const int16x8_t b_nxt = vld1q_s16(B + pos + 1);
    const int16x8_t diag0 = vaddq_s16(a_cur, b_nxt);           // A0+B1
    const int16x8_t diag1 = vaddq_s16(a_nxt, b_cur);           // A1+B0
    const int16x8_t all4 = vaddq_s16(diag0, diag1);            // A0+A1+B0+B1
    const int16x8_t diag0_x2 = vaddq_s16(diag0, diag0);        // 2*(A0+B1)
    const int16x8_t diag1_x2 = vaddq_s16(diag1, diag1);        // 2*(A1+B0)
    const int16x8_t mix0 = vshrq_n_s16(vaddq_s16(diag0_x2, all4), 3);
    const int16x8_t mix1 = vshrq_n_s16(vaddq_s16(diag1_x2, all4), 3);
    // Rounding halving add with the co-located 'A' sample.
    const int16x8_t even = vrhaddq_s16(mix1, a_cur);
    const int16x8_t odd = vrhaddq_s16(mix0, a_nxt);
    // Interleave even/odd results into output order.
    const int16x8x2_t zipped = vzipq_s16(even, odd);
    const int16x8_t y_lo =
        vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * pos + 0));
    const int16x8_t y_hi =
        vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * pos + 8));
    const int16x8_t sum_lo = vaddq_s16(y_lo, zipped.val[0]);
    const int16x8_t sum_hi = vaddq_s16(y_hi, zipped.val[1]);
    const int16x8_t res_lo = vmaxq_s16(vminq_s16(sum_lo, v_max), v_zero);
    const int16x8_t res_hi = vmaxq_s16(vminq_s16(sum_hi, v_max), v_zero);
    vst1q_u16(out + 2 * pos + 0, vreinterpretq_u16_s16(res_lo));
    vst1q_u16(out + 2 * pos + 8, vreinterpretq_u16_s16(res_hi));
    pos += 8;
  }

  // Scalar path for the positions left over by the vector loop:
  // v0 = (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4, v1 swaps A0<->A1, B0<->B1.
  for (; pos < len; ++pos) {
    const int diag0 = A[pos + 0] + B[pos + 1];
    const int diag1 = A[pos + 1] + B[pos + 0];
    const int all4_rnd = diag0 + diag1 + 8;
    const int v0 = (8 * A[pos + 0] + 2 * diag1 + all4_rnd) >> 4;
    const int v1 = (8 * A[pos + 1] + 2 * diag0 + all4_rnd) >> 4;
    out[2 * pos + 0] = clip_NEON(best_y[2 * pos + 0] + v0, max_y);
    out[2 * pos + 1] = clip_NEON(best_y[2 * pos + 1] + v1, max_y);
  }
}
|
||||
|
||||
// Same filter as the 16-bit variant, but the intermediate sums are widened to
// 32-bit lanes. Results are added to 'best_y', narrowed with unsigned
// saturation and capped at (1 << bit_depth) - 1 before being written to 'out'.
static void SharpYuvFilterRow32_NEON(const int16_t* A, const int16_t* B,
                                     int len, const uint16_t* best_y,
                                     uint16_t* out, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  const uint16x8_t v_max = vdupq_n_u16(max_y);
  int pos = 0;

  // Vector path: four input positions (eight outputs) per iteration.
  while (pos + 4 <= len) {
    const int16x4_t a_cur = vld1_s16(A + pos + 0);
    const int16x4_t a_nxt = vld1_s16(A + pos + 1);
    const int16x4_t b_cur = vld1_s16(B + pos + 0);
    const int16x4_t b_nxt = vld1_s16(B + pos + 1);
    const int32x4_t diag0 = vaddl_s16(a_cur, b_nxt);           // A0+B1
    const int32x4_t diag1 = vaddl_s16(a_nxt, b_cur);           // A1+B0
    const int32x4_t all4 = vaddq_s32(diag0, diag1);            // A0+A1+B0+B1
    const int32x4_t diag0_x2 = vaddq_s32(diag0, diag0);        // 2*(A0+B1)
    const int32x4_t diag1_x2 = vaddq_s32(diag1, diag1);        // 2*(A1+B0)
    const int32x4_t mix0 = vshrq_n_s32(vaddq_s32(diag0_x2, all4), 3);
    const int32x4_t mix1 = vshrq_n_s32(vaddq_s32(diag1_x2, all4), 3);
    // Rounding halving add with the co-located (widened) 'A' sample.
    const int32x4_t even = vrhaddq_s32(mix1, vmovl_s16(a_cur));
    const int32x4_t odd = vrhaddq_s32(mix0, vmovl_s16(a_nxt));
    // Interleave even/odd results into output order.
    const int32x4x2_t zipped = vzipq_s32(even, odd);

    const int16x8_t y = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * pos));
    const int32x4_t sum_lo = vaddw_s16(zipped.val[0], vget_low_s16(y));
    const int32x4_t sum_hi = vaddw_s16(zipped.val[1], vget_high_s16(y));
    // vqmovun saturates negative values to 0 while narrowing to 16 bits.
    const uint16x8_t narrowed =
        vcombine_u16(vqmovun_s32(sum_lo), vqmovun_s32(sum_hi));
    vst1q_u16(out + 2 * pos + 0, vminq_u16(narrowed, v_max));
    pos += 4;
  }

  // Scalar path for the positions left over by the vector loop:
  // v0 = (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4, v1 swaps A0<->A1, B0<->B1.
  for (; pos < len; ++pos) {
    const int diag0 = A[pos + 0] + B[pos + 1];
    const int diag1 = A[pos + 1] + B[pos + 0];
    const int all4_rnd = diag0 + diag1 + 8;
    const int v0 = (8 * A[pos + 0] + 2 * diag1 + all4_rnd) >> 4;
    const int v1 = (8 * A[pos + 1] + 2 * diag0 + all4_rnd) >> 4;
    out[2 * pos + 0] = clip_NEON(best_y[2 * pos + 0] + v0, max_y);
    out[2 * pos + 1] = clip_NEON(best_y[2 * pos + 1] + v1, max_y);
  }
}
|
||||
|
||||
// Dispatches to the 16-bit implementation when bit_depth <= 10 and to the
// 32-bit implementation otherwise.
static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
                                   const uint16_t* best_y, uint16_t* out,
                                   int bit_depth) {
  if (bit_depth > 10) {
    SharpYuvFilterRow32_NEON(A, B, len, best_y, out, bit_depth);
    return;
  }
  SharpYuvFilterRow16_NEON(A, B, len, best_y, out, bit_depth);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void InitSharpYuvNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) {
|
||||
SharpYuvUpdateY = SharpYuvUpdateY_NEON;
|
||||
SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON;
|
||||
SharpYuvFilterRow = SharpYuvFilterRow_NEON;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_NEON
|
||||
|
||||
extern void InitSharpYuvNEON(void);

// Built without WEBP_USE_NEON: there is nothing to install.
void InitSharpYuvNEON(void) {
}
|
||||
|
||||
#endif // WEBP_USE_NEON
|
201
3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c
vendored
Normal file
201
3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c
vendored
Normal file
@ -0,0 +1,201 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <stdlib.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
// Clamps 'v' to the inclusive range [0, max] and returns it as a uint16_t.
static uint16_t clip_SSE2(int v, int max) {
  if (v < 0) {
    return 0;
  }
  return (uint16_t)((v > max) ? max : v);
}
|
||||
|
||||
// Adds the per-sample error (ref[i] - src[i]) to dst[i] in place, clamping
// each updated sample to [0, (1 << bit_depth) - 1].
// Returns the sum of |ref[i] - src[i]| over the 'len' samples.
static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
                                     uint16_t* dst, int len, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  uint64_t diff = 0;
  uint32_t tmp[4];
  int i;
  const __m128i zero = _mm_setzero_si128();
  const __m128i max = _mm_set1_epi16(max_y);
  const __m128i one = _mm_set1_epi16(1);
  __m128i sum = zero;

  // Vector path: eight samples per iteration.
  for (i = 0; i + 8 <= len; i += 8) {
    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
    const __m128i F = _mm_add_epi16(C, D);       // new_y
    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
    // D * sign(D) summed over adjacent pairs: four 32-bit partial sums of
    // abs(diff_y).
    const __m128i I = _mm_madd_epi16(D, G);
    _mm_storeu_si128((__m128i*)(dst + i), H);
    sum = _mm_add_epi32(sum, I);
  }
  _mm_storeu_si128((__m128i*)tmp, sum);
  // Widen before adding: the four 32-bit partial sums must not wrap around
  // when they are combined into the 64-bit result.
  diff = (uint64_t)tmp[3] + tmp[2] + tmp[1] + tmp[0];

  // Scalar path for the samples left over by the vector loop.
  for (; i < len; ++i) {
    const int diff_y = ref[i] - src[i];
    const int new_y = (int)dst[i] + diff_y;
    dst[i] = clip_SSE2(new_y, max_y);
    diff += (uint64_t)abs(diff_y);
  }
  return diff;
}
|
||||
|
||||
// Adds the per-sample error (ref[i] - src[i]) to dst[i] in place for 'len'
// signed 16-bit samples. No clamping is applied.
static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
                                   int16_t* dst, int len) {
  int pos = 0;

  // Vector path: eight samples per iteration.
  while (pos + 8 <= len) {
    const __m128i v_ref = _mm_loadu_si128((const __m128i*)(ref + pos));
    const __m128i v_src = _mm_loadu_si128((const __m128i*)(src + pos));
    const __m128i v_dst = _mm_loadu_si128((const __m128i*)(dst + pos));
    const __m128i v_delta = _mm_sub_epi16(v_ref, v_src);  // diff_uv
    _mm_storeu_si128((__m128i*)(dst + pos), _mm_add_epi16(v_dst, v_delta));
    pos += 8;
  }

  // Scalar path for the samples left over by the vector loop.
  for (; pos < len; ++pos) {
    dst[pos] = (int16_t)(dst[pos] + (ref[pos] - src[pos]));
  }
}
|
||||
|
||||
// Filters rows 'A' and 'B' into two output samples per input position, adds
// the result to 'best_y' and writes it to 'out', clamped to
// [0, (1 << bit_depth) - 1]. All vector arithmetic is done in 16-bit lanes.
// Reads A[0..len] and B[0..len] (len + 1 samples each) and 2 * len samples
// of 'best_y'; writes 2 * len samples to 'out'.
static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B,
                                     int len, const uint16_t* best_y,
                                     uint16_t* out, int bit_depth) {
  const int max_y = (1 << bit_depth) - 1;
  const __m128i v_zero = _mm_setzero_si128();
  const __m128i v_max = _mm_set1_epi16(max_y);
  const __m128i v_eight = _mm_set1_epi16(8);
  int pos = 0;

  // Vector path: eight input positions (sixteen outputs) per iteration.
  while (pos + 8 <= len) {
    const __m128i a_cur = _mm_loadu_si128((const __m128i*)(A + pos + 0));
    const __m128i a_nxt = _mm_loadu_si128((const __m128i*)(A + pos + 1));
    const __m128i b_cur = _mm_loadu_si128((const __m128i*)(B + pos + 0));
    const __m128i b_nxt = _mm_loadu_si128((const __m128i*)(B + pos + 1));
    const __m128i diag0 = _mm_add_epi16(a_cur, b_nxt);     // A0+B1
    const __m128i diag1 = _mm_add_epi16(a_nxt, b_cur);     // A1+B0
    const __m128i all4 = _mm_add_epi16(diag0, diag1);      // A0+A1+B0+B1
    const __m128i all4_rnd = _mm_add_epi16(all4, v_eight);
    const __m128i diag0_x2 = _mm_add_epi16(diag0, diag0);  // 2*(A0+B1)
    const __m128i diag1_x2 = _mm_add_epi16(diag1, diag1);  // 2*(A1+B0)
    const __m128i mix0 =
        _mm_srai_epi16(_mm_add_epi16(diag0_x2, all4_rnd), 3);
    const __m128i mix1 =
        _mm_srai_epi16(_mm_add_epi16(diag1_x2, all4_rnd), 3);
    // Average with the co-located 'A' sample.
    const __m128i even = _mm_srai_epi16(_mm_add_epi16(mix1, a_cur), 1);
    const __m128i odd = _mm_srai_epi16(_mm_add_epi16(mix0, a_nxt), 1);
    // Interleave even/odd results into output order.
    const __m128i inter_lo = _mm_unpacklo_epi16(even, odd);
    const __m128i inter_hi = _mm_unpackhi_epi16(even, odd);
    const __m128i y_lo =
        _mm_loadu_si128((const __m128i*)(best_y + 2 * pos + 0));
    const __m128i y_hi =
        _mm_loadu_si128((const __m128i*)(best_y + 2 * pos + 8));
    const __m128i sum_lo = _mm_add_epi16(y_lo, inter_lo);
    const __m128i sum_hi = _mm_add_epi16(y_hi, inter_hi);
    const __m128i res_lo = _mm_max_epi16(_mm_min_epi16(sum_lo, v_max), v_zero);
    const __m128i res_hi = _mm_max_epi16(_mm_min_epi16(sum_hi, v_max), v_zero);
    _mm_storeu_si128((__m128i*)(out + 2 * pos + 0), res_lo);
    _mm_storeu_si128((__m128i*)(out + 2 * pos + 8), res_hi);
    pos += 8;
  }

  // Scalar path for the positions left over by the vector loop.
  for (; pos < len; ++pos) {
    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4
    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
    // The shared sub-expressions are computed once.
    const int diag0 = A[pos + 0] + B[pos + 1];
    const int diag1 = A[pos + 1] + B[pos + 0];
    const int all4_rnd = diag0 + diag1 + 8;
    const int v0 = (8 * A[pos + 0] + 2 * diag1 + all4_rnd) >> 4;
    const int v1 = (8 * A[pos + 1] + 2 * diag0 + all4_rnd) >> 4;
    out[2 * pos + 0] = clip_SSE2(best_y[2 * pos + 0] + v0, max_y);
    out[2 * pos + 1] = clip_SSE2(best_y[2 * pos + 1] + v1, max_y);
  }
}
|
||||
|
||||
// Sign-extends the four low signed 16-bit lanes of 'in' to 32-bit lanes:
// each value is duplicated into both halves of a 32-bit lane, then shifted
// right arithmetically by 16.
static WEBP_INLINE __m128i s16_to_s32(__m128i in) {
  const __m128i doubled = _mm_unpacklo_epi16(in, in);
  return _mm_srai_epi32(doubled, 16);
}
|
||||
|
||||
static void SharpYuvFilterRow32_SSE2(const int16_t* A, const int16_t* B,
|
||||
int len, const uint16_t* best_y,
|
||||
uint16_t* out, int bit_depth) {
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
int i;
|
||||
const __m128i kCst8 = _mm_set1_epi32(8);
|
||||
const __m128i max = _mm_set1_epi16(max_y);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 4 <= len; i += 4) {
|
||||
const __m128i a0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 0)));
|
||||
const __m128i a1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 1)));
|
||||
const __m128i b0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 0)));
|
||||
const __m128i b1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 1)));
|
||||
const __m128i a0b1 = _mm_add_epi32(a0, b1);
|
||||
const __m128i a1b0 = _mm_add_epi32(a1, b0);
|
||||
const __m128i a0a1b0b1 = _mm_add_epi32(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const __m128i a0a1b0b1_8 = _mm_add_epi32(a0a1b0b1, kCst8);
|
||||
const __m128i a0b1_2 = _mm_add_epi32(a0b1, a0b1); // 2*(A0+B1)
|
||||
const __m128i a1b0_2 = _mm_add_epi32(a1b0, a1b0); // 2*(A1+B0)
|
||||
const __m128i c0 = _mm_srai_epi32(_mm_add_epi32(a0b1_2, a0a1b0b1_8), 3);
|
||||
const __m128i c1 = _mm_srai_epi32(_mm_add_epi32(a1b0_2, a0a1b0b1_8), 3);
|
||||
const __m128i d0 = _mm_add_epi32(c1, a0);
|
||||
const __m128i d1 = _mm_add_epi32(c0, a1);
|
||||
const __m128i e0 = _mm_srai_epi32(d0, 1);
|
||||
const __m128i e1 = _mm_srai_epi32(d1, 1);
|
||||
const __m128i f0 = _mm_unpacklo_epi32(e0, e1);
|
||||
const __m128i f1 = _mm_unpackhi_epi32(e0, e1);
|
||||
const __m128i g = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
|
||||
const __m128i h_16 = _mm_add_epi16(g, _mm_packs_epi32(f0, f1));
|
||||
const __m128i final = _mm_max_epi16(_mm_min_epi16(h_16, max), zero);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 0), final);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
// (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
|
||||
// = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
|
||||
// We reuse the common sub-expressions.
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
|
||||
out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatches to the 16-bit implementation when bit_depth <= 10 and to the
// 32-bit implementation otherwise.
static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
                                   const uint16_t* best_y, uint16_t* out,
                                   int bit_depth) {
  if (bit_depth > 10) {
    SharpYuvFilterRow32_SSE2(A, B, len, best_y, out, bit_depth);
    return;
  }
  SharpYuvFilterRow16_SSE2(A, B, len, best_y, out, bit_depth);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void InitSharpYuvSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) {
|
||||
SharpYuvUpdateY = SharpYuvUpdateY_SSE2;
|
||||
SharpYuvUpdateRGB = SharpYuvUpdateRGB_SSE2;
|
||||
SharpYuvFilterRow = SharpYuvFilterRow_SSE2;
|
||||
}
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
extern void InitSharpYuvSSE2(void);

// Built without WEBP_USE_SSE2: there is nothing to install.
void InitSharpYuvSSE2(void) {
}
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
37
3rdparty/libwebp/src/dec/alpha_dec.c
vendored
37
3rdparty/libwebp/src/dec/alpha_dec.c
vendored
@ -117,21 +117,12 @@ static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
|
||||
const uint8_t* deltas = dec->alpha_data_ + ALPHA_HEADER_LEN + row * width;
|
||||
uint8_t* dst = dec->alpha_plane_ + row * width;
|
||||
assert(deltas <= &dec->alpha_data_[dec->alpha_data_size_]);
|
||||
if (alph_dec->filter_ != WEBP_FILTER_NONE) {
|
||||
assert(WebPUnfilters[alph_dec->filter_] != NULL);
|
||||
for (y = 0; y < num_rows; ++y) {
|
||||
WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
|
||||
prev_line = dst;
|
||||
dst += width;
|
||||
deltas += width;
|
||||
}
|
||||
} else {
|
||||
for (y = 0; y < num_rows; ++y) {
|
||||
memcpy(dst, deltas, width * sizeof(*dst));
|
||||
prev_line = dst;
|
||||
dst += width;
|
||||
deltas += width;
|
||||
}
|
||||
assert(WebPUnfilters[alph_dec->filter_] != NULL);
|
||||
for (y = 0; y < num_rows; ++y) {
|
||||
WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
|
||||
prev_line = dst;
|
||||
dst += width;
|
||||
deltas += width;
|
||||
}
|
||||
dec->alpha_prev_line_ = prev_line;
|
||||
} else { // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
|
||||
@ -155,7 +146,8 @@ static int AllocateAlphaPlane(VP8Decoder* const dec, const VP8Io* const io) {
|
||||
dec->alpha_plane_mem_ =
|
||||
(uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane_));
|
||||
if (dec->alpha_plane_mem_ == NULL) {
|
||||
return 0;
|
||||
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
|
||||
"Alpha decoder initialization failed.");
|
||||
}
|
||||
dec->alpha_plane_ = dec->alpha_plane_mem_;
|
||||
dec->alpha_prev_line_ = NULL;
|
||||
@ -183,16 +175,25 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
|
||||
assert(dec != NULL && io != NULL);
|
||||
|
||||
if (row < 0 || num_rows <= 0 || row + num_rows > height) {
|
||||
return NULL; // sanity check.
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!dec->is_alpha_decoded_) {
|
||||
if (dec->alph_dec_ == NULL) { // Initialize decoder.
|
||||
dec->alph_dec_ = ALPHNew();
|
||||
if (dec->alph_dec_ == NULL) return NULL;
|
||||
if (dec->alph_dec_ == NULL) {
|
||||
VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
|
||||
"Alpha decoder initialization failed.");
|
||||
return NULL;
|
||||
}
|
||||
if (!AllocateAlphaPlane(dec, io)) goto Error;
|
||||
if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
|
||||
io, dec->alpha_plane_)) {
|
||||
VP8LDecoder* const vp8l_dec = dec->alph_dec_->vp8l_dec_;
|
||||
VP8SetError(dec,
|
||||
(vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY
|
||||
: vp8l_dec->status_,
|
||||
"Alpha decoder initialization failed.");
|
||||
goto Error;
|
||||
}
|
||||
// if we allowed use of alpha dithering, check whether it's needed at all
|
||||
|
12
3rdparty/libwebp/src/dec/buffer_dec.c
vendored
12
3rdparty/libwebp/src/dec/buffer_dec.c
vendored
@ -75,7 +75,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
|
||||
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
|
||||
const int stride = abs(buf->stride);
|
||||
const uint64_t size =
|
||||
MIN_BUFFER_SIZE(width * kModeBpp[mode], height, stride);
|
||||
MIN_BUFFER_SIZE((uint64_t)width * kModeBpp[mode], height, stride);
|
||||
ok &= (size <= buf->size);
|
||||
ok &= (stride >= width * kModeBpp[mode]);
|
||||
ok &= (buf->rgba != NULL);
|
||||
@ -102,7 +102,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
|
||||
int stride;
|
||||
uint64_t size;
|
||||
|
||||
if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
|
||||
if ((uint64_t)w * kModeBpp[mode] >= (1ull << 31)) {
|
||||
return VP8_STATUS_INVALID_PARAM;
|
||||
}
|
||||
stride = w * kModeBpp[mode];
|
||||
@ -117,7 +117,6 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
|
||||
}
|
||||
total_size = size + 2 * uv_size + a_size;
|
||||
|
||||
// Security/sanity checks
|
||||
output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output));
|
||||
if (output == NULL) {
|
||||
return VP8_STATUS_OUT_OF_MEMORY;
|
||||
@ -156,11 +155,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
|
||||
}
|
||||
if (WebPIsRGBMode(buffer->colorspace)) {
|
||||
WebPRGBABuffer* const buf = &buffer->u.RGBA;
|
||||
buf->rgba += (buffer->height - 1) * buf->stride;
|
||||
buf->rgba += (int64_t)(buffer->height - 1) * buf->stride;
|
||||
buf->stride = -buf->stride;
|
||||
} else {
|
||||
WebPYUVABuffer* const buf = &buffer->u.YUVA;
|
||||
const int H = buffer->height;
|
||||
const int64_t H = buffer->height;
|
||||
buf->y += (H - 1) * buf->y_stride;
|
||||
buf->y_stride = -buf->y_stride;
|
||||
buf->u += ((H - 1) >> 1) * buf->u_stride;
|
||||
@ -188,8 +187,7 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
|
||||
const int ch = options->crop_height;
|
||||
const int x = options->crop_left & ~1;
|
||||
const int y = options->crop_top & ~1;
|
||||
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
|
||||
x + cw > width || y + ch > height) {
|
||||
if (!WebPCheckCropDimensions(width, height, x, y, cw, ch)) {
|
||||
return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
|
||||
}
|
||||
width = cw;
|
||||
|
2
3rdparty/libwebp/src/dec/frame_dec.c
vendored
2
3rdparty/libwebp/src/dec/frame_dec.c
vendored
@ -705,7 +705,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
||||
+ cache_size + alpha_size + WEBP_ALIGN_CST;
|
||||
uint8_t* mem;
|
||||
|
||||
if (needed != (size_t)needed) return 0; // check for overflow
|
||||
if (!CheckSizeOverflow(needed)) return 0; // check for overflow
|
||||
if (needed > dec->mem_size_) {
|
||||
WebPSafeFree(dec->mem_);
|
||||
dec->mem_size_ = 0;
|
||||
|
98
3rdparty/libwebp/src/dec/io_dec.c
vendored
98
3rdparty/libwebp/src/dec/io_dec.c
vendored
@ -298,46 +298,57 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
|
||||
const int uv_out_height = (out_height + 1) >> 1;
|
||||
const int uv_in_width = (io->mb_w + 1) >> 1;
|
||||
const int uv_in_height = (io->mb_h + 1) >> 1;
|
||||
const size_t work_size = 2 * out_width; // scratch memory for luma rescaler
|
||||
// scratch memory for luma rescaler
|
||||
const size_t work_size = 2 * (size_t)out_width;
|
||||
const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones
|
||||
size_t tmp_size, rescaler_size;
|
||||
uint64_t total_size;
|
||||
size_t rescaler_size;
|
||||
rescaler_t* work;
|
||||
WebPRescaler* scalers;
|
||||
const int num_rescalers = has_alpha ? 4 : 3;
|
||||
|
||||
tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
|
||||
total_size = ((uint64_t)work_size + 2 * uv_work_size) * sizeof(*work);
|
||||
if (has_alpha) {
|
||||
tmp_size += work_size * sizeof(*work);
|
||||
total_size += (uint64_t)work_size * sizeof(*work);
|
||||
}
|
||||
rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST;
|
||||
total_size += rescaler_size;
|
||||
if (!CheckSizeOverflow(total_size)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
p->memory = WebPSafeMalloc(1ULL, tmp_size + rescaler_size);
|
||||
p->memory = WebPSafeMalloc(1ULL, (size_t)total_size);
|
||||
if (p->memory == NULL) {
|
||||
return 0; // memory error
|
||||
}
|
||||
work = (rescaler_t*)p->memory;
|
||||
|
||||
scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + tmp_size);
|
||||
scalers = (WebPRescaler*)WEBP_ALIGN(
|
||||
(const uint8_t*)work + total_size - rescaler_size);
|
||||
p->scaler_y = &scalers[0];
|
||||
p->scaler_u = &scalers[1];
|
||||
p->scaler_v = &scalers[2];
|
||||
p->scaler_a = has_alpha ? &scalers[3] : NULL;
|
||||
|
||||
WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
|
||||
buf->y, out_width, out_height, buf->y_stride, 1,
|
||||
work);
|
||||
WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
|
||||
buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
|
||||
work + work_size);
|
||||
WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
|
||||
buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
|
||||
work + work_size + uv_work_size);
|
||||
if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
|
||||
buf->y, out_width, out_height, buf->y_stride, 1,
|
||||
work) ||
|
||||
!WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
|
||||
buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
|
||||
work + work_size) ||
|
||||
!WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
|
||||
buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
|
||||
work + work_size + uv_work_size)) {
|
||||
return 0;
|
||||
}
|
||||
p->emit = EmitRescaledYUV;
|
||||
|
||||
if (has_alpha) {
|
||||
WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
|
||||
buf->a, out_width, out_height, buf->a_stride, 1,
|
||||
work + work_size + 2 * uv_work_size);
|
||||
if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
|
||||
buf->a, out_width, out_height, buf->a_stride, 1,
|
||||
work + work_size + 2 * uv_work_size)) {
|
||||
return 0;
|
||||
}
|
||||
p->emit_alpha = EmitRescaledAlphaYUV;
|
||||
WebPInitAlphaProcessing();
|
||||
}
|
||||
@ -480,51 +491,58 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
|
||||
const int out_height = io->scaled_height;
|
||||
const int uv_in_width = (io->mb_w + 1) >> 1;
|
||||
const int uv_in_height = (io->mb_h + 1) >> 1;
|
||||
const size_t work_size = 2 * out_width; // scratch memory for one rescaler
|
||||
// scratch memory for one rescaler
|
||||
const size_t work_size = 2 * (size_t)out_width;
|
||||
rescaler_t* work; // rescalers work area
|
||||
uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion
|
||||
size_t tmp_size1, tmp_size2, total_size, rescaler_size;
|
||||
uint64_t tmp_size1, tmp_size2, total_size;
|
||||
size_t rescaler_size;
|
||||
WebPRescaler* scalers;
|
||||
const int num_rescalers = has_alpha ? 4 : 3;
|
||||
|
||||
tmp_size1 = 3 * work_size;
|
||||
tmp_size2 = 3 * out_width;
|
||||
if (has_alpha) {
|
||||
tmp_size1 += work_size;
|
||||
tmp_size2 += out_width;
|
||||
}
|
||||
tmp_size1 = (uint64_t)num_rescalers * work_size;
|
||||
tmp_size2 = (uint64_t)num_rescalers * out_width;
|
||||
total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
|
||||
rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST;
|
||||
total_size += rescaler_size;
|
||||
if (!CheckSizeOverflow(total_size)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
p->memory = WebPSafeMalloc(1ULL, total_size + rescaler_size);
|
||||
p->memory = WebPSafeMalloc(1ULL, (size_t)total_size);
|
||||
if (p->memory == NULL) {
|
||||
return 0; // memory error
|
||||
}
|
||||
work = (rescaler_t*)p->memory;
|
||||
tmp = (uint8_t*)(work + tmp_size1);
|
||||
|
||||
scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size);
|
||||
scalers = (WebPRescaler*)WEBP_ALIGN(
|
||||
(const uint8_t*)work + total_size - rescaler_size);
|
||||
p->scaler_y = &scalers[0];
|
||||
p->scaler_u = &scalers[1];
|
||||
p->scaler_v = &scalers[2];
|
||||
p->scaler_a = has_alpha ? &scalers[3] : NULL;
|
||||
|
||||
WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
|
||||
tmp + 0 * out_width, out_width, out_height, 0, 1,
|
||||
work + 0 * work_size);
|
||||
WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
|
||||
tmp + 1 * out_width, out_width, out_height, 0, 1,
|
||||
work + 1 * work_size);
|
||||
WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
|
||||
tmp + 2 * out_width, out_width, out_height, 0, 1,
|
||||
work + 2 * work_size);
|
||||
if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
|
||||
tmp + 0 * out_width, out_width, out_height, 0, 1,
|
||||
work + 0 * work_size) ||
|
||||
!WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
|
||||
tmp + 1 * out_width, out_width, out_height, 0, 1,
|
||||
work + 1 * work_size) ||
|
||||
!WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
|
||||
tmp + 2 * out_width, out_width, out_height, 0, 1,
|
||||
work + 2 * work_size)) {
|
||||
return 0;
|
||||
}
|
||||
p->emit = EmitRescaledRGB;
|
||||
WebPInitYUV444Converters();
|
||||
|
||||
if (has_alpha) {
|
||||
WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
|
||||
tmp + 3 * out_width, out_width, out_height, 0, 1,
|
||||
work + 3 * work_size);
|
||||
if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
|
||||
tmp + 3 * out_width, out_width, out_height, 0, 1,
|
||||
work + 3 * work_size)) {
|
||||
return 0;
|
||||
}
|
||||
p->emit_alpha = EmitRescaledAlphaRGB;
|
||||
if (p->output->colorspace == MODE_RGBA_4444 ||
|
||||
p->output->colorspace == MODE_rgbA_4444) {
|
||||
|
3
3rdparty/libwebp/src/dec/tree_dec.c
vendored
3
3rdparty/libwebp/src/dec/tree_dec.c
vendored
@ -12,10 +12,11 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "src/dec/vp8i_dec.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/utils/bit_reader_inl_utils.h"
|
||||
|
||||
#if !defined(USE_GENERIC_TREE)
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64
|
||||
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
|
||||
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
|
||||
#else
|
||||
|
6
3rdparty/libwebp/src/dec/vp8_dec.c
vendored
6
3rdparty/libwebp/src/dec/vp8_dec.c
vendored
@ -335,7 +335,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
|
||||
io->scaled_width = io->width;
|
||||
io->scaled_height = io->height;
|
||||
|
||||
io->mb_w = io->width; // sanity check
|
||||
io->mb_w = io->width; // for soundness
|
||||
io->mb_h = io->height; // ditto
|
||||
|
||||
VP8ResetProba(&dec->proba_);
|
||||
@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = {
|
||||
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
|
||||
};
|
||||
|
||||
// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
|
||||
// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
|
||||
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
|
||||
int v;
|
||||
if (!VP8GetBit(br, p[3], "coeffs")) {
|
||||
@ -494,6 +494,8 @@ static int GetCoeffsAlt(VP8BitReader* const br,
|
||||
return 16;
|
||||
}
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
WEBP_DSP_INIT_FUNC(InitGetCoeffs) {
|
||||
if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) {
|
||||
GetCoeffs = GetCoeffsAlt;
|
||||
|
4
3rdparty/libwebp/src/dec/vp8i_dec.h
vendored
4
3rdparty/libwebp/src/dec/vp8i_dec.h
vendored
@ -31,8 +31,8 @@ extern "C" {
|
||||
|
||||
// version numbers
|
||||
#define DEC_MAJ_VERSION 1
|
||||
#define DEC_MIN_VERSION 2
|
||||
#define DEC_REV_VERSION 0
|
||||
#define DEC_MIN_VERSION 3
|
||||
#define DEC_REV_VERSION 1
|
||||
|
||||
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||
// Constraints are: We need to store one 16x16 block of luma samples (y),
|
||||
|
244
3rdparty/libwebp/src/dec/vp8l_dec.c
vendored
244
3rdparty/libwebp/src/dec/vp8l_dec.c
vendored
@ -12,6 +12,7 @@
|
||||
// Authors: Vikas Arora (vikaas.arora@gmail.com)
|
||||
// Jyrki Alakuijala (jyrki@google.com)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/dec/alphai_dec.h"
|
||||
@ -84,7 +85,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
|
||||
// to 256 (green component values) + 24 (length prefix values)
|
||||
// + color_cache_size (between 0 and 2048).
|
||||
// All values computed for 8-bit first level lookup with Mark Adler's tool:
|
||||
// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
|
||||
// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
|
||||
#define FIXED_TABLE_SIZE (630 * 3 + 410)
|
||||
static const uint16_t kTableSize[12] = {
|
||||
FIXED_TABLE_SIZE + 654,
|
||||
@ -101,6 +102,14 @@ static const uint16_t kTableSize[12] = {
|
||||
FIXED_TABLE_SIZE + 2704
|
||||
};
|
||||
|
||||
static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) {
|
||||
// The oldest error reported takes precedence over the new one.
|
||||
if (dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED) {
|
||||
dec->status_ = error;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int DecodeImageStream(int xsize, int ysize,
|
||||
int is_level0,
|
||||
VP8LDecoder* const dec,
|
||||
@ -178,7 +187,7 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Decodes the next Huffman code from bit-stream.
|
||||
// FillBitWindow(br) needs to be called at minimum every second call
|
||||
// VP8LFillBitWindow(br) needs to be called at minimum every second call
|
||||
// to ReadSymbol, in order to pre-fetch enough bits.
|
||||
static WEBP_INLINE int ReadSymbol(const HuffmanCode* table,
|
||||
VP8LBitReader* const br) {
|
||||
@ -253,11 +262,11 @@ static int ReadHuffmanCodeLengths(
|
||||
int symbol;
|
||||
int max_symbol;
|
||||
int prev_code_len = DEFAULT_CODE_LENGTH;
|
||||
HuffmanCode table[1 << LENGTHS_TABLE_BITS];
|
||||
HuffmanTables tables;
|
||||
|
||||
if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS,
|
||||
code_length_code_lengths,
|
||||
NUM_CODE_LENGTH_CODES)) {
|
||||
if (!VP8LHuffmanTablesAllocate(1 << LENGTHS_TABLE_BITS, &tables) ||
|
||||
!VP8LBuildHuffmanTable(&tables, LENGTHS_TABLE_BITS,
|
||||
code_length_code_lengths, NUM_CODE_LENGTH_CODES)) {
|
||||
goto End;
|
||||
}
|
||||
|
||||
@ -277,7 +286,7 @@ static int ReadHuffmanCodeLengths(
|
||||
int code_len;
|
||||
if (max_symbol-- == 0) break;
|
||||
VP8LFillBitWindow(br);
|
||||
p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
|
||||
p = &tables.curr_segment->start[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
|
||||
VP8LSetBitPos(br, br->bit_pos_ + p->bits);
|
||||
code_len = p->value;
|
||||
if (code_len < kCodeLengthLiterals) {
|
||||
@ -300,14 +309,16 @@ static int ReadHuffmanCodeLengths(
|
||||
ok = 1;
|
||||
|
||||
End:
|
||||
if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
|
||||
VP8LHuffmanTablesDeallocate(&tables);
|
||||
if (!ok) return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
return ok;
|
||||
}
|
||||
|
||||
// 'code_lengths' is a pre-allocated temporary buffer, used for creating Huffman
|
||||
// tree.
|
||||
static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
|
||||
int* const code_lengths, HuffmanCode* const table) {
|
||||
int* const code_lengths,
|
||||
HuffmanTables* const table) {
|
||||
int ok = 0;
|
||||
int size = 0;
|
||||
VP8LBitReader* const br = &dec->br_;
|
||||
@ -321,7 +332,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
|
||||
// The first code is either 1 bit or 8 bit code.
|
||||
int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
|
||||
code_lengths[symbol] = 1;
|
||||
// The second code (if present), is always 8 bit long.
|
||||
// The second code (if present), is always 8 bits long.
|
||||
if (num_symbols == 2) {
|
||||
symbol = VP8LReadBits(br, 8);
|
||||
code_lengths[symbol] = 1;
|
||||
@ -331,10 +342,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
|
||||
int i;
|
||||
int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
|
||||
const int num_codes = VP8LReadBits(br, 4) + 4;
|
||||
if (num_codes > NUM_CODE_LENGTH_CODES) {
|
||||
dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
|
||||
return 0;
|
||||
}
|
||||
assert(num_codes <= NUM_CODE_LENGTH_CODES);
|
||||
|
||||
for (i = 0; i < num_codes; ++i) {
|
||||
code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3);
|
||||
@ -349,36 +357,35 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
|
||||
code_lengths, alphabet_size);
|
||||
}
|
||||
if (!ok || size == 0) {
|
||||
dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
|
||||
return 0;
|
||||
return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
int color_cache_bits, int allow_recursion) {
|
||||
int i, j;
|
||||
int i;
|
||||
VP8LBitReader* const br = &dec->br_;
|
||||
VP8LMetadata* const hdr = &dec->hdr_;
|
||||
uint32_t* huffman_image = NULL;
|
||||
HTreeGroup* htree_groups = NULL;
|
||||
HuffmanCode* huffman_tables = NULL;
|
||||
HuffmanCode* huffman_table = NULL;
|
||||
HuffmanTables* huffman_tables = &hdr->huffman_tables_;
|
||||
int num_htree_groups = 1;
|
||||
int num_htree_groups_max = 1;
|
||||
int max_alphabet_size = 0;
|
||||
int* code_lengths = NULL;
|
||||
const int table_size = kTableSize[color_cache_bits];
|
||||
int* mapping = NULL;
|
||||
int ok = 0;
|
||||
|
||||
// Check the table has been 0 initialized (through InitMetadata).
|
||||
assert(huffman_tables->root.start == NULL);
|
||||
assert(huffman_tables->curr_segment == NULL);
|
||||
|
||||
if (allow_recursion && VP8LReadBits(br, 1)) {
|
||||
// use meta Huffman codes.
|
||||
const int huffman_precision = VP8LReadBits(br, 3) + 2;
|
||||
const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
|
||||
const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
|
||||
const int huffman_pixs = huffman_xsize * huffman_ysize;
|
||||
if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec,
|
||||
if (!DecodeImageStream(huffman_xsize, huffman_ysize, /*is_level0=*/0, dec,
|
||||
&huffman_image)) {
|
||||
goto Error;
|
||||
}
|
||||
@ -402,7 +409,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
// values [0, num_htree_groups)
|
||||
mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping));
|
||||
if (mapping == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
goto Error;
|
||||
}
|
||||
// -1 means a value is unmapped, and therefore unused in the Huffman
|
||||
@ -421,29 +428,55 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
|
||||
if (br->eos_) goto Error;
|
||||
|
||||
// Find maximum alphabet size for the htree group.
|
||||
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
|
||||
int alphabet_size = kAlphabetSize[j];
|
||||
if (j == 0 && color_cache_bits > 0) {
|
||||
alphabet_size += 1 << color_cache_bits;
|
||||
}
|
||||
if (max_alphabet_size < alphabet_size) {
|
||||
max_alphabet_size = alphabet_size;
|
||||
}
|
||||
if (!ReadHuffmanCodesHelper(color_cache_bits, num_htree_groups,
|
||||
num_htree_groups_max, mapping, dec,
|
||||
huffman_tables, &htree_groups)) {
|
||||
goto Error;
|
||||
}
|
||||
ok = 1;
|
||||
|
||||
code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
|
||||
sizeof(*code_lengths));
|
||||
huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size,
|
||||
sizeof(*huffman_tables));
|
||||
htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
|
||||
// All OK. Finalize pointers.
|
||||
hdr->huffman_image_ = huffman_image;
|
||||
hdr->num_htree_groups_ = num_htree_groups;
|
||||
hdr->htree_groups_ = htree_groups;
|
||||
|
||||
if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
Error:
|
||||
WebPSafeFree(mapping);
|
||||
if (!ok) {
|
||||
WebPSafeFree(huffman_image);
|
||||
VP8LHuffmanTablesDeallocate(huffman_tables);
|
||||
VP8LHtreeGroupsFree(htree_groups);
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups,
|
||||
int num_htree_groups_max, const int* const mapping,
|
||||
VP8LDecoder* const dec,
|
||||
HuffmanTables* const huffman_tables,
|
||||
HTreeGroup** const htree_groups) {
|
||||
int i, j, ok = 0;
|
||||
const int max_alphabet_size =
|
||||
kAlphabetSize[0] + ((color_cache_bits > 0) ? 1 << color_cache_bits : 0);
|
||||
const int table_size = kTableSize[color_cache_bits];
|
||||
int* code_lengths = NULL;
|
||||
|
||||
if ((mapping == NULL && num_htree_groups != num_htree_groups_max) ||
|
||||
num_htree_groups > num_htree_groups_max) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
code_lengths =
|
||||
(int*)WebPSafeCalloc((uint64_t)max_alphabet_size, sizeof(*code_lengths));
|
||||
*htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
|
||||
|
||||
if (*htree_groups == NULL || code_lengths == NULL ||
|
||||
!VP8LHuffmanTablesAllocate(num_htree_groups * table_size,
|
||||
huffman_tables)) {
|
||||
VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
goto Error;
|
||||
}
|
||||
|
||||
huffman_table = huffman_tables;
|
||||
for (i = 0; i < num_htree_groups_max; ++i) {
|
||||
// If the index "i" is unused in the Huffman image, just make sure the
|
||||
// coefficients are valid but do not store them.
|
||||
@ -460,7 +493,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
}
|
||||
} else {
|
||||
HTreeGroup* const htree_group =
|
||||
&htree_groups[(mapping == NULL) ? i : mapping[i]];
|
||||
&(*htree_groups)[(mapping == NULL) ? i : mapping[i]];
|
||||
HuffmanCode** const htrees = htree_group->htrees;
|
||||
int size;
|
||||
int total_size = 0;
|
||||
@ -468,19 +501,20 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
int max_bits = 0;
|
||||
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
|
||||
int alphabet_size = kAlphabetSize[j];
|
||||
htrees[j] = huffman_table;
|
||||
if (j == 0 && color_cache_bits > 0) {
|
||||
alphabet_size += (1 << color_cache_bits);
|
||||
}
|
||||
size = ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_table);
|
||||
size =
|
||||
ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables);
|
||||
htrees[j] = huffman_tables->curr_segment->curr_table;
|
||||
if (size == 0) {
|
||||
goto Error;
|
||||
}
|
||||
if (is_trivial_literal && kLiteralMap[j] == 1) {
|
||||
is_trivial_literal = (huffman_table->bits == 0);
|
||||
is_trivial_literal = (htrees[j]->bits == 0);
|
||||
}
|
||||
total_size += huffman_table->bits;
|
||||
huffman_table += size;
|
||||
total_size += htrees[j]->bits;
|
||||
huffman_tables->curr_segment->curr_table += size;
|
||||
if (j <= ALPHA) {
|
||||
int local_max_bits = code_lengths[0];
|
||||
int k;
|
||||
@ -511,19 +545,12 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
}
|
||||
ok = 1;
|
||||
|
||||
// All OK. Finalize pointers.
|
||||
hdr->huffman_image_ = huffman_image;
|
||||
hdr->num_htree_groups_ = num_htree_groups;
|
||||
hdr->htree_groups_ = htree_groups;
|
||||
hdr->huffman_tables_ = huffman_tables;
|
||||
|
||||
Error:
|
||||
WebPSafeFree(code_lengths);
|
||||
WebPSafeFree(mapping);
|
||||
if (!ok) {
|
||||
WebPSafeFree(huffman_image);
|
||||
WebPSafeFree(huffman_tables);
|
||||
VP8LHtreeGroupsFree(htree_groups);
|
||||
VP8LHuffmanTablesDeallocate(huffman_tables);
|
||||
VP8LHtreeGroupsFree(*htree_groups);
|
||||
*htree_groups = NULL;
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
@ -547,8 +574,7 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
scaled_data_size * sizeof(*scaled_data);
|
||||
uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory));
|
||||
if (memory == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
return 0;
|
||||
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
}
|
||||
assert(dec->rescaler_memory == NULL);
|
||||
dec->rescaler_memory = memory;
|
||||
@ -559,8 +585,11 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
memory += work_size * sizeof(*work);
|
||||
scaled_data = (uint32_t*)memory;
|
||||
|
||||
WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data,
|
||||
out_width, out_height, 0, num_channels, work);
|
||||
if (!WebPRescalerInit(dec->rescaler, in_width, in_height,
|
||||
(uint8_t*)scaled_data, out_width, out_height,
|
||||
0, num_channels, work)) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif // WEBP_REDUCE_SIZE
|
||||
@ -574,13 +603,14 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
|
||||
int rgba_stride, uint8_t* const rgba) {
|
||||
uint32_t* const src = (uint32_t*)rescaler->dst;
|
||||
uint8_t* dst = rgba;
|
||||
const int dst_width = rescaler->dst_width;
|
||||
int num_lines_out = 0;
|
||||
while (WebPRescalerHasPendingOutput(rescaler)) {
|
||||
uint8_t* const dst = rgba + num_lines_out * rgba_stride;
|
||||
WebPRescalerExportRow(rescaler);
|
||||
WebPMultARGBRow(src, dst_width, 1);
|
||||
VP8LConvertFromBGRA(src, dst_width, colorspace, dst);
|
||||
dst += rgba_stride;
|
||||
++num_lines_out;
|
||||
}
|
||||
return num_lines_out;
|
||||
@ -594,8 +624,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
|
||||
int num_lines_in = 0;
|
||||
int num_lines_out = 0;
|
||||
while (num_lines_in < mb_h) {
|
||||
uint8_t* const row_in = in + num_lines_in * in_stride;
|
||||
uint8_t* const row_out = out + num_lines_out * out_stride;
|
||||
uint8_t* const row_in = in + (uint64_t)num_lines_in * in_stride;
|
||||
uint8_t* const row_out = out + (uint64_t)num_lines_out * out_stride;
|
||||
const int lines_left = mb_h - num_lines_in;
|
||||
const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
|
||||
int lines_imported;
|
||||
@ -796,7 +826,8 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
|
||||
const WebPDecBuffer* const output = dec->output_;
|
||||
if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA
|
||||
const WebPRGBABuffer* const buf = &output->u.RGBA;
|
||||
uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
|
||||
uint8_t* const rgba =
|
||||
buf->rgba + (int64_t)dec->last_out_row_ * buf->stride;
|
||||
const int num_rows_out =
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
io->use_scaling ?
|
||||
@ -1077,12 +1108,10 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
|
||||
End:
|
||||
br->eos_ = VP8LIsEndOfStream(br);
|
||||
if (!ok || (br->eos_ && pos < end)) {
|
||||
ok = 0;
|
||||
dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED
|
||||
: VP8_STATUS_BITSTREAM_ERROR;
|
||||
} else {
|
||||
dec->last_pixel_ = pos;
|
||||
return VP8LSetError(
|
||||
dec, br->eos_ ? VP8_STATUS_SUSPENDED : VP8_STATUS_BITSTREAM_ERROR);
|
||||
}
|
||||
dec->last_pixel_ = pos;
|
||||
return ok;
|
||||
}
|
||||
|
||||
@ -1232,9 +1261,20 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
||||
}
|
||||
|
||||
br->eos_ = VP8LIsEndOfStream(br);
|
||||
if (dec->incremental_ && br->eos_ && src < src_end) {
|
||||
// In incremental decoding:
|
||||
// br->eos_ && src < src_last: if 'br' reached the end of the buffer and
|
||||
// 'src_last' has not been reached yet, there is not enough data. 'dec' has to
|
||||
// be reset until there is more data.
|
||||
// !br->eos_ && src < src_last: this cannot happen as either the buffer is
|
||||
// fully read, or enough has been read to reach 'src_last'.
|
||||
// src >= src_last: 'src_last' is reached, all is fine. 'src' can actually go
|
||||
// beyond 'src_last' in case the image is cropped and an LZ77 goes further.
|
||||
// The buffer might have been enough or there is some left. 'br->eos_' does
|
||||
// not matter.
|
||||
assert(!dec->incremental_ || (br->eos_ && src < src_last) || src >= src_last);
|
||||
if (dec->incremental_ && br->eos_ && src < src_last) {
|
||||
RestoreState(dec);
|
||||
} else if (!br->eos_) {
|
||||
} else if ((dec->incremental_ && src >= src_last) || !br->eos_) {
|
||||
// Process the remaining rows corresponding to last row-block.
|
||||
if (process_func != NULL) {
|
||||
process_func(dec, row > last_row ? last_row : row);
|
||||
@ -1249,8 +1289,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
||||
return 1;
|
||||
|
||||
Error:
|
||||
dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
|
||||
return 0;
|
||||
return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@ -1276,7 +1315,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
|
||||
uint8_t* const new_data = (uint8_t*)new_color_map;
|
||||
new_color_map[0] = transform->data_[0];
|
||||
for (i = 4; i < 4 * num_colors; ++i) {
|
||||
// Equivalent to AddPixelEq(), on a byte-basis.
|
||||
// Equivalent to VP8LAddPixels(), on a byte-basis.
|
||||
new_data[i] = (data[i] + new_data[i - 4]) & 0xff;
|
||||
}
|
||||
for (; i < 4 * final_num_colors; ++i) {
|
||||
@ -1317,7 +1356,7 @@ static int ReadTransform(int* const xsize, int const* ysize,
|
||||
transform->bits_),
|
||||
VP8LSubSampleSize(transform->ysize_,
|
||||
transform->bits_),
|
||||
0, dec, &transform->data_);
|
||||
/*is_level0=*/0, dec, &transform->data_);
|
||||
break;
|
||||
case COLOR_INDEXING_TRANSFORM: {
|
||||
const int num_colors = VP8LReadBits(br, 8) + 1;
|
||||
@ -1327,11 +1366,14 @@ static int ReadTransform(int* const xsize, int const* ysize,
|
||||
: 3;
|
||||
*xsize = VP8LSubSampleSize(transform->xsize_, bits);
|
||||
transform->bits_ = bits;
|
||||
ok = DecodeImageStream(num_colors, 1, 0, dec, &transform->data_);
|
||||
ok = ok && ExpandColorMap(num_colors, transform);
|
||||
ok = DecodeImageStream(num_colors, /*ysize=*/1, /*is_level0=*/0, dec,
|
||||
&transform->data_);
|
||||
if (ok && !ExpandColorMap(num_colors, transform)) {
|
||||
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SUBTRACT_GREEN:
|
||||
case SUBTRACT_GREEN_TRANSFORM:
|
||||
break;
|
||||
default:
|
||||
assert(0); // can't happen
|
||||
@ -1353,7 +1395,7 @@ static void ClearMetadata(VP8LMetadata* const hdr) {
|
||||
assert(hdr != NULL);
|
||||
|
||||
WebPSafeFree(hdr->huffman_image_);
|
||||
WebPSafeFree(hdr->huffman_tables_);
|
||||
VP8LHuffmanTablesDeallocate(&hdr->huffman_tables_);
|
||||
VP8LHtreeGroupsFree(hdr->htree_groups_);
|
||||
VP8LColorCacheClear(&hdr->color_cache_);
|
||||
VP8LColorCacheClear(&hdr->saved_color_cache_);
|
||||
@ -1434,7 +1476,7 @@ static int DecodeImageStream(int xsize, int ysize,
|
||||
color_cache_bits = VP8LReadBits(br, 4);
|
||||
ok = (color_cache_bits >= 1 && color_cache_bits <= MAX_CACHE_BITS);
|
||||
if (!ok) {
|
||||
dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
|
||||
VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
goto End;
|
||||
}
|
||||
}
|
||||
@ -1443,7 +1485,7 @@ static int DecodeImageStream(int xsize, int ysize,
|
||||
ok = ok && ReadHuffmanCodes(dec, transform_xsize, transform_ysize,
|
||||
color_cache_bits, is_level0);
|
||||
if (!ok) {
|
||||
dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
|
||||
VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
goto End;
|
||||
}
|
||||
|
||||
@ -1451,8 +1493,7 @@ static int DecodeImageStream(int xsize, int ysize,
|
||||
if (color_cache_bits > 0) {
|
||||
hdr->color_cache_size_ = 1 << color_cache_bits;
|
||||
if (!VP8LColorCacheInit(&hdr->color_cache_, color_cache_bits)) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
ok = 0;
|
||||
ok = VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
goto End;
|
||||
}
|
||||
} else {
|
||||
@ -1469,8 +1510,7 @@ static int DecodeImageStream(int xsize, int ysize,
|
||||
const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize;
|
||||
data = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*data));
|
||||
if (data == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
ok = 0;
|
||||
ok = VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
goto End;
|
||||
}
|
||||
}
|
||||
@ -1514,9 +1554,8 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
|
||||
assert(dec->width_ <= final_width);
|
||||
dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t));
|
||||
if (dec->pixels_ == NULL) {
|
||||
dec->argb_cache_ = NULL; // for sanity check
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
return 0;
|
||||
dec->argb_cache_ = NULL; // for soundness
|
||||
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
}
|
||||
dec->argb_cache_ = dec->pixels_ + num_pixels + cache_top_pixels;
|
||||
return 1;
|
||||
@ -1524,11 +1563,10 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
|
||||
|
||||
static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {
|
||||
const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_;
|
||||
dec->argb_cache_ = NULL; // for sanity check
|
||||
dec->argb_cache_ = NULL; // for soundness
|
||||
dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t));
|
||||
if (dec->pixels_ == NULL) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
return 0;
|
||||
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
@ -1583,7 +1621,8 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
|
||||
dec->status_ = VP8_STATUS_OK;
|
||||
VP8LInitBitReader(&dec->br_, data, data_size);
|
||||
|
||||
if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) {
|
||||
if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, /*is_level0=*/1,
|
||||
dec, /*decoded_data=*/NULL)) {
|
||||
goto Err;
|
||||
}
|
||||
|
||||
@ -1638,22 +1677,24 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
|
||||
|
||||
if (dec == NULL) return 0;
|
||||
if (io == NULL) {
|
||||
dec->status_ = VP8_STATUS_INVALID_PARAM;
|
||||
return 0;
|
||||
return VP8LSetError(dec, VP8_STATUS_INVALID_PARAM);
|
||||
}
|
||||
|
||||
dec->io_ = io;
|
||||
dec->status_ = VP8_STATUS_OK;
|
||||
VP8LInitBitReader(&dec->br_, io->data, io->data_size);
|
||||
if (!ReadImageInfo(&dec->br_, &width, &height, &has_alpha)) {
|
||||
dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
|
||||
VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
|
||||
goto Error;
|
||||
}
|
||||
dec->state_ = READ_DIM;
|
||||
io->width = width;
|
||||
io->height = height;
|
||||
|
||||
if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error;
|
||||
if (!DecodeImageStream(width, height, /*is_level0=*/1, dec,
|
||||
/*decoded_data=*/NULL)) {
|
||||
goto Error;
|
||||
}
|
||||
return 1;
|
||||
|
||||
Error:
|
||||
@ -1666,10 +1707,9 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
|
||||
VP8Io* io = NULL;
|
||||
WebPDecParams* params = NULL;
|
||||
|
||||
// Sanity checks.
|
||||
if (dec == NULL) return 0;
|
||||
|
||||
assert(dec->hdr_.huffman_tables_ != NULL);
|
||||
assert(dec->hdr_.huffman_tables_.root.start != NULL);
|
||||
assert(dec->hdr_.htree_groups_ != NULL);
|
||||
assert(dec->hdr_.num_htree_groups_ > 0);
|
||||
|
||||
@ -1684,7 +1724,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
|
||||
assert(dec->output_ != NULL);
|
||||
|
||||
if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
|
||||
dec->status_ = VP8_STATUS_INVALID_PARAM;
|
||||
VP8LSetError(dec, VP8_STATUS_INVALID_PARAM);
|
||||
goto Err;
|
||||
}
|
||||
|
||||
@ -1694,7 +1734,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
|
||||
if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
|
||||
#else
|
||||
if (io->use_scaling) {
|
||||
dec->status_ = VP8_STATUS_INVALID_PARAM;
|
||||
VP8LSetError(dec, VP8_STATUS_INVALID_PARAM);
|
||||
goto Err;
|
||||
}
|
||||
#endif
|
||||
@ -1712,7 +1752,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
|
||||
dec->hdr_.saved_color_cache_.colors_ == NULL) {
|
||||
if (!VP8LColorCacheInit(&dec->hdr_.saved_color_cache_,
|
||||
dec->hdr_.color_cache_.hash_bits_)) {
|
||||
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
|
||||
VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
goto Err;
|
||||
}
|
||||
}
|
||||
|
15
3rdparty/libwebp/src/dec/vp8li_dec.h
vendored
15
3rdparty/libwebp/src/dec/vp8li_dec.h
vendored
@ -51,7 +51,7 @@ typedef struct {
|
||||
uint32_t* huffman_image_;
|
||||
int num_htree_groups_;
|
||||
HTreeGroup* htree_groups_;
|
||||
HuffmanCode* huffman_tables_;
|
||||
HuffmanTables huffman_tables_;
|
||||
} VP8LMetadata;
|
||||
|
||||
typedef struct VP8LDecoder VP8LDecoder;
|
||||
@ -126,6 +126,19 @@ void VP8LClear(VP8LDecoder* const dec);
|
||||
// Clears and deallocates a lossless decoder instance.
|
||||
void VP8LDelete(VP8LDecoder* const dec);
|
||||
|
||||
// Helper function for reading the different Huffman codes and storing them in
|
||||
// 'huffman_tables' and 'htree_groups'.
|
||||
// If mapping is NULL 'num_htree_groups_max' must equal 'num_htree_groups'.
|
||||
// If it is not NULL, it maps 'num_htree_groups_max' indices to the
|
||||
// 'num_htree_groups' groups. If 'num_htree_groups_max' > 'num_htree_groups',
|
||||
// some of those indices map to -1. This is used for non-balanced codes to
|
||||
// limit memory usage.
|
||||
int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups,
|
||||
int num_htree_groups_max, const int* const mapping,
|
||||
VP8LDecoder* const dec,
|
||||
HuffmanTables* const huffman_tables,
|
||||
HTreeGroup** const htree_groups);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
50
3rdparty/libwebp/src/dec/webp_dec.c
vendored
50
3rdparty/libwebp/src/dec/webp_dec.c
vendored
@ -179,7 +179,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
|
||||
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
|
||||
}
|
||||
// For odd-sized chunk-payload, there's one byte of padding at the end.
|
||||
disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1;
|
||||
disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1u;
|
||||
total_size += disk_chunk_size;
|
||||
|
||||
// Check that total bytes skipped so far does not exceed riff_size.
|
||||
@ -658,19 +658,26 @@ uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
|
||||
uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
|
||||
int* width, int* height, uint8_t** u, uint8_t** v,
|
||||
int* stride, int* uv_stride) {
|
||||
WebPDecBuffer output; // only to preserve the side-infos
|
||||
uint8_t* const out = Decode(MODE_YUV, data, data_size,
|
||||
width, height, &output);
|
||||
|
||||
if (out != NULL) {
|
||||
const WebPYUVABuffer* const buf = &output.u.YUVA;
|
||||
*u = buf->u;
|
||||
*v = buf->v;
|
||||
*stride = buf->y_stride;
|
||||
*uv_stride = buf->u_stride;
|
||||
assert(buf->u_stride == buf->v_stride);
|
||||
// data, width and height are checked by Decode().
|
||||
if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
{
|
||||
WebPDecBuffer output; // only to preserve the side-infos
|
||||
uint8_t* const out = Decode(MODE_YUV, data, data_size,
|
||||
width, height, &output);
|
||||
|
||||
if (out != NULL) {
|
||||
const WebPYUVABuffer* const buf = &output.u.YUVA;
|
||||
*u = buf->u;
|
||||
*v = buf->v;
|
||||
*stride = buf->y_stride;
|
||||
*uv_stride = buf->u_stride;
|
||||
assert(buf->u_stride == buf->v_stride);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static void DefaultFeatures(WebPBitstreamFeatures* const features) {
|
||||
@ -785,6 +792,13 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
|
||||
//------------------------------------------------------------------------------
|
||||
// Cropping and rescaling.
|
||||
|
||||
int WebPCheckCropDimensions(int image_width, int image_height,
|
||||
int x, int y, int w, int h) {
|
||||
return !(x < 0 || y < 0 || w <= 0 || h <= 0 ||
|
||||
x >= image_width || w > image_width || w > image_width - x ||
|
||||
y >= image_height || h > image_height || h > image_height - y);
|
||||
}
|
||||
|
||||
int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
|
||||
VP8Io* const io, WEBP_CSP_MODE src_colorspace) {
|
||||
const int W = io->width;
|
||||
@ -792,7 +806,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
|
||||
int x = 0, y = 0, w = W, h = H;
|
||||
|
||||
// Cropping
|
||||
io->use_cropping = (options != NULL) && (options->use_cropping > 0);
|
||||
io->use_cropping = (options != NULL) && options->use_cropping;
|
||||
if (io->use_cropping) {
|
||||
w = options->crop_width;
|
||||
h = options->crop_height;
|
||||
@ -802,7 +816,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
|
||||
x &= ~1;
|
||||
y &= ~1;
|
||||
}
|
||||
if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
|
||||
if (!WebPCheckCropDimensions(W, H, x, y, w, h)) {
|
||||
return 0; // out of frame boundary error
|
||||
}
|
||||
}
|
||||
@ -814,7 +828,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
|
||||
io->mb_h = h;
|
||||
|
||||
// Scaling
|
||||
io->use_scaling = (options != NULL) && (options->use_scaling > 0);
|
||||
io->use_scaling = (options != NULL) && options->use_scaling;
|
||||
if (io->use_scaling) {
|
||||
int scaled_width = options->scaled_width;
|
||||
int scaled_height = options->scaled_height;
|
||||
@ -835,8 +849,8 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
|
||||
|
||||
if (io->use_scaling) {
|
||||
// disable filter (only for large downscaling ratio).
|
||||
io->bypass_filtering = (io->scaled_width < W * 3 / 4) &&
|
||||
(io->scaled_height < H * 3 / 4);
|
||||
io->bypass_filtering |= (io->scaled_width < W * 3 / 4) &&
|
||||
(io->scaled_height < H * 3 / 4);
|
||||
io->fancy_upsampling = 0;
|
||||
}
|
||||
return 1;
|
||||
|
4
3rdparty/libwebp/src/dec/webpi_dec.h
vendored
4
3rdparty/libwebp/src/dec/webpi_dec.h
vendored
@ -77,6 +77,10 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
|
||||
//------------------------------------------------------------------------------
|
||||
// Misc utils
|
||||
|
||||
// Returns true if crop dimensions are within image bounds.
|
||||
int WebPCheckCropDimensions(int image_width, int image_height,
|
||||
int x, int y, int w, int h);
|
||||
|
||||
// Initializes VP8Io with custom setup, io and teardown functions. The default
|
||||
// hooks will use the supplied 'params' as io->opaque handle.
|
||||
void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io);
|
||||
|
52
3rdparty/libwebp/src/demux/anim_decode.c
vendored
52
3rdparty/libwebp/src/demux/anim_decode.c
vendored
@ -23,6 +23,14 @@
|
||||
|
||||
#define NUM_CHANNELS 4
|
||||
|
||||
// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA
|
||||
// buffer.
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
#define CHANNEL_SHIFT(i) (24 - (i) * 8)
|
||||
#else
|
||||
#define CHANNEL_SHIFT(i) ((i) * 8)
|
||||
#endif
|
||||
|
||||
typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
|
||||
static void BlendPixelRowNonPremult(uint32_t* const src,
|
||||
const uint32_t* const dst, int num_pixels);
|
||||
@ -87,11 +95,19 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
|
||||
int abi_version) {
|
||||
WebPAnimDecoderOptions options;
|
||||
WebPAnimDecoder* dec = NULL;
|
||||
WebPBitstreamFeatures features;
|
||||
if (webp_data == NULL ||
|
||||
WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Validate the bitstream before doing expensive allocations. The demuxer may
|
||||
// be more tolerant than the decoder.
|
||||
if (WebPGetFeatures(webp_data->bytes, webp_data->size, &features) !=
|
||||
VP8_STATUS_OK) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Note: calloc() so that the pointer members are initialized to NULL.
|
||||
dec = (WebPAnimDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
|
||||
if (dec == NULL) goto Error;
|
||||
@ -145,7 +161,7 @@ static int ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width,
|
||||
uint32_t canvas_height) {
|
||||
const uint64_t size =
|
||||
(uint64_t)canvas_width * canvas_height * NUM_CHANNELS * sizeof(*buf);
|
||||
if (size != (size_t)size) return 0;
|
||||
if (!CheckSizeOverflow(size)) return 0;
|
||||
memset(buf, 0, (size_t)size);
|
||||
return 1;
|
||||
}
|
||||
@ -166,7 +182,7 @@ static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset,
|
||||
static int CopyCanvas(const uint8_t* src, uint8_t* dst,
|
||||
uint32_t width, uint32_t height) {
|
||||
const uint64_t size = (uint64_t)width * height * NUM_CHANNELS;
|
||||
if (size != (size_t)size) return 0;
|
||||
if (!CheckSizeOverflow(size)) return 0;
|
||||
assert(src != NULL && dst != NULL);
|
||||
memcpy(dst, src, (size_t)size);
|
||||
return 1;
|
||||
@ -201,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
|
||||
const uint8_t dst_channel = (dst >> shift) & 0xff;
|
||||
const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
|
||||
assert(blend_unscaled < (1ULL << 32) / scale);
|
||||
return (blend_unscaled * scale) >> 24;
|
||||
return (blend_unscaled * scale) >> CHANNEL_SHIFT(3);
|
||||
}
|
||||
|
||||
// Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha.
|
||||
static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
|
||||
const uint8_t src_a = (src >> 24) & 0xff;
|
||||
const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
|
||||
if (src_a == 0) {
|
||||
return dst;
|
||||
} else {
|
||||
const uint8_t dst_a = (dst >> 24) & 0xff;
|
||||
const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
// This is the approximate integer arithmetic for the actual formula:
|
||||
// dst_factor_a = (dst_a * (255 - src_a)) / 255.
|
||||
const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
|
||||
const uint8_t blend_a = src_a + dst_factor_a;
|
||||
const uint32_t scale = (1UL << 24) / blend_a;
|
||||
|
||||
const uint8_t blend_r =
|
||||
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
|
||||
const uint8_t blend_g =
|
||||
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
|
||||
const uint8_t blend_b =
|
||||
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
|
||||
const uint8_t blend_r = BlendChannelNonPremult(
|
||||
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0));
|
||||
const uint8_t blend_g = BlendChannelNonPremult(
|
||||
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1));
|
||||
const uint8_t blend_b = BlendChannelNonPremult(
|
||||
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2));
|
||||
assert(src_a + dst_factor_a < 256);
|
||||
|
||||
return (blend_r << 0) |
|
||||
(blend_g << 8) |
|
||||
(blend_b << 16) |
|
||||
((uint32_t)blend_a << 24);
|
||||
return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) |
|
||||
((uint32_t)blend_g << CHANNEL_SHIFT(1)) |
|
||||
((uint32_t)blend_b << CHANNEL_SHIFT(2)) |
|
||||
((uint32_t)blend_a << CHANNEL_SHIFT(3));
|
||||
}
|
||||
}
|
||||
|
||||
@ -239,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src,
|
||||
const uint32_t* const dst, int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint8_t src_alpha = (src[i] >> 24) & 0xff;
|
||||
const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
if (src_alpha != 0xff) {
|
||||
src[i] = BlendPixelNonPremult(src[i], dst[i]);
|
||||
}
|
||||
@ -256,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {
|
||||
|
||||
// Blend 'src' over 'dst' assuming they are pre-multiplied by alpha.
|
||||
static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
|
||||
const uint8_t src_a = (src >> 24) & 0xff;
|
||||
const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
return src + ChannelwiseMultiply(dst, 256 - src_a);
|
||||
}
|
||||
|
||||
@ -266,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
|
||||
int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint8_t src_alpha = (src[i] >> 24) & 0xff;
|
||||
const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
|
||||
if (src_alpha != 0xff) {
|
||||
src[i] = BlendPixelPremult(src[i], dst[i]);
|
||||
}
|
||||
|
23
3rdparty/libwebp/src/demux/demux.c
vendored
23
3rdparty/libwebp/src/demux/demux.c
vendored
@ -24,8 +24,8 @@
|
||||
#include "src/webp/format_constants.h"
|
||||
|
||||
#define DMUX_MAJ_VERSION 1
|
||||
#define DMUX_MIN_VERSION 2
|
||||
#define DMUX_REV_VERSION 0
|
||||
#define DMUX_MIN_VERSION 3
|
||||
#define DMUX_REV_VERSION 1
|
||||
|
||||
typedef struct {
|
||||
size_t start_; // start location of the data
|
||||
@ -221,12 +221,16 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
|
||||
const size_t chunk_start_offset = mem->start_;
|
||||
const uint32_t fourcc = ReadLE32(mem);
|
||||
const uint32_t payload_size = ReadLE32(mem);
|
||||
const uint32_t payload_size_padded = payload_size + (payload_size & 1);
|
||||
const size_t payload_available = (payload_size_padded > MemDataSize(mem))
|
||||
? MemDataSize(mem) : payload_size_padded;
|
||||
const size_t chunk_size = CHUNK_HEADER_SIZE + payload_available;
|
||||
uint32_t payload_size_padded;
|
||||
size_t payload_available;
|
||||
size_t chunk_size;
|
||||
|
||||
if (payload_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
|
||||
|
||||
payload_size_padded = payload_size + (payload_size & 1);
|
||||
payload_available = (payload_size_padded > MemDataSize(mem))
|
||||
? MemDataSize(mem) : payload_size_padded;
|
||||
chunk_size = CHUNK_HEADER_SIZE + payload_available;
|
||||
if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR;
|
||||
if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA;
|
||||
|
||||
@ -451,9 +455,11 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
|
||||
const size_t chunk_start_offset = mem->start_;
|
||||
const uint32_t fourcc = ReadLE32(mem);
|
||||
const uint32_t chunk_size = ReadLE32(mem);
|
||||
const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1);
|
||||
uint32_t chunk_size_padded;
|
||||
|
||||
if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
|
||||
|
||||
chunk_size_padded = chunk_size + (chunk_size & 1);
|
||||
if (SizeIsInvalid(mem, chunk_size_padded)) return PARSE_ERROR;
|
||||
|
||||
switch (fourcc) {
|
||||
@ -608,7 +614,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
|
||||
|
||||
while (f != NULL) {
|
||||
const int cur_frame_set = f->frame_num_;
|
||||
int frame_count = 0;
|
||||
|
||||
// Check frame properties.
|
||||
for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
|
||||
@ -643,8 +648,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
|
||||
dmux->canvas_width_, dmux->canvas_height_)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
++frame_count;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
|
66
3rdparty/libwebp/src/dsp/alpha_processing.c
vendored
66
3rdparty/libwebp/src/dsp/alpha_processing.c
vendored
@ -157,7 +157,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
}
|
||||
|
||||
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
|
||||
const uint8_t* WEBP_RESTRICT const alpha,
|
||||
int width, int inverse) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
@ -178,7 +179,8 @@ void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
#undef MFIX
|
||||
|
||||
void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
|
||||
void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
|
||||
const uint8_t* WEBP_RESTRICT const alpha,
|
||||
int width, int inverse);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -193,8 +195,8 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
|
||||
}
|
||||
}
|
||||
|
||||
void WebPMultRows(uint8_t* ptr, int stride,
|
||||
const uint8_t* alpha, int alpha_stride,
|
||||
void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
|
||||
const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
|
||||
int width, int num_rows, int inverse) {
|
||||
int n;
|
||||
for (n = 0; n < num_rows; ++n) {
|
||||
@ -290,9 +292,9 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
|
||||
static int DispatchAlpha_C(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
|
||||
uint32_t alpha_mask = 0xff;
|
||||
int i, j;
|
||||
|
||||
@ -309,9 +311,10 @@ static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_mask != 0xff);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
static void DispatchAlphaToGreen_C(const uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride, int width, int height,
|
||||
uint32_t* WEBP_RESTRICT dst,
|
||||
int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < height; ++j) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
@ -322,9 +325,9 @@ static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride,
|
||||
}
|
||||
}
|
||||
|
||||
static int ExtractAlpha_C(const uint8_t* argb, int argb_stride,
|
||||
static int ExtractAlpha_C(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
|
||||
uint8_t alpha_mask = 0xff;
|
||||
int i, j;
|
||||
|
||||
@ -340,7 +343,8 @@ static int ExtractAlpha_C(const uint8_t* argb, int argb_stride,
|
||||
return (alpha_mask == 0xff);
|
||||
}
|
||||
|
||||
static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
|
||||
static void ExtractGreen_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT alpha, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
|
||||
}
|
||||
@ -372,8 +376,11 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
|
||||
}
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int len, uint32_t* out) {
|
||||
static void PackARGB_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT r,
|
||||
const uint8_t* WEBP_RESTRICT g,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int len, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
|
||||
@ -381,8 +388,10 @@ static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
}
|
||||
#endif
|
||||
|
||||
static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out) {
|
||||
static void PackRGB_C(const uint8_t* WEBP_RESTRICT r,
|
||||
const uint8_t* WEBP_RESTRICT g,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int len, int step, uint32_t* WEBP_RESTRICT out) {
|
||||
int i, offset = 0;
|
||||
for (i = 0; i < len; ++i) {
|
||||
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
|
||||
@ -392,16 +401,22 @@ static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
|
||||
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
|
||||
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
|
||||
int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
|
||||
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
|
||||
int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int,
|
||||
uint8_t* WEBP_RESTRICT, int);
|
||||
void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT, int, int, int,
|
||||
uint32_t* WEBP_RESTRICT, int);
|
||||
int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int,
|
||||
uint8_t* WEBP_RESTRICT, int);
|
||||
void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT alpha, int size);
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||
const uint8_t* b, int, uint32_t*);
|
||||
#endif
|
||||
void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
|
||||
const uint8_t* WEBP_RESTRICT g,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int len, int step, uint32_t* WEBP_RESTRICT out);
|
||||
|
||||
int (*WebPHasAlpha8b)(const uint8_t* src, int length);
|
||||
int (*WebPHasAlpha32b)(const uint8_t* src, int length);
|
||||
@ -410,6 +425,7 @@ void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color);
|
||||
//------------------------------------------------------------------------------
|
||||
// Init function
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void WebPInitAlphaProcessingMIPSdspR2(void);
|
||||
extern void WebPInitAlphaProcessingSSE2(void);
|
||||
extern void WebPInitAlphaProcessingSSE41(void);
|
||||
@ -438,10 +454,10 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
WebPInitAlphaProcessingSSE2();
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitAlphaProcessingSSE41();
|
||||
}
|
||||
@ -455,7 +471,7 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitAlphaProcessingNEON();
|
||||
|
27
3rdparty/libwebp/src/dsp/alpha_processing_neon.c
vendored
27
3rdparty/libwebp/src/dsp/alpha_processing_neon.c
vendored
@ -80,10 +80,10 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
uint32_t alpha_mask = 0xffffffffu;
|
||||
static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride, int width, int height,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
|
||||
uint32_t alpha_mask = 0xffu;
|
||||
uint8x8_t mask8 = vdup_n_u8(0xff);
|
||||
uint32_t tmp[2];
|
||||
int i, j;
|
||||
@ -107,14 +107,16 @@ static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
|
||||
dst += dst_stride;
|
||||
}
|
||||
vst1_u8((uint8_t*)tmp, mask8);
|
||||
alpha_mask *= 0x01010101;
|
||||
alpha_mask &= tmp[0];
|
||||
alpha_mask &= tmp[1];
|
||||
return (alpha_mask != 0xffffffffu);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride, int width, int height,
|
||||
uint32_t* WEBP_RESTRICT dst,
|
||||
int dst_stride) {
|
||||
int i, j;
|
||||
uint8x8x4_t greens; // leave A/R/B channels zero'd.
|
||||
greens.val[0] = vdup_n_u8(0);
|
||||
@ -131,10 +133,10 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
|
||||
}
|
||||
}
|
||||
|
||||
static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
|
||||
static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
uint32_t alpha_mask = 0xffffffffu;
|
||||
uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
|
||||
uint32_t alpha_mask = 0xffu;
|
||||
uint8x8_t mask8 = vdup_n_u8(0xff);
|
||||
uint32_t tmp[2];
|
||||
int i, j;
|
||||
@ -156,13 +158,14 @@ static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
|
||||
alpha += alpha_stride;
|
||||
}
|
||||
vst1_u8((uint8_t*)tmp, mask8);
|
||||
alpha_mask *= 0x01010101;
|
||||
alpha_mask &= tmp[0];
|
||||
alpha_mask &= tmp[1];
|
||||
return (alpha_mask == 0xffffffffu);
|
||||
}
|
||||
|
||||
static void ExtractGreen_NEON(const uint32_t* argb,
|
||||
uint8_t* alpha, int size) {
|
||||
static void ExtractGreen_NEON(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT alpha, int size) {
|
||||
int i;
|
||||
for (i = 0; i + 16 <= size; i += 16) {
|
||||
const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i));
|
||||
|
73
3rdparty/libwebp/src/dsp/alpha_processing_sse2.c
vendored
73
3rdparty/libwebp/src/dsp/alpha_processing_sse2.c
vendored
@ -18,16 +18,16 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride) {
|
||||
static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride, int width, int height,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
int i, j;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u); // to preserve RGB
|
||||
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
|
||||
const __m128i rgb_mask = _mm_set1_epi32((int)0xffffff00); // to preserve RGB
|
||||
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
|
||||
__m128i all_alphas = all_0xff;
|
||||
|
||||
// We must be able to access 3 extra bytes after the last written byte
|
||||
@ -72,9 +72,10 @@ static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_and != 0xff);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride, int width, int height,
|
||||
uint32_t* WEBP_RESTRICT dst,
|
||||
int dst_stride) {
|
||||
int i, j;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const int limit = width & ~15;
|
||||
@ -98,15 +99,15 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
|
||||
}
|
||||
}
|
||||
|
||||
static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
|
||||
static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
int i, j;
|
||||
const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha
|
||||
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
|
||||
const __m128i a_mask = _mm_set1_epi32(0xff); // to preserve alpha
|
||||
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
|
||||
__m128i all_alphas = all_0xff;
|
||||
|
||||
// We must be able to access 3 extra bytes after the last written byte
|
||||
@ -143,6 +144,46 @@ static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
|
||||
return (alpha_and == 0xff);
|
||||
}
|
||||
|
||||
static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT alpha, int size) {
|
||||
int i;
|
||||
const __m128i mask = _mm_set1_epi32(0xff);
|
||||
const __m128i* src = (const __m128i*)argb;
|
||||
|
||||
for (i = 0; i + 16 <= size; i += 16, src += 4) {
|
||||
const __m128i a0 = _mm_loadu_si128(src + 0);
|
||||
const __m128i a1 = _mm_loadu_si128(src + 1);
|
||||
const __m128i a2 = _mm_loadu_si128(src + 2);
|
||||
const __m128i a3 = _mm_loadu_si128(src + 3);
|
||||
const __m128i b0 = _mm_srli_epi32(a0, 8);
|
||||
const __m128i b1 = _mm_srli_epi32(a1, 8);
|
||||
const __m128i b2 = _mm_srli_epi32(a2, 8);
|
||||
const __m128i b3 = _mm_srli_epi32(a3, 8);
|
||||
const __m128i c0 = _mm_and_si128(b0, mask);
|
||||
const __m128i c1 = _mm_and_si128(b1, mask);
|
||||
const __m128i c2 = _mm_and_si128(b2, mask);
|
||||
const __m128i c3 = _mm_and_si128(b3, mask);
|
||||
const __m128i d0 = _mm_packs_epi32(c0, c1);
|
||||
const __m128i d1 = _mm_packs_epi32(c2, c3);
|
||||
const __m128i e = _mm_packus_epi16(d0, d1);
|
||||
// store
|
||||
_mm_storeu_si128((__m128i*)&alpha[i], e);
|
||||
}
|
||||
if (i + 8 <= size) {
|
||||
const __m128i a0 = _mm_loadu_si128(src + 0);
|
||||
const __m128i a1 = _mm_loadu_si128(src + 1);
|
||||
const __m128i b0 = _mm_srli_epi32(a0, 8);
|
||||
const __m128i b1 = _mm_srli_epi32(a1, 8);
|
||||
const __m128i c0 = _mm_and_si128(b0, mask);
|
||||
const __m128i c1 = _mm_and_si128(b1, mask);
|
||||
const __m128i d = _mm_packs_epi32(c0, c1);
|
||||
const __m128i e = _mm_packus_epi16(d, d);
|
||||
_mm_storel_epi64((__m128i*)&alpha[i], e);
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) alpha[i] = argb[i] >> 8;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Non-dither premultiplied modes
|
||||
|
||||
@ -177,7 +218,7 @@ static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
|
||||
static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
|
||||
int w, int h, int stride) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i kMult = _mm_set1_epi16(0x8081u);
|
||||
const __m128i kMult = _mm_set1_epi16((short)0x8081);
|
||||
const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0);
|
||||
const int kSpan = 4;
|
||||
while (h-- > 0) {
|
||||
@ -266,7 +307,7 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
|
||||
}
|
||||
|
||||
static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) {
|
||||
const __m128i m_color = _mm_set1_epi32(color);
|
||||
const __m128i m_color = _mm_set1_epi32((int)color);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int i = 0;
|
||||
for (; i + 8 <= length; i += 8) {
|
||||
@ -317,7 +358,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
|
||||
if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
|
||||
}
|
||||
|
||||
static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr,
|
||||
const uint8_t* WEBP_RESTRICT const alpha,
|
||||
int width, int inverse) {
|
||||
int x = 0;
|
||||
if (!inverse) {
|
||||
@ -352,6 +394,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
|
||||
WebPDispatchAlpha = DispatchAlpha_SSE2;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
|
||||
WebPExtractAlpha = ExtractAlpha_SSE2;
|
||||
WebPExtractGreen = ExtractGreen_SSE2;
|
||||
|
||||
WebPHasAlpha8b = HasAlpha8b_SSE2;
|
||||
WebPHasAlpha32b = HasAlpha32b_SSE2;
|
||||
|
@ -19,14 +19,14 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb,
|
||||
int argb_stride, int width, int height,
|
||||
uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
|
||||
// alpha_and stores an 'and' operation of all the alpha[] values. The final
|
||||
// value is not 0xff if any of the alpha[] is not equal to 0xff.
|
||||
uint32_t alpha_and = 0xff;
|
||||
int i, j;
|
||||
const __m128i all_0xff = _mm_set1_epi32(~0u);
|
||||
const __m128i all_0xff = _mm_set1_epi32(~0);
|
||||
__m128i all_alphas = all_0xff;
|
||||
|
||||
// We must be able to access 3 extra bytes after the last written byte
|
||||
|
5
3rdparty/libwebp/src/dsp/cost.c
vendored
5
3rdparty/libwebp/src/dsp/cost.c
vendored
@ -374,6 +374,7 @@ static void SetResidualCoeffs_C(const int16_t* const coeffs,
|
||||
VP8GetResidualCostFunc VP8GetResidualCost;
|
||||
VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8EncDspCostInitMIPS32(void);
|
||||
extern void VP8EncDspCostInitMIPSdspR2(void);
|
||||
extern void VP8EncDspCostInitSSE2(void);
|
||||
@ -395,12 +396,12 @@ WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) {
|
||||
VP8EncDspCostInitMIPSdspR2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8EncDspCostInitSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (VP8GetCPUInfo(kNEON)) {
|
||||
VP8EncDspCostInitNEON();
|
||||
}
|
||||
|
4
3rdparty/libwebp/src/dsp/cost_neon.c
vendored
4
3rdparty/libwebp/src/dsp/cost_neon.c
vendored
@ -29,7 +29,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
|
||||
const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1));
|
||||
const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position));
|
||||
|
||||
#ifdef __aarch64__
|
||||
#if WEBP_AARCH64
|
||||
res->last = vmaxvq_u8(masked) - 1;
|
||||
#else
|
||||
const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked));
|
||||
@ -43,7 +43,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
|
||||
|
||||
vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0);
|
||||
--res->last;
|
||||
#endif // __aarch64__
|
||||
#endif // WEBP_AARCH64
|
||||
|
||||
res->coeffs = coeffs;
|
||||
}
|
||||
|
21
3rdparty/libwebp/src/dsp/cpu.c
vendored
21
3rdparty/libwebp/src/dsp/cpu.c
vendored
@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Christian Duvivier (cduvivier@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
|
||||
#if defined(WEBP_HAVE_NEON_RTCD)
|
||||
#include <stdio.h>
|
||||
@ -173,6 +173,7 @@ static int x86CPUInfo(CPUFeature feature) {
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
|
||||
#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
|
||||
static int AndroidCPUInfo(CPUFeature feature) {
|
||||
@ -184,22 +185,23 @@ static int AndroidCPUInfo(CPUFeature feature) {
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
|
||||
#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
|
||||
// Use compile flags as an indicator of SIMD support instead of a runtime check.
|
||||
static int wasmCPUInfo(CPUFeature feature) {
|
||||
switch (feature) {
|
||||
#ifdef WEBP_USE_SSE2
|
||||
#ifdef WEBP_HAVE_SSE2
|
||||
case kSSE2:
|
||||
return 1;
|
||||
#endif
|
||||
#ifdef WEBP_USE_SSE41
|
||||
#ifdef WEBP_HAVE_SSE41
|
||||
case kSSE3:
|
||||
case kSlowSSSE3:
|
||||
case kSSE4_1:
|
||||
return 1;
|
||||
#endif
|
||||
#ifdef WEBP_USE_NEON
|
||||
#ifdef WEBP_HAVE_NEON
|
||||
case kNEON:
|
||||
return 1;
|
||||
#endif
|
||||
@ -208,10 +210,12 @@ static int wasmCPUInfo(CPUFeature feature) {
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
|
||||
#elif defined(WEBP_USE_NEON)
|
||||
// define a dummy function to enable turning off NEON at runtime by setting
|
||||
// VP8DecGetCPUInfo = NULL
|
||||
#elif defined(WEBP_HAVE_NEON)
|
||||
// In most cases this function doesn't check for NEON support (it's assumed by
|
||||
// the configuration), but enables turning off NEON at runtime, for testing
|
||||
// purposes, by setting VP8GetCPUInfo = NULL.
|
||||
static int armCPUInfo(CPUFeature feature) {
|
||||
if (feature != kNEON) return 0;
|
||||
#if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
|
||||
@ -235,6 +239,7 @@ static int armCPUInfo(CPUFeature feature) {
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
|
||||
#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
|
||||
defined(WEBP_USE_MSA)
|
||||
@ -246,7 +251,9 @@ static int mipsCPUInfo(CPUFeature feature) {
|
||||
}
|
||||
|
||||
}
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
|
||||
#else
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
VP8CPUInfo VP8GetCPUInfo = NULL;
|
||||
#endif
|
||||
|
266
3rdparty/libwebp/src/dsp/cpu.h
vendored
Normal file
266
3rdparty/libwebp/src/dsp/cpu.h
vendored
Normal file
@ -0,0 +1,266 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// CPU detection functions and macros.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DSP_CPU_H_
|
||||
#define WEBP_DSP_CPU_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// Compiler version helpers. When __GNUC__ is defined, LOCAL_GCC_VERSION packs
// the version as (major << 8) | minor and LOCAL_GCC_PREREQ(maj, min) is true
// for versions >= maj.min; otherwise both evaluate to 0. The LOCAL_CLANG_*
// macros follow the same scheme, keyed on __clang__.
#if defined(__GNUC__)
|
||||
#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
|
||||
#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
#define LOCAL_GCC_VERSION 0
|
||||
#define LOCAL_GCC_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
|
||||
#define LOCAL_CLANG_PREREQ(maj, min) \
|
||||
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
#define LOCAL_CLANG_VERSION 0
|
||||
#define LOCAL_CLANG_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// x86 defines.
|
||||
|
||||
#if !defined(HAVE_CONFIG_H)
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
|
||||
// files without intrinsics, allowing the corresponding Init() to be called.
|
||||
// Files containing intrinsics will need to be built targeting the instruction
|
||||
// set so should succeed on one of the earlier tests.
|
||||
#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
|
||||
#define WEBP_USE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
|
||||
#define WEBP_HAVE_SSE2
|
||||
#endif
|
||||
|
||||
#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
|
||||
#define WEBP_USE_SSE41
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
|
||||
#define WEBP_HAVE_SSE41
|
||||
#endif
|
||||
|
||||
#undef WEBP_MSC_SSE41
|
||||
#undef WEBP_MSC_SSE2
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Arm defines.
|
||||
|
||||
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
|
||||
// inline assembly would need to be modified for use with Native Client.
|
||||
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
|
||||
!defined(__native_client__)
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
|
||||
defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
|
||||
#define WEBP_ANDROID_NEON // Android targets that may have NEON
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
|
||||
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
|
||||
// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
|
||||
// vtbl4_u8(); a fix was made in 16.6.
|
||||
#if defined(_MSC_VER) && \
|
||||
((_MSC_VER >= 1700 && defined(_M_ARM)) || \
|
||||
(_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC))))
|
||||
#define WEBP_USE_NEON
|
||||
#define WEBP_USE_INTRINSICS
|
||||
#endif
|
||||
|
||||
// WEBP_AARCH64 is always defined: 1 when any of __aarch64__, _M_ARM64 or
// _M_ARM64EC is defined, 0 otherwise. It can therefore be tested with a plain
// '#if WEBP_AARCH64' or used inside a larger preprocessor expression.
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#define WEBP_AARCH64 1
|
||||
#else
|
||||
#define WEBP_AARCH64 0
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
|
||||
#define WEBP_HAVE_NEON
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// MIPS defines.
|
||||
|
||||
#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
|
||||
(__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
|
||||
#define WEBP_USE_MSA
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
// WEBP_NEON_WORK_AROUND_GCC is 0 when the compiler is clang >= 3.8 or
// gcc >= 4.8, or when WEBP_AARCH64 is set; it is 1 in every other case.
// NOTE(review): the specific compiler problem being worked around is not
// visible in this header -- check the NEON sources that test this macro.
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#undef WEBP_TSAN_IGNORE_FUNCTION
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#define WEBP_MSAN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_DSP_INIT(func): calls func() when VP8GetCPUInfo differs from the value
// recorded by the previous pass through this expansion site, then records the
// current value. The function-local static is initialized to its own address,
// presumably so that the very first pass never compares equal to a real
// VP8GetCPUInfo value (including NULL).
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
// Threaded variant: the comparison, the call to func() and the update are
// serialized by a static mutex local to the expansion site. If
// pthread_mutex_lock() returns non-zero the macro bails out without calling
// func().
#define WEBP_DSP_INIT(func) \
|
||||
do { \
|
||||
static volatile VP8CPUInfo func##_last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func##_last_cpuinfo_used; \
|
||||
static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&func##_lock)) break; \
|
||||
if (func##_last_cpuinfo_used != VP8GetCPUInfo) func(); \
|
||||
func##_last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
(void)pthread_mutex_unlock(&func##_lock); \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
// Variant without locking: performs the same comparison and update, and
// leaves early when the recorded value already matches VP8GetCPUInfo.
#define WEBP_DSP_INIT(func) \
|
||||
do { \
|
||||
static volatile VP8CPUInfo func##_last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func##_last_cpuinfo_used; \
|
||||
if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
|
||||
func(); \
|
||||
func##_last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
} while (0)
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Defines an Init function plus a helper that together control repeated
|
||||
// initialization of function pointers / tables.
// 'name()' is the non-static entry point; it forwards to the static
// 'name_body()' through WEBP_DSP_INIT, so the braces that follow the macro
// invocation become the body of 'name_body()' and only run when WEBP_DSP_INIT
// decides a (re-)initialization is needed. Both functions carry
// WEBP_TSAN_IGNORE_FUNCTION.
|
||||
/* Usage:
|
||||
WEBP_DSP_INIT_FUNC(InitFunc) {
|
||||
...function body
|
||||
}
|
||||
*/
|
||||
#define WEBP_DSP_INIT_FUNC(name) \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void); \
|
||||
WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
|
||||
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#if defined(__clang__) && defined(__has_attribute)
|
||||
#if __has_attribute(no_sanitize)
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures. This is only meant to silence unaligned loads on platforms that
|
||||
// are known to support them.
|
||||
#undef WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))
|
||||
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures related to unsigned integer overflows. This is only meant to
|
||||
// silence cases where this well defined behavior is expected.
|
||||
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
|
||||
__attribute__((no_sanitize("unsigned-integer-overflow")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
|
||||
// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
|
||||
#if !defined(WEBP_OFFSET_PTR)
|
||||
#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
// Identifiers for the CPU features that can be queried through a VP8CPUInfo
// callback: x86 SIMD levels (kSSE2 .. kAVX2), Arm NEON (kNEON) and the MIPS
// extensions (kMIPS32, kMIPSdspR2, kMSA).
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
kSlowSSSE3, // special feature for slow SSSE3 architectures
|
||||
kSSE4_1,
|
||||
kAVX,
|
||||
kAVX2,
|
||||
kNEON,
|
||||
kMIPS32,
|
||||
kMIPSdspR2,
|
||||
kMSA
|
||||
} CPUFeature;
|
||||
|
||||
// Signature of a CPU-feature query callback: returns non-zero (true) if the
// CPU supports 'feature', and 0 otherwise.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
|
||||
#endif // WEBP_DSP_CPU_H_
|
7
3rdparty/libwebp/src/dsp/dec.c
vendored
7
3rdparty/libwebp/src/dsp/dec.c
vendored
@ -734,6 +734,7 @@ VP8SimpleFilterFunc VP8SimpleHFilter16i;
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride);
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8DspInitSSE2(void);
|
||||
extern void VP8DspInitSSE41(void);
|
||||
extern void VP8DspInitNEON(void);
|
||||
@ -807,10 +808,10 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) {
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8DspInitSSE2();
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
VP8DspInitSSE41();
|
||||
}
|
||||
@ -834,7 +835,7 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8DspInitNEON();
|
||||
|
4
3rdparty/libwebp/src/dsp/dec_neon.c
vendored
4
3rdparty/libwebp/src/dsp/dec_neon.c
vendored
@ -1428,7 +1428,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
|
||||
|
||||
if (do_top) {
|
||||
const uint8x8_t A = vld1_u8(dst - BPS); // top row
|
||||
#if defined(__aarch64__)
|
||||
#if WEBP_AARCH64
|
||||
const uint16_t p2 = vaddlv_u8(A);
|
||||
sum_top = vdupq_n_u16(p2);
|
||||
#else
|
||||
@ -1511,7 +1511,7 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) {
|
||||
|
||||
if (do_top) {
|
||||
const uint8x16_t A = vld1q_u8(dst - BPS); // top row
|
||||
#if defined(__aarch64__)
|
||||
#if WEBP_AARCH64
|
||||
const uint16_t p3 = vaddlvq_u8(A);
|
||||
sum_top = vdupq_n_u16(p3);
|
||||
#else
|
||||
|
93
3rdparty/libwebp/src/dsp/dec_sse2.c
vendored
93
3rdparty/libwebp/src/dsp/dec_sse2.c
vendored
@ -158,10 +158,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
dst3 = _mm_loadl_epi64((__m128i*)(dst + 3 * BPS));
|
||||
} else {
|
||||
// Load four bytes/pixels per line.
|
||||
dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS));
|
||||
dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS));
|
||||
dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS));
|
||||
dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS));
|
||||
dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS));
|
||||
dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS));
|
||||
dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS));
|
||||
dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS));
|
||||
}
|
||||
// Convert to 16b.
|
||||
dst0 = _mm_unpacklo_epi8(dst0, zero);
|
||||
@ -187,10 +187,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
_mm_storel_epi64((__m128i*)(dst + 3 * BPS), dst3);
|
||||
} else {
|
||||
// Store four bytes/pixels per line.
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -213,10 +213,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
const __m128i m3 = _mm_subs_epi16(B, d4);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
// Load the source pixels.
|
||||
__m128i dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS));
|
||||
__m128i dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS));
|
||||
__m128i dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS));
|
||||
__m128i dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS));
|
||||
__m128i dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS));
|
||||
__m128i dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS));
|
||||
__m128i dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS));
|
||||
__m128i dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS));
|
||||
// Convert to 16b.
|
||||
dst0 = _mm_unpacklo_epi8(dst0, zero);
|
||||
dst1 = _mm_unpacklo_epi8(dst1, zero);
|
||||
@ -233,10 +233,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
dst2 = _mm_packus_epi16(dst2, dst2);
|
||||
dst3 = _mm_packus_epi16(dst3, dst3);
|
||||
// Store the results.
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
|
||||
}
|
||||
#undef MUL
|
||||
#endif // USE_TRANSFORM_AC3
|
||||
@ -477,11 +477,11 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
|
||||
// A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
|
||||
// A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
|
||||
const __m128i A0 = _mm_set_epi32(
|
||||
WebPMemToUint32(&b[6 * stride]), WebPMemToUint32(&b[2 * stride]),
|
||||
WebPMemToUint32(&b[4 * stride]), WebPMemToUint32(&b[0 * stride]));
|
||||
WebPMemToInt32(&b[6 * stride]), WebPMemToInt32(&b[2 * stride]),
|
||||
WebPMemToInt32(&b[4 * stride]), WebPMemToInt32(&b[0 * stride]));
|
||||
const __m128i A1 = _mm_set_epi32(
|
||||
WebPMemToUint32(&b[7 * stride]), WebPMemToUint32(&b[3 * stride]),
|
||||
WebPMemToUint32(&b[5 * stride]), WebPMemToUint32(&b[1 * stride]));
|
||||
WebPMemToInt32(&b[7 * stride]), WebPMemToInt32(&b[3 * stride]),
|
||||
WebPMemToInt32(&b[5 * stride]), WebPMemToInt32(&b[1 * stride]));
|
||||
|
||||
// B0 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
|
||||
// B1 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
|
||||
@ -540,7 +540,7 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
|
||||
uint8_t* dst, int stride) {
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i, dst += stride) {
|
||||
WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
|
||||
WebPInt32ToMem(dst, _mm_cvtsi128_si32(*x));
|
||||
*x = _mm_srli_si128(*x, 4);
|
||||
}
|
||||
}
|
||||
@ -908,10 +908,10 @@ static void VE4_SSE2(uint8_t* dst) { // vertical
|
||||
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
|
||||
const __m128i b = _mm_subs_epu8(a, lsb);
|
||||
const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
|
||||
const uint32_t vals = _mm_cvtsi128_si32(avg);
|
||||
const int vals = _mm_cvtsi128_si32(avg);
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
WebPUint32ToMem(dst + i * BPS, vals);
|
||||
WebPInt32ToMem(dst + i * BPS, vals);
|
||||
}
|
||||
}
|
||||
|
||||
@ -925,10 +925,10 @@ static void LD4_SSE2(uint8_t* dst) { // Down-Left
|
||||
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
|
||||
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
|
||||
const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
|
||||
@ -946,10 +946,10 @@ static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
|
||||
const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
|
||||
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
|
||||
const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
|
||||
|
||||
// these two are hard to implement in SSE2, so we keep the C-version:
|
||||
DST(0, 2) = AVG3(J, I, X);
|
||||
@ -970,11 +970,12 @@ static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
|
||||
const __m128i abbc = _mm_or_si128(ab, bc);
|
||||
const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
|
||||
const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
|
||||
const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
|
||||
const uint32_t extra_out =
|
||||
(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
|
||||
|
||||
// these two are hard to get and irregular
|
||||
DST(3, 2) = (extra_out >> 0) & 0xff;
|
||||
@ -990,7 +991,7 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right
|
||||
const uint32_t K = dst[-1 + 2 * BPS];
|
||||
const uint32_t L = dst[-1 + 3 * BPS];
|
||||
const __m128i LKJI_____ =
|
||||
_mm_cvtsi32_si128(L | (K << 8) | (J << 16) | (I << 24));
|
||||
_mm_cvtsi32_si128((int)(L | (K << 8) | (J << 16) | (I << 24)));
|
||||
const __m128i LKJIXABCD = _mm_or_si128(LKJI_____, ____XABCD);
|
||||
const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1);
|
||||
const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2);
|
||||
@ -998,10 +999,10 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right
|
||||
const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
|
||||
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
|
||||
const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
#undef DST
|
||||
@ -1015,13 +1016,13 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
if (size == 4) {
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
|
||||
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
|
||||
for (y = 0; y < 4; ++y, dst += BPS) {
|
||||
const int val = dst[-1] - top[-1];
|
||||
const __m128i base = _mm_set1_epi16(val);
|
||||
const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
|
||||
WebPUint32ToMem(dst, _mm_cvtsi128_si32(out));
|
||||
WebPInt32ToMem(dst, _mm_cvtsi128_si32(out));
|
||||
}
|
||||
} else if (size == 8) {
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
@ -1062,7 +1063,7 @@ static void VE16_SSE2(uint8_t* dst) {
|
||||
static void HE16_SSE2(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
for (j = 16; j > 0; --j) {
|
||||
const __m128i values = _mm_set1_epi8(dst[-1]);
|
||||
const __m128i values = _mm_set1_epi8((char)dst[-1]);
|
||||
_mm_storeu_si128((__m128i*)dst, values);
|
||||
dst += BPS;
|
||||
}
|
||||
@ -1070,7 +1071,7 @@ static void HE16_SSE2(uint8_t* dst) { // horizontal
|
||||
|
||||
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
const __m128i values = _mm_set1_epi8((char)v);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
_mm_storeu_si128((__m128i*)(dst + j * BPS), values);
|
||||
}
|
||||
@ -1130,7 +1131,7 @@ static void VE8uv_SSE2(uint8_t* dst) { // vertical
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
const __m128i values = _mm_set1_epi8((char)v);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
_mm_storel_epi64((__m128i*)(dst + j * BPS), values);
|
||||
}
|
||||
|
2
3rdparty/libwebp/src/dsp/dec_sse41.c
vendored
2
3rdparty/libwebp/src/dsp/dec_sse41.c
vendored
@ -23,7 +23,7 @@ static void HE16_SSE41(uint8_t* dst) { // horizontal
|
||||
int j;
|
||||
const __m128i kShuffle3 = _mm_set1_epi8(3);
|
||||
for (j = 16; j > 0; --j) {
|
||||
const __m128i in = _mm_cvtsi32_si128(WebPMemToUint32(dst - 4));
|
||||
const __m128i in = _mm_cvtsi32_si128(WebPMemToInt32(dst - 4));
|
||||
const __m128i values = _mm_shuffle_epi8(in, kShuffle3);
|
||||
_mm_storeu_si128((__m128i*)dst, values);
|
||||
dst += BPS;
|
||||
|
262
3rdparty/libwebp/src/dsp/dsp.h
vendored
262
3rdparty/libwebp/src/dsp/dsp.h
vendored
@ -18,6 +18,7 @@
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -27,205 +28,22 @@ extern "C" {
|
||||
#define BPS 32 // this is the common stride for enc/dec
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// CPU detection
|
||||
// WEBP_RESTRICT
|
||||
|
||||
// Declares a pointer with the restrict type qualifier if available.
|
||||
// This allows code to hint to the compiler that only this pointer references a
|
||||
// particular object or memory region within the scope of the block in which it
|
||||
// is declared. This may allow for improved optimizations due to the lack of
|
||||
// pointer aliasing. See also:
|
||||
// https://en.cppreference.com/w/c/language/restrict
|
||||
#if defined(__GNUC__)
|
||||
# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
|
||||
# define LOCAL_GCC_PREREQ(maj, min) \
|
||||
(LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
|
||||
#define WEBP_RESTRICT __restrict__
|
||||
#elif defined(_MSC_VER)
|
||||
#define WEBP_RESTRICT __restrict
|
||||
#else
|
||||
# define LOCAL_GCC_VERSION 0
|
||||
# define LOCAL_GCC_PREREQ(maj, min) 0
|
||||
#define WEBP_RESTRICT
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
|
||||
# define LOCAL_CLANG_PREREQ(maj, min) \
|
||||
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
# define LOCAL_CLANG_VERSION 0
|
||||
# define LOCAL_CLANG_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_CONFIG_H)
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
|
||||
// files without intrinsics, allowing the corresponding Init() to be called.
|
||||
// Files containing intrinsics will need to be built targeting the instruction
|
||||
// set so should succeed on one of the earlier tests.
|
||||
#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2)
|
||||
#define WEBP_USE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41)
|
||||
#define WEBP_USE_SSE41
|
||||
#endif
|
||||
|
||||
#undef WEBP_MSC_SSE41
|
||||
#undef WEBP_MSC_SSE2
|
||||
|
||||
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
|
||||
// inline assembly would need to be modified for use with Native Client.
|
||||
#if (defined(__ARM_NEON__) || \
|
||||
defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
|
||||
!defined(__native_client__)
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
|
||||
defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
|
||||
#define WEBP_ANDROID_NEON // Android targets that may have NEON
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
|
||||
#define WEBP_USE_NEON
|
||||
#define WEBP_USE_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(__mips__) && !defined(__mips64) && \
|
||||
defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
|
||||
#define WEBP_USE_MSA
|
||||
#endif
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#undef WEBP_TSAN_IGNORE_FUNCTION
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#define WEBP_DSP_INIT(func) do { \
|
||||
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
|
||||
static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&func ## _lock)) break; \
|
||||
if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \
|
||||
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
(void)pthread_mutex_unlock(&func ## _lock); \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
#define WEBP_DSP_INIT(func) do { \
|
||||
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
|
||||
if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \
|
||||
func(); \
|
||||
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
} while (0)
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Defines an Init + helper function that control multiple initialization of
|
||||
// function pointers / tables.
|
||||
/* Usage:
|
||||
WEBP_DSP_INIT_FUNC(InitFunc) {
|
||||
...function body
|
||||
}
|
||||
*/
|
||||
#define WEBP_DSP_INIT_FUNC(name) \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
|
||||
WEBP_TSAN_IGNORE_FUNCTION void name(void) { \
|
||||
WEBP_DSP_INIT(name ## _body); \
|
||||
} \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
|
||||
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#if defined(__clang__) && defined(__has_attribute)
|
||||
#if __has_attribute(no_sanitize)
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures. This is only meant to silence unaligned loads on platforms that
|
||||
// are known to support them.
|
||||
#undef WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF \
|
||||
__attribute__((no_sanitize("undefined")))
|
||||
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures related to unsigned integer overflows. This is only meant to
|
||||
// silence cases where this well defined behavior is expected.
|
||||
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
|
||||
__attribute__((no_sanitize("unsigned-integer-overflow")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
|
||||
// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
|
||||
#if !defined(WEBP_OFFSET_PTR)
|
||||
#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
kSlowSSSE3, // special feature for slow SSSE3 architectures
|
||||
kSSE4_1,
|
||||
kAVX,
|
||||
kAVX2,
|
||||
kNEON,
|
||||
kMIPS32,
|
||||
kMIPSdspR2,
|
||||
kMSA
|
||||
} CPUFeature;
|
||||
// returns true if the CPU supports the feature.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init stub generator
|
||||
@ -514,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
|
||||
// utilities for accurate RGB->YUV conversion
|
||||
extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len);
|
||||
extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
|
||||
int16_t* dst, int len);
|
||||
extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
|
||||
int len,
|
||||
const uint16_t* best_y, uint16_t* out);
|
||||
|
||||
// Must be called before using the above.
|
||||
void WebPInitConvertARGBToYUV(void);
|
||||
|
||||
@ -578,26 +387,29 @@ extern void (*WebPApplyAlphaMultiply4444)(
|
||||
|
||||
// Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
|
||||
// Returns true if alpha[] plane has non-trivial values different from 0xff.
|
||||
extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride);
|
||||
extern int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride, int width, int height,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride);
|
||||
|
||||
// Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the
|
||||
// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
|
||||
extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride);
|
||||
extern void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride, int width, int height,
|
||||
uint32_t* WEBP_RESTRICT dst,
|
||||
int dst_stride);
|
||||
|
||||
// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
|
||||
// (this is the opposite of WebPDispatchAlpha).
|
||||
// Returns true if there's only trivial 0xff alpha values.
|
||||
extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride);
|
||||
extern int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT argb,
|
||||
int argb_stride, int width, int height,
|
||||
uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride);
|
||||
|
||||
// Extract the green values from 32b values in argb[] and pack them into alpha[]
|
||||
// (this is the opposite of WebPDispatchAlphaToGreen).
|
||||
extern void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
|
||||
extern void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT alpha, int size);
|
||||
|
||||
// Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B).
|
||||
// Un-Multiply operation transforms x into x * 255 / A.
|
||||
@ -610,29 +422,35 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
|
||||
int inverse);
|
||||
|
||||
// Same for a row of single values, with side alpha values.
|
||||
extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
extern void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
|
||||
const uint8_t* WEBP_RESTRICT const alpha,
|
||||
int width, int inverse);
|
||||
|
||||
// Same as WebPMultRow(), but for several 'num_rows' rows.
|
||||
void WebPMultRows(uint8_t* ptr, int stride,
|
||||
const uint8_t* alpha, int alpha_stride,
|
||||
void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
|
||||
const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
|
||||
int width, int num_rows, int inverse);
|
||||
|
||||
// Plain-C versions, used as fallback by some implementations.
|
||||
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
|
||||
void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
|
||||
const uint8_t* WEBP_RESTRICT const alpha,
|
||||
int width, int inverse);
|
||||
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
// ARGB packing function: a/r/g/b input is rgba or bgra order.
|
||||
extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r,
|
||||
const uint8_t* g, const uint8_t* b, int len,
|
||||
uint32_t* out);
|
||||
extern void (*WebPPackARGB)(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT r,
|
||||
const uint8_t* WEBP_RESTRICT g,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int len, uint32_t* WEBP_RESTRICT out);
|
||||
#endif
|
||||
|
||||
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
|
||||
extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||
int len, int step, uint32_t* out);
|
||||
extern void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
|
||||
const uint8_t* WEBP_RESTRICT g,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int len, int step, uint32_t* WEBP_RESTRICT out);
|
||||
|
||||
// This function returns true if src[i] contains a value different from 0xff.
|
||||
extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
|
||||
|
7
3rdparty/libwebp/src/dsp/enc.c
vendored
7
3rdparty/libwebp/src/dsp/enc.c
vendored
@ -732,6 +732,7 @@ VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
|
||||
VP8BlockCopy VP8Copy4x4;
|
||||
VP8BlockCopy VP8Copy16x8;
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8EncDspInitSSE2(void);
|
||||
extern void VP8EncDspInitSSE41(void);
|
||||
extern void VP8EncDspInitNEON(void);
|
||||
@ -773,10 +774,10 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8EncDspInitSSE2();
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
VP8EncDspInitSSE41();
|
||||
}
|
||||
@ -800,7 +801,7 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8EncDspInitNEON();
|
||||
|
13
3rdparty/libwebp/src/dsp/enc_neon.c
vendored
13
3rdparty/libwebp/src/dsp/enc_neon.c
vendored
@ -9,7 +9,7 @@
|
||||
//
|
||||
// ARM NEON version of speed-critical encoding functions.
|
||||
//
|
||||
// adapted from libvpx (http://www.webmproject.org/code/)
|
||||
// adapted from libvpx (https://www.webmproject.org/code/)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
@ -764,9 +764,14 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
|
||||
// Horizontal sum of all four uint32_t values in 'sum'.
|
||||
static int SumToInt_NEON(uint32x4_t sum) {
|
||||
#if WEBP_AARCH64
|
||||
return (int)vaddvq_u32(sum);
|
||||
#else
|
||||
const uint64x2_t sum2 = vpaddlq_u32(sum);
|
||||
const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
|
||||
return (int)sum3;
|
||||
const uint32x2_t sum3 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum2)),
|
||||
vreinterpret_u32_u64(vget_high_u64(sum2)));
|
||||
return (int)vget_lane_u32(sum3, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
@ -860,7 +865,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
uint8x8x4_t shuffles;
|
||||
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
|
||||
// non-standard versions there.
|
||||
#if defined(__APPLE__) && defined(__aarch64__) && \
|
||||
#if defined(__APPLE__) && WEBP_AARCH64 && \
|
||||
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
|
||||
uint8x16x2_t all_out;
|
||||
INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
|
||||
|
293
3rdparty/libwebp/src/dsp/enc_sse2.c
vendored
293
3rdparty/libwebp/src/dsp/enc_sse2.c
vendored
@ -25,9 +25,160 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
// Does one inverse transform.
|
||||
static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
// K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
|
||||
//
|
||||
// To be able to use signed 16-bit integers, we use the following trick to
|
||||
// have constants within range:
|
||||
// - Associated constants are obtained by subtracting the 16-bit fixed point
|
||||
// version of one:
|
||||
// k = K - (1 << 16) => K = k + (1 << 16)
|
||||
// K1 = 85627 => k1 = 20091
|
||||
// K2 = 35468 => k2 = -30068
|
||||
// - The multiplication of a variable by a constant becomes the sum of the
|
||||
// variable and the multiplication of that variable by the associated
|
||||
// constant:
|
||||
// (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
|
||||
const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068,
|
||||
20091, 20091, 20091, 20091);
|
||||
const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091,
|
||||
-30068, -30068, -30068, -30068);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4);
|
||||
__m128i T01, T23;
|
||||
|
||||
// Load and concatenate the transform coefficients.
|
||||
const __m128i in01 = _mm_loadu_si128((const __m128i*)&in[0]);
|
||||
const __m128i in23 = _mm_loadu_si128((const __m128i*)&in[8]);
|
||||
// a00 a10 a20 a30 a01 a11 a21 a31
|
||||
// a02 a12 a22 a32 a03 a13 a23 a33
|
||||
|
||||
// Vertical pass and subsequent transpose.
|
||||
{
|
||||
const __m128i in1 = _mm_unpackhi_epi64(in01, in01);
|
||||
const __m128i in3 = _mm_unpackhi_epi64(in23, in23);
|
||||
|
||||
// First pass, c and d calculations are longer because of the "trick"
|
||||
// multiplications.
|
||||
// c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
|
||||
// d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
|
||||
const __m128i a_d3 = _mm_add_epi16(in01, in23);
|
||||
const __m128i b_c3 = _mm_sub_epi16(in01, in23);
|
||||
const __m128i c1d1 = _mm_mulhi_epi16(in1, k2k1);
|
||||
const __m128i c2d2 = _mm_mulhi_epi16(in3, k1k2);
|
||||
const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3);
|
||||
const __m128i c4 = _mm_sub_epi16(c1d1, c2d2);
|
||||
const __m128i c = _mm_add_epi16(c3, c4);
|
||||
const __m128i d4u = _mm_add_epi16(c1d1, c2d2);
|
||||
const __m128i du = _mm_add_epi16(a_d3, d4u);
|
||||
const __m128i d = _mm_unpackhi_epi64(du, du);
|
||||
|
||||
// Second pass.
|
||||
const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3);
|
||||
const __m128i comb_dc = _mm_unpacklo_epi64(d, c);
|
||||
|
||||
const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc);
|
||||
const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc);
|
||||
const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
|
||||
const __m128i transpose_0 = _mm_unpacklo_epi16(tmp01, tmp23);
|
||||
const __m128i transpose_1 = _mm_unpackhi_epi16(tmp01, tmp23);
|
||||
// a00 a20 a01 a21 a02 a22 a03 a23
|
||||
// a10 a30 a11 a31 a12 a32 a13 a33
|
||||
|
||||
T01 = _mm_unpacklo_epi16(transpose_0, transpose_1);
|
||||
T23 = _mm_unpackhi_epi16(transpose_0, transpose_1);
|
||||
// a00 a10 a20 a30 a01 a11 a21 a31
|
||||
// a02 a12 a22 a32 a03 a13 a23 a33
|
||||
}
|
||||
|
||||
// Horizontal pass and subsequent transpose.
|
||||
{
|
||||
const __m128i T1 = _mm_unpackhi_epi64(T01, T01);
|
||||
const __m128i T3 = _mm_unpackhi_epi64(T23, T23);
|
||||
|
||||
// First pass, c and d calculations are longer because of the "trick"
|
||||
// multiplications.
|
||||
const __m128i dc = _mm_add_epi16(T01, zero_four);
|
||||
|
||||
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
|
||||
// d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
|
||||
const __m128i a_d3 = _mm_add_epi16(dc, T23);
|
||||
const __m128i b_c3 = _mm_sub_epi16(dc, T23);
|
||||
const __m128i c1d1 = _mm_mulhi_epi16(T1, k2k1);
|
||||
const __m128i c2d2 = _mm_mulhi_epi16(T3, k1k2);
|
||||
const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3);
|
||||
const __m128i c4 = _mm_sub_epi16(c1d1, c2d2);
|
||||
const __m128i c = _mm_add_epi16(c3, c4);
|
||||
const __m128i d4u = _mm_add_epi16(c1d1, c2d2);
|
||||
const __m128i du = _mm_add_epi16(a_d3, d4u);
|
||||
const __m128i d = _mm_unpackhi_epi64(du, du);
|
||||
|
||||
// Second pass.
|
||||
const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3);
|
||||
const __m128i comb_dc = _mm_unpacklo_epi64(d, c);
|
||||
|
||||
const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc);
|
||||
const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc);
|
||||
const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
|
||||
const __m128i shifted01 = _mm_srai_epi16(tmp01, 3);
|
||||
const __m128i shifted23 = _mm_srai_epi16(tmp23, 3);
|
||||
// a00 a01 a02 a03 a10 a11 a12 a13
|
||||
// a20 a21 a22 a23 a30 a31 a32 a33
|
||||
|
||||
const __m128i transpose_0 = _mm_unpacklo_epi16(shifted01, shifted23);
|
||||
const __m128i transpose_1 = _mm_unpackhi_epi16(shifted01, shifted23);
|
||||
// a00 a20 a01 a21 a02 a22 a03 a23
|
||||
// a10 a30 a11 a31 a12 a32 a13 a33
|
||||
|
||||
T01 = _mm_unpacklo_epi16(transpose_0, transpose_1);
|
||||
T23 = _mm_unpackhi_epi16(transpose_0, transpose_1);
|
||||
// a00 a10 a20 a30 a01 a11 a21 a31
|
||||
// a02 a12 a22 a32 a03 a13 a23 a33
|
||||
}
|
||||
|
||||
// Add inverse transform to 'ref' and store.
|
||||
{
|
||||
// Load the reference(s).
|
||||
__m128i ref01, ref23, ref0123;
|
||||
int32_t buf[4];
|
||||
|
||||
// Load four bytes/pixels per line.
|
||||
const __m128i ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS]));
|
||||
const __m128i ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS]));
|
||||
const __m128i ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS]));
|
||||
const __m128i ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS]));
|
||||
ref01 = _mm_unpacklo_epi32(ref0, ref1);
|
||||
ref23 = _mm_unpacklo_epi32(ref2, ref3);
|
||||
|
||||
// Convert to 16b.
|
||||
ref01 = _mm_unpacklo_epi8(ref01, zero);
|
||||
ref23 = _mm_unpacklo_epi8(ref23, zero);
|
||||
// Add the inverse transform(s).
|
||||
ref01 = _mm_add_epi16(ref01, T01);
|
||||
ref23 = _mm_add_epi16(ref23, T23);
|
||||
// Unsigned saturate to 8b.
|
||||
ref0123 = _mm_packus_epi16(ref01, ref23);
|
||||
|
||||
_mm_storeu_si128((__m128i *)buf, ref0123);
|
||||
|
||||
// Store four bytes/pixels per line.
|
||||
WebPInt32ToMem(&dst[0 * BPS], buf[0]);
|
||||
WebPInt32ToMem(&dst[1 * BPS], buf[1]);
|
||||
WebPInt32ToMem(&dst[2 * BPS], buf[2]);
|
||||
WebPInt32ToMem(&dst[3 * BPS], buf[3]);
|
||||
}
|
||||
}
|
||||
|
||||
// Does two inverse transforms.
|
||||
static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -49,33 +200,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
__m128i T0, T1, T2, T3;
|
||||
|
||||
// Load and concatenate the transform coefficients (we'll do two inverse
|
||||
// transforms in parallel). In the case of only one inverse transform, the
|
||||
// second half of the vectors will just contain random value we'll never
|
||||
// use nor store.
|
||||
// transforms in parallel).
|
||||
__m128i in0, in1, in2, in3;
|
||||
{
|
||||
in0 = _mm_loadl_epi64((const __m128i*)&in[0]);
|
||||
in1 = _mm_loadl_epi64((const __m128i*)&in[4]);
|
||||
in2 = _mm_loadl_epi64((const __m128i*)&in[8]);
|
||||
in3 = _mm_loadl_epi64((const __m128i*)&in[12]);
|
||||
// a00 a10 a20 a30 x x x x
|
||||
// a01 a11 a21 a31 x x x x
|
||||
// a02 a12 a22 a32 x x x x
|
||||
// a03 a13 a23 a33 x x x x
|
||||
if (do_two) {
|
||||
const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]);
|
||||
const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]);
|
||||
const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]);
|
||||
const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]);
|
||||
in0 = _mm_unpacklo_epi64(in0, inB0);
|
||||
in1 = _mm_unpacklo_epi64(in1, inB1);
|
||||
in2 = _mm_unpacklo_epi64(in2, inB2);
|
||||
in3 = _mm_unpacklo_epi64(in3, inB3);
|
||||
// a00 a10 a20 a30 b00 b10 b20 b30
|
||||
// a01 a11 a21 a31 b01 b11 b21 b31
|
||||
// a02 a12 a22 a32 b02 b12 b22 b32
|
||||
// a03 a13 a23 a33 b03 b13 b23 b33
|
||||
}
|
||||
const __m128i tmp0 = _mm_loadu_si128((const __m128i*)&in[0]);
|
||||
const __m128i tmp1 = _mm_loadu_si128((const __m128i*)&in[8]);
|
||||
const __m128i tmp2 = _mm_loadu_si128((const __m128i*)&in[16]);
|
||||
const __m128i tmp3 = _mm_loadu_si128((const __m128i*)&in[24]);
|
||||
in0 = _mm_unpacklo_epi64(tmp0, tmp2);
|
||||
in1 = _mm_unpackhi_epi64(tmp0, tmp2);
|
||||
in2 = _mm_unpacklo_epi64(tmp1, tmp3);
|
||||
in3 = _mm_unpackhi_epi64(tmp1, tmp3);
|
||||
// a00 a10 a20 a30 b00 b10 b20 b30
|
||||
// a01 a11 a21 a31 b01 b11 b21 b31
|
||||
// a02 a12 a22 a32 b02 b12 b22 b32
|
||||
// a03 a13 a23 a33 b03 b13 b23 b33
|
||||
}
|
||||
|
||||
// Vertical pass and subsequent transpose.
|
||||
@ -148,19 +287,11 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
// Load the reference(s).
|
||||
__m128i ref0, ref1, ref2, ref3;
|
||||
if (do_two) {
|
||||
// Load eight bytes/pixels per line.
|
||||
ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
|
||||
ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
|
||||
ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
|
||||
ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
|
||||
} else {
|
||||
// Load four bytes/pixels per line.
|
||||
ref0 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[0 * BPS]));
|
||||
ref1 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[1 * BPS]));
|
||||
ref2 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[2 * BPS]));
|
||||
ref3 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[3 * BPS]));
|
||||
}
|
||||
// Load eight bytes/pixels per line.
|
||||
ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
|
||||
ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
|
||||
ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
|
||||
ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
|
||||
// Convert to 16b.
|
||||
ref0 = _mm_unpacklo_epi8(ref0, zero);
|
||||
ref1 = _mm_unpacklo_epi8(ref1, zero);
|
||||
@ -176,20 +307,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
ref1 = _mm_packus_epi16(ref1, ref1);
|
||||
ref2 = _mm_packus_epi16(ref2, ref2);
|
||||
ref3 = _mm_packus_epi16(ref3, ref3);
|
||||
// Store the results.
|
||||
if (do_two) {
|
||||
// Store eight bytes/pixels per line.
|
||||
_mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
|
||||
_mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
|
||||
_mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
|
||||
_mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
|
||||
} else {
|
||||
// Store four bytes/pixels per line.
|
||||
WebPUint32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0));
|
||||
WebPUint32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1));
|
||||
WebPUint32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2));
|
||||
WebPUint32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3));
|
||||
}
|
||||
// Store eight bytes/pixels per line.
|
||||
_mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
|
||||
_mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
|
||||
_mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
|
||||
_mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
|
||||
}
|
||||
}
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
if (do_two) {
|
||||
ITransform_Two_SSE2(ref, in, dst);
|
||||
} else {
|
||||
ITransform_One_SSE2(ref, in, dst);
|
||||
}
|
||||
}
|
||||
|
||||
@ -481,7 +613,7 @@ static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
const __m128i values = _mm_set1_epi8((char)v);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
_mm_storel_epi64((__m128i*)(dst + j * BPS), values);
|
||||
}
|
||||
@ -489,7 +621,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
|
||||
|
||||
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
|
||||
int j;
|
||||
const __m128i values = _mm_set1_epi8(v);
|
||||
const __m128i values = _mm_set1_epi8((char)v);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
_mm_store_si128((__m128i*)(dst + j * BPS), values);
|
||||
}
|
||||
@ -540,7 +672,7 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(left[j]);
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
_mm_storel_epi64((__m128i*)dst, values);
|
||||
dst += BPS;
|
||||
}
|
||||
@ -549,7 +681,7 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
const __m128i values = _mm_set1_epi8(left[j]);
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
_mm_store_si128((__m128i*)dst, values);
|
||||
dst += BPS;
|
||||
}
|
||||
@ -722,10 +854,10 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
|
||||
const __m128i b = _mm_subs_epu8(a, lsb);
|
||||
const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
|
||||
const uint32_t vals = _mm_cvtsi128_si32(avg);
|
||||
const int vals = _mm_cvtsi128_si32(avg);
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
WebPUint32ToMem(dst + i * BPS, vals);
|
||||
WebPInt32ToMem(dst + i * BPS, vals);
|
||||
}
|
||||
}
|
||||
|
||||
@ -760,10 +892,10 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
|
||||
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
|
||||
const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
@ -782,10 +914,10 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
|
||||
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
|
||||
const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
|
||||
|
||||
// these two are hard to implement in SSE2, so we keep the C-version:
|
||||
DST(0, 2) = AVG3(J, I, X);
|
||||
@ -807,11 +939,12 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
const __m128i abbc = _mm_or_si128(ab, bc);
|
||||
const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
|
||||
const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
|
||||
const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
|
||||
const uint32_t extra_out =
|
||||
(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
|
||||
|
||||
// these two are hard to get and irregular
|
||||
DST(3, 2) = (extra_out >> 0) & 0xff;
|
||||
@ -829,10 +962,10 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
|
||||
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
|
||||
const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
|
||||
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
|
||||
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
|
||||
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
|
||||
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
|
||||
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
|
||||
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
@ -875,14 +1008,14 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
|
||||
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
|
||||
int y;
|
||||
for (y = 0; y < 4; ++y, dst += BPS) {
|
||||
const int val = top[-2 - y] - top[-1];
|
||||
const __m128i base = _mm_set1_epi16(val);
|
||||
const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
|
||||
WebPUint32ToMem(dst, _mm_cvtsi128_si32(out));
|
||||
WebPInt32ToMem(dst, _mm_cvtsi128_si32(out));
|
||||
}
|
||||
}
|
||||
|
||||
|
14
3rdparty/libwebp/src/dsp/filters.c
vendored
14
3rdparty/libwebp/src/dsp/filters.c
vendored
@ -189,6 +189,12 @@ static void GradientFilter_C(const uint8_t* data, int width, int height,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
(void)prev;
|
||||
if (out != in) memcpy(out, in, width * sizeof(*out));
|
||||
}
|
||||
|
||||
static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
uint8_t* out, int width) {
|
||||
uint8_t pred = (prev == NULL) ? 0 : prev[0];
|
||||
@ -233,13 +239,14 @@ static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
|
||||
WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
|
||||
WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8FiltersInitMIPSdspR2(void);
|
||||
extern void VP8FiltersInitMSA(void);
|
||||
extern void VP8FiltersInitNEON(void);
|
||||
extern void VP8FiltersInitSSE2(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
|
||||
WebPUnfilters[WEBP_FILTER_NONE] = NULL;
|
||||
WebPUnfilters[WEBP_FILTER_NONE] = NoneUnfilter_C;
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
|
||||
@ -254,7 +261,7 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
|
||||
#endif
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8FiltersInitSSE2();
|
||||
}
|
||||
@ -271,13 +278,14 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8FiltersInitNEON();
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(WebPUnfilters[WEBP_FILTER_NONE] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
|
||||
assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
|
||||
|
5
3rdparty/libwebp/src/dsp/filters_sse2.c
vendored
5
3rdparty/libwebp/src/dsp/filters_sse2.c
vendored
@ -320,7 +320,12 @@ extern void VP8FiltersInitSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
|
||||
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
|
||||
#if defined(CHROMIUM)
|
||||
// TODO(crbug.com/654974)
|
||||
(void)VerticalUnfilter_SSE2;
|
||||
#else
|
||||
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
|
||||
#endif
|
||||
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
|
||||
|
||||
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
|
||||
|
81
3rdparty/libwebp/src/dsp/lossless.c
vendored
81
3rdparty/libwebp/src/dsp/lossless.c
vendored
@ -49,7 +49,7 @@ static WEBP_INLINE uint32_t Clip255(uint32_t a) {
|
||||
}
|
||||
|
||||
static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
|
||||
return Clip255(a + b - c);
|
||||
return Clip255((uint32_t)(a + b - c));
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
|
||||
@ -66,7 +66,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
|
||||
}
|
||||
|
||||
static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
|
||||
return Clip255(a + (a - b) / 2);
|
||||
return Clip255((uint32_t)(a + (a - b) / 2));
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
|
||||
@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictors
|
||||
|
||||
uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)top;
|
||||
(void)left;
|
||||
return ARGB_BLACK;
|
||||
}
|
||||
uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)top;
|
||||
return left;
|
||||
return *left;
|
||||
}
|
||||
uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor2_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[0];
|
||||
}
|
||||
uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor3_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[1];
|
||||
}
|
||||
uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor4_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return top[-1];
|
||||
}
|
||||
uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average3(left, top[0], top[1]);
|
||||
uint32_t VP8LPredictor5_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average3(*left, top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[-1]);
|
||||
uint32_t VP8LPredictor6_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(*left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(left, top[0]);
|
||||
uint32_t VP8LPredictor7_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(*left, top[0]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor8_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[-1], top[0]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) {
|
||||
uint32_t VP8LPredictor9_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2(top[0], top[1]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
|
||||
uint32_t VP8LPredictor10_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average4(*left, top[-1], top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Select(top[0], left, top[-1]);
|
||||
uint32_t VP8LPredictor11_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Select(top[0], *left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
|
||||
uint32_t VP8LPredictor12_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
|
||||
uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
|
||||
@ -279,10 +293,10 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
const uint32_t red = argb >> 16;
|
||||
int new_red = red & 0xff;
|
||||
int new_blue = argb & 0xff;
|
||||
new_red += ColorTransformDelta(m->green_to_red_, green);
|
||||
new_red += ColorTransformDelta((int8_t)m->green_to_red_, green);
|
||||
new_red &= 0xff;
|
||||
new_blue += ColorTransformDelta(m->green_to_blue_, green);
|
||||
new_blue += ColorTransformDelta(m->red_to_blue_, (int8_t)new_red);
|
||||
new_blue += ColorTransformDelta((int8_t)m->green_to_blue_, green);
|
||||
new_blue += ColorTransformDelta((int8_t)m->red_to_blue_, (int8_t)new_red);
|
||||
new_blue &= 0xff;
|
||||
dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
||||
}
|
||||
@ -381,7 +395,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
assert(row_start < row_end);
|
||||
assert(row_end <= transform->ysize_);
|
||||
switch (transform->type_) {
|
||||
case SUBTRACT_GREEN:
|
||||
case SUBTRACT_GREEN_TRANSFORM:
|
||||
VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
|
||||
break;
|
||||
case PREDICTOR_TRANSFORM:
|
||||
@ -574,7 +588,9 @@ VP8LConvertFunc VP8LConvertBGRAToBGR;
|
||||
VP8LMapARGBFunc VP8LMapColor32b;
|
||||
VP8LMapAlphaFunc VP8LMapColor8b;
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8LDspInitSSE2(void);
|
||||
extern void VP8LDspInitSSE41(void);
|
||||
extern void VP8LDspInitNEON(void);
|
||||
extern void VP8LDspInitMIPSdspR2(void);
|
||||
extern void VP8LDspInitMSA(void);
|
||||
@ -621,9 +637,14 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8LDspInitSSE2();
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
VP8LDspInitSSE41();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||
@ -638,7 +659,7 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8LDspInitNEON();
|
||||
|
53
3rdparty/libwebp/src/dsp/lossless.h
vendored
53
3rdparty/libwebp/src/dsp/lossless.h
vendored
@ -28,23 +28,38 @@ extern "C" {
|
||||
//------------------------------------------------------------------------------
|
||||
// Decoding
|
||||
|
||||
typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
|
||||
typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
extern VP8LPredictorFunc VP8LPredictors[16];
|
||||
|
||||
uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top);
|
||||
uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor2_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor3_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor4_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor5_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor6_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor7_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor8_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor9_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor10_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor11_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor12_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
|
||||
// These Add/Sub function expects upper[-1] and out[-1] to be readable.
|
||||
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
|
||||
@ -167,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
// -----------------------------------------------------------------------------
|
||||
// Huffman-cost related functions.
|
||||
|
||||
typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||
typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||
int length);
|
||||
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||
int length);
|
||||
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
|
||||
const int Y[256]);
|
||||
|
||||
@ -183,7 +198,7 @@ typedef struct { // small struct to hold counters
|
||||
} VP8LStreaks;
|
||||
|
||||
typedef struct { // small struct to hold bit entropy results
|
||||
double entropy; // entropy
|
||||
float entropy; // entropy
|
||||
uint32_t sum; // sum of the population
|
||||
int nonzeros; // number of non-zero elements in the population
|
||||
uint32_t max_val; // maximum value in the population
|
||||
|
8
3rdparty/libwebp/src/dsp/lossless_common.h
vendored
8
3rdparty/libwebp/src/dsp/lossless_common.h
vendored
@ -16,9 +16,9 @@
|
||||
#ifndef WEBP_DSP_LOSSLESS_COMMON_H_
|
||||
#define WEBP_DSP_LOSSLESS_COMMON_H_
|
||||
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -166,7 +166,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Transform-related functions use din both encoding and decoding.
|
||||
// Transform-related functions used in both encoding and decoding.
|
||||
|
||||
// Macros used to create a batch predictor that iteratively uses a
|
||||
// one-pixel predictor.
|
||||
@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \
|
||||
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
|
||||
out[x] = VP8LAddPixels(in[x], pred); \
|
||||
} \
|
||||
}
|
||||
|
73
3rdparty/libwebp/src/dsp/lossless_enc.c
vendored
73
3rdparty/libwebp/src/dsp/lossless_enc.c
vendored
@ -329,6 +329,15 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
|
||||
static float FastSLog2Slow_C(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
// use clz if available
|
||||
const int log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint32_t y = 1 << log_cnt;
|
||||
int correction = 0;
|
||||
const float v_f = (float)v;
|
||||
const uint32_t orig_v = v;
|
||||
v >>= log_cnt;
|
||||
#else
|
||||
int log_cnt = 0;
|
||||
uint32_t y = 1;
|
||||
int correction = 0;
|
||||
@ -339,6 +348,7 @@ static float FastSLog2Slow_C(uint32_t v) {
|
||||
v = v >> 1;
|
||||
y = y << 1;
|
||||
} while (v >= LOG_LOOKUP_IDX_MAX);
|
||||
#endif
|
||||
// vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
|
||||
// Xf = floor(Xf) * (1 + (v % y) / v)
|
||||
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
|
||||
@ -355,6 +365,14 @@ static float FastSLog2Slow_C(uint32_t v) {
|
||||
static float FastLog2Slow_C(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
// use clz if available
|
||||
const int log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint32_t y = 1 << log_cnt;
|
||||
const uint32_t orig_v = v;
|
||||
double log_2;
|
||||
v >>= log_cnt;
|
||||
#else
|
||||
int log_cnt = 0;
|
||||
uint32_t y = 1;
|
||||
const uint32_t orig_v = v;
|
||||
@ -364,6 +382,7 @@ static float FastLog2Slow_C(uint32_t v) {
|
||||
v = v >> 1;
|
||||
y = y << 1;
|
||||
} while (v >= LOG_LOOKUP_IDX_MAX);
|
||||
#endif
|
||||
log_2 = kLog2Table[v] + log_cnt;
|
||||
if (orig_v >= APPROX_LOG_MAX) {
|
||||
// Since the division is still expensive, add this correction factor only
|
||||
@ -383,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) {
|
||||
// Compute the combined Shanon's entropy for distribution {X} and {X+Y}
|
||||
static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
|
||||
int i;
|
||||
double retval = 0.;
|
||||
float retval = 0.f;
|
||||
int sumX = 0, sumXY = 0;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
const int x = X[i];
|
||||
@ -399,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
|
||||
}
|
||||
}
|
||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||
return (float)retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
|
||||
@ -503,11 +522,11 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
|
||||
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const int argb = argb_data[i];
|
||||
const int argb = (int)argb_data[i];
|
||||
const int green = (argb >> 8) & 0xff;
|
||||
const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
|
||||
const uint32_t new_b = (((argb >> 0) & 0xff) - green) & 0xff;
|
||||
argb_data[i] = (argb & 0xff00ff00u) | (new_r << 16) | new_b;
|
||||
argb_data[i] = ((uint32_t)argb & 0xff00ff00u) | (new_r << 16) | new_b;
|
||||
}
|
||||
}
|
||||
|
||||
@ -528,10 +547,10 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
|
||||
const int8_t red = U32ToS8(argb >> 16);
|
||||
int new_red = red & 0xff;
|
||||
int new_blue = argb & 0xff;
|
||||
new_red -= ColorTransformDelta(m->green_to_red_, green);
|
||||
new_red -= ColorTransformDelta((int8_t)m->green_to_red_, green);
|
||||
new_red &= 0xff;
|
||||
new_blue -= ColorTransformDelta(m->green_to_blue_, green);
|
||||
new_blue -= ColorTransformDelta(m->red_to_blue_, red);
|
||||
new_blue -= ColorTransformDelta((int8_t)m->green_to_blue_, green);
|
||||
new_blue -= ColorTransformDelta((int8_t)m->red_to_blue_, red);
|
||||
new_blue &= 0xff;
|
||||
data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
||||
}
|
||||
@ -541,7 +560,7 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
|
||||
uint32_t argb) {
|
||||
const int8_t green = U32ToS8(argb >> 8);
|
||||
int new_red = argb >> 16;
|
||||
new_red -= ColorTransformDelta(green_to_red, green);
|
||||
new_red -= ColorTransformDelta((int8_t)green_to_red, green);
|
||||
return (new_red & 0xff);
|
||||
}
|
||||
|
||||
@ -550,9 +569,9 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
|
||||
uint32_t argb) {
|
||||
const int8_t green = U32ToS8(argb >> 8);
|
||||
const int8_t red = U32ToS8(argb >> 16);
|
||||
uint8_t new_blue = argb & 0xff;
|
||||
new_blue -= ColorTransformDelta(green_to_blue, green);
|
||||
new_blue -= ColorTransformDelta(red_to_blue, red);
|
||||
int new_blue = argb & 0xff;
|
||||
new_blue -= ColorTransformDelta((int8_t)green_to_blue, green);
|
||||
new_blue -= ColorTransformDelta((int8_t)red_to_blue, red);
|
||||
return (new_blue & 0xff);
|
||||
}
|
||||
|
||||
@ -617,20 +636,25 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static double ExtraCost_C(const uint32_t* population, int length) {
|
||||
static uint32_t ExtraCost_C(const uint32_t* population, int length) {
|
||||
int i;
|
||||
double cost = 0.;
|
||||
for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
|
||||
uint32_t cost = population[4] + population[5];
|
||||
assert(length % 2 == 0);
|
||||
for (i = 2; i < length / 2 - 1; ++i) {
|
||||
cost += i * (population[2 * i + 2] + population[2 * i + 3]);
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
int length) {
|
||||
static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
int length) {
|
||||
int i;
|
||||
double cost = 0.;
|
||||
for (i = 2; i < length - 2; ++i) {
|
||||
const int xy = X[i + 2] + Y[i + 2];
|
||||
cost += (i >> 1) * xy;
|
||||
uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
|
||||
assert(length % 2 == 0);
|
||||
for (i = 2; i < length / 2 - 1; ++i) {
|
||||
const int xy0 = X[2 * i + 2] + Y[2 * i + 2];
|
||||
const int xy1 = X[2 * i + 3] + Y[2 * i + 3];
|
||||
cost += i * (xy0 + xy1);
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
@ -726,7 +750,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
const uint32_t pred = \
|
||||
VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x); \
|
||||
VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x); \
|
||||
out[x] = VP8LSubPixels(in[x], pred); \
|
||||
} \
|
||||
}
|
||||
@ -772,6 +796,7 @@ VP8LBundleColorMapFunc VP8LBundleColorMap;
|
||||
VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
|
||||
VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8LEncDspInitSSE2(void);
|
||||
extern void VP8LEncDspInitSSE41(void);
|
||||
extern void VP8LEncDspInitNEON(void);
|
||||
@ -843,10 +868,10 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8LEncDspInitSSE2();
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
VP8LEncDspInitSSE41();
|
||||
}
|
||||
@ -870,7 +895,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
VP8LEncDspInitNEON();
|
||||
|
22
3rdparty/libwebp/src/dsp/lossless_enc_mips32.c
vendored
22
3rdparty/libwebp/src/dsp/lossless_enc_mips32.c
vendored
@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
|
||||
// cost += i * *(pop + 1);
|
||||
// pop += 2;
|
||||
// }
|
||||
// return (double)cost;
|
||||
static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
// return cost;
|
||||
static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
int i, temp0, temp1;
|
||||
const uint32_t* pop = &population[4];
|
||||
const uint32_t* const LoopEnd = &population[length];
|
||||
@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return (double)((int64_t)temp0 << 32 | temp1);
|
||||
return ((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
// C version of this function:
|
||||
@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
// pX += 2;
|
||||
// pY += 2;
|
||||
// }
|
||||
// return (double)cost;
|
||||
static double ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
// return cost;
|
||||
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
int i, temp0, temp1, temp2, temp3;
|
||||
const uint32_t* pX = &X[4];
|
||||
const uint32_t* pY = &Y[4];
|
||||
@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return (double)((int64_t)temp0 << 32 | temp1);
|
||||
return ((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
#define HUFFMAN_COST_PASS \
|
||||
@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
uint32_t* pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const uint32_t end = ((size) / 4) * 4;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
int i;
|
||||
ASM_START
|
||||
ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
|
||||
ASM_END_0
|
||||
for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
|
||||
for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const uint32_t end = ((size) / 4) * 4;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
int i;
|
||||
ASM_START
|
||||
ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
|
||||
ASM_END_1
|
||||
for (i = end; i < size; ++i) pout[i] += pa[i];
|
||||
for (i = 0; i < size - end; ++i) pout[i] += pa[i];
|
||||
}
|
||||
|
||||
#undef ASM_END_1
|
||||
|
2
3rdparty/libwebp/src/dsp/lossless_enc_neon.c
vendored
2
3rdparty/libwebp/src/dsp/lossless_enc_neon.c
vendored
@ -25,7 +25,7 @@
|
||||
|
||||
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
|
||||
// non-standard versions there.
|
||||
#if defined(__APPLE__) && defined(__aarch64__) && \
|
||||
#if defined(__APPLE__) && WEBP_AARCH64 && \
|
||||
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
|
||||
#define USE_VTBLQ
|
||||
#endif
|
||||
|
117
3rdparty/libwebp/src/dsp/lossless_enc_sse2.c
vendored
117
3rdparty/libwebp/src/dsp/lossless_enc_sse2.c
vendored
@ -54,8 +54,8 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
|
||||
CST_5b(m->green_to_blue_));
|
||||
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
|
||||
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
|
||||
const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks
|
||||
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks
|
||||
const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
|
||||
@ -232,79 +232,55 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Entropy
|
||||
|
||||
// Checks whether the X or Y contribution is worth computing and adding.
|
||||
// Used in loop unrolling.
|
||||
#define ANALYZE_X_OR_Y(x_or_y, j) \
|
||||
do { \
|
||||
if ((x_or_y)[i + (j)] != 0) retval -= VP8LFastSLog2((x_or_y)[i + (j)]); \
|
||||
} while (0)
|
||||
// TODO(https://crbug.com/webp/499): this function produces different results
|
||||
// from the C code due to use of double/float resulting in output differences
|
||||
// when compared to -noasm.
|
||||
#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86))
|
||||
|
||||
// Checks whether the X + Y contribution is worth computing and adding.
|
||||
// Used in loop unrolling.
|
||||
#define ANALYZE_XY(j) \
|
||||
do { \
|
||||
if (tmp[j] != 0) { \
|
||||
retval -= VP8LFastSLog2(tmp[j]); \
|
||||
ANALYZE_X_OR_Y(X, j); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#if !(defined(__i386__) || defined(_M_IX86))
|
||||
static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
|
||||
int i;
|
||||
double retval = 0.;
|
||||
int sumX, sumXY;
|
||||
int32_t tmp[4];
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
// Sums up X + Y, 4 ints at a time (and will merge it at the end for sumXY).
|
||||
__m128i sumXY_128 = zero;
|
||||
__m128i sumX_128 = zero;
|
||||
float retval = 0.f;
|
||||
int sumX = 0, sumXY = 0;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
for (i = 0; i < 256; i += 4) {
|
||||
const __m128i x = _mm_loadu_si128((const __m128i*)(X + i));
|
||||
const __m128i y = _mm_loadu_si128((const __m128i*)(Y + i));
|
||||
|
||||
// Check if any X is non-zero: this actually provides a speedup as X is
|
||||
// usually sparse.
|
||||
if (_mm_movemask_epi8(_mm_cmpeq_epi32(x, zero)) != 0xFFFF) {
|
||||
const __m128i xy_128 = _mm_add_epi32(x, y);
|
||||
sumXY_128 = _mm_add_epi32(sumXY_128, xy_128);
|
||||
|
||||
sumX_128 = _mm_add_epi32(sumX_128, x);
|
||||
|
||||
// Analyze the different X + Y.
|
||||
_mm_storeu_si128((__m128i*)tmp, xy_128);
|
||||
|
||||
ANALYZE_XY(0);
|
||||
ANALYZE_XY(1);
|
||||
ANALYZE_XY(2);
|
||||
ANALYZE_XY(3);
|
||||
} else {
|
||||
// X is fully 0, so only deal with Y.
|
||||
sumXY_128 = _mm_add_epi32(sumXY_128, y);
|
||||
|
||||
ANALYZE_X_OR_Y(Y, 0);
|
||||
ANALYZE_X_OR_Y(Y, 1);
|
||||
ANALYZE_X_OR_Y(Y, 2);
|
||||
ANALYZE_X_OR_Y(Y, 3);
|
||||
for (i = 0; i < 256; i += 16) {
|
||||
const __m128i x0 = _mm_loadu_si128((const __m128i*)(X + i + 0));
|
||||
const __m128i y0 = _mm_loadu_si128((const __m128i*)(Y + i + 0));
|
||||
const __m128i x1 = _mm_loadu_si128((const __m128i*)(X + i + 4));
|
||||
const __m128i y1 = _mm_loadu_si128((const __m128i*)(Y + i + 4));
|
||||
const __m128i x2 = _mm_loadu_si128((const __m128i*)(X + i + 8));
|
||||
const __m128i y2 = _mm_loadu_si128((const __m128i*)(Y + i + 8));
|
||||
const __m128i x3 = _mm_loadu_si128((const __m128i*)(X + i + 12));
|
||||
const __m128i y3 = _mm_loadu_si128((const __m128i*)(Y + i + 12));
|
||||
const __m128i x4 = _mm_packs_epi16(_mm_packs_epi32(x0, x1),
|
||||
_mm_packs_epi32(x2, x3));
|
||||
const __m128i y4 = _mm_packs_epi16(_mm_packs_epi32(y0, y1),
|
||||
_mm_packs_epi32(y2, y3));
|
||||
const int32_t mx = _mm_movemask_epi8(_mm_cmpgt_epi8(x4, zero));
|
||||
int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx;
|
||||
while (my) {
|
||||
const int32_t j = BitsCtz(my);
|
||||
int xy;
|
||||
if ((mx >> j) & 1) {
|
||||
const int x = X[i + j];
|
||||
sumXY += x;
|
||||
retval -= VP8LFastSLog2(x);
|
||||
}
|
||||
xy = X[i + j] + Y[i + j];
|
||||
sumX += xy;
|
||||
retval -= VP8LFastSLog2(xy);
|
||||
my &= my - 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Sum up sumX_128 to get sumX.
|
||||
_mm_storeu_si128((__m128i*)tmp, sumX_128);
|
||||
sumX = tmp[3] + tmp[2] + tmp[1] + tmp[0];
|
||||
|
||||
// Sum up sumXY_128 to get sumXY.
|
||||
_mm_storeu_si128((__m128i*)tmp, sumXY_128);
|
||||
sumXY = tmp[3] + tmp[2] + tmp[1] + tmp[0];
|
||||
|
||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||
return (float)retval;
|
||||
return retval;
|
||||
}
|
||||
#endif // !(defined(__i386__) || defined(_M_IX86))
|
||||
|
||||
#undef ANALYZE_X_OR_Y
|
||||
#undef ANALYZE_XY
|
||||
#else
|
||||
|
||||
#define DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC // won't be faster
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@ -400,7 +376,7 @@ static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
const __m128i mask_or = _mm_set1_epi32(0xff000000);
|
||||
const __m128i mask_or = _mm_set1_epi32((int)0xff000000);
|
||||
const __m128i mul_cst = _mm_set1_epi16(0x0104);
|
||||
const __m128i mask_mul = _mm_set1_epi16(0x0f00);
|
||||
for (x = 0; x + 16 <= width; x += 16, dst += 4) {
|
||||
@ -451,7 +427,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
|
||||
static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32(ARGB_BLACK);
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
|
||||
const __m128i res = _mm_sub_epi8(src, black);
|
||||
@ -662,10 +638,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
|
||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2;
|
||||
VP8LAddVector = AddVector_SSE2;
|
||||
VP8LAddVectorEq = AddVectorEq_SSE2;
|
||||
// TODO(https://crbug.com/webp/499): this function produces different results
|
||||
// from the C code due to use of double/float resulting in output differences
|
||||
// when compared to -noasm.
|
||||
#if !(defined(__i386__) || defined(_M_IX86))
|
||||
#if !defined(DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC)
|
||||
VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2;
|
||||
#endif
|
||||
VP8LVectorMismatch = VectorMismatch_SSE2;
|
||||
|
173
3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
vendored
173
3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
vendored
@ -18,8 +18,53 @@
|
||||
#include <smmintrin.h>
|
||||
#include "src/dsp/lossless.h"
|
||||
|
||||
// For sign-extended multiplying constants, pre-shifted by 5:
|
||||
#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
|
||||
//------------------------------------------------------------------------------
|
||||
// Cost operations.
|
||||
|
||||
static WEBP_INLINE uint32_t HorizontalSum_SSE41(__m128i cost) {
|
||||
cost = _mm_add_epi32(cost, _mm_srli_si128(cost, 8));
|
||||
cost = _mm_add_epi32(cost, _mm_srli_si128(cost, 4));
|
||||
return _mm_cvtsi128_si32(cost);
|
||||
}
|
||||
|
||||
static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
|
||||
int i;
|
||||
__m128i cost = _mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]);
|
||||
assert(length % 8 == 0);
|
||||
|
||||
for (i = 8; i + 8 <= length; i += 8) {
|
||||
const int j = (i - 2) >> 1;
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j);
|
||||
const __m128i a2 = _mm_hadd_epi32(a0, a1);
|
||||
const __m128i mul = _mm_mullo_epi32(a2, w);
|
||||
cost = _mm_add_epi32(mul, cost);
|
||||
}
|
||||
return HorizontalSum_SSE41(cost);
|
||||
}
|
||||
|
||||
// Same weighted sum as ExtraCost_SSE41, but applied to the element-wise sum
// of 'a' and 'b': entries 4..7 seed the accumulator (with 6 and 7 doubled),
// then each following group of 8 entries contributes its four adjacent-pair
// sums multiplied by consecutive weights starting at (i - 2) / 2.
// 'length' is asserted to be a multiple of 8.
static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a,
                                        const uint32_t* const b, int length) {
  const __m128i seed_a = _mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]);
  const __m128i seed_b = _mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]);
  __m128i acc = _mm_add_epi32(seed_a, seed_b);
  int pos = 8;
  assert(length % 8 == 0);

  while (pos + 8 <= length) {
    const int base = (pos - 2) >> 1;
    const __m128i weights = _mm_set_epi32(base + 3, base + 2, base + 1, base);
    const __m128i a_lo = _mm_loadu_si128((const __m128i*)&a[pos]);
    const __m128i a_hi = _mm_loadu_si128((const __m128i*)&a[pos + 4]);
    const __m128i b_lo = _mm_loadu_si128((const __m128i*)&b[pos]);
    const __m128i b_hi = _mm_loadu_si128((const __m128i*)&b[pos + 4]);
    // Adjacent-pair sums of each input, then combined.
    const __m128i a_pairs = _mm_hadd_epi32(a_lo, a_hi);
    const __m128i b_pairs = _mm_hadd_epi32(b_lo, b_hi);
    const __m128i both = _mm_add_epi32(a_pairs, b_pairs);
    const __m128i weighted = _mm_mullo_epi32(both, weights);
    acc = _mm_add_epi32(weighted, acc);
    pos += 8;
  }
  return HorizontalSum_SSE41(acc);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Subtract-Green Transform
|
||||
@ -44,46 +89,50 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
#define SPAN 8
|
||||
// For sign-extended multiplying constants, pre-shifted by 5:
|
||||
#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
|
||||
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]) {
|
||||
const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue));
|
||||
const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue));
|
||||
const __m128i mask_g = _mm_set1_epi16((short)0xff00); // green mask
|
||||
const __m128i mask_gb = _mm_set1_epi32(0xffff); // green/blue mask
|
||||
const __m128i mask_b = _mm_set1_epi16(0x00ff); // blue mask
|
||||
const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1);
|
||||
const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
2, -1, 6, -1, 10, -1, 14);
|
||||
int y;
|
||||
for (y = 0; y < tile_height; ++y) {
|
||||
const uint32_t* const src = argb + y * stride;
|
||||
int i, x;
|
||||
for (x = 0; x + SPAN <= tile_width; x += SPAN) {
|
||||
uint16_t values[SPAN];
|
||||
const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]);
|
||||
const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);
|
||||
const __m128i r0 = _mm_shuffle_epi8(in0, shuffler_lo);
|
||||
const __m128i r1 = _mm_shuffle_epi8(in1, shuffler_hi);
|
||||
const __m128i r = _mm_or_si128(r0, r1); // r 0
|
||||
const __m128i gb0 = _mm_and_si128(in0, mask_gb);
|
||||
const __m128i gb1 = _mm_and_si128(in1, mask_gb);
|
||||
const __m128i gb = _mm_packus_epi32(gb0, gb1); // g b
|
||||
const __m128i g = _mm_and_si128(gb, mask_g); // g 0
|
||||
const __m128i A = _mm_mulhi_epi16(r, mults_r); // x dbr
|
||||
const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dbg
|
||||
const __m128i C = _mm_sub_epi8(gb, B); // x b'
|
||||
const __m128i D = _mm_sub_epi8(C, A); // x b''
|
||||
const __m128i E = _mm_and_si128(D, mask_b); // 0 b''
|
||||
_mm_storeu_si128((__m128i*)values, E);
|
||||
for (i = 0; i < SPAN; ++i) ++histo[values[i]];
|
||||
const __m128i mult =
|
||||
MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue));
|
||||
const __m128i perm =
|
||||
_mm_setr_epi8(-1, 1, -1, 2, -1, 5, -1, 6, -1, 9, -1, 10, -1, 13, -1, 14);
|
||||
if (tile_width >= 4) {
|
||||
int y;
|
||||
for (y = 0; y < tile_height; ++y) {
|
||||
const uint32_t* const src = argb + y * stride;
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)src);
|
||||
const __m128i B1 = _mm_shuffle_epi8(A1, perm);
|
||||
const __m128i C1 = _mm_mulhi_epi16(B1, mult);
|
||||
const __m128i D1 = _mm_sub_epi16(A1, C1);
|
||||
__m128i E = _mm_add_epi16(_mm_srli_epi32(D1, 16), D1);
|
||||
int x;
|
||||
for (x = 4; x + 4 <= tile_width; x += 4) {
|
||||
const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x));
|
||||
__m128i B2, C2, D2;
|
||||
++histo[_mm_extract_epi8(E, 0)];
|
||||
B2 = _mm_shuffle_epi8(A2, perm);
|
||||
++histo[_mm_extract_epi8(E, 4)];
|
||||
C2 = _mm_mulhi_epi16(B2, mult);
|
||||
++histo[_mm_extract_epi8(E, 8)];
|
||||
D2 = _mm_sub_epi16(A2, C2);
|
||||
++histo[_mm_extract_epi8(E, 12)];
|
||||
E = _mm_add_epi16(_mm_srli_epi32(D2, 16), D2);
|
||||
}
|
||||
++histo[_mm_extract_epi8(E, 0)];
|
||||
++histo[_mm_extract_epi8(E, 4)];
|
||||
++histo[_mm_extract_epi8(E, 8)];
|
||||
++histo[_mm_extract_epi8(E, 12)];
|
||||
}
|
||||
}
|
||||
{
|
||||
const int left_over = tile_width & (SPAN - 1);
|
||||
const int left_over = tile_width & 3;
|
||||
if (left_over > 0) {
|
||||
VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride,
|
||||
left_over, tile_height,
|
||||
@ -95,33 +144,37 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]) {
|
||||
const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_red));
|
||||
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
||||
const __m128i mask = _mm_set1_epi16(0xff);
|
||||
|
||||
int y;
|
||||
for (y = 0; y < tile_height; ++y) {
|
||||
const uint32_t* const src = argb + y * stride;
|
||||
int i, x;
|
||||
for (x = 0; x + SPAN <= tile_width; x += SPAN) {
|
||||
uint16_t values[SPAN];
|
||||
const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]);
|
||||
const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);
|
||||
const __m128i g0 = _mm_and_si128(in0, mask_g); // 0 0 | g 0
|
||||
const __m128i g1 = _mm_and_si128(in1, mask_g);
|
||||
const __m128i g = _mm_packus_epi32(g0, g1); // g 0
|
||||
const __m128i A0 = _mm_srli_epi32(in0, 16); // 0 0 | x r
|
||||
const __m128i A1 = _mm_srli_epi32(in1, 16);
|
||||
const __m128i A = _mm_packus_epi32(A0, A1); // x r
|
||||
const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dr
|
||||
const __m128i C = _mm_sub_epi8(A, B); // x r'
|
||||
const __m128i D = _mm_and_si128(C, mask); // 0 r'
|
||||
_mm_storeu_si128((__m128i*)values, D);
|
||||
for (i = 0; i < SPAN; ++i) ++histo[values[i]];
|
||||
const __m128i mult = MK_CST_16(0, CST_5b(green_to_red));
|
||||
const __m128i mask_g = _mm_set1_epi32(0x0000ff00);
|
||||
if (tile_width >= 4) {
|
||||
int y;
|
||||
for (y = 0; y < tile_height; ++y) {
|
||||
const uint32_t* const src = argb + y * stride;
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)src);
|
||||
const __m128i B1 = _mm_and_si128(A1, mask_g);
|
||||
const __m128i C1 = _mm_madd_epi16(B1, mult);
|
||||
__m128i D = _mm_sub_epi16(A1, C1);
|
||||
int x;
|
||||
for (x = 4; x + 4 <= tile_width; x += 4) {
|
||||
const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x));
|
||||
__m128i B2, C2;
|
||||
++histo[_mm_extract_epi8(D, 2)];
|
||||
B2 = _mm_and_si128(A2, mask_g);
|
||||
++histo[_mm_extract_epi8(D, 6)];
|
||||
C2 = _mm_madd_epi16(B2, mult);
|
||||
++histo[_mm_extract_epi8(D, 10)];
|
||||
++histo[_mm_extract_epi8(D, 14)];
|
||||
D = _mm_sub_epi16(A2, C2);
|
||||
}
|
||||
++histo[_mm_extract_epi8(D, 2)];
|
||||
++histo[_mm_extract_epi8(D, 6)];
|
||||
++histo[_mm_extract_epi8(D, 10)];
|
||||
++histo[_mm_extract_epi8(D, 14)];
|
||||
}
|
||||
}
|
||||
{
|
||||
const int left_over = tile_width & (SPAN - 1);
|
||||
const int left_over = tile_width & 3;
|
||||
if (left_over > 0) {
|
||||
VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride,
|
||||
left_over, tile_height, green_to_red,
|
||||
@ -130,12 +183,16 @@ static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
#undef MK_CST_16
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8LEncDspInitSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
|
||||
VP8LExtraCost = ExtraCost_SSE41;
|
||||
VP8LExtraCostCombined = ExtraCostCombined_SSE41;
|
||||
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
|
||||
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;
|
||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;
|
||||
|
37
3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
vendored
37
3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
vendored
@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
|
||||
return Average2(Average2(a0, a1), Average2(a2, a3));
|
||||
}
|
||||
|
||||
static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
return Average3(left, top[0], top[1]);
|
||||
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average3(*left, top[0], top[1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
return Average2(left, top[-1]);
|
||||
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average2(*left, top[-1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
return Average2(left, top[0]);
|
||||
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average2(*left, top[0]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return Average2(top[-1], top[0]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)left;
|
||||
return Average2(top[0], top[1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor10_MIPSdspR2(uint32_t left,
|
||||
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average4(left, top[-1], top[0], top[1]);
|
||||
return Average4(*left, top[-1], top[0], top[1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor11_MIPSdspR2(uint32_t left,
|
||||
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Select(top[0], left, top[-1]);
|
||||
return Select(top[0], *left, top[-1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor12_MIPSdspR2(uint32_t left,
|
||||
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return ClampedAddSubtractFull(left, top[0], top[-1]);
|
||||
return ClampedAddSubtractFull(*left, top[0], top[-1]);
|
||||
}
|
||||
|
||||
static uint32_t Predictor13_MIPSdspR2(uint32_t left,
|
||||
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return ClampedAddSubtractHalf(left, top[0], top[-1]);
|
||||
return ClampedAddSubtractHalf(*left, top[0], top[-1]);
|
||||
}
|
||||
|
||||
// Add green to blue and red channels (i.e. perform the inverse transform of
|
||||
|
22
3rdparty/libwebp/src/dsp/lossless_neon.c
vendored
22
3rdparty/libwebp/src/dsp/lossless_neon.c
vendored
@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1,
|
||||
return avg;
|
||||
}
|
||||
|
||||
static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) {
|
||||
return Average3_NEON(left, top[0], top[1]);
|
||||
static uint32_t Predictor5_NEON(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average3_NEON(*left, top[0], top[1]);
|
||||
}
|
||||
static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) {
|
||||
return Average2_NEON(left, top[-1]);
|
||||
static uint32_t Predictor6_NEON(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average2_NEON(*left, top[-1]);
|
||||
}
|
||||
static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) {
|
||||
return Average2_NEON(left, top[0]);
|
||||
static uint32_t Predictor7_NEON(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return Average2_NEON(*left, top[0]);
|
||||
}
|
||||
static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) {
|
||||
return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]);
|
||||
static uint32_t Predictor13_NEON(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]);
|
||||
}
|
||||
|
||||
// Batch versions of those functions.
|
||||
@ -494,7 +498,7 @@ static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
|
||||
// non-standard versions there.
|
||||
#if defined(__APPLE__) && defined(__aarch64__) && \
|
||||
#if defined(__APPLE__) && WEBP_AARCH64 && \
|
||||
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
|
||||
#define USE_VTBLQ
|
||||
#endif
|
||||
|
130
3rdparty/libwebp/src/dsp/lossless_sse2.c
vendored
130
3rdparty/libwebp/src/dsp/lossless_sse2.c
vendored
@ -18,7 +18,6 @@
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include <assert.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -28,23 +27,22 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull_SSE2(uint32_t c0,
|
||||
uint32_t c1,
|
||||
uint32_t c2) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
|
||||
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
|
||||
const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
|
||||
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero);
|
||||
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero);
|
||||
const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero);
|
||||
const __m128i V1 = _mm_add_epi16(C0, C1);
|
||||
const __m128i V2 = _mm_sub_epi16(V1, C2);
|
||||
const __m128i b = _mm_packus_epi16(V2, V2);
|
||||
const uint32_t output = _mm_cvtsi128_si32(b);
|
||||
return output;
|
||||
return (uint32_t)_mm_cvtsi128_si32(b);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
|
||||
uint32_t c1,
|
||||
uint32_t c2) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
|
||||
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
|
||||
const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
|
||||
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero);
|
||||
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero);
|
||||
const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero);
|
||||
const __m128i avg = _mm_add_epi16(C1, C0);
|
||||
const __m128i A0 = _mm_srli_epi16(avg, 1);
|
||||
const __m128i A1 = _mm_sub_epi16(A0, B0);
|
||||
@ -53,16 +51,15 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
|
||||
const __m128i A3 = _mm_srai_epi16(A2, 1);
|
||||
const __m128i A4 = _mm_add_epi16(A0, A3);
|
||||
const __m128i A5 = _mm_packus_epi16(A4, A4);
|
||||
const uint32_t output = _mm_cvtsi128_si32(A5);
|
||||
return output;
|
||||
return (uint32_t)_mm_cvtsi128_si32(A5);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) {
|
||||
int pa_minus_pb;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i A0 = _mm_cvtsi32_si128(a);
|
||||
const __m128i B0 = _mm_cvtsi32_si128(b);
|
||||
const __m128i C0 = _mm_cvtsi32_si128(c);
|
||||
const __m128i A0 = _mm_cvtsi32_si128((int)a);
|
||||
const __m128i B0 = _mm_cvtsi32_si128((int)b);
|
||||
const __m128i C0 = _mm_cvtsi32_si128((int)c);
|
||||
const __m128i AC0 = _mm_subs_epu8(A0, C0);
|
||||
const __m128i CA0 = _mm_subs_epu8(C0, A0);
|
||||
const __m128i BC0 = _mm_subs_epu8(B0, C0);
|
||||
@ -95,8 +92,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0,
|
||||
__m128i* const avg) {
|
||||
// (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1)
|
||||
const __m128i ones = _mm_set1_epi8(1);
|
||||
const __m128i A0 = _mm_cvtsi32_si128(a0);
|
||||
const __m128i A1 = _mm_cvtsi32_si128(a1);
|
||||
const __m128i A0 = _mm_cvtsi32_si128((int)a0);
|
||||
const __m128i A1 = _mm_cvtsi32_si128((int)a1);
|
||||
const __m128i avg1 = _mm_avg_epu8(A0, A1);
|
||||
const __m128i one = _mm_and_si128(_mm_xor_si128(A0, A1), ones);
|
||||
*avg = _mm_sub_epi8(avg1, one);
|
||||
@ -104,8 +101,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0,
|
||||
|
||||
static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero);
|
||||
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
|
||||
const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a0), zero);
|
||||
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero);
|
||||
const __m128i sum = _mm_add_epi16(A1, A0);
|
||||
return _mm_srli_epi16(sum, 1);
|
||||
}
|
||||
@ -113,19 +110,18 @@ static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
|
||||
static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) {
|
||||
__m128i output;
|
||||
Average2_uint32_SSE2(a0, a1, &output);
|
||||
return _mm_cvtsi128_si32(output);
|
||||
return (uint32_t)_mm_cvtsi128_si32(output);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1,
|
||||
uint32_t a2) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i avg1 = Average2_uint32_16_SSE2(a0, a2);
|
||||
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
|
||||
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero);
|
||||
const __m128i sum = _mm_add_epi16(avg1, A1);
|
||||
const __m128i avg2 = _mm_srli_epi16(sum, 1);
|
||||
const __m128i A2 = _mm_packus_epi16(avg2, avg2);
|
||||
const uint32_t output = _mm_cvtsi128_si32(A2);
|
||||
return output;
|
||||
return (uint32_t)_mm_cvtsi128_si32(A2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
|
||||
@ -135,46 +131,54 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
|
||||
const __m128i sum = _mm_add_epi16(avg2, avg1);
|
||||
const __m128i avg3 = _mm_srli_epi16(sum, 1);
|
||||
const __m128i A0 = _mm_packus_epi16(avg3, avg3);
|
||||
const uint32_t output = _mm_cvtsi128_si32(A0);
|
||||
return output;
|
||||
return (uint32_t)_mm_cvtsi128_si32(A0);
|
||||
}
|
||||
|
||||
static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average3_SSE2(left, top[0], top[1]);
|
||||
static uint32_t Predictor5_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average3_SSE2(*left, top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(left, top[-1]);
|
||||
static uint32_t Predictor6_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(*left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(left, top[0]);
|
||||
static uint32_t Predictor7_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(*left, top[0]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor8_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(top[-1], top[0]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
static uint32_t Predictor9_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average2_SSE2(top[0], top[1]);
|
||||
(void)left;
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]);
|
||||
static uint32_t Predictor10_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = Select_SSE2(top[0], left, top[-1]);
|
||||
static uint32_t Predictor11_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = Select_SSE2(top[0], *left, top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]);
|
||||
static uint32_t Predictor12_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]);
|
||||
static uint32_t Predictor13_SSE2(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]);
|
||||
return pred;
|
||||
}
|
||||
|
||||
@ -184,7 +188,7 @@ static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
|
||||
static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32(ARGB_BLACK);
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
|
||||
const __m128i res = _mm_add_epi8(src, black);
|
||||
@ -200,7 +204,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
__m128i prev = _mm_set1_epi32(out[-1]);
|
||||
__m128i prev = _mm_set1_epi32((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
// a | b | c | d
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
|
||||
@ -277,12 +281,12 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
#undef GENERATE_PREDICTOR_2
|
||||
|
||||
// Predictor10: average of (average of (L,TL), average of (T, TR)).
|
||||
#define DO_PRED10(OUT) do { \
|
||||
__m128i avgLTL, avg; \
|
||||
Average2_m128i(&L, &TL, &avgLTL); \
|
||||
Average2_m128i(&avgTTR, &avgLTL, &avg); \
|
||||
L = _mm_add_epi8(avg, src); \
|
||||
out[i + (OUT)] = _mm_cvtsi128_si32(L); \
|
||||
#define DO_PRED10(OUT) do { \
|
||||
__m128i avgLTL, avg; \
|
||||
Average2_m128i(&L, &TL, &avgLTL); \
|
||||
Average2_m128i(&avgTTR, &avgLTL, &avg); \
|
||||
L = _mm_add_epi8(avg, src); \
|
||||
out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED10_SHIFT do { \
|
||||
@ -295,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
__m128i L = _mm_cvtsi32_si128(out[-1]);
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
__m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
|
||||
__m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
|
||||
@ -328,7 +332,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
const __m128i B = _mm_andnot_si128(mask, T); \
|
||||
const __m128i pred = _mm_or_si128(A, B); /* pred = (pa > b)? L : T*/ \
|
||||
L = _mm_add_epi8(src, pred); \
|
||||
out[i + (OUT)] = _mm_cvtsi128_si32(L); \
|
||||
out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED11_SHIFT do { \
|
||||
@ -343,7 +347,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
__m128i pa;
|
||||
__m128i L = _mm_cvtsi32_si128(out[-1]);
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
__m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
|
||||
__m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
|
||||
@ -376,12 +380,12 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
#undef DO_PRED11_SHIFT
|
||||
|
||||
// Predictor12: ClampedAddSubtractFull.
|
||||
#define DO_PRED12(DIFF, LANE, OUT) do { \
|
||||
const __m128i all = _mm_add_epi16(L, (DIFF)); \
|
||||
const __m128i alls = _mm_packus_epi16(all, all); \
|
||||
const __m128i res = _mm_add_epi8(src, alls); \
|
||||
out[i + (OUT)] = _mm_cvtsi128_si32(res); \
|
||||
L = _mm_unpacklo_epi8(res, zero); \
|
||||
#define DO_PRED12(DIFF, LANE, OUT) do { \
|
||||
const __m128i all = _mm_add_epi16(L, (DIFF)); \
|
||||
const __m128i alls = _mm_packus_epi16(all, all); \
|
||||
const __m128i res = _mm_add_epi8(src, alls); \
|
||||
out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(res); \
|
||||
L = _mm_unpacklo_epi8(res, zero); \
|
||||
} while (0)
|
||||
|
||||
#define DO_PRED12_SHIFT(DIFF, LANE) do { \
|
||||
@ -394,7 +398,7 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i L8 = _mm_cvtsi32_si128(out[-1]);
|
||||
const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]);
|
||||
__m128i L = _mm_unpacklo_epi8(L8, zero);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
// Load 4 pixels at a time.
|
||||
@ -460,7 +464,7 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
|
||||
const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0);
|
||||
#undef MK_CST_16
|
||||
#undef CST
|
||||
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
|
||||
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
|
||||
@ -524,7 +528,7 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
|
||||
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
while (num_pixels >= 8) {
|
||||
@ -553,7 +557,7 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
while (num_pixels >= 8) {
|
||||
@ -588,8 +592,8 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i mask_0xe0 = _mm_set1_epi8(0xe0);
|
||||
const __m128i mask_0xf8 = _mm_set1_epi8(0xf8);
|
||||
const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0);
|
||||
const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8);
|
||||
const __m128i mask_0x07 = _mm_set1_epi8(0x07);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
|
133
3rdparty/libwebp/src/dsp/lossless_sse41.c
vendored
Normal file
133
3rdparty/libwebp/src/dsp/lossless_sse41.c
vendored
Normal file
@ -0,0 +1,133 @@
|
||||
// Copyright 2021 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// SSE41 variant of methods for lossless decoder
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include "src/dsp/common_sse41.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
||||
static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
|
||||
const uint32_t* const src,
|
||||
int num_pixels, uint32_t* dst) {
|
||||
// sign-extended multiplying constants, pre-shifted by 5.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
|
||||
const __m128i mults_rb =
|
||||
_mm_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 |
|
||||
(CST(green_to_blue_) & 0xffff)));
|
||||
const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_));
|
||||
#undef CST
|
||||
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);
|
||||
const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5,
|
||||
-1, 9, -1, 9, -1, 13, -1, 13);
|
||||
const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1,
|
||||
-1, 10, -1, -1, -1, 14, -1, -1);
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i A = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i B = _mm_shuffle_epi8(A, perm1); // argb -> g0g0
|
||||
const __m128i C = _mm_mulhi_epi16(B, mults_rb);
|
||||
const __m128i D = _mm_add_epi8(A, C);
|
||||
const __m128i E = _mm_shuffle_epi8(D, perm2);
|
||||
const __m128i F = _mm_mulhi_epi16(E, mults_b2);
|
||||
const __m128i G = _mm_add_epi8(D, F);
|
||||
const __m128i out = _mm_blendv_epi8(G, A, mask_ag);
|
||||
_mm_storeu_si128((__m128i*)&dst[i], out);
|
||||
}
|
||||
// Fall-back to C-version for left-overs.
|
||||
if (i != num_pixels) {
|
||||
VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// While at least 16 pixels remain: loads four 128-bit vectors through 'in',
// shuffles each with perm0..perm3, blends the shuffled vectors into three
// 128-bit vectors and stores them through 'out'.
// Expects 'in', 'out', 'num_pixels' and 'perm0'..'perm3' to be in scope at
// the point of use; 'in', 'out' and 'num_pixels' are advanced/decremented.
#define ARGB_TO_RGB_SSE41 do {                                       \
  for (; num_pixels >= 16; num_pixels -= 16, in += 4, out += 3) {    \
    const __m128i quad0 = _mm_loadu_si128(in + 0);                   \
    const __m128i quad1 = _mm_loadu_si128(in + 1);                   \
    const __m128i quad2 = _mm_loadu_si128(in + 2);                   \
    const __m128i quad3 = _mm_loadu_si128(in + 3);                   \
    const __m128i packed0 = _mm_shuffle_epi8(quad0, perm0);          \
    const __m128i packed1 = _mm_shuffle_epi8(quad1, perm1);          \
    const __m128i packed2 = _mm_shuffle_epi8(quad2, perm2);          \
    const __m128i packed3 = _mm_shuffle_epi8(quad3, perm3);          \
    const __m128i merged0 = _mm_blend_epi16(packed0, packed1, 0xc0); \
    const __m128i merged1 = _mm_blend_epi16(packed1, packed2, 0xf0); \
    const __m128i merged2 = _mm_blend_epi16(packed2, packed3, 0xfc); \
    _mm_storeu_si128(out + 0, merged0);                              \
    _mm_storeu_si128(out + 1, merged1);                              \
    _mm_storeu_si128(out + 2, merged2);                              \
  }                                                                  \
} while (0)
|
||||
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,
|
||||
8, 14, 13, 12, -1, -1, -1, -1);
|
||||
const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39);
|
||||
const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e);
|
||||
const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93);
|
||||
|
||||
ARGB_TO_RGB_SSE41;
|
||||
|
||||
// left-overs
|
||||
if (num_pixels > 0) {
|
||||
VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,
|
||||
12, 13, 14, -1, -1, -1, -1);
|
||||
const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39);
|
||||
const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e);
|
||||
const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93);
|
||||
|
||||
ARGB_TO_RGB_SSE41;
|
||||
|
||||
// left-overs
|
||||
if (num_pixels > 0) {
|
||||
VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
|
||||
}
|
||||
}
|
||||
|
||||
#undef ARGB_TO_RGB_SSE41
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
extern void VP8LDspInitSSE41(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) {
|
||||
VP8LTransformColorInverse = TransformColorInverse_SSE41;
|
||||
VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41;
|
||||
VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE41
|
||||
|
||||
WEBP_DSP_INIT_STUB(VP8LDspInitSSE41)
|
||||
|
||||
#endif // WEBP_USE_SSE41
|
37
3rdparty/libwebp/src/dsp/msa_macro.h
vendored
37
3rdparty/libwebp/src/dsp/msa_macro.h
vendored
@ -14,6 +14,10 @@
|
||||
#ifndef WEBP_DSP_MSA_MACRO_H_
|
||||
#define WEBP_DSP_MSA_MACRO_H_
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_MSA)
|
||||
|
||||
#include <stdint.h>
|
||||
#include <msa.h>
|
||||
|
||||
@ -69,27 +73,25 @@
|
||||
#define ST_UW(...) ST_W(v4u32, __VA_ARGS__)
|
||||
#define ST_SW(...) ST_W(v4i32, __VA_ARGS__)
|
||||
|
||||
#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME) \
|
||||
static inline TYPE FUNC_NAME(const void* const psrc) { \
|
||||
const uint8_t* const psrc_m = (const uint8_t*)psrc; \
|
||||
TYPE val_m; \
|
||||
__asm__ volatile ( \
|
||||
"" #INSTR " %[val_m], %[psrc_m] \n\t" \
|
||||
: [val_m] "=r" (val_m) \
|
||||
: [psrc_m] "m" (*psrc_m)); \
|
||||
return val_m; \
|
||||
#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME) \
|
||||
static inline TYPE FUNC_NAME(const void* const psrc) { \
|
||||
const uint8_t* const psrc_m = (const uint8_t*)psrc; \
|
||||
TYPE val_m; \
|
||||
__asm__ volatile("" #INSTR " %[val_m], %[psrc_m] \n\t" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_m] "m"(*psrc_m)); \
|
||||
return val_m; \
|
||||
}
|
||||
|
||||
#define MSA_LOAD(psrc, FUNC_NAME) FUNC_NAME(psrc)
|
||||
|
||||
#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME) \
|
||||
static inline void FUNC_NAME(TYPE val, void* const pdst) { \
|
||||
uint8_t* const pdst_m = (uint8_t*)pdst; \
|
||||
TYPE val_m = val; \
|
||||
__asm__ volatile ( \
|
||||
" " #INSTR " %[val_m], %[pdst_m] \n\t" \
|
||||
: [pdst_m] "=m" (*pdst_m) \
|
||||
: [val_m] "r" (val_m)); \
|
||||
#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME) \
|
||||
static inline void FUNC_NAME(TYPE val, void* const pdst) { \
|
||||
uint8_t* const pdst_m = (uint8_t*)pdst; \
|
||||
TYPE val_m = val; \
|
||||
__asm__ volatile(" " #INSTR " %[val_m], %[pdst_m] \n\t" \
|
||||
: [pdst_m] "=m"(*pdst_m) \
|
||||
: [val_m] "r"(val_m)); \
|
||||
}
|
||||
|
||||
#define MSA_STORE(val, pdst, FUNC_NAME) FUNC_NAME(val, pdst)
|
||||
@ -1389,4 +1391,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
|
||||
} while (0)
|
||||
#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
|
||||
|
||||
#endif // WEBP_USE_MSA
|
||||
#endif // WEBP_DSP_MSA_MACRO_H_
|
||||
|
11
3rdparty/libwebp/src/dsp/neon.h
vendored
11
3rdparty/libwebp/src/dsp/neon.h
vendored
@ -12,14 +12,16 @@
|
||||
#ifndef WEBP_DSP_NEON_H_
|
||||
#define WEBP_DSP_NEON_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Right now, some intrinsics functions seem slower, so we disable them
|
||||
// everywhere except newer clang/gcc or aarch64 where the inline assembly is
|
||||
// incompatible.
|
||||
#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__)
|
||||
#if LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 9) || WEBP_AARCH64
|
||||
#define WEBP_USE_INTRINSICS // use intrinsics when possible
|
||||
#endif
|
||||
|
||||
@ -44,7 +46,7 @@
|
||||
// if using intrinsics, this flag avoids some functions that make gcc-4.6.3
|
||||
// crash ("internal compiler error: in immed_double_const, at emit-rtl.").
|
||||
// (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
|
||||
#define WORK_AROUND_GCC
|
||||
#endif
|
||||
|
||||
@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#endif // WEBP_USE_NEON
|
||||
#endif // WEBP_DSP_NEON_H_
|
||||
|
16
3rdparty/libwebp/src/dsp/quant.h
vendored
16
3rdparty/libwebp/src/dsp/quant.h
vendored
@ -21,18 +21,24 @@
|
||||
|
||||
#define IsFlat IsFlat_NEON
|
||||
|
||||
static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
|
||||
static uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
|
||||
#if WEBP_AARCH64
|
||||
return vaddvq_u32(a);
|
||||
#else
|
||||
const uint64x2_t b = vpaddlq_u32(a);
|
||||
return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
|
||||
vreinterpret_u32_u64(vget_high_u64(b)));
|
||||
const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
|
||||
vreinterpret_u32_u64(vget_high_u64(b)));
|
||||
return vget_lane_u32(c, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
|
||||
int thresh) {
|
||||
const int16x8_t tst_ones = vdupq_n_s16(-1);
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int i;
|
||||
|
||||
for (int i = 0; i < num_blocks; ++i) {
|
||||
for (i = 0; i < num_blocks; ++i) {
|
||||
// Set DC to zero.
|
||||
const int16x8_t a_0 = vsetq_lane_s16(0, vld1q_s16(levels), 0);
|
||||
const int16x8_t a_1 = vld1q_s16(levels + 8);
|
||||
@ -45,7 +51,7 @@ static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
|
||||
|
||||
levels += 16;
|
||||
}
|
||||
return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0);
|
||||
return thresh >= (int)horizontal_add_uint32x4(sum);
|
||||
}
|
||||
|
||||
#else
|
||||
|
12
3rdparty/libwebp/src/dsp/rescaler.c
vendored
12
3rdparty/libwebp/src/dsp/rescaler.c
vendored
@ -38,8 +38,9 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
|
||||
int x_out = channel;
|
||||
// simple bilinear interpolation
|
||||
int accum = wrk->x_add;
|
||||
int left = src[x_in];
|
||||
int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left;
|
||||
rescaler_t left = (rescaler_t)src[x_in];
|
||||
rescaler_t right =
|
||||
(wrk->src_width > 1) ? (rescaler_t)src[x_in + x_stride] : left;
|
||||
x_in += x_stride;
|
||||
while (1) {
|
||||
wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
|
||||
@ -50,7 +51,7 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
|
||||
left = right;
|
||||
x_in += x_stride;
|
||||
assert(x_in < wrk->src_width * x_stride);
|
||||
right = src[x_in];
|
||||
right = (rescaler_t)src[x_in];
|
||||
accum += wrk->x_add;
|
||||
}
|
||||
}
|
||||
@ -196,6 +197,7 @@ WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
|
||||
WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
|
||||
WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void WebPRescalerDspInitSSE2(void);
|
||||
extern void WebPRescalerDspInitMIPS32(void);
|
||||
extern void WebPRescalerDspInitMIPSdspR2(void);
|
||||
@ -213,7 +215,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) {
|
||||
WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C;
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
WebPRescalerDspInitSSE2();
|
||||
}
|
||||
@ -235,7 +237,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPRescalerDspInitNEON();
|
||||
|
6
3rdparty/libwebp/src/dsp/rescaler_sse2.c
vendored
6
3rdparty/libwebp/src/dsp/rescaler_sse2.c
vendored
@ -85,7 +85,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
const __m128i mult = _mm_cvtsi32_si128(((x_add - accum) << 16) | accum);
|
||||
const __m128i out = _mm_madd_epi16(cur_pixels, mult);
|
||||
assert(sizeof(*frow) == sizeof(uint32_t));
|
||||
WebPUint32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out));
|
||||
WebPInt32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out));
|
||||
frow += 1;
|
||||
if (frow >= frow_end) break;
|
||||
accum -= wrk->x_sub;
|
||||
@ -132,7 +132,7 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
|
||||
__m128i base = zero;
|
||||
accum += wrk->x_add;
|
||||
while (accum > 0) {
|
||||
const __m128i A = _mm_cvtsi32_si128(WebPMemToUint32(src));
|
||||
const __m128i A = _mm_cvtsi32_si128(WebPMemToInt32(src));
|
||||
src += 4;
|
||||
base = _mm_unpacklo_epi8(A, zero);
|
||||
// To avoid overflow, we need: base * x_add / x_sub < 32768
|
||||
@ -198,7 +198,7 @@ static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0,
|
||||
const __m128i* const mult,
|
||||
uint8_t* const dst) {
|
||||
const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
|
||||
const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
|
||||
const __m128i mask = _mm_set_epi32(~0, 0, ~0, 0);
|
||||
const __m128i B0 = _mm_mul_epu32(*A0, *mult);
|
||||
const __m128i B1 = _mm_mul_epu32(*A1, *mult);
|
||||
const __m128i B2 = _mm_mul_epu32(*A2, *mult);
|
||||
|
3
3rdparty/libwebp/src/dsp/ssim.c
vendored
3
3rdparty/libwebp/src/dsp/ssim.c
vendored
@ -137,6 +137,7 @@ VP8SSIMGetClippedFunc VP8SSIMGetClipped;
|
||||
VP8AccumulateSSEFunc VP8AccumulateSSE;
|
||||
#endif
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8SSIMDspInitSSE2(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) {
|
||||
@ -150,7 +151,7 @@ WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) {
|
||||
#endif
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
VP8SSIMDspInitSSE2();
|
||||
}
|
||||
|
11
3rdparty/libwebp/src/dsp/upsampling.c
vendored
11
3rdparty/libwebp/src/dsp/upsampling.c
vendored
@ -215,6 +215,7 @@ static void EmptyYuv444Func(const uint8_t* y,
|
||||
|
||||
WebPYUV444Converter WebPYUV444Converters[MODE_LAST];
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void WebPInitYUV444ConvertersMIPSdspR2(void);
|
||||
extern void WebPInitYUV444ConvertersSSE2(void);
|
||||
extern void WebPInitYUV444ConvertersSSE41(void);
|
||||
@ -233,12 +234,12 @@ WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) {
|
||||
WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444_C;
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
WebPInitYUV444ConvertersSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitYUV444ConvertersSSE41();
|
||||
}
|
||||
@ -278,12 +279,12 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) {
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
WebPInitUpsamplersSSE2();
|
||||
}
|
||||
#endif
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitUpsamplersSSE41();
|
||||
}
|
||||
@ -300,7 +301,7 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitUpsamplersNEON();
|
||||
|
2
3rdparty/libwebp/src/dsp/upsampling_neon.c
vendored
2
3rdparty/libwebp/src/dsp/upsampling_neon.c
vendored
@ -111,7 +111,7 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
|
||||
vst4_u8(out, v255_r_g_b); \
|
||||
} while (0)
|
||||
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#if (WEBP_SWAP_16BIT_CSP == 0)
|
||||
#define ZIP_U8(lo, hi) vzip_u8((lo), (hi))
|
||||
#else
|
||||
#define ZIP_U8(lo, hi) vzip_u8((hi), (lo))
|
||||
|
2
3rdparty/libwebp/src/dsp/upsampling_sse2.c
vendored
2
3rdparty/libwebp/src/dsp/upsampling_sse2.c
vendored
@ -121,7 +121,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
|
||||
uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15); \
|
||||
uint8_t* const r_v = r_u + 32; \
|
||||
\
|
||||
assert(top_y != NULL); \
|
||||
|
85
3rdparty/libwebp/src/dsp/yuv.c
vendored
85
3rdparty/libwebp/src/dsp/yuv.c
vendored
@ -70,6 +70,7 @@ void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||
|
||||
WebPSamplerRowFunc WebPSamplers[MODE_LAST];
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void WebPInitSamplersSSE2(void);
|
||||
extern void WebPInitSamplersSSE41(void);
|
||||
extern void WebPInitSamplersMIPS32(void);
|
||||
@ -90,16 +91,16 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) {
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
WebPInitSamplersSSE2();
|
||||
}
|
||||
#endif // WEBP_USE_SSE2
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#endif // WEBP_HAVE_SSE2
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitSamplersSSE41();
|
||||
}
|
||||
#endif // WEBP_USE_SSE41
|
||||
#endif // WEBP_HAVE_SSE41
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
if (VP8GetCPUInfo(kMIPS32)) {
|
||||
WebPInitSamplersMIPS32();
|
||||
@ -194,50 +195,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
// Clamps 'v' to the [0, MAX_Y] range.
static uint16_t clip_y(int v) {
  if (v < 0) return 0;
  if (v > MAX_Y) return MAX_Y;
  return (uint16_t)v;
}
|
||||
|
||||
// Adds (ref[k] - src[k]) to each dst[k], clamping the result with clip_y().
// Returns the sum of the absolute differences |ref[k] - src[k]| over 'len'
// samples.
static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
                                  uint16_t* dst, int len) {
  uint64_t total = 0;
  int k = 0;
  while (k < len) {
    const int delta = ref[k] - src[k];
    dst[k] = clip_y((int)dst[k] + delta);
    total += (uint64_t)abs(delta);
    ++k;
  }
  return total;
}
|
||||
|
||||
// Adds the per-sample difference (ref[k] - src[k]) to dst[k], for 'len'
// samples. No clamping is applied.
static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
                                int16_t* dst, int len) {
  int k = 0;
  while (k < len) {
    const int delta = ref[k] - src[k];
    dst[k] = (int16_t)(dst[k] + delta);
    ++k;
  }
}
|
||||
|
||||
// For each k in [0, len), blends the 2x2 neighbourhood A[k], A[k+1], B[k],
// B[k+1] with weights 9/3/3/1 (rounded, divided by 16) into two values, adds
// them to best_y[2k] and best_y[2k+1], and stores the clamped results in
// out[2k] and out[2k+1]. Reads len + 1 entries of both A and B.
static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
                                const uint16_t* best_y, uint16_t* out) {
  int k;
  for (k = 0; k < len; ++k) {
    const int a_cur = A[k];
    const int a_nxt = A[k + 1];
    const int b_cur = B[k];
    const int b_nxt = B[k + 1];
    const int v0 = (a_cur * 9 + a_nxt * 3 + b_cur * 3 + b_nxt + 8) >> 4;
    const int v1 = (a_nxt * 9 + a_cur * 3 + b_nxt * 3 + b_cur + 8) >> 4;
    out[2 * k + 0] = clip_y(best_y[2 * k + 0] + v0);
    out[2 * k + 1] = clip_y(best_y[2 * k + 1] + v1);
  }
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef MAX_Y
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
@ -247,18 +204,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
||||
void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store);
|
||||
|
||||
uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len);
|
||||
void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len);
|
||||
void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out);
|
||||
|
||||
extern void WebPInitConvertARGBToYUVSSE2(void);
|
||||
extern void WebPInitConvertARGBToYUVSSE41(void);
|
||||
extern void WebPInitConvertARGBToYUVNEON(void);
|
||||
extern void WebPInitSharpYUVSSE2(void);
|
||||
extern void WebPInitSharpYUVNEON(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
||||
WebPConvertARGBToY = ConvertARGBToY_C;
|
||||
@ -269,40 +217,29 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
||||
|
||||
WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
|
||||
#endif
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
WebPInitConvertARGBToYUVSSE2();
|
||||
WebPInitSharpYUVSSE2();
|
||||
}
|
||||
#endif // WEBP_USE_SSE2
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
#endif // WEBP_HAVE_SSE2
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
if (VP8GetCPUInfo(kSSE4_1)) {
|
||||
WebPInitConvertARGBToYUVSSE41();
|
||||
}
|
||||
#endif // WEBP_USE_SSE41
|
||||
#endif // WEBP_HAVE_SSE41
|
||||
}
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitConvertARGBToYUVNEON();
|
||||
WebPInitSharpYUVNEON();
|
||||
}
|
||||
#endif // WEBP_USE_NEON
|
||||
#endif // WEBP_HAVE_NEON
|
||||
|
||||
assert(WebPConvertARGBToY != NULL);
|
||||
assert(WebPConvertARGBToUV != NULL);
|
||||
assert(WebPConvertRGB24ToY != NULL);
|
||||
assert(WebPConvertBGR24ToY != NULL);
|
||||
assert(WebPConvertRGBA32ToUV != NULL);
|
||||
assert(WebPSharpYUVUpdateY != NULL);
|
||||
assert(WebPSharpYUVUpdateRGB != NULL);
|
||||
assert(WebPSharpYUVFilterRow != NULL);
|
||||
}
|
||||
|
2
3rdparty/libwebp/src/dsp/yuv.h
vendored
2
3rdparty/libwebp/src/dsp/yuv.h
vendored
@ -10,7 +10,7 @@
|
||||
// inline YUV<->RGB conversion function
|
||||
//
|
||||
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
|
||||
// More information at: http://en.wikipedia.org/wiki/YCbCr
|
||||
// More information at: https://en.wikipedia.org/wiki/YCbCr
|
||||
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
|
||||
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
|
||||
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
|
||||
|
108
3rdparty/libwebp/src/dsp/yuv_neon.c
vendored
108
3rdparty/libwebp/src/dsp/yuv_neon.c
vendored
@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
|
||||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
// Clamps 'v' to the [0, MAX_Y] range (used by the scalar tails below).
static uint16_t clip_y_NEON(int v) {
  if (v < 0) return 0;
  if (v > MAX_Y) return MAX_Y;
  return (uint16_t)v;
}
|
||||
|
||||
static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len) {
|
||||
int i;
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
const int16x8_t max = vdupq_n_s16(MAX_Y);
|
||||
uint64x2_t sum = vdupq_n_u64(0);
|
||||
uint64_t diff;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
|
||||
const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
|
||||
const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_y
|
||||
const int16x8_t F = vaddq_s16(C, D); // new_y
|
||||
const uint16x8_t H =
|
||||
vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
|
||||
const int16x8_t I = vabsq_s16(D); // abs(diff_y)
|
||||
vst1q_u16(dst + i, H);
|
||||
sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
|
||||
}
|
||||
diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)(dst[i]) + diff_y;
|
||||
dst[i] = clip_y_NEON(new_y);
|
||||
diff += (uint64_t)(abs(diff_y));
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vld1q_s16(ref + i);
|
||||
const int16x8_t B = vld1q_s16(src + i);
|
||||
const int16x8_t C = vld1q_s16(dst + i);
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_uv
|
||||
const int16x8_t E = vaddq_s16(C, D); // new_uv
|
||||
vst1q_s16(dst + i, E);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out) {
|
||||
int i;
|
||||
const int16x8_t max = vdupq_n_s16(MAX_Y);
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t a0 = vld1q_s16(A + i + 0);
|
||||
const int16x8_t a1 = vld1q_s16(A + i + 1);
|
||||
const int16x8_t b0 = vld1q_s16(B + i + 0);
|
||||
const int16x8_t b1 = vld1q_s16(B + i + 1);
|
||||
const int16x8_t a0b1 = vaddq_s16(a0, b1);
|
||||
const int16x8_t a1b0 = vaddq_s16(a1, b0);
|
||||
const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1)
|
||||
const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0)
|
||||
const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
|
||||
const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
|
||||
const int16x8_t d0 = vaddq_s16(c1, a0);
|
||||
const int16x8_t d1 = vaddq_s16(c0, a1);
|
||||
const int16x8_t e0 = vrshrq_n_s16(d0, 1);
|
||||
const int16x8_t e1 = vrshrq_n_s16(d1, 1);
|
||||
const int16x8x2_t f = vzipq_s16(e0, e1);
|
||||
const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
|
||||
const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
|
||||
const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
|
||||
const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
|
||||
const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
|
||||
const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
|
||||
vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
|
||||
vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
|
||||
out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
#undef MAX_Y
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void WebPInitSharpYUVNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_NEON
|
||||
|
||||
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
|
||||
WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)
|
||||
|
||||
#endif // WEBP_USE_NEON
|
||||
|
132
3rdparty/libwebp/src/dsp/yuv_sse2.c
vendored
132
3rdparty/libwebp/src/dsp/yuv_sse2.c
vendored
@ -15,10 +15,12 @@
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include <stdlib.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
|
||||
|
||||
@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE2(const uint8_t* src) {
|
||||
// Load and replicate the U/V samples
|
||||
static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
|
||||
const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src));
|
||||
const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
|
||||
return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples
|
||||
}
|
||||
@ -130,7 +132,7 @@ static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
|
||||
const __m128i rg0 = _mm_packus_epi16(*B, *A);
|
||||
const __m128i ba0 = _mm_packus_epi16(*R, *G);
|
||||
#endif
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
|
||||
const __m128i rb1 = _mm_unpacklo_epi8(rg0, ba0); // rbrbrbrbrb...
|
||||
const __m128i ga1 = _mm_unpackhi_epi8(rg0, ba0); // gagagagaga...
|
||||
const __m128i rb2 = _mm_and_si128(rb1, mask_0xf0);
|
||||
@ -147,9 +149,10 @@ static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
|
||||
const __m128i r0 = _mm_packus_epi16(*R, *R);
|
||||
const __m128i g0 = _mm_packus_epi16(*G, *G);
|
||||
const __m128i b0 = _mm_packus_epi16(*B, *B);
|
||||
const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8(0xf8));
|
||||
const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8((char)0xf8));
|
||||
const __m128i b1 = _mm_and_si128(_mm_srli_epi16(b0, 3), _mm_set1_epi8(0x1f));
|
||||
const __m128i g1 = _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0xe0)), 5);
|
||||
const __m128i g1 =
|
||||
_mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8((char)0xe0)), 5);
|
||||
const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3);
|
||||
const __m128i rg = _mm_or_si128(r1, g1);
|
||||
const __m128i gb = _mm_or_si128(g2, b1);
|
||||
@ -747,128 +750,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
|
||||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
// Clamps 'v' to the [0, MAX_Y] range (used by the scalar tails below).
static uint16_t clip_y(int v) {
  if (v < 0) return 0;
  if (v > MAX_Y) return MAX_Y;
  return (uint16_t)v;
}
|
||||
|
||||
static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len) {
|
||||
uint64_t diff = 0;
|
||||
uint32_t tmp[4];
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max = _mm_set1_epi16(MAX_Y);
|
||||
const __m128i one = _mm_set1_epi16(1);
|
||||
__m128i sum = zero;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
|
||||
const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
|
||||
const __m128i D = _mm_sub_epi16(A, B); // diff_y
|
||||
const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0)
|
||||
const __m128i F = _mm_add_epi16(C, D); // new_y
|
||||
const __m128i G = _mm_or_si128(E, one); // -1 or 1
|
||||
const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
|
||||
const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...))
|
||||
_mm_storeu_si128((__m128i*)(dst + i), H);
|
||||
sum = _mm_add_epi32(sum, I);
|
||||
}
|
||||
_mm_storeu_si128((__m128i*)tmp, sum);
|
||||
diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)dst[i] + diff_y;
|
||||
dst[i] = clip_y(new_y);
|
||||
diff += (uint64_t)abs(diff_y);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
// Adds (ref[k] - src[k]) to each dst[k] for 'len' samples, 8 at a time with
// SSE2 and one at a time for the remainder. No clamping is applied.
static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
                                   int16_t* dst, int len) {
  int k = 0;
  while (k + 8 <= len) {
    const __m128i v_ref = _mm_loadu_si128((const __m128i*)(ref + k));
    const __m128i v_src = _mm_loadu_si128((const __m128i*)(src + k));
    const __m128i v_dst = _mm_loadu_si128((const __m128i*)(dst + k));
    const __m128i v_diff = _mm_sub_epi16(v_ref, v_src);    // diff_uv
    const __m128i v_new = _mm_add_epi16(v_dst, v_diff);    // new_uv
    _mm_storeu_si128((__m128i*)(dst + k), v_new);
    k += 8;
  }
  while (k < len) {
    const int delta = ref[k] - src[k];
    dst[k] = (int16_t)(dst[k] + delta);
    ++k;
  }
}
|
||||
|
||||
static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out) {
|
||||
int i;
|
||||
const __m128i kCst8 = _mm_set1_epi16(8);
|
||||
const __m128i max = _mm_set1_epi16(MAX_Y);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
|
||||
const __m128i a0b1 = _mm_add_epi16(a0, b1);
|
||||
const __m128i a1b0 = _mm_add_epi16(a1, b0);
|
||||
const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
|
||||
const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1)
|
||||
const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0)
|
||||
const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
|
||||
const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
|
||||
const __m128i d0 = _mm_add_epi16(c1, a0);
|
||||
const __m128i d1 = _mm_add_epi16(c0, a1);
|
||||
const __m128i e0 = _mm_srai_epi16(d0, 1);
|
||||
const __m128i e1 = _mm_srai_epi16(d1, 1);
|
||||
const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
|
||||
const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
|
||||
const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
|
||||
const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
|
||||
const __m128i h0 = _mm_add_epi16(g0, f0);
|
||||
const __m128i h1 = _mm_add_epi16(g1, f1);
|
||||
const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
|
||||
const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
// (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
|
||||
// = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
|
||||
// We reuse the common sub-expressions.
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
|
||||
out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
|
||||
#undef MAX_Y
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void WebPInitSharpYUVSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) {
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
|
||||
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
|
||||
WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2)
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
||||
|
6
3rdparty/libwebp/src/dsp/yuv_sse41.c
vendored
6
3rdparty/libwebp/src/dsp/yuv_sse41.c
vendored
@ -15,10 +15,12 @@
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include "src/dsp/common_sse41.h"
|
||||
#include <stdlib.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "src/dsp/common_sse41.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
|
||||
|
||||
@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE41(const uint8_t* src) {
|
||||
// Load and replicate the U/V samples
|
||||
static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
|
||||
const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src));
|
||||
const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
|
||||
return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples
|
||||
}
|
||||
|
32
3rdparty/libwebp/src/enc/alpha_enc.c
vendored
32
3rdparty/libwebp/src/enc/alpha_enc.c
vendored
@ -13,6 +13,7 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
@ -54,7 +55,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
|
||||
WebPConfig config;
|
||||
WebPPicture picture;
|
||||
|
||||
WebPPictureInit(&picture);
|
||||
if (!WebPPictureInit(&picture)) return 0;
|
||||
picture.width = width;
|
||||
picture.height = height;
|
||||
picture.use_argb = 1;
|
||||
@ -86,7 +87,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
|
||||
// a decoder bug related to alpha with color cache.
|
||||
// See: https://code.google.com/p/webp/issues/detail?id=239
|
||||
// Need to re-enable this later.
|
||||
ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK);
|
||||
ok = VP8LEncodeStream(&config, &picture, bw, /*use_cache=*/0);
|
||||
WebPPictureFree(&picture);
|
||||
ok = ok && !bw->error_;
|
||||
if (!ok) {
|
||||
@ -140,6 +141,11 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
|
||||
!reduce_levels, &tmp_bw, &result->stats);
|
||||
if (ok) {
|
||||
output = VP8LBitWriterFinish(&tmp_bw);
|
||||
if (tmp_bw.error_) {
|
||||
VP8LBitWriterWipeOut(&tmp_bw);
|
||||
memset(&result->bw, 0, sizeof(result->bw));
|
||||
return 0;
|
||||
}
|
||||
output_size = VP8LBitWriterNumBytes(&tmp_bw);
|
||||
if (output_size > data_size) {
|
||||
// compressed size is larger than source! Revert to uncompressed mode.
|
||||
@ -148,6 +154,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
|
||||
}
|
||||
} else {
|
||||
VP8LBitWriterWipeOut(&tmp_bw);
|
||||
memset(&result->bw, 0, sizeof(result->bw));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -162,7 +169,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
|
||||
header = method | (filter << 2);
|
||||
if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
|
||||
|
||||
VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size);
|
||||
if (!VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size)) ok = 0;
|
||||
ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN);
|
||||
ok = ok && VP8BitWriterAppend(&result->bw, output, output_size);
|
||||
|
||||
@ -303,7 +310,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
|
||||
int ok = 1;
|
||||
const int reduce_levels = (quality < 100);
|
||||
|
||||
// quick sanity checks
|
||||
// quick correctness checks
|
||||
assert((uint64_t)data_size == (uint64_t)width * height); // as per spec
|
||||
assert(enc != NULL && pic != NULL && pic->a != NULL);
|
||||
assert(output != NULL && output_size != NULL);
|
||||
@ -312,11 +319,11 @@ static int EncodeAlpha(VP8Encoder* const enc,
|
||||
assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST);
|
||||
|
||||
if (quality < 0 || quality > 100) {
|
||||
return 0;
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
|
||||
}
|
||||
|
||||
if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) {
|
||||
return 0;
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
|
||||
}
|
||||
|
||||
if (method == ALPHA_NO_COMPRESSION) {
|
||||
@ -326,7 +333,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
|
||||
|
||||
quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size);
|
||||
if (quant_alpha == NULL) {
|
||||
return 0;
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
// Extract alpha data (width x height) from raw_data (stride x height).
|
||||
@ -346,6 +353,9 @@ static int EncodeAlpha(VP8Encoder* const enc,
|
||||
ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method,
|
||||
filter, reduce_levels, effort_level, output,
|
||||
output_size, pic->stats);
|
||||
if (!ok) {
|
||||
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise
|
||||
}
|
||||
#if !defined(WEBP_DISABLE_STATS)
|
||||
if (pic->stats != NULL) { // need stats?
|
||||
pic->stats->coded_size += (int)(*output_size);
|
||||
@ -361,7 +371,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
|
||||
//------------------------------------------------------------------------------
|
||||
// Main calls
|
||||
|
||||
static int CompressAlphaJob(void* arg1, void* dummy) {
|
||||
static int CompressAlphaJob(void* arg1, void* unused) {
|
||||
VP8Encoder* const enc = (VP8Encoder*)arg1;
|
||||
const WebPConfig* config = enc->config_;
|
||||
uint8_t* alpha_data = NULL;
|
||||
@ -375,13 +385,13 @@ static int CompressAlphaJob(void* arg1, void* dummy) {
|
||||
filter, effort_level, &alpha_data, &alpha_size)) {
|
||||
return 0;
|
||||
}
|
||||
if (alpha_size != (uint32_t)alpha_size) { // Sanity check.
|
||||
if (alpha_size != (uint32_t)alpha_size) { // Soundness check.
|
||||
WebPSafeFree(alpha_data);
|
||||
return 0;
|
||||
}
|
||||
enc->alpha_data_size_ = (uint32_t)alpha_size;
|
||||
enc->alpha_data_ = alpha_data;
|
||||
(void)dummy;
|
||||
(void)unused;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -405,7 +415,7 @@ int VP8EncStartAlpha(VP8Encoder* const enc) {
|
||||
WebPWorker* const worker = &enc->alpha_worker_;
|
||||
// Makes sure worker is good to go.
|
||||
if (!WebPGetWorkerInterface()->Reset(worker)) {
|
||||
return 0;
|
||||
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
WebPGetWorkerInterface()->Launch(worker);
|
||||
return 1;
|
||||
|
12
3rdparty/libwebp/src/enc/analysis_enc.c
vendored
12
3rdparty/libwebp/src/enc/analysis_enc.c
vendored
@ -391,12 +391,14 @@ static int DoSegmentsJob(void* arg1, void* arg2) {
|
||||
return ok;
|
||||
}
|
||||
|
||||
#ifdef WEBP_USE_THREAD
|
||||
// Adds the results held in 'src' to 'dst': the 'alpha' and 'uv_alpha' totals
// and every alphas[] entry with index in [0, MAX_ALPHA].
static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
  int bin = 0;
  dst->uv_alpha += src->uv_alpha;
  dst->alpha += src->alpha;
  while (bin <= MAX_ALPHA) {
    dst->alphas[bin] += src->alphas[bin];
    ++bin;
  }
}
|
||||
#endif
|
||||
|
||||
// initialize the job struct with some tasks to perform
|
||||
static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
|
||||
@ -425,10 +427,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
|
||||
(enc->method_ <= 1); // for method 0 - 1, we need preds_[] to be filled.
|
||||
if (do_segments) {
|
||||
const int last_row = enc->mb_h_;
|
||||
// We give a little more than a half work to the main thread.
|
||||
const int split_row = (9 * last_row + 15) >> 4;
|
||||
const int total_mb = last_row * enc->mb_w_;
|
||||
#ifdef WEBP_USE_THREAD
|
||||
// We give a little more than half of the work to the main thread.
|
||||
const int split_row = (9 * last_row + 15) >> 4;
|
||||
const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it
|
||||
const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
|
||||
#else
|
||||
@ -438,6 +440,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
|
||||
WebPGetWorkerInterface();
|
||||
SegmentJob main_job;
|
||||
if (do_mt) {
|
||||
#ifdef WEBP_USE_THREAD
|
||||
SegmentJob side_job;
|
||||
// Note the use of '&' instead of '&&' because we must call the functions
|
||||
// no matter what.
|
||||
@ -455,6 +458,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
|
||||
}
|
||||
worker_interface->End(&side_job.worker);
|
||||
if (ok) MergeJobs(&side_job, &main_job); // merge results together
|
||||
#endif // WEBP_USE_THREAD
|
||||
} else {
|
||||
// Even for single-thread case, we use the generic Worker tools.
|
||||
InitSegmentJob(enc, &main_job, 0, last_row);
|
||||
@ -470,6 +474,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
|
||||
} else { // Use only one default segment.
|
||||
ResetAllMBInfo(enc);
|
||||
}
|
||||
if (!ok) {
|
||||
return WebPEncodingSetError(enc->pic_,
|
||||
VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
@ -15,10 +15,11 @@
|
||||
//
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/enc/backward_references_enc.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/utils/color_cache_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
@ -30,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
|
||||
const PixOrCopy v);
|
||||
|
||||
typedef struct {
|
||||
double alpha_[VALUES_IN_BYTE];
|
||||
double red_[VALUES_IN_BYTE];
|
||||
double blue_[VALUES_IN_BYTE];
|
||||
double distance_[NUM_DISTANCE_CODES];
|
||||
double* literal_;
|
||||
float alpha_[VALUES_IN_BYTE];
|
||||
float red_[VALUES_IN_BYTE];
|
||||
float blue_[VALUES_IN_BYTE];
|
||||
float distance_[NUM_DISTANCE_CODES];
|
||||
float* literal_;
|
||||
} CostModel;
|
||||
|
||||
static void ConvertPopulationCountTableToBitEstimates(
|
||||
int num_symbols, const uint32_t population_counts[], double output[]) {
|
||||
int num_symbols, const uint32_t population_counts[], float output[]) {
|
||||
uint32_t sum = 0;
|
||||
int nonzeros = 0;
|
||||
int i;
|
||||
@ -51,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates(
|
||||
if (nonzeros <= 1) {
|
||||
memset(output, 0, num_symbols * sizeof(*output));
|
||||
} else {
|
||||
const double logsum = VP8LFastLog2(sum);
|
||||
const float logsum = VP8LFastLog2(sum);
|
||||
for (i = 0; i < num_symbols; ++i) {
|
||||
output[i] = logsum - VP8LFastLog2(population_counts[i]);
|
||||
}
|
||||
@ -75,8 +76,8 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
|
||||
}
|
||||
|
||||
ConvertPopulationCountTableToBitEstimates(
|
||||
VP8LHistogramNumCodes(histo->palette_code_bits_),
|
||||
histo->literal_, m->literal_);
|
||||
VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_,
|
||||
m->literal_);
|
||||
ConvertPopulationCountTableToBitEstimates(
|
||||
VALUES_IN_BYTE, histo->red_, m->red_);
|
||||
ConvertPopulationCountTableToBitEstimates(
|
||||
@ -92,27 +93,27 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
|
||||
return ok;
|
||||
}
|
||||
|
||||
static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
|
||||
static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) {
|
||||
return m->alpha_[v >> 24] +
|
||||
m->red_[(v >> 16) & 0xff] +
|
||||
m->literal_[(v >> 8) & 0xff] +
|
||||
m->blue_[v & 0xff];
|
||||
}
|
||||
|
||||
static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
|
||||
static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) {
|
||||
const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
|
||||
return m->literal_[literal_idx];
|
||||
}
|
||||
|
||||
static WEBP_INLINE double GetLengthCost(const CostModel* const m,
|
||||
uint32_t length) {
|
||||
static WEBP_INLINE float GetLengthCost(const CostModel* const m,
|
||||
uint32_t length) {
|
||||
int code, extra_bits;
|
||||
VP8LPrefixEncodeBits(length, &code, &extra_bits);
|
||||
return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
|
||||
}
|
||||
|
||||
static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
|
||||
uint32_t distance) {
|
||||
static WEBP_INLINE float GetDistanceCost(const CostModel* const m,
|
||||
uint32_t distance) {
|
||||
int code, extra_bits;
|
||||
VP8LPrefixEncodeBits(distance, &code, &extra_bits);
|
||||
return m->distance_[code] + extra_bits;
|
||||
@ -122,20 +123,20 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel(
|
||||
const uint32_t* const argb, VP8LColorCache* const hashers,
|
||||
const CostModel* const cost_model, int idx, int use_color_cache,
|
||||
float prev_cost, float* const cost, uint16_t* const dist_array) {
|
||||
double cost_val = prev_cost;
|
||||
float cost_val = prev_cost;
|
||||
const uint32_t color = argb[idx];
|
||||
const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1;
|
||||
if (ix >= 0) {
|
||||
// use_color_cache is true and hashers contains color
|
||||
const double mul0 = 0.68;
|
||||
const float mul0 = 0.68f;
|
||||
cost_val += GetCacheCost(cost_model, ix) * mul0;
|
||||
} else {
|
||||
const double mul1 = 0.82;
|
||||
const float mul1 = 0.82f;
|
||||
if (use_color_cache) VP8LColorCacheInsert(hashers, color);
|
||||
cost_val += GetLiteralCost(cost_model, color) * mul1;
|
||||
}
|
||||
if (cost[idx] > cost_val) {
|
||||
cost[idx] = (float)cost_val;
|
||||
cost[idx] = cost_val;
|
||||
dist_array[idx] = 1; // only one is inserted.
|
||||
}
|
||||
}
|
||||
@ -172,7 +173,7 @@ struct CostInterval {
|
||||
|
||||
// The GetLengthCost(cost_model, k) are cached in a CostCacheInterval.
// One interval covers a run of consecutive k sharing the same cached cost.
// The cost is a 'float', the same type as CostManager::cost_cache_[] that it
// is copied from and compared against.
typedef struct {
  float cost_;  // cached GetLengthCost() value shared by the whole interval
  int start_;
  int end_;     // Exclusive.
} CostCacheInterval;
|
||||
@ -187,7 +188,7 @@ typedef struct {
|
||||
int count_; // The number of stored intervals.
|
||||
CostCacheInterval* cache_intervals_;
|
||||
size_t cache_intervals_size_;
|
||||
double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k).
|
||||
float cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k).
|
||||
float* costs_;
|
||||
uint16_t* dist_array_;
|
||||
// Most of the time, we only need few intervals -> use a free-list, to avoid
|
||||
@ -262,10 +263,13 @@ static int CostManagerInit(CostManager* const manager,
|
||||
CostManagerInitFreeList(manager);
|
||||
|
||||
// Fill in the cost_cache_.
|
||||
manager->cache_intervals_size_ = 1;
|
||||
manager->cost_cache_[0] = GetLengthCost(cost_model, 0);
|
||||
for (i = 1; i < cost_cache_size; ++i) {
|
||||
// Has to be done in two passes due to a GCC bug on i686
|
||||
// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
|
||||
for (i = 0; i < cost_cache_size; ++i) {
|
||||
manager->cost_cache_[i] = GetLengthCost(cost_model, i);
|
||||
}
|
||||
manager->cache_intervals_size_ = 1;
|
||||
for (i = 1; i < cost_cache_size; ++i) {
|
||||
// Get the number of bound intervals.
|
||||
if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) {
|
||||
++manager->cache_intervals_size_;
|
||||
@ -294,7 +298,7 @@ static int CostManagerInit(CostManager* const manager,
|
||||
cur->end_ = 1;
|
||||
cur->cost_ = manager->cost_cache_[0];
|
||||
for (i = 1; i < cost_cache_size; ++i) {
|
||||
const double cost_val = manager->cost_cache_[i];
|
||||
const float cost_val = manager->cost_cache_[i];
|
||||
if (cost_val != cur->cost_) {
|
||||
++cur;
|
||||
// Initialize an interval.
|
||||
@ -303,6 +307,8 @@ static int CostManagerInit(CostManager* const manager,
|
||||
}
|
||||
cur->end_ = i + 1;
|
||||
}
|
||||
assert((size_t)(cur - manager->cache_intervals_) + 1 ==
|
||||
manager->cache_intervals_size_);
|
||||
}
|
||||
|
||||
manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
|
||||
@ -311,7 +317,7 @@ static int CostManagerInit(CostManager* const manager,
|
||||
return 0;
|
||||
}
|
||||
// Set the initial costs_ high for every pixel as we will keep the minimum.
|
||||
for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f;
|
||||
for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -457,7 +463,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
|
||||
// If handling the interval or one of its subintervals becomes too heavy, its
|
||||
// contribution is added to the costs right away.
|
||||
static WEBP_INLINE void PushInterval(CostManager* const manager,
|
||||
double distance_cost, int position,
|
||||
float distance_cost, int position,
|
||||
int len) {
|
||||
size_t i;
|
||||
CostInterval* interval = manager->head_;
|
||||
@ -474,7 +480,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
|
||||
const int k = j - position;
|
||||
float cost_tmp;
|
||||
assert(k >= 0 && k < MAX_LENGTH);
|
||||
cost_tmp = (float)(distance_cost + manager->cost_cache_[k]);
|
||||
cost_tmp = distance_cost + manager->cost_cache_[k];
|
||||
|
||||
if (manager->costs_[j] > cost_tmp) {
|
||||
manager->costs_[j] = cost_tmp;
|
||||
@ -492,7 +498,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
|
||||
const int end = position + (cost_cache_intervals[i].end_ > len
|
||||
? len
|
||||
: cost_cache_intervals[i].end_);
|
||||
const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_);
|
||||
const float cost = distance_cost + cost_cache_intervals[i].cost_;
|
||||
|
||||
for (; interval != NULL && interval->start_ < end;
|
||||
interval = interval_next) {
|
||||
@ -570,22 +576,21 @@ static int BackwardReferencesHashChainDistanceOnly(
|
||||
const int pix_count = xsize * ysize;
|
||||
const int use_color_cache = (cache_bits > 0);
|
||||
const size_t literal_array_size =
|
||||
sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
|
||||
((cache_bits > 0) ? (1 << cache_bits) : 0));
|
||||
sizeof(float) * (VP8LHistogramNumCodes(cache_bits));
|
||||
const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
|
||||
CostModel* const cost_model =
|
||||
(CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
|
||||
VP8LColorCache hashers;
|
||||
CostManager* cost_manager =
|
||||
(CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager));
|
||||
(CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager));
|
||||
int offset_prev = -1, len_prev = -1;
|
||||
double offset_cost = -1;
|
||||
float offset_cost = -1.f;
|
||||
int first_offset_is_constant = -1; // initialized with 'impossible' value
|
||||
int reach = 0;
|
||||
|
||||
if (cost_model == NULL || cost_manager == NULL) goto Error;
|
||||
|
||||
cost_model->literal_ = (double*)(cost_model + 1);
|
||||
cost_model->literal_ = (float*)(cost_model + 1);
|
||||
if (use_color_cache) {
|
||||
cc_init = VP8LColorCacheInit(&hashers, cache_bits);
|
||||
if (!cc_init) goto Error;
|
||||
@ -675,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly(
|
||||
}
|
||||
|
||||
ok = !refs->error_;
|
||||
Error:
|
||||
Error:
|
||||
if (cc_init) VP8LColorCacheClear(&hashers);
|
||||
CostManagerClear(cost_manager);
|
||||
WebPSafeFree(cost_model);
|
||||
|
@ -10,6 +10,8 @@
|
||||
// Author: Jyrki Alakuijala (jyrki@google.com)
|
||||
//
|
||||
|
||||
#include "src/enc/backward_references_enc.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
@ -17,10 +19,11 @@
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/enc/backward_references_enc.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/utils/color_cache_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/encode.h"
|
||||
|
||||
#define MIN_BLOCK_SIZE 256 // minimum block size for backward references
|
||||
|
||||
@ -255,10 +258,13 @@ static WEBP_INLINE int MaxFindCopyLength(int len) {
|
||||
|
||||
int VP8LHashChainFill(VP8LHashChain* const p, int quality,
|
||||
const uint32_t* const argb, int xsize, int ysize,
|
||||
int low_effort) {
|
||||
int low_effort, const WebPPicture* const pic,
|
||||
int percent_range, int* const percent) {
|
||||
const int size = xsize * ysize;
|
||||
const int iter_max = GetMaxItersForQuality(quality);
|
||||
const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
|
||||
int remaining_percent = percent_range;
|
||||
int percent_start = *percent;
|
||||
int pos;
|
||||
int argb_comp;
|
||||
uint32_t base_position;
|
||||
@ -276,7 +282,12 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
|
||||
|
||||
hash_to_first_index =
|
||||
(int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
|
||||
if (hash_to_first_index == NULL) return 0;
|
||||
if (hash_to_first_index == NULL) {
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
percent_range = remaining_percent / 2;
|
||||
remaining_percent -= percent_range;
|
||||
|
||||
// Set the int32_t array to -1.
|
||||
memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));
|
||||
@ -323,12 +334,22 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
|
||||
hash_to_first_index[hash_code] = pos++;
|
||||
argb_comp = argb_comp_next;
|
||||
}
|
||||
|
||||
if (!WebPReportProgress(
|
||||
pic, percent_start + percent_range * pos / (size - 2), percent)) {
|
||||
WebPSafeFree(hash_to_first_index);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
// Process the penultimate pixel.
|
||||
chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];
|
||||
|
||||
WebPSafeFree(hash_to_first_index);
|
||||
|
||||
percent_start += percent_range;
|
||||
if (!WebPReportProgress(pic, percent_start, percent)) return 0;
|
||||
percent_range = remaining_percent;
|
||||
|
||||
// Find the best match interval at each pixel, defined by an offset to the
|
||||
// pixel and a length. The right-most pixel cannot match anything to the right
|
||||
// (hence a best length of 0) and the left-most pixel nothing to the left
|
||||
@ -417,8 +438,17 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
|
||||
max_base_position = base_position;
|
||||
}
|
||||
}
|
||||
|
||||
if (!WebPReportProgress(pic,
|
||||
percent_start + percent_range *
|
||||
(size - 2 - base_position) /
|
||||
(size - 2),
|
||||
percent)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
|
||||
return WebPReportProgress(pic, percent_start + percent_range, percent);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
|
||||
@ -728,7 +758,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
|
||||
int* const best_cache_bits) {
|
||||
int i;
|
||||
const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
|
||||
double entropy_min = MAX_ENTROPY;
|
||||
float entropy_min = MAX_ENTROPY;
|
||||
int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
|
||||
VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
|
||||
VP8LRefsCursor c = VP8LRefsCursorInit(refs);
|
||||
@ -813,14 +843,14 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
|
||||
}
|
||||
|
||||
for (i = 0; i <= cache_bits_max; ++i) {
|
||||
const double entropy = VP8LHistogramEstimateBits(histos[i]);
|
||||
const float entropy = VP8LHistogramEstimateBits(histos[i]);
|
||||
if (i == 0 || entropy < entropy_min) {
|
||||
entropy_min = entropy;
|
||||
*best_cache_bits = i;
|
||||
}
|
||||
}
|
||||
ok = 1;
|
||||
Error:
|
||||
Error:
|
||||
for (i = 0; i <= cache_bits_max; ++i) {
|
||||
if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
|
||||
VP8LFreeHistogram(histos[i]);
|
||||
@ -890,7 +920,7 @@ static int GetBackwardReferences(int width, int height,
|
||||
int i, lz77_type;
|
||||
// Index 0 is for a color cache, index 1 for no cache (if needed).
|
||||
int lz77_types_best[2] = {0, 0};
|
||||
double bit_costs_best[2] = {DBL_MAX, DBL_MAX};
|
||||
float bit_costs_best[2] = {FLT_MAX, FLT_MAX};
|
||||
VP8LHashChain hash_chain_box;
|
||||
VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
|
||||
int status = 0;
|
||||
@ -902,7 +932,7 @@ static int GetBackwardReferences(int width, int height,
|
||||
for (lz77_type = 1; lz77_types_to_try;
|
||||
lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
|
||||
int res = 0;
|
||||
double bit_cost = 0.;
|
||||
float bit_cost = 0.f;
|
||||
if ((lz77_types_to_try & lz77_type) == 0) continue;
|
||||
switch (lz77_type) {
|
||||
case kLZ77RLE:
|
||||
@ -976,15 +1006,16 @@ static int GetBackwardReferences(int width, int height,
|
||||
const VP8LHashChain* const hash_chain_tmp =
|
||||
(lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
|
||||
const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
|
||||
if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
|
||||
hash_chain_tmp, &refs[i],
|
||||
refs_tmp)) {
|
||||
double bit_cost_trace;
|
||||
VP8LHistogramCreate(histo, refs_tmp, cache_bits);
|
||||
bit_cost_trace = VP8LHistogramEstimateBits(histo);
|
||||
if (bit_cost_trace < bit_costs_best[i]) {
|
||||
BackwardRefsSwap(refs_tmp, &refs[i]);
|
||||
}
|
||||
float bit_cost_trace;
|
||||
if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
|
||||
hash_chain_tmp, &refs[i],
|
||||
refs_tmp)) {
|
||||
goto Error;
|
||||
}
|
||||
VP8LHistogramCreate(histo, refs_tmp, cache_bits);
|
||||
bit_cost_trace = VP8LHistogramEstimateBits(histo);
|
||||
if (bit_cost_trace < bit_costs_best[i]) {
|
||||
BackwardRefsSwap(refs_tmp, &refs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1000,31 +1031,35 @@ static int GetBackwardReferences(int width, int height,
|
||||
}
|
||||
status = 1;
|
||||
|
||||
Error:
|
||||
Error:
|
||||
VP8LHashChainClear(&hash_chain_box);
|
||||
VP8LFreeHistogram(histo);
|
||||
return status;
|
||||
}
|
||||
|
||||
WebPEncodingError VP8LGetBackwardReferences(
|
||||
int VP8LGetBackwardReferences(
|
||||
int width, int height, const uint32_t* const argb, int quality,
|
||||
int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
|
||||
const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
|
||||
int* const cache_bits_best) {
|
||||
int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
|
||||
int* const percent) {
|
||||
if (low_effort) {
|
||||
VP8LBackwardRefs* refs_best;
|
||||
*cache_bits_best = cache_bits_max;
|
||||
refs_best = GetBackwardReferencesLowEffort(
|
||||
width, height, argb, cache_bits_best, hash_chain, refs);
|
||||
if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
|
||||
if (refs_best == NULL) {
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
// Set it in first position.
|
||||
BackwardRefsSwap(refs_best, &refs[0]);
|
||||
} else {
|
||||
if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
|
||||
cache_bits_max, do_no_cache, hash_chain, refs,
|
||||
cache_bits_best)) {
|
||||
return VP8_ENC_ERROR_OUT_OF_MEMORY;
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
}
|
||||
return VP8_ENC_OK;
|
||||
|
||||
return WebPReportProgress(pic, *percent + percent_range, percent);
|
||||
}
|
||||
|
@ -134,10 +134,11 @@ struct VP8LHashChain {
|
||||
|
||||
// Must be called first, to set size.
|
||||
int VP8LHashChainInit(VP8LHashChain* const p, int size);
|
||||
// Pre-compute the best matches for argb.
|
||||
// Pre-compute the best matches for argb. pic and percent are for progress.
|
||||
int VP8LHashChainFill(VP8LHashChain* const p, int quality,
|
||||
const uint32_t* const argb, int xsize, int ysize,
|
||||
int low_effort);
|
||||
int low_effort, const WebPPicture* const pic,
|
||||
int percent_range, int* const percent);
|
||||
void VP8LHashChainClear(VP8LHashChain* const p); // release memory
|
||||
|
||||
static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p,
|
||||
@ -227,11 +228,14 @@ enum VP8LLZ77Type {
|
||||
// VP8LBackwardRefs is put in the first element, the best value with no-cache in
|
||||
// the second element.
|
||||
// In both cases, the last element is used as temporary internally.
|
||||
WebPEncodingError VP8LGetBackwardReferences(
|
||||
// pic and percent are for progress.
|
||||
// Returns false in case of error (stored in pic->error_code).
|
||||
int VP8LGetBackwardReferences(
|
||||
int width, int height, const uint32_t* const argb, int quality,
|
||||
int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
|
||||
const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
|
||||
int* const cache_bits_best);
|
||||
int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
|
||||
int* const percent);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
21
3rdparty/libwebp/src/enc/frame_enc.c
vendored
21
3rdparty/libwebp/src/enc/frame_enc.c
vendored
@ -578,7 +578,7 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
|
||||
uint64_t size = 0;
|
||||
uint64_t size_p0 = 0;
|
||||
uint64_t distortion = 0;
|
||||
const uint64_t pixel_count = nb_mbs * 384;
|
||||
const uint64_t pixel_count = (uint64_t)nb_mbs * 384;
|
||||
|
||||
VP8IteratorInit(enc, &it);
|
||||
SetLoopParams(enc, s->q);
|
||||
@ -689,7 +689,7 @@ static int PreLoopInitialize(VP8Encoder* const enc) {
|
||||
}
|
||||
if (!ok) {
|
||||
VP8EncFreeBitWriters(enc); // malloc error occurred
|
||||
WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
@ -719,6 +719,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
|
||||
} else {
|
||||
// Something bad happened -> need to do some memory cleanup.
|
||||
VP8EncFreeBitWriters(enc);
|
||||
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
@ -754,6 +755,11 @@ int VP8EncLoop(VP8Encoder* const enc) {
|
||||
// *then* decide how to code the skip decision if there's one.
|
||||
if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
|
||||
CodeResiduals(it.bw_, &it, &info);
|
||||
if (it.bw_->error_) {
|
||||
// enc->pic_->error_code is set in PostLoopFinalize().
|
||||
ok = 0;
|
||||
break;
|
||||
}
|
||||
} else { // reset predictors after a skip
|
||||
ResetAfterSkip(&it);
|
||||
}
|
||||
@ -778,11 +784,12 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
|
||||
// Roughly refresh the proba eight times per pass
|
||||
int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
|
||||
int num_pass_left = enc->config_->pass;
|
||||
int remaining_progress = 40; // percents
|
||||
const int do_search = enc->do_search_;
|
||||
VP8EncIterator it;
|
||||
VP8EncProba* const proba = &enc->proba_;
|
||||
const VP8RDLevel rd_opt = enc->rd_opt_level_;
|
||||
const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
|
||||
const uint64_t pixel_count = (uint64_t)enc->mb_w_ * enc->mb_h_ * 384;
|
||||
PassStats stats;
|
||||
int ok;
|
||||
|
||||
@ -805,6 +812,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
|
||||
uint64_t size_p0 = 0;
|
||||
uint64_t distortion = 0;
|
||||
int cnt = max_count;
|
||||
// The final number of passes is not trivial to know in advance.
|
||||
const int pass_progress = remaining_progress / (2 + num_pass_left);
|
||||
remaining_progress -= pass_progress;
|
||||
VP8IteratorInit(enc, &it);
|
||||
SetLoopParams(enc, stats.q);
|
||||
if (is_last_pass) {
|
||||
@ -832,7 +842,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
|
||||
StoreSideInfo(&it);
|
||||
VP8StoreFilterStats(&it);
|
||||
VP8IteratorExport(&it);
|
||||
ok = VP8IteratorProgress(&it, 20);
|
||||
ok = VP8IteratorProgress(&it, pass_progress);
|
||||
}
|
||||
VP8IteratorSaveBoundary(&it);
|
||||
} while (ok && VP8IteratorNext(&it));
|
||||
@ -878,7 +888,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
|
||||
ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
|
||||
(const uint8_t*)proba->coeffs_, 1);
|
||||
}
|
||||
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
|
||||
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress,
|
||||
&enc->percent_);
|
||||
return PostLoopFinalize(&it, ok);
|
||||
}
|
||||
|
||||
|
252
3rdparty/libwebp/src/enc/histogram_enc.c
vendored
252
3rdparty/libwebp/src/enc/histogram_enc.c
vendored
@ -13,15 +13,17 @@
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "src/enc/backward_references_enc.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/enc/backward_references_enc.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
#define MAX_COST 1.e38
|
||||
#define MAX_BIT_COST FLT_MAX
|
||||
|
||||
// Number of partitions for the three dominant (literal, red and blue) symbol
|
||||
// costs.
|
||||
@ -228,8 +230,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
|
||||
// -----------------------------------------------------------------------------
|
||||
// Entropy-related functions.
|
||||
|
||||
static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
|
||||
double mix;
|
||||
static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) {
|
||||
float mix;
|
||||
if (entropy->nonzeros < 5) {
|
||||
if (entropy->nonzeros <= 1) {
|
||||
return 0;
|
||||
@ -238,67 +240,67 @@ static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
|
||||
// Let's mix in a bit of entropy to favor good clustering when
|
||||
// distributions of these are combined.
|
||||
if (entropy->nonzeros == 2) {
|
||||
return 0.99 * entropy->sum + 0.01 * entropy->entropy;
|
||||
return 0.99f * entropy->sum + 0.01f * entropy->entropy;
|
||||
}
|
||||
// No matter what the entropy says, we cannot be better than min_limit
|
||||
// with Huffman coding. I am mixing a bit of entropy into the
|
||||
// min_limit since it produces much better (~0.5 %) compression results
|
||||
// perhaps because of better entropy clustering.
|
||||
if (entropy->nonzeros == 3) {
|
||||
mix = 0.95;
|
||||
mix = 0.95f;
|
||||
} else {
|
||||
mix = 0.7; // nonzeros == 4.
|
||||
mix = 0.7f; // nonzeros == 4.
|
||||
}
|
||||
} else {
|
||||
mix = 0.627;
|
||||
mix = 0.627f;
|
||||
}
|
||||
|
||||
{
|
||||
double min_limit = 2 * entropy->sum - entropy->max_val;
|
||||
min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy;
|
||||
float min_limit = 2.f * entropy->sum - entropy->max_val;
|
||||
min_limit = mix * min_limit + (1.f - mix) * entropy->entropy;
|
||||
return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
|
||||
}
|
||||
}
|
||||
|
||||
double VP8LBitsEntropy(const uint32_t* const array, int n) {
|
||||
float VP8LBitsEntropy(const uint32_t* const array, int n) {
|
||||
VP8LBitEntropy entropy;
|
||||
VP8LBitsEntropyUnrefined(array, n, &entropy);
|
||||
|
||||
return BitsEntropyRefine(&entropy);
|
||||
}
|
||||
|
||||
static double InitialHuffmanCost(void) {
|
||||
static float InitialHuffmanCost(void) {
|
||||
// Small bias because Huffman code length is typically not stored in
|
||||
// full length.
|
||||
static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
|
||||
static const double kSmallBias = 9.1;
|
||||
static const float kSmallBias = 9.1f;
|
||||
return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
|
||||
}
|
||||
|
||||
// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
|
||||
static double FinalHuffmanCost(const VP8LStreaks* const stats) {
|
||||
static float FinalHuffmanCost(const VP8LStreaks* const stats) {
|
||||
// The constants in this function are experimental and got rounded from
|
||||
// their original values in 1/8 when switched to 1/1024.
|
||||
double retval = InitialHuffmanCost();
|
||||
float retval = InitialHuffmanCost();
|
||||
// Second coefficient: Many zeros in the histogram are covered efficiently
|
||||
// by a run-length encode. Originally 2/8.
|
||||
retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
|
||||
retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1];
|
||||
// Second coefficient: Constant values are encoded less efficiently, but still
|
||||
// RLE'ed. Originally 6/8.
|
||||
retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
|
||||
retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1];
|
||||
// 0s are usually encoded more efficiently than non-0s.
|
||||
// Originally 15/8.
|
||||
retval += 1.796875 * stats->streaks[0][0];
|
||||
retval += 1.796875f * stats->streaks[0][0];
|
||||
// Originally 26/8.
|
||||
retval += 3.28125 * stats->streaks[1][0];
|
||||
retval += 3.28125f * stats->streaks[1][0];
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Get the symbol entropy for the distribution 'population'.
|
||||
// Set 'trivial_sym', if there's only one symbol present in the distribution.
|
||||
static double PopulationCost(const uint32_t* const population, int length,
|
||||
uint32_t* const trivial_sym,
|
||||
uint8_t* const is_used) {
|
||||
static float PopulationCost(const uint32_t* const population, int length,
|
||||
uint32_t* const trivial_sym,
|
||||
uint8_t* const is_used) {
|
||||
VP8LBitEntropy bit_entropy;
|
||||
VP8LStreaks stats;
|
||||
VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
|
||||
@ -314,11 +316,10 @@ static double PopulationCost(const uint32_t* const population, int length,
|
||||
|
||||
// trivial_at_end is 1 if the two histograms only have one element that is
|
||||
// non-zero: both the zero-th one, or both the last one.
|
||||
static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
|
||||
const uint32_t* const Y,
|
||||
int length, int is_X_used,
|
||||
int is_Y_used,
|
||||
int trivial_at_end) {
|
||||
static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length,
|
||||
int is_X_used, int is_Y_used,
|
||||
int trivial_at_end) {
|
||||
VP8LStreaks stats;
|
||||
if (trivial_at_end) {
|
||||
// This configuration is due to palettization that transforms an indexed
|
||||
@ -356,16 +357,18 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
|
||||
}
|
||||
|
||||
// Estimates the Entropy + Huffman + other block overhead size cost.
|
||||
double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
|
||||
return
|
||||
PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
|
||||
NULL, &p->is_used_[0])
|
||||
+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1])
|
||||
+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2])
|
||||
+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3])
|
||||
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4])
|
||||
+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
|
||||
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
float VP8LHistogramEstimateBits(VP8LHistogram* const p) {
|
||||
return PopulationCost(p->literal_,
|
||||
VP8LHistogramNumCodes(p->palette_code_bits_), NULL,
|
||||
&p->is_used_[0]) +
|
||||
PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1]) +
|
||||
PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2]) +
|
||||
PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3]) +
|
||||
PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL,
|
||||
&p->is_used_[4]) +
|
||||
(float)VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES) +
|
||||
(float)VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@ -373,17 +376,16 @@ double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
|
||||
|
||||
static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
double cost_threshold,
|
||||
double* cost) {
|
||||
float cost_threshold, float* cost) {
|
||||
const int palette_code_bits = a->palette_code_bits_;
|
||||
int trivial_at_end = 0;
|
||||
assert(a->palette_code_bits_ == b->palette_code_bits_);
|
||||
*cost += GetCombinedEntropy(a->literal_, b->literal_,
|
||||
VP8LHistogramNumCodes(palette_code_bits),
|
||||
a->is_used_[0], b->is_used_[0], 0);
|
||||
*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
|
||||
b->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES);
|
||||
*cost += (float)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
|
||||
b->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM &&
|
||||
@ -417,8 +419,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
||||
*cost +=
|
||||
GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
|
||||
a->is_used_[4], b->is_used_[4], 0);
|
||||
*cost +=
|
||||
VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
|
||||
*cost += (float)VP8LExtraCostCombined(a->distance_, b->distance_,
|
||||
NUM_DISTANCE_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
return 1;
|
||||
@ -439,12 +441,11 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a,
|
||||
// Since the previous score passed is 'cost_threshold', we only need to compare
|
||||
// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
|
||||
// early.
|
||||
static double HistogramAddEval(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out,
|
||||
double cost_threshold) {
|
||||
double cost = 0;
|
||||
const double sum_cost = a->bit_cost_ + b->bit_cost_;
|
||||
static float HistogramAddEval(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out, float cost_threshold) {
|
||||
float cost = 0;
|
||||
const float sum_cost = a->bit_cost_ + b->bit_cost_;
|
||||
cost_threshold += sum_cost;
|
||||
|
||||
if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
|
||||
@ -459,10 +460,10 @@ static double HistogramAddEval(const VP8LHistogram* const a,
|
||||
// Same as HistogramAddEval(), except that the resulting histogram
|
||||
// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
|
||||
// the term C(b) which is constant over all the evaluations.
|
||||
static double HistogramAddThresh(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
double cost_threshold) {
|
||||
double cost;
|
||||
static float HistogramAddThresh(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
float cost_threshold) {
|
||||
float cost;
|
||||
assert(a != NULL && b != NULL);
|
||||
cost = -a->bit_cost_;
|
||||
GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
|
||||
@ -473,24 +474,22 @@ static double HistogramAddThresh(const VP8LHistogram* const a,
|
||||
|
||||
// The structure to keep track of cost range for the three dominant entropy
|
||||
// symbols.
|
||||
// TODO(skal): Evaluate if float can be used here instead of double for
|
||||
// representing the entropy costs.
|
||||
typedef struct {
|
||||
double literal_max_;
|
||||
double literal_min_;
|
||||
double red_max_;
|
||||
double red_min_;
|
||||
double blue_max_;
|
||||
double blue_min_;
|
||||
float literal_max_;
|
||||
float literal_min_;
|
||||
float red_max_;
|
||||
float red_min_;
|
||||
float blue_max_;
|
||||
float blue_min_;
|
||||
} DominantCostRange;
|
||||
|
||||
static void DominantCostRangeInit(DominantCostRange* const c) {
|
||||
c->literal_max_ = 0.;
|
||||
c->literal_min_ = MAX_COST;
|
||||
c->literal_min_ = MAX_BIT_COST;
|
||||
c->red_max_ = 0.;
|
||||
c->red_min_ = MAX_COST;
|
||||
c->red_min_ = MAX_BIT_COST;
|
||||
c->blue_max_ = 0.;
|
||||
c->blue_min_ = MAX_COST;
|
||||
c->blue_min_ = MAX_BIT_COST;
|
||||
}
|
||||
|
||||
static void UpdateDominantCostRange(
|
||||
@ -505,16 +504,15 @@ static void UpdateDominantCostRange(
|
||||
|
||||
static void UpdateHistogramCost(VP8LHistogram* const h) {
|
||||
uint32_t alpha_sym, red_sym, blue_sym;
|
||||
const double alpha_cost =
|
||||
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
|
||||
&h->is_used_[3]);
|
||||
const double distance_cost =
|
||||
const float alpha_cost =
|
||||
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]);
|
||||
const float distance_cost =
|
||||
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
|
||||
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
|
||||
(float)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
|
||||
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
|
||||
h->literal_cost_ =
|
||||
PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
|
||||
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
|
||||
(float)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
|
||||
h->red_cost_ =
|
||||
PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
|
||||
h->blue_cost_ =
|
||||
@ -529,10 +527,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
|
||||
}
|
||||
}
|
||||
|
||||
static int GetBinIdForEntropy(double min, double max, double val) {
|
||||
const double range = max - min;
|
||||
static int GetBinIdForEntropy(float min, float max, float val) {
|
||||
const float range = max - min;
|
||||
if (range > 0.) {
|
||||
const double delta = val - min;
|
||||
const float delta = val - min;
|
||||
return (int)((NUM_PARTITIONS - 1e-6) * delta / range);
|
||||
} else {
|
||||
return 0;
|
||||
@ -641,15 +639,11 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
|
||||
|
||||
// Merges some histograms with same bin_id together if it's advantageous.
|
||||
// Sets the remaining histograms to NULL.
|
||||
static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
|
||||
int* num_used,
|
||||
const uint16_t* const clusters,
|
||||
uint16_t* const cluster_mappings,
|
||||
VP8LHistogram* cur_combo,
|
||||
const uint16_t* const bin_map,
|
||||
int num_bins,
|
||||
double combine_cost_factor,
|
||||
int low_effort) {
|
||||
static void HistogramCombineEntropyBin(
|
||||
VP8LHistogramSet* const image_histo, int* num_used,
|
||||
const uint16_t* const clusters, uint16_t* const cluster_mappings,
|
||||
VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins,
|
||||
float combine_cost_factor, int low_effort) {
|
||||
VP8LHistogram** const histograms = image_histo->histograms;
|
||||
int idx;
|
||||
struct {
|
||||
@ -679,11 +673,10 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
|
||||
cluster_mappings[clusters[idx]] = clusters[first];
|
||||
} else {
|
||||
// try to merge #idx into #first (both share the same bin_id)
|
||||
const double bit_cost = histograms[idx]->bit_cost_;
|
||||
const double bit_cost_thresh = -bit_cost * combine_cost_factor;
|
||||
const double curr_cost_diff =
|
||||
HistogramAddEval(histograms[first], histograms[idx],
|
||||
cur_combo, bit_cost_thresh);
|
||||
const float bit_cost = histograms[idx]->bit_cost_;
|
||||
const float bit_cost_thresh = -bit_cost * combine_cost_factor;
|
||||
const float curr_cost_diff = HistogramAddEval(
|
||||
histograms[first], histograms[idx], cur_combo, bit_cost_thresh);
|
||||
if (curr_cost_diff < bit_cost_thresh) {
|
||||
// Try to merge two histograms only if the combo is a trivial one or
|
||||
// the two candidate histograms are already non-trivial.
|
||||
@ -731,8 +724,8 @@ static uint32_t MyRand(uint32_t* const seed) {
|
||||
typedef struct {
|
||||
int idx1;
|
||||
int idx2;
|
||||
double cost_diff;
|
||||
double cost_combo;
|
||||
float cost_diff;
|
||||
float cost_combo;
|
||||
} HistogramPair;
|
||||
|
||||
typedef struct {
|
||||
@ -787,10 +780,9 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
|
||||
// Update the cost diff and combo of a pair of histograms. This needs to be
|
||||
// called when the histograms have been merged with a third one.
|
||||
static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
|
||||
const VP8LHistogram* const h2,
|
||||
double threshold,
|
||||
const VP8LHistogram* const h2, float threshold,
|
||||
HistogramPair* const pair) {
|
||||
const double sum_cost = h1->bit_cost_ + h2->bit_cost_;
|
||||
const float sum_cost = h1->bit_cost_ + h2->bit_cost_;
|
||||
pair->cost_combo = 0.;
|
||||
GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo);
|
||||
pair->cost_diff = pair->cost_combo - sum_cost;
|
||||
@ -799,9 +791,9 @@ static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
|
||||
// Create a pair from indices "idx1" and "idx2" provided its cost
|
||||
// is inferior to "threshold", a negative entropy.
|
||||
// It returns the cost of the pair, or 0. if it is superior to threshold.
|
||||
static double HistoQueuePush(HistoQueue* const histo_queue,
|
||||
VP8LHistogram** const histograms, int idx1,
|
||||
int idx2, double threshold) {
|
||||
static float HistoQueuePush(HistoQueue* const histo_queue,
|
||||
VP8LHistogram** const histograms, int idx1,
|
||||
int idx2, float threshold) {
|
||||
const VP8LHistogram* h1;
|
||||
const VP8LHistogram* h2;
|
||||
HistogramPair pair;
|
||||
@ -945,8 +937,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
|
||||
++tries_with_no_success < num_tries_no_success;
|
||||
++iter) {
|
||||
int* mapping_index;
|
||||
double best_cost =
|
||||
(histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff;
|
||||
float best_cost =
|
||||
(histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff;
|
||||
int best_idx1 = -1, best_idx2 = 1;
|
||||
const uint32_t rand_range = (*num_used - 1) * (*num_used);
|
||||
// (*num_used) / 2 was chosen empirically. Less means faster but worse
|
||||
@ -955,7 +947,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
|
||||
|
||||
// Pick random samples.
|
||||
for (j = 0; *num_used >= 2 && j < num_tries; ++j) {
|
||||
double curr_cost;
|
||||
float curr_cost;
|
||||
// Choose two different histograms at random and try to combine them.
|
||||
const uint32_t tmp = MyRand(&seed) % rand_range;
|
||||
uint32_t idx1 = tmp / (*num_used - 1);
|
||||
@ -1034,7 +1026,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
|
||||
*do_greedy = (*num_used <= min_cluster_size);
|
||||
ok = 1;
|
||||
|
||||
End:
|
||||
End:
|
||||
HistoQueueClear(&histo_queue);
|
||||
WebPSafeFree(mappings);
|
||||
return ok;
|
||||
@ -1057,7 +1049,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
|
||||
if (out_size > 1) {
|
||||
for (i = 0; i < in_size; ++i) {
|
||||
int best_out = 0;
|
||||
double best_bits = MAX_COST;
|
||||
float best_bits = MAX_BIT_COST;
|
||||
int k;
|
||||
if (in_histo[i] == NULL) {
|
||||
// Arbitrarily set to the previous value if unused to help future LZ77.
|
||||
@ -1065,7 +1057,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
|
||||
continue;
|
||||
}
|
||||
for (k = 0; k < out_size; ++k) {
|
||||
double cur_bits;
|
||||
float cur_bits;
|
||||
cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
|
||||
if (k == 0 || cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
@ -1093,13 +1085,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
|
||||
}
|
||||
}
|
||||
|
||||
static double GetCombineCostFactor(int histo_size, int quality) {
|
||||
double combine_cost_factor = 0.16;
|
||||
static float GetCombineCostFactor(int histo_size, int quality) {
|
||||
float combine_cost_factor = 0.16f;
|
||||
if (quality < 90) {
|
||||
if (histo_size > 256) combine_cost_factor /= 2.;
|
||||
if (histo_size > 512) combine_cost_factor /= 2.;
|
||||
if (histo_size > 1024) combine_cost_factor /= 2.;
|
||||
if (quality <= 50) combine_cost_factor /= 2.;
|
||||
if (histo_size > 256) combine_cost_factor /= 2.f;
|
||||
if (histo_size > 512) combine_cost_factor /= 2.f;
|
||||
if (histo_size > 1024) combine_cost_factor /= 2.f;
|
||||
if (quality <= 50) combine_cost_factor /= 2.f;
|
||||
}
|
||||
return combine_cost_factor;
|
||||
}
|
||||
@ -1169,15 +1161,17 @@ static void RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) {
|
||||
}
|
||||
|
||||
int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
const VP8LBackwardRefs* const refs,
|
||||
int quality, int low_effort,
|
||||
int histo_bits, int cache_bits,
|
||||
const VP8LBackwardRefs* const refs, int quality,
|
||||
int low_effort, int histogram_bits, int cache_bits,
|
||||
VP8LHistogramSet* const image_histo,
|
||||
VP8LHistogram* const tmp_histo,
|
||||
uint16_t* const histogram_symbols) {
|
||||
int ok = 0;
|
||||
const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
|
||||
const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
|
||||
uint16_t* const histogram_symbols,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent) {
|
||||
const int histo_xsize =
|
||||
histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1;
|
||||
const int histo_ysize =
|
||||
histogram_bits ? VP8LSubSampleSize(ysize, histogram_bits) : 1;
|
||||
const int image_histo_raw_size = histo_xsize * histo_ysize;
|
||||
VP8LHistogramSet* const orig_histo =
|
||||
VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
|
||||
@ -1187,13 +1181,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
|
||||
int entropy_combine;
|
||||
uint16_t* const map_tmp =
|
||||
WebPSafeMalloc(2 * image_histo_raw_size, sizeof(map_tmp));
|
||||
WebPSafeMalloc(2 * image_histo_raw_size, sizeof(*map_tmp));
|
||||
uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size;
|
||||
int num_used = image_histo_raw_size;
|
||||
if (orig_histo == NULL || map_tmp == NULL) goto Error;
|
||||
if (orig_histo == NULL || map_tmp == NULL) {
|
||||
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
goto Error;
|
||||
}
|
||||
|
||||
// Construct the histograms from backward references.
|
||||
HistogramBuild(xsize, histo_bits, refs, orig_histo);
|
||||
HistogramBuild(xsize, histogram_bits, refs, orig_histo);
|
||||
// Copies the histograms and computes its bit_cost.
|
||||
// histogram_symbols is optimized
|
||||
HistogramCopyAndAnalyze(orig_histo, image_histo, &num_used,
|
||||
@ -1204,16 +1201,15 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
|
||||
if (entropy_combine) {
|
||||
uint16_t* const bin_map = map_tmp;
|
||||
const double combine_cost_factor =
|
||||
const float combine_cost_factor =
|
||||
GetCombineCostFactor(image_histo_raw_size, quality);
|
||||
const uint32_t num_clusters = num_used;
|
||||
|
||||
HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
|
||||
// Collapse histograms with similar entropy.
|
||||
HistogramCombineEntropyBin(image_histo, &num_used, histogram_symbols,
|
||||
cluster_mappings, tmp_histo, bin_map,
|
||||
entropy_combine_num_bins, combine_cost_factor,
|
||||
low_effort);
|
||||
HistogramCombineEntropyBin(
|
||||
image_histo, &num_used, histogram_symbols, cluster_mappings, tmp_histo,
|
||||
bin_map, entropy_combine_num_bins, combine_cost_factor, low_effort);
|
||||
OptimizeHistogramSymbols(image_histo, cluster_mappings, num_clusters,
|
||||
map_tmp, histogram_symbols);
|
||||
}
|
||||
@ -1227,11 +1223,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
int do_greedy;
|
||||
if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size,
|
||||
&do_greedy)) {
|
||||
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
goto Error;
|
||||
}
|
||||
if (do_greedy) {
|
||||
RemoveEmptyHistograms(image_histo);
|
||||
if (!HistogramCombineGreedy(image_histo, &num_used)) {
|
||||
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
goto Error;
|
||||
}
|
||||
}
|
||||
@ -1241,10 +1239,12 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
RemoveEmptyHistograms(image_histo);
|
||||
HistogramRemap(orig_histo, image_histo, histogram_symbols);
|
||||
|
||||
ok = 1;
|
||||
if (!WebPReportProgress(pic, *percent + percent_range, percent)) {
|
||||
goto Error;
|
||||
}
|
||||
|
||||
Error:
|
||||
VP8LFreeHistogramSet(orig_histo);
|
||||
WebPSafeFree(map_tmp);
|
||||
return ok;
|
||||
return (pic->error_code == VP8_ENC_OK);
|
||||
}
|
||||
|
30
3rdparty/libwebp/src/enc/histogram_enc.h
vendored
30
3rdparty/libwebp/src/enc/histogram_enc.h
vendored
@ -40,10 +40,10 @@ typedef struct {
|
||||
int palette_code_bits_;
|
||||
uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha
|
||||
// literal symbols are single valued.
|
||||
double bit_cost_; // cached value of bit cost.
|
||||
double literal_cost_; // Cached values of dominant entropy costs:
|
||||
double red_cost_; // literal, red & blue.
|
||||
double blue_cost_;
|
||||
float bit_cost_; // cached value of bit cost.
|
||||
float literal_cost_; // Cached values of dominant entropy costs:
|
||||
float red_cost_; // literal, red & blue.
|
||||
float blue_cost_;
|
||||
uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance
|
||||
} VP8LHistogram;
|
||||
|
||||
@ -64,8 +64,8 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
|
||||
const VP8LBackwardRefs* const refs,
|
||||
int palette_code_bits);
|
||||
|
||||
// Return the size of the histogram for a given palette_code_bits.
|
||||
int VP8LGetHistogramSize(int palette_code_bits);
|
||||
// Return the size of the histogram for a given cache_bits.
|
||||
int VP8LGetHistogramSize(int cache_bits);
|
||||
|
||||
// Set the palette_code_bits and reset the stats.
|
||||
// If init_arrays is true, the arrays are also filled with 0's.
|
||||
@ -105,21 +105,23 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
|
||||
((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
|
||||
}
|
||||
|
||||
// Builds the histogram image.
|
||||
// Builds the histogram image. pic and percent are for progress.
|
||||
// Returns false in case of error (stored in pic->error_code).
|
||||
int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
const VP8LBackwardRefs* const refs,
|
||||
int quality, int low_effort,
|
||||
int histogram_bits, int cache_bits,
|
||||
VP8LHistogramSet* const image_in,
|
||||
const VP8LBackwardRefs* const refs, int quality,
|
||||
int low_effort, int histogram_bits, int cache_bits,
|
||||
VP8LHistogramSet* const image_histo,
|
||||
VP8LHistogram* const tmp_histo,
|
||||
uint16_t* const histogram_symbols);
|
||||
uint16_t* const histogram_symbols,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent);
|
||||
|
||||
// Returns the entropy for the symbols in the input array.
|
||||
double VP8LBitsEntropy(const uint32_t* const array, int n);
|
||||
float VP8LBitsEntropy(const uint32_t* const array, int n);
|
||||
|
||||
// Estimate how many bits the combined entropy of literals and distance
|
||||
// approximately maps to.
|
||||
double VP8LHistogramEstimateBits(VP8LHistogram* const p);
|
||||
float VP8LHistogramEstimateBits(VP8LHistogram* const p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
502
3rdparty/libwebp/src/enc/picture_csp_enc.c
vendored
502
3rdparty/libwebp/src/enc/picture_csp_enc.c
vendored
@ -15,12 +15,19 @@
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
#include "sharpyuv/sharpyuv_csp.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/utils/random_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
// Comment out to disable gamma-compression during RGB->U/V averaging
|
||||
#define USE_GAMMA_COMPRESSION
|
||||
@ -62,10 +69,12 @@ static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
|
||||
int WebPPictureHasTransparency(const WebPPicture* picture) {
|
||||
if (picture == NULL) return 0;
|
||||
if (picture->use_argb) {
|
||||
const int alpha_offset = ALPHA_OFFSET;
|
||||
return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset,
|
||||
picture->width, picture->height,
|
||||
4, picture->argb_stride * sizeof(*picture->argb));
|
||||
if (picture->argb != NULL) {
|
||||
return CheckNonOpaque((const uint8_t*)picture->argb + ALPHA_OFFSET,
|
||||
picture->width, picture->height,
|
||||
4, picture->argb_stride * sizeof(*picture->argb));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return CheckNonOpaque(picture->a, picture->width, picture->height,
|
||||
1, picture->a_stride);
|
||||
@ -76,30 +85,31 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {
|
||||
|
||||
#if defined(USE_GAMMA_COMPRESSION)
|
||||
|
||||
// gamma-compensates loss of resolution during chroma subsampling
|
||||
#define kGamma 0.80 // for now we use a different gamma value than kGammaF
|
||||
#define kGammaFix 12 // fixed-point precision for linear values
|
||||
#define kGammaScale ((1 << kGammaFix) - 1)
|
||||
#define kGammaTabFix 7 // fixed-point fractional bits precision
|
||||
#define kGammaTabScale (1 << kGammaTabFix)
|
||||
#define kGammaTabRounder (kGammaTabScale >> 1)
|
||||
#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
|
||||
// Gamma correction compensates loss of resolution during chroma subsampling.
|
||||
#define GAMMA_FIX 12 // fixed-point precision for linear values
|
||||
#define GAMMA_TAB_FIX 7 // fixed-point fractional bits precision
|
||||
#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX))
|
||||
static const double kGamma = 0.80;
|
||||
static const int kGammaScale = ((1 << GAMMA_FIX) - 1);
|
||||
static const int kGammaTabScale = (1 << GAMMA_TAB_FIX);
|
||||
static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1);
|
||||
|
||||
static int kLinearToGammaTab[kGammaTabSize + 1];
|
||||
static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1];
|
||||
static uint16_t kGammaToLinearTab[256];
|
||||
static volatile int kGammaTablesOk = 0;
|
||||
static void InitGammaTables(void);
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
WEBP_DSP_INIT_FUNC(InitGammaTables) {
|
||||
if (!kGammaTablesOk) {
|
||||
int v;
|
||||
const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
|
||||
const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale;
|
||||
const double norm = 1. / 255.;
|
||||
for (v = 0; v <= 255; ++v) {
|
||||
kGammaToLinearTab[v] =
|
||||
(uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
|
||||
}
|
||||
for (v = 0; v <= kGammaTabSize; ++v) {
|
||||
for (v = 0; v <= GAMMA_TAB_SIZE; ++v) {
|
||||
kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
|
||||
}
|
||||
kGammaTablesOk = 1;
|
||||
@ -111,12 +121,12 @@ static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
|
||||
}
|
||||
|
||||
static WEBP_INLINE int Interpolate(int v) {
|
||||
const int tab_pos = v >> (kGammaTabFix + 2); // integer part
|
||||
const int tab_pos = v >> (GAMMA_TAB_FIX + 2); // integer part
|
||||
const int x = v & ((kGammaTabScale << 2) - 1); // fractional part
|
||||
const int v0 = kLinearToGammaTab[tab_pos];
|
||||
const int v1 = kLinearToGammaTab[tab_pos + 1];
|
||||
const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate
|
||||
assert(tab_pos + 1 < kGammaTabSize + 1);
|
||||
assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1);
|
||||
return y;
|
||||
}
|
||||
|
||||
@ -124,7 +134,7 @@ static WEBP_INLINE int Interpolate(int v) {
|
||||
// U/V value, suitable for RGBToU/V calls.
|
||||
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
|
||||
const int y = Interpolate(base_value << shift); // final uplifted value
|
||||
return (y + kGammaTabRounder) >> kGammaTabFix; // descale
|
||||
return (y + kGammaTabRounder) >> GAMMA_TAB_FIX; // descale
|
||||
}
|
||||
|
||||
#else
|
||||
@ -158,415 +168,26 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Sharp RGB->YUV conversion
|
||||
|
||||
static const int kNumIterations = 4;
|
||||
static const int kMinDimensionIterativeConversion = 4;
|
||||
|
||||
// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
|
||||
// banding sometimes. Better use extra precision.
|
||||
#define SFIX 2 // fixed-point precision of RGB and Y/W
|
||||
typedef int16_t fixed_t; // signed type with extra SFIX precision for UV
|
||||
typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W
|
||||
|
||||
#define SHALF (1 << SFIX >> 1)
|
||||
#define MAX_Y_T ((256 << SFIX) - 1)
|
||||
#define SROUNDER (1 << (YUV_FIX + SFIX - 1))
|
||||
|
||||
#if defined(USE_GAMMA_COMPRESSION)
|
||||
|
||||
// We use tables of different size and precision for the Rec709 / BT2020
|
||||
// transfer function.
|
||||
#define kGammaF (1./0.45)
|
||||
static uint32_t kLinearToGammaTabS[kGammaTabSize + 2];
|
||||
#define GAMMA_TO_LINEAR_BITS 14
|
||||
static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX
|
||||
static volatile int kGammaTablesSOk = 0;
|
||||
static void InitGammaTablesS(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(InitGammaTablesS) {
|
||||
assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values
|
||||
if (!kGammaTablesSOk) {
|
||||
int v;
|
||||
const double norm = 1. / MAX_Y_T;
|
||||
const double scale = 1. / kGammaTabSize;
|
||||
const double a = 0.09929682680944;
|
||||
const double thresh = 0.018053968510807;
|
||||
const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
|
||||
for (v = 0; v <= MAX_Y_T; ++v) {
|
||||
const double g = norm * v;
|
||||
double value;
|
||||
if (g <= thresh * 4.5) {
|
||||
value = g / 4.5;
|
||||
} else {
|
||||
const double a_rec = 1. / (1. + a);
|
||||
value = pow(a_rec * (g + a), kGammaF);
|
||||
}
|
||||
kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
|
||||
}
|
||||
for (v = 0; v <= kGammaTabSize; ++v) {
|
||||
const double g = scale * v;
|
||||
double value;
|
||||
if (g <= thresh) {
|
||||
value = 4.5 * g;
|
||||
} else {
|
||||
value = (1. + a) * pow(g, 1. / kGammaF) - a;
|
||||
}
|
||||
// we already incorporate the 1/2 rounding constant here
|
||||
kLinearToGammaTabS[v] =
|
||||
(uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1);
|
||||
}
|
||||
// to prevent small rounding errors to cause read-overflow:
|
||||
kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize];
|
||||
kGammaTablesSOk = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS
|
||||
static WEBP_INLINE uint32_t GammaToLinearS(int v) {
|
||||
return kGammaToLinearTabS[v];
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
|
||||
// 'value' is in GAMMA_TO_LINEAR_BITS fractional precision
|
||||
const uint32_t v = value * kGammaTabSize;
|
||||
const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS;
|
||||
// fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision
|
||||
const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part
|
||||
// v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1])
|
||||
const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0];
|
||||
const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1];
|
||||
// Final interpolation. Note that rounding is already included.
|
||||
const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0.
|
||||
const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS);
|
||||
return result;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void InitGammaTablesS(void) {}
|
||||
static WEBP_INLINE uint32_t GammaToLinearS(int v) {
|
||||
return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T;
|
||||
}
|
||||
static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
|
||||
return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS;
|
||||
}
|
||||
|
||||
#endif // USE_GAMMA_COMPRESSION
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static uint8_t clip_8b(fixed_t v) {
|
||||
return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
|
||||
}
|
||||
|
||||
static fixed_y_t clip_y(int y) {
|
||||
return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int RGBToGray(int r, int g, int b) {
|
||||
const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF;
|
||||
return (luma >> YUV_FIX);
|
||||
}
|
||||
|
||||
static uint32_t ScaleDown(int a, int b, int c, int d) {
|
||||
const uint32_t A = GammaToLinearS(a);
|
||||
const uint32_t B = GammaToLinearS(b);
|
||||
const uint32_t C = GammaToLinearS(c);
|
||||
const uint32_t D = GammaToLinearS(d);
|
||||
return LinearToGammaS((A + B + C + D + 2) >> 2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
const uint32_t R = GammaToLinearS(src[0 * w + i]);
|
||||
const uint32_t G = GammaToLinearS(src[1 * w + i]);
|
||||
const uint32_t B = GammaToLinearS(src[2 * w + i]);
|
||||
const uint32_t Y = RGBToGray(R, G, B);
|
||||
dst[i] = (fixed_y_t)LinearToGammaS(Y);
|
||||
}
|
||||
}
|
||||
|
||||
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
|
||||
fixed_t* dst, int uv_w) {
|
||||
int i;
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1],
|
||||
src2[0 * uv_w + 0], src2[0 * uv_w + 1]);
|
||||
const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1],
|
||||
src2[2 * uv_w + 0], src2[2 * uv_w + 1]);
|
||||
const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1],
|
||||
src2[4 * uv_w + 0], src2[4 * uv_w + 1]);
|
||||
const int W = RGBToGray(r, g, b);
|
||||
dst[0 * uv_w] = (fixed_t)(r - W);
|
||||
dst[1 * uv_w] = (fixed_t)(g - W);
|
||||
dst[2 * uv_w] = (fixed_t)(b - W);
|
||||
dst += 1;
|
||||
src1 += 2;
|
||||
src2 += 2;
|
||||
}
|
||||
}
|
||||
|
||||
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
|
||||
const int v0 = (A * 3 + B + 2) >> 2;
|
||||
return clip_y(v0 + W0);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX
|
||||
return ((fixed_y_t)a << SFIX) | SHALF;
|
||||
}
|
||||
|
||||
static void ImportOneRow(const uint8_t* const r_ptr,
|
||||
const uint8_t* const g_ptr,
|
||||
const uint8_t* const b_ptr,
|
||||
int step,
|
||||
int pic_width,
|
||||
fixed_y_t* const dst) {
|
||||
int i;
|
||||
const int w = (pic_width + 1) & ~1;
|
||||
for (i = 0; i < pic_width; ++i) {
|
||||
const int off = i * step;
|
||||
dst[i + 0 * w] = UpLift(r_ptr[off]);
|
||||
dst[i + 1 * w] = UpLift(g_ptr[off]);
|
||||
dst[i + 2 * w] = UpLift(b_ptr[off]);
|
||||
}
|
||||
if (pic_width & 1) { // replicate rightmost pixel
|
||||
dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
|
||||
dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
|
||||
dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
|
||||
}
|
||||
}
|
||||
|
||||
static void InterpolateTwoRows(const fixed_y_t* const best_y,
|
||||
const fixed_t* prev_uv,
|
||||
const fixed_t* cur_uv,
|
||||
const fixed_t* next_uv,
|
||||
int w,
|
||||
fixed_y_t* out1,
|
||||
fixed_y_t* out2) {
|
||||
const int uv_w = w >> 1;
|
||||
const int len = (w - 1) >> 1; // length to filter
|
||||
int k = 3;
|
||||
while (k-- > 0) { // process each R/G/B segments in turn
|
||||
// special boundary case for i==0
|
||||
out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]);
|
||||
out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]);
|
||||
|
||||
WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1);
|
||||
WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1);
|
||||
|
||||
// special boundary case for i == w - 1 when w is even
|
||||
if (!(w & 1)) {
|
||||
out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
|
||||
best_y[w - 1 + 0]);
|
||||
out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
|
||||
best_y[w - 1 + w]);
|
||||
}
|
||||
out1 += w;
|
||||
out2 += w;
|
||||
prev_uv += uv_w;
|
||||
cur_uv += uv_w;
|
||||
next_uv += uv_w;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
|
||||
const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
|
||||
return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
|
||||
const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER;
|
||||
return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
|
||||
const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER;
|
||||
return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
|
||||
}
|
||||
|
||||
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
|
||||
WebPPicture* const picture) {
|
||||
int i, j;
|
||||
uint8_t* dst_y = picture->y;
|
||||
uint8_t* dst_u = picture->u;
|
||||
uint8_t* dst_v = picture->v;
|
||||
const fixed_t* const best_uv_base = best_uv;
|
||||
const int w = (picture->width + 1) & ~1;
|
||||
const int h = (picture->height + 1) & ~1;
|
||||
const int uv_w = w >> 1;
|
||||
const int uv_h = h >> 1;
|
||||
for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) {
|
||||
for (i = 0; i < picture->width; ++i) {
|
||||
const int off = (i >> 1);
|
||||
const int W = best_y[i];
|
||||
const int r = best_uv[off + 0 * uv_w] + W;
|
||||
const int g = best_uv[off + 1 * uv_w] + W;
|
||||
const int b = best_uv[off + 2 * uv_w] + W;
|
||||
dst_y[i] = ConvertRGBToY(r, g, b);
|
||||
}
|
||||
best_y += w;
|
||||
best_uv += (j & 1) * 3 * uv_w;
|
||||
dst_y += picture->y_stride;
|
||||
}
|
||||
for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int off = i;
|
||||
const int r = best_uv[off + 0 * uv_w];
|
||||
const int g = best_uv[off + 1 * uv_w];
|
||||
const int b = best_uv[off + 2 * uv_w];
|
||||
dst_u[i] = ConvertRGBToU(r, g, b);
|
||||
dst_v[i] = ConvertRGBToV(r, g, b);
|
||||
}
|
||||
best_uv += 3 * uv_w;
|
||||
dst_u += picture->uv_stride;
|
||||
dst_v += picture->uv_stride;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main function
|
||||
|
||||
#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
|
||||
|
||||
static int PreprocessARGB(const uint8_t* r_ptr,
|
||||
const uint8_t* g_ptr,
|
||||
const uint8_t* b_ptr,
|
||||
int step, int rgb_stride,
|
||||
WebPPicture* const picture) {
|
||||
// we expand the right/bottom border if needed
|
||||
const int w = (picture->width + 1) & ~1;
|
||||
const int h = (picture->height + 1) & ~1;
|
||||
const int uv_w = w >> 1;
|
||||
const int uv_h = h >> 1;
|
||||
uint64_t prev_diff_y_sum = ~0;
|
||||
int j, iter;
|
||||
|
||||
// TODO(skal): allocate one big memory chunk. But for now, it's easier
|
||||
// for valgrind debugging to have several chunks.
|
||||
fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
|
||||
fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
|
||||
fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
|
||||
fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
|
||||
fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
|
||||
fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
|
||||
fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
|
||||
fixed_y_t* best_y = best_y_base;
|
||||
fixed_y_t* target_y = target_y_base;
|
||||
fixed_t* best_uv = best_uv_base;
|
||||
fixed_t* target_uv = target_uv_base;
|
||||
const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
|
||||
int ok;
|
||||
|
||||
if (best_y_base == NULL || best_uv_base == NULL ||
|
||||
target_y_base == NULL || target_uv_base == NULL ||
|
||||
best_rgb_y == NULL || best_rgb_uv == NULL ||
|
||||
tmp_buffer == NULL) {
|
||||
ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
goto End;
|
||||
const int ok = SharpYuvConvert(
|
||||
r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8,
|
||||
picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v,
|
||||
picture->uv_stride, /*yuv_bit_depth=*/8, picture->width,
|
||||
picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp));
|
||||
if (!ok) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
assert(picture->width >= kMinDimensionIterativeConversion);
|
||||
assert(picture->height >= kMinDimensionIterativeConversion);
|
||||
|
||||
WebPInitConvertARGBToYUV();
|
||||
|
||||
// Import RGB samples to W/RGB representation.
|
||||
for (j = 0; j < picture->height; j += 2) {
|
||||
const int is_last_row = (j == picture->height - 1);
|
||||
fixed_y_t* const src1 = tmp_buffer + 0 * w;
|
||||
fixed_y_t* const src2 = tmp_buffer + 3 * w;
|
||||
|
||||
// prepare two rows of input
|
||||
ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1);
|
||||
if (!is_last_row) {
|
||||
ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
|
||||
step, picture->width, src2);
|
||||
} else {
|
||||
memcpy(src2, src1, 3 * w * sizeof(*src2));
|
||||
}
|
||||
StoreGray(src1, best_y + 0, w);
|
||||
StoreGray(src2, best_y + w, w);
|
||||
|
||||
UpdateW(src1, target_y, w);
|
||||
UpdateW(src2, target_y + w, w);
|
||||
UpdateChroma(src1, src2, target_uv, uv_w);
|
||||
memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
|
||||
best_y += 2 * w;
|
||||
best_uv += 3 * uv_w;
|
||||
target_y += 2 * w;
|
||||
target_uv += 3 * uv_w;
|
||||
r_ptr += 2 * rgb_stride;
|
||||
g_ptr += 2 * rgb_stride;
|
||||
b_ptr += 2 * rgb_stride;
|
||||
}
|
||||
|
||||
// Iterate and resolve clipping conflicts.
|
||||
for (iter = 0; iter < kNumIterations; ++iter) {
|
||||
const fixed_t* cur_uv = best_uv_base;
|
||||
const fixed_t* prev_uv = best_uv_base;
|
||||
uint64_t diff_y_sum = 0;
|
||||
|
||||
best_y = best_y_base;
|
||||
best_uv = best_uv_base;
|
||||
target_y = target_y_base;
|
||||
target_uv = target_uv_base;
|
||||
for (j = 0; j < h; j += 2) {
|
||||
fixed_y_t* const src1 = tmp_buffer + 0 * w;
|
||||
fixed_y_t* const src2 = tmp_buffer + 3 * w;
|
||||
{
|
||||
const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
|
||||
InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
|
||||
prev_uv = cur_uv;
|
||||
cur_uv = next_uv;
|
||||
}
|
||||
|
||||
UpdateW(src1, best_rgb_y + 0 * w, w);
|
||||
UpdateW(src2, best_rgb_y + 1 * w, w);
|
||||
UpdateChroma(src1, src2, best_rgb_uv, uv_w);
|
||||
|
||||
// update two rows of Y and one row of RGB
|
||||
diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w);
|
||||
WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
|
||||
|
||||
best_y += 2 * w;
|
||||
best_uv += 3 * uv_w;
|
||||
target_y += 2 * w;
|
||||
target_uv += 3 * uv_w;
|
||||
}
|
||||
// test exit condition
|
||||
if (iter > 0) {
|
||||
if (diff_y_sum < diff_y_threshold) break;
|
||||
if (diff_y_sum > prev_diff_y_sum) break;
|
||||
}
|
||||
prev_diff_y_sum = diff_y_sum;
|
||||
}
|
||||
// final reconstruction
|
||||
ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture);
|
||||
|
||||
End:
|
||||
WebPSafeFree(best_y_base);
|
||||
WebPSafeFree(best_uv_base);
|
||||
WebPSafeFree(target_y_base);
|
||||
WebPSafeFree(target_uv_base);
|
||||
WebPSafeFree(best_rgb_y);
|
||||
WebPSafeFree(best_rgb_uv);
|
||||
WebPSafeFree(tmp_buffer);
|
||||
return ok;
|
||||
}
|
||||
#undef SAFE_ALLOC
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// "Fast" regular RGB->YUV
|
||||
@ -591,8 +212,8 @@ static const int kAlphaFix = 19;
|
||||
// and constant are adjusted very tightly to fit 32b arithmetic.
|
||||
// In particular, they use the fact that the operands for 'v / a' are actually
|
||||
// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
|
||||
// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
|
||||
// overflow is: kGammaFix + kAlphaFix <= 31.
|
||||
// with ai in [0..255] and pi in [0..1<<GAMMA_FIX). The constraint to avoid
|
||||
// overflow is: GAMMA_FIX + kAlphaFix <= 31.
|
||||
static const uint32_t kInvAlpha[4 * 0xff + 1] = {
|
||||
0, /* alpha = 0 */
|
||||
524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
|
||||
@ -818,11 +439,20 @@ static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
|
||||
dst[0] = SUM4(r_ptr + j, step);
|
||||
dst[1] = SUM4(g_ptr + j, step);
|
||||
dst[2] = SUM4(b_ptr + j, step);
|
||||
// MemorySanitizer may raise false positives with data that passes through
|
||||
// RGBA32PackedToPlanar_16b_SSE41() due to incorrect modeling of shuffles.
|
||||
// See https://crbug.com/webp/573.
|
||||
#ifdef WEBP_MSAN
|
||||
dst[3] = 0;
|
||||
#endif
|
||||
}
|
||||
if (width & 1) {
|
||||
dst[0] = SUM2(r_ptr + j);
|
||||
dst[1] = SUM2(g_ptr + j);
|
||||
dst[2] = SUM2(b_ptr + j);
|
||||
#ifdef WEBP_MSAN
|
||||
dst[3] = 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -839,6 +469,8 @@ static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
|
||||
}
|
||||
}
|
||||
|
||||
extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
|
||||
|
||||
static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
|
||||
const uint8_t* g_ptr,
|
||||
const uint8_t* b_ptr,
|
||||
@ -863,18 +495,18 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
|
||||
use_iterative_conversion = 0;
|
||||
}
|
||||
|
||||
if (!WebPPictureAllocYUVA(picture, width, height)) {
|
||||
if (!WebPPictureAllocYUVA(picture)) {
|
||||
return 0;
|
||||
}
|
||||
if (has_alpha) {
|
||||
assert(step == 4);
|
||||
#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
|
||||
assert(kAlphaFix + kGammaFix <= 31);
|
||||
assert(kAlphaFix + GAMMA_FIX <= 31);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (use_iterative_conversion) {
|
||||
InitGammaTablesS();
|
||||
SharpYuvInit(VP8GetCPUInfo);
|
||||
if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
|
||||
return 0;
|
||||
}
|
||||
@ -903,7 +535,9 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
|
||||
WebPInitConvertARGBToYUV();
|
||||
InitGammaTables();
|
||||
|
||||
if (tmp_rgb == NULL) return 0; // malloc error
|
||||
if (tmp_rgb == NULL) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
// Downsample Y/U/V planes, two rows at a time
|
||||
for (y = 0; y < (height >> 1); ++y) {
|
||||
@ -1044,7 +678,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
|
||||
}
|
||||
// Allocate a new argb buffer (discarding the previous one).
|
||||
if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
|
||||
if (!WebPPictureAllocARGB(picture)) return 0;
|
||||
picture->use_argb = 1;
|
||||
|
||||
// Convert
|
||||
@ -1106,6 +740,8 @@ static int Import(WebPPicture* const picture,
|
||||
const int width = picture->width;
|
||||
const int height = picture->height;
|
||||
|
||||
if (abs(rgb_stride) < (import_alpha ? 4 : 3) * width) return 0;
|
||||
|
||||
if (!picture->use_argb) {
|
||||
const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
|
||||
return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
|
||||
@ -1163,24 +799,24 @@ static int Import(WebPPicture* const picture,
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
|
||||
int WebPPictureImportBGR(WebPPicture* picture,
|
||||
const uint8_t* rgb, int rgb_stride) {
|
||||
return (picture != NULL && rgb != NULL)
|
||||
? Import(picture, rgb, rgb_stride, 3, 1, 0)
|
||||
const uint8_t* bgr, int bgr_stride) {
|
||||
return (picture != NULL && bgr != NULL)
|
||||
? Import(picture, bgr, bgr_stride, 3, 1, 0)
|
||||
: 0;
|
||||
}
|
||||
|
||||
int WebPPictureImportBGRA(WebPPicture* picture,
|
||||
const uint8_t* rgba, int rgba_stride) {
|
||||
return (picture != NULL && rgba != NULL)
|
||||
? Import(picture, rgba, rgba_stride, 4, 1, 1)
|
||||
const uint8_t* bgra, int bgra_stride) {
|
||||
return (picture != NULL && bgra != NULL)
|
||||
? Import(picture, bgra, bgra_stride, 4, 1, 1)
|
||||
: 0;
|
||||
}
|
||||
|
||||
|
||||
int WebPPictureImportBGRX(WebPPicture* picture,
|
||||
const uint8_t* rgba, int rgba_stride) {
|
||||
return (picture != NULL && rgba != NULL)
|
||||
? Import(picture, rgba, rgba_stride, 4, 1, 0)
|
||||
const uint8_t* bgrx, int bgrx_stride) {
|
||||
return (picture != NULL && bgrx != NULL)
|
||||
? Import(picture, bgrx, bgrx_stride, 4, 1, 0)
|
||||
: 0;
|
||||
}
|
||||
|
||||
@ -1201,9 +837,9 @@ int WebPPictureImportRGBA(WebPPicture* picture,
|
||||
}
|
||||
|
||||
int WebPPictureImportRGBX(WebPPicture* picture,
|
||||
const uint8_t* rgba, int rgba_stride) {
|
||||
return (picture != NULL && rgba != NULL)
|
||||
? Import(picture, rgba, rgba_stride, 4, 0, 0)
|
||||
const uint8_t* rgbx, int rgbx_stride) {
|
||||
return (picture != NULL && rgbx != NULL)
|
||||
? Import(picture, rgbx, rgbx_stride, 4, 0, 0)
|
||||
: 0;
|
||||
}
|
||||
|
||||
|
46
3rdparty/libwebp/src/enc/picture_enc.c
vendored
46
3rdparty/libwebp/src/enc/picture_enc.c
vendored
@ -12,10 +12,10 @@
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -45,6 +45,22 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
int WebPValidatePicture(const WebPPicture* const picture) {
|
||||
if (picture == NULL) return 0;
|
||||
if (picture->width <= 0 || picture->height <= 0) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
if (picture->width <= 0 || picture->width / 4 > INT_MAX / 4 ||
|
||||
picture->height <= 0 || picture->height / 4 > INT_MAX / 4) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
if (picture->colorspace != WEBP_YUV420 &&
|
||||
picture->colorspace != WEBP_YUV420A) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void WebPPictureResetBufferARGB(WebPPicture* const picture) {
|
||||
picture->memory_argb_ = NULL;
|
||||
picture->argb = NULL;
|
||||
@ -63,18 +79,17 @@ void WebPPictureResetBuffers(WebPPicture* const picture) {
|
||||
WebPPictureResetBufferYUVA(picture);
|
||||
}
|
||||
|
||||
int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
|
||||
int WebPPictureAllocARGB(WebPPicture* const picture) {
|
||||
void* memory;
|
||||
const int width = picture->width;
|
||||
const int height = picture->height;
|
||||
const uint64_t argb_size = (uint64_t)width * height;
|
||||
|
||||
assert(picture != NULL);
|
||||
if (!WebPValidatePicture(picture)) return 0;
|
||||
|
||||
WebPSafeFree(picture->memory_argb_);
|
||||
WebPPictureResetBufferARGB(picture);
|
||||
|
||||
if (width <= 0 || height <= 0) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
// allocate a new buffer.
|
||||
memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb));
|
||||
if (memory == NULL) {
|
||||
@ -86,10 +101,10 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
|
||||
const WebPEncCSP uv_csp =
|
||||
(WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK);
|
||||
int WebPPictureAllocYUVA(WebPPicture* const picture) {
|
||||
const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT;
|
||||
const int width = picture->width;
|
||||
const int height = picture->height;
|
||||
const int y_stride = width;
|
||||
const int uv_width = (int)(((int64_t)width + 1) >> 1);
|
||||
const int uv_height = (int)(((int64_t)height + 1) >> 1);
|
||||
@ -98,15 +113,11 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
|
||||
uint64_t y_size, uv_size, a_size, total_size;
|
||||
uint8_t* mem;
|
||||
|
||||
assert(picture != NULL);
|
||||
if (!WebPValidatePicture(picture)) return 0;
|
||||
|
||||
WebPSafeFree(picture->memory_);
|
||||
WebPPictureResetBufferYUVA(picture);
|
||||
|
||||
if (uv_csp != WEBP_YUV420) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
|
||||
}
|
||||
|
||||
// alpha
|
||||
a_width = has_alpha ? width : 0;
|
||||
a_stride = a_width;
|
||||
@ -152,15 +163,12 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
|
||||
|
||||
int WebPPictureAlloc(WebPPicture* picture) {
|
||||
if (picture != NULL) {
|
||||
const int width = picture->width;
|
||||
const int height = picture->height;
|
||||
|
||||
WebPPictureFree(picture); // erase previous buffer
|
||||
|
||||
if (!picture->use_argb) {
|
||||
return WebPPictureAllocYUVA(picture, width, height);
|
||||
return WebPPictureAllocYUVA(picture);
|
||||
} else {
|
||||
return WebPPictureAllocARGB(picture, width, height);
|
||||
return WebPPictureAllocARGB(picture);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
|
119
3rdparty/libwebp/src/enc/picture_rescale_enc.c
vendored
119
3rdparty/libwebp/src/enc/picture_rescale_enc.c
vendored
@ -13,14 +13,15 @@
|
||||
|
||||
#include "src/webp/encode.h"
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
#include "src/utils/rescaler_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#endif // !defined(WEBP_REDUCE_SIZE)
|
||||
|
||||
#define HALVE(x) (((x) + 1) >> 1)
|
||||
|
||||
@ -56,6 +57,7 @@ static int AdjustAndCheckRectangle(const WebPPicture* const pic,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
|
||||
if (src == NULL || dst == NULL) return 0;
|
||||
if (src == dst) return 1;
|
||||
@ -81,6 +83,7 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif // !defined(WEBP_REDUCE_SIZE)
|
||||
|
||||
int WebPPictureIsView(const WebPPicture* picture) {
|
||||
if (picture == NULL) return 0;
|
||||
@ -120,6 +123,7 @@ int WebPPictureView(const WebPPicture* src,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_SIZE)
|
||||
//------------------------------------------------------------------------------
|
||||
// Picture cropping
|
||||
|
||||
@ -133,7 +137,9 @@ int WebPPictureCrop(WebPPicture* pic,
|
||||
PictureGrabSpecs(pic, &tmp);
|
||||
tmp.width = width;
|
||||
tmp.height = height;
|
||||
if (!WebPPictureAlloc(&tmp)) return 0;
|
||||
if (!WebPPictureAlloc(&tmp)) {
|
||||
return WebPEncodingSetError(pic, tmp.error_code);
|
||||
}
|
||||
|
||||
if (!pic->use_argb) {
|
||||
const int y_offset = top * pic->y_stride + left;
|
||||
@ -164,22 +170,25 @@ int WebPPictureCrop(WebPPicture* pic,
|
||||
//------------------------------------------------------------------------------
|
||||
// Simple picture rescaler
|
||||
|
||||
static void RescalePlane(const uint8_t* src,
|
||||
int src_width, int src_height, int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width, int dst_height, int dst_stride,
|
||||
rescaler_t* const work,
|
||||
int num_channels) {
|
||||
static int RescalePlane(const uint8_t* src,
|
||||
int src_width, int src_height, int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width, int dst_height, int dst_stride,
|
||||
rescaler_t* const work,
|
||||
int num_channels) {
|
||||
WebPRescaler rescaler;
|
||||
int y = 0;
|
||||
WebPRescalerInit(&rescaler, src_width, src_height,
|
||||
dst, dst_width, dst_height, dst_stride,
|
||||
num_channels, work);
|
||||
if (!WebPRescalerInit(&rescaler, src_width, src_height,
|
||||
dst, dst_width, dst_height, dst_stride,
|
||||
num_channels, work)) {
|
||||
return 0;
|
||||
}
|
||||
while (y < src_height) {
|
||||
y += WebPRescalerImport(&rescaler, src_height - y,
|
||||
src + y * src_stride, src_stride);
|
||||
WebPRescalerExport(&rescaler);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) {
|
||||
@ -195,73 +204,76 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
|
||||
}
|
||||
}
|
||||
|
||||
int WebPPictureRescale(WebPPicture* pic, int width, int height) {
|
||||
int WebPPictureRescale(WebPPicture* picture, int width, int height) {
|
||||
WebPPicture tmp;
|
||||
int prev_width, prev_height;
|
||||
rescaler_t* work;
|
||||
|
||||
if (pic == NULL) return 0;
|
||||
prev_width = pic->width;
|
||||
prev_height = pic->height;
|
||||
if (picture == NULL) return 0;
|
||||
prev_width = picture->width;
|
||||
prev_height = picture->height;
|
||||
if (!WebPRescalerGetScaledDimensions(
|
||||
prev_width, prev_height, &width, &height)) {
|
||||
return 0;
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
|
||||
PictureGrabSpecs(pic, &tmp);
|
||||
PictureGrabSpecs(picture, &tmp);
|
||||
tmp.width = width;
|
||||
tmp.height = height;
|
||||
if (!WebPPictureAlloc(&tmp)) return 0;
|
||||
if (!WebPPictureAlloc(&tmp)) {
|
||||
return WebPEncodingSetError(picture, tmp.error_code);
|
||||
}
|
||||
|
||||
if (!pic->use_argb) {
|
||||
if (!picture->use_argb) {
|
||||
work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
|
||||
if (work == NULL) {
|
||||
WebPPictureFree(&tmp);
|
||||
return 0;
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
// If present, we need to rescale alpha first (for AlphaMultiplyY).
|
||||
if (pic->a != NULL) {
|
||||
if (picture->a != NULL) {
|
||||
WebPInitAlphaProcessing();
|
||||
RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
|
||||
tmp.a, width, height, tmp.a_stride, work, 1);
|
||||
if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride,
|
||||
tmp.a, width, height, tmp.a_stride, work, 1)) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
}
|
||||
|
||||
// We take transparency into account on the luma plane only. That's not
|
||||
// totally exact blending, but still is a good approximation.
|
||||
AlphaMultiplyY(pic, 0);
|
||||
RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
|
||||
tmp.y, width, height, tmp.y_stride, work, 1);
|
||||
AlphaMultiplyY(picture, 0);
|
||||
if (!RescalePlane(picture->y, prev_width, prev_height, picture->y_stride,
|
||||
tmp.y, width, height, tmp.y_stride, work, 1) ||
|
||||
!RescalePlane(picture->u, HALVE(prev_width), HALVE(prev_height),
|
||||
picture->uv_stride, tmp.u, HALVE(width), HALVE(height),
|
||||
tmp.uv_stride, work, 1) ||
|
||||
!RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height),
|
||||
picture->uv_stride, tmp.v, HALVE(width), HALVE(height),
|
||||
tmp.uv_stride, work, 1)) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
AlphaMultiplyY(&tmp, 1);
|
||||
|
||||
RescalePlane(pic->u,
|
||||
HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
|
||||
tmp.u,
|
||||
HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
|
||||
RescalePlane(pic->v,
|
||||
HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
|
||||
tmp.v,
|
||||
HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
|
||||
} else {
|
||||
work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
|
||||
if (work == NULL) {
|
||||
WebPPictureFree(&tmp);
|
||||
return 0;
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
// In order to correctly interpolate colors, we need to apply the alpha
|
||||
// weighting first (black-matting), scale the RGB values, and remove
|
||||
// the premultiplication afterward (while preserving the alpha channel).
|
||||
WebPInitAlphaProcessing();
|
||||
AlphaMultiplyARGB(pic, 0);
|
||||
RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
|
||||
pic->argb_stride * 4,
|
||||
(uint8_t*)tmp.argb, width, height,
|
||||
tmp.argb_stride * 4,
|
||||
work, 4);
|
||||
AlphaMultiplyARGB(picture, 0);
|
||||
if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height,
|
||||
picture->argb_stride * 4, (uint8_t*)tmp.argb, width,
|
||||
height, tmp.argb_stride * 4, work, 4)) {
|
||||
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
AlphaMultiplyARGB(&tmp, 1);
|
||||
}
|
||||
WebPPictureFree(pic);
|
||||
WebPPictureFree(picture);
|
||||
WebPSafeFree(work);
|
||||
*pic = tmp;
|
||||
*picture = tmp;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -273,23 +285,6 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebPPictureIsView(const WebPPicture* picture) {
|
||||
(void)picture;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebPPictureView(const WebPPicture* src,
|
||||
int left, int top, int width, int height,
|
||||
WebPPicture* dst) {
|
||||
(void)src;
|
||||
(void)left;
|
||||
(void)top;
|
||||
(void)width;
|
||||
(void)height;
|
||||
(void)dst;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebPPictureCrop(WebPPicture* pic,
|
||||
int left, int top, int width, int height) {
|
||||
(void)pic;
|
||||
|
45
3rdparty/libwebp/src/enc/picture_tools_enc.c
vendored
45
3rdparty/libwebp/src/enc/picture_tools_enc.c
vendored
@ -190,27 +190,28 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
|
||||
return (0xff000000u | (r << 16) | (g << 8) | b);
|
||||
}
|
||||
|
||||
void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
|
||||
void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb) {
|
||||
const int red = (background_rgb >> 16) & 0xff;
|
||||
const int green = (background_rgb >> 8) & 0xff;
|
||||
const int blue = (background_rgb >> 0) & 0xff;
|
||||
int x, y;
|
||||
if (pic == NULL) return;
|
||||
if (!pic->use_argb) {
|
||||
const int uv_width = (pic->width >> 1); // omit last pixel during u/v loop
|
||||
if (picture == NULL) return;
|
||||
if (!picture->use_argb) {
|
||||
// omit last pixel during u/v loop
|
||||
const int uv_width = (picture->width >> 1);
|
||||
const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF);
|
||||
// VP8RGBToU/V expects the u/v values summed over four pixels
|
||||
const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
|
||||
const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
|
||||
const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
|
||||
uint8_t* y_ptr = pic->y;
|
||||
uint8_t* u_ptr = pic->u;
|
||||
uint8_t* v_ptr = pic->v;
|
||||
uint8_t* a_ptr = pic->a;
|
||||
const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
|
||||
uint8_t* y_ptr = picture->y;
|
||||
uint8_t* u_ptr = picture->u;
|
||||
uint8_t* v_ptr = picture->v;
|
||||
uint8_t* a_ptr = picture->a;
|
||||
if (!has_alpha || a_ptr == NULL) return; // nothing to do
|
||||
for (y = 0; y < pic->height; ++y) {
|
||||
for (y = 0; y < picture->height; ++y) {
|
||||
// Luma blending
|
||||
for (x = 0; x < pic->width; ++x) {
|
||||
for (x = 0; x < picture->width; ++x) {
|
||||
const uint8_t alpha = a_ptr[x];
|
||||
if (alpha < 0xff) {
|
||||
y_ptr[x] = BLEND(Y0, y_ptr[x], alpha);
|
||||
@ -219,7 +220,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
|
||||
// Chroma blending every even line
|
||||
if ((y & 1) == 0) {
|
||||
uint8_t* const a_ptr2 =
|
||||
(y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
|
||||
(y + 1 == picture->height) ? a_ptr : a_ptr + picture->a_stride;
|
||||
for (x = 0; x < uv_width; ++x) {
|
||||
// Average four alpha values into a single blending weight.
|
||||
// TODO(skal): might lead to visible contouring. Can we do better?
|
||||
@ -229,24 +230,24 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
|
||||
u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
|
||||
v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
|
||||
}
|
||||
if (pic->width & 1) { // rightmost pixel
|
||||
if (picture->width & 1) { // rightmost pixel
|
||||
const uint32_t alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
|
||||
u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
|
||||
v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
|
||||
}
|
||||
} else {
|
||||
u_ptr += pic->uv_stride;
|
||||
v_ptr += pic->uv_stride;
|
||||
u_ptr += picture->uv_stride;
|
||||
v_ptr += picture->uv_stride;
|
||||
}
|
||||
memset(a_ptr, 0xff, pic->width); // reset alpha value to opaque
|
||||
a_ptr += pic->a_stride;
|
||||
y_ptr += pic->y_stride;
|
||||
memset(a_ptr, 0xff, picture->width); // reset alpha value to opaque
|
||||
a_ptr += picture->a_stride;
|
||||
y_ptr += picture->y_stride;
|
||||
}
|
||||
} else {
|
||||
uint32_t* argb = pic->argb;
|
||||
uint32_t* argb = picture->argb;
|
||||
const uint32_t background = MakeARGB32(red, green, blue);
|
||||
for (y = 0; y < pic->height; ++y) {
|
||||
for (x = 0; x < pic->width; ++x) {
|
||||
for (y = 0; y < picture->height; ++y) {
|
||||
for (x = 0; x < picture->width; ++x) {
|
||||
const int alpha = (argb[x] >> 24) & 0xff;
|
||||
if (alpha != 0xff) {
|
||||
if (alpha > 0) {
|
||||
@ -262,7 +263,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
|
||||
}
|
||||
}
|
||||
}
|
||||
argb += pic->argb_stride;
|
||||
argb += picture->argb_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
52
3rdparty/libwebp/src/enc/predictor_enc.c
vendored
52
3rdparty/libwebp/src/enc/predictor_enc.c
vendored
@ -16,6 +16,7 @@
|
||||
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/enc/vp8li_enc.h"
|
||||
|
||||
#define MAX_DIFF_COST (1e30f)
|
||||
@ -31,10 +32,10 @@ static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
|
||||
// Methods to calculate Entropy (Shannon).
|
||||
|
||||
static float PredictionCostSpatial(const int counts[256], int weight_0,
|
||||
double exp_val) {
|
||||
float exp_val) {
|
||||
const int significant_symbols = 256 >> 4;
|
||||
const double exp_decay_factor = 0.6;
|
||||
double bits = weight_0 * counts[0];
|
||||
const float exp_decay_factor = 0.6f;
|
||||
float bits = (float)weight_0 * counts[0];
|
||||
int i;
|
||||
for (i = 1; i < significant_symbols; ++i) {
|
||||
bits += exp_val * (counts[i] + counts[256 - i]);
|
||||
@ -46,9 +47,9 @@ static float PredictionCostSpatial(const int counts[256], int weight_0,
|
||||
static float PredictionCostSpatialHistogram(const int accumulated[4][256],
|
||||
const int tile[4][256]) {
|
||||
int i;
|
||||
double retval = 0;
|
||||
float retval = 0.f;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
const double kExpValue = 0.94;
|
||||
const float kExpValue = 0.94f;
|
||||
retval += PredictionCostSpatial(tile[i], 1, kExpValue);
|
||||
retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
|
||||
}
|
||||
@ -249,7 +250,7 @@ static WEBP_INLINE void GetResidual(
|
||||
} else if (x == 0) {
|
||||
predict = upper_row[x]; // Top.
|
||||
} else {
|
||||
predict = pred_func(current_row[x - 1], upper_row + x);
|
||||
predict = pred_func(&current_row[x - 1], upper_row + x);
|
||||
}
|
||||
#if (WEBP_NEAR_LOSSLESS == 1)
|
||||
if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||
|
||||
@ -472,12 +473,15 @@ static void CopyImageWithPrediction(int width, int height,
|
||||
// with respect to predictions. If near_lossless_quality < 100, applies
|
||||
// near lossless processing, shaving off more bits of residuals for lower
|
||||
// qualities.
|
||||
void VP8LResidualImage(int width, int height, int bits, int low_effort,
|
||||
uint32_t* const argb, uint32_t* const argb_scratch,
|
||||
uint32_t* const image, int near_lossless_quality,
|
||||
int exact, int used_subtract_green) {
|
||||
int VP8LResidualImage(int width, int height, int bits, int low_effort,
|
||||
uint32_t* const argb, uint32_t* const argb_scratch,
|
||||
uint32_t* const image, int near_lossless_quality,
|
||||
int exact, int used_subtract_green,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent) {
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
||||
const int tiles_per_col = VP8LSubSampleSize(height, bits);
|
||||
int percent_start = *percent;
|
||||
int tile_y;
|
||||
int histo[4][256];
|
||||
const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);
|
||||
@ -491,17 +495,24 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort,
|
||||
for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
|
||||
int tile_x;
|
||||
for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
|
||||
const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
|
||||
bits, histo, argb_scratch, argb, max_quantization, exact,
|
||||
used_subtract_green, image);
|
||||
const int pred = GetBestPredictorForTile(
|
||||
width, height, tile_x, tile_y, bits, histo, argb_scratch, argb,
|
||||
max_quantization, exact, used_subtract_green, image);
|
||||
image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
|
||||
}
|
||||
|
||||
if (!WebPReportProgress(
|
||||
pic, percent_start + percent_range * tile_y / tiles_per_col,
|
||||
percent)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb,
|
||||
low_effort, max_quantization, exact,
|
||||
used_subtract_green);
|
||||
return WebPReportProgress(pic, percent_start + percent_range, percent);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -532,7 +543,7 @@ static float PredictionCostCrossColor(const int accumulated[256],
|
||||
const int counts[256]) {
|
||||
// Favor low entropy, locally and globally.
|
||||
// Favor small absolute values for PredictionCostSpatial
|
||||
static const double kExpValue = 2.4;
|
||||
static const float kExpValue = 2.4f;
|
||||
return VP8LCombinedShannonEntropy(counts, accumulated) +
|
||||
PredictionCostSpatial(counts, 3, kExpValue);
|
||||
}
|
||||
@ -714,11 +725,14 @@ static void CopyTileWithColorTransform(int xsize, int ysize,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
uint32_t* const argb, uint32_t* image) {
|
||||
int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
uint32_t* const argb, uint32_t* image,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent) {
|
||||
const int max_tile_size = 1 << bits;
|
||||
const int tile_xsize = VP8LSubSampleSize(width, bits);
|
||||
const int tile_ysize = VP8LSubSampleSize(height, bits);
|
||||
int percent_start = *percent;
|
||||
int accumulated_red_histo[256] = { 0 };
|
||||
int accumulated_blue_histo[256] = { 0 };
|
||||
int tile_x, tile_y;
|
||||
@ -768,5 +782,11 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!WebPReportProgress(
|
||||
pic, percent_start + percent_range * tile_y / tile_ysize,
|
||||
percent)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
120
3rdparty/libwebp/src/enc/quant_enc.c
vendored
120
3rdparty/libwebp/src/enc/quant_enc.c
vendored
@ -533,7 +533,8 @@ static void InitScore(VP8ModeScore* const rd) {
|
||||
rd->score = MAX_COST;
|
||||
}
|
||||
|
||||
static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
|
||||
static void CopyScore(VP8ModeScore* WEBP_RESTRICT const dst,
|
||||
const VP8ModeScore* WEBP_RESTRICT const src) {
|
||||
dst->D = src->D;
|
||||
dst->SD = src->SD;
|
||||
dst->R = src->R;
|
||||
@ -542,7 +543,8 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
|
||||
dst->score = src->score;
|
||||
}
|
||||
|
||||
static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
|
||||
static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
|
||||
const VP8ModeScore* WEBP_RESTRICT const src) {
|
||||
dst->D += src->D;
|
||||
dst->SD += src->SD;
|
||||
dst->R += src->R;
|
||||
@ -585,15 +587,18 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
|
||||
return rate * lambda + RD_DISTO_MULT * distortion;
|
||||
}
|
||||
|
||||
static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
// Coefficient type.
|
||||
enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 };
|
||||
|
||||
static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
|
||||
int16_t in[16], int16_t out[16],
|
||||
int ctx0, int coeff_type,
|
||||
const VP8Matrix* const mtx,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx,
|
||||
int lambda) {
|
||||
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
|
||||
CostArrayPtr const costs =
|
||||
(CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
|
||||
const int first = (coeff_type == 0) ? 1 : 0;
|
||||
const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0;
|
||||
Node nodes[16][NUM_NODES];
|
||||
ScoreState score_states[2][NUM_NODES];
|
||||
ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
|
||||
@ -657,16 +662,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
// test all alternate level values around level0.
|
||||
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
|
||||
Node* const cur = &NODE(n, m);
|
||||
int level = level0 + m;
|
||||
const int level = level0 + m;
|
||||
const int ctx = (level > 2) ? 2 : level;
|
||||
const int band = VP8EncBands[n + 1];
|
||||
score_t base_score;
|
||||
score_t best_cur_score = MAX_COST;
|
||||
int best_prev = 0; // default, in case
|
||||
score_t best_cur_score;
|
||||
int best_prev;
|
||||
score_t cost, score;
|
||||
|
||||
ss_cur[m].score = MAX_COST;
|
||||
ss_cur[m].costs = costs[n + 1][ctx];
|
||||
if (level < 0 || level > thresh_level) {
|
||||
ss_cur[m].score = MAX_COST;
|
||||
// Node is dead.
|
||||
continue;
|
||||
}
|
||||
@ -682,18 +688,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
}
|
||||
|
||||
// Inspect all possible non-dead predecessors. Retain only the best one.
|
||||
for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
|
||||
// The base_score is added to all scores so it is only added for the final
|
||||
// value after the loop.
|
||||
cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level);
|
||||
best_cur_score =
|
||||
ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0);
|
||||
best_prev = -MIN_DELTA;
|
||||
for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) {
|
||||
// Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
|
||||
// eliminated since their score can't be better than the current best.
|
||||
const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
|
||||
cost = VP8LevelCost(ss_prev[p].costs, level);
|
||||
// Examine node assuming it's a non-terminal one.
|
||||
const score_t score =
|
||||
base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
|
||||
score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
|
||||
if (score < best_cur_score) {
|
||||
best_cur_score = score;
|
||||
best_prev = p;
|
||||
}
|
||||
}
|
||||
best_cur_score += base_score;
|
||||
// Store best finding in current node.
|
||||
cur->sign = sign;
|
||||
cur->level = level;
|
||||
@ -701,11 +713,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
ss_cur[m].score = best_cur_score;
|
||||
|
||||
// Now, record best terminal node (and thus best entry in the graph).
|
||||
if (level != 0) {
|
||||
if (level != 0 && best_cur_score < best_score) {
|
||||
const score_t last_pos_cost =
|
||||
(n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
|
||||
const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
|
||||
const score_t score = best_cur_score + last_pos_score;
|
||||
score = best_cur_score + last_pos_score;
|
||||
if (score < best_score) {
|
||||
best_score = score;
|
||||
best_path[0] = n; // best eob position
|
||||
@ -717,10 +729,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
}
|
||||
|
||||
// Fresh start
|
||||
memset(in + first, 0, (16 - first) * sizeof(*in));
|
||||
memset(out + first, 0, (16 - first) * sizeof(*out));
|
||||
// Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case.
|
||||
if (coeff_type == TYPE_I16_AC) {
|
||||
memset(in + 1, 0, 15 * sizeof(*in));
|
||||
memset(out + 1, 0, 15 * sizeof(*out));
|
||||
} else {
|
||||
memset(in, 0, 16 * sizeof(*in));
|
||||
memset(out, 0, 16 * sizeof(*out));
|
||||
}
|
||||
if (best_path[0] == -1) {
|
||||
return 0; // skip!
|
||||
return 0; // skip!
|
||||
}
|
||||
|
||||
{
|
||||
@ -751,9 +769,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
// all at once. Output is the reconstructed block in *yuv_out, and the
|
||||
// quantized levels in *levels.
|
||||
|
||||
static int ReconstructIntra16(VP8EncIterator* const it,
|
||||
VP8ModeScore* const rd,
|
||||
uint8_t* const yuv_out,
|
||||
static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
VP8ModeScore* WEBP_RESTRICT const rd,
|
||||
uint8_t* WEBP_RESTRICT const yuv_out,
|
||||
int mode) {
|
||||
const VP8Encoder* const enc = it->enc_;
|
||||
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
|
||||
@ -775,9 +793,9 @@ static int ReconstructIntra16(VP8EncIterator* const it,
|
||||
for (y = 0, n = 0; y < 4; ++y) {
|
||||
for (x = 0; x < 4; ++x, ++n) {
|
||||
const int ctx = it->top_nz_[x] + it->left_nz_[y];
|
||||
const int non_zero =
|
||||
TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
|
||||
&dqm->y1_, dqm->lambda_trellis_i16_);
|
||||
const int non_zero = TrellisQuantizeBlock(
|
||||
enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_,
|
||||
dqm->lambda_trellis_i16_);
|
||||
it->top_nz_[x] = it->left_nz_[y] = non_zero;
|
||||
rd->y_ac_levels[n][0] = 0;
|
||||
nz |= non_zero << n;
|
||||
@ -803,10 +821,10 @@ static int ReconstructIntra16(VP8EncIterator* const it,
|
||||
return nz;
|
||||
}
|
||||
|
||||
static int ReconstructIntra4(VP8EncIterator* const it,
|
||||
static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
int16_t levels[16],
|
||||
const uint8_t* const src,
|
||||
uint8_t* const yuv_out,
|
||||
const uint8_t* WEBP_RESTRICT const src,
|
||||
uint8_t* WEBP_RESTRICT const yuv_out,
|
||||
int mode) {
|
||||
const VP8Encoder* const enc = it->enc_;
|
||||
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
|
||||
@ -818,7 +836,7 @@ static int ReconstructIntra4(VP8EncIterator* const it,
|
||||
if (DO_TRELLIS_I4 && it->do_trellis_) {
|
||||
const int x = it->i4_ & 3, y = it->i4_ >> 2;
|
||||
const int ctx = it->top_nz_[x] + it->left_nz_[y];
|
||||
nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
|
||||
nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_,
|
||||
dqm->lambda_trellis_i4_);
|
||||
} else {
|
||||
nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
|
||||
@ -839,7 +857,8 @@ static int ReconstructIntra4(VP8EncIterator* const it,
|
||||
|
||||
// Quantize as usual, but also compute and return the quantization error.
|
||||
// Error is already divided by DSHIFT.
|
||||
static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
|
||||
static int QuantizeSingle(int16_t* WEBP_RESTRICT const v,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int V = *v;
|
||||
const int sign = (V < 0);
|
||||
if (sign) V = -V;
|
||||
@ -853,9 +872,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
|
||||
return (sign ? -V : V) >> DSCALE;
|
||||
}
|
||||
|
||||
static void CorrectDCValues(const VP8EncIterator* const it,
|
||||
const VP8Matrix* const mtx,
|
||||
int16_t tmp[][16], VP8ModeScore* const rd) {
|
||||
static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx,
|
||||
int16_t tmp[][16],
|
||||
VP8ModeScore* WEBP_RESTRICT const rd) {
|
||||
// | top[0] | top[1]
|
||||
// --------+--------+---------
|
||||
// left[0] | tmp[0] tmp[1] <-> err0 err1
|
||||
@ -886,8 +906,8 @@ static void CorrectDCValues(const VP8EncIterator* const it,
|
||||
}
|
||||
}
|
||||
|
||||
static void StoreDiffusionErrors(VP8EncIterator* const it,
|
||||
const VP8ModeScore* const rd) {
|
||||
static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
const VP8ModeScore* WEBP_RESTRICT const rd) {
|
||||
int ch;
|
||||
for (ch = 0; ch <= 1; ++ch) {
|
||||
int8_t* const top = it->top_derr_[it->x_][ch];
|
||||
@ -906,8 +926,9 @@ static void StoreDiffusionErrors(VP8EncIterator* const it,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
|
||||
uint8_t* const yuv_out, int mode) {
|
||||
static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
VP8ModeScore* WEBP_RESTRICT const rd,
|
||||
uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
|
||||
const VP8Encoder* const enc = it->enc_;
|
||||
const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
|
||||
const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
|
||||
@ -927,9 +948,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
|
||||
for (y = 0; y < 2; ++y) {
|
||||
for (x = 0; x < 2; ++x, ++n) {
|
||||
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
|
||||
const int non_zero =
|
||||
TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
|
||||
&dqm->uv_, dqm->lambda_trellis_uv_);
|
||||
const int non_zero = TrellisQuantizeBlock(
|
||||
enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_,
|
||||
dqm->lambda_trellis_uv_);
|
||||
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
|
||||
nz |= non_zero << n;
|
||||
}
|
||||
@ -978,7 +999,8 @@ static void SwapOut(VP8EncIterator* const it) {
|
||||
SwapPtr(&it->yuv_out_, &it->yuv_out2_);
|
||||
}
|
||||
|
||||
static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
|
||||
static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
VP8ModeScore* WEBP_RESTRICT rd) {
|
||||
const int kNumBlocks = 16;
|
||||
VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
|
||||
const int lambda = dqm->lambda_i16_;
|
||||
@ -1038,7 +1060,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// return the cost array corresponding to the surrounding prediction modes.
|
||||
static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
|
||||
static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
const uint8_t modes[16]) {
|
||||
const int preds_w = it->enc_->preds_w_;
|
||||
const int x = (it->i4_ & 3), y = it->i4_ >> 2;
|
||||
@ -1047,7 +1069,8 @@ static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
|
||||
return VP8FixedCostsI4[top][left];
|
||||
}
|
||||
|
||||
static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
||||
static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
VP8ModeScore* WEBP_RESTRICT const rd) {
|
||||
const VP8Encoder* const enc = it->enc_;
|
||||
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
|
||||
const int lambda = dqm->lambda_i4_;
|
||||
@ -1143,7 +1166,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
||||
static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
VP8ModeScore* WEBP_RESTRICT const rd) {
|
||||
const int kNumBlocks = 8;
|
||||
const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
|
||||
const int lambda = dqm->lambda_uv_;
|
||||
@ -1195,7 +1219,8 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Final reconstruction and quantization.
|
||||
|
||||
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
||||
static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
VP8ModeScore* WEBP_RESTRICT const rd) {
|
||||
const VP8Encoder* const enc = it->enc_;
|
||||
const int is_i16 = (it->mb_->type_ == 1);
|
||||
int nz = 0;
|
||||
@ -1220,9 +1245,9 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
||||
}
|
||||
|
||||
// Refine intra16/intra4 sub-modes based on distortion only (not rate).
|
||||
static void RefineUsingDistortion(VP8EncIterator* const it,
|
||||
static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
int try_both_modes, int refine_uv_mode,
|
||||
VP8ModeScore* const rd) {
|
||||
VP8ModeScore* WEBP_RESTRICT const rd) {
|
||||
score_t best_score = MAX_COST;
|
||||
int nz = 0;
|
||||
int mode;
|
||||
@ -1336,7 +1361,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
|
||||
int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
VP8ModeScore* WEBP_RESTRICT const rd,
|
||||
VP8RDLevel rd_opt) {
|
||||
int is_skipped;
|
||||
const int method = it->enc_->method_;
|
||||
|
8
3rdparty/libwebp/src/enc/syntax_enc.c
vendored
8
3rdparty/libwebp/src/enc/syntax_enc.c
vendored
@ -258,7 +258,10 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
|
||||
buf[3 * p + 1] = (part_size >> 8) & 0xff;
|
||||
buf[3 * p + 2] = (part_size >> 16) & 0xff;
|
||||
}
|
||||
return p ? pic->writer(buf, 3 * p, pic) : 1;
|
||||
if (p && !pic->writer(buf, 3 * p, pic)) {
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -349,7 +352,7 @@ int VP8EncWrite(VP8Encoder* const enc) {
|
||||
(enc->alpha_data_size_ & 1);
|
||||
riff_size += CHUNK_HEADER_SIZE + padded_alpha_size;
|
||||
}
|
||||
// Sanity check.
|
||||
// RIFF size should fit in 32-bits.
|
||||
if (riff_size > 0xfffffffeU) {
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG);
|
||||
}
|
||||
@ -381,6 +384,7 @@ int VP8EncWrite(VP8Encoder* const enc) {
|
||||
|
||||
enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size);
|
||||
ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
|
||||
if (!ok) WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
29
3rdparty/libwebp/src/enc/vp8i_enc.h
vendored
29
3rdparty/libwebp/src/enc/vp8i_enc.h
vendored
@ -31,8 +31,8 @@ extern "C" {
|
||||
|
||||
// version numbers
|
||||
#define ENC_MAJ_VERSION 1
|
||||
#define ENC_MIN_VERSION 2
|
||||
#define ENC_REV_VERSION 0
|
||||
#define ENC_MIN_VERSION 3
|
||||
#define ENC_REV_VERSION 1
|
||||
|
||||
enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
|
||||
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
|
||||
@ -286,8 +286,7 @@ int VP8IteratorNext(VP8EncIterator* const it);
|
||||
// save the yuv_out_ boundary values to top_/left_ arrays for next iterations.
|
||||
void VP8IteratorSaveBoundary(VP8EncIterator* const it);
|
||||
// Report progression based on macroblock rows. Return 0 for user-abort request.
|
||||
int VP8IteratorProgress(const VP8EncIterator* const it,
|
||||
int final_delta_percent);
|
||||
int VP8IteratorProgress(const VP8EncIterator* const it, int delta);
|
||||
// Intra4x4 iterations
|
||||
void VP8IteratorStartI4(VP8EncIterator* const it);
|
||||
// returns true if not done.
|
||||
@ -471,7 +470,8 @@ int VP8EncAnalyze(VP8Encoder* const enc);
|
||||
// Sets up segment's quantization values, base_quant_ and filter strengths.
|
||||
void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
|
||||
// Pick best modes and fills the levels. Returns true if skipped.
|
||||
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
|
||||
int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
VP8ModeScore* WEBP_RESTRICT const rd,
|
||||
VP8RDLevel rd_opt);
|
||||
|
||||
// in alpha.c
|
||||
@ -491,19 +491,24 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta);
|
||||
|
||||
// misc utils for picture_*.c:
|
||||
|
||||
// Returns true if 'picture' is non-NULL and dimensions/colorspace are within
|
||||
// their valid ranges. If returning false, the 'error_code' in 'picture' is
|
||||
// updated.
|
||||
int WebPValidatePicture(const WebPPicture* const picture);
|
||||
|
||||
// Remove reference to the ARGB/YUVA buffer (doesn't free anything).
|
||||
void WebPPictureResetBuffers(WebPPicture* const picture);
|
||||
|
||||
// Allocates ARGB buffer of given dimension (previous one is always free'd).
|
||||
// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
|
||||
// out-of-memory).
|
||||
int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
|
||||
// Allocates ARGB buffer according to set width/height (previous one is
|
||||
// always free'd). Preserves the YUV(A) buffer. Returns false in case of error
|
||||
// (invalid param, out-of-memory).
|
||||
int WebPPictureAllocARGB(WebPPicture* const picture);
|
||||
|
||||
// Allocates YUVA buffer of given dimension (previous one is always free'd).
|
||||
// Uses picture->csp to determine whether an alpha buffer is needed.
|
||||
// Allocates YUVA buffer according to set width/height (previous one is always
|
||||
// free'd). Uses picture->csp to determine whether an alpha buffer is needed.
|
||||
// Preserves the ARGB buffer.
|
||||
// Returns false in case of error (invalid param, out-of-memory).
|
||||
int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
|
||||
int WebPPictureAllocYUVA(WebPPicture* const picture);
|
||||
|
||||
// Replace samples that are fully transparent by 'color' to help compressibility
|
||||
// (no guarantee, though). Assumes pic->use_argb is true.
|
||||
|
696
3rdparty/libwebp/src/enc/vp8l_enc.c
vendored
696
3rdparty/libwebp/src/enc/vp8l_enc.c
vendored
File diff suppressed because it is too large
Load Diff
26
3rdparty/libwebp/src/enc/vp8li_enc.h
vendored
26
3rdparty/libwebp/src/enc/vp8li_enc.h
vendored
@ -69,6 +69,8 @@ typedef struct {
|
||||
int use_palette_;
|
||||
int palette_size_;
|
||||
uint32_t palette_[MAX_PALETTE_SIZE];
|
||||
// Sorted version of palette_ for cache purposes.
|
||||
uint32_t palette_sorted_[MAX_PALETTE_SIZE];
|
||||
|
||||
// Some 'scratch' (potentially large) objects.
|
||||
struct VP8LBackwardRefs refs_[4]; // Backward Refs array for temporaries.
|
||||
@ -87,9 +89,10 @@ int VP8LEncodeImage(const WebPConfig* const config,
|
||||
|
||||
// Encodes the main image stream using the supplied bit writer.
|
||||
// If 'use_cache' is false, disables the use of color cache.
|
||||
WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
|
||||
const WebPPicture* const picture,
|
||||
VP8LBitWriter* const bw, int use_cache);
|
||||
// Returns false in case of error (stored in picture->error_code).
|
||||
int VP8LEncodeStream(const WebPConfig* const config,
|
||||
const WebPPicture* const picture, VP8LBitWriter* const bw,
|
||||
int use_cache);
|
||||
|
||||
#if (WEBP_NEAR_LOSSLESS == 1)
|
||||
// in near_lossless.c
|
||||
@ -101,13 +104,18 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
|
||||
//------------------------------------------------------------------------------
|
||||
// Image transforms in predictor.c.
|
||||
|
||||
void VP8LResidualImage(int width, int height, int bits, int low_effort,
|
||||
uint32_t* const argb, uint32_t* const argb_scratch,
|
||||
uint32_t* const image, int near_lossless, int exact,
|
||||
int used_subtract_green);
|
||||
// pic and percent are for progress.
|
||||
// Returns false in case of error (stored in pic->error_code).
|
||||
int VP8LResidualImage(int width, int height, int bits, int low_effort,
|
||||
uint32_t* const argb, uint32_t* const argb_scratch,
|
||||
uint32_t* const image, int near_lossless, int exact,
|
||||
int used_subtract_green, const WebPPicture* const pic,
|
||||
int percent_range, int* const percent);
|
||||
|
||||
void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
uint32_t* const argb, uint32_t* image);
|
||||
int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
uint32_t* const argb, uint32_t* image,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
14
3rdparty/libwebp/src/enc/webp_enc.c
vendored
14
3rdparty/libwebp/src/enc/webp_enc.c
vendored
@ -307,7 +307,10 @@ int WebPEncodingSetError(const WebPPicture* const pic,
|
||||
WebPEncodingError error) {
|
||||
assert((int)error < VP8_ENC_ERROR_LAST);
|
||||
assert((int)error >= VP8_ENC_OK);
|
||||
((WebPPicture*)pic)->error_code = error;
|
||||
// The oldest error reported takes precedence over the new one.
|
||||
if (pic->error_code == VP8_ENC_OK) {
|
||||
((WebPPicture*)pic)->error_code = error;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -317,8 +320,7 @@ int WebPReportProgress(const WebPPicture* const pic,
|
||||
*percent_store = percent;
|
||||
if (pic->progress_hook && !pic->progress_hook(percent, pic)) {
|
||||
// user abort requested
|
||||
WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
|
||||
return 0;
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
|
||||
}
|
||||
}
|
||||
return 1; // ok
|
||||
@ -329,16 +331,14 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
|
||||
int ok = 0;
|
||||
if (pic == NULL) return 0;
|
||||
|
||||
WebPEncodingSetError(pic, VP8_ENC_OK); // all ok so far
|
||||
pic->error_code = VP8_ENC_OK; // all ok so far
|
||||
if (config == NULL) { // bad params
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
|
||||
}
|
||||
if (!WebPValidateConfig(config)) {
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
|
||||
}
|
||||
if (pic->width <= 0 || pic->height <= 0) {
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
if (!WebPValidatePicture(pic)) return 0;
|
||||
if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) {
|
||||
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
|
||||
}
|
||||
|
25
3rdparty/libwebp/src/mux/anim_encode.c
vendored
25
3rdparty/libwebp/src/mux/anim_encode.c
vendored
@ -248,9 +248,6 @@ WebPAnimEncoder* WebPAnimEncoderNewInternal(
|
||||
|
||||
enc = (WebPAnimEncoder*)WebPSafeCalloc(1, sizeof(*enc));
|
||||
if (enc == NULL) return NULL;
|
||||
// sanity inits, so we can call WebPAnimEncoderDelete():
|
||||
enc->encoded_frames_ = NULL;
|
||||
enc->mux_ = NULL;
|
||||
MarkNoError(enc);
|
||||
|
||||
// Dimensions and options.
|
||||
@ -421,7 +418,7 @@ static void MinimizeChangeRectangle(const WebPPicture* const src,
|
||||
const int max_allowed_diff_lossy = QualityToMaxDiff(quality);
|
||||
const int max_allowed_diff = is_lossless ? 0 : max_allowed_diff_lossy;
|
||||
|
||||
// Sanity checks.
|
||||
// Assumption/correctness checks.
|
||||
assert(src->width == dst->width && src->height == dst->height);
|
||||
assert(rect->x_offset_ + rect->width_ <= dst->width);
|
||||
assert(rect->y_offset_ + rect->height_ <= dst->height);
|
||||
@ -596,16 +593,17 @@ int WebPAnimEncoderRefineRect(
|
||||
int is_lossless, float quality, int* const x_offset, int* const y_offset,
|
||||
int* const width, int* const height) {
|
||||
FrameRectangle rect;
|
||||
const int right = clip(*x_offset + *width, 0, curr_canvas->width);
|
||||
const int left = clip(*x_offset, 0, curr_canvas->width - 1);
|
||||
const int bottom = clip(*y_offset + *height, 0, curr_canvas->height);
|
||||
const int top = clip(*y_offset, 0, curr_canvas->height - 1);
|
||||
int right, left, bottom, top;
|
||||
if (prev_canvas == NULL || curr_canvas == NULL ||
|
||||
prev_canvas->width != curr_canvas->width ||
|
||||
prev_canvas->height != curr_canvas->height ||
|
||||
!prev_canvas->use_argb || !curr_canvas->use_argb) {
|
||||
return 0;
|
||||
}
|
||||
right = clip(*x_offset + *width, 0, curr_canvas->width);
|
||||
left = clip(*x_offset, 0, curr_canvas->width - 1);
|
||||
bottom = clip(*y_offset + *height, 0, curr_canvas->height);
|
||||
top = clip(*y_offset, 0, curr_canvas->height - 1);
|
||||
rect.x_offset_ = left;
|
||||
rect.y_offset_ = top;
|
||||
rect.width_ = clip(right - left, 0, curr_canvas->width - rect.x_offset_);
|
||||
@ -949,7 +947,8 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) {
|
||||
int new_duration;
|
||||
|
||||
assert(enc->count_ >= 1);
|
||||
assert(prev_enc_frame->sub_frame_.duration ==
|
||||
assert(!prev_enc_frame->is_key_frame_ ||
|
||||
prev_enc_frame->sub_frame_.duration ==
|
||||
prev_enc_frame->key_frame_.duration);
|
||||
assert(prev_enc_frame->sub_frame_.duration ==
|
||||
(prev_enc_frame->sub_frame_.duration & (MAX_DURATION - 1)));
|
||||
@ -966,7 +965,7 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) {
|
||||
0x10, 0x88, 0x88, 0x08
|
||||
};
|
||||
const WebPData lossless_1x1 = {
|
||||
lossless_1x1_bytes, sizeof(lossless_1x1_bytes)
|
||||
lossless_1x1_bytes, sizeof(lossless_1x1_bytes)
|
||||
};
|
||||
const uint8_t lossy_1x1_bytes[] = {
|
||||
0x52, 0x49, 0x46, 0x46, 0x40, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50,
|
||||
@ -1358,6 +1357,12 @@ int WebPAnimEncoderAdd(WebPAnimEncoder* enc, WebPPicture* frame, int timestamp,
|
||||
if (!IncreasePreviousDuration(enc, (int)prev_frame_duration)) {
|
||||
return 0;
|
||||
}
|
||||
// IncreasePreviousDuration() may add a frame to avoid exceeding
|
||||
// MAX_DURATION which could cause CacheFrame() to over read encoded_frames_
|
||||
// before the next flush.
|
||||
if (enc->count_ == enc->size_ && !FlushFrames(enc)) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
enc->first_timestamp_ = timestamp;
|
||||
}
|
||||
|
6
3rdparty/libwebp/src/mux/muxedit.c
vendored
6
3rdparty/libwebp/src/mux/muxedit.c
vendored
@ -70,6 +70,7 @@ void WebPMuxDelete(WebPMux* mux) {
|
||||
err = ChunkAssignData(&chunk, data, copy_data, tag); \
|
||||
if (err == WEBP_MUX_OK) { \
|
||||
err = ChunkSetHead(&chunk, (LIST)); \
|
||||
if (err != WEBP_MUX_OK) ChunkRelease(&chunk); \
|
||||
} \
|
||||
return err; \
|
||||
}
|
||||
@ -235,7 +236,6 @@ WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream,
|
||||
WebPMuxImage wpi;
|
||||
WebPMuxError err;
|
||||
|
||||
// Sanity checks.
|
||||
if (mux == NULL || bitstream == NULL || bitstream->bytes == NULL ||
|
||||
bitstream->size > MAX_CHUNK_PAYLOAD) {
|
||||
return WEBP_MUX_INVALID_ARGUMENT;
|
||||
@ -267,7 +267,6 @@ WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* info,
|
||||
WebPMuxImage wpi;
|
||||
WebPMuxError err;
|
||||
|
||||
// Sanity checks.
|
||||
if (mux == NULL || info == NULL) return WEBP_MUX_INVALID_ARGUMENT;
|
||||
|
||||
if (info->id != WEBP_CHUNK_ANMF) return WEBP_MUX_INVALID_ARGUMENT;
|
||||
@ -556,7 +555,8 @@ static WebPMuxError MuxCleanup(WebPMux* const mux) {
|
||||
if (num_frames == 1) {
|
||||
WebPMuxImage* frame = NULL;
|
||||
err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, &frame);
|
||||
assert(err == WEBP_MUX_OK); // We know that one frame does exist.
|
||||
if (err != WEBP_MUX_OK) return err;
|
||||
// We know that one frame does exist.
|
||||
assert(frame != NULL);
|
||||
if (frame->header_ != NULL &&
|
||||
((mux->canvas_width_ == 0 && mux->canvas_height_ == 0) ||
|
||||
|
4
3rdparty/libwebp/src/mux/muxi.h
vendored
4
3rdparty/libwebp/src/mux/muxi.h
vendored
@ -28,8 +28,8 @@ extern "C" {
|
||||
// Defines and constants.
|
||||
|
||||
#define MUX_MAJ_VERSION 1
|
||||
#define MUX_MIN_VERSION 2
|
||||
#define MUX_REV_VERSION 0
|
||||
#define MUX_MIN_VERSION 3
|
||||
#define MUX_REV_VERSION 1
|
||||
|
||||
// Chunk object.
|
||||
typedef struct WebPChunk WebPChunk;
|
||||
|
9
3rdparty/libwebp/src/mux/muxinternal.c
vendored
9
3rdparty/libwebp/src/mux/muxinternal.c
vendored
@ -155,17 +155,18 @@ WebPMuxError ChunkSetHead(WebPChunk* const chunk,
|
||||
|
||||
WebPMuxError ChunkAppend(WebPChunk* const chunk,
|
||||
WebPChunk*** const chunk_list) {
|
||||
WebPMuxError err;
|
||||
assert(chunk_list != NULL && *chunk_list != NULL);
|
||||
|
||||
if (**chunk_list == NULL) {
|
||||
ChunkSetHead(chunk, *chunk_list);
|
||||
err = ChunkSetHead(chunk, *chunk_list);
|
||||
} else {
|
||||
WebPChunk* last_chunk = **chunk_list;
|
||||
while (last_chunk->next_ != NULL) last_chunk = last_chunk->next_;
|
||||
ChunkSetHead(chunk, &last_chunk->next_);
|
||||
*chunk_list = &last_chunk->next_;
|
||||
err = ChunkSetHead(chunk, &last_chunk->next_);
|
||||
if (err == WEBP_MUX_OK) *chunk_list = &last_chunk->next_;
|
||||
}
|
||||
return WEBP_MUX_OK;
|
||||
return err;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
11
3rdparty/libwebp/src/mux/muxread.c
vendored
11
3rdparty/libwebp/src/mux/muxread.c
vendored
@ -56,7 +56,7 @@ static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk,
|
||||
uint32_t chunk_size;
|
||||
WebPData chunk_data;
|
||||
|
||||
// Sanity checks.
|
||||
// Correctness checks.
|
||||
if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA;
|
||||
chunk_size = GetLE32(data + TAG_SIZE);
|
||||
if (chunk_size > MAX_CHUNK_PAYLOAD) return WEBP_MUX_BAD_DATA;
|
||||
@ -116,9 +116,12 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
|
||||
// Each of ANMF chunk contain a header at the beginning. So, its size should
|
||||
// be at least 'hdr_size'.
|
||||
if (size < hdr_size) goto Fail;
|
||||
ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_);
|
||||
if (ChunkAssignData(&subchunk, &temp, copy_data,
|
||||
chunk->tag_) != WEBP_MUX_OK) {
|
||||
goto Fail;
|
||||
}
|
||||
}
|
||||
ChunkSetHead(&subchunk, &wpi->header_);
|
||||
if (ChunkSetHead(&subchunk, &wpi->header_) != WEBP_MUX_OK) goto Fail;
|
||||
wpi->is_partial_ = 1; // Waiting for ALPH and/or VP8/VP8L chunks.
|
||||
|
||||
// Rest of the chunks.
|
||||
@ -186,7 +189,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
|
||||
WebPChunk** chunk_list_ends[WEBP_CHUNK_NIL + 1] = { NULL };
|
||||
ChunkInit(&chunk);
|
||||
|
||||
// Sanity checks.
|
||||
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) {
|
||||
return NULL; // version mismatch
|
||||
}
|
||||
@ -481,7 +483,6 @@ WebPMuxError WebPMuxGetFrame(
|
||||
WebPMuxError err;
|
||||
WebPMuxImage* wpi;
|
||||
|
||||
// Sanity checks.
|
||||
if (mux == NULL || frame == NULL) {
|
||||
return WEBP_MUX_INVALID_ARGUMENT;
|
||||
}
|
||||
|
@ -55,7 +55,7 @@ void VP8LoadFinalBytes(VP8BitReader* const br);
|
||||
|
||||
// makes sure br->value_ has at least BITS bits worth of data
|
||||
static WEBP_UBSAN_IGNORE_UNDEF WEBP_INLINE
|
||||
void VP8LoadNewBytes(VP8BitReader* const br) {
|
||||
void VP8LoadNewBytes(VP8BitReader* WEBP_RESTRICT const br) {
|
||||
assert(br != NULL && br->buf_ != NULL);
|
||||
// Read 'BITS' bits at a time if possible.
|
||||
if (br->buf_ < br->buf_max_) {
|
||||
@ -104,7 +104,7 @@ void VP8LoadNewBytes(VP8BitReader* const br) {
|
||||
}
|
||||
|
||||
// Read a bit with proba 'prob'. Speed-critical function!
|
||||
static WEBP_INLINE int VP8GetBit(VP8BitReader* const br,
|
||||
static WEBP_INLINE int VP8GetBit(VP8BitReader* WEBP_RESTRICT const br,
|
||||
int prob, const char label[]) {
|
||||
// Don't move this declaration! It makes a big speed difference to store
|
||||
// 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
|
||||
@ -137,7 +137,8 @@ static WEBP_INLINE int VP8GetBit(VP8BitReader* const br,
|
||||
|
||||
// simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here)
|
||||
static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
|
||||
int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) {
|
||||
int VP8GetSigned(VP8BitReader* WEBP_RESTRICT const br, int v,
|
||||
const char label[]) {
|
||||
if (br->bits_ < 0) {
|
||||
VP8LoadNewBytes(br);
|
||||
}
|
||||
@ -147,15 +148,15 @@ int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) {
|
||||
const range_t value = (range_t)(br->value_ >> pos);
|
||||
const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0
|
||||
br->bits_ -= 1;
|
||||
br->range_ += mask;
|
||||
br->range_ += (range_t)mask;
|
||||
br->range_ |= 1;
|
||||
br->value_ -= (bit_t)((split + 1) & mask) << pos;
|
||||
br->value_ -= (bit_t)((split + 1) & (uint32_t)mask) << pos;
|
||||
BT_TRACK(br);
|
||||
return (v ^ mask) - mask;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br,
|
||||
static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* WEBP_RESTRICT const br,
|
||||
int prob, const char label[]) {
|
||||
// Don't move this declaration! It makes a big speed difference to store
|
||||
// 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/utils/bit_reader_inl_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
@ -121,7 +122,7 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits,
|
||||
|
||||
#define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits.
|
||||
|
||||
#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
|
||||
#if defined(__arm__) || defined(_M_ARM) || WEBP_AARCH64 || \
|
||||
defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(__x86_64__) || defined(_M_X64)
|
||||
#define VP8L_USE_FAST_LOAD
|
||||
|
@ -19,6 +19,7 @@
|
||||
#ifdef _MSC_VER
|
||||
#include <stdlib.h> // _byteswap_ulong
|
||||
#endif
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// Warning! This macro triggers quite some MACRO wizardry around func signature!
|
||||
@ -64,7 +65,7 @@ extern "C" {
|
||||
#define BITS 56
|
||||
#elif defined(__arm__) || defined(_M_ARM) // ARM
|
||||
#define BITS 24
|
||||
#elif defined(__aarch64__) // ARM 64bit
|
||||
#elif WEBP_AARCH64 // ARM 64bit
|
||||
#define BITS 56
|
||||
#elif defined(__mips__) // MIPS
|
||||
#define BITS 24
|
||||
|
@ -278,7 +278,7 @@ void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) {
|
||||
// If needed, make some room by flushing some bits out.
|
||||
if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
|
||||
const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
|
||||
if (extra_size != (size_t)extra_size ||
|
||||
if (!CheckSizeOverflow(extra_size) ||
|
||||
!VP8LBitWriterResize(bw, (size_t)extra_size)) {
|
||||
bw->cur_ = bw->buf_;
|
||||
bw->error_ = 1;
|
||||
@ -314,7 +314,7 @@ void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits) {
|
||||
while (used >= VP8L_WRITER_BITS) {
|
||||
if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
|
||||
const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
|
||||
if (extra_size != (size_t)extra_size ||
|
||||
if (!CheckSizeOverflow(extra_size) ||
|
||||
!VP8LBitWriterResize(bw, (size_t)extra_size)) {
|
||||
bw->cur_ = bw->buf_;
|
||||
bw->error_ = 1;
|
||||
|
22
3rdparty/libwebp/src/utils/color_cache_utils.c
vendored
22
3rdparty/libwebp/src/utils/color_cache_utils.c
vendored
@ -20,22 +20,22 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// VP8LColorCache.
|
||||
|
||||
int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
|
||||
int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits) {
|
||||
const int hash_size = 1 << hash_bits;
|
||||
assert(cc != NULL);
|
||||
assert(color_cache != NULL);
|
||||
assert(hash_bits > 0);
|
||||
cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size,
|
||||
sizeof(*cc->colors_));
|
||||
if (cc->colors_ == NULL) return 0;
|
||||
cc->hash_shift_ = 32 - hash_bits;
|
||||
cc->hash_bits_ = hash_bits;
|
||||
color_cache->colors_ = (uint32_t*)WebPSafeCalloc(
|
||||
(uint64_t)hash_size, sizeof(*color_cache->colors_));
|
||||
if (color_cache->colors_ == NULL) return 0;
|
||||
color_cache->hash_shift_ = 32 - hash_bits;
|
||||
color_cache->hash_bits_ = hash_bits;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void VP8LColorCacheClear(VP8LColorCache* const cc) {
|
||||
if (cc != NULL) {
|
||||
WebPSafeFree(cc->colors_);
|
||||
cc->colors_ = NULL;
|
||||
void VP8LColorCacheClear(VP8LColorCache* const color_cache) {
|
||||
if (color_cache != NULL) {
|
||||
WebPSafeFree(color_cache->colors_);
|
||||
color_cache->colors_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user