mirror of
https://github.com/opencv/opencv.git
synced 2024-12-03 16:35:09 +08:00
0de26fd78e
Zlib-ng is a zlib replacement with optimizations for "next generation" systems. Its optimizations may benefit the decode and encode speed of image libraries such as libpng. In our tests, using the zlib-ng and libpng combination on an x86_64 machine with AVX2, the time of `imdecode` and `imencode` drops by approximately 20%. This patch enables zlib-ng's optimizations if `CV_DISABLE_OPTIMIZATION` is OFF. Since zlib-ng can dispatch intrinsics on the fly, the porting work is much easier. Related discussion: https://github.com/opencv/opencv/issues/22573
187 lines
8.2 KiB
C
187 lines
8.2 KiB
C
/* deflate_stored.c -- store data without compression using deflation algorithm
|
|
*
|
|
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
|
|
* For conditions of distribution and use, see copyright notice in zlib.h
|
|
*/
|
|
|
|
#include "zbuild.h"
|
|
#include "deflate.h"
|
|
#include "deflate_p.h"
|
|
#include "functable.h"
|
|
|
|
/* ===========================================================================
|
|
* Copy without compression as much as possible from the input stream, return
|
|
* the current block state.
|
|
*
|
|
* In case deflateParams() is used to later switch to a non-zero compression
|
|
* level, s->matches (otherwise unused when storing) keeps track of the number
|
|
* of hash table slides to perform. If s->matches is 1, then one hash table
|
|
* slide will be done when switching. If s->matches is 2, the maximum value
|
|
* allowed here, then the hash table will be cleared, since two or more slides
|
|
* is the same as a clear.
|
|
*
|
|
* deflate_stored() is written to minimize the number of times an input byte is
|
|
* copied. It is most efficient with large input and output buffers, which
|
|
* maximizes the opportunites to have a single copy from next_in to next_out.
|
|
*/
|
|
Z_INTERNAL block_state deflate_stored(deflate_state *s, int flush) {
|
|
/* Smallest worthy block size when not flushing or finishing. By default
|
|
* this is 32K. This can be as small as 507 bytes for memLevel == 1. For
|
|
* large input and output buffers, the stored block size will be larger.
|
|
*/
|
|
unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
|
|
|
|
/* Copy as many min_block or larger stored blocks directly to next_out as
|
|
* possible. If flushing, copy the remaining available input to next_out as
|
|
* stored blocks, if there is enough space.
|
|
*/
|
|
unsigned len, left, have, last = 0;
|
|
unsigned used = s->strm->avail_in;
|
|
do {
|
|
/* Set len to the maximum size block that we can copy directly with the
|
|
* available input data and output space. Set left to how much of that
|
|
* would be copied from what's left in the window.
|
|
*/
|
|
len = MAX_STORED; /* maximum deflate stored block length */
|
|
have = (s->bi_valid + 42) >> 3; /* number of header bytes */
|
|
if (s->strm->avail_out < have) /* need room for header */
|
|
break;
|
|
/* maximum stored block length that will fit in avail_out: */
|
|
have = s->strm->avail_out - have;
|
|
left = (int)s->strstart - s->block_start; /* bytes left in window */
|
|
if (len > (unsigned long)left + s->strm->avail_in)
|
|
len = left + s->strm->avail_in; /* limit len to the input */
|
|
len = MIN(len, have); /* limit len to the output */
|
|
|
|
/* If the stored block would be less than min_block in length, or if
|
|
* unable to copy all of the available input when flushing, then try
|
|
* copying to the window and the pending buffer instead. Also don't
|
|
* write an empty block when flushing -- deflate() does that.
|
|
*/
|
|
if (len < min_block && ((len == 0 && flush != Z_FINISH) || flush == Z_NO_FLUSH || len != left + s->strm->avail_in))
|
|
break;
|
|
|
|
/* Make a dummy stored block in pending to get the header bytes,
|
|
* including any pending bits. This also updates the debugging counts.
|
|
*/
|
|
last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
|
|
zng_tr_stored_block(s, (char *)0, 0L, last);
|
|
|
|
/* Replace the lengths in the dummy stored block with len. */
|
|
s->pending -= 4;
|
|
put_short(s, (uint16_t)len);
|
|
put_short(s, (uint16_t)~len);
|
|
|
|
/* Write the stored block header bytes. */
|
|
PREFIX(flush_pending)(s->strm);
|
|
|
|
/* Update debugging counts for the data about to be copied. */
|
|
cmpr_bits_add(s, len << 3);
|
|
sent_bits_add(s, len << 3);
|
|
|
|
/* Copy uncompressed bytes from the window to next_out. */
|
|
if (left) {
|
|
left = MIN(left, len);
|
|
memcpy(s->strm->next_out, s->window + s->block_start, left);
|
|
s->strm->next_out += left;
|
|
s->strm->avail_out -= left;
|
|
s->strm->total_out += left;
|
|
s->block_start += (int)left;
|
|
len -= left;
|
|
}
|
|
|
|
/* Copy uncompressed bytes directly from next_in to next_out, updating
|
|
* the check value.
|
|
*/
|
|
if (len) {
|
|
PREFIX(read_buf)(s->strm, s->strm->next_out, len);
|
|
s->strm->next_out += len;
|
|
s->strm->avail_out -= len;
|
|
s->strm->total_out += len;
|
|
}
|
|
} while (last == 0);
|
|
|
|
/* Update the sliding window with the last s->w_size bytes of the copied
|
|
* data, or append all of the copied data to the existing window if less
|
|
* than s->w_size bytes were copied. Also update the number of bytes to
|
|
* insert in the hash tables, in the event that deflateParams() switches to
|
|
* a non-zero compression level.
|
|
*/
|
|
used -= s->strm->avail_in; /* number of input bytes directly copied */
|
|
if (used) {
|
|
/* If any input was used, then no unused input remains in the window,
|
|
* therefore s->block_start == s->strstart.
|
|
*/
|
|
if (used >= s->w_size) { /* supplant the previous history */
|
|
s->matches = 2; /* clear hash */
|
|
memcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
|
|
s->strstart = s->w_size;
|
|
s->insert = s->strstart;
|
|
} else {
|
|
if (s->window_size - s->strstart <= used) {
|
|
/* Slide the window down. */
|
|
s->strstart -= s->w_size;
|
|
memcpy(s->window, s->window + s->w_size, s->strstart);
|
|
if (s->matches < 2)
|
|
s->matches++; /* add a pending slide_hash() */
|
|
s->insert = MIN(s->insert, s->strstart);
|
|
}
|
|
memcpy(s->window + s->strstart, s->strm->next_in - used, used);
|
|
s->strstart += used;
|
|
s->insert += MIN(used, s->w_size - s->insert);
|
|
}
|
|
s->block_start = (int)s->strstart;
|
|
}
|
|
s->high_water = MAX(s->high_water, s->strstart);
|
|
|
|
/* If the last block was written to next_out, then done. */
|
|
if (last)
|
|
return finish_done;
|
|
|
|
/* If flushing and all input has been consumed, then done. */
|
|
if (flush != Z_NO_FLUSH && flush != Z_FINISH && s->strm->avail_in == 0 && (int)s->strstart == s->block_start)
|
|
return block_done;
|
|
|
|
/* Fill the window with any remaining input. */
|
|
have = s->window_size - s->strstart;
|
|
if (s->strm->avail_in > have && s->block_start >= (int)s->w_size) {
|
|
/* Slide the window down. */
|
|
s->block_start -= (int)s->w_size;
|
|
s->strstart -= s->w_size;
|
|
memcpy(s->window, s->window + s->w_size, s->strstart);
|
|
if (s->matches < 2)
|
|
s->matches++; /* add a pending slide_hash() */
|
|
have += s->w_size; /* more space now */
|
|
s->insert = MIN(s->insert, s->strstart);
|
|
}
|
|
|
|
have = MIN(have, s->strm->avail_in);
|
|
if (have) {
|
|
PREFIX(read_buf)(s->strm, s->window + s->strstart, have);
|
|
s->strstart += have;
|
|
s->insert += MIN(have, s->w_size - s->insert);
|
|
}
|
|
s->high_water = MAX(s->high_water, s->strstart);
|
|
|
|
/* There was not enough avail_out to write a complete worthy or flushed
|
|
* stored block to next_out. Write a stored block to pending instead, if we
|
|
* have enough input for a worthy block, or if flushing and there is enough
|
|
* room for the remaining input as a stored block in the pending buffer.
|
|
*/
|
|
have = (s->bi_valid + 42) >> 3; /* number of header bytes */
|
|
/* maximum stored block length that will fit in pending: */
|
|
have = MIN(s->pending_buf_size - have, MAX_STORED);
|
|
min_block = MIN(have, s->w_size);
|
|
left = (int)s->strstart - s->block_start;
|
|
if (left >= min_block || ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && s->strm->avail_in == 0 && left <= have)) {
|
|
len = MIN(left, have);
|
|
last = flush == Z_FINISH && s->strm->avail_in == 0 && len == left ? 1 : 0;
|
|
zng_tr_stored_block(s, (char *)s->window + s->block_start, len, last);
|
|
s->block_start += (int)len;
|
|
PREFIX(flush_pending)(s->strm);
|
|
}
|
|
|
|
/* We've done all we can with the available input and output. */
|
|
return last ? finish_started : need_more;
|
|
}
|