opencv/3rdparty/zlib-ng/deflate_stored.c
Letu Ren 0de26fd78e Add zlib-ng as an alternative zlib implementation
Zlib-ng is zlib replacement with optimizations for "next generation" systems. Its optimization may benifits image library decode and encode speed such as libpng. In our tests, if using zlib-ng and libpng combination on a x86_64 machine with AVX2, the time of `imdecode` amd `imencode` will drop 20% approximately. This patch enables zlib-ng's optimization if `CV_DISABLE_OPTIMIZATION` is OFF. Since Zlib-ng can dispatch intrinsics on the fly, port work is much easier.

Related discussion: https://github.com/opencv/opencv/issues/22573
2024-01-14 14:58:47 +08:00

187 lines
8.2 KiB
C

/* deflate_stored.c -- store data without compression using deflation algorithm
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
/* ===========================================================================
* Copy without compression as much as possible from the input stream, return
* the current block state.
*
* In case deflateParams() is used to later switch to a non-zero compression
* level, s->matches (otherwise unused when storing) keeps track of the number
* of hash table slides to perform. If s->matches is 1, then one hash table
* slide will be done when switching. If s->matches is 2, the maximum value
* allowed here, then the hash table will be cleared, since two or more slides
* is the same as a clear.
*
* deflate_stored() is written to minimize the number of times an input byte is
* copied. It is most efficient with large input and output buffers, which
* maximizes the opportunites to have a single copy from next_in to next_out.
*/
Z_INTERNAL block_state deflate_stored(deflate_state *s, int flush) {
/* Smallest worthy block size when not flushing or finishing. By default
* this is 32K. This can be as small as 507 bytes for memLevel == 1. For
* large input and output buffers, the stored block size will be larger.
*/
unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
/* Copy as many min_block or larger stored blocks directly to next_out as
* possible. If flushing, copy the remaining available input to next_out as
* stored blocks, if there is enough space.
*/
unsigned len, left, have, last = 0;
unsigned used = s->strm->avail_in;
do {
/* Set len to the maximum size block that we can copy directly with the
* available input data and output space. Set left to how much of that
* would be copied from what's left in the window.
*/
len = MAX_STORED; /* maximum deflate stored block length */
have = (s->bi_valid + 42) >> 3; /* number of header bytes */
if (s->strm->avail_out < have) /* need room for header */
break;
/* maximum stored block length that will fit in avail_out: */
have = s->strm->avail_out - have;
left = (int)s->strstart - s->block_start; /* bytes left in window */
if (len > (unsigned long)left + s->strm->avail_in)
len = left + s->strm->avail_in; /* limit len to the input */
len = MIN(len, have); /* limit len to the output */
/* If the stored block would be less than min_block in length, or if
* unable to copy all of the available input when flushing, then try
* copying to the window and the pending buffer instead. Also don't
* write an empty block when flushing -- deflate() does that.
*/
if (len < min_block && ((len == 0 && flush != Z_FINISH) || flush == Z_NO_FLUSH || len != left + s->strm->avail_in))
break;
/* Make a dummy stored block in pending to get the header bytes,
* including any pending bits. This also updates the debugging counts.
*/
last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
zng_tr_stored_block(s, (char *)0, 0L, last);
/* Replace the lengths in the dummy stored block with len. */
s->pending -= 4;
put_short(s, (uint16_t)len);
put_short(s, (uint16_t)~len);
/* Write the stored block header bytes. */
PREFIX(flush_pending)(s->strm);
/* Update debugging counts for the data about to be copied. */
cmpr_bits_add(s, len << 3);
sent_bits_add(s, len << 3);
/* Copy uncompressed bytes from the window to next_out. */
if (left) {
left = MIN(left, len);
memcpy(s->strm->next_out, s->window + s->block_start, left);
s->strm->next_out += left;
s->strm->avail_out -= left;
s->strm->total_out += left;
s->block_start += (int)left;
len -= left;
}
/* Copy uncompressed bytes directly from next_in to next_out, updating
* the check value.
*/
if (len) {
PREFIX(read_buf)(s->strm, s->strm->next_out, len);
s->strm->next_out += len;
s->strm->avail_out -= len;
s->strm->total_out += len;
}
} while (last == 0);
/* Update the sliding window with the last s->w_size bytes of the copied
* data, or append all of the copied data to the existing window if less
* than s->w_size bytes were copied. Also update the number of bytes to
* insert in the hash tables, in the event that deflateParams() switches to
* a non-zero compression level.
*/
used -= s->strm->avail_in; /* number of input bytes directly copied */
if (used) {
/* If any input was used, then no unused input remains in the window,
* therefore s->block_start == s->strstart.
*/
if (used >= s->w_size) { /* supplant the previous history */
s->matches = 2; /* clear hash */
memcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
s->strstart = s->w_size;
s->insert = s->strstart;
} else {
if (s->window_size - s->strstart <= used) {
/* Slide the window down. */
s->strstart -= s->w_size;
memcpy(s->window, s->window + s->w_size, s->strstart);
if (s->matches < 2)
s->matches++; /* add a pending slide_hash() */
s->insert = MIN(s->insert, s->strstart);
}
memcpy(s->window + s->strstart, s->strm->next_in - used, used);
s->strstart += used;
s->insert += MIN(used, s->w_size - s->insert);
}
s->block_start = (int)s->strstart;
}
s->high_water = MAX(s->high_water, s->strstart);
/* If the last block was written to next_out, then done. */
if (last)
return finish_done;
/* If flushing and all input has been consumed, then done. */
if (flush != Z_NO_FLUSH && flush != Z_FINISH && s->strm->avail_in == 0 && (int)s->strstart == s->block_start)
return block_done;
/* Fill the window with any remaining input. */
have = s->window_size - s->strstart;
if (s->strm->avail_in > have && s->block_start >= (int)s->w_size) {
/* Slide the window down. */
s->block_start -= (int)s->w_size;
s->strstart -= s->w_size;
memcpy(s->window, s->window + s->w_size, s->strstart);
if (s->matches < 2)
s->matches++; /* add a pending slide_hash() */
have += s->w_size; /* more space now */
s->insert = MIN(s->insert, s->strstart);
}
have = MIN(have, s->strm->avail_in);
if (have) {
PREFIX(read_buf)(s->strm, s->window + s->strstart, have);
s->strstart += have;
s->insert += MIN(have, s->w_size - s->insert);
}
s->high_water = MAX(s->high_water, s->strstart);
/* There was not enough avail_out to write a complete worthy or flushed
* stored block to next_out. Write a stored block to pending instead, if we
* have enough input for a worthy block, or if flushing and there is enough
* room for the remaining input as a stored block in the pending buffer.
*/
have = (s->bi_valid + 42) >> 3; /* number of header bytes */
/* maximum stored block length that will fit in pending: */
have = MIN(s->pending_buf_size - have, MAX_STORED);
min_block = MIN(have, s->w_size);
left = (int)s->strstart - s->block_start;
if (left >= min_block || ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && s->strm->avail_in == 0 && left <= have)) {
len = MIN(left, have);
last = flush == Z_FINISH && s->strm->avail_in == 0 && len == left ? 1 : 0;
zng_tr_stored_block(s, (char *)s->window + s->block_start, len, last);
s->block_start += (int)len;
PREFIX(flush_pending)(s->strm);
}
/* We've done all we can with the available input and output. */
return last ? finish_started : need_more;
}