seaweedfs/weed/util/compression.go

157 lines
3.2 KiB
Go
Raw Normal View History

2019-04-19 12:43:36 +08:00
package util
import (
"bytes"
"compress/flate"
"compress/gzip"
2020-06-20 23:01:00 +08:00
"fmt"
"io/ioutil"
"strings"
2020-03-27 14:50:07 +08:00
"github.com/chrislusf/seaweedfs/weed/glog"
2020-06-24 00:12:02 +08:00
"github.com/klauspost/compress/zstd"
)
2020-09-12 19:08:03 +08:00
var (
	// UnsupportedCompression is the sentinel error returned by DecompressData
	// when the input carries neither the gzip nor the zstd magic bytes.
	UnsupportedCompression = fmt.Errorf("unsupported compression")
)
2020-09-12 19:08:03 +08:00
func MaybeGzipData(input []byte) []byte {
2020-09-04 02:00:20 +08:00
if IsGzippedContent(input) {
return input
}
gzipped, err := GzipData(input)
if err != nil {
return input
}
2020-09-12 19:08:03 +08:00
if len(gzipped)*10 > len(input)*9 {
2020-09-04 02:00:20 +08:00
return input
}
return gzipped
}
2020-09-12 19:08:03 +08:00
func MaybeDecompressData(input []byte) []byte {
2020-09-04 02:00:20 +08:00
uncompressed, err := DecompressData(input)
if err != nil {
if err != UnsupportedCompression {
glog.Errorf("decompressed data: %v", err)
}
return input
}
return uncompressed
}
2019-04-19 12:43:36 +08:00
func GzipData(input []byte) ([]byte, error) {
buf := new(bytes.Buffer)
w, _ := gzip.NewWriterLevel(buf, flate.BestSpeed)
if _, err := w.Write(input); err != nil {
2020-09-04 02:00:20 +08:00
glog.V(2).Infof("error gzip data: %v", err)
2019-04-19 12:43:36 +08:00
return nil, err
}
if err := w.Close(); err != nil {
2020-09-04 02:00:20 +08:00
glog.V(2).Infof("error closing gzipped data: %v", err)
2019-04-19 12:43:36 +08:00
return nil, err
}
return buf.Bytes(), nil
}
2020-06-25 02:39:09 +08:00
// zstdEncoder is the package-level zstd encoder used by ZstdData. It is
// created with a nil writer and no options; the error returned by
// zstd.NewWriter is discarded.
var zstdEncoder, _ = zstd.NewWriter(nil)
// ZstdData compresses input with the package-level zstd encoder, passing a nil
// destination buffer to EncodeAll. The returned error is always nil.
func ZstdData(input []byte) ([]byte, error) {
	compressed := zstdEncoder.EncodeAll(input, nil)
	return compressed, nil
}
func DecompressData(input []byte) ([]byte, error) {
2020-06-20 13:45:27 +08:00
if IsGzippedContent(input) {
return ungzipData(input)
}
2020-06-24 00:12:02 +08:00
if IsZstdContent(input) {
return unzstdData(input)
}
2020-09-04 02:00:20 +08:00
return input, UnsupportedCompression
2020-06-20 13:45:27 +08:00
}
func ungzipData(input []byte) ([]byte, error) {
2019-04-19 12:43:36 +08:00
buf := bytes.NewBuffer(input)
r, _ := gzip.NewReader(buf)
defer r.Close()
output, err := ioutil.ReadAll(r)
if err != nil {
2020-09-04 02:00:20 +08:00
glog.V(2).Infof("error ungzip data: %v", err)
2019-04-19 12:43:36 +08:00
}
return output, err
}
2020-06-25 02:39:09 +08:00
// decoder is the package-level zstd decoder used by unzstdData. It is created
// with a nil reader and no options; the error returned by zstd.NewReader is
// discarded.
var decoder, _ = zstd.NewReader(nil)
2020-07-14 22:34:16 +08:00
2020-06-24 00:12:02 +08:00
func unzstdData(input []byte) ([]byte, error) {
2020-06-25 02:39:09 +08:00
return decoder.DecodeAll(input, nil)
}
2020-06-20 13:45:27 +08:00
// IsGzippedContent reports whether data begins with the two-byte gzip magic
// number 0x1f 0x8b. Inputs shorter than two bytes are never considered gzip.
func IsGzippedContent(data []byte) bool {
	const (
		gzipID1 = 0x1f
		gzipID2 = 0x8b
	)
	return len(data) >= 2 && data[0] == gzipID1 && data[1] == gzipID2
}
2020-06-24 00:12:02 +08:00
// IsZstdContent reports whether data begins with the four-byte zstd frame
// magic number 0x28 0xB5 0x2F 0xFD. Inputs shorter than four bytes are never
// considered zstd.
func IsZstdContent(data []byte) bool {
	if len(data) < 4 {
		return false
	}
	return data[3] == 0xFD && data[2] == 0x2F && data[1] == 0xB5 && data[0] == 0x28
}
// IsCompressableFileType reports whether content with the given file extension
// and MIME type should be compressed (shouldBeCompressed), and whether that
// verdict comes from a matching rule (iAmSure).
//
// The default is not to compress, since compression can be done on the client
// side. ext is compared verbatim against lower-case extensions that include
// the leading dot (for example ".txt"); mtype is matched by prefix and suffix.
// When no rule matches, the result is (false, false).
func IsCompressableFileType(ext, mtype string) (shouldBeCompressed, iAmSure bool) {

	// text
	if strings.HasPrefix(mtype, "text/") {
		return true, true
	}

	// uncompressed media formats; checked before the generic image rule so
	// that .svg and .bmp are not rejected by their image/ MIME type
	switch ext {
	case ".svg", ".bmp", ".wav":
		return true, true
	}
	// all other images are assumed to be compressed already
	if strings.HasPrefix(mtype, "image/") {
		return false, true
	}

	// by file name extension
	switch ext {
	case ".zip", ".rar", ".gz", ".bz2", ".xz", ".zst":
		return false, true
	case ".pdf", ".txt", ".html", ".htm", ".css", ".js", ".json":
		return true, true
	case ".php", ".java", ".go", ".rb", ".c", ".cpp", ".h", ".hpp":
		return true, true
	case ".png", ".jpg", ".jpeg":
		return false, true
	}

	// by mime type
	if strings.HasPrefix(mtype, "application/") {
		if strings.HasSuffix(mtype, "zstd") {
			return false, true
		}
		if strings.HasSuffix(mtype, "xml") {
			return true, true
		}
		if strings.HasSuffix(mtype, "script") {
			return true, true
		}
	}

	// uncompressed audio
	if strings.HasPrefix(mtype, "audio/") {
		switch strings.TrimPrefix(mtype, "audio/") {
		case "wave", "wav", "x-wav", "x-pn-wav":
			return true, true
		}
	}

	return false, false
}