seaweedfs/weed/util/http_util.go


package util

import (
	"compress/gzip"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/util/mem"
)
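
// client and Transport are shared by every HTTP helper in this package and
// are initialized in init().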
var (
	client    *http.Client
	Transport *http.Transport
)

func init() {
	Transport = &http.Transport{
		MaxIdleConns:        1024,
		MaxIdleConnsPerHost: 1024,
	}
	client = &http.Client{
		Transport: Transport,
	}
}
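
// Post sends url-encoded form values to url and returns the response body.
// Responses with status >= 400 are returned as errors.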
func Post(url string, values url.Values) ([]byte, error) {
	r, err := client.PostForm(url, values)
	if err != nil {
		return nil, err
	}
	defer r.Body.Close()
	b, err := io.ReadAll(r.Body)
	if r.StatusCode >= 400 {
		if err != nil {
			return nil, fmt.Errorf("%s: %d - %s", url, r.StatusCode, string(b))
		} else {
			return nil, fmt.Errorf("%s: %s", url, r.Status)
		}
	}
	if err != nil {
		return nil, err
	}
	return b, nil
}
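
// Get issues a GET request for url and returns the body, a flag indicating
// whether the failure is retryable, and any error.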
// github.com/seaweedfs/seaweedfs/unmaintained/repeated_vacuum/repeated_vacuum.go
// may need increasing http.Client.Timeout
func Get(url string) ([]byte, bool, error) {
	return GetAuthenticated(url, "")
}
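
// GetAuthenticated is like Get but sends the given JWT in the Authorization
// header when jwt is non-empty, and transparently decompresses gzip responses.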
func GetAuthenticated(url, jwt string) ([]byte, bool, error) {
	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, true, err
	}
	maybeAddAuth(request, jwt)
	request.Header.Add("Accept-Encoding", "gzip")

	response, err := client.Do(request)
	if err != nil {
		return nil, true, err
	}
	defer CloseResponse(response)

	var reader io.ReadCloser
	switch response.Header.Get("Content-Encoding") {
	case "gzip":
		reader, err = gzip.NewReader(response.Body)
		if err != nil {
			return nil, true, err
		}
		defer reader.Close()
	default:
		reader = response.Body
	}

	b, err := io.ReadAll(reader)
	if response.StatusCode >= 400 {
		retryable := response.StatusCode >= 500
		return nil, retryable, fmt.Errorf("%s: %s", url, response.Status)
	}
	if err != nil {
		return nil, false, err
	}
	return b, false, nil
}
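
// Head issues a HEAD request and returns the response headers, or an error for
// status codes >= 400.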
func Head(url string) (http.Header, error) {
	r, err := client.Head(url)
	if err != nil {
		return nil, err
	}
	defer CloseResponse(r)
	if r.StatusCode >= 400 {
		return nil, fmt.Errorf("%s: %s", url, r.Status)
	}
	return r.Header, nil
}
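
// maybeAddAuth sets the Authorization header when a JWT is provided.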
func maybeAddAuth(req *http.Request, jwt string) {
	if jwt != "" {
		req.Header.Set("Authorization", "BEARER "+string(jwt))
	}
}
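
// Delete issues a DELETE request. 404, 202 and 200 responses are treated as
// success; otherwise the response body (or its "error" field) is returned as
// an error.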
func Delete(url string, jwt string) error {
	req, err := http.NewRequest("DELETE", url, nil)
	if err != nil {
		return err
	}
	maybeAddAuth(req, jwt)
	resp, e := client.Do(req)
	if e != nil {
		return e
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	switch resp.StatusCode {
	case http.StatusNotFound, http.StatusAccepted, http.StatusOK:
		return nil
	}
	m := make(map[string]interface{})
	if e := json.Unmarshal(body, &m); e == nil {
		if s, ok := m["error"].(string); ok {
			return errors.New(s)
		}
	}
	return errors.New(string(body))
}
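
// DeleteProxied issues a DELETE request and returns the raw response body and
// HTTP status code so the caller can relay them.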
func DeleteProxied(url string, jwt string) (body []byte, httpStatus int, err error) {
	req, err := http.NewRequest("DELETE", url, nil)
	if err != nil {
		return
	}
	maybeAddAuth(req, jwt)
	resp, err := client.Do(req)
	if err != nil {
		return
	}
	defer resp.Body.Close()
	body, err = io.ReadAll(resp.Body)
	if err != nil {
		return
	}
	httpStatus = resp.StatusCode
	return
}
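
// GetBufferStream POSTs form values to url and streams the response body into
// allocatedBytes, invoking eachBuffer for every chunk read.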
func GetBufferStream(url string, values url.Values, allocatedBytes []byte, eachBuffer func([]byte)) error {
	r, err := client.PostForm(url, values)
	if err != nil {
		return err
	}
	defer CloseResponse(r)
	if r.StatusCode != 200 {
		return fmt.Errorf("%s: %s", url, r.Status)
	}
	for {
		n, err := r.Body.Read(allocatedBytes)
		if n > 0 {
			eachBuffer(allocatedBytes[:n])
		}
		if err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}
	}
}
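
// GetUrlStream POSTs form values to url and hands the response body to readFn.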
func GetUrlStream(url string, values url.Values, readFn func(io.Reader) error) error {
	r, err := client.PostForm(url, values)
	if err != nil {
		return err
	}
	defer CloseResponse(r)
	if r.StatusCode != 200 {
		return fmt.Errorf("%s: %s", url, r.Status)
	}
	return readFn(r.Body)
}
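
// DownloadFile GETs fileUrl (optionally authenticated with jwt) and returns
// the filename parsed from the Content-Disposition header, the response
// headers, and the open response. The caller is responsible for closing the
// response body.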
func DownloadFile(fileUrl string, jwt string) (filename string, header http.Header, resp *http.Response, e error) {
	req, err := http.NewRequest("GET", fileUrl, nil)
	if err != nil {
		return "", nil, nil, err
	}
	maybeAddAuth(req, jwt)
	response, err := client.Do(req)
	if err != nil {
		return "", nil, nil, err
	}
	header = response.Header

	contentDisposition := response.Header["Content-Disposition"]
	if len(contentDisposition) > 0 {
		idx := strings.Index(contentDisposition[0], "filename=")
		if idx != -1 {
			filename = contentDisposition[0][idx+len("filename="):]
			filename = strings.Trim(filename, "\"")
		}
	}

	resp = response
	return
}
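
// Do executes req with the package's shared HTTP client.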
func Do(req *http.Request) (resp *http.Response, err error) {
	return client.Do(req)
}
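
// NormalizeUrl prefixes url with "http://" unless it already has an http or
// https scheme.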
func NormalizeUrl(url string) string {
	if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") {
		return url
	}
	return "http://" + url
}
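
// ReadUrl reads up to len(buf) bytes from fileUrl into buf. When cipherKey is
// set the content is fetched and decrypted via readEncryptedUrl; otherwise a
// Range request is sent unless isFullChunk is true, in which case gzip
// encoding is accepted. It returns the number of bytes read.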
func ReadUrl(fileUrl string, cipherKey []byte, isContentCompressed bool, isFullChunk bool, offset int64, size int, buf []byte) (int64, error) {

	if cipherKey != nil {
		var n int
		_, err := readEncryptedUrl(fileUrl, "", cipherKey, isContentCompressed, isFullChunk, offset, size, func(data []byte) {
			n = copy(buf, data)
		})
		return int64(n), err
	}

	req, err := http.NewRequest("GET", fileUrl, nil)
	if err != nil {
		return 0, err
	}
	if !isFullChunk {
		req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+int64(size)-1))
	} else {
		req.Header.Set("Accept-Encoding", "gzip")
	}

	r, err := client.Do(req)
	if err != nil {
		return 0, err
	}
	defer CloseResponse(r)
	if r.StatusCode >= 400 {
		return 0, fmt.Errorf("%s: %s", fileUrl, r.Status)
	}

	var reader io.ReadCloser
	contentEncoding := r.Header.Get("Content-Encoding")
	switch contentEncoding {
	case "gzip":
		reader, err = gzip.NewReader(r.Body)
		if err != nil {
			return 0, err
		}
		defer reader.Close()
	default:
		reader = r.Body
	}

	var (
		i, m int
		n    int64
	)
	// refers to https://github.com/golang/go/blob/master/src/bytes/buffer.go#L199
	// commit id c170b14c2c1cfb2fd853a37add92a82fd6eb4318
	for {
		m, err = reader.Read(buf[i:])
		i += m
		n += int64(m)
		if err == io.EOF {
			return n, nil
		}
		if err != nil {
			return n, err
		}
		if n == int64(len(buf)) {
			break
		}
	}
	// drains the response body to avoid memory leak
	data, _ := io.ReadAll(reader)
	if len(data) != 0 {
		glog.V(1).Infof("%s reader has remaining %d bytes", contentEncoding, len(data))
	}
	return n, err
}
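
// ReadUrlAsStream streams the content at fileUrl to fn in chunks; it is
// ReadUrlAsStreamAuthenticated without a JWT.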
func ReadUrlAsStream(fileUrl string, cipherKey []byte, isContentGzipped bool, isFullChunk bool, offset int64, size int, fn func(data []byte)) (retryable bool, err error) {
	return ReadUrlAsStreamAuthenticated(fileUrl, "", cipherKey, isContentGzipped, isFullChunk, offset, size, fn)
}
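
// ReadUrlAsStreamAuthenticated streams the content at fileUrl to fn in 64KB
// chunks, optionally authenticating with jwt and decrypting with cipherKey.
// It reports whether a failure is worth retrying.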
func ReadUrlAsStreamAuthenticated(fileUrl, jwt string, cipherKey []byte, isContentGzipped bool, isFullChunk bool, offset int64, size int, fn func(data []byte)) (retryable bool, err error) {
	if cipherKey != nil {
		return readEncryptedUrl(fileUrl, jwt, cipherKey, isContentGzipped, isFullChunk, offset, size, fn)
	}

	req, err := http.NewRequest("GET", fileUrl, nil)
	if err != nil {
		return false, err
	}
	maybeAddAuth(req, jwt)

	if isFullChunk {
		req.Header.Add("Accept-Encoding", "gzip")
	} else {
		req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+int64(size)-1))
	}

	r, err := client.Do(req)
	if err != nil {
		return true, err
	}
	defer CloseResponse(r)
	if r.StatusCode >= 400 {
		retryable = r.StatusCode == http.StatusNotFound || r.StatusCode >= 499
		return retryable, fmt.Errorf("%s: %s", fileUrl, r.Status)
	}

	var reader io.ReadCloser
	contentEncoding := r.Header.Get("Content-Encoding")
	switch contentEncoding {
	case "gzip":
		reader, err = gzip.NewReader(r.Body)
		if err != nil {
			return true, err
		}
		defer reader.Close()
	default:
		reader = r.Body
	}

	var (
		m int
	)
	buf := mem.Allocate(64 * 1024)
	defer mem.Free(buf)

	for {
		m, err = reader.Read(buf)
		if m > 0 {
			fn(buf[:m])
		}
		if err == io.EOF {
			return false, nil
		}
		if err != nil {
			return true, err
		}
	}
}
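
// readEncryptedUrl fetches the whole (encrypted) content, decrypts it with
// cipherKey, optionally decompresses it, and passes the requested range to fn.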
func readEncryptedUrl(fileUrl, jwt string, cipherKey []byte, isContentCompressed bool, isFullChunk bool, offset int64, size int, fn func(data []byte)) (bool, error) {
	encryptedData, retryable, err := GetAuthenticated(fileUrl, jwt)
	if err != nil {
		return retryable, fmt.Errorf("fetch %s: %v", fileUrl, err)
	}
	decryptedData, err := Decrypt(encryptedData, CipherKey(cipherKey))
	if err != nil {
		return false, fmt.Errorf("decrypt %s: %v", fileUrl, err)
	}
	if isContentCompressed {
		decryptedData, err = DecompressData(decryptedData)
		if err != nil {
			glog.V(0).Infof("unzip decrypt %s: %v", fileUrl, err)
		}
	}
	if len(decryptedData) < int(offset)+size {
		return false, fmt.Errorf("read decrypted %s size %d [%d, %d)", fileUrl, len(decryptedData), offset, int(offset)+size)
	}
	if isFullChunk {
		fn(decryptedData)
	} else {
		fn(decryptedData[int(offset) : int(offset)+size])
	}
	return false, nil
}
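
// ReadUrlAsReaderCloser GETs fileUrl (with an optional Range header and JWT)
// and returns the response together with a reader that transparently handles
// gzip encoding. The caller must close both.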
func ReadUrlAsReaderCloser(fileUrl string, jwt string, rangeHeader string) (*http.Response, io.ReadCloser, error) {
	req, err := http.NewRequest("GET", fileUrl, nil)
	if err != nil {
		return nil, nil, err
	}
	if rangeHeader != "" {
		req.Header.Add("Range", rangeHeader)
	} else {
		req.Header.Add("Accept-Encoding", "gzip")
	}
	maybeAddAuth(req, jwt)
	r, err := client.Do(req)
	if err != nil {
		return nil, nil, err
	}
	if r.StatusCode >= 400 {
		CloseResponse(r)
		return nil, nil, fmt.Errorf("%s: %s", fileUrl, r.Status)
	}

	var reader io.ReadCloser
	contentEncoding := r.Header.Get("Content-Encoding")
	switch contentEncoding {
	case "gzip":
		reader, err = gzip.NewReader(r.Body)
		if err != nil {
			return nil, nil, err
		}
	default:
		reader = r.Body
	}

	return r, reader, nil
}
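
// CloseResponse drains and closes the response body so the underlying
// connection can be reused; leftover bytes are logged at V(1).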
func CloseResponse(resp *http.Response) {
	if resp == nil || resp.Body == nil {
		return
	}
	reader := &CountingReader{reader: resp.Body}
	io.Copy(io.Discard, reader)
	resp.Body.Close()
	if reader.BytesRead > 0 {
		glog.V(1).Infof("response leftover %d bytes", reader.BytesRead)
	}
}
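
// CloseRequest drains and closes the request body; leftover bytes are logged
// at V(1).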
func CloseRequest(req *http.Request) {
	reader := &CountingReader{reader: req.Body}
	io.Copy(io.Discard, reader)
	req.Body.Close()
	if reader.BytesRead > 0 {
		glog.V(1).Infof("request leftover %d bytes", reader.BytesRead)
	}
}
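
// CountingReader wraps an io.Reader and counts the bytes read through it.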
type CountingReader struct {
	reader    io.Reader
	BytesRead int
}

func (r *CountingReader) Read(p []byte) (n int, err error) {
	n, err = r.reader.Read(p)
	r.BytesRead += n
	return n, err
}
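
// RetriedFetchChunkData reads a chunk into buffer, trying each of urlStrings
// in turn and backing off (starting at one second, growing by half) up to
// RetryWaitTime when the failure is retryable. It returns the number of bytes
// copied into buffer.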
func RetriedFetchChunkData(buffer []byte, urlStrings []string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64) (n int, err error) {

	var shouldRetry bool

	for waitTime := time.Second; waitTime < RetryWaitTime; waitTime += waitTime / 2 {
		for _, urlString := range urlStrings {
			n = 0
			if strings.Contains(urlString, "%") {
				urlString = url.PathEscape(urlString)
			}
			shouldRetry, err = ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, isFullChunk, offset, len(buffer), func(data []byte) {
				if n < len(buffer) {
					x := copy(buffer[n:], data)
					n += x
				}
			})
			if !shouldRetry {
				break
			}
			if err != nil {
				glog.V(0).Infof("read %s failed, err: %v", urlString, err)
			} else {
				break
			}
		}
		if err != nil && shouldRetry {
			glog.V(0).Infof("retry reading in %v", waitTime)
			time.Sleep(waitTime)
		} else {
			break
		}
	}

	return n, err
}