2022-04-02 08:27:49 +08:00
|
|
|
package shell
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"flag"
|
|
|
|
"fmt"
|
2022-08-24 14:18:21 +08:00
|
|
|
"io"
|
|
|
|
|
2022-07-29 15:17:28 +08:00
|
|
|
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
|
2022-04-02 08:27:49 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
Commands = append(Commands, &commandClusterCheck{})
|
|
|
|
}
|
|
|
|
|
|
|
|
// commandClusterCheck implements the "cluster.check" shell command.
// It carries no state of its own.
type commandClusterCheck struct{}
|
|
|
|
|
|
|
|
func (c *commandClusterCheck) Name() string {
|
|
|
|
return "cluster.check"
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *commandClusterCheck) Help() string {
|
|
|
|
return `check current cluster network connectivity
|
|
|
|
|
|
|
|
cluster.check
|
|
|
|
|
|
|
|
`
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *commandClusterCheck) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
|
|
|
|
|
|
|
|
clusterPsCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
|
|
|
|
if err = clusterPsCommand.Parse(args); err != nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-04-05 05:48:00 +08:00
|
|
|
// collect topology information
|
|
|
|
topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
fmt.Fprintf(writer, "Topology volumeSizeLimit:%d MB%s\n", volumeSizeLimitMb, diskInfosToString(topologyInfo.DiskInfos))
|
|
|
|
|
|
|
|
emptyDiskTypeDiskInfo, emptyDiskTypeFound := topologyInfo.DiskInfos[""]
|
|
|
|
hddDiskTypeDiskInfo, hddDiskTypeFound := topologyInfo.DiskInfos["hdd"]
|
2022-04-12 23:47:27 +08:00
|
|
|
if !emptyDiskTypeFound && !hddDiskTypeFound {
|
|
|
|
return fmt.Errorf("Need to a hdd disk type!")
|
|
|
|
}
|
2022-06-01 03:43:55 +08:00
|
|
|
if emptyDiskTypeFound && emptyDiskTypeDiskInfo.MaxVolumeCount == 0 || hddDiskTypeFound && hddDiskTypeDiskInfo.MaxVolumeCount == 0 {
|
2022-04-05 05:48:00 +08:00
|
|
|
return fmt.Errorf("Need to a hdd disk type!")
|
|
|
|
}
|
|
|
|
|
2022-04-02 08:27:49 +08:00
|
|
|
// collect filers
|
|
|
|
var filers []pb.ServerAddress
|
|
|
|
err = commandEnv.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
|
|
|
|
resp, err := client.ListClusterNodes(context.Background(), &master_pb.ListClusterNodesRequest{
|
|
|
|
ClientType: cluster.FilerType,
|
2022-05-02 12:59:16 +08:00
|
|
|
FilerGroup: *commandEnv.option.FilerGroup,
|
2022-04-02 08:27:49 +08:00
|
|
|
})
|
|
|
|
|
|
|
|
for _, node := range resp.ClusterNodes {
|
|
|
|
filers = append(filers, pb.ServerAddress(node.Address))
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
fmt.Fprintf(writer, "the cluster has %d filers: %+v\n", len(filers), filers)
|
|
|
|
|
|
|
|
// collect volume servers
|
|
|
|
var volumeServers []pb.ServerAddress
|
|
|
|
t, _, err := collectTopologyInfo(commandEnv, 0)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for _, dc := range t.DataCenterInfos {
|
|
|
|
for _, r := range dc.RackInfos {
|
|
|
|
for _, dn := range r.DataNodeInfos {
|
|
|
|
volumeServers = append(volumeServers, pb.NewServerAddressFromDataNode(dn))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Fprintf(writer, "the cluster has %d volume servers: %+v\n", len(volumeServers), volumeServers)
|
|
|
|
|
|
|
|
// collect all masters
|
|
|
|
var masters []pb.ServerAddress
|
|
|
|
for _, master := range commandEnv.MasterClient.GetMasters() {
|
|
|
|
masters = append(masters, master)
|
|
|
|
}
|
|
|
|
|
|
|
|
// check from master to volume servers
|
|
|
|
for _, master := range masters {
|
|
|
|
for _, volumeServer := range volumeServers {
|
|
|
|
fmt.Fprintf(writer, "checking master %s to volume server %s ... ", string(master), string(volumeServer))
|
2022-08-24 14:18:21 +08:00
|
|
|
err := pb.WithMasterClient(false, master, commandEnv.option.GrpcDialOption, false, func(client master_pb.SeaweedClient) error {
|
2022-04-17 04:24:17 +08:00
|
|
|
pong, err := client.Ping(context.Background(), &master_pb.PingRequest{
|
2022-04-02 08:27:49 +08:00
|
|
|
Target: string(volumeServer),
|
|
|
|
TargetType: cluster.VolumeServerType,
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err == nil {
|
|
|
|
printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
|
|
|
|
}
|
2022-04-02 08:27:49 +08:00
|
|
|
return err
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err != nil {
|
2022-04-02 08:27:49 +08:00
|
|
|
fmt.Fprintf(writer, "%v\n", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-04-02 08:40:25 +08:00
|
|
|
// check between masters
|
|
|
|
for _, sourceMaster := range masters {
|
|
|
|
for _, targetMaster := range masters {
|
2022-04-02 11:25:35 +08:00
|
|
|
if sourceMaster == targetMaster {
|
|
|
|
continue
|
|
|
|
}
|
2022-04-02 08:40:25 +08:00
|
|
|
fmt.Fprintf(writer, "checking master %s to %s ... ", string(sourceMaster), string(targetMaster))
|
2022-08-24 14:18:21 +08:00
|
|
|
err := pb.WithMasterClient(false, sourceMaster, commandEnv.option.GrpcDialOption, false, func(client master_pb.SeaweedClient) error {
|
2022-04-17 04:24:17 +08:00
|
|
|
pong, err := client.Ping(context.Background(), &master_pb.PingRequest{
|
2022-04-02 08:40:25 +08:00
|
|
|
Target: string(targetMaster),
|
|
|
|
TargetType: cluster.MasterType,
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err == nil {
|
|
|
|
printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
|
|
|
|
}
|
2022-04-02 08:40:25 +08:00
|
|
|
return err
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err != nil {
|
2022-04-02 08:40:25 +08:00
|
|
|
fmt.Fprintf(writer, "%v\n", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-04-02 08:27:49 +08:00
|
|
|
// check from volume servers to masters
|
|
|
|
for _, volumeServer := range volumeServers {
|
|
|
|
for _, master := range masters {
|
|
|
|
fmt.Fprintf(writer, "checking volume server %s to master %s ... ", string(volumeServer), string(master))
|
|
|
|
err := pb.WithVolumeServerClient(false, volumeServer, commandEnv.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
|
2022-04-17 04:24:17 +08:00
|
|
|
pong, err := client.Ping(context.Background(), &volume_server_pb.PingRequest{
|
2022-04-02 08:27:49 +08:00
|
|
|
Target: string(master),
|
|
|
|
TargetType: cluster.MasterType,
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err == nil {
|
|
|
|
printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
|
|
|
|
}
|
2022-04-02 08:27:49 +08:00
|
|
|
return err
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err != nil {
|
2022-04-02 08:27:49 +08:00
|
|
|
fmt.Fprintf(writer, "%v\n", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// check from filers to masters
|
|
|
|
for _, filer := range filers {
|
|
|
|
for _, master := range masters {
|
|
|
|
fmt.Fprintf(writer, "checking filer %s to master %s ... ", string(filer), string(master))
|
2023-01-20 17:48:12 +08:00
|
|
|
err := pb.WithFilerClient(false, 0, filer, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
|
2022-04-17 04:24:17 +08:00
|
|
|
pong, err := client.Ping(context.Background(), &filer_pb.PingRequest{
|
2022-04-02 08:27:49 +08:00
|
|
|
Target: string(master),
|
|
|
|
TargetType: cluster.MasterType,
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err == nil {
|
|
|
|
printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
|
|
|
|
}
|
2022-04-02 08:27:49 +08:00
|
|
|
return err
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err != nil {
|
2022-04-02 08:27:49 +08:00
|
|
|
fmt.Fprintf(writer, "%v\n", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// check from filers to volume servers
|
|
|
|
for _, filer := range filers {
|
|
|
|
for _, volumeServer := range volumeServers {
|
|
|
|
fmt.Fprintf(writer, "checking filer %s to volume server %s ... ", string(filer), string(volumeServer))
|
2023-01-20 17:48:12 +08:00
|
|
|
err := pb.WithFilerClient(false, 0, filer, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
|
2022-04-17 04:24:17 +08:00
|
|
|
pong, err := client.Ping(context.Background(), &filer_pb.PingRequest{
|
2022-04-02 08:27:49 +08:00
|
|
|
Target: string(volumeServer),
|
|
|
|
TargetType: cluster.VolumeServerType,
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err == nil {
|
|
|
|
printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
|
|
|
|
}
|
2022-04-02 08:27:49 +08:00
|
|
|
return err
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err != nil {
|
2022-04-02 08:27:49 +08:00
|
|
|
fmt.Fprintf(writer, "%v\n", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-04-02 08:40:25 +08:00
|
|
|
// check between volume servers
|
|
|
|
for _, sourceVolumeServer := range volumeServers {
|
|
|
|
for _, targetVolumeServer := range volumeServers {
|
2022-04-02 11:25:35 +08:00
|
|
|
if sourceVolumeServer == targetVolumeServer {
|
|
|
|
continue
|
|
|
|
}
|
2022-04-02 08:40:25 +08:00
|
|
|
fmt.Fprintf(writer, "checking volume server %s to %s ... ", string(sourceVolumeServer), string(targetVolumeServer))
|
|
|
|
err := pb.WithVolumeServerClient(false, sourceVolumeServer, commandEnv.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
|
2022-04-17 04:24:17 +08:00
|
|
|
pong, err := client.Ping(context.Background(), &volume_server_pb.PingRequest{
|
2022-04-02 08:40:25 +08:00
|
|
|
Target: string(targetVolumeServer),
|
|
|
|
TargetType: cluster.VolumeServerType,
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err == nil {
|
|
|
|
printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
|
|
|
|
}
|
2022-04-02 08:40:25 +08:00
|
|
|
return err
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err != nil {
|
2022-04-02 08:40:25 +08:00
|
|
|
fmt.Fprintf(writer, "%v\n", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-04-02 11:25:35 +08:00
|
|
|
// check between filers, and need to connect to itself
|
2022-04-02 08:40:25 +08:00
|
|
|
for _, sourceFiler := range filers {
|
|
|
|
for _, targetFiler := range filers {
|
|
|
|
fmt.Fprintf(writer, "checking filer %s to %s ... ", string(sourceFiler), string(targetFiler))
|
2023-01-20 17:48:12 +08:00
|
|
|
err := pb.WithFilerClient(false, 0, sourceFiler, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
|
2022-04-17 04:24:17 +08:00
|
|
|
pong, err := client.Ping(context.Background(), &filer_pb.PingRequest{
|
2022-04-02 08:40:25 +08:00
|
|
|
Target: string(targetFiler),
|
|
|
|
TargetType: cluster.FilerType,
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err == nil {
|
|
|
|
printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
|
|
|
|
}
|
2022-04-02 08:40:25 +08:00
|
|
|
return err
|
|
|
|
})
|
2022-04-17 04:24:17 +08:00
|
|
|
if err != nil {
|
2022-04-02 08:40:25 +08:00
|
|
|
fmt.Fprintf(writer, "%v\n", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-04-02 08:27:49 +08:00
|
|
|
return nil
|
|
|
|
}
|
2022-04-17 04:24:17 +08:00
|
|
|
|
|
|
|
// printTiming writes the outcome of a successful ping to writer.
//
// startNs and stopNs are the timestamps taken around the ping and remoteNs is
// the timestamp reported by the pinged side, all in nanoseconds. The line
// shows the round trip time (stopNs - startNs) and the clock delta (remoteNs
// minus the midpoint of startNs and stopNs), both in milliseconds.
func printTiming(writer io.Writer, startNs, remoteNs, stopNs int64) {
	const nsPerMs = 1000000

	midpointNs := (startNs + stopNs) / 2
	elapsedMs := float32(stopNs-startNs) / nsPerMs
	skewMs := float32(remoteNs-midpointNs) / nsPerMs

	fmt.Fprintf(writer, "ok round trip %.3fms clock delta %.3fms\n", elapsedMs, skewMs)
}
|