Skip to content
This repository has been archived by the owner on Mar 26, 2020. It is now read-only.

brick-mux : fix spurious socket connect failure issue during volume stop #1469

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions glusterd2/brick/glusterfsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type Glusterfsd struct {
// Externally consumable using methods of Glusterfsd interface
binarypath string
args []string
socketfilepath string
Socketfilepath string
pidfilepath string

// For internal use
Expand Down Expand Up @@ -89,8 +89,8 @@ func (b *Glusterfsd) Args() []string {
// SocketFile returns path to the brick socket file used for IPC.
func (b *Glusterfsd) SocketFile() string {

if b.socketfilepath != "" {
return b.socketfilepath
if b.Socketfilepath != "" {
return b.Socketfilepath
}

// First we form a key
Expand All @@ -101,10 +101,10 @@ func (b *Glusterfsd) SocketFile() string {
// Example: /var/run/gluster/<xxhash-hash>.socket
glusterdSockDir := config.GetString("rundir")
hash := strconv.FormatUint(xxhash.Sum64String(key), 16)
b.socketfilepath = path.Join(glusterdSockDir, hash+".socket")
b.Socketfilepath = path.Join(glusterdSockDir, hash+".socket")
// FIXME: The brick can no longer clean this up on clean shut down

return b.socketfilepath
return b.Socketfilepath
}

// PidFile returns path to the pid file of the brick process
Expand Down
29 changes: 18 additions & 11 deletions glusterd2/brickmux/demultiplex.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package brickmux
import (
"fmt"
"os"
"time"

"github.com/gluster/glusterd2/glusterd2/brick"
"github.com/gluster/glusterd2/glusterd2/daemon"
Expand All @@ -27,16 +26,32 @@ func IsLastBrickInProc(b brick.Brickinfo) bool {
// Demultiplex sends a detach request to the brick process which the
// specified brick is multiplexed onto.
func Demultiplex(b brick.Brickinfo) error {

var pidOnFile int
log.WithField("brick", b.String()).Debug("get brick daemon for demultiplex process")
brickDaemon, err := brick.NewGlusterfsd(b)
if err != nil {
return err
}
if pidOnFile, err = daemon.ReadPidFromFile(brickDaemon.PidFile()); err != nil {
log.WithFields(log.Fields{"brick": b.String(),
"pidfile": brickDaemon.PidFile()}).Error("Failed to read the pidfile")
return err

}
if pidOnFile == -1 {
log.WithFields(log.Fields{"brick": b.String(),
"pidfile": brickDaemon.PidFile()}).Error("Pid is -1")
return err

}
brickDaemon.Socketfilepath, err = glusterdGetSockFromBrickPid(pidOnFile)
if err != nil {
log.WithFields(log.Fields{"brick": b.String(),
"pid": pidOnFile}).Error("Failed to get the socket file of the glusterfsd process")
return err
}
log.WithFields(log.Fields{"brick": b.String(),
"socketFile": brickDaemon.SocketFile()}).Debug("starting demultiplex process")

client, err := daemon.GetRPCClient(brickDaemon)
if err != nil {
return err
Expand All @@ -58,16 +73,8 @@ func Demultiplex(b brick.Brickinfo) error {
}
log.WithField("brick", b.String()).Debug("detach request succeeded with result")

// TODO: Find an alternative to substitute the sleep.
// There might be some changes on glusterfsd side related to socket
// files used while brick signout,
// make appropriate changes once glusterfsd side is fixed.
time.Sleep(time.Millisecond * 200)

log.WithField("brick", b.String()).Debug("deleting brick socket and pid file")
os.Remove(brickDaemon.PidFile())
os.Remove(brickDaemon.SocketFile())
log.WithField("brick", b.String()).Debug("deleted brick socket and pid file")

return nil
}
8 changes: 0 additions & 8 deletions glusterd2/brickmux/multiplex.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package brickmux
import (
"fmt"
"net/rpc"
"os"

"github.com/gluster/glusterd2/glusterd2/brick"
"github.com/gluster/glusterd2/glusterd2/daemon"
Expand Down Expand Up @@ -87,13 +86,6 @@ func Multiplex(b brick.Brickinfo, v *volume.Volinfo, volumes []*volume.Volinfo,
return err
}

// create Unix Domain Socket hardlink
os.Remove(brickProc.SocketFile())
if err := os.Link(targetBrickProc.SocketFile(), brickProc.SocketFile()); err != nil {
undoMultiplex(client, &b)
return err
}

// create duplicate pidfile for the multiplexed brick
ok, pid := daemon.IsRunning(targetBrickProc)
if !ok {
Expand Down
27 changes: 25 additions & 2 deletions glusterd2/brickmux/utils.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package brickmux

import (
"fmt"
config "github.com/spf13/viper"
"io/ioutil"
"os"
"path"

config "github.com/spf13/viper"
"strings"
)

// getBrickVolfilePath returns path correspponding to the volfileID. Since, brick volfiles are now stored on
Expand All @@ -20,3 +22,24 @@ func getBrickVolfilePath(volfileID string) (string, error) {

return volfilePath, nil
}

// glusterdGetSockFromBrickPid returns the socket file from the /proc/
// filesystem for the concerned process running with the same pid
func glusterdGetSockFromBrickPid(pid int) (string, error) {
content, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
if err != nil {
return "", err
}

parts := strings.Split(string(content), "\x00")
prevPart := ""
socketFile := ""
for _, p := range parts {
if prevPart == "-S" {
socketFile = p
break
}
prevPart = p
}
return socketFile, nil
}