Wrap disconnect errors and allow reconnect

This commit is contained in:
Joe Higton 2014-06-10 03:23:18 +01:00
parent 99bcf11768
commit 1ebb3d5fcc
3 changed files with 177 additions and 38 deletions

View File

@ -46,6 +46,7 @@ func (a *agent) work() {
a.worker.err(err.(error)) a.worker.err(err.(error))
} }
}() }()
var inpack *inPack var inpack *inPack
var l int var l int
var err error var err error
@ -56,7 +57,17 @@ func (a *agent) work() {
if opErr.Temporary() { if opErr.Temporary() {
continue continue
}else{ }else{
a.worker.err(err) a.Lock()
if( a.conn != nil ){
a.Unlock()
err = &WorkerDisconnectError{
OpError : opErr,
agent : a,
}
a.worker.err(err)
}
// else - we're probably dc'ing due to a Close()
break break
} }
@ -107,6 +118,10 @@ func (a *agent) Close() {
func (a *agent) Grab() { func (a *agent) Grab() {
a.Lock() a.Lock()
defer a.Unlock() defer a.Unlock()
a.grab()
}
func (a *agent) grab(){
outpack := getOutPack() outpack := getOutPack()
outpack.dataType = dtGrabJobUniq outpack.dataType = dtGrabJobUniq
a.write(outpack) a.write(outpack)
@ -120,6 +135,23 @@ func (a *agent) PreSleep() {
a.write(outpack) a.write(outpack)
} }
func (a *agent) reconnect() (error){
a.Lock()
defer a.Unlock()
conn, err := net.Dial(a.net, a.addr)
if err != nil {
return err;
}
a.conn = conn
a.rw = bufio.NewReadWriter(bufio.NewReader(a.conn),
bufio.NewWriter(a.conn))
a.grab()
a.worker.reRegisterFuncsForAgent(a)
go a.work()
return nil
}
// read length bytes from the socket // read length bytes from the socket
func (a *agent) read(length int) (data []byte, err error) { func (a *agent) read(length int) (data []byte, err error) {
n := 0 n := 0

View File

@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"sync" "sync"
"time" "time"
"net"
) )
const ( const (
@ -30,6 +31,7 @@ type Worker struct {
limit chan bool limit chan bool
} }
// Return a worker. // Return a worker.
// //
// If limit is set to Unlimited(=0), the worker will grab all jobs // If limit is set to Unlimited(=0), the worker will grab all jobs
@ -94,6 +96,11 @@ func (worker *Worker) AddFunc(funcname string,
// inner add // inner add
func (worker *Worker) addFunc(funcname string, timeout uint32) { func (worker *Worker) addFunc(funcname string, timeout uint32) {
outpack := prepFuncOutpack( funcname, timeout)
worker.broadcast(outpack)
}
func prepFuncOutpack(funcname string, timeout uint32) (*outPack){
outpack := getOutPack() outpack := getOutPack()
if timeout == 0 { if timeout == 0 {
outpack.dataType = dtCanDo outpack.dataType = dtCanDo
@ -106,7 +113,7 @@ func (worker *Worker) addFunc(funcname string, timeout uint32) {
outpack.data[l] = '\x00' outpack.data[l] = '\x00'
binary.BigEndian.PutUint32(outpack.data[l+1:], timeout) binary.BigEndian.PutUint32(outpack.data[l+1:], timeout)
} }
worker.broadcast(outpack) return outpack
} }
// Remove a function. // Remove a function.
@ -293,6 +300,15 @@ func (worker *Worker) exec(inpack *inPack) (err error) {
} }
return return
} }
func (worker *Worker)reRegisterFuncsForAgent( a * agent ){
worker.Lock()
defer worker.Unlock()
for funcname, f := range worker.funcs {
outpack := prepFuncOutpack( funcname, f.timeout)
a.write(outpack)
}
}
// inner result // inner result
type result struct { type result struct {
@ -316,3 +332,17 @@ func execTimeout(f JobFunc, job Job, timeout time.Duration) (r *result) {
} }
return r return r
} }
// Error type passed when a worker connection disconnects
type WorkerDisconnectError struct{
*net.OpError
agent * agent
}
// Responds to the error by asking the worker to reconnect
func (e *WorkerDisconnectError) Reconnect() ( err error ){
return e.agent.reconnect()
}
// Which server was this for?
func(e *WorkerDisconnectError) Server() ( net string, addr string ){
return e.agent.net, e.agent.addr
}

View File

@ -20,8 +20,8 @@ func init() {
if check_gearman_present() { if check_gearman_present() {
panic(`Something already listening on our testing port. Chickening out of testing with it!`) panic(`Something already listening on our testing port. Chickening out of testing with it!`)
} }
gearman_ready = make( chan bool ) gearman_ready = make(chan bool)
kill_gearman = make( chan bool ) kill_gearman = make(chan bool)
// TODO: verify port is clear // TODO: verify port is clear
go run_gearman() go run_gearman()
} }
@ -61,7 +61,6 @@ func check_gearman_present() bool {
log.Println("gearman not ready " + err.Error()) log.Println("gearman not ready " + err.Error())
return false return false
} }
log.Println("gearman ready")
con.Close() con.Close()
return true return true
} }
@ -81,38 +80,32 @@ func check_gearman_is_dead() bool {
Checks for a disconnect whilst not working Checks for a disconnect whilst not working
*/ */
func TestBasicDisconnect(t *testing.T) { func TestBasicDisconnect(t *testing.T) {
<- gearman_ready <-gearman_ready
worker := New(Unlimited) worker := New(Unlimited)
timeout := make(chan bool, 1) timeout := make(chan bool, 1)
done := make( chan bool, 1) done := make(chan bool, 1)
if err := worker.AddServer(Network, "127.0.0.1:" + port); err != nil { if err := worker.AddServer(Network, "127.0.0.1:"+port); err != nil {
t.Error(err) t.Error(err)
} }
work_done := false; work_done := false
if err := worker.AddFunc("gearman-go-workertest", if err := worker.AddFunc("gearman-go-workertest",
func(j Job)(b []byte, e error){ func(j Job) (b []byte, e error) {
work_done = true; work_done = true
done <- true done <- true
return}, 0); return
err != nil { }, 0); err != nil {
t.Error(err) t.Error(err)
} }
worker.JobHandler = func( j Job ) error {
if( ! worker.ready ){
t.Error("Worker not ready as expected");
}
done <-true
return nil
}
handled_errors := false handled_errors := false
c_error := make( chan bool) c_error := make(chan bool)
worker.ErrorHandler = func( e error ){ was_dc_err := false
log.Println( e ) worker.ErrorHandler = func(e error) {
log.Println(e)
_, was_dc_err = e.(*WorkerDisconnectError)
handled_errors = true handled_errors = true
c_error <- true c_error <- true
} }
@ -149,22 +142,106 @@ func TestBasicDisconnect(t *testing.T) {
t.Error("Test timed out waiting for the error handler") t.Error("Test timed out waiting for the error handler")
case <-c_error: case <-c_error:
// error was handled! // error was handled!
if ! was_dc_err {
t.Error( "Disconnect didn't manifest as a net.OpError?")
}
} }
worker.Close()
kill_gearman <- true kill_gearman <- true
} }
func send_client_request(){ func TestDcRc(t *testing.T) {
log.Println("sending client request"); check_gearman_is_dead()
c, err := client.New( Network, "127.0.0.1:" + port ) go run_gearman()
<-gearman_ready
worker := New(Unlimited)
timeout := make(chan bool, 1)
done := make(chan bool, 1)
if err := worker.AddServer(Network, "127.0.0.1:"+port); err != nil {
t.Error(err)
}
work_done := false
if err := worker.AddFunc("gearman-go-workertest",
func(j Job) (b []byte, e error) {
log.Println("Actual work happens!")
work_done = true
done <- true
return
}, 0); err != nil {
t.Error(err)
}
worker.ErrorHandler = func(e error) {
wdc, wdcok := e.(*WorkerDisconnectError)
if( wdcok){
log.Println("Reconnecting!")
reconnected := false
for tries := 20 ; ! reconnected && tries > 0; tries -- {
rcerr := wdc.Reconnect()
if rcerr != nil{
time.Sleep(250 * time.Millisecond)
} else{
reconnected = true;
}
}
} else{
panic("Some other kind of error " + e.Error())
}
}
go func() {
time.Sleep(5 * time.Second)
timeout <- true
}()
err := worker.Ready()
if err != nil {
t.Error(err)
}
go worker.Work()
kill_gearman <- true
check_gearman_is_dead()
go run_gearman()
select {
case <-gearman_ready:
case <-timeout:
}
send_client_request()
select {
case <- done:
case <-timeout:
t.Error("Test timed out")
}
worker.Close()
kill_gearman <- true
}
func send_client_request() {
log.Println("sending client request")
c, err := client.New(Network, "127.0.0.1:"+port)
if err == nil { if err == nil {
_, err = c.DoBg("gearman-go-workertest", []byte{}, client.JobHigh) _, err = c.DoBg("gearman-go-workertest", []byte{}, client.JobHigh)
if err != nil { if err != nil {
log.Println( "error sending client request " + err.Error() ) log.Println("error sending client request " + err.Error())
} }
}else{ } else {
log.Println( "error with client " + err.Error() ) log.Println("error with client " + err.Error())
} }
} }