Browse Source

Merge pull request #45 from draxil/worker_disconnect_testing

Allow reconnect from error handler
tags/0.2
Xing 10 years ago
parent
commit
dde0c3e9b3
3 changed files with 184 additions and 39 deletions
  1. +39
    -1
      worker/agent.go
  2. +34
    -1
      worker/worker.go
  3. +111
    -37
      worker/worker_disconnect_test.go

+ 39
- 1
worker/agent.go View File

@@ -4,6 +4,7 @@ import (
"bufio"
"net"
"sync"
"io"
)

// The agent of job server.
@@ -46,6 +47,7 @@ func (a *agent) work() {
a.worker.err(err.(error))
}
}()

var inpack *inPack
var l int
var err error
@@ -56,10 +58,15 @@ func (a *agent) work() {
if opErr.Temporary() {
continue
}else{
a.worker.err(err)
a.disconnect_error(err)
// else - we're probably dc'ing due to a Close()

break
}
} else if( err == io.EOF ){
a.disconnect_error(err)
break
}
a.worker.err(err)
// If it is unexpected error and the connection wasn't
@@ -95,6 +102,16 @@ func (a *agent) work() {
}
}

func (a * agent) disconnect_error( err error ){
if( a.conn != nil ){
err = &WorkerDisconnectError{
err : err,
agent : a,
}
a.worker.err(err)
}
}

func (a *agent) Close() {
a.Lock()
defer a.Unlock()
@@ -107,6 +124,10 @@ func (a *agent) Close() {
func (a *agent) Grab() {
a.Lock()
defer a.Unlock()
a.grab()
}

func (a *agent) grab(){
outpack := getOutPack()
outpack.dataType = dtGrabJobUniq
a.write(outpack)
@@ -120,6 +141,23 @@ func (a *agent) PreSleep() {
a.write(outpack)
}

func (a *agent) reconnect() (error){
a.Lock()
defer a.Unlock()
conn, err := net.Dial(a.net, a.addr)
if err != nil {
return err;
}
a.conn = conn
a.rw = bufio.NewReadWriter(bufio.NewReader(a.conn),
bufio.NewWriter(a.conn))
a.grab()
a.worker.reRegisterFuncsForAgent(a)

go a.work()
return nil
}

// read length bytes from the socket
func (a *agent) read(length int) (data []byte, err error) {
n := 0


+ 34
- 1
worker/worker.go View File

@@ -30,6 +30,7 @@ type Worker struct {
limit chan bool
}


// Return a worker.
//
// If limit is set to Unlimited(=0), the worker will grab all jobs
@@ -94,6 +95,11 @@ func (worker *Worker) AddFunc(funcname string,

// inner add
func (worker *Worker) addFunc(funcname string, timeout uint32) {
outpack := prepFuncOutpack( funcname, timeout)
worker.broadcast(outpack)
}

func prepFuncOutpack(funcname string, timeout uint32) (*outPack){
outpack := getOutPack()
if timeout == 0 {
outpack.dataType = dtCanDo
@@ -106,7 +112,7 @@ func (worker *Worker) addFunc(funcname string, timeout uint32) {
outpack.data[l] = '\x00'
binary.BigEndian.PutUint32(outpack.data[l+1:], timeout)
}
worker.broadcast(outpack)
return outpack
}

// Remove a function.
@@ -293,6 +299,15 @@ func (worker *Worker) exec(inpack *inPack) (err error) {
}
return
}
func (worker *Worker)reRegisterFuncsForAgent( a * agent ){
worker.Lock()
defer worker.Unlock()
for funcname, f := range worker.funcs {
outpack := prepFuncOutpack( funcname, f.timeout)
a.write(outpack)
}

}

// inner result
type result struct {
@@ -316,3 +331,21 @@ func execTimeout(f JobFunc, job Job, timeout time.Duration) (r *result) {
}
return r
}

// Error type passed when a worker connection disconnects
type WorkerDisconnectError struct{
err error
agent * agent
}
func (e *WorkerDisconnectError) Error() ( string){
return e.err.Error();
}

// Responds to the error by asking the worker to reconnect
func (e *WorkerDisconnectError) Reconnect() ( err error ){
return e.agent.reconnect()
}
// Which server was this for?
func(e *WorkerDisconnectError) Server() ( net string, addr string ){
return e.agent.net, e.agent.addr
}

+ 111
- 37
worker/worker_disconnect_test.go View File

@@ -20,8 +20,8 @@ func init() {
if check_gearman_present() {
panic(`Something already listening on our testing port. Chickening out of testing with it!`)
}
gearman_ready = make( chan bool )
kill_gearman = make( chan bool )
gearman_ready = make(chan bool)
kill_gearman = make(chan bool)
// TODO: verify port is clear
go run_gearman()
}
@@ -36,7 +36,6 @@ func run_gearman() {

// Make sure we clear up our gearman:
defer func() {
log.Println("killing gearmand")
gm_cmd.Process.Kill()
}()

@@ -58,10 +57,8 @@ func run_gearman() {
func check_gearman_present() bool {
con, err := net.Dial(`tcp`, `127.0.0.1:`+port)
if err != nil {
log.Println("gearman not ready " + err.Error())
return false
}
log.Println("gearman ready")
con.Close()
return true
}
@@ -81,38 +78,32 @@ func check_gearman_is_dead() bool {
Checks for a disconnect whilst not working
*/
func TestBasicDisconnect(t *testing.T) {
<- gearman_ready
<-gearman_ready
worker := New(Unlimited)
timeout := make(chan bool, 1)
done := make( chan bool, 1)
done := make(chan bool, 1)

if err := worker.AddServer(Network, "127.0.0.1:" + port); err != nil {
if err := worker.AddServer(Network, "127.0.0.1:"+port); err != nil {
t.Error(err)
}
work_done := false;
if err := worker.AddFunc("gearman-go-workertest",
func(j Job)(b []byte, e error){
work_done = true;
done <- true
return}, 0);
err != nil {
work_done := false
if err := worker.AddFunc("gearman-go-workertest",
func(j Job) (b []byte, e error) {
work_done = true
done <- true
return
}, 0); err != nil {
t.Error(err)
}

worker.JobHandler = func( j Job ) error {
if( ! worker.ready ){
t.Error("Worker not ready as expected");
}
done <-true
return nil
}
handled_errors := false
c_error := make( chan bool)
worker.ErrorHandler = func( e error ){
log.Println( e )
handled_errors := false

c_error := make(chan bool)
was_dc_err := false
worker.ErrorHandler = func(e error) {
log.Println(e)
_, was_dc_err = e.(*WorkerDisconnectError)
handled_errors = true
c_error <- true
}
@@ -149,22 +140,105 @@ func TestBasicDisconnect(t *testing.T) {
t.Error("Test timed out waiting for the error handler")
case <-c_error:
// error was handled!
if ! was_dc_err {
t.Error( "Disconnect didn't manifest as a net.OpError?")
}
}
worker.Close()
kill_gearman <- true

}

func TestDcRc(t *testing.T) {
check_gearman_is_dead()
go run_gearman()

<-gearman_ready

worker := New(Unlimited)
timeout := make(chan bool, 1)
done := make(chan bool, 1)

if err := worker.AddServer(Network, "127.0.0.1:"+port); err != nil {
t.Error(err)
}
work_done := false
if err := worker.AddFunc("gearman-go-workertest",
func(j Job) (b []byte, e error) {
log.Println("Actual work happens!")
work_done = true
done <- true
return
}, 0); err != nil {
t.Error(err)
}

worker.ErrorHandler = func(e error) {
wdc, wdcok := e.(*WorkerDisconnectError)
if( wdcok){
log.Println("Reconnecting!")
reconnected := false
for tries := 20 ; ! reconnected && tries > 0; tries -- {
rcerr := wdc.Reconnect()
if rcerr != nil{
time.Sleep(250 * time.Millisecond)
} else{
reconnected = true;
}
}
} else{
panic("Some other kind of error " + e.Error())
}
}

go func() {
time.Sleep(5 * time.Second)
timeout <- true
}()

err := worker.Ready()

if err != nil {
t.Error(err)
}

go worker.Work()

kill_gearman <- true

check_gearman_is_dead()
go run_gearman()

select {
case <-gearman_ready:
case <-timeout:
}

send_client_request()
select {
case <- done:
case <-timeout:
t.Error("Test timed out")
}
worker.Close()
kill_gearman <- true

}

func send_client_request(){
log.Println("sending client request");
c, err := client.New( Network, "127.0.0.1:" + port )
func send_client_request() {
c, err := client.New(Network, "127.0.0.1:"+port)
if err == nil {
_, err = c.DoBg("gearman-go-workertest", []byte{}, client.JobHigh)
if err != nil {
log.Println( "error sending client request " + err.Error() )
log.Println("error sending client request " + err.Error())
}
}else{
log.Println( "error with client " + err.Error() )
} else {
log.Println("error with client " + err.Error())
}
}
}

Loading…
Cancel
Save