Wrap disconnect errors and allow reconnect

This commit is contained in:
Joe Higton 2014-06-10 03:23:18 +01:00
parent 99bcf11768
commit 1ebb3d5fcc
3 changed files with 177 additions and 38 deletions

View File

@ -46,6 +46,7 @@ func (a *agent) work() {
a.worker.err(err.(error))
}
}()
var inpack *inPack
var l int
var err error
@ -56,7 +57,17 @@ func (a *agent) work() {
if opErr.Temporary() {
continue
}else{
a.worker.err(err)
a.Lock()
if( a.conn != nil ){
a.Unlock()
err = &WorkerDisconnectError{
OpError : opErr,
agent : a,
}
a.worker.err(err)
}
// else - we're probably dc'ing due to a Close()
break
}
@ -107,6 +118,10 @@ func (a *agent) Close() {
func (a *agent) Grab() {
a.Lock()
defer a.Unlock()
a.grab()
}
func (a *agent) grab(){
outpack := getOutPack()
outpack.dataType = dtGrabJobUniq
a.write(outpack)
@ -120,6 +135,23 @@ func (a *agent) PreSleep() {
a.write(outpack)
}
func (a *agent) reconnect() (error){
a.Lock()
defer a.Unlock()
conn, err := net.Dial(a.net, a.addr)
if err != nil {
return err;
}
a.conn = conn
a.rw = bufio.NewReadWriter(bufio.NewReader(a.conn),
bufio.NewWriter(a.conn))
a.grab()
a.worker.reRegisterFuncsForAgent(a)
go a.work()
return nil
}
// read length bytes from the socket
func (a *agent) read(length int) (data []byte, err error) {
n := 0

View File

@ -7,6 +7,7 @@ import (
"fmt"
"sync"
"time"
"net"
)
const (
@ -30,6 +31,7 @@ type Worker struct {
limit chan bool
}
// Return a worker.
//
// If limit is set to Unlimited(=0), the worker will grab all jobs
@ -94,6 +96,11 @@ func (worker *Worker) AddFunc(funcname string,
// inner add
func (worker *Worker) addFunc(funcname string, timeout uint32) {
outpack := prepFuncOutpack( funcname, timeout)
worker.broadcast(outpack)
}
func prepFuncOutpack(funcname string, timeout uint32) (*outPack){
outpack := getOutPack()
if timeout == 0 {
outpack.dataType = dtCanDo
@ -106,7 +113,7 @@ func (worker *Worker) addFunc(funcname string, timeout uint32) {
outpack.data[l] = '\x00'
binary.BigEndian.PutUint32(outpack.data[l+1:], timeout)
}
worker.broadcast(outpack)
return outpack
}
// Remove a function.
@ -293,6 +300,15 @@ func (worker *Worker) exec(inpack *inPack) (err error) {
}
return
}
func (worker *Worker)reRegisterFuncsForAgent( a * agent ){
worker.Lock()
defer worker.Unlock()
for funcname, f := range worker.funcs {
outpack := prepFuncOutpack( funcname, f.timeout)
a.write(outpack)
}
}
// inner result
type result struct {
@ -316,3 +332,17 @@ func execTimeout(f JobFunc, job Job, timeout time.Duration) (r *result) {
}
return r
}
// Error type passed when a worker connection disconnects
type WorkerDisconnectError struct{
*net.OpError
agent * agent
}
// Responds to the error by asking the worker to reconnect
func (e *WorkerDisconnectError) Reconnect() ( err error ){
return e.agent.reconnect()
}
// Which server was this for?
func(e *WorkerDisconnectError) Server() ( net string, addr string ){
return e.agent.net, e.agent.addr
}

View File

@ -20,8 +20,8 @@ func init() {
if check_gearman_present() {
panic(`Something already listening on our testing port. Chickening out of testing with it!`)
}
gearman_ready = make( chan bool )
kill_gearman = make( chan bool )
gearman_ready = make(chan bool)
kill_gearman = make(chan bool)
// TODO: verify port is clear
go run_gearman()
}
@ -61,7 +61,6 @@ func check_gearman_present() bool {
log.Println("gearman not ready " + err.Error())
return false
}
log.Println("gearman ready")
con.Close()
return true
}
@ -81,38 +80,32 @@ func check_gearman_is_dead() bool {
Checks for a disconnect whilst not working
*/
func TestBasicDisconnect(t *testing.T) {
<- gearman_ready
<-gearman_ready
worker := New(Unlimited)
timeout := make(chan bool, 1)
done := make( chan bool, 1)
done := make(chan bool, 1)
if err := worker.AddServer(Network, "127.0.0.1:" + port); err != nil {
if err := worker.AddServer(Network, "127.0.0.1:"+port); err != nil {
t.Error(err)
}
work_done := false;
if err := worker.AddFunc("gearman-go-workertest",
func(j Job)(b []byte, e error){
work_done = true;
done <- true
return}, 0);
err != nil {
work_done := false
if err := worker.AddFunc("gearman-go-workertest",
func(j Job) (b []byte, e error) {
work_done = true
done <- true
return
}, 0); err != nil {
t.Error(err)
}
worker.JobHandler = func( j Job ) error {
if( ! worker.ready ){
t.Error("Worker not ready as expected");
}
done <-true
return nil
}
handled_errors := false
c_error := make( chan bool)
worker.ErrorHandler = func( e error ){
log.Println( e )
handled_errors := false
c_error := make(chan bool)
was_dc_err := false
worker.ErrorHandler = func(e error) {
log.Println(e)
_, was_dc_err = e.(*WorkerDisconnectError)
handled_errors = true
c_error <- true
}
@ -149,22 +142,106 @@ func TestBasicDisconnect(t *testing.T) {
t.Error("Test timed out waiting for the error handler")
case <-c_error:
// error was handled!
if ! was_dc_err {
t.Error( "Disconnect didn't manifest as a net.OpError?")
}
}
worker.Close()
kill_gearman <- true
}
func send_client_request(){
log.Println("sending client request");
c, err := client.New( Network, "127.0.0.1:" + port )
func TestDcRc(t *testing.T) {
check_gearman_is_dead()
go run_gearman()
<-gearman_ready
worker := New(Unlimited)
timeout := make(chan bool, 1)
done := make(chan bool, 1)
if err := worker.AddServer(Network, "127.0.0.1:"+port); err != nil {
t.Error(err)
}
work_done := false
if err := worker.AddFunc("gearman-go-workertest",
func(j Job) (b []byte, e error) {
log.Println("Actual work happens!")
work_done = true
done <- true
return
}, 0); err != nil {
t.Error(err)
}
worker.ErrorHandler = func(e error) {
wdc, wdcok := e.(*WorkerDisconnectError)
if( wdcok){
log.Println("Reconnecting!")
reconnected := false
for tries := 20 ; ! reconnected && tries > 0; tries -- {
rcerr := wdc.Reconnect()
if rcerr != nil{
time.Sleep(250 * time.Millisecond)
} else{
reconnected = true;
}
}
} else{
panic("Some other kind of error " + e.Error())
}
}
go func() {
time.Sleep(5 * time.Second)
timeout <- true
}()
err := worker.Ready()
if err != nil {
t.Error(err)
}
go worker.Work()
kill_gearman <- true
check_gearman_is_dead()
go run_gearman()
select {
case <-gearman_ready:
case <-timeout:
}
send_client_request()
select {
case <- done:
case <-timeout:
t.Error("Test timed out")
}
worker.Close()
kill_gearman <- true
}
func send_client_request() {
log.Println("sending client request")
c, err := client.New(Network, "127.0.0.1:"+port)
if err == nil {
_, err = c.DoBg("gearman-go-workertest", []byte{}, client.JobHigh)
if err != nil {
log.Println( "error sending client request " + err.Error() )
log.Println("error sending client request " + err.Error())
}
}else{
log.Println( "error with client " + err.Error() )
} else {
log.Println("error with client " + err.Error())
}
}
}