Ability to exit work loop gracefully

This change adds the ability to call Shutdown on a gearman-go worker which causes the
worker to wait for all currently active jobs to finish and then close the connection.

It does this by keeping tracking of the number of currently active transactions, disallowing
new job creation, and using a WaitGroup to wait for all active jobs to finish.
This commit is contained in:
Kyle Vigen 2014-06-10 11:37:10 -07:00
parent 320155c608
commit abfc23f301
2 changed files with 204 additions and 35 deletions

View File

@ -5,6 +5,7 @@ package worker
import ( import (
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"log"
"sync" "sync"
"time" "time"
) )
@ -18,11 +19,15 @@ const (
// It can connect to multi-server and grab jobs. // It can connect to multi-server and grab jobs.
type Worker struct { type Worker struct {
sync.Mutex sync.Mutex
agents []*agent agents []*agent
funcs jobFuncs funcs jobFuncs
in chan *inPack in chan *inPack
running bool running bool
ready bool ready bool
// The shuttingDown variable is protected by the Worker lock
shuttingDown bool
// Used during shutdown to wait for all active jobs to finish
activeJobs sync.WaitGroup
Id string Id string
ErrorHandler ErrorHandler ErrorHandler ErrorHandler
@ -137,7 +142,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
case dtNoJob: case dtNoJob:
inpack.a.PreSleep() inpack.a.PreSleep()
case dtNoop: case dtNoop:
inpack.a.Grab() if !worker.isShuttingDown() {
inpack.a.Grab()
}
case dtJobAssign, dtJobAssignUniq: case dtJobAssign, dtJobAssignUniq:
go func() { go func() {
if err := worker.exec(inpack); err != nil { if err := worker.exec(inpack); err != nil {
@ -147,7 +154,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
if worker.limit != nil { if worker.limit != nil {
worker.limit <- true worker.limit <- true
} }
inpack.a.Grab() if !worker.isShuttingDown() {
inpack.a.Grab()
}
case dtError: case dtError:
worker.err(inpack.Err()) worker.err(inpack.Err())
fallthrough fallthrough
@ -182,11 +191,12 @@ func (worker *Worker) Ready() (err error) {
// Main loop, block here // Main loop, block here
// Most of time, this should be evaluated in goroutine. // Most of time, this should be evaluated in goroutine.
func (worker *Worker) Work() { func (worker *Worker) Work() {
if ! worker.ready { if !worker.ready {
// didn't run Ready beforehand, so we'll have to do it: // didn't run Ready beforehand, so we'll have to do it:
err := worker.Ready() err := worker.Ready()
if err != nil { if err != nil {
panic( err ) log.Println("Error making worker ready: " + err.Error())
panic(err)
} }
} }
@ -224,6 +234,16 @@ func (worker *Worker) Close() {
} }
} }
// Shutdown server gracefully. This function will block until all active work has finished.
func (worker *Worker) Shutdown() {
worker.Lock()
worker.shuttingDown = true
worker.Unlock()
// Wait for all the active jobs to finish
worker.activeJobs.Wait()
worker.Close()
}
// Echo // Echo
func (worker *Worker) Echo(data []byte) { func (worker *Worker) Echo(data []byte) {
outpack := getOutPack() outpack := getOutPack()
@ -250,6 +270,13 @@ func (worker *Worker) SetId(id string) {
worker.broadcast(outpack) worker.broadcast(outpack)
} }
// IsShutdown checks to see if the worker is in the process of being shutdown.
func (worker *Worker) isShuttingDown() bool {
worker.Lock()
defer worker.Unlock()
return worker.shuttingDown
}
// inner job executing // inner job executing
func (worker *Worker) exec(inpack *inPack) (err error) { func (worker *Worker) exec(inpack *inPack) (err error) {
defer func() { defer func() {
@ -263,7 +290,14 @@ func (worker *Worker) exec(inpack *inPack) (err error) {
err = ErrUnknown err = ErrUnknown
} }
} }
worker.activeJobs.Done()
}() }()
worker.activeJobs.Add(1)
// Make sure that we don't accept any new work from old grab requests
// after we starting shutting down.
if worker.isShuttingDown() {
return
}
f, ok := worker.funcs[inpack.fn] f, ok := worker.funcs[inpack.fn]
if !ok { if !ok {
return fmt.Errorf("The function does not exist: %s", inpack.fn) return fmt.Errorf("The function does not exist: %s", inpack.fn)

View File

@ -1,7 +1,9 @@
package worker package worker
import ( import (
"errors"
"sync" "sync"
"sync/atomic"
"testing" "testing"
"time" "time"
) )
@ -78,12 +80,11 @@ func TestWork(t *testing.T) {
wg.Wait() wg.Wait()
} }
func TestWorkerClose(t *testing.T) { func TestWorkerClose(t *testing.T) {
worker.Close() worker.Close()
} }
func TestWorkWithoutReady(t * testing.T){ func TestWorkWithoutReady(t *testing.T) {
other_worker := New(Unlimited) other_worker := New(Unlimited)
if err := other_worker.AddServer(Network, "127.0.0.1:4730"); err != nil { if err := other_worker.AddServer(Network, "127.0.0.1:4730"); err != nil {
@ -94,13 +95,13 @@ func TestWorkWithoutReady(t * testing.T){
} }
timeout := make(chan bool, 1) timeout := make(chan bool, 1)
done := make( chan bool, 1) done := make(chan bool, 1)
other_worker.JobHandler = func( j Job ) error { other_worker.JobHandler = func(j Job) error {
if( ! other_worker.ready ){ if !other_worker.ready {
t.Error("Worker not ready as expected"); t.Error("Worker not ready as expected")
} }
done <-true done <- true
return nil return nil
} }
go func() { go func() {
@ -108,15 +109,15 @@ func TestWorkWithoutReady(t * testing.T){
timeout <- true timeout <- true
}() }()
go func(){ go func() {
other_worker.Work(); other_worker.Work()
}() }()
// With the all-in-one Work() we don't know if the // With the all-in-one Work() we don't know if the
// worker is ready at this stage so we may have to wait a sec: // worker is ready at this stage so we may have to wait a sec:
go func(){ go func() {
tries := 3 tries := 3
for( tries > 0 ){ for tries > 0 {
if other_worker.ready { if other_worker.ready {
other_worker.Echo([]byte("Hello")) other_worker.Echo([]byte("Hello"))
break break
@ -129,22 +130,22 @@ func TestWorkWithoutReady(t * testing.T){
}() }()
// determine if we've finished or timed out: // determine if we've finished or timed out:
select{ select {
case <- timeout: case <-timeout:
t.Error("Test timed out waiting for the worker") t.Error("Test timed out waiting for the worker")
case <- done: case <-done:
} }
} }
func TestWorkWithoutReadyWithPanic(t * testing.T){ func TestWorkWithoutReadyWithPanic(t *testing.T) {
other_worker := New(Unlimited) other_worker := New(Unlimited)
timeout := make(chan bool, 1) timeout := make(chan bool, 1)
done := make( chan bool, 1) done := make(chan bool, 1)
// Going to work with no worker setup. // Going to work with no worker setup.
// when Work (hopefully) calls Ready it will get an error which should cause it to panic() // when Work (hopefully) calls Ready it will get an error which should cause it to panic()
go func(){ go func() {
defer func() { defer func() {
if err := recover(); err != nil { if err := recover(); err != nil {
done <- true done <- true
@ -153,17 +154,151 @@ func TestWorkWithoutReadyWithPanic(t * testing.T){
t.Error("Work should raise a panic.") t.Error("Work should raise a panic.")
done <- true done <- true
}() }()
other_worker.Work(); other_worker.Work()
}() }()
go func() { go func() {
time.Sleep(2 * time.Second) time.Sleep(2 * time.Second)
timeout <- true timeout <- true
}() }()
select{ select {
case <- timeout: case <-timeout:
t.Error("Test timed out waiting for the worker") t.Error("Test timed out waiting for the worker")
case <- done: case <-done:
} }
} }
// initWorker creates a worker and adds the localhost server to it
func initWorker(t *testing.T) *Worker {
otherWorker := New(Unlimited)
if err := otherWorker.AddServer(Network, "127.0.0.1:4730"); err != nil {
t.Error(err)
}
return otherWorker
}
// submitEmptyInPack sends an empty inpack with the specified fn name to the worker. It uses
// the first agent of the worker.
func submitEmptyInPack(t *testing.T, worker *Worker, function string) {
if l := len(worker.agents); l != 1 {
t.Error("The worker has no agents")
}
inpack := getInPack()
inpack.dataType = dtJobAssign
inpack.fn = function
inpack.a = worker.agents[0]
worker.in <- inpack
}
// TestShutdownSuccessJob tests that shutdown handles active jobs that will succeed
func TestShutdownSuccessJob(t *testing.T) {
otherWorker := initWorker(t)
finishedJob := false
var wg sync.WaitGroup
successJob := func(job Job) ([]byte, error) {
wg.Done()
// Sleep for 10ms to ensure that the shutdown waits for this to finish
time.Sleep(time.Duration(10 * time.Millisecond))
finishedJob = true
return nil, nil
}
if err := otherWorker.AddFunc("test", successJob, 0); err != nil {
t.Error(err)
}
if err := otherWorker.Ready(); err != nil {
t.Error(err)
return
}
submitEmptyInPack(t, otherWorker, "test")
go otherWorker.Work()
// Wait for the success_job to start so that we know we didn't shutdown before even
// beginning to process the job.
wg.Add(1)
wg.Wait()
otherWorker.Shutdown()
if !finishedJob {
t.Error("Didn't finish job")
}
}
// TestShutdownFailureJob tests that shutdown handles active jobs that will fail
func TestShutdownFailureJob(t *testing.T) {
otherWorker := initWorker(t)
var wg sync.WaitGroup
finishedJob := false
failureJob := func(job Job) ([]byte, error) {
wg.Done()
// Sleep for 10ms to ensure that shutdown waits for this to finish
time.Sleep(time.Duration(10 * time.Millisecond))
finishedJob = true
return nil, errors.New("Error!")
}
if err := otherWorker.AddFunc("test", failureJob, 0); err != nil {
t.Error(err)
}
if err := otherWorker.Ready(); err != nil {
t.Error(err)
return
}
submitEmptyInPack(t, otherWorker, "test")
go otherWorker.Work()
// Wait for the failure_job to start so that we know we didn't shutdown before even
// beginning to process the job.
wg.Add(1)
wg.Wait()
otherWorker.Shutdown()
if !finishedJob {
t.Error("Didn't finish the failed job")
}
}
func TestSubmitMultipleJobs(t *testing.T) {
otherWorker := initWorker(t)
var startJobs sync.WaitGroup
startJobs.Add(2)
var jobsFinished int32 = 0
job := func(job Job) ([]byte, error) {
startJobs.Done()
// Sleep for 10ms to ensure that the shutdown waits for this to finish
time.Sleep(time.Duration(10 * time.Millisecond))
atomic.AddInt32(&jobsFinished, 1)
return nil, nil
}
if err := otherWorker.AddFunc("test", job, 0); err != nil {
t.Error(err)
}
if err := otherWorker.Ready(); err != nil {
t.Error(err)
return
}
submitEmptyInPack(t, otherWorker, "test")
submitEmptyInPack(t, otherWorker, "test")
go otherWorker.Work()
startJobs.Wait()
otherWorker.Shutdown()
if jobsFinished != 2 {
t.Error("Didn't run both jobs")
}
}
func TestSubmitJobAfterShutdown(t *testing.T) {
otherWorker := initWorker(t)
noRunJob := func(job Job) ([]byte, error) {
t.Error("This job shouldn't have been run")
return nil, nil
}
if err := otherWorker.AddFunc("test", noRunJob, 0); err != nil {
t.Error(err)
}
if err := otherWorker.Ready(); err != nil {
t.Error(err)
return
}
go otherWorker.Work()
otherWorker.Shutdown()
submitEmptyInPack(t, otherWorker, "test")
// Sleep for 10ms to make sure that the job doesn't run
time.Sleep(time.Duration(10 * time.Millisecond))
}