Merge pull request #1 from Clever/graceful-shutdown

Ability to exit work loop gracefully
This commit is contained in:
kvigen 2014-06-17 11:24:19 -07:00
commit 097037d212
2 changed files with 204 additions and 35 deletions

View File

@ -5,6 +5,7 @@ package worker
import (
"encoding/binary"
"fmt"
"log"
"sync"
"time"
)
@ -18,11 +19,15 @@ const (
// It can connect to multi-server and grab jobs.
type Worker struct {
sync.Mutex
agents []*agent
funcs jobFuncs
in chan *inPack
running bool
ready bool
agents []*agent
funcs jobFuncs
in chan *inPack
running bool
ready bool
// The shuttingDown variable is protected by the Worker lock
shuttingDown bool
// Used during shutdown to wait for all active jobs to finish
activeJobs sync.WaitGroup
Id string
ErrorHandler ErrorHandler
@ -137,7 +142,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
case dtNoJob:
inpack.a.PreSleep()
case dtNoop:
inpack.a.Grab()
if !worker.isShuttingDown() {
inpack.a.Grab()
}
case dtJobAssign, dtJobAssignUniq:
go func() {
if err := worker.exec(inpack); err != nil {
@ -147,7 +154,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
if worker.limit != nil {
worker.limit <- true
}
inpack.a.Grab()
if !worker.isShuttingDown() {
inpack.a.Grab()
}
case dtError:
worker.err(inpack.Err())
fallthrough
@ -182,11 +191,12 @@ func (worker *Worker) Ready() (err error) {
// Main loop, block here
// Most of time, this should be evaluated in goroutine.
func (worker *Worker) Work() {
if ! worker.ready {
if !worker.ready {
// didn't run Ready beforehand, so we'll have to do it:
err := worker.Ready()
if err != nil {
panic( err )
log.Println("Error making worker ready: " + err.Error())
panic(err)
}
}
@ -224,6 +234,16 @@ func (worker *Worker) Close() {
}
}
// Shutdown server gracefully. This function will block until all active work has finished.
func (worker *Worker) Shutdown() {
worker.Lock()
worker.shuttingDown = true
worker.Unlock()
// Wait for all the active jobs to finish
worker.activeJobs.Wait()
worker.Close()
}
// Echo
func (worker *Worker) Echo(data []byte) {
outpack := getOutPack()
@ -250,6 +270,13 @@ func (worker *Worker) SetId(id string) {
worker.broadcast(outpack)
}
// IsShutdown checks to see if the worker is in the process of being shutdown.
func (worker *Worker) isShuttingDown() bool {
worker.Lock()
defer worker.Unlock()
return worker.shuttingDown
}
// inner job executing
func (worker *Worker) exec(inpack *inPack) (err error) {
defer func() {
@ -263,7 +290,14 @@ func (worker *Worker) exec(inpack *inPack) (err error) {
err = ErrUnknown
}
}
worker.activeJobs.Done()
}()
worker.activeJobs.Add(1)
// Make sure that we don't accept any new work from old grab requests
// after we starting shutting down.
if worker.isShuttingDown() {
return
}
f, ok := worker.funcs[inpack.fn]
if !ok {
return fmt.Errorf("The function does not exist: %s", inpack.fn)

View File

@ -1,7 +1,9 @@
package worker
import (
"errors"
"sync"
"sync/atomic"
"testing"
"time"
)
@ -78,12 +80,11 @@ func TestWork(t *testing.T) {
wg.Wait()
}
func TestWorkerClose(t *testing.T) {
worker.Close()
}
func TestWorkWithoutReady(t * testing.T){
func TestWorkWithoutReady(t *testing.T) {
other_worker := New(Unlimited)
if err := other_worker.AddServer(Network, "127.0.0.1:4730"); err != nil {
@ -92,15 +93,15 @@ func TestWorkWithoutReady(t * testing.T){
if err := other_worker.AddFunc("gearman-go-workertest", foobar, 0); err != nil {
t.Error(err)
}
timeout := make(chan bool, 1)
done := make( chan bool, 1)
other_worker.JobHandler = func( j Job ) error {
if( ! other_worker.ready ){
t.Error("Worker not ready as expected");
timeout := make(chan bool, 1)
done := make(chan bool, 1)
other_worker.JobHandler = func(j Job) error {
if !other_worker.ready {
t.Error("Worker not ready as expected")
}
done <-true
done <- true
return nil
}
go func() {
@ -108,15 +109,15 @@ func TestWorkWithoutReady(t * testing.T){
timeout <- true
}()
go func(){
other_worker.Work();
go func() {
other_worker.Work()
}()
// With the all-in-one Work() we don't know if the
// With the all-in-one Work() we don't know if the
// worker is ready at this stage so we may have to wait a sec:
go func(){
go func() {
tries := 3
for( tries > 0 ){
for tries > 0 {
if other_worker.ready {
other_worker.Echo([]byte("Hello"))
break
@ -127,24 +128,24 @@ func TestWorkWithoutReady(t * testing.T){
tries--
}
}()
// determine if we've finished or timed out:
select{
case <- timeout:
select {
case <-timeout:
t.Error("Test timed out waiting for the worker")
case <- done:
case <-done:
}
}
func TestWorkWithoutReadyWithPanic(t * testing.T){
func TestWorkWithoutReadyWithPanic(t *testing.T) {
other_worker := New(Unlimited)
timeout := make(chan bool, 1)
done := make( chan bool, 1)
done := make(chan bool, 1)
// Going to work with no worker setup.
// when Work (hopefully) calls Ready it will get an error which should cause it to panic()
go func(){
go func() {
defer func() {
if err := recover(); err != nil {
done <- true
@ -153,17 +154,151 @@ func TestWorkWithoutReadyWithPanic(t * testing.T){
t.Error("Work should raise a panic.")
done <- true
}()
other_worker.Work();
other_worker.Work()
}()
go func() {
time.Sleep(2 * time.Second)
timeout <- true
}()
select{
case <- timeout:
select {
case <-timeout:
t.Error("Test timed out waiting for the worker")
case <- done:
case <-done:
}
}
// initWorker creates a worker and adds the localhost server to it
func initWorker(t *testing.T) *Worker {
otherWorker := New(Unlimited)
if err := otherWorker.AddServer(Network, "127.0.0.1:4730"); err != nil {
t.Error(err)
}
return otherWorker
}
// submitEmptyInPack sends an empty inpack with the specified fn name to the worker. It uses
// the first agent of the worker.
func submitEmptyInPack(t *testing.T, worker *Worker, function string) {
if l := len(worker.agents); l != 1 {
t.Error("The worker has no agents")
}
inpack := getInPack()
inpack.dataType = dtJobAssign
inpack.fn = function
inpack.a = worker.agents[0]
worker.in <- inpack
}
// TestShutdownSuccessJob tests that shutdown handles active jobs that will succeed
func TestShutdownSuccessJob(t *testing.T) {
otherWorker := initWorker(t)
finishedJob := false
var wg sync.WaitGroup
successJob := func(job Job) ([]byte, error) {
wg.Done()
// Sleep for 10ms to ensure that the shutdown waits for this to finish
time.Sleep(time.Duration(10 * time.Millisecond))
finishedJob = true
return nil, nil
}
if err := otherWorker.AddFunc("test", successJob, 0); err != nil {
t.Error(err)
}
if err := otherWorker.Ready(); err != nil {
t.Error(err)
return
}
submitEmptyInPack(t, otherWorker, "test")
go otherWorker.Work()
// Wait for the success_job to start so that we know we didn't shutdown before even
// beginning to process the job.
wg.Add(1)
wg.Wait()
otherWorker.Shutdown()
if !finishedJob {
t.Error("Didn't finish job")
}
}
// TestShutdownFailureJob tests that shutdown handles active jobs that will fail
func TestShutdownFailureJob(t *testing.T) {
otherWorker := initWorker(t)
var wg sync.WaitGroup
finishedJob := false
failureJob := func(job Job) ([]byte, error) {
wg.Done()
// Sleep for 10ms to ensure that shutdown waits for this to finish
time.Sleep(time.Duration(10 * time.Millisecond))
finishedJob = true
return nil, errors.New("Error!")
}
if err := otherWorker.AddFunc("test", failureJob, 0); err != nil {
t.Error(err)
}
if err := otherWorker.Ready(); err != nil {
t.Error(err)
return
}
submitEmptyInPack(t, otherWorker, "test")
go otherWorker.Work()
// Wait for the failure_job to start so that we know we didn't shutdown before even
// beginning to process the job.
wg.Add(1)
wg.Wait()
otherWorker.Shutdown()
if !finishedJob {
t.Error("Didn't finish the failed job")
}
}
func TestSubmitMultipleJobs(t *testing.T) {
otherWorker := initWorker(t)
var startJobs sync.WaitGroup
startJobs.Add(2)
var jobsFinished int32 = 0
job := func(job Job) ([]byte, error) {
startJobs.Done()
// Sleep for 10ms to ensure that the shutdown waits for this to finish
time.Sleep(time.Duration(10 * time.Millisecond))
atomic.AddInt32(&jobsFinished, 1)
return nil, nil
}
if err := otherWorker.AddFunc("test", job, 0); err != nil {
t.Error(err)
}
if err := otherWorker.Ready(); err != nil {
t.Error(err)
return
}
submitEmptyInPack(t, otherWorker, "test")
submitEmptyInPack(t, otherWorker, "test")
go otherWorker.Work()
startJobs.Wait()
otherWorker.Shutdown()
if jobsFinished != 2 {
t.Error("Didn't run both jobs")
}
}
func TestSubmitJobAfterShutdown(t *testing.T) {
otherWorker := initWorker(t)
noRunJob := func(job Job) ([]byte, error) {
t.Error("This job shouldn't have been run")
return nil, nil
}
if err := otherWorker.AddFunc("test", noRunJob, 0); err != nil {
t.Error(err)
}
if err := otherWorker.Ready(); err != nil {
t.Error(err)
return
}
go otherWorker.Work()
otherWorker.Shutdown()
submitEmptyInPack(t, otherWorker, "test")
// Sleep for 10ms to make sure that the job doesn't run
time.Sleep(time.Duration(10 * time.Millisecond))
}