diff --git a/worker/worker.go b/worker/worker.go index a81f4ab..9926aec 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -5,6 +5,7 @@ package worker import ( "encoding/binary" "fmt" + "log" "sync" "time" ) @@ -18,11 +19,15 @@ const ( // It can connect to multi-server and grab jobs. type Worker struct { sync.Mutex - agents []*agent - funcs jobFuncs - in chan *inPack - running bool - ready bool + agents []*agent + funcs jobFuncs + in chan *inPack + running bool + ready bool + // The shuttingDown variable is protected by the Worker lock + shuttingDown bool + // Used during shutdown to wait for all active jobs to finish + activeJobs sync.WaitGroup Id string ErrorHandler ErrorHandler @@ -137,7 +142,9 @@ func (worker *Worker) handleInPack(inpack *inPack) { case dtNoJob: inpack.a.PreSleep() case dtNoop: - inpack.a.Grab() + if !worker.isShuttingDown() { + inpack.a.Grab() + } case dtJobAssign, dtJobAssignUniq: go func() { if err := worker.exec(inpack); err != nil { @@ -147,7 +154,9 @@ func (worker *Worker) handleInPack(inpack *inPack) { if worker.limit != nil { worker.limit <- true } - inpack.a.Grab() + if !worker.isShuttingDown() { + inpack.a.Grab() + } case dtError: worker.err(inpack.Err()) fallthrough @@ -182,11 +191,12 @@ func (worker *Worker) Ready() (err error) { // Main loop, block here // Most of time, this should be evaluated in goroutine. func (worker *Worker) Work() { - if ! worker.ready { + if !worker.ready { // didn't run Ready beforehand, so we'll have to do it: err := worker.Ready() if err != nil { - panic( err ) + log.Println("Error making worker ready: " + err.Error()) + panic(err) } } @@ -224,6 +234,16 @@ func (worker *Worker) Close() { } } +// Shutdown stops the worker gracefully. This function will block until all active work has finished. 
+func (worker *Worker) Shutdown() { + worker.Lock() + worker.shuttingDown = true + worker.Unlock() + // Wait for all the active jobs to finish + worker.activeJobs.Wait() + worker.Close() +} + // Echo func (worker *Worker) Echo(data []byte) { outpack := getOutPack() @@ -250,6 +270,13 @@ func (worker *Worker) SetId(id string) { worker.broadcast(outpack) } +// isShuttingDown reports whether the worker is in the process of shutting down. +func (worker *Worker) isShuttingDown() bool { + worker.Lock() + defer worker.Unlock() + return worker.shuttingDown +} + // inner job executing func (worker *Worker) exec(inpack *inPack) (err error) { defer func() { @@ -263,7 +290,14 @@ func (worker *Worker) exec(inpack *inPack) (err error) { err = ErrUnknown } } + worker.activeJobs.Done() }() + worker.activeJobs.Add(1) + // Make sure that we don't accept any new work from old grab requests + // after we start shutting down. + if worker.isShuttingDown() { + return + } f, ok := worker.funcs[inpack.fn] if !ok { return fmt.Errorf("The function does not exist: %s", inpack.fn) diff --git a/worker/worker_test.go b/worker/worker_test.go index 06ce15f..946d166 100644 --- a/worker/worker_test.go +++ b/worker/worker_test.go @@ -1,7 +1,9 @@ package worker import ( + "errors" "sync" + "sync/atomic" "testing" "time" ) @@ -78,12 +80,11 @@ func TestWork(t *testing.T) { wg.Wait() } - func TestWorkerClose(t *testing.T) { worker.Close() } -func TestWorkWithoutReady(t * testing.T){ +func TestWorkWithoutReady(t *testing.T) { other_worker := New(Unlimited) if err := other_worker.AddServer(Network, "127.0.0.1:4730"); err != nil { @@ -92,15 +93,15 @@ func TestWorkWithoutReady(t * testing.T){ if err := other_worker.AddFunc("gearman-go-workertest", foobar, 0); err != nil { t.Error(err) } - - timeout := make(chan bool, 1) - done := make( chan bool, 1) - other_worker.JobHandler = func( j Job ) error { - if( ! 
other_worker.ready ){ - t.Error("Worker not ready as expected"); + timeout := make(chan bool, 1) + done := make(chan bool, 1) + + other_worker.JobHandler = func(j Job) error { + if !other_worker.ready { + t.Error("Worker not ready as expected") } - done <-true + done <- true return nil } go func() { @@ -108,15 +109,15 @@ func TestWorkWithoutReady(t * testing.T){ timeout <- true }() - go func(){ - other_worker.Work(); + go func() { + other_worker.Work() }() - // With the all-in-one Work() we don't know if the + // With the all-in-one Work() we don't know if the // worker is ready at this stage so we may have to wait a sec: - go func(){ + go func() { tries := 3 - for( tries > 0 ){ + for tries > 0 { if other_worker.ready { other_worker.Echo([]byte("Hello")) break @@ -127,24 +128,24 @@ func TestWorkWithoutReady(t * testing.T){ tries-- } }() - + // determine if we've finished or timed out: - select{ - case <- timeout: + select { + case <-timeout: t.Error("Test timed out waiting for the worker") - case <- done: + case <-done: } } -func TestWorkWithoutReadyWithPanic(t * testing.T){ +func TestWorkWithoutReadyWithPanic(t *testing.T) { other_worker := New(Unlimited) - + timeout := make(chan bool, 1) - done := make( chan bool, 1) + done := make(chan bool, 1) // Going to work with no worker setup. 
// when Work (hopefully) calls Ready it will get an error which should cause it to panic() - go func(){ + go func() { defer func() { if err := recover(); err != nil { done <- true @@ -153,17 +154,151 @@ func TestWorkWithoutReadyWithPanic(t * testing.T){ t.Error("Work should raise a panic.") done <- true }() - other_worker.Work(); + other_worker.Work() }() go func() { time.Sleep(2 * time.Second) timeout <- true }() - select{ - case <- timeout: + select { + case <-timeout: t.Error("Test timed out waiting for the worker") - case <- done: + case <-done: } } + +// initWorker creates a worker and adds the localhost server to it +func initWorker(t *testing.T) *Worker { + otherWorker := New(Unlimited) + if err := otherWorker.AddServer(Network, "127.0.0.1:4730"); err != nil { + t.Error(err) + } + return otherWorker +} + +// submitEmptyInPack sends an empty inpack with the specified fn name to the worker. It uses +// the first agent of the worker. +func submitEmptyInPack(t *testing.T, worker *Worker, function string) { + if l := len(worker.agents); l != 1 { + t.Error("The worker has no agents") + } + inpack := getInPack() + inpack.dataType = dtJobAssign + inpack.fn = function + inpack.a = worker.agents[0] + worker.in <- inpack +} + +// TestShutdownSuccessJob tests that shutdown handles active jobs that will succeed +func TestShutdownSuccessJob(t *testing.T) { + otherWorker := initWorker(t) + finishedJob := false + var wg sync.WaitGroup + successJob := func(job Job) ([]byte, error) { + wg.Done() + // Sleep for 10ms to ensure that the shutdown waits for this to finish + time.Sleep(time.Duration(10 * time.Millisecond)) + finishedJob = true + return nil, nil + } + if err := otherWorker.AddFunc("test", successJob, 0); err != nil { + t.Error(err) + } + if err := otherWorker.Ready(); err != nil { + t.Error(err) + return + } + submitEmptyInPack(t, otherWorker, "test") + go otherWorker.Work() + // Wait for the success_job to start so that we know we didn't shutdown before even + // 
beginning to process the job. + wg.Add(1) + wg.Wait() + otherWorker.Shutdown() + if !finishedJob { + t.Error("Didn't finish job") + } +} + +// TestShutdownFailureJob tests that shutdown handles active jobs that will fail +func TestShutdownFailureJob(t *testing.T) { + otherWorker := initWorker(t) + var wg sync.WaitGroup + finishedJob := false + failureJob := func(job Job) ([]byte, error) { + wg.Done() + // Sleep for 10ms to ensure that shutdown waits for this to finish + time.Sleep(time.Duration(10 * time.Millisecond)) + finishedJob = true + return nil, errors.New("Error!") + } + + if err := otherWorker.AddFunc("test", failureJob, 0); err != nil { + t.Error(err) + } + if err := otherWorker.Ready(); err != nil { + t.Error(err) + return + } + submitEmptyInPack(t, otherWorker, "test") + go otherWorker.Work() + // Wait for the failure_job to start so that we know we didn't shutdown before even + // beginning to process the job. + wg.Add(1) + wg.Wait() + otherWorker.Shutdown() + if !finishedJob { + t.Error("Didn't finish the failed job") + } +} + +func TestSubmitMultipleJobs(t *testing.T) { + otherWorker := initWorker(t) + var startJobs sync.WaitGroup + startJobs.Add(2) + var jobsFinished int32 = 0 + job := func(job Job) ([]byte, error) { + startJobs.Done() + // Sleep for 10ms to ensure that the shutdown waits for this to finish + time.Sleep(time.Duration(10 * time.Millisecond)) + atomic.AddInt32(&jobsFinished, 1) + return nil, nil + } + if err := otherWorker.AddFunc("test", job, 0); err != nil { + t.Error(err) + } + if err := otherWorker.Ready(); err != nil { + t.Error(err) + return + } + submitEmptyInPack(t, otherWorker, "test") + submitEmptyInPack(t, otherWorker, "test") + go otherWorker.Work() + startJobs.Wait() + otherWorker.Shutdown() + if jobsFinished != 2 { + t.Error("Didn't run both jobs") + } +} + +func TestSubmitJobAfterShutdown(t *testing.T) { + otherWorker := initWorker(t) + noRunJob := func(job Job) ([]byte, error) { + t.Error("This job shouldn't have been 
run") + return nil, nil + } + if err := otherWorker.AddFunc("test", noRunJob, 0); err != nil { + t.Error(err) + } + if err := otherWorker.Ready(); err != nil { + t.Error(err) + return + } + go otherWorker.Work() + otherWorker.Shutdown() + submitEmptyInPack(t, otherWorker, "test") + // Sleep for 10ms to make sure that the job doesn't run + time.Sleep(time.Duration(10 * time.Millisecond)) +}