Merge pull request #1 from Clever/graceful-shutdown
Ability to exit work loop gracefully
This commit is contained in:
commit
097037d212
@ -5,6 +5,7 @@ package worker
|
|||||||
import (
|
import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@ -18,11 +19,15 @@ const (
|
|||||||
// It can connect to multi-server and grab jobs.
|
// It can connect to multi-server and grab jobs.
|
||||||
type Worker struct {
|
type Worker struct {
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
agents []*agent
|
agents []*agent
|
||||||
funcs jobFuncs
|
funcs jobFuncs
|
||||||
in chan *inPack
|
in chan *inPack
|
||||||
running bool
|
running bool
|
||||||
ready bool
|
ready bool
|
||||||
|
// The shuttingDown variable is protected by the Worker lock
|
||||||
|
shuttingDown bool
|
||||||
|
// Used during shutdown to wait for all active jobs to finish
|
||||||
|
activeJobs sync.WaitGroup
|
||||||
|
|
||||||
Id string
|
Id string
|
||||||
ErrorHandler ErrorHandler
|
ErrorHandler ErrorHandler
|
||||||
@ -137,7 +142,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
|
|||||||
case dtNoJob:
|
case dtNoJob:
|
||||||
inpack.a.PreSleep()
|
inpack.a.PreSleep()
|
||||||
case dtNoop:
|
case dtNoop:
|
||||||
inpack.a.Grab()
|
if !worker.isShuttingDown() {
|
||||||
|
inpack.a.Grab()
|
||||||
|
}
|
||||||
case dtJobAssign, dtJobAssignUniq:
|
case dtJobAssign, dtJobAssignUniq:
|
||||||
go func() {
|
go func() {
|
||||||
if err := worker.exec(inpack); err != nil {
|
if err := worker.exec(inpack); err != nil {
|
||||||
@ -147,7 +154,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
|
|||||||
if worker.limit != nil {
|
if worker.limit != nil {
|
||||||
worker.limit <- true
|
worker.limit <- true
|
||||||
}
|
}
|
||||||
inpack.a.Grab()
|
if !worker.isShuttingDown() {
|
||||||
|
inpack.a.Grab()
|
||||||
|
}
|
||||||
case dtError:
|
case dtError:
|
||||||
worker.err(inpack.Err())
|
worker.err(inpack.Err())
|
||||||
fallthrough
|
fallthrough
|
||||||
@ -182,11 +191,12 @@ func (worker *Worker) Ready() (err error) {
|
|||||||
// Main loop, block here
|
// Main loop, block here
|
||||||
// Most of time, this should be evaluated in goroutine.
|
// Most of time, this should be evaluated in goroutine.
|
||||||
func (worker *Worker) Work() {
|
func (worker *Worker) Work() {
|
||||||
if ! worker.ready {
|
if !worker.ready {
|
||||||
// didn't run Ready beforehand, so we'll have to do it:
|
// didn't run Ready beforehand, so we'll have to do it:
|
||||||
err := worker.Ready()
|
err := worker.Ready()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic( err )
|
log.Println("Error making worker ready: " + err.Error())
|
||||||
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,6 +234,16 @@ func (worker *Worker) Close() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shutdown server gracefully. This function will block until all active work has finished.
|
||||||
|
func (worker *Worker) Shutdown() {
|
||||||
|
worker.Lock()
|
||||||
|
worker.shuttingDown = true
|
||||||
|
worker.Unlock()
|
||||||
|
// Wait for all the active jobs to finish
|
||||||
|
worker.activeJobs.Wait()
|
||||||
|
worker.Close()
|
||||||
|
}
|
||||||
|
|
||||||
// Echo
|
// Echo
|
||||||
func (worker *Worker) Echo(data []byte) {
|
func (worker *Worker) Echo(data []byte) {
|
||||||
outpack := getOutPack()
|
outpack := getOutPack()
|
||||||
@ -250,6 +270,13 @@ func (worker *Worker) SetId(id string) {
|
|||||||
worker.broadcast(outpack)
|
worker.broadcast(outpack)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsShutdown checks to see if the worker is in the process of being shutdown.
|
||||||
|
func (worker *Worker) isShuttingDown() bool {
|
||||||
|
worker.Lock()
|
||||||
|
defer worker.Unlock()
|
||||||
|
return worker.shuttingDown
|
||||||
|
}
|
||||||
|
|
||||||
// inner job executing
|
// inner job executing
|
||||||
func (worker *Worker) exec(inpack *inPack) (err error) {
|
func (worker *Worker) exec(inpack *inPack) (err error) {
|
||||||
defer func() {
|
defer func() {
|
||||||
@ -263,7 +290,14 @@ func (worker *Worker) exec(inpack *inPack) (err error) {
|
|||||||
err = ErrUnknown
|
err = ErrUnknown
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
worker.activeJobs.Done()
|
||||||
}()
|
}()
|
||||||
|
worker.activeJobs.Add(1)
|
||||||
|
// Make sure that we don't accept any new work from old grab requests
|
||||||
|
// after we starting shutting down.
|
||||||
|
if worker.isShuttingDown() {
|
||||||
|
return
|
||||||
|
}
|
||||||
f, ok := worker.funcs[inpack.fn]
|
f, ok := worker.funcs[inpack.fn]
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("The function does not exist: %s", inpack.fn)
|
return fmt.Errorf("The function does not exist: %s", inpack.fn)
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
package worker
|
package worker
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@ -78,12 +80,11 @@ func TestWork(t *testing.T) {
|
|||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestWorkerClose(t *testing.T) {
|
func TestWorkerClose(t *testing.T) {
|
||||||
worker.Close()
|
worker.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWorkWithoutReady(t * testing.T){
|
func TestWorkWithoutReady(t *testing.T) {
|
||||||
other_worker := New(Unlimited)
|
other_worker := New(Unlimited)
|
||||||
|
|
||||||
if err := other_worker.AddServer(Network, "127.0.0.1:4730"); err != nil {
|
if err := other_worker.AddServer(Network, "127.0.0.1:4730"); err != nil {
|
||||||
@ -94,13 +95,13 @@ func TestWorkWithoutReady(t * testing.T){
|
|||||||
}
|
}
|
||||||
|
|
||||||
timeout := make(chan bool, 1)
|
timeout := make(chan bool, 1)
|
||||||
done := make( chan bool, 1)
|
done := make(chan bool, 1)
|
||||||
|
|
||||||
other_worker.JobHandler = func( j Job ) error {
|
other_worker.JobHandler = func(j Job) error {
|
||||||
if( ! other_worker.ready ){
|
if !other_worker.ready {
|
||||||
t.Error("Worker not ready as expected");
|
t.Error("Worker not ready as expected")
|
||||||
}
|
}
|
||||||
done <-true
|
done <- true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
go func() {
|
go func() {
|
||||||
@ -108,15 +109,15 @@ func TestWorkWithoutReady(t * testing.T){
|
|||||||
timeout <- true
|
timeout <- true
|
||||||
}()
|
}()
|
||||||
|
|
||||||
go func(){
|
go func() {
|
||||||
other_worker.Work();
|
other_worker.Work()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// With the all-in-one Work() we don't know if the
|
// With the all-in-one Work() we don't know if the
|
||||||
// worker is ready at this stage so we may have to wait a sec:
|
// worker is ready at this stage so we may have to wait a sec:
|
||||||
go func(){
|
go func() {
|
||||||
tries := 3
|
tries := 3
|
||||||
for( tries > 0 ){
|
for tries > 0 {
|
||||||
if other_worker.ready {
|
if other_worker.ready {
|
||||||
other_worker.Echo([]byte("Hello"))
|
other_worker.Echo([]byte("Hello"))
|
||||||
break
|
break
|
||||||
@ -129,22 +130,22 @@ func TestWorkWithoutReady(t * testing.T){
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
// determine if we've finished or timed out:
|
// determine if we've finished or timed out:
|
||||||
select{
|
select {
|
||||||
case <- timeout:
|
case <-timeout:
|
||||||
t.Error("Test timed out waiting for the worker")
|
t.Error("Test timed out waiting for the worker")
|
||||||
case <- done:
|
case <-done:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWorkWithoutReadyWithPanic(t * testing.T){
|
func TestWorkWithoutReadyWithPanic(t *testing.T) {
|
||||||
other_worker := New(Unlimited)
|
other_worker := New(Unlimited)
|
||||||
|
|
||||||
timeout := make(chan bool, 1)
|
timeout := make(chan bool, 1)
|
||||||
done := make( chan bool, 1)
|
done := make(chan bool, 1)
|
||||||
|
|
||||||
// Going to work with no worker setup.
|
// Going to work with no worker setup.
|
||||||
// when Work (hopefully) calls Ready it will get an error which should cause it to panic()
|
// when Work (hopefully) calls Ready it will get an error which should cause it to panic()
|
||||||
go func(){
|
go func() {
|
||||||
defer func() {
|
defer func() {
|
||||||
if err := recover(); err != nil {
|
if err := recover(); err != nil {
|
||||||
done <- true
|
done <- true
|
||||||
@ -153,17 +154,151 @@ func TestWorkWithoutReadyWithPanic(t * testing.T){
|
|||||||
t.Error("Work should raise a panic.")
|
t.Error("Work should raise a panic.")
|
||||||
done <- true
|
done <- true
|
||||||
}()
|
}()
|
||||||
other_worker.Work();
|
other_worker.Work()
|
||||||
}()
|
}()
|
||||||
go func() {
|
go func() {
|
||||||
time.Sleep(2 * time.Second)
|
time.Sleep(2 * time.Second)
|
||||||
timeout <- true
|
timeout <- true
|
||||||
}()
|
}()
|
||||||
|
|
||||||
select{
|
select {
|
||||||
case <- timeout:
|
case <-timeout:
|
||||||
t.Error("Test timed out waiting for the worker")
|
t.Error("Test timed out waiting for the worker")
|
||||||
case <- done:
|
case <-done:
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// initWorker creates a worker and adds the localhost server to it
|
||||||
|
func initWorker(t *testing.T) *Worker {
|
||||||
|
otherWorker := New(Unlimited)
|
||||||
|
if err := otherWorker.AddServer(Network, "127.0.0.1:4730"); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
return otherWorker
|
||||||
|
}
|
||||||
|
|
||||||
|
// submitEmptyInPack sends an empty inpack with the specified fn name to the worker. It uses
|
||||||
|
// the first agent of the worker.
|
||||||
|
func submitEmptyInPack(t *testing.T, worker *Worker, function string) {
|
||||||
|
if l := len(worker.agents); l != 1 {
|
||||||
|
t.Error("The worker has no agents")
|
||||||
|
}
|
||||||
|
inpack := getInPack()
|
||||||
|
inpack.dataType = dtJobAssign
|
||||||
|
inpack.fn = function
|
||||||
|
inpack.a = worker.agents[0]
|
||||||
|
worker.in <- inpack
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestShutdownSuccessJob tests that shutdown handles active jobs that will succeed
|
||||||
|
func TestShutdownSuccessJob(t *testing.T) {
|
||||||
|
otherWorker := initWorker(t)
|
||||||
|
finishedJob := false
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
successJob := func(job Job) ([]byte, error) {
|
||||||
|
wg.Done()
|
||||||
|
// Sleep for 10ms to ensure that the shutdown waits for this to finish
|
||||||
|
time.Sleep(time.Duration(10 * time.Millisecond))
|
||||||
|
finishedJob = true
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if err := otherWorker.AddFunc("test", successJob, 0); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if err := otherWorker.Ready(); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
submitEmptyInPack(t, otherWorker, "test")
|
||||||
|
go otherWorker.Work()
|
||||||
|
// Wait for the success_job to start so that we know we didn't shutdown before even
|
||||||
|
// beginning to process the job.
|
||||||
|
wg.Add(1)
|
||||||
|
wg.Wait()
|
||||||
|
otherWorker.Shutdown()
|
||||||
|
if !finishedJob {
|
||||||
|
t.Error("Didn't finish job")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestShutdownFailureJob tests that shutdown handles active jobs that will fail
|
||||||
|
func TestShutdownFailureJob(t *testing.T) {
|
||||||
|
otherWorker := initWorker(t)
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
finishedJob := false
|
||||||
|
failureJob := func(job Job) ([]byte, error) {
|
||||||
|
wg.Done()
|
||||||
|
// Sleep for 10ms to ensure that shutdown waits for this to finish
|
||||||
|
time.Sleep(time.Duration(10 * time.Millisecond))
|
||||||
|
finishedJob = true
|
||||||
|
return nil, errors.New("Error!")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := otherWorker.AddFunc("test", failureJob, 0); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if err := otherWorker.Ready(); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
submitEmptyInPack(t, otherWorker, "test")
|
||||||
|
go otherWorker.Work()
|
||||||
|
// Wait for the failure_job to start so that we know we didn't shutdown before even
|
||||||
|
// beginning to process the job.
|
||||||
|
wg.Add(1)
|
||||||
|
wg.Wait()
|
||||||
|
otherWorker.Shutdown()
|
||||||
|
if !finishedJob {
|
||||||
|
t.Error("Didn't finish the failed job")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubmitMultipleJobs(t *testing.T) {
|
||||||
|
otherWorker := initWorker(t)
|
||||||
|
var startJobs sync.WaitGroup
|
||||||
|
startJobs.Add(2)
|
||||||
|
var jobsFinished int32 = 0
|
||||||
|
job := func(job Job) ([]byte, error) {
|
||||||
|
startJobs.Done()
|
||||||
|
// Sleep for 10ms to ensure that the shutdown waits for this to finish
|
||||||
|
time.Sleep(time.Duration(10 * time.Millisecond))
|
||||||
|
atomic.AddInt32(&jobsFinished, 1)
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if err := otherWorker.AddFunc("test", job, 0); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if err := otherWorker.Ready(); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
submitEmptyInPack(t, otherWorker, "test")
|
||||||
|
submitEmptyInPack(t, otherWorker, "test")
|
||||||
|
go otherWorker.Work()
|
||||||
|
startJobs.Wait()
|
||||||
|
otherWorker.Shutdown()
|
||||||
|
if jobsFinished != 2 {
|
||||||
|
t.Error("Didn't run both jobs")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubmitJobAfterShutdown(t *testing.T) {
|
||||||
|
otherWorker := initWorker(t)
|
||||||
|
noRunJob := func(job Job) ([]byte, error) {
|
||||||
|
t.Error("This job shouldn't have been run")
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if err := otherWorker.AddFunc("test", noRunJob, 0); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if err := otherWorker.Ready(); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
go otherWorker.Work()
|
||||||
|
otherWorker.Shutdown()
|
||||||
|
submitEmptyInPack(t, otherWorker, "test")
|
||||||
|
// Sleep for 10ms to make sure that the job doesn't run
|
||||||
|
time.Sleep(time.Duration(10 * time.Millisecond))
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user