Ability to exit work loop gracefully
This change adds the ability to call Shutdown on a gearman-go worker which causes the worker to wait for all currently active jobs to finish and then close the connection. It does this by keeping tracking of the number of currently active transactions, disallowing new job creation, and using a WaitGroup to wait for all active jobs to finish.
This commit is contained in:
parent
320155c608
commit
8cdaef9752
@ -18,11 +18,15 @@ const (
|
|||||||
// It can connect to multi-server and grab jobs.
|
// It can connect to multi-server and grab jobs.
|
||||||
type Worker struct {
|
type Worker struct {
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
agents []*agent
|
agents []*agent
|
||||||
funcs jobFuncs
|
funcs jobFuncs
|
||||||
in chan *inPack
|
in chan *inPack
|
||||||
running bool
|
running bool
|
||||||
ready bool
|
ready bool
|
||||||
|
shuttingDown bool
|
||||||
|
activeJobs int
|
||||||
|
// Used during shutdown to wait for all active jobs to finish
|
||||||
|
finishedDraining sync.WaitGroup
|
||||||
|
|
||||||
Id string
|
Id string
|
||||||
ErrorHandler ErrorHandler
|
ErrorHandler ErrorHandler
|
||||||
@ -43,6 +47,7 @@ func New(limit int) (worker *Worker) {
|
|||||||
funcs: make(jobFuncs),
|
funcs: make(jobFuncs),
|
||||||
in: make(chan *inPack, queueSize),
|
in: make(chan *inPack, queueSize),
|
||||||
}
|
}
|
||||||
|
worker.finishedDraining.Add(1)
|
||||||
if limit != Unlimited {
|
if limit != Unlimited {
|
||||||
worker.limit = make(chan bool, limit-1)
|
worker.limit = make(chan bool, limit-1)
|
||||||
}
|
}
|
||||||
@ -137,7 +142,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
|
|||||||
case dtNoJob:
|
case dtNoJob:
|
||||||
inpack.a.PreSleep()
|
inpack.a.PreSleep()
|
||||||
case dtNoop:
|
case dtNoop:
|
||||||
inpack.a.Grab()
|
if !worker.shuttingDown {
|
||||||
|
inpack.a.Grab()
|
||||||
|
}
|
||||||
case dtJobAssign, dtJobAssignUniq:
|
case dtJobAssign, dtJobAssignUniq:
|
||||||
go func() {
|
go func() {
|
||||||
if err := worker.exec(inpack); err != nil {
|
if err := worker.exec(inpack); err != nil {
|
||||||
@ -147,7 +154,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
|
|||||||
if worker.limit != nil {
|
if worker.limit != nil {
|
||||||
worker.limit <- true
|
worker.limit <- true
|
||||||
}
|
}
|
||||||
inpack.a.Grab()
|
if !worker.shuttingDown {
|
||||||
|
inpack.a.Grab()
|
||||||
|
}
|
||||||
case dtError:
|
case dtError:
|
||||||
worker.err(inpack.Err())
|
worker.err(inpack.Err())
|
||||||
fallthrough
|
fallthrough
|
||||||
@ -182,11 +191,11 @@ func (worker *Worker) Ready() (err error) {
|
|||||||
// Main loop, block here
|
// Main loop, block here
|
||||||
// Most of time, this should be evaluated in goroutine.
|
// Most of time, this should be evaluated in goroutine.
|
||||||
func (worker *Worker) Work() {
|
func (worker *Worker) Work() {
|
||||||
if ! worker.ready {
|
if !worker.ready {
|
||||||
// didn't run Ready beforehand, so we'll have to do it:
|
// didn't run Ready beforehand, so we'll have to do it:
|
||||||
err := worker.Ready()
|
err := worker.Ready()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic( err )
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,6 +233,19 @@ func (worker *Worker) Close() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shutdown server gracefully. This function will block until all active work has finished.
|
||||||
|
func (worker *Worker) Shutdown() {
|
||||||
|
worker.Lock()
|
||||||
|
worker.shuttingDown = true
|
||||||
|
if worker.activeJobs == 0 {
|
||||||
|
worker.finishedDraining.Done()
|
||||||
|
}
|
||||||
|
worker.Unlock()
|
||||||
|
// Wait for the mutex
|
||||||
|
worker.finishedDraining.Wait()
|
||||||
|
worker.Close()
|
||||||
|
}
|
||||||
|
|
||||||
// Echo
|
// Echo
|
||||||
func (worker *Worker) Echo(data []byte) {
|
func (worker *Worker) Echo(data []byte) {
|
||||||
outpack := getOutPack()
|
outpack := getOutPack()
|
||||||
@ -250,6 +272,29 @@ func (worker *Worker) SetId(id string) {
|
|||||||
worker.broadcast(outpack)
|
worker.broadcast(outpack)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IncrementActive increments the count of active jobs. This will return false if no more
|
||||||
|
// jobs can't be started because the worker is shutting down.
|
||||||
|
func (worker *Worker) incrementActive() bool {
|
||||||
|
worker.Lock()
|
||||||
|
defer worker.Unlock()
|
||||||
|
if worker.shuttingDown {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
worker.activeJobs = worker.activeJobs + 1
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// DecrementActive decrements the count of active jobs. If the process is shutting down
|
||||||
|
// it will set the finishedDraining flag if there are no more active jobs.
|
||||||
|
func (worker *Worker) decrementActive() {
|
||||||
|
worker.Lock()
|
||||||
|
defer worker.Unlock()
|
||||||
|
worker.activeJobs = worker.activeJobs - 1
|
||||||
|
if worker.shuttingDown && worker.activeJobs == 0 {
|
||||||
|
worker.finishedDraining.Done()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// inner job executing
|
// inner job executing
|
||||||
func (worker *Worker) exec(inpack *inPack) (err error) {
|
func (worker *Worker) exec(inpack *inPack) (err error) {
|
||||||
defer func() {
|
defer func() {
|
||||||
@ -263,7 +308,11 @@ func (worker *Worker) exec(inpack *inPack) (err error) {
|
|||||||
err = ErrUnknown
|
err = ErrUnknown
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
worker.decrementActive()
|
||||||
}()
|
}()
|
||||||
|
if !worker.incrementActive() {
|
||||||
|
return
|
||||||
|
}
|
||||||
f, ok := worker.funcs[inpack.fn]
|
f, ok := worker.funcs[inpack.fn]
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("The function does not exist: %s", inpack.fn)
|
return fmt.Errorf("The function does not exist: %s", inpack.fn)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package worker
|
package worker
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
@ -62,6 +63,7 @@ func TestWorkerRemoveFunc(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestWork(t *testing.T) {
|
func TestWork(t *testing.T) {
|
||||||
|
// TODO: Worth looking at this for shutdown (WaitGroup)
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
worker.JobHandler = func(job Job) error {
|
worker.JobHandler = func(job Job) error {
|
||||||
t.Logf("%s", job.Data())
|
t.Logf("%s", job.Data())
|
||||||
@ -78,12 +80,11 @@ func TestWork(t *testing.T) {
|
|||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestWorkerClose(t *testing.T) {
|
func TestWorkerClose(t *testing.T) {
|
||||||
worker.Close()
|
worker.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWorkWithoutReady(t * testing.T){
|
func TestWorkWithoutReady(t *testing.T) {
|
||||||
other_worker := New(Unlimited)
|
other_worker := New(Unlimited)
|
||||||
|
|
||||||
if err := other_worker.AddServer(Network, "127.0.0.1:4730"); err != nil {
|
if err := other_worker.AddServer(Network, "127.0.0.1:4730"); err != nil {
|
||||||
@ -94,13 +95,13 @@ func TestWorkWithoutReady(t * testing.T){
|
|||||||
}
|
}
|
||||||
|
|
||||||
timeout := make(chan bool, 1)
|
timeout := make(chan bool, 1)
|
||||||
done := make( chan bool, 1)
|
done := make(chan bool, 1)
|
||||||
|
|
||||||
other_worker.JobHandler = func( j Job ) error {
|
other_worker.JobHandler = func(j Job) error {
|
||||||
if( ! other_worker.ready ){
|
if !other_worker.ready {
|
||||||
t.Error("Worker not ready as expected");
|
t.Error("Worker not ready as expected")
|
||||||
}
|
}
|
||||||
done <-true
|
done <- true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
go func() {
|
go func() {
|
||||||
@ -108,15 +109,15 @@ func TestWorkWithoutReady(t * testing.T){
|
|||||||
timeout <- true
|
timeout <- true
|
||||||
}()
|
}()
|
||||||
|
|
||||||
go func(){
|
go func() {
|
||||||
other_worker.Work();
|
other_worker.Work()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// With the all-in-one Work() we don't know if the
|
// With the all-in-one Work() we don't know if the
|
||||||
// worker is ready at this stage so we may have to wait a sec:
|
// worker is ready at this stage so we may have to wait a sec:
|
||||||
go func(){
|
go func() {
|
||||||
tries := 3
|
tries := 3
|
||||||
for( tries > 0 ){
|
for tries > 0 {
|
||||||
if other_worker.ready {
|
if other_worker.ready {
|
||||||
other_worker.Echo([]byte("Hello"))
|
other_worker.Echo([]byte("Hello"))
|
||||||
break
|
break
|
||||||
@ -129,22 +130,22 @@ func TestWorkWithoutReady(t * testing.T){
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
// determine if we've finished or timed out:
|
// determine if we've finished or timed out:
|
||||||
select{
|
select {
|
||||||
case <- timeout:
|
case <-timeout:
|
||||||
t.Error("Test timed out waiting for the worker")
|
t.Error("Test timed out waiting for the worker")
|
||||||
case <- done:
|
case <-done:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWorkWithoutReadyWithPanic(t * testing.T){
|
func TestWorkWithoutReadyWithPanic(t *testing.T) {
|
||||||
other_worker := New(Unlimited)
|
other_worker := New(Unlimited)
|
||||||
|
|
||||||
timeout := make(chan bool, 1)
|
timeout := make(chan bool, 1)
|
||||||
done := make( chan bool, 1)
|
done := make(chan bool, 1)
|
||||||
|
|
||||||
// Going to work with no worker setup.
|
// Going to work with no worker setup.
|
||||||
// when Work (hopefully) calls Ready it will get an error which should cause it to panic()
|
// when Work (hopefully) calls Ready it will get an error which should cause it to panic()
|
||||||
go func(){
|
go func() {
|
||||||
defer func() {
|
defer func() {
|
||||||
if err := recover(); err != nil {
|
if err := recover(); err != nil {
|
||||||
done <- true
|
done <- true
|
||||||
@ -153,17 +154,122 @@ func TestWorkWithoutReadyWithPanic(t * testing.T){
|
|||||||
t.Error("Work should raise a panic.")
|
t.Error("Work should raise a panic.")
|
||||||
done <- true
|
done <- true
|
||||||
}()
|
}()
|
||||||
other_worker.Work();
|
other_worker.Work()
|
||||||
}()
|
}()
|
||||||
go func() {
|
go func() {
|
||||||
time.Sleep(2 * time.Second)
|
time.Sleep(2 * time.Second)
|
||||||
timeout <- true
|
timeout <- true
|
||||||
}()
|
}()
|
||||||
|
|
||||||
select{
|
select {
|
||||||
case <- timeout:
|
case <-timeout:
|
||||||
t.Error("Test timed out waiting for the worker")
|
t.Error("Test timed out waiting for the worker")
|
||||||
case <- done:
|
case <-done:
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// initWorker creates a worker and adds the localhost server to it
|
||||||
|
func initWorker(t *testing.T) *Worker {
|
||||||
|
otherWorker := New(Unlimited)
|
||||||
|
if err := otherWorker.AddServer(Network, "127.0.0.1:4730"); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
return otherWorker
|
||||||
|
}
|
||||||
|
|
||||||
|
// submitEmptyInPack sends an empty inpack with the specified fn name to the worker. It uses
|
||||||
|
// the first agent of the worker.
|
||||||
|
func submitEmptyInPack(t *testing.T, worker *Worker, function string) {
|
||||||
|
if l := len(worker.agents); l != 1 {
|
||||||
|
t.Error("The worker has no agents")
|
||||||
|
}
|
||||||
|
inpack := getInPack()
|
||||||
|
inpack.dataType = dtJobAssign
|
||||||
|
inpack.fn = function
|
||||||
|
inpack.a = worker.agents[0]
|
||||||
|
worker.in <- inpack
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestShutdownSuccessJob tests that shutdown waits for the currently running job to
|
||||||
|
// complete.
|
||||||
|
func TestShutdownSuccessJob(t *testing.T) {
|
||||||
|
otherWorker := initWorker(t)
|
||||||
|
output := 0
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
successJob := func(job Job) ([]byte, error) {
|
||||||
|
wg.Done()
|
||||||
|
// Sleep for 100ms to ensure that the shutdown waits for this to finish
|
||||||
|
time.Sleep(time.Duration(100 * time.Millisecond))
|
||||||
|
output = 1
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if err := otherWorker.AddFunc("test", successJob, 0); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if err := otherWorker.Ready(); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
submitEmptyInPack(t, otherWorker, "test")
|
||||||
|
go otherWorker.Work()
|
||||||
|
// Wait for the success_job to start so that we know we didn't shutdown before even
|
||||||
|
// beginning to process the job.
|
||||||
|
wg.Add(1)
|
||||||
|
wg.Wait()
|
||||||
|
otherWorker.Shutdown()
|
||||||
|
if output != 1 {
|
||||||
|
t.Error("Expected 1, output was: " + strconv.Itoa(output))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShutdownFailureJob(t *testing.T) {
|
||||||
|
otherWorker := initWorker(t)
|
||||||
|
output := 0
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
failureJob := func(job Job) ([]byte, error) {
|
||||||
|
wg.Done()
|
||||||
|
// Sleep for 100ms to ensure that shutdown waits for this to finish
|
||||||
|
time.Sleep(time.Duration(100 * time.Millisecond))
|
||||||
|
output = 1
|
||||||
|
return nil, nil //new Error()
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := otherWorker.AddFunc("test", failureJob, 0); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if err := otherWorker.Ready(); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
submitEmptyInPack(t, otherWorker, "test")
|
||||||
|
go otherWorker.Work()
|
||||||
|
// Wait for the success_job to start so that we know we didn't shutdown before even
|
||||||
|
// beginning to process the job.
|
||||||
|
wg.Add(1)
|
||||||
|
wg.Wait()
|
||||||
|
otherWorker.Shutdown()
|
||||||
|
if output != 1 {
|
||||||
|
t.Error("Expected 1, output was: " + strconv.Itoa(output))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubmitJobAfterShutdown(t *testing.T) {
|
||||||
|
otherWorker := initWorker(t)
|
||||||
|
noRunJob := func(job Job) ([]byte, error) {
|
||||||
|
t.Error("This job shouldn't have been run")
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if err := otherWorker.AddFunc("test", noRunJob, 0); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if err := otherWorker.Ready(); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
go otherWorker.Work()
|
||||||
|
otherWorker.Shutdown()
|
||||||
|
submitEmptyInPack(t, otherWorker, "test")
|
||||||
|
// Sleep for 100ms to make sure that the job doesn't actually run
|
||||||
|
time.Sleep(time.Duration(100 * time.Millisecond))
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user