diff --git a/worker/agent.go b/worker/agent.go
index 3367ad3..ad72690 100644
--- a/worker/agent.go
+++ b/worker/agent.go
@@ -86,7 +86,8 @@ func (a *agent) work() {
 		if len(leftdata) > 0 { // some data left for processing
 			data = append(leftdata, data...)
 		}
-		if len(data) < minPacketLength { // not enough data
+		length := len(data) - minPacketLength
+		if length < 0 || length < int(binary.BigEndian.Uint32(data[8:12])) { // not enough data for a whole packet
 			leftdata = data
 			continue
 		}
diff --git a/worker/inpack.go b/worker/inpack.go
index fc4717a..c777b80 100644
--- a/worker/inpack.go
+++ b/worker/inpack.go
@@ -86,6 +86,8 @@ func (inpack *inPack) UpdateStatus(numerator, denominator int) {
 
 // Decode job from byte slice
 func decodeInPack(data []byte) (inpack *inPack, l int, err error) {
+	// The next three checks should be unnecessary, as the same conditions are
+	// already checked in agent.work.
 	if len(data) < minPacketLength { // valid package should not less 12 bytes
 		err = fmt.Errorf("Invalid data: %v", data)
 		return
diff --git a/worker/worker.go b/worker/worker.go
index 25d6a60..92b8579 100644
--- a/worker/worker.go
+++ b/worker/worker.go
@@ -5,6 +5,7 @@ package worker
 import (
 	"encoding/binary"
 	"fmt"
+	"log"
 	"sync"
 	"time"
 )
@@ -23,6 +24,10 @@ type Worker struct {
 	in      chan *inPack
 	running bool
 	ready   bool
+	// shuttingDown is protected by the Worker lock
+	shuttingDown bool
+	// activeJobs is used during shutdown to wait for all active jobs to finish
+	activeJobs sync.WaitGroup
 
 	Id           string
 	ErrorHandler ErrorHandler
@@ -142,7 +147,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
 	case dtNoJob:
 		inpack.a.PreSleep()
 	case dtNoop:
-		inpack.a.Grab()
+		if !worker.isShuttingDown() {
+			inpack.a.Grab()
+		}
 	case dtJobAssign, dtJobAssignUniq:
 		go func() {
 			if err := worker.exec(inpack); err != nil {
@@ -152,7 +159,9 @@ func (worker *Worker) handleInPack(inpack *inPack) {
 		if worker.limit != nil {
 			worker.limit <- true
 		}
-		inpack.a.Grab()
+		if !worker.isShuttingDown() {
+			inpack.a.Grab()
+		}
 	case dtError:
 		worker.err(inpack.Err())
 		fallthrough
@@ -191,6 +200,7 @@ func (worker *Worker) Work() {
 		// didn't run Ready beforehand, so we'll have to do it:
 		err := worker.Ready()
 		if err != nil {
+			log.Println("Error making worker ready: " + err.Error())
 			panic(err)
 		}
 	}
@@ -227,6 +237,16 @@ func (worker *Worker) Close() {
 	}
 }
 
+// Shutdown shuts the worker down gracefully. It blocks until all active jobs have finished.
+func (worker *Worker) Shutdown() {
+	worker.Lock()
+	worker.shuttingDown = true
+	worker.Unlock()
+	// Wait for all the active jobs to finish
+	worker.activeJobs.Wait()
+	worker.Close()
+}
+
 // Echo
 func (worker *Worker) Echo(data []byte) {
 	outpack := getOutPack()
@@ -253,6 +273,13 @@ func (worker *Worker) SetId(id string) {
 	worker.broadcast(outpack)
 }
 
+// isShuttingDown reports whether the worker is in the process of shutting down.
+func (worker *Worker) isShuttingDown() bool {
+	worker.Lock()
+	defer worker.Unlock()
+	return worker.shuttingDown
+}
+
 // inner job executing
 func (worker *Worker) exec(inpack *inPack) (err error) {
 	defer func() {
@@ -266,7 +293,14 @@ func (worker *Worker) exec(inpack *inPack) (err error) {
 				err = ErrUnknown
 			}
 		}
+		worker.activeJobs.Done()
 	}()
+	worker.activeJobs.Add(1)
+	// Make sure that we don't accept any new work from old grab requests
+	// after we start shutting down.
+	if worker.isShuttingDown() {
+		return
+	}
 	f, ok := worker.funcs[inpack.fn]
 	if !ok {
 		return fmt.Errorf("The function does not exist: %s", inpack.fn)
diff --git a/worker/worker_test.go b/worker/worker_test.go
index 1029b74..baa50fe 100644
--- a/worker/worker_test.go
+++ b/worker/worker_test.go
@@ -2,7 +2,9 @@ package worker
 
 import (
 	"bytes"
+	"errors"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 )
@@ -223,3 +225,137 @@ func TestWorkWithoutReadyWithPanic(t *testing.T) {
 	}
 
 }
+
+// initWorker creates a worker and adds the localhost server to it
+func initWorker(t *testing.T) *Worker {
+	otherWorker := New(Unlimited)
+	if err := otherWorker.AddServer(Network, "127.0.0.1:4730"); err != nil {
+		t.Error(err)
+	}
+	return otherWorker
+}
+
+// submitEmptyInPack sends an empty inpack with the specified fn name to the worker.
+// It uses the first agent of the worker.
+func submitEmptyInPack(t *testing.T, worker *Worker, function string) {
+	if l := len(worker.agents); l != 1 {
+		t.Error("Expected the worker to have exactly one agent")
+	}
+	inpack := getInPack()
+	inpack.dataType = dtJobAssign
+	inpack.fn = function
+	inpack.a = worker.agents[0]
+	worker.in <- inpack
+}
+
+// TestShutdownSuccessJob tests that shutdown waits for an active job that will succeed
+func TestShutdownSuccessJob(t *testing.T) {
+	otherWorker := initWorker(t)
+	finishedJob := false
+	var wg sync.WaitGroup
+	wg.Add(1)
+	successJob := func(job Job) ([]byte, error) {
+		wg.Done()
+		// Sleep for 10ms to ensure that the shutdown waits for this to finish
+		time.Sleep(10 * time.Millisecond)
+		finishedJob = true
+		return nil, nil
+	}
+	if err := otherWorker.AddFunc("test", successJob, 0); err != nil {
+		t.Error(err)
+	}
+	if err := otherWorker.Ready(); err != nil {
+		t.Error(err)
+		return
+	}
+	submitEmptyInPack(t, otherWorker, "test")
+	go otherWorker.Work()
+	// Wait for successJob to start so that we know the shutdown doesn't begin
+	// before the job is being processed.
+	wg.Wait()
+	otherWorker.Shutdown()
+	if !finishedJob {
+		t.Error("Didn't finish job")
+	}
+}
+
+// TestShutdownFailureJob tests that shutdown waits for an active job that will fail
+func TestShutdownFailureJob(t *testing.T) {
+	otherWorker := initWorker(t)
+	var wg sync.WaitGroup
+	wg.Add(1)
+	finishedJob := false
+	failureJob := func(job Job) ([]byte, error) {
+		wg.Done()
+		// Sleep for 10ms to ensure that the shutdown waits for this to finish
+		time.Sleep(10 * time.Millisecond)
+		finishedJob = true
+		return nil, errors.New("Error!")
+	}
+
+	if err := otherWorker.AddFunc("test", failureJob, 0); err != nil {
+		t.Error(err)
+	}
+	if err := otherWorker.Ready(); err != nil {
+		t.Error(err)
+		return
+	}
+	submitEmptyInPack(t, otherWorker, "test")
+	go otherWorker.Work()
+	// Wait for failureJob to start so that we know the shutdown doesn't begin
+	// before the job is being processed.
+	wg.Wait()
+	otherWorker.Shutdown()
+	if !finishedJob {
+		t.Error("Didn't finish the failed job")
+	}
+}
+
+func TestSubmitMultipleJobs(t *testing.T) {
+	otherWorker := initWorker(t)
+	var startJobs sync.WaitGroup
+	startJobs.Add(2)
+	var jobsFinished int32 = 0
+	job := func(job Job) ([]byte, error) {
+		startJobs.Done()
+		// Sleep for 10ms to ensure that the shutdown waits for this to finish
+		time.Sleep(10 * time.Millisecond)
+		atomic.AddInt32(&jobsFinished, 1)
+		return nil, nil
+	}
+	if err := otherWorker.AddFunc("test", job, 0); err != nil {
+		t.Error(err)
+	}
+	if err := otherWorker.Ready(); err != nil {
+		t.Error(err)
+		return
+	}
+	submitEmptyInPack(t, otherWorker, "test")
+	submitEmptyInPack(t, otherWorker, "test")
+	go otherWorker.Work()
+	startJobs.Wait()
+	otherWorker.Shutdown()
+	if jobsFinished != 2 {
+		t.Error("Didn't run both jobs")
+	}
+}
+
+func TestSubmitJobAfterShutdown(t *testing.T) {
+	otherWorker := initWorker(t)
+	noRunJob := func(job Job) ([]byte, error) {
+		t.Error("This job shouldn't have been run")
+		return nil, nil
+	}
+	if err := otherWorker.AddFunc("test", noRunJob, 0); err != nil {
+		t.Error(err)
+	}
+	if err := otherWorker.Ready(); err != nil {
+		t.Error(err)
+		return
+	}
+	go otherWorker.Work()
+	otherWorker.Shutdown()
+	submitEmptyInPack(t, otherWorker, "test")
+	// Sleep for 10ms to make sure that the job doesn't run
+	time.Sleep(10 * time.Millisecond)
+}
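
Below is a minimal usage sketch of the graceful shutdown introduced by this patch. It is illustrative only: the import path github.com/mispook is not used; the assumed import path github.com/mikespook/gearman-go/worker, the server address, and the "reverse" function name are placeholders, while New, Unlimited, Network, AddServer, AddFunc, Ready, Work, and the new Shutdown come from the worker package shown above.

package main

import (
	"log"
	"os"
	"os/signal"
	"syscall"

	// Assumed import path for the patched worker package.
	"github.com/mikespook/gearman-go/worker"
)

func main() {
	w := worker.New(worker.Unlimited)

	// Assumed Gearman server address.
	if err := w.AddServer(worker.Network, "127.0.0.1:4730"); err != nil {
		log.Fatal(err)
	}
	// "reverse" is a hypothetical function name used for illustration.
	if err := w.AddFunc("reverse", func(job worker.Job) ([]byte, error) {
		// A real handler would do actual work with the job payload here.
		return []byte("done"), nil
	}, 0); err != nil {
		log.Fatal(err)
	}
	if err := w.Ready(); err != nil {
		log.Fatal(err)
	}
	go w.Work()

	// On SIGINT/SIGTERM, stop grabbing new jobs and wait for in-flight jobs.
	sig := make(chan os.Signal, 1)
	signal.Notify(sig, os.Interrupt, syscall.SIGTERM)
	<-sig
	// Shutdown blocks until active jobs finish, then closes the worker.
	w.Shutdown()
}

Calling Shutdown instead of Close lets in-flight jobs complete while no new jobs are grabbed from the server, which is exactly the behavior the new tests exercise.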