Add methods to retry failed workflows and jobs across all steps and queues

This commit is contained in:
Achim Rohn
2026-04-08 15:41:22 +02:00
parent d4714c4f27
commit d4008e3655
2 changed files with 44 additions and 9 deletions
+22 -3
View File
@@ -575,9 +575,8 @@ func (q *GeneralQueue) IncrementTries(ctx context.Context, jobID int, currentTri
return nil return nil
} }
// ResumeWorkflow resets jobs for a specific workflow in this queue so they can be processed again. // ResumeWorkflow resumes a specific workflow's execution in this queue by
// - All jobs with status InProgress are set back to Pending. // setting all InProgress jobs back to Pending, allowing them to be picked up again.
// - All jobs with status Failed are set back to Pending and their NumberOfTries is reset to 0.
// After resetting, if the queue is marked running, it will be started. // After resetting, if the queue is marked running, it will be started.
func (q *GeneralQueue) ResumeWorkflow(ctx context.Context, workflowId string) error { func (q *GeneralQueue) ResumeWorkflow(ctx context.Context, workflowId string) error {
// Reset in-progress jobs back to pending for the specific workflow // Reset in-progress jobs back to pending for the specific workflow
@@ -597,6 +596,26 @@ func (q *GeneralQueue) ResumeWorkflow(ctx context.Context, workflowId string) er
Debug("Reset ", inProgressCount, " in_progress jobs to pending for ", q.Name, " queue (workflow:", workflowId, ")") Debug("Reset ", inProgressCount, " in_progress jobs to pending for ", q.Name, " queue (workflow:", workflowId, ")")
} }
// Start the queue again if it is configured as running
if isRunning, err := q.IsRunning(ctx); err != nil {
Error("Failed to check ", q.Name, " queue state:", err)
} else if isRunning {
Debug("restarting ", q.Name, " queue")
if err := q.Start(ctx); err != nil {
Error("Failed to restart ", q.Name, " queue:", err)
return err
}
Debug(q.Name, " queue auto-started")
} else {
Debug(q.Name, " queue not running")
}
return nil
}
// RetryFailedWorkflow retries failed jobs for a specific workflow in this queue by
// setting all Failed jobs back to Pending and resetting their NumberOfTries to 0.
// After resetting, if the queue is marked running, it will be started.
func (q *GeneralQueue) RetryFailedWorkflow(ctx context.Context, workflowId string) error {
// Reset failed jobs back to pending and reset their try counter for the specific workflow // Reset failed jobs back to pending and reset their try counter for the specific workflow
failedCount, err := q.client.GeneralQueue.Update(). failedCount, err := q.client.GeneralQueue.Update().
Where( Where(
+22 -6
View File
@@ -115,9 +115,9 @@ func (w *Workflow) GenerateId() string {
return w.Identifier + "_" + now.Format("20060102150405") + "_" + ersteller.RandomString(5) return w.Identifier + "_" + now.Format("20060102150405") + "_" + ersteller.RandomString(5)
} }
// Resume restarts a specific workflow execution by its workflowId. // Resume restarts a specific workflow execution by its workflowId by
// It resets failed and in-progress jobs of all steps back to pending and // setting any in-progress jobs of all steps back to pending, then starts
// starts their queues again if they are running. // their queues again if they are running.
func (w *Workflow) Resume(ctx context.Context, workflowId string) error { func (w *Workflow) Resume(ctx context.Context, workflowId string) error {
var allErr error var allErr error
for _, step := range w.AllSteps { for _, step := range w.AllSteps {
@@ -133,9 +133,8 @@ func (w *Workflow) Resume(ctx context.Context, workflowId string) error {
} }
// ResumeAll restarts all executions of this workflow across all steps. // ResumeAll restarts all executions of this workflow across all steps.
// For each step it resets any in-progress jobs to pending and failed jobs to // For each step it resets any in-progress jobs to pending, then starts the
// pending with NumberOfTries reset to 0, then starts the queue again if it is // queue again if it is configured as running.
// configured as running.
func (w *Workflow) ResumeAll(ctx context.Context) error { func (w *Workflow) ResumeAll(ctx context.Context) error {
var allErr error var allErr error
for _, step := range w.AllSteps { for _, step := range w.AllSteps {
@@ -150,6 +149,23 @@ func (w *Workflow) ResumeAll(ctx context.Context) error {
return allErr return allErr
} }
// RetryFailed re-queues the failed jobs of a single workflow execution,
// identified by workflowId, on every step of this workflow. Each step's queue
// is asked to retry its failed jobs for that execution via RetryFailedWorkflow.
//
// A failure on one step does not stop the remaining steps from being
// processed. Every error is wrapped with the name of the step it came from,
// and all of them are returned joined together; the result is nil when every
// step succeeded.
func (w *Workflow) RetryFailed(ctx context.Context, workflowId string) error {
	var joined error
	for _, s := range w.AllSteps {
		// Safety: a step that has no queue yet gets one before it is used.
		if s.Queue == nil {
			s.initQueue()
		}

		err := s.Queue.RetryFailedWorkflow(ctx, workflowId)
		if err == nil {
			continue
		}
		// Keep going after a failure, but remember which step it came from.
		joined = errors.Join(joined, fmt.Errorf("step %s: %w", s.Name, err))
	}
	return joined
}
func NewCronTrigger(ctx context.Context, workflow *Workflow, d time.Duration) { func NewCronTrigger(ctx context.Context, workflow *Workflow, d time.Duration) {
go func() { go func() {
ticker := time.NewTicker(d) ticker := time.NewTicker(d)