From ead17f9ebffeb9116a37a6e44f85c25577bf0c39 Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Thu, 1 Feb 2024 20:51:29 -0500 Subject: [PATCH 01/38] Batch job POC * Introduces a "batch" concept, similar to batches present in Sidekiq Pro and GoodJob * Batches monitor a set of jobs, and when those jobs are completed can fire off a final job * This introduces a SolidQueue::JobBatch model, as well as the ability to enqueue jobs and associate them with the batch * There are still more ideas to figure out, but this provides a basic batch scaffolding to spark discussion --- README.md | 11 +++ app/models/solid_queue/claimed_execution.rb | 2 + app/models/solid_queue/job.rb | 6 +- app/models/solid_queue/job/executable.rb | 2 +- app/models/solid_queue/job_batch.rb | 96 +++++++++++++++++++ ...31013203_create_solid_queue_batch_table.rb | 21 ++++ lib/active_job/job_batch_id.rb | 22 +++++ lib/solid_queue.rb | 1 + lib/solid_queue/dispatcher.rb | 1 + lib/solid_queue/engine.rb | 1 + test/dummy/app/jobs/batch_completion_job.rb | 7 ++ test/dummy/app/jobs/sleepy_job.rb | 10 ++ test/models/solid_queue/job_batch_test.rb | 48 ++++++++++ 13 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 app/models/solid_queue/job_batch.rb create mode 100644 db/migrate/20240131013203_create_solid_queue_batch_table.rb create mode 100644 lib/active_job/job_batch_id.rb create mode 100644 test/dummy/app/jobs/batch_completion_job.rb create mode 100644 test/dummy/app/jobs/sleepy_job.rb create mode 100644 test/models/solid_queue/job_batch_test.rb diff --git a/README.md b/README.md index f048ce58..8e7fae66 100644 --- a/README.md +++ b/README.md @@ -584,6 +584,17 @@ class ApplicationMailer < ActionMailer::Base Rails.error.report(exception) raise exception end +``` + +## Batch jobs + +```rb +SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) do + 5.times.map { |i| SleepyJob.perform_later(i) } +end + +SolidQueue::JobBatch.enqueue(on_success: 
BatchCompletionJob) do + 5.times.map { |i| SleepyJob.perform_later(i) } end ``` diff --git a/app/models/solid_queue/claimed_execution.rb b/app/models/solid_queue/claimed_execution.rb index 5d0a4057..d777a8f2 100644 --- a/app/models/solid_queue/claimed_execution.rb +++ b/app/models/solid_queue/claimed_execution.rb @@ -71,6 +71,8 @@ def perform failed_with(result.error) raise result.error end + + job.job_batch.touch(:changed_at, :last_changed_at) if job.batch_id.present? ensure unblock_next_job end diff --git a/app/models/solid_queue/job.rb b/app/models/solid_queue/job.rb index 75eaf627..80582ee1 100644 --- a/app/models/solid_queue/job.rb +++ b/app/models/solid_queue/job.rb @@ -8,6 +8,8 @@ class EnqueueError < StandardError; end serialize :arguments, coder: JSON + belongs_to :job_batch, foreign_key: :batch_id, optional: true + class << self def enqueue_all(active_jobs) active_jobs.each { |job| job.scheduled_at ||= Time.current } @@ -55,6 +57,7 @@ def create_all_from_active_jobs(active_jobs) end def attributes_from_active_job(active_job) + active_job.batch_id = JobBatch.current_batch_id || active_job.batch_id { queue_name: active_job.queue_name || DEFAULT_QUEUE_NAME, active_job_id: active_job.job_id, @@ -62,7 +65,8 @@ def attributes_from_active_job(active_job) scheduled_at: active_job.scheduled_at, class_name: active_job.class.name, arguments: active_job.serialize, - concurrency_key: active_job.concurrency_key + concurrency_key: active_job.concurrency_key, + batch_id: active_job.batch_id } end end diff --git a/app/models/solid_queue/job/executable.rb b/app/models/solid_queue/job/executable.rb index b0a4cb93..08f07bb0 100644 --- a/app/models/solid_queue/job/executable.rb +++ b/app/models/solid_queue/job/executable.rb @@ -76,7 +76,7 @@ def dispatch_bypassing_concurrency_limits end def finished! - if SolidQueue.preserve_finished_jobs? + if SolidQueue.preserve_finished_jobs? || batch_id.present? touch(:finished_at) else destroy! 
diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb new file mode 100644 index 00000000..58bcee21 --- /dev/null +++ b/app/models/solid_queue/job_batch.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module SolidQueue + class JobBatch < Record + belongs_to :job, foreign_key: :job_id, optional: true + has_many :jobs, foreign_key: :batch_id + + scope :incomplete, -> { + where(finished_at: nil).where("changed_at IS NOT NULL OR last_changed_at < ?", 1.hour.ago) + } + + class << self + def current_batch_id + Thread.current[:current_batch_id] + end + + def enqueue(attributes = {}) + previous_batch_id = current_batch_id.presence || nil + + job_batch = nil + transaction do + job_batch = create!(batch_attributes(attributes)) + Thread.current[:current_batch_id] = job_batch.id + yield + end + + job_batch + ensure + Thread.current[:current_batch_id] = previous_batch_id + end + + def dispatch_finished_batches + incomplete.order(:id).pluck(:id).each do |id| + transaction do + where(id:).non_blocking_lock.each(&:finish) + end + end + end + + private + + def batch_attributes(attributes) + attributes = case attributes + in { on_finish: on_finish_klass } + attributes.merge( + job_class: on_finish_klass, + completion_type: "success" + ) + in { on_success: on_success_klass } + attributes.merge( + job_class: on_success_klass, + completion_type: "success" + ) + end + + attributes.except(:on_finish, :on_success) + end + end + + def finished? + finished_at.present? + end + + def finish + return if finished? + reset_changed_at + jobs.find_each do |next_job| + # FIXME: If it's failed but is going to retry, how do we know? + # Because we need to know if we will determine what the failed execution means + # FIXME: use "success" vs "finish" vs "discard" `completion_type` to determine + # how to analyze each job + return unless next_job.finished? + end + + attrs = {} + + if job_class.present? 
+ job_klass = job_class.constantize + active_job = job_klass.perform_later(self) + attrs[:job] = Job.find_by(active_job_id: active_job.job_id) + end + + update!({ finished_at: Time.zone.now }.merge(attrs)) + end + + private + + def reset_changed_at + if changed_at.blank? && last_changed_at.present? + update_columns(last_changed_at: Time.zone.now) # wait another hour before we check again + else + update_columns(changed_at: nil) # clear out changed_at so we ignore this until the next job finishes + end + end + end +end diff --git a/db/migrate/20240131013203_create_solid_queue_batch_table.rb b/db/migrate/20240131013203_create_solid_queue_batch_table.rb new file mode 100644 index 00000000..bf8d97ce --- /dev/null +++ b/db/migrate/20240131013203_create_solid_queue_batch_table.rb @@ -0,0 +1,21 @@ +class CreateSolidQueueBatchTable < ActiveRecord::Migration[7.1] + def change + create_table :solid_queue_job_batches do |t| + t.references :job, index: { unique: true } + t.string :job_class + t.string :completion_type + t.datetime :finished_at + t.datetime :changed_at + t.datetime :last_changed_at + t.timestamps + + t.index [ :finished_at ] + t.index [ :changed_at ] + t.index [ :last_changed_at ] + end + + add_reference :solid_queue_jobs, :batch, index: true + add_foreign_key :solid_queue_jobs, :solid_queue_job_batches, column: :batch_id, on_delete: :cascade + add_foreign_key :solid_queue_job_batches, :solid_queue_jobs, column: :job_id + end +end diff --git a/lib/active_job/job_batch_id.rb b/lib/active_job/job_batch_id.rb new file mode 100644 index 00000000..5810d152 --- /dev/null +++ b/lib/active_job/job_batch_id.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +# Inspired by active_job/core.rb docs +# https://github.com/rails/rails/blob/1c2529b9a6ba5a1eff58be0d0373d7d9d401015b/activejob/lib/active_job/core.rb#L136 +module ActiveJob + module JobBatchId + extend ActiveSupport::Concern + + included do + attr_accessor :batch_id + end + + def serialize + 
super.merge('batch_id' => batch_id) + end + + def deserialize(job_data) + super + self.batch_id = job_data['batch_id'] + end + end +end diff --git a/lib/solid_queue.rb b/lib/solid_queue.rb index e0d51c8c..1277ea67 100644 --- a/lib/solid_queue.rb +++ b/lib/solid_queue.rb @@ -5,6 +5,7 @@ require "active_job" require "active_job/queue_adapters" +require "active_job/job_batch_id" require "active_support" require "active_support/core_ext/numeric/time" diff --git a/lib/solid_queue/dispatcher.rb b/lib/solid_queue/dispatcher.rb index 1583e1dd..5bcbe0e8 100644 --- a/lib/solid_queue/dispatcher.rb +++ b/lib/solid_queue/dispatcher.rb @@ -37,6 +37,7 @@ def poll def dispatch_next_batch with_polling_volume do ScheduledExecution.dispatch_next_batch(batch_size) + SolidQueue::JobBatch.dispatch_finished_batches end end diff --git a/lib/solid_queue/engine.rb b/lib/solid_queue/engine.rb index d10997c7..452ae445 100644 --- a/lib/solid_queue/engine.rb +++ b/lib/solid_queue/engine.rb @@ -35,6 +35,7 @@ class Engine < ::Rails::Engine initializer "solid_queue.active_job.extensions" do ActiveSupport.on_load :active_job do include ActiveJob::ConcurrencyControls + include ActiveJob::JobBatchId end end end diff --git a/test/dummy/app/jobs/batch_completion_job.rb b/test/dummy/app/jobs/batch_completion_job.rb new file mode 100644 index 00000000..0fb17284 --- /dev/null +++ b/test/dummy/app/jobs/batch_completion_job.rb @@ -0,0 +1,7 @@ +class BatchCompletionJob < ApplicationJob + queue_as :background + + def perform(batch) + Rails.logger.info "#{batch.jobs.size} jobs completed!" + end +end diff --git a/test/dummy/app/jobs/sleepy_job.rb b/test/dummy/app/jobs/sleepy_job.rb new file mode 100644 index 00000000..dd105cdc --- /dev/null +++ b/test/dummy/app/jobs/sleepy_job.rb @@ -0,0 +1,10 @@ +class SleepyJob < ApplicationJob + queue_as :background + + retry_on Exception, wait: 30.seconds, attempts: 5 + + def perform(seconds_to_sleep) + Rails.logger.info "Feeling #{seconds_to_sleep} seconds sleepy..." 
+ sleep seconds_to_sleep + end +end diff --git a/test/models/solid_queue/job_batch_test.rb b/test/models/solid_queue/job_batch_test.rb new file mode 100644 index 00000000..962904e8 --- /dev/null +++ b/test/models/solid_queue/job_batch_test.rb @@ -0,0 +1,48 @@ +require "test_helper" + +class SolidQueue::JobBatchTest < ActiveSupport::TestCase + self.use_transactional_tests = false + + teardown do + SolidQueue::Job.destroy_all + SolidQueue::JobBatch.destroy_all + end + + class NiceJob < ApplicationJob + retry_on Exception, wait: 1.second + + def perform(arg) + Rails.logger.info "Hi #{arg}!" + end + end + + test "batch will be completed on success" do + batch = SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) {} + assert_equal "success", batch.completion_type + assert_equal BatchCompletionJob.name, batch.job_class + end + + test "batch will be completed on finish" do + batch = SolidQueue::JobBatch.enqueue(on_success: BatchCompletionJob) {} + assert_equal "success", batch.completion_type + assert_equal BatchCompletionJob.name, batch.job_class + end + + test "sets the batch_id on jobs created inside of the enqueue block" do + batch = SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) do + NiceJob.perform_later("world") + NiceJob.perform_later("people") + end + + assert_equal 2, SolidQueue::Job.count + assert_equal [batch.id] * 2, SolidQueue::Job.last(2).map(&:batch_id) + end + + test "batch id is present inside the block" do + assert_nil SolidQueue::JobBatch.current_batch_id + SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) do + assert_not_nil SolidQueue::JobBatch.current_batch_id + end + assert_nil SolidQueue::JobBatch.current_batch_id + end +end From bc56edd9d984f027b9bebacf43b14d814519907c Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Mon, 5 Feb 2024 17:17:31 -0500 Subject: [PATCH 02/38] Use ActiveSupport::IsolatedExecutionState to honor user isolation level setting --- 
app/models/solid_queue/job_batch.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index 58bcee21..fb281fbb 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -11,7 +11,7 @@ class JobBatch < Record class << self def current_batch_id - Thread.current[:current_batch_id] + ActiveSupport::IsolatedExecutionState[:current_batch_id] end def enqueue(attributes = {}) @@ -20,13 +20,13 @@ def enqueue(attributes = {}) job_batch = nil transaction do job_batch = create!(batch_attributes(attributes)) - Thread.current[:current_batch_id] = job_batch.id + ActiveSupport::IsolatedExecutionState[:current_batch_id] = job_batch.id yield end job_batch ensure - Thread.current[:current_batch_id] = previous_batch_id + ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id end def dispatch_finished_batches From 504042b32e87ad0ccc4cfd0a8eac3bdf2c42a22b Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Mon, 5 Feb 2024 17:18:03 -0500 Subject: [PATCH 03/38] Ability to retrieve batch from a job --- lib/active_job/job_batch_id.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/active_job/job_batch_id.rb b/lib/active_job/job_batch_id.rb index 5810d152..fc6978a6 100644 --- a/lib/active_job/job_batch_id.rb +++ b/lib/active_job/job_batch_id.rb @@ -18,5 +18,9 @@ def deserialize(job_data) super self.batch_id = job_data['batch_id'] end + + def batch + @batch ||= SolidQueue::JobBatch.find_by(id: batch_id) + end end end From d72d42e14431a7a727dd3f82471e054ccb0edbd9 Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Wed, 7 Feb 2024 22:06:21 -0500 Subject: [PATCH 04/38] Allow batch jobs to be instances * This means we can store the arguments and settings by letting the user do `BatchJob.new(arguments).set(options)` * Yield the batch in `enqueue` in case someone 
needs info from it * When you serialize then deserialize an activejob instance, the arguments are in the serialized_arguments field and can only be transferred over by the private method `deserialize_arguments_if_needed`. This is pretty janky, so there is probably something I'm missing * `perform_all_later` lets us do a perform_later even with an instance, which does not seem to be possible on the instances themselves * Make sure `batch` is still first arg of the batch callback * Add spec for adding arguments and options to the batch callback --- app/models/solid_queue/job_batch.rb | 39 +++++++++++-------- ...31013203_create_solid_queue_batch_table.rb | 4 +- test/models/solid_queue/job_batch_test.rb | 26 +++++++++++-- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index fb281fbb..a5099731 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -5,6 +5,9 @@ class JobBatch < Record belongs_to :job, foreign_key: :job_id, optional: true has_many :jobs, foreign_key: :batch_id + serialize :on_finish_active_job, coder: JSON + serialize :on_success_active_job, coder: JSON + scope :incomplete, -> { where(finished_at: nil).where("changed_at IS NOT NULL OR last_changed_at < ?", 1.hour.ago) } @@ -21,7 +24,7 @@ def enqueue(attributes = {}) transaction do job_batch = create!(batch_attributes(attributes)) ActiveSupport::IsolatedExecutionState[:current_batch_id] = job_batch.id - yield + yield job_batch end job_batch @@ -40,20 +43,22 @@ def dispatch_finished_batches private def batch_attributes(attributes) - attributes = case attributes - in { on_finish: on_finish_klass } - attributes.merge( - job_class: on_finish_klass, - completion_type: "success" - ) - in { on_success: on_success_klass } - attributes.merge( - job_class: on_success_klass, - completion_type: "success" - ) + on_finish_klass = attributes.delete(:on_finish) + on_success_klass = 
attributes.delete(:on_success) + + if on_finish_klass.present? + attributes[:on_finish_active_job] = as_active_job(on_finish_klass).serialize + end + + if on_success_klass.present? + attributes[:on_success_active_job] = as_active_job(on_success_klass).serialize end - attributes.except(:on_finish, :on_success) + attributes + end + + def as_active_job(active_job_klass) + active_job_klass.is_a?(ActiveJob::Base) ? active_job_klass : active_job_klass.new end end @@ -74,9 +79,11 @@ def finish attrs = {} - if job_class.present? - job_klass = job_class.constantize - active_job = job_klass.perform_later(self) + if on_finish_active_job.present? + active_job = ActiveJob::Base.deserialize(on_finish_active_job) + active_job.send(:deserialize_arguments_if_needed) + active_job.arguments = [self] + Array.wrap(active_job.arguments) + ActiveJob.perform_all_later([active_job]) attrs[:job] = Job.find_by(active_job_id: active_job.job_id) end diff --git a/db/migrate/20240131013203_create_solid_queue_batch_table.rb b/db/migrate/20240131013203_create_solid_queue_batch_table.rb index bf8d97ce..26540b9c 100644 --- a/db/migrate/20240131013203_create_solid_queue_batch_table.rb +++ b/db/migrate/20240131013203_create_solid_queue_batch_table.rb @@ -2,8 +2,8 @@ class CreateSolidQueueBatchTable < ActiveRecord::Migration[7.1] def change create_table :solid_queue_job_batches do |t| t.references :job, index: { unique: true } - t.string :job_class - t.string :completion_type + t.string :on_finish_active_job + t.string :on_success_active_job t.datetime :finished_at t.datetime :changed_at t.datetime :last_changed_at diff --git a/test/models/solid_queue/job_batch_test.rb b/test/models/solid_queue/job_batch_test.rb index 962904e8..30684caf 100644 --- a/test/models/solid_queue/job_batch_test.rb +++ b/test/models/solid_queue/job_batch_test.rb @@ -8,6 +8,12 @@ class SolidQueue::JobBatchTest < ActiveSupport::TestCase SolidQueue::JobBatch.destroy_all end + class BatchWithArgumentsJob < ApplicationJob + def 
perform(batch, arg1, arg2) + Rails.logger.info "Hi #{batch.id}, #{arg1}, #{arg2}!" + end + end + class NiceJob < ApplicationJob retry_on Exception, wait: 1.second @@ -18,14 +24,14 @@ def perform(arg) test "batch will be completed on success" do batch = SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) {} - assert_equal "success", batch.completion_type - assert_equal BatchCompletionJob.name, batch.job_class + assert_not_nil batch.on_finish_active_job + assert_equal BatchCompletionJob.name, batch.on_finish_active_job["job_class"] end test "batch will be completed on finish" do batch = SolidQueue::JobBatch.enqueue(on_success: BatchCompletionJob) {} - assert_equal "success", batch.completion_type - assert_equal BatchCompletionJob.name, batch.job_class + assert_not_nil batch.on_success_active_job + assert_equal BatchCompletionJob.name, batch.on_success_active_job["job_class"] end test "sets the batch_id on jobs created inside of the enqueue block" do @@ -45,4 +51,16 @@ def perform(arg) end assert_nil SolidQueue::JobBatch.current_batch_id end + + test "allow arguments and options for callbacks" do + SolidQueue::JobBatch.enqueue( + on_finish: BatchWithArgumentsJob.new(1, 2).set(queue: :batch), + ) do + NiceJob.perform_later("world") + end + + assert_not_nil SolidQueue::JobBatch.last.on_finish_active_job["arguments"] + assert_equal SolidQueue::JobBatch.last.on_finish_active_job["arguments"], [1, 2] + assert_equal SolidQueue::JobBatch.last.on_finish_active_job["queue_name"], "batch" + end end From 6ceca41709c12ca7d20d0f4374961827d1c04532 Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Fri, 22 Mar 2024 20:22:25 -0400 Subject: [PATCH 05/38] Use text so the jobs store properly on mysql * Support Ruby < 3.2 by removing the implicit key/variable syntax --- app/models/solid_queue/job_batch.rb | 6 +++--- .../20240131013203_create_solid_queue_batch_table.rb | 4 ++-- lib/active_job/job_batch_id.rb | 4 ++-- 
test/models/solid_queue/job_batch_test.rb | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index a5099731..3d0de10d 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -35,7 +35,7 @@ def enqueue(attributes = {}) def dispatch_finished_batches incomplete.order(:id).pluck(:id).each do |id| transaction do - where(id:).non_blocking_lock.each(&:finish) + where(id: id).non_blocking_lock.each(&:finish) end end end @@ -82,8 +82,8 @@ def finish if on_finish_active_job.present? active_job = ActiveJob::Base.deserialize(on_finish_active_job) active_job.send(:deserialize_arguments_if_needed) - active_job.arguments = [self] + Array.wrap(active_job.arguments) - ActiveJob.perform_all_later([active_job]) + active_job.arguments = [ self ] + Array.wrap(active_job.arguments) + ActiveJob.perform_all_later([ active_job ]) attrs[:job] = Job.find_by(active_job_id: active_job.job_id) end diff --git a/db/migrate/20240131013203_create_solid_queue_batch_table.rb b/db/migrate/20240131013203_create_solid_queue_batch_table.rb index 26540b9c..8e9e79af 100644 --- a/db/migrate/20240131013203_create_solid_queue_batch_table.rb +++ b/db/migrate/20240131013203_create_solid_queue_batch_table.rb @@ -2,8 +2,8 @@ class CreateSolidQueueBatchTable < ActiveRecord::Migration[7.1] def change create_table :solid_queue_job_batches do |t| t.references :job, index: { unique: true } - t.string :on_finish_active_job - t.string :on_success_active_job + t.text :on_finish_active_job + t.text :on_success_active_job t.datetime :finished_at t.datetime :changed_at t.datetime :last_changed_at diff --git a/lib/active_job/job_batch_id.rb b/lib/active_job/job_batch_id.rb index fc6978a6..494e197f 100644 --- a/lib/active_job/job_batch_id.rb +++ b/lib/active_job/job_batch_id.rb @@ -11,12 +11,12 @@ module JobBatchId end def serialize - super.merge('batch_id' => batch_id) + 
super.merge("batch_id" => batch_id) end def deserialize(job_data) super - self.batch_id = job_data['batch_id'] + self.batch_id = job_data["batch_id"] end def batch diff --git a/test/models/solid_queue/job_batch_test.rb b/test/models/solid_queue/job_batch_test.rb index 30684caf..e49f59c2 100644 --- a/test/models/solid_queue/job_batch_test.rb +++ b/test/models/solid_queue/job_batch_test.rb @@ -23,13 +23,13 @@ def perform(arg) end test "batch will be completed on success" do - batch = SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) {} + batch = SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) { } assert_not_nil batch.on_finish_active_job assert_equal BatchCompletionJob.name, batch.on_finish_active_job["job_class"] end test "batch will be completed on finish" do - batch = SolidQueue::JobBatch.enqueue(on_success: BatchCompletionJob) {} + batch = SolidQueue::JobBatch.enqueue(on_success: BatchCompletionJob) { } assert_not_nil batch.on_success_active_job assert_equal BatchCompletionJob.name, batch.on_success_active_job["job_class"] end @@ -41,7 +41,7 @@ def perform(arg) end assert_equal 2, SolidQueue::Job.count - assert_equal [batch.id] * 2, SolidQueue::Job.last(2).map(&:batch_id) + assert_equal [ batch.id ] * 2, SolidQueue::Job.last(2).map(&:batch_id) end test "batch id is present inside the block" do @@ -60,7 +60,7 @@ def perform(arg) end assert_not_nil SolidQueue::JobBatch.last.on_finish_active_job["arguments"] - assert_equal SolidQueue::JobBatch.last.on_finish_active_job["arguments"], [1, 2] + assert_equal SolidQueue::JobBatch.last.on_finish_active_job["arguments"], [ 1, 2 ] assert_equal SolidQueue::JobBatch.last.on_finish_active_job["queue_name"], "batch" end end From 953bb3243640dbaea71308cc7102d9543d67abe0 Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Mon, 23 Sep 2024 22:50:29 -0400 Subject: [PATCH 06/38] Handle on_failure and on_success * on_failure fires the first time any of the jobs fail, even 
once * on_success only fires if all jobs work (after retries) * remove unused job_id --- app/models/solid_queue/job_batch.rb | 53 ++++++++++++++----- ...31013203_create_solid_queue_batch_table.rb | 3 +- test/test_helpers/jobs_test_helper.rb | 8 +++ 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index 3d0de10d..1bac9139 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -7,10 +7,12 @@ class JobBatch < Record serialize :on_finish_active_job, coder: JSON serialize :on_success_active_job, coder: JSON + serialize :on_failure_active_job, coder: JSON scope :incomplete, -> { where(finished_at: nil).where("changed_at IS NOT NULL OR last_changed_at < ?", 1.hour.ago) } + scope :finished, -> { where.not(finished_at: nil) } class << self def current_batch_id @@ -45,6 +47,7 @@ def dispatch_finished_batches def batch_attributes(attributes) on_finish_klass = attributes.delete(:on_finish) on_success_klass = attributes.delete(:on_success) + on_failure_klass = attributes.delete(:on_failure) if on_finish_klass.present? attributes[:on_finish_active_job] = as_active_job(on_finish_klass).serialize @@ -54,6 +57,10 @@ def batch_attributes(attributes) attributes[:on_success_active_job] = as_active_job(on_success_klass).serialize end + if on_failure_klass.present? + attributes[:on_failure_active_job] = as_active_job(on_failure_klass).serialize + end + attributes end @@ -69,22 +76,29 @@ def finished? def finish return if finished? reset_changed_at - jobs.find_each do |next_job| - # FIXME: If it's failed but is going to retry, how do we know? - # Because we need to know if we will determine what the failed execution means - # FIXME: use "success" vs "finish" vs "discard" `completion_type` to determine - # how to analyze each job - return unless next_job.finished? 
- end + all_jobs_succeeded = true attrs = {} + jobs.find_each do |next_job| + # SolidQueue does treats `discard_on` differently than failures. The job will report as being :finished, + # and there is no record of the failure. + # GoodJob would report a discard as an error. It's possible we should do that in the future? + if fire_failure_job?(next_job) + perform_completion_job(:on_failure_active_job, attrs) + update!(attrs) + end + + status = next_job.status + all_jobs_succeeded = all_jobs_succeeded && status != :failed + return unless status.in?([ :finished, :failed ]) + end if on_finish_active_job.present? - active_job = ActiveJob::Base.deserialize(on_finish_active_job) - active_job.send(:deserialize_arguments_if_needed) - active_job.arguments = [ self ] + Array.wrap(active_job.arguments) - ActiveJob.perform_all_later([ active_job ]) - attrs[:job] = Job.find_by(active_job_id: active_job.job_id) + perform_completion_job(:on_finish_active_job, attrs) + end + + if on_success_active_job.present? && all_jobs_succeeded + perform_completion_job(:on_success_active_job, attrs) end update!({ finished_at: Time.zone.now }.merge(attrs)) @@ -92,6 +106,21 @@ def finish private + def fire_failure_job?(job) + return false if on_failure_active_job.blank? || job.failed_execution.blank? + job = ActiveJob::Base.deserialize(on_failure_active_job) + job.provider_job_id.blank? + end + + def perform_completion_job(job_field, attrs) + active_job = ActiveJob::Base.deserialize(send(job_field)) + active_job.send(:deserialize_arguments_if_needed) + active_job.arguments = [ self ] + Array.wrap(active_job.arguments) + ActiveJob.perform_all_later([ active_job ]) + active_job.provider_job_id = Job.find_by(active_job_id: active_job.job_id).id + attrs[job_field] = active_job.serialize + end + def reset_changed_at if changed_at.blank? && last_changed_at.present? 
update_columns(last_changed_at: Time.zone.now) # wait another hour before we check again diff --git a/db/migrate/20240131013203_create_solid_queue_batch_table.rb b/db/migrate/20240131013203_create_solid_queue_batch_table.rb index 8e9e79af..f97faee5 100644 --- a/db/migrate/20240131013203_create_solid_queue_batch_table.rb +++ b/db/migrate/20240131013203_create_solid_queue_batch_table.rb @@ -1,9 +1,9 @@ class CreateSolidQueueBatchTable < ActiveRecord::Migration[7.1] def change create_table :solid_queue_job_batches do |t| - t.references :job, index: { unique: true } t.text :on_finish_active_job t.text :on_success_active_job + t.text :on_failure_active_job t.datetime :finished_at t.datetime :changed_at t.datetime :last_changed_at @@ -16,6 +16,5 @@ def change add_reference :solid_queue_jobs, :batch, index: true add_foreign_key :solid_queue_jobs, :solid_queue_job_batches, column: :batch_id, on_delete: :cascade - add_foreign_key :solid_queue_job_batches, :solid_queue_jobs, column: :job_id end end diff --git a/test/test_helpers/jobs_test_helper.rb b/test/test_helpers/jobs_test_helper.rb index 8b71e7f6..b000a65d 100644 --- a/test/test_helpers/jobs_test_helper.rb +++ b/test/test_helpers/jobs_test_helper.rb @@ -17,6 +17,14 @@ def wait_for_jobs_to_be_released_for(timeout = 1.second) end end + def wait_for_job_batches_to_finish_for(timeout = 1.second) + wait_while_with_timeout(timeout) do + skip_active_record_query_cache do + SolidQueue::JobBatch.where(finished_at: nil).any? 
+ end + end + end + def assert_unfinished_jobs(*jobs) skip_active_record_query_cache do assert_equal jobs.map(&:job_id).sort, SolidQueue::Job.where(finished_at: nil).map(&:active_job_id).sort From bd16f4adc6b01c65f426a3b08a8bb97fbf9eba0b Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Mon, 23 Sep 2024 22:56:52 -0400 Subject: [PATCH 07/38] Allow enqueueing into a batch instance * Allows enqueueing a job within a job, as part of the batch --- app/models/solid_queue/job_batch.rb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index 1bac9139..1173c01f 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -69,6 +69,20 @@ def as_active_job(active_job_klass) end end + # Instance-level enqueue + def enqueue(attributes = {}) + previous_batch_id = self.class.current_batch_id.presence || nil + + transaction do + ActiveSupport::IsolatedExecutionState[:current_batch_id] = id + yield self + end + + self + ensure + ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id + end + def finished? finished_at.present? end From 2998d74ee734722955ef5ba10b50dcc9ac338446 Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Mon, 23 Sep 2024 22:57:56 -0400 Subject: [PATCH 08/38] Block enqueueing if the batch is finished --- app/models/solid_queue/job_batch.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index 1173c01f..1a94da03 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -71,6 +71,8 @@ def as_active_job(active_job_klass) # Instance-level enqueue def enqueue(attributes = {}) + raise "You cannot enqueue a batch that is already finished" if finished? 
+ previous_batch_id = self.class.current_batch_id.presence || nil transaction do From 7c60234da6a110deeaaf04904370bef76bd568f8 Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Mon, 23 Sep 2024 23:03:47 -0400 Subject: [PATCH 09/38] Migration to allow nesting batches * Support nested batches * Parent batches will not complete until all child batches have been completed --- app/models/solid_queue/job_batch.rb | 45 ++++++++++++------- ...31013203_create_solid_queue_batch_table.rb | 1 + 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index 1a94da03..eb41f7de 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -3,7 +3,9 @@ module SolidQueue class JobBatch < Record belongs_to :job, foreign_key: :job_id, optional: true + belongs_to :parent_job_batch, foreign_key: :parent_job_batch_id, class_name: "SolidQueue::JobBatch", optional: true has_many :jobs, foreign_key: :batch_id + has_many :children, foreign_key: :parent_job_batch_id, class_name: "SolidQueue::JobBatch" serialize :on_finish_active_job, coder: JSON serialize :on_success_active_job, coder: JSON @@ -20,28 +22,33 @@ def current_batch_id end def enqueue(attributes = {}) - previous_batch_id = current_batch_id.presence || nil - job_batch = nil transaction do job_batch = create!(batch_attributes(attributes)) - ActiveSupport::IsolatedExecutionState[:current_batch_id] = job_batch.id - yield job_batch + wrap_in_batch_context(job_batch.id) do + yield job_batch + end end job_batch - ensure - ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id end def dispatch_finished_batches incomplete.order(:id).pluck(:id).each do |id| transaction do - where(id: id).non_blocking_lock.each(&:finish) + where(id: id).includes(:children, :jobs).non_blocking_lock.each(&:finish) end end end + def wrap_in_batch_context(batch_id) + previous_batch_id = 
current_batch_id.presence || nil + ActiveSupport::IsolatedExecutionState[:current_batch_id] = batch_id + yield + ensure + ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id + end + private def batch_attributes(attributes) @@ -61,6 +68,8 @@ def batch_attributes(attributes) attributes[:on_failure_active_job] = as_active_job(on_failure_klass).serialize end + attributes[:parent_job_batch_id] = current_batch_id if current_batch_id.present? + attributes end @@ -73,16 +82,13 @@ def as_active_job(active_job_klass) def enqueue(attributes = {}) raise "You cannot enqueue a batch that is already finished" if finished? - previous_batch_id = self.class.current_batch_id.presence || nil - transaction do - ActiveSupport::IsolatedExecutionState[:current_batch_id] = id - yield self + self.class.wrap_in_batch_context(id) do + yield self + end end self - ensure - ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id end def finished? @@ -109,6 +115,10 @@ def finish return unless status.in?([ :finished, :failed ]) end + children.find_each do |child| + return unless child.finished? + end + if on_finish_active_job.present? perform_completion_job(:on_finish_active_job, attrs) end @@ -117,7 +127,10 @@ def finish perform_completion_job(:on_success_active_job, attrs) end - update!({ finished_at: Time.zone.now }.merge(attrs)) + transaction do + parent_job_batch.touch(:changed_at, :last_changed_at) if parent_job_batch_id.present? 
+ update!({ finished_at: Time.zone.now }.merge(attrs)) + end end private @@ -132,7 +145,9 @@ def perform_completion_job(job_field, attrs) active_job = ActiveJob::Base.deserialize(send(job_field)) active_job.send(:deserialize_arguments_if_needed) active_job.arguments = [ self ] + Array.wrap(active_job.arguments) - ActiveJob.perform_all_later([ active_job ]) + self.class.wrap_in_batch_context(id) do + ActiveJob.perform_all_later([ active_job ]) + end active_job.provider_job_id = Job.find_by(active_job_id: active_job.job_id).id attrs[job_field] = active_job.serialize end diff --git a/db/migrate/20240131013203_create_solid_queue_batch_table.rb b/db/migrate/20240131013203_create_solid_queue_batch_table.rb index f97faee5..91b76ee8 100644 --- a/db/migrate/20240131013203_create_solid_queue_batch_table.rb +++ b/db/migrate/20240131013203_create_solid_queue_batch_table.rb @@ -1,6 +1,7 @@ class CreateSolidQueueBatchTable < ActiveRecord::Migration[7.1] def change create_table :solid_queue_job_batches do |t| + t.references :parent_job_batch, index: true # FIXME: foreign key t.text :on_finish_active_job t.text :on_success_active_job t.text :on_failure_active_job From fc319c9217a810388aea70d00cbde875ea0a2daa Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Wed, 25 Sep 2024 21:51:35 -0400 Subject: [PATCH 10/38] Expanded batch readme --- README.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8e7fae66..d8665650 100644 --- a/README.md +++ b/README.md @@ -588,12 +588,62 @@ class ApplicationMailer < ActionMailer::Base ## Batch jobs +SolidQueue offers support for batching jobs. This allows you to track progress of a set of jobs, +and optionally trigger callbacks based on their status. 
It supports the following: + +- Relating jobs to a batch, to track their status +- Three available callbacks to fire: + - `on_finish`: Fired when all jobs have finished, including retries. Fires even when some jobs have failed. + - `on_success`: Fired when all jobs have succeeded, including retries. Will not fire if any jobs have failed, but will fire if jobs have been discarded using `discard_on` + - `on_failure`: Fired the _first_ time a job fails, after all retries are exhausted. +- If a job is part of a batch, it can enqueue more jobs for that batch using `batch#enqueue` +- Batches can be nested within other batches, creating a hierarchy. Outer batches will not finish until all nested batches have finished. + ```rb -SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) do - 5.times.map { |i| SleepyJob.perform_later(i) } +class SleepyJob < ApplicationJob + def perform(seconds_to_sleep) + Rails.logger.info "Feeling #{seconds_to_sleep} seconds sleepy..." + sleep seconds_to_sleep + end +end + +class MultiStepJob < ApplicationJob + def perform + batch.enqueue do + SleepyJob.perform_later(5) + # Because of this nested batch, the top-level batch won't finish until the inner, + # 10 second job finishes + # Both jobs will still run simultaneously + SolidQueue::JobBatch.enqueue do + SleepyJob.perform_later(10) + end + end + end +end + +class BatchFinishJob < ApplicationJob + def perform(batch) # batch is always the default first argument + Rails.logger.info "Good job finishing all jobs" + end +end + +class BatchSuccessJob < ApplicationJob + def perform(batch) # batch is always the default first argument + Rails.logger.info "Good job finishing all jobs, and all of them worked!" + end +end + +class BatchFailureJob < ApplicationJob + def perform(batch) # batch is always the default first argument + Rails.logger.info "At least one job failed, sorry!" 
+ end end -SolidQueue::JobBatch.enqueue(on_success: BatchCompletionJob) do +SolidQueue::JobBatch.enqueue( + on_finish: BatchFinishJob, + on_success: BatchSuccessJob, + on_failure: BatchFailureJob +) do 5.times.map { |i| SleepyJob.perform_later(i) } end ``` From 871aef2d948690b73bc998528954dbddcf206363 Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Wed, 25 Sep 2024 22:05:28 -0400 Subject: [PATCH 11/38] Force an initial batch check --- app/models/solid_queue/job_batch.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index eb41f7de..bd01475d 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -69,6 +69,9 @@ def batch_attributes(attributes) end attributes[:parent_job_batch_id] = current_batch_id if current_batch_id.present? + # Set it initially, so we check the batch even if there are no jobs + attributes[:changed_at] = Time.zone.now + attributes[:last_changed_at] = Time.zone.now attributes end From 2f05ba9f29c3684ac2e01059d1f202e8ec2a0249 Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Wed, 25 Sep 2024 22:27:27 -0400 Subject: [PATCH 12/38] Initial batch lifecycle tests * Attach success jobs to the parent batch, not to the current batch (which has already finished at this point) --- app/models/solid_queue/job_batch.rb | 2 +- test/integration/batch_lifecycle_test.rb | 83 ++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 test/integration/batch_lifecycle_test.rb diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb index bd01475d..40e183b5 100644 --- a/app/models/solid_queue/job_batch.rb +++ b/app/models/solid_queue/job_batch.rb @@ -148,7 +148,7 @@ def perform_completion_job(job_field, attrs) active_job = ActiveJob::Base.deserialize(send(job_field)) 
active_job.send(:deserialize_arguments_if_needed) active_job.arguments = [ self ] + Array.wrap(active_job.arguments) - self.class.wrap_in_batch_context(id) do + self.class.wrap_in_batch_context(parent_job_batch_id || self.class.current_batch_id) do ActiveJob.perform_all_later([ active_job ]) end active_job.provider_job_id = Job.find_by(active_job_id: active_job.job_id).id diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb new file mode 100644 index 00000000..22714315 --- /dev/null +++ b/test/integration/batch_lifecycle_test.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +require "test_helper" + +class BatchLifecycleTest < ActiveSupport::TestCase + setup do + @worker = SolidQueue::Worker.new(queues: "background", threads: 3) + @dispatcher = SolidQueue::Dispatcher.new(batch_size: 10, polling_interval: 0.2) + end + + teardown do + @worker.stop + @dispatcher.stop + + JobBuffer.clear + + SolidQueue::Job.destroy_all + SolidQueue::JobBatch.destroy_all + end + + class BatchOnSuccessJob < ApplicationJob + queue_as :background + + def perform(batch, custom_message = "") + JobBuffer.add "#{custom_message}: #{batch.jobs.size} jobs succeeded!" 
+ end + end + + class AddsMoreJobsJob < ApplicationJob + queue_as :background + + def perform + batch.enqueue do + AddToBufferJob.perform_later "added from inside 1" + AddToBufferJob.perform_later "added from inside 2" + SolidQueue::JobBatch.enqueue do + AddToBufferJob.perform_later "added from inside 3" + end + end + end + end + + test "nested batches finish from the inside out" do + batch2 = batch3 = batch4 = nil + batch1 = SolidQueue::JobBatch.enqueue(on_success: BatchOnSuccessJob.new("3")) do + batch2 = SolidQueue::JobBatch.enqueue(on_success: BatchOnSuccessJob.new("2")) do + batch3 = SolidQueue::JobBatch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } + batch4 = SolidQueue::JobBatch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } + end + end + + @dispatcher.start + @worker.start + + wait_for_job_batches_to_finish_for(2.seconds) + wait_for_jobs_to_finish_for(2.seconds) + + assert_equal [ "1: 0 jobs succeeded!", "1.1: 0 jobs succeeded!", "2: 2 jobs succeeded!", "3: 1 jobs succeeded!" 
], JobBuffer.values + assert_equal 4, SolidQueue::JobBatch.finished.count + assert_equal batch1.reload.finished_at > batch2.reload.finished_at, true + assert_equal batch2.finished_at > batch3.reload.finished_at, true + assert_equal batch2.finished_at > batch4.reload.finished_at, true + end + + test "all jobs are run, including jobs enqueued inside of other jobs" do + SolidQueue::JobBatch.enqueue do + AddToBufferJob.perform_later "hey" + SolidQueue::JobBatch.enqueue do + AddToBufferJob.perform_later "ho" + AddsMoreJobsJob.perform_later + end + end + + @dispatcher.start + @worker.start + + wait_for_job_batches_to_finish_for(2.seconds) + wait_for_jobs_to_finish_for(2.seconds) + + assert_equal [ "added from inside 1", "added from inside 2", "added from inside 3", "hey", "ho" ], JobBuffer.values.sort + assert_equal 3, SolidQueue::JobBatch.finished.count + end +end From 7274e93b3f516c94b24d6ece644fcf2a39b3698c Mon Sep 17 00:00:00 2001 From: JP Camara <48120+jpcamara@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:51:49 -0500 Subject: [PATCH 13/38] Add job batches to queue_schema.rb as well --- test/dummy/db/queue_schema.rb | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index 697c2e92..64de0e82 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -38,6 +38,22 @@ t.index ["job_id"], name: "index_solid_queue_failed_executions_on_job_id", unique: true end + create_table "solid_queue_job_batches", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| + t.bigint "parent_job_batch_id" + t.text "on_finish_active_job" + t.text "on_success_active_job" + t.text "on_failure_active_job" + t.datetime "finished_at" + t.datetime "changed_at" + t.datetime "last_changed_at" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["changed_at"], name: "index_solid_queue_job_batches_on_changed_at" + t.index 
["finished_at"], name: "index_solid_queue_job_batches_on_finished_at" + t.index ["last_changed_at"], name: "index_solid_queue_job_batches_on_last_changed_at" + t.index ["parent_job_batch_id"], name: "index_solid_queue_job_batches_on_parent_job_batch_id" + end + create_table "solid_queue_jobs", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.string "queue_name", null: false t.string "class_name", null: false @@ -49,7 +65,9 @@ t.string "concurrency_key" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.bigint "batch_id" t.index ["active_job_id"], name: "index_solid_queue_jobs_on_active_job_id" + t.index ["batch_id"], name: "index_solid_queue_jobs_on_batch_id" t.index ["class_name"], name: "index_solid_queue_jobs_on_class_name" t.index ["finished_at"], name: "index_solid_queue_jobs_on_finished_at" t.index ["queue_name", "finished_at"], name: "index_solid_queue_jobs_for_filtering" @@ -135,6 +153,7 @@ add_foreign_key "solid_queue_blocked_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_claimed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_failed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade + add_foreign_key "solid_queue_jobs", "solid_queue_job_batches", column: "batch_id", on_delete: :cascade add_foreign_key "solid_queue_ready_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_recurring_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_scheduled_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade From 3ad729f9940f0821e10f5f6936a7d2faed58d9f1 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 29 Aug 2025 01:35:47 -0400 Subject: [PATCH 14/38] Refactor internals and api namespace of batches * Thanks to Mikael Henriksson for his work in 
https://github.com/rails/solid_queue/pull/590. His work decentralizes management of batch status by moving it to the BatchUpdateJob, and tracking status using counts rather than querying specific job statuses after the fact. This is a much simpler approach to tracking the jobs, and allows us to avoid a constantly polling set of queries in the dispatcher. Also add in arbitrary metadata to allow tracking data from start to end of execution. This also means enqueueing a BatchUpdateJob based on callbacks in two different kinds of Batchable, which are included when a job is updated and finished, or when a FailedExecution is created (since failed jobs never "finish"). * This batch feature already took some inspiration from the GoodJob batch implementation (https://github.com/bensheldon/good_job). But now we also increase that by adopting some of the buffering and abstractions in a similar form as GoodJob. To discourage heavy reliance on the JobBatch model, it has been renamed to BatchRecord, and a separate Batch interface is how you interact with batches, with some delegation to the core model. * A new Buffer class (also modeled after GoodJob) was added specifically for batches. This was primarily added to support enqueue_after_transaction_commit. We now override the ActiveJob #enqueue method so we can keep track of which jobs are attempting to enqueue. When enqueue_after_transaction_commit is on, those jobs do not enqueue until all transactions commit. By tracking them at the high level enqueue and keeping a buffer of jobs, we can ensure that the jobs get tracked even when their creation is deferred until the transaction is committed. The side benefit is that we get to enqueue all the jobs together, probably offering some performance advantage. This buffer also keeps track of child batches for the same reason. * To support triggering a callback/BatchUpdateJob when a job finishes, the update to finished_at needed to become an update! 
call * As a simplification, on_failure is now only fired after all jobs finish, rather than at the first time a job fails * The adapter logic itself also needed to be updated to support the buffer and enqueue_after_transaction_commit. If a job is coming from a batch enqueue, we ignore it here and allow the batching process to enqueue_all at the end of the enqueue block. If the job is originally from a batch, but is retrying, we make sure the job counts in the batch stay updated. I don't love this addition, since it adds a lot of complication to the adapter code, all solely oriented around batches * Batches benefit from keeping jobs until the batch has finished. As such, we ignore the preserve jobs setting, but if it is set to false, we enqueue a cleanup job once the batch has finished and clear out finished jobs * Implement preserved jobs test and remove todo * Idempotent updates with pessimistic locks * Check if it finished before we acquired the lock * Use enqueue_all directly rather than passing through activejob for completion jobs Co-authored-by: Mikael Henriksson --- README.md | 12 +- app/jobs/solid_queue/batch_update_job.rb | 25 ++ app/models/solid_queue/batch_record.rb | 174 ++++++++++++ app/models/solid_queue/batch_record/buffer.rb | 47 ++++ app/models/solid_queue/claimed_execution.rb | 2 - app/models/solid_queue/execution/batchable.rb | 20 ++ app/models/solid_queue/failed_execution.rb | 2 +- app/models/solid_queue/job.rb | 5 +- app/models/solid_queue/job/batchable.rb | 25 ++ app/models/solid_queue/job/executable.rb | 4 +- app/models/solid_queue/job_batch.rb | 166 ------------ ...31013203_create_solid_queue_batch_table.rb | 21 -- lib/active_job/batch_record_id.rb | 47 ++++ lib/active_job/job_batch_id.rb | 26 -- .../queue_adapters/solid_queue_adapter.rb | 28 +- .../install/templates/db/queue_schema.rb | 23 ++ lib/solid_queue.rb | 2 +- lib/solid_queue/batch.rb | 142 ++++++++++ lib/solid_queue/batch/cleanup_job.rb | 17 ++ lib/solid_queue/batch/empty_job.rb | 
14 + lib/solid_queue/dispatcher.rb | 1 - lib/solid_queue/engine.rb | 2 +- test/dummy/db/queue_schema.rb | 25 +- test/integration/batch_lifecycle_test.rb | 250 ++++++++++++++++-- test/models/solid_queue/batch_record_test.rb | 81 ++++++ test/models/solid_queue/job_batch_test.rb | 66 ----- test/test_helpers/jobs_test_helper.rb | 2 +- 27 files changed, 904 insertions(+), 325 deletions(-) create mode 100644 app/jobs/solid_queue/batch_update_job.rb create mode 100644 app/models/solid_queue/batch_record.rb create mode 100644 app/models/solid_queue/batch_record/buffer.rb create mode 100644 app/models/solid_queue/execution/batchable.rb create mode 100644 app/models/solid_queue/job/batchable.rb delete mode 100644 app/models/solid_queue/job_batch.rb delete mode 100644 db/migrate/20240131013203_create_solid_queue_batch_table.rb create mode 100644 lib/active_job/batch_record_id.rb delete mode 100644 lib/active_job/job_batch_id.rb create mode 100644 lib/solid_queue/batch.rb create mode 100644 lib/solid_queue/batch/cleanup_job.rb create mode 100644 lib/solid_queue/batch/empty_job.rb create mode 100644 test/models/solid_queue/batch_record_test.rb delete mode 100644 test/models/solid_queue/job_batch_test.rb diff --git a/README.md b/README.md index d8665650..ed1bfdc7 100644 --- a/README.md +++ b/README.md @@ -595,9 +595,10 @@ and optionally trigger callbacks based on their status. It supports the followin - Three available callbacks to fire: - `on_finish`: Fired when all jobs have finished, including retries. Fires even when some jobs have failed. - `on_success`: Fired when all jobs have succeeded, including retries. Will not fire if any jobs have failed, but will fire if jobs have been discarded using `discard_on` - - `on_failure`: Fired the _first_ time a job fails, after all retries are exhausted. + - `on_failure`: Fired when all jobs have finished, including retries. Will only fire if one or more jobs have failed. 
- If a job is part of a batch, it can enqueue more jobs for that batch using `batch#enqueue` -- Batches can be nested within other batches, creating a hierarchy. Outer batches will not finish until all nested batches have finished. +- Batches can be nested within other batches, creating a hierarchy. Outer batches will not fire callbacks until all nested jobs have finished. +- Attaching arbitrary metadata to a batch ```rb class SleepyJob < ApplicationJob @@ -614,7 +615,7 @@ class MultiStepJob < ApplicationJob # Because of this nested batch, the top-level batch won't finish until the inner, # 10 second job finishes # Both jobs will still run simultaneously - SolidQueue::JobBatch.enqueue do + SolidQueue::Batch.enqueue do SleepyJob.perform_later(10) end end @@ -639,10 +640,11 @@ class BatchFailureJob < ApplicationJob end end -SolidQueue::JobBatch.enqueue( +SolidQueue::Batch.enqueue( on_finish: BatchFinishJob, on_success: BatchSuccessJob, - on_failure: BatchFailureJob + on_failure: BatchFailureJob, + metadata: { user_id: 123 } ) do 5.times.map { |i| SleepyJob.perform_later(i) } end diff --git a/app/jobs/solid_queue/batch_update_job.rb b/app/jobs/solid_queue/batch_update_job.rb new file mode 100644 index 00000000..a8c41bb4 --- /dev/null +++ b/app/jobs/solid_queue/batch_update_job.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module SolidQueue + class BatchUpdateJob < ActiveJob::Base + class UpdateFailure < RuntimeError; end + + queue_as :background + + discard_on ActiveRecord::RecordNotFound + + def perform(batch_id, job) + batch = SolidQueue::BatchRecord.find_by!(batch_id: batch_id) + + return if job.batch_id != batch_id + + status = job.status + return unless status.in?([ :finished, :failed ]) + + batch.job_finished!(job) + rescue => e + Rails.logger.error "[SolidQueue] BatchUpdateJob failed for batch #{batch_id}, job #{job.id}: #{e.message}" + raise + end + end +end diff --git a/app/models/solid_queue/batch_record.rb b/app/models/solid_queue/batch_record.rb 
new file mode 100644 index 00000000..32893e86 --- /dev/null +++ b/app/models/solid_queue/batch_record.rb @@ -0,0 +1,174 @@ +# frozen_string_literal: true + +module SolidQueue + class BatchRecord < Record + self.table_name = "solid_queue_job_batches" + + STATUSES = %w[pending processing completed failed] + + belongs_to :parent_job_batch, foreign_key: :parent_job_batch_id, class_name: "SolidQueue::BatchRecord", optional: true + has_many :jobs, foreign_key: :batch_id, primary_key: :batch_id + has_many :children, foreign_key: :parent_job_batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchRecord" + + serialize :on_finish, coder: JSON + serialize :on_success, coder: JSON + serialize :on_failure, coder: JSON + serialize :metadata, coder: JSON + + validates :status, inclusion: { in: STATUSES } + + scope :pending, -> { where(status: "pending") } + scope :processing, -> { where(status: "processing") } + scope :completed, -> { where(status: "completed") } + scope :failed, -> { where(status: "failed") } + scope :finished, -> { where(status: %w[completed failed]) } + scope :unfinished, -> { where(status: %w[pending processing]) } + + after_initialize :set_batch_id + before_create :set_parent_job_batch_id + + def on_success=(value) + super(serialize_callback(value)) + end + + def on_failure=(value) + super(serialize_callback(value)) + end + + def on_finish=(value) + super(serialize_callback(value)) + end + + def job_finished!(job) + return if finished? + return if job.batch_processed_at? + + job.with_lock do + if job.batch_processed_at.blank? + job.update!(batch_processed_at: Time.current) + + if job.failed_execution.present? + self.class.where(id: id).update_all( + "failed_jobs = failed_jobs + 1, pending_jobs = pending_jobs - 1" + ) + else + self.class.where(id: id).update_all( + "completed_jobs = completed_jobs + 1, pending_jobs = pending_jobs - 1" + ) + end + end + end + + reload + check_completion! + end + + def check_completion! + return if finished? 
+ + actual_children = children.count + return if actual_children < expected_children + + children.find_each do |child| + return unless child.finished? + end + + with_lock do + if finished? + # do nothing + elsif pending_jobs <= 0 + if failed_jobs > 0 + mark_as_failed! + else + mark_as_completed! + end + clear_unpreserved_jobs + elsif status == "pending" + update!(status: "processing") + end + end + end + + def finished? + status.in?(%w[completed failed]) + end + + def processing? + status == "processing" + end + + def pending? + status == "pending" + end + + def progress_percentage + return 0 if total_jobs == 0 + ((completed_jobs + failed_jobs) * 100.0 / total_jobs).round(2) + end + + private + + def set_parent_job_batch_id + self.parent_job_batch_id ||= Batch.current_batch_id if Batch.current_batch_id.present? + end + + def set_batch_id + self.batch_id ||= SecureRandom.uuid + end + + def as_active_job(active_job_klass) + active_job_klass.is_a?(ActiveJob::Base) ? active_job_klass : active_job_klass.new + end + + def serialize_callback(value) + return value if value.blank? + active_job = as_active_job(value) + # We can pick up batch ids from context, but callbacks should never be considered a part of the batch + active_job.batch_id = nil + active_job.serialize + end + + def perform_completion_job(job_field, attrs) + active_job = ActiveJob::Base.deserialize(send(job_field)) + active_job.send(:deserialize_arguments_if_needed) + active_job.arguments = [ Batch.new(_batch_record: self) ] + Array.wrap(active_job.arguments) + SolidQueue::Job.enqueue_all([ active_job ]) + + active_job.provider_job_id = Job.find_by(active_job_id: active_job.job_id).id + attrs[job_field] = active_job.serialize + end + + def mark_as_completed! + # SolidQueue treats `discard_on` differently than failures. The job will report as being :finished, + # and there is no record of the failure. + # GoodJob would report a discard as an error. It's possible we should do that in the future? 
+ update!(status: "completed", finished_at: Time.current) + + perform_completion_job(:on_success, {}) if on_success.present? + perform_completion_job(:on_finish, {}) if on_finish.present? + + if parent_job_batch_id.present? + parent = BatchRecord.find_by(batch_id: parent_job_batch_id) + parent&.reload&.check_completion! + end + end + + def mark_as_failed! + update!(status: "failed", finished_at: Time.current) + perform_completion_job(:on_failure, {}) if on_failure.present? + perform_completion_job(:on_finish, {}) if on_finish.present? + + # Check if parent batch can now complete + if parent_job_batch_id.present? + parent = BatchRecord.find_by(batch_id: parent_job_batch_id) + parent&.check_completion! + end + end + + def clear_unpreserved_jobs + SolidQueue::Batch::CleanupJob.perform_later(self) unless SolidQueue.preserve_finished_jobs? + end + end +end + +require_relative "batch_record/buffer" diff --git a/app/models/solid_queue/batch_record/buffer.rb b/app/models/solid_queue/batch_record/buffer.rb new file mode 100644 index 00000000..982593be --- /dev/null +++ b/app/models/solid_queue/batch_record/buffer.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module SolidQueue + class BatchRecord + class Buffer + attr_reader :jobs, :child_batches + + def initialize + @jobs = {} + @child_batches = [] + end + + def add(job) + @jobs[job.job_id] = job + job + end + + def add_child_batch(batch) + @child_batches << batch + batch + end + + def capture + previous_buffer = ActiveSupport::IsolatedExecutionState[:solid_queue_batch_buffer] + ActiveSupport::IsolatedExecutionState[:solid_queue_batch_buffer] = self + + yield + + @jobs + ensure + ActiveSupport::IsolatedExecutionState[:solid_queue_batch_buffer] = previous_buffer + end + + def self.current + ActiveSupport::IsolatedExecutionState[:solid_queue_batch_buffer] + end + + def self.capture_job(job) + current&.add(job) + end + + def self.capture_child_batch(batch) + current&.add_child_batch(batch) + end + end + end +end diff 
--git a/app/models/solid_queue/claimed_execution.rb b/app/models/solid_queue/claimed_execution.rb index d777a8f2..5d0a4057 100644 --- a/app/models/solid_queue/claimed_execution.rb +++ b/app/models/solid_queue/claimed_execution.rb @@ -71,8 +71,6 @@ def perform failed_with(result.error) raise result.error end - - job.job_batch.touch(:changed_at, :last_changed_at) if job.batch_id.present? ensure unblock_next_job end diff --git a/app/models/solid_queue/execution/batchable.rb b/app/models/solid_queue/execution/batchable.rb new file mode 100644 index 00000000..bc1cd7a2 --- /dev/null +++ b/app/models/solid_queue/execution/batchable.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module SolidQueue + class Execution + module Batchable + extend ActiveSupport::Concern + + included do + after_create :update_batch_progress, if: -> { job.batch_id? } + end + + private + def update_batch_progress + BatchUpdateJob.perform_later(job.batch_id, job) + rescue => e + Rails.logger.error "[SolidQueue] Failed to notify batch #{batch_id} about job #{id} completion: #{e.message}" + end + end + end +end diff --git a/app/models/solid_queue/failed_execution.rb b/app/models/solid_queue/failed_execution.rb index 8bcdc92f..50066bcc 100644 --- a/app/models/solid_queue/failed_execution.rb +++ b/app/models/solid_queue/failed_execution.rb @@ -2,7 +2,7 @@ module SolidQueue class FailedExecution < Execution - include Dispatching + include Dispatching, Batchable serialize :error, coder: JSON diff --git a/app/models/solid_queue/job.rb b/app/models/solid_queue/job.rb index 80582ee1..6cb59e12 100644 --- a/app/models/solid_queue/job.rb +++ b/app/models/solid_queue/job.rb @@ -4,12 +4,10 @@ module SolidQueue class Job < Record class EnqueueError < StandardError; end - include Executable, Clearable, Recurrable + include Executable, Clearable, Recurrable, Batchable serialize :arguments, coder: JSON - belongs_to :job_batch, foreign_key: :batch_id, optional: true - class << self def enqueue_all(active_jobs) 
active_jobs.each { |job| job.scheduled_at ||= Time.current } @@ -57,7 +55,6 @@ def create_all_from_active_jobs(active_jobs) end def attributes_from_active_job(active_job) - active_job.batch_id = JobBatch.current_batch_id || active_job.batch_id { queue_name: active_job.queue_name || DEFAULT_QUEUE_NAME, active_job_id: active_job.job_id, diff --git a/app/models/solid_queue/job/batchable.rb b/app/models/solid_queue/job/batchable.rb new file mode 100644 index 00000000..7d33bcee --- /dev/null +++ b/app/models/solid_queue/job/batchable.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module SolidQueue + class Job + module Batchable + extend ActiveSupport::Concern + + included do + belongs_to :job_batch, foreign_key: :batch_id, optional: true + + after_update :update_batch_progress, if: :batch_id? + end + + private + def update_batch_progress + return unless saved_change_to_finished_at? && finished_at.present? + return unless batch_id.present? + + BatchUpdateJob.perform_later(batch_id, self) + rescue => e + Rails.logger.error "[SolidQueue] Failed to notify batch #{batch_id} about job #{id} completion: #{e.message}" + end + end + end +end diff --git a/app/models/solid_queue/job/executable.rb b/app/models/solid_queue/job/executable.rb index 08f07bb0..31d21a00 100644 --- a/app/models/solid_queue/job/executable.rb +++ b/app/models/solid_queue/job/executable.rb @@ -76,8 +76,8 @@ def dispatch_bypassing_concurrency_limits end def finished! - if SolidQueue.preserve_finished_jobs? || batch_id.present? - touch(:finished_at) + if SolidQueue.preserve_finished_jobs? || batch_id.present? # We clear jobs after the batch finishes + update!(finished_at: Time.current) else destroy! 
end diff --git a/app/models/solid_queue/job_batch.rb b/app/models/solid_queue/job_batch.rb deleted file mode 100644 index 40e183b5..00000000 --- a/app/models/solid_queue/job_batch.rb +++ /dev/null @@ -1,166 +0,0 @@ -# frozen_string_literal: true - -module SolidQueue - class JobBatch < Record - belongs_to :job, foreign_key: :job_id, optional: true - belongs_to :parent_job_batch, foreign_key: :parent_job_batch_id, class_name: "SolidQueue::JobBatch", optional: true - has_many :jobs, foreign_key: :batch_id - has_many :children, foreign_key: :parent_job_batch_id, class_name: "SolidQueue::JobBatch" - - serialize :on_finish_active_job, coder: JSON - serialize :on_success_active_job, coder: JSON - serialize :on_failure_active_job, coder: JSON - - scope :incomplete, -> { - where(finished_at: nil).where("changed_at IS NOT NULL OR last_changed_at < ?", 1.hour.ago) - } - scope :finished, -> { where.not(finished_at: nil) } - - class << self - def current_batch_id - ActiveSupport::IsolatedExecutionState[:current_batch_id] - end - - def enqueue(attributes = {}) - job_batch = nil - transaction do - job_batch = create!(batch_attributes(attributes)) - wrap_in_batch_context(job_batch.id) do - yield job_batch - end - end - - job_batch - end - - def dispatch_finished_batches - incomplete.order(:id).pluck(:id).each do |id| - transaction do - where(id: id).includes(:children, :jobs).non_blocking_lock.each(&:finish) - end - end - end - - def wrap_in_batch_context(batch_id) - previous_batch_id = current_batch_id.presence || nil - ActiveSupport::IsolatedExecutionState[:current_batch_id] = batch_id - yield - ensure - ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id - end - - private - - def batch_attributes(attributes) - on_finish_klass = attributes.delete(:on_finish) - on_success_klass = attributes.delete(:on_success) - on_failure_klass = attributes.delete(:on_failure) - - if on_finish_klass.present? 
- attributes[:on_finish_active_job] = as_active_job(on_finish_klass).serialize - end - - if on_success_klass.present? - attributes[:on_success_active_job] = as_active_job(on_success_klass).serialize - end - - if on_failure_klass.present? - attributes[:on_failure_active_job] = as_active_job(on_failure_klass).serialize - end - - attributes[:parent_job_batch_id] = current_batch_id if current_batch_id.present? - # Set it initially, so we check the batch even if there are no jobs - attributes[:changed_at] = Time.zone.now - attributes[:last_changed_at] = Time.zone.now - - attributes - end - - def as_active_job(active_job_klass) - active_job_klass.is_a?(ActiveJob::Base) ? active_job_klass : active_job_klass.new - end - end - - # Instance-level enqueue - def enqueue(attributes = {}) - raise "You cannot enqueue a batch that is already finished" if finished? - - transaction do - self.class.wrap_in_batch_context(id) do - yield self - end - end - - self - end - - def finished? - finished_at.present? - end - - def finish - return if finished? - reset_changed_at - - all_jobs_succeeded = true - attrs = {} - jobs.find_each do |next_job| - # SolidQueue does treats `discard_on` differently than failures. The job will report as being :finished, - # and there is no record of the failure. - # GoodJob would report a discard as an error. It's possible we should do that in the future? - if fire_failure_job?(next_job) - perform_completion_job(:on_failure_active_job, attrs) - update!(attrs) - end - - status = next_job.status - all_jobs_succeeded = all_jobs_succeeded && status != :failed - return unless status.in?([ :finished, :failed ]) - end - - children.find_each do |child| - return unless child.finished? - end - - if on_finish_active_job.present? - perform_completion_job(:on_finish_active_job, attrs) - end - - if on_success_active_job.present? 
&& all_jobs_succeeded - perform_completion_job(:on_success_active_job, attrs) - end - - transaction do - parent_job_batch.touch(:changed_at, :last_changed_at) if parent_job_batch_id.present? - update!({ finished_at: Time.zone.now }.merge(attrs)) - end - end - - private - - def fire_failure_job?(job) - return false if on_failure_active_job.blank? || job.failed_execution.blank? - job = ActiveJob::Base.deserialize(on_failure_active_job) - job.provider_job_id.blank? - end - - def perform_completion_job(job_field, attrs) - active_job = ActiveJob::Base.deserialize(send(job_field)) - active_job.send(:deserialize_arguments_if_needed) - active_job.arguments = [ self ] + Array.wrap(active_job.arguments) - self.class.wrap_in_batch_context(parent_job_batch_id || self.class.current_batch_id) do - ActiveJob.perform_all_later([ active_job ]) - end - active_job.provider_job_id = Job.find_by(active_job_id: active_job.job_id).id - attrs[job_field] = active_job.serialize - end - - def reset_changed_at - if changed_at.blank? && last_changed_at.present? 
- update_columns(last_changed_at: Time.zone.now) # wait another hour before we check again - else - update_columns(changed_at: nil) # clear out changed_at so we ignore this until the next job finishes - end - end - end -end diff --git a/db/migrate/20240131013203_create_solid_queue_batch_table.rb b/db/migrate/20240131013203_create_solid_queue_batch_table.rb deleted file mode 100644 index 91b76ee8..00000000 --- a/db/migrate/20240131013203_create_solid_queue_batch_table.rb +++ /dev/null @@ -1,21 +0,0 @@ -class CreateSolidQueueBatchTable < ActiveRecord::Migration[7.1] - def change - create_table :solid_queue_job_batches do |t| - t.references :parent_job_batch, index: true # FIXME: foreign key - t.text :on_finish_active_job - t.text :on_success_active_job - t.text :on_failure_active_job - t.datetime :finished_at - t.datetime :changed_at - t.datetime :last_changed_at - t.timestamps - - t.index [ :finished_at ] - t.index [ :changed_at ] - t.index [ :last_changed_at ] - end - - add_reference :solid_queue_jobs, :batch, index: true - add_foreign_key :solid_queue_jobs, :solid_queue_job_batches, column: :batch_id, on_delete: :cascade - end -end diff --git a/lib/active_job/batch_record_id.rb b/lib/active_job/batch_record_id.rb new file mode 100644 index 00000000..7f8491af --- /dev/null +++ b/lib/active_job/batch_record_id.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +# Inspired by active_job/core.rb docs +# https://github.com/rails/rails/blob/1c2529b9a6ba5a1eff58be0d0373d7d9d401015b/activejob/lib/active_job/core.rb#L136 +module ActiveJob + module BatchRecordId + extend ActiveSupport::Concern + + included do + attr_accessor :batch_id + end + + def initialize(*arguments, **kwargs) + super + self.batch_id = SolidQueue::Batch.current_batch_id if solid_queue_job? + end + + def enqueue(options = {}) + super.tap do |job| + if solid_queue_job? 
+ SolidQueue::BatchRecord::Buffer.capture_job(self) + end + end + end + + def serialize + super.merge("batch_id" => batch_id) + end + + def deserialize(job_data) + super + self.batch_id = job_data["batch_id"] + end + + def batch + @batch ||= SolidQueue::Batch.new( + _batch_record: SolidQueue::BatchRecord.find_by(batch_id: batch_id) + ) + end + + private + + def solid_queue_job? + self.class.queue_adapter_name == "solid_queue" + end + end +end diff --git a/lib/active_job/job_batch_id.rb b/lib/active_job/job_batch_id.rb deleted file mode 100644 index 494e197f..00000000 --- a/lib/active_job/job_batch_id.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -# Inspired by active_job/core.rb docs -# https://github.com/rails/rails/blob/1c2529b9a6ba5a1eff58be0d0373d7d9d401015b/activejob/lib/active_job/core.rb#L136 -module ActiveJob - module JobBatchId - extend ActiveSupport::Concern - - included do - attr_accessor :batch_id - end - - def serialize - super.merge("batch_id" => batch_id) - end - - def deserialize(job_data) - super - self.batch_id = job_data["batch_id"] - end - - def batch - @batch ||= SolidQueue::JobBatch.find_by(id: batch_id) - end - end -end diff --git a/lib/active_job/queue_adapters/solid_queue_adapter.rb b/lib/active_job/queue_adapters/solid_queue_adapter.rb index fe556042..04de658d 100644 --- a/lib/active_job/queue_adapters/solid_queue_adapter.rb +++ b/lib/active_job/queue_adapters/solid_queue_adapter.rb @@ -16,16 +16,40 @@ def enqueue_after_transaction_commit? 
end def enqueue(active_job) # :nodoc: - SolidQueue::Job.enqueue(active_job) + return if in_batch?(active_job) + + SolidQueue::Job.enqueue(active_job).tap do |enqueued_job| + increment_job_count(active_job, enqueued_job) + end end def enqueue_at(active_job, timestamp) # :nodoc: - SolidQueue::Job.enqueue(active_job, scheduled_at: Time.at(timestamp)) + return if in_batch?(active_job) + + SolidQueue::Job.enqueue(active_job, scheduled_at: Time.at(timestamp)).tap do |enqueued_job| + increment_job_count(active_job, enqueued_job) + end end def enqueue_all(active_jobs) # :nodoc: SolidQueue::Job.enqueue_all(active_jobs) end + + private + + def in_batch?(active_job) + active_job.batch_id.present? && active_job.executions <= 0 + end + + def in_batch_retry?(active_job) + active_job.batch_id.present? && active_job.executions > 0 + end + + def increment_job_count(active_job, enqueued_job) + if enqueued_job.persisted? && in_batch_retry?(active_job) + SolidQueue::Batch.update_job_count(active_job.batch_id, 1) + end + end end end end diff --git a/lib/generators/solid_queue/install/templates/db/queue_schema.rb b/lib/generators/solid_queue/install/templates/db/queue_schema.rb index 85194b6a..f9fd033f 100644 --- a/lib/generators/solid_queue/install/templates/db/queue_schema.rb +++ b/lib/generators/solid_queue/install/templates/db/queue_schema.rb @@ -26,6 +26,26 @@ t.index [ "job_id" ], name: "index_solid_queue_failed_executions_on_job_id", unique: true end + create_table "solid_queue_job_batches", force: :cascade do |t| + t.string "batch_id", null: false + t.string "parent_job_batch_id" + t.text "on_finish" + t.text "on_success" + t.text "on_failure" + t.text "metadata" + t.integer "total_jobs", default: 0, null: false + t.integer "pending_jobs", default: 0, null: false + t.integer "completed_jobs", default: 0, null: false + t.integer "failed_jobs", default: 0, null: false + t.integer "expected_children", default: 0, null: false + t.string "status", default: "pending", null: false + 
t.datetime "finished_at" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index [ "finished_at" ], name: "index_solid_queue_job_batches_on_finished_at" + t.index [ "parent_job_batch_id" ], name: "index_solid_queue_job_batches_on_parent_job_batch_id" + end + create_table "solid_queue_jobs", force: :cascade do |t| t.string "queue_name", null: false t.string "class_name", null: false @@ -37,7 +57,10 @@ t.string "concurrency_key" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.string "batch_id" + t.datetime "batch_processed_at" t.index [ "active_job_id" ], name: "index_solid_queue_jobs_on_active_job_id" + t.index [ "batch_id" ], name: "index_solid_queue_jobs_on_batch_id" t.index [ "class_name" ], name: "index_solid_queue_jobs_on_class_name" t.index [ "finished_at" ], name: "index_solid_queue_jobs_on_finished_at" t.index [ "queue_name", "finished_at" ], name: "index_solid_queue_jobs_for_filtering" diff --git a/lib/solid_queue.rb b/lib/solid_queue.rb index 1277ea67..f4d3a92c 100644 --- a/lib/solid_queue.rb +++ b/lib/solid_queue.rb @@ -5,7 +5,7 @@ require "active_job" require "active_job/queue_adapters" -require "active_job/job_batch_id" +require "active_job/batch_record_id" require "active_support" require "active_support/core_ext/numeric/time" diff --git a/lib/solid_queue/batch.rb b/lib/solid_queue/batch.rb new file mode 100644 index 00000000..0904df0a --- /dev/null +++ b/lib/solid_queue/batch.rb @@ -0,0 +1,142 @@ +# frozen_string_literal: true + +require_relative "batch/empty_job" +require_relative "batch/cleanup_job" + +module SolidQueue + class Batch + include GlobalID::Identification + + delegate :completed_jobs, :failed_jobs, :pending_jobs, :total_jobs, :progress_percentage, + :finished?, :processing?, :pending?, :status, :batch_id, + :metadata, :metadata=, + :on_success, :on_success=, + :on_failure, :on_failure=, + :on_finish, :on_finish=, + :reload, + to: :batch_record + + def 
initialize(_batch_record: nil) + @batch_record = _batch_record || BatchRecord.new + end + + def batch_record + @batch_record + end + + def id + batch_id + end + + def enqueue(&block) + raise "You cannot enqueue a batch that is already finished" if finished? + + SolidQueue::BatchRecord::Buffer.capture_child_batch(self) if batch_record.new_record? + + buffer = SolidQueue::BatchRecord::Buffer.new + buffer.capture do + Batch.wrap_in_batch_context(batch_id) do + block.call(self) + end + end + + if enqueue_after_transaction_commit? + ActiveRecord.after_all_transactions_commit do + enqueue_batch(buffer) + end + else + enqueue_batch(buffer) + end + end + + private + + def enqueue_after_transaction_commit? + return false unless defined?(ApplicationJob.enqueue_after_transaction_commit) + + case ApplicationJob.enqueue_after_transaction_commit + when :always, true + true + when :never, false + false + when :default + true + end + end + + def enqueue_batch(buffer) + if batch_record.new_record? + enqueue_new_batch(buffer) + else + jobs = buffer.jobs.values + enqueue_existing_batch(jobs) + end + end + + def enqueue_new_batch(buffer) + SolidQueue::BatchRecord.transaction do + batch_record.save! + + # If batch has no jobs, enqueue an EmptyJob + # This ensures callbacks always execute, even for empty batches + jobs = buffer.jobs.values + if jobs.empty? 
+ empty_job = SolidQueue::Batch::EmptyJob.new + empty_job.batch_id = batch_record.batch_id + jobs = [ empty_job ] + end + + batch_record.update!( + total_jobs: jobs.size, + pending_jobs: SolidQueue::Job.enqueue_all(jobs), + expected_children: buffer.child_batches.size + ) + end + end + + def enqueue_existing_batch(active_jobs) + jobs = Array.wrap(active_jobs) + enqueued_count = SolidQueue::Job.enqueue_all(jobs) + + Batch.update_job_count(batch_id, enqueued_count) + end + + class << self + def enqueue(on_success: nil, on_failure: nil, on_finish: nil, metadata: nil, &block) + new.tap do |batch| + batch.batch_record.assign_attributes( + on_success: on_success, + on_failure: on_failure, + on_finish: on_finish, + metadata: metadata, + parent_job_batch_id: current_batch_id + ) + + batch.enqueue(&block) + end + end + + def find(batch_id) + new(_batch_record: BatchRecord.find_by!(batch_id: batch_id)) + end + + def update_job_count(batch_id, count) + BatchRecord.where(batch_id: batch_id).update_all( + "total_jobs = total_jobs + #{count}, pending_jobs = pending_jobs + #{count}" + ) + end + + def current_batch_id + ActiveSupport::IsolatedExecutionState[:current_batch_id] + end + + def wrap_in_batch_context(batch_id) + previous_batch_id = current_batch_id.presence || nil + ActiveSupport::IsolatedExecutionState[:current_batch_id] = batch_id + yield + ensure + ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id + end + end + end +end diff --git a/lib/solid_queue/batch/cleanup_job.rb b/lib/solid_queue/batch/cleanup_job.rb new file mode 100644 index 00000000..eb381908 --- /dev/null +++ b/lib/solid_queue/batch/cleanup_job.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module SolidQueue + class Batch + class CleanupJob < ApplicationJob + queue_as :background + + discard_on ActiveRecord::RecordNotFound + + def perform(job_batch) + return if SolidQueue.preserve_finished_jobs? 
+ + job_batch.jobs.finished.destroy_all + end + end + end +end diff --git a/lib/solid_queue/batch/empty_job.rb b/lib/solid_queue/batch/empty_job.rb new file mode 100644 index 00000000..f457eabe --- /dev/null +++ b/lib/solid_queue/batch/empty_job.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module SolidQueue + class Batch + class EmptyJob < ApplicationJob + queue_as :background + + def perform + # This job does nothing - it just exists to trigger batch completion + # The batch completion will be handled by the normal job_finished! flow + end + end + end +end diff --git a/lib/solid_queue/dispatcher.rb b/lib/solid_queue/dispatcher.rb index 5bcbe0e8..1583e1dd 100644 --- a/lib/solid_queue/dispatcher.rb +++ b/lib/solid_queue/dispatcher.rb @@ -37,7 +37,6 @@ def poll def dispatch_next_batch with_polling_volume do ScheduledExecution.dispatch_next_batch(batch_size) - SolidQueue::JobBatch.dispatch_finished_batches end end diff --git a/lib/solid_queue/engine.rb b/lib/solid_queue/engine.rb index 452ae445..f7e059cb 100644 --- a/lib/solid_queue/engine.rb +++ b/lib/solid_queue/engine.rb @@ -35,7 +35,7 @@ class Engine < ::Rails::Engine initializer "solid_queue.active_job.extensions" do ActiveSupport.on_load :active_job do include ActiveJob::ConcurrencyControls - include ActiveJob::JobBatchId + include ActiveJob::BatchRecordId end end end diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index 64de0e82..1201a59f 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -39,18 +39,23 @@ end create_table "solid_queue_job_batches", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| - t.bigint "parent_job_batch_id" - t.text "on_finish_active_job" - t.text "on_success_active_job" - t.text "on_failure_active_job" + t.string "batch_id" + t.string "parent_job_batch_id" + t.text "on_finish" + t.text "on_success" + t.text "on_failure" + t.text "metadata" + t.integer "total_jobs", default: 0, null: false + 
t.integer "pending_jobs", default: 0, null: false + t.integer "completed_jobs", default: 0, null: false + t.integer "failed_jobs", default: 0, null: false + t.integer "expected_children", default: 0, null: false + t.string "status", default: "pending", null: false t.datetime "finished_at" - t.datetime "changed_at" - t.datetime "last_changed_at" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index ["changed_at"], name: "index_solid_queue_job_batches_on_changed_at" + t.index ["batch_id"], name: "index_solid_queue_job_batches_on_batch_id", unique: true t.index ["finished_at"], name: "index_solid_queue_job_batches_on_finished_at" - t.index ["last_changed_at"], name: "index_solid_queue_job_batches_on_last_changed_at" t.index ["parent_job_batch_id"], name: "index_solid_queue_job_batches_on_parent_job_batch_id" end @@ -65,7 +70,8 @@ t.string "concurrency_key" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.bigint "batch_id" + t.string "batch_id" + t.datetime "batch_processed_at" t.index ["active_job_id"], name: "index_solid_queue_jobs_on_active_job_id" t.index ["batch_id"], name: "index_solid_queue_jobs_on_batch_id" t.index ["class_name"], name: "index_solid_queue_jobs_on_class_name" @@ -153,7 +159,6 @@ add_foreign_key "solid_queue_blocked_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_claimed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_failed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade - add_foreign_key "solid_queue_jobs", "solid_queue_job_batches", column: "batch_id", on_delete: :cascade add_foreign_key "solid_queue_ready_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_recurring_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_scheduled_executions", "solid_queue_jobs", 
column: "job_id", on_delete: :cascade diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index 22714315..3ad455f8 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -3,26 +3,70 @@ require "test_helper" class BatchLifecycleTest < ActiveSupport::TestCase + FailingJobError = Class.new(RuntimeError) + + def assert_finished_in_order(*batches) + job_batches = batches.map { |batch| SolidQueue::BatchRecord.find_by(batch_id: batch.batch_id) } + + job_batches.each_cons(2) do |batch1, batch2| + assert_equal batch1.reload.finished_at < batch2.reload.finished_at, true + end + end + setup do + @_on_thread_error = SolidQueue.on_thread_error + SolidQueue.on_thread_error = silent_on_thread_error_for([ FailingJobError ], @_on_thread_error) @worker = SolidQueue::Worker.new(queues: "background", threads: 3) @dispatcher = SolidQueue::Dispatcher.new(batch_size: 10, polling_interval: 0.2) end teardown do + SolidQueue.on_thread_error = @_on_thread_error @worker.stop @dispatcher.stop JobBuffer.clear SolidQueue::Job.destroy_all - SolidQueue::JobBatch.destroy_all + SolidQueue::BatchRecord.destroy_all + + ApplicationJob.enqueue_after_transaction_commit = false if defined?(ApplicationJob.enqueue_after_transaction_commit) + SolidQueue.preserve_finished_jobs = true end class BatchOnSuccessJob < ApplicationJob queue_as :background def perform(batch, custom_message = "") - JobBuffer.add "#{custom_message}: #{batch.jobs.size} jobs succeeded!" + JobBuffer.add "#{custom_message}: #{batch.completed_jobs} jobs succeeded!" + end + end + + class BatchOnFailureJob < ApplicationJob + queue_as :background + + def perform(batch, custom_message = "") + JobBuffer.add "#{custom_message}: #{batch.failed_jobs} jobs failed!" 
+ end + end + + class FailingJob < ApplicationJob + queue_as :background + + retry_on FailingJobError, attempts: 3, wait: 0.1.seconds + + def perform + raise FailingJobError, "Failing job" + end + end + + class DiscardingJob < ApplicationJob + queue_as :background + + discard_on FailingJobError + + def perform + raise FailingJobError, "Failing job" end end @@ -33,7 +77,7 @@ def perform batch.enqueue do AddToBufferJob.perform_later "added from inside 1" AddToBufferJob.perform_later "added from inside 2" - SolidQueue::JobBatch.enqueue do + SolidQueue::Batch.enqueue do AddToBufferJob.perform_later "added from inside 3" end end @@ -42,10 +86,10 @@ def perform test "nested batches finish from the inside out" do batch2 = batch3 = batch4 = nil - batch1 = SolidQueue::JobBatch.enqueue(on_success: BatchOnSuccessJob.new("3")) do - batch2 = SolidQueue::JobBatch.enqueue(on_success: BatchOnSuccessJob.new("2")) do - batch3 = SolidQueue::JobBatch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } - batch4 = SolidQueue::JobBatch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } + batch1 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("3")) do + batch2 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("2")) do + batch3 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } + batch4 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } end end @@ -55,17 +99,18 @@ def perform wait_for_job_batches_to_finish_for(2.seconds) wait_for_jobs_to_finish_for(2.seconds) - assert_equal [ "1: 0 jobs succeeded!", "1.1: 0 jobs succeeded!", "2: 2 jobs succeeded!", "3: 1 jobs succeeded!" 
], JobBuffer.values - assert_equal 4, SolidQueue::JobBatch.finished.count - assert_equal batch1.reload.finished_at > batch2.reload.finished_at, true - assert_equal batch2.finished_at > batch3.reload.finished_at, true - assert_equal batch2.finished_at > batch4.reload.finished_at, true + expected_values = [ "1: 1 jobs succeeded!", "1.1: 1 jobs succeeded!", "2: 1 jobs succeeded!", "3: 1 jobs succeeded!" ] + assert_equal expected_values.sort, JobBuffer.values.sort + assert_equal 4, SolidQueue::BatchRecord.finished.count + assert_finished_in_order(batch4, batch2, batch1) + assert_finished_in_order(batch3, batch2, batch1) end test "all jobs are run, including jobs enqueued inside of other jobs" do - SolidQueue::JobBatch.enqueue do + batch2 = nil + batch1 = SolidQueue::Batch.enqueue do AddToBufferJob.perform_later "hey" - SolidQueue::JobBatch.enqueue do + batch2 = SolidQueue::Batch.enqueue do AddToBufferJob.perform_later "ho" AddsMoreJobsJob.perform_later end @@ -75,9 +120,182 @@ def perform @worker.start wait_for_job_batches_to_finish_for(2.seconds) - wait_for_jobs_to_finish_for(2.seconds) assert_equal [ "added from inside 1", "added from inside 2", "added from inside 3", "hey", "ho" ], JobBuffer.values.sort - assert_equal 3, SolidQueue::JobBatch.finished.count + assert_equal 3, SolidQueue::BatchRecord.finished.count + assert_finished_in_order(batch2, batch1) + end + + test "when self.enqueue_after_transaction_commit = true" do + skip if Rails::VERSION::MAJOR == 7 && Rails::VERSION::MINOR == 1 + + ApplicationJob.enqueue_after_transaction_commit = true + batch1 = batch2 = batch3 = nil + JobResult.transaction do + JobResult.create!(queue_name: "default", status: "") + + batch1 = SolidQueue::Batch.enqueue do + AddToBufferJob.perform_later "hey" + JobResult.transaction(requires_new: true) do + JobResult.create!(queue_name: "default", status: "") + batch2 = SolidQueue::Batch.enqueue do + AddToBufferJob.perform_later "ho" + batch3 = SolidQueue::Batch.enqueue do + 
AddToBufferJob.perform_later "let's go" + end + end + end + end + end + + @dispatcher.start + @worker.start + + wait_for_job_batches_to_finish_for(2.seconds) + wait_for_jobs_to_finish_for(2.seconds) + + assert_equal 3, SolidQueue::BatchRecord.finished.count + assert_equal 6, SolidQueue::Job.finished.count + assert_equal 6, SolidQueue::Job.count + assert_finished_in_order(batch3, batch2, batch1) + end + + test "failed jobs fire properly" do + batch2 = nil + batch1 = SolidQueue::Batch.enqueue(on_failure: BatchOnFailureJob.new("0")) do + FailingJob.perform_later + batch2 = SolidQueue::Batch.enqueue(on_failure: BatchOnFailureJob.new("1")) do + FailingJob.perform_later + end + end + + @dispatcher.start + @worker.start + + wait_for_job_batches_to_finish_for(3.seconds) + wait_for_jobs_to_finish_for(3.seconds) + + job_batch1 = SolidQueue::BatchRecord.find_by(batch_id: batch1.batch_id) + job_batch2 = SolidQueue::BatchRecord.find_by(batch_id: batch2.batch_id) + + assert_equal 2, SolidQueue::BatchRecord.count + assert_equal 2, SolidQueue::BatchRecord.finished.count + + assert_equal 3, job_batch1.total_jobs + assert_equal 1, job_batch1.failed_jobs + assert_equal 2, job_batch1.completed_jobs + assert_equal 0, job_batch1.pending_jobs + + assert_equal 3, job_batch2.total_jobs + assert_equal 1, job_batch2.failed_jobs + assert_equal 2, job_batch2.completed_jobs + assert_equal 0, job_batch2.pending_jobs + + assert_equal [ "failed", "failed" ].sort, SolidQueue::BatchRecord.all.pluck(:status) + assert_equal [ "0: 1 jobs failed!", "1: 1 jobs failed!" 
], JobBuffer.values.sort + assert_finished_in_order(batch2, batch1) + end + + test "discarded jobs fire properly" do + batch2 = nil + batch1 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("0")) do + DiscardingJob.perform_later + batch2 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) do + DiscardingJob.perform_later + end + end + + @dispatcher.start + @worker.start + + wait_for_job_batches_to_finish_for(3.seconds) + wait_for_jobs_to_finish_for(3.seconds) + + job_batch1 = SolidQueue::BatchRecord.find_by(batch_id: batch1.batch_id) + job_batch2 = SolidQueue::BatchRecord.find_by(batch_id: batch2.batch_id) + + assert_equal 2, SolidQueue::BatchRecord.count + assert_equal 2, SolidQueue::BatchRecord.finished.count + + assert_equal 1, job_batch1.total_jobs + assert_equal 0, job_batch1.failed_jobs + assert_equal 1, job_batch1.completed_jobs + assert_equal 0, job_batch1.pending_jobs + + assert_equal 1, job_batch2.total_jobs + assert_equal 0, job_batch2.failed_jobs + assert_equal 1, job_batch2.completed_jobs + assert_equal 0, job_batch2.pending_jobs + + assert_equal [ "completed", "completed" ].sort, SolidQueue::BatchRecord.all.pluck(:status) + assert_equal [ "0: 1 jobs succeeded!", "1: 1 jobs succeeded!" ], JobBuffer.values.sort + assert_finished_in_order(batch2, batch1) + end + + test "preserve_finished_jobs = false" do + SolidQueue.preserve_finished_jobs = false + batch1 = SolidQueue::Batch.enqueue do + AddToBufferJob.perform_later "hey" + end + + assert_equal false, batch1.reload.finished? + assert_equal 1, SolidQueue::Job.count + assert_equal 0, SolidQueue::Job.finished.count + + @dispatcher.start + @worker.start + + wait_for_job_batches_to_finish_for(2.seconds) + wait_for_jobs_to_finish_for(2.seconds) + + assert_equal true, batch1.reload.finished? 
+ assert_equal 0, SolidQueue::Job.count + end + + test "batch interface" do + batch = SolidQueue::Batch.enqueue( + metadata: { source: "test", priority: "high", user_id: 123 }, + on_finish: OnFinishJob, + on_success: OnSuccessJob, + on_failure: OnFailureJob + ) do + AddToBufferJob.perform_later "hey" + end + + @dispatcher.start + @worker.start + + wait_for_job_batches_to_finish_for(2.seconds) + wait_for_jobs_to_finish_for(2.seconds) + + assert_equal [ "Hi finish #{batch.batch_id}!", "Hi success #{batch.batch_id}!", "hey" ].sort, JobBuffer.values.sort + assert_equal 1, batch.reload.completed_jobs + assert_equal 0, batch.failed_jobs + assert_equal 0, batch.pending_jobs + assert_equal 1, batch.total_jobs + end + + class OnFinishJob < ApplicationJob + queue_as :background + + def perform(batch) + JobBuffer.add "Hi finish #{batch.batch_id}!" + end + end + + class OnSuccessJob < ApplicationJob + queue_as :background + + def perform(batch) + JobBuffer.add "Hi success #{batch.batch_id}!" + end + end + + class OnFailureJob < ApplicationJob + queue_as :background + + def perform(batch) + JobBuffer.add "Hi failure #{batch.batch_id}!" + end end end diff --git a/test/models/solid_queue/batch_record_test.rb b/test/models/solid_queue/batch_record_test.rb new file mode 100644 index 00000000..d0ffbf6f --- /dev/null +++ b/test/models/solid_queue/batch_record_test.rb @@ -0,0 +1,81 @@ +require "test_helper" + +class SolidQueue::BatchRecordTest < ActiveSupport::TestCase + self.use_transactional_tests = false + + teardown do + SolidQueue::Job.destroy_all + SolidQueue::BatchRecord.destroy_all + end + + class BatchWithArgumentsJob < ApplicationJob + def perform(batch, arg1, arg2) + Rails.logger.info "Hi #{batch.batch_id}, #{arg1}, #{arg2}!" + end + end + + class NiceJob < ApplicationJob + retry_on Exception, wait: 1.second + + def perform(arg) + Rails.logger.info "Hi #{arg}!" 
+ end + end + + test "batch will be completed on success" do + batch = SolidQueue::Batch.enqueue(on_finish: BatchCompletionJob) { } + job_batch = SolidQueue::BatchRecord.find_by(batch_id: batch.batch_id) + assert_not_nil job_batch.on_finish + assert_equal BatchCompletionJob.name, job_batch.on_finish["job_class"] + end + + test "batch will be completed on finish" do + batch = SolidQueue::Batch.enqueue(on_success: BatchCompletionJob) { } + job_batch = SolidQueue::BatchRecord.find_by(batch_id: batch.batch_id) + assert_not_nil job_batch.on_success + assert_equal BatchCompletionJob.name, job_batch.on_success["job_class"] + end + + test "sets the batch_id on jobs created inside of the enqueue block" do + batch = SolidQueue::Batch.enqueue(on_finish: BatchCompletionJob) do + NiceJob.perform_later("world") + NiceJob.perform_later("people") + end + + assert_equal 2, SolidQueue::Job.count + assert_equal [ batch.batch_id ] * 2, SolidQueue::Job.last(2).map(&:batch_id) + end + + test "batch id is present inside the block" do + assert_nil SolidQueue::Batch.current_batch_id + SolidQueue::Batch.enqueue(on_finish: BatchCompletionJob) do + assert_not_nil SolidQueue::Batch.current_batch_id + end + assert_nil SolidQueue::Batch.current_batch_id + end + + test "allow arguments and options for callbacks" do + SolidQueue::Batch.enqueue( + on_finish: BatchWithArgumentsJob.new(1, 2).set(queue: :batch), + ) do + NiceJob.perform_later("world") + end + + assert_not_nil SolidQueue::BatchRecord.last.on_finish["arguments"] + assert_equal SolidQueue::BatchRecord.last.on_finish["arguments"], [ 1, 2 ] + assert_equal SolidQueue::BatchRecord.last.on_finish["queue_name"], "batch" + end + + test "creates batch with metadata" do + SolidQueue::Batch.enqueue( + metadata: { source: "test", priority: "high", user_id: 123 } + ) do + NiceJob.perform_later("world") + end + + assert_not_nil SolidQueue::BatchRecord.last.metadata + assert_equal SolidQueue::BatchRecord.last.metadata["source"], "test" + assert_equal 
SolidQueue::BatchRecord.last.metadata["priority"], "high" + assert_equal SolidQueue::BatchRecord.last.metadata["user_id"], 123 + end +end diff --git a/test/models/solid_queue/job_batch_test.rb b/test/models/solid_queue/job_batch_test.rb deleted file mode 100644 index e49f59c2..00000000 --- a/test/models/solid_queue/job_batch_test.rb +++ /dev/null @@ -1,66 +0,0 @@ -require "test_helper" - -class SolidQueue::JobBatchTest < ActiveSupport::TestCase - self.use_transactional_tests = false - - teardown do - SolidQueue::Job.destroy_all - SolidQueue::JobBatch.destroy_all - end - - class BatchWithArgumentsJob < ApplicationJob - def perform(batch, arg1, arg2) - Rails.logger.info "Hi #{batch.id}, #{arg1}, #{arg2}!" - end - end - - class NiceJob < ApplicationJob - retry_on Exception, wait: 1.second - - def perform(arg) - Rails.logger.info "Hi #{arg}!" - end - end - - test "batch will be completed on success" do - batch = SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) { } - assert_not_nil batch.on_finish_active_job - assert_equal BatchCompletionJob.name, batch.on_finish_active_job["job_class"] - end - - test "batch will be completed on finish" do - batch = SolidQueue::JobBatch.enqueue(on_success: BatchCompletionJob) { } - assert_not_nil batch.on_success_active_job - assert_equal BatchCompletionJob.name, batch.on_success_active_job["job_class"] - end - - test "sets the batch_id on jobs created inside of the enqueue block" do - batch = SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) do - NiceJob.perform_later("world") - NiceJob.perform_later("people") - end - - assert_equal 2, SolidQueue::Job.count - assert_equal [ batch.id ] * 2, SolidQueue::Job.last(2).map(&:batch_id) - end - - test "batch id is present inside the block" do - assert_nil SolidQueue::JobBatch.current_batch_id - SolidQueue::JobBatch.enqueue(on_finish: BatchCompletionJob) do - assert_not_nil SolidQueue::JobBatch.current_batch_id - end - assert_nil SolidQueue::JobBatch.current_batch_id - end 
- - test "allow arguments and options for callbacks" do - SolidQueue::JobBatch.enqueue( - on_finish: BatchWithArgumentsJob.new(1, 2).set(queue: :batch), - ) do - NiceJob.perform_later("world") - end - - assert_not_nil SolidQueue::JobBatch.last.on_finish_active_job["arguments"] - assert_equal SolidQueue::JobBatch.last.on_finish_active_job["arguments"], [ 1, 2 ] - assert_equal SolidQueue::JobBatch.last.on_finish_active_job["queue_name"], "batch" - end -end diff --git a/test/test_helpers/jobs_test_helper.rb b/test/test_helpers/jobs_test_helper.rb index b000a65d..1a8a205c 100644 --- a/test/test_helpers/jobs_test_helper.rb +++ b/test/test_helpers/jobs_test_helper.rb @@ -20,7 +20,7 @@ def wait_for_jobs_to_be_released_for(timeout = 1.second) def wait_for_job_batches_to_finish_for(timeout = 1.second) wait_while_with_timeout(timeout) do skip_active_record_query_cache do - SolidQueue::JobBatch.where(finished_at: nil).any? + SolidQueue::BatchRecord.where(finished_at: nil).any? end end end From bc1efa43df6261e32fa0f628e827babd84ac8ebe Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 5 Sep 2025 17:54:25 -0400 Subject: [PATCH 15/38] Move away from a batch_processed_at to batch_execution model * BatchExecution allows us to know for sure we only ever run completion on a job once. We destroy it and update the counts in a transaction. Also can remove the batch_processed_at field from jobs, which are meant to be touched as little as possible and relevant states reflected in *_execution models * It also gives us a slightly cleaner interface in the batchable classes * Updated some table naming and pruned unused fields/indexes * Increase child batch count as new batches are enqueued, even in existing batches * Refactor to a unified Batch interface * It was overly complicated to split Batch and BatchRecord apart just to keep a more strict interface * That concept was taken from GoodJob, but it didn't feel in the spirit of the simplicity of the SolidQueue project. 
It was a lot of concepts to juggle in your head * Also moved around some files (like the cleanup and empty jobs) to the more appropriate app/jobs --- .../jobs}/solid_queue/batch/cleanup_job.rb | 0 .../jobs}/solid_queue/batch/empty_job.rb | 0 app/jobs/solid_queue/batch_update_job.rb | 25 -- app/models/solid_queue/batch.rb | 262 ++++++++++++++++++ .../{batch_record => batch}/buffer.rb | 2 +- app/models/solid_queue/batch_execution.rb | 45 +++ app/models/solid_queue/batch_record.rb | 174 ------------ app/models/solid_queue/execution/batchable.rb | 7 +- app/models/solid_queue/job/batchable.rb | 9 +- .../{batch_record_id.rb => batch_id.rb} | 8 +- .../queue_adapters/solid_queue_adapter.rb | 1 + .../install/templates/db/queue_schema.rb | 18 +- lib/solid_queue.rb | 2 +- lib/solid_queue/batch.rb | 142 ---------- lib/solid_queue/engine.rb | 2 +- test/dummy/db/queue_schema.rb | 18 +- test/integration/batch_lifecycle_test.rb | 46 +-- .../{batch_record_test.rb => batch_test.rb} | 22 +- test/test_helpers/jobs_test_helper.rb | 2 +- 19 files changed, 384 insertions(+), 401 deletions(-) rename {lib => app/jobs}/solid_queue/batch/cleanup_job.rb (100%) rename {lib => app/jobs}/solid_queue/batch/empty_job.rb (100%) delete mode 100644 app/jobs/solid_queue/batch_update_job.rb create mode 100644 app/models/solid_queue/batch.rb rename app/models/solid_queue/{batch_record => batch}/buffer.rb (97%) create mode 100644 app/models/solid_queue/batch_execution.rb delete mode 100644 app/models/solid_queue/batch_record.rb rename lib/active_job/{batch_record_id.rb => batch_id.rb} (80%) delete mode 100644 lib/solid_queue/batch.rb rename test/models/solid_queue/{batch_record_test.rb => batch_test.rb} (72%) diff --git a/lib/solid_queue/batch/cleanup_job.rb b/app/jobs/solid_queue/batch/cleanup_job.rb similarity index 100% rename from lib/solid_queue/batch/cleanup_job.rb rename to app/jobs/solid_queue/batch/cleanup_job.rb diff --git a/lib/solid_queue/batch/empty_job.rb
b/app/jobs/solid_queue/batch/empty_job.rb similarity index 100% rename from lib/solid_queue/batch/empty_job.rb rename to app/jobs/solid_queue/batch/empty_job.rb diff --git a/app/jobs/solid_queue/batch_update_job.rb b/app/jobs/solid_queue/batch_update_job.rb deleted file mode 100644 index a8c41bb4..00000000 --- a/app/jobs/solid_queue/batch_update_job.rb +++ /dev/null @@ -1,25 +0,0 @@ -# frozen_string_literal: true - -module SolidQueue - class BatchUpdateJob < ActiveJob::Base - class UpdateFailure < RuntimeError; end - - queue_as :background - - discard_on ActiveRecord::RecordNotFound - - def perform(batch_id, job) - batch = SolidQueue::BatchRecord.find_by!(batch_id: batch_id) - - return if job.batch_id != batch_id - - status = job.status - return unless status.in?([ :finished, :failed ]) - - batch.job_finished!(job) - rescue => e - Rails.logger.error "[SolidQueue] BatchUpdateJob failed for batch #{batch_id}, job #{job.id}: #{e.message}" - raise - end - end -end diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb new file mode 100644 index 00000000..c36a0836 --- /dev/null +++ b/app/models/solid_queue/batch.rb @@ -0,0 +1,262 @@ +# frozen_string_literal: true + +module SolidQueue + class Batch < Record + STATUSES = %w[pending processing completed failed] + + belongs_to :parent_batch, foreign_key: :parent_batch_id, class_name: "SolidQueue::Batch", optional: true + has_many :jobs, foreign_key: :batch_id, primary_key: :batch_id + has_many :batch_executions, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchExecution" + has_many :child_batches, foreign_key: :parent_batch_id, primary_key: :batch_id, class_name: "SolidQueue::Batch" + + serialize :on_finish, coder: JSON + serialize :on_success, coder: JSON + serialize :on_failure, coder: JSON + serialize :metadata, coder: JSON + + validates :status, inclusion: { in: STATUSES } + + scope :pending, -> { where(status: "pending") } + scope :processing, -> { where(status: 
"processing") } + scope :completed, -> { where(status: "completed") } + scope :failed, -> { where(status: "failed") } + scope :finished, -> { where(status: %w[completed failed]) } + scope :unfinished, -> { where(status: %w[pending processing]) } + + after_initialize :set_batch_id + before_create :set_parent_batch_id + + def enqueue(&block) + raise "You cannot enqueue a batch that is already finished" if finished? + + SolidQueue::Batch::Buffer.capture_child_batch(self) if new_record? + + buffer = SolidQueue::Batch::Buffer.new + buffer.capture do + Batch.wrap_in_batch_context(batch_id) do + block.call(self) + end + end + + if enqueue_after_transaction_commit? + ActiveRecord.after_all_transactions_commit do + enqueue_batch(buffer) + end + else + enqueue_batch(buffer) + end + end + + def on_success=(value) + super(serialize_callback(value)) + end + + def on_failure=(value) + super(serialize_callback(value)) + end + + def on_finish=(value) + super(serialize_callback(value)) + end + + def check_completion! + return if finished? + + with_lock do + return if finished_at? + + if pending_jobs == 0 + unfinished_children = child_batches.where.not(status: %w[completed failed]).count + + if total_child_batches == 0 || unfinished_children == 0 + new_status = failed_jobs > 0 ? "failed" : "completed" + update!(status: new_status, finished_at: Time.current) + execute_callbacks + end + elsif status == "pending" && (completed_jobs > 0 || failed_jobs > 0) + # Move from pending to processing once any job completes + update!(status: "processing") + end + end + end + + def finished? + status.in?(%w[completed failed]) + end + + def processing? + status == "processing" + end + + def pending? + status == "pending" + end + + def progress_percentage + return 0 if total_jobs == 0 + ((completed_jobs + failed_jobs) * 100.0 / total_jobs).round(2) + end + + private + + def enqueue_after_transaction_commit? 
+ return false unless defined?(ApplicationJob.enqueue_after_transaction_commit) + + case ApplicationJob.enqueue_after_transaction_commit + when :always, true + true + when :never, false + false + when :default + true + end + end + + def enqueue_batch(buffer) + if new_record? + enqueue_new_batch(buffer) + else + enqueue_existing_batch(buffer) + end + end + + def enqueue_new_batch(buffer) + SolidQueue::Batch.transaction do + save! + + # If batch has no jobs, enqueue an EmptyJob + # This ensures callbacks always execute, even for empty batches + jobs = buffer.jobs.values + if jobs.empty? + empty_job = SolidQueue::Batch::EmptyJob.new + empty_job.batch_id = batch_id + jobs = [ empty_job ] + end + + # Enqueue jobs - this handles creation and preparation + enqueued_count = SolidQueue::Job.enqueue_all(jobs) + + persisted_jobs = jobs.select { |job| job.provider_job_id.present? } + SolidQueue::BatchExecution.track_job_creation(persisted_jobs, batch_id) + + # Update batch record with counts + update!( + total_jobs: enqueued_count, + pending_jobs: enqueued_count, + total_child_batches: buffer.child_batches.size + ) + end + end + + def enqueue_existing_batch(buffer) + jobs = buffer.jobs.values + new_child_batches = buffer.child_batches.size + + SolidQueue::Batch.transaction do + enqueued_count = SolidQueue::Job.enqueue_all(jobs) + + persisted_jobs = jobs.select(&:successfully_enqueued?) + SolidQueue::BatchExecution.track_job_creation(persisted_jobs, batch_id) + + Batch.where(batch_id: batch_id).update_all([ + "total_jobs = total_jobs + ?, pending_jobs = pending_jobs + ?, total_child_batches = total_child_batches + ?", + enqueued_count, enqueued_count, new_child_batches + ]) + end + + jobs.count(&:successfully_enqueued?) + end + + def set_parent_batch_id + self.parent_batch_id ||= Batch.current_batch_id if Batch.current_batch_id.present? 
+ end + + def set_batch_id + self.batch_id ||= SecureRandom.uuid + end + + def as_active_job(active_job_klass) + active_job_klass.is_a?(ActiveJob::Base) ? active_job_klass : active_job_klass.new + end + + def serialize_callback(value) + return value if value.blank? + active_job = as_active_job(value) + # We can pick up batch ids from context, but callbacks should never be considered a part of the batch + active_job.batch_id = nil + active_job.serialize + end + + def perform_completion_job(job_field, attrs) + active_job = ActiveJob::Base.deserialize(send(job_field)) + active_job.send(:deserialize_arguments_if_needed) + active_job.arguments = [ self ] + Array.wrap(active_job.arguments) + SolidQueue::Job.enqueue_all([ active_job ]) + + active_job.provider_job_id = Job.find_by(active_job_id: active_job.job_id).id + attrs[job_field] = active_job.serialize + end + + def execute_callbacks + if status == "failed" + perform_completion_job(:on_failure, {}) if on_failure.present? + elsif status == "completed" + perform_completion_job(:on_success, {}) if on_success.present? + end + + perform_completion_job(:on_finish, {}) if on_finish.present? + + clear_unpreserved_jobs + + check_parent_completion! + end + + def clear_unpreserved_jobs + SolidQueue::Batch::CleanupJob.perform_later(self) unless SolidQueue.preserve_finished_jobs? + end + + def check_parent_completion! + if parent_batch_id.present? + parent = Batch.find_by(batch_id: parent_batch_id) + parent&.check_completion! unless parent&.finished? 
+ end + end + + class << self + def enqueue(on_success: nil, on_failure: nil, on_finish: nil, metadata: nil, &block) + new.tap do |batch| + batch.assign_attributes( + on_success: on_success, + on_failure: on_failure, + on_finish: on_finish, + metadata: metadata, + parent_batch_id: current_batch_id + ) + + batch.enqueue(&block) + end + end + + def update_job_count(batch_id, count) + count = count.to_i + Batch.where(batch_id: batch_id).update_all( + "total_jobs = total_jobs + #{count}, pending_jobs = pending_jobs + #{count}", + ) + end + + def current_batch_id + ActiveSupport::IsolatedExecutionState[:current_batch_id] + end + + def wrap_in_batch_context(batch_id) + previous_batch_id = current_batch_id.presence || nil + ActiveSupport::IsolatedExecutionState[:current_batch_id] = batch_id + yield + ensure + ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id + end + end + end +end + +require_relative "batch/buffer" diff --git a/app/models/solid_queue/batch_record/buffer.rb b/app/models/solid_queue/batch/buffer.rb similarity index 97% rename from app/models/solid_queue/batch_record/buffer.rb rename to app/models/solid_queue/batch/buffer.rb index 982593be..a9eb1e97 100644 --- a/app/models/solid_queue/batch_record/buffer.rb +++ b/app/models/solid_queue/batch/buffer.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module SolidQueue - class BatchRecord + class Batch class Buffer attr_reader :jobs, :child_batches diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb new file mode 100644 index 00000000..51ef6050 --- /dev/null +++ b/app/models/solid_queue/batch_execution.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module SolidQueue + class BatchExecution < Record + belongs_to :job, optional: true + belongs_to :batch, foreign_key: :batch_id, primary_key: :batch_id + + class << self + def track_job_creation(active_jobs, batch_id) + execution_data = Array.wrap(active_jobs).map do |active_job| + { + 
job_id: active_job.provider_job_id, + batch_id: batch_id + } + end + + SolidQueue::BatchExecution.insert_all(execution_data) + end + + def process_job_completion(job, status) + batch_id = job.batch_id + batch_execution = job.batch_execution + + return if batch_execution.blank? + + transaction do + batch_execution.destroy! + + if status == "failed" + Batch.where(batch_id: batch_id).update_all( + "pending_jobs = pending_jobs - 1, failed_jobs = failed_jobs + 1" + ) + else + Batch.where(batch_id: batch_id).update_all( + "pending_jobs = pending_jobs - 1, completed_jobs = completed_jobs + 1" + ) + end + end + + batch = Batch.find_by(batch_id: batch_id) + batch&.check_completion! + end + end + end +end diff --git a/app/models/solid_queue/batch_record.rb b/app/models/solid_queue/batch_record.rb deleted file mode 100644 index 32893e86..00000000 --- a/app/models/solid_queue/batch_record.rb +++ /dev/null @@ -1,174 +0,0 @@ -# frozen_string_literal: true - -module SolidQueue - class BatchRecord < Record - self.table_name = "solid_queue_job_batches" - - STATUSES = %w[pending processing completed failed] - - belongs_to :parent_job_batch, foreign_key: :parent_job_batch_id, class_name: "SolidQueue::BatchRecord", optional: true - has_many :jobs, foreign_key: :batch_id, primary_key: :batch_id - has_many :children, foreign_key: :parent_job_batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchRecord" - - serialize :on_finish, coder: JSON - serialize :on_success, coder: JSON - serialize :on_failure, coder: JSON - serialize :metadata, coder: JSON - - validates :status, inclusion: { in: STATUSES } - - scope :pending, -> { where(status: "pending") } - scope :processing, -> { where(status: "processing") } - scope :completed, -> { where(status: "completed") } - scope :failed, -> { where(status: "failed") } - scope :finished, -> { where(status: %w[completed failed]) } - scope :unfinished, -> { where(status: %w[pending processing]) } - - after_initialize :set_batch_id - 
before_create :set_parent_job_batch_id - - def on_success=(value) - super(serialize_callback(value)) - end - - def on_failure=(value) - super(serialize_callback(value)) - end - - def on_finish=(value) - super(serialize_callback(value)) - end - - def job_finished!(job) - return if finished? - return if job.batch_processed_at? - - job.with_lock do - if job.batch_processed_at.blank? - job.update!(batch_processed_at: Time.current) - - if job.failed_execution.present? - self.class.where(id: id).update_all( - "failed_jobs = failed_jobs + 1, pending_jobs = pending_jobs - 1" - ) - else - self.class.where(id: id).update_all( - "completed_jobs = completed_jobs + 1, pending_jobs = pending_jobs - 1" - ) - end - end - end - - reload - check_completion! - end - - def check_completion! - return if finished? - - actual_children = children.count - return if actual_children < expected_children - - children.find_each do |child| - return unless child.finished? - end - - with_lock do - if finished? - # do nothing - elsif pending_jobs <= 0 - if failed_jobs > 0 - mark_as_failed! - else - mark_as_completed! - end - clear_unpreserved_jobs - elsif status == "pending" - update!(status: "processing") - end - end - end - - def finished? - status.in?(%w[completed failed]) - end - - def processing? - status == "processing" - end - - def pending? - status == "pending" - end - - def progress_percentage - return 0 if total_jobs == 0 - ((completed_jobs + failed_jobs) * 100.0 / total_jobs).round(2) - end - - private - - def set_parent_job_batch_id - self.parent_job_batch_id ||= Batch.current_batch_id if Batch.current_batch_id.present? - end - - def set_batch_id - self.batch_id ||= SecureRandom.uuid - end - - def as_active_job(active_job_klass) - active_job_klass.is_a?(ActiveJob::Base) ? active_job_klass : active_job_klass.new - end - - def serialize_callback(value) - return value if value.blank? 
- active_job = as_active_job(value) - # We can pick up batch ids from context, but callbacks should never be considered a part of the batch - active_job.batch_id = nil - active_job.serialize - end - - def perform_completion_job(job_field, attrs) - active_job = ActiveJob::Base.deserialize(send(job_field)) - active_job.send(:deserialize_arguments_if_needed) - active_job.arguments = [ Batch.new(_batch_record: self) ] + Array.wrap(active_job.arguments) - SolidQueue::Job.enqueue_all([ active_job ]) - - active_job.provider_job_id = Job.find_by(active_job_id: active_job.job_id).id - attrs[job_field] = active_job.serialize - end - - def mark_as_completed! - # SolidQueue does treats `discard_on` differently than failures. The job will report as being :finished, - # and there is no record of the failure. - # GoodJob would report a discard as an error. It's possible we should do that in the future? - update!(status: "completed", finished_at: Time.current) - - perform_completion_job(:on_success, {}) if on_success.present? - perform_completion_job(:on_finish, {}) if on_finish.present? - - if parent_job_batch_id.present? - parent = BatchRecord.find_by(batch_id: parent_job_batch_id) - parent&.reload&.check_completion! - end - end - - def mark_as_failed! - update!(status: "failed", finished_at: Time.current) - perform_completion_job(:on_failure, {}) if on_failure.present? - perform_completion_job(:on_finish, {}) if on_finish.present? - - # Check if parent batch can now complete - if parent_job_batch_id.present? - parent = BatchRecord.find_by(batch_id: parent_job_batch_id) - parent&.check_completion! - end - end - - def clear_unpreserved_jobs - SolidQueue::Batch::CleanupJob.perform_later(self) unless SolidQueue.preserve_finished_jobs? 
- end - end -end - -require_relative "batch_record/buffer" diff --git a/app/models/solid_queue/execution/batchable.rb b/app/models/solid_queue/execution/batchable.rb index bc1cd7a2..7b031d4f 100644 --- a/app/models/solid_queue/execution/batchable.rb +++ b/app/models/solid_queue/execution/batchable.rb @@ -11,9 +11,12 @@ module Batchable private def update_batch_progress - BatchUpdateJob.perform_later(job.batch_id, job) + # FailedExecutions are only created when the job is done retrying + if is_a?(FailedExecution) + BatchExecution.process_job_completion(job, "failed") + end rescue => e - Rails.logger.error "[SolidQueue] Failed to notify batch #{batch_id} about job #{id} completion: #{e.message}" + Rails.logger.error "[SolidQueue] Failed to notify batch #{job.batch_id} about job #{job.id} failure: #{e.message}" end end end diff --git a/app/models/solid_queue/job/batchable.rb b/app/models/solid_queue/job/batchable.rb index 7d33bcee..e7ab5a0e 100644 --- a/app/models/solid_queue/job/batchable.rb +++ b/app/models/solid_queue/job/batchable.rb @@ -6,7 +6,8 @@ module Batchable extend ActiveSupport::Concern included do - belongs_to :job_batch, foreign_key: :batch_id, optional: true + belongs_to :batch, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::Batch", optional: true + has_one :batch_execution, foreign_key: :job_id, dependent: :destroy after_update :update_batch_progress, if: :batch_id? end @@ -16,9 +17,11 @@ def update_batch_progress return unless saved_change_to_finished_at? && finished_at.present? return unless batch_id.present? 
- BatchUpdateJob.perform_later(batch_id, self) + # Jobs marked as finished are considered completed + # (even if they failed and are being retried - we don't know that here) + BatchExecution.process_job_completion(self, "completed") rescue => e - Rails.logger.error "[SolidQueue] Failed to notify batch #{batch_id} about job #{id} completion: #{e.message}" + Rails.logger.error "[SolidQueue] Failed to update batch #{batch_id} progress for job #{id}: #{e.message}" end end end diff --git a/lib/active_job/batch_record_id.rb b/lib/active_job/batch_id.rb similarity index 80% rename from lib/active_job/batch_record_id.rb rename to lib/active_job/batch_id.rb index 7f8491af..b1afb4a5 100644 --- a/lib/active_job/batch_record_id.rb +++ b/lib/active_job/batch_id.rb @@ -3,7 +3,7 @@ # Inspired by active_job/core.rb docs # https://github.com/rails/rails/blob/1c2529b9a6ba5a1eff58be0d0373d7d9d401015b/activejob/lib/active_job/core.rb#L136 module ActiveJob - module BatchRecordId + module BatchId extend ActiveSupport::Concern included do @@ -18,7 +18,7 @@ def initialize(*arguments, **kwargs) def enqueue(options = {}) super.tap do |job| if solid_queue_job? - SolidQueue::BatchRecord::Buffer.capture_job(self) + SolidQueue::Batch::Buffer.capture_job(self) end end end @@ -33,9 +33,7 @@ def deserialize(job_data) end def batch - @batch ||= SolidQueue::Batch.new( - _batch_record: SolidQueue::BatchRecord.find_by(batch_id: batch_id) - ) + @batch ||= SolidQueue::Batch.find_by(batch_id: batch_id) end private diff --git a/lib/active_job/queue_adapters/solid_queue_adapter.rb b/lib/active_job/queue_adapters/solid_queue_adapter.rb index 04de658d..7a9b505d 100644 --- a/lib/active_job/queue_adapters/solid_queue_adapter.rb +++ b/lib/active_job/queue_adapters/solid_queue_adapter.rb @@ -47,6 +47,7 @@ def in_batch_retry?(active_job) def increment_job_count(active_job, enqueued_job) if enqueued_job.persisted? 
&& in_batch_retry?(active_job) + SolidQueue::BatchExecution.track_job_creation(active_job, active_job.batch_id) SolidQueue::Batch.update_job_count(active_job.batch_id, 1) end end diff --git a/lib/generators/solid_queue/install/templates/db/queue_schema.rb b/lib/generators/solid_queue/install/templates/db/queue_schema.rb index f9fd033f..55c3156d 100644 --- a/lib/generators/solid_queue/install/templates/db/queue_schema.rb +++ b/lib/generators/solid_queue/install/templates/db/queue_schema.rb @@ -26,9 +26,9 @@ t.index [ "job_id" ], name: "index_solid_queue_failed_executions_on_job_id", unique: true end - create_table "solid_queue_job_batches", force: :cascade do |t| + create_table "solid_queue_batches", force: :cascade do |t| t.string "batch_id", null: false - t.string "parent_job_batch_id" + t.string "parent_batch_id" t.text "on_finish" t.text "on_success" t.text "on_failure" @@ -37,13 +37,20 @@ t.integer "pending_jobs", default: 0, null: false t.integer "completed_jobs", default: 0, null: false t.integer "failed_jobs", default: 0, null: false - t.integer "expected_children", default: 0, null: false + t.integer "total_child_batches", default: 0, null: false t.string "status", default: "pending", null: false t.datetime "finished_at" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index [ "finished_at" ], name: "index_solid_queue_job_batches_on_finished_at" - t.index [ "parent_job_batch_id" ], name: "index_solid_queue_job_batches_on_parent_job_batch_id" + t.index [ "batch_id" ], name: "index_solid_queue_batches_on_batch_id", unique: true + t.index [ "parent_batch_id" ], name: "index_solid_queue_batches_on_parent_batch_id" + end + + create_table "solid_queue_batch_executions", force: :cascade do |t| + t.bigint "job_id", null: false + t.string "batch_id", null: false + t.datetime "created_at", null: false + t.index [ "job_id" ], name: "index_solid_queue_batch_executions_on_job_id", unique: true end create_table "solid_queue_jobs", force: 
:cascade do |t| @@ -58,7 +65,6 @@ t.datetime "created_at", null: false t.datetime "updated_at", null: false t.string "batch_id" - t.datetime "batch_processed_at" t.index [ "active_job_id" ], name: "index_solid_queue_jobs_on_active_job_id" t.index [ "batch_id" ], name: "index_solid_queue_jobs_on_batch_id" t.index [ "class_name" ], name: "index_solid_queue_jobs_on_class_name" diff --git a/lib/solid_queue.rb b/lib/solid_queue.rb index f4d3a92c..58b459fd 100644 --- a/lib/solid_queue.rb +++ b/lib/solid_queue.rb @@ -5,7 +5,7 @@ require "active_job" require "active_job/queue_adapters" -require "active_job/batch_record_id" +require "active_job/batch_id" require "active_support" require "active_support/core_ext/numeric/time" diff --git a/lib/solid_queue/batch.rb b/lib/solid_queue/batch.rb deleted file mode 100644 index 0904df0a..00000000 --- a/lib/solid_queue/batch.rb +++ /dev/null @@ -1,142 +0,0 @@ -# frozen_string_literal: true - -require_relative "batch/empty_job" -require_relative "batch/cleanup_job" - -module SolidQueue - class Batch - include GlobalID::Identification - - delegate :completed_jobs, :failed_jobs, :pending_jobs, :total_jobs, :progress_percentage, - :finished?, :processing?, :pending?, :status, :batch_id, - :metadata, :metadata=, - :on_success, :on_success=, - :on_failure, :on_failure=, - :on_finish, :on_finish=, - :reload, - to: :batch_record - - def initialize(_batch_record: nil) - @batch_record = _batch_record || BatchRecord.new - end - - def batch_record - @batch_record - end - - def id - batch_id - end - - def enqueue(&block) - raise "You cannot enqueue a batch that is already finished" if finished? - - SolidQueue::BatchRecord::Buffer.capture_child_batch(self) if batch_record.new_record? - - buffer = SolidQueue::BatchRecord::Buffer.new - buffer.capture do - Batch.wrap_in_batch_context(batch_id) do - block.call(self) - end - end - - if enqueue_after_transaction_commit? 
- ActiveRecord.after_all_transactions_commit do - enqueue_batch(buffer) - end - else - enqueue_batch(buffer) - end - end - - private - - def enqueue_after_transaction_commit? - return false unless defined?(ApplicationJob.enqueue_after_transaction_commit) - - case ApplicationJob.enqueue_after_transaction_commit - when :always, true - true - when :never, false - false - when :default - true - end - end - - def enqueue_batch(buffer) - if batch_record.new_record? - enqueue_new_batch(buffer) - else - jobs = buffer.jobs.values - enqueue_existing_batch(jobs) - end - end - - def enqueue_new_batch(buffer) - SolidQueue::BatchRecord.transaction do - batch_record.save! - - # If batch has no jobs, enqueue an EmptyJob - # This ensures callbacks always execute, even for empty batches - jobs = buffer.jobs.values - if jobs.empty? - empty_job = SolidQueue::Batch::EmptyJob.new - empty_job.batch_id = batch_record.batch_id - jobs = [ empty_job ] - end - - batch_record.update!( - total_jobs: jobs.size, - pending_jobs: SolidQueue::Job.enqueue_all(jobs), - expected_children: buffer.child_batches.size - ) - end - end - - def enqueue_existing_batch(active_jobs) - jobs = Array.wrap(active_jobs) - enqueued_count = SolidQueue::Job.enqueue_all(jobs) - - Batch.update_job_count(batch_id, enqueued_count) - end - - class << self - def enqueue(on_success: nil, on_failure: nil, on_finish: nil, metadata: nil, &block) - new.tap do |batch| - batch.batch_record.assign_attributes( - on_success: on_success, - on_failure: on_failure, - on_finish: on_finish, - metadata: metadata, - parent_job_batch_id: current_batch_id - ) - - batch.enqueue(&block) - end - end - - def find(batch_id) - new(_batch_record: BatchRecord.find_by!(batch_id: batch_id)) - end - - def update_job_count(batch_id, count) - BatchRecord.where(batch_id: batch_id).update_all( - "total_jobs = total_jobs + #{count}, pending_jobs = pending_jobs + #{count}" - ) - end - - def current_batch_id - 
ActiveSupport::IsolatedExecutionState[:current_batch_id] - end - - def wrap_in_batch_context(batch_id) - previous_batch_id = current_batch_id.presence || nil - ActiveSupport::IsolatedExecutionState[:current_batch_id] = batch_id - yield - ensure - ActiveSupport::IsolatedExecutionState[:current_batch_id] = previous_batch_id - end - end - end -end diff --git a/lib/solid_queue/engine.rb b/lib/solid_queue/engine.rb index f7e059cb..8daffe0e 100644 --- a/lib/solid_queue/engine.rb +++ b/lib/solid_queue/engine.rb @@ -35,7 +35,7 @@ class Engine < ::Rails::Engine initializer "solid_queue.active_job.extensions" do ActiveSupport.on_load :active_job do include ActiveJob::ConcurrencyControls - include ActiveJob::BatchRecordId + include ActiveJob::BatchId end end end diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index 1201a59f..cc2e1a11 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -38,9 +38,9 @@ t.index ["job_id"], name: "index_solid_queue_failed_executions_on_job_id", unique: true end - create_table "solid_queue_job_batches", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| + create_table "solid_queue_batches", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.string "batch_id" - t.string "parent_job_batch_id" + t.string "parent_batch_id" t.text "on_finish" t.text "on_success" t.text "on_failure" @@ -49,14 +49,20 @@ t.integer "pending_jobs", default: 0, null: false t.integer "completed_jobs", default: 0, null: false t.integer "failed_jobs", default: 0, null: false - t.integer "expected_children", default: 0, null: false + t.integer "total_child_batches", default: 0, null: false t.string "status", default: "pending", null: false t.datetime "finished_at" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index ["batch_id"], name: "index_solid_queue_job_batches_on_batch_id", unique: true - t.index ["finished_at"], name: 
"index_solid_queue_job_batches_on_finished_at" - t.index ["parent_job_batch_id"], name: "index_solid_queue_job_batches_on_parent_job_batch_id" + t.index ["batch_id"], name: "index_solid_queue_batches_on_batch_id", unique: true + t.index ["parent_batch_id"], name: "index_solid_queue_batches_on_parent_batch_id" + end + + create_table "solid_queue_batch_executions", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| + t.bigint "job_id", null: false + t.string "batch_id", null: false + t.datetime "created_at", null: false + t.index ["job_id"], name: "index_solid_queue_batch_executions_on_job_id", unique: true end create_table "solid_queue_jobs", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index 3ad455f8..5514fbcd 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -6,7 +6,7 @@ class BatchLifecycleTest < ActiveSupport::TestCase FailingJobError = Class.new(RuntimeError) def assert_finished_in_order(*batches) - job_batches = batches.map { |batch| SolidQueue::BatchRecord.find_by(batch_id: batch.batch_id) } + job_batches = batches.map { |batch| SolidQueue::Batch.find_by(batch_id: batch.batch_id) } job_batches.each_cons(2) do |batch1, batch2| assert_equal batch1.reload.finished_at < batch2.reload.finished_at, true @@ -28,7 +28,7 @@ def assert_finished_in_order(*batches) JobBuffer.clear SolidQueue::Job.destroy_all - SolidQueue::BatchRecord.destroy_all + SolidQueue::Batch.destroy_all ApplicationJob.enqueue_after_transaction_commit = false if defined?(ApplicationJob.enqueue_after_transaction_commit) SolidQueue.preserve_finished_jobs = true @@ -101,7 +101,7 @@ def perform expected_values = [ "1: 1 jobs succeeded!", "1.1: 1 jobs succeeded!", "2: 1 jobs succeeded!", "3: 1 jobs succeeded!" 
] assert_equal expected_values.sort, JobBuffer.values.sort - assert_equal 4, SolidQueue::BatchRecord.finished.count + assert_equal 4, SolidQueue::Batch.finished.count assert_finished_in_order(batch4, batch2, batch1) assert_finished_in_order(batch3, batch2, batch1) end @@ -122,7 +122,7 @@ def perform wait_for_job_batches_to_finish_for(2.seconds) assert_equal [ "added from inside 1", "added from inside 2", "added from inside 3", "hey", "ho" ], JobBuffer.values.sort - assert_equal 3, SolidQueue::BatchRecord.finished.count + assert_equal 3, SolidQueue::Batch.finished.count assert_finished_in_order(batch2, batch1) end @@ -154,9 +154,9 @@ def perform wait_for_job_batches_to_finish_for(2.seconds) wait_for_jobs_to_finish_for(2.seconds) - assert_equal 3, SolidQueue::BatchRecord.finished.count - assert_equal 6, SolidQueue::Job.finished.count - assert_equal 6, SolidQueue::Job.count + assert_equal 3, SolidQueue::Batch.finished.count + assert_equal 3, SolidQueue::Job.finished.count + assert_equal 3, SolidQueue::Job.count assert_finished_in_order(batch3, batch2, batch1) end @@ -175,23 +175,23 @@ def perform wait_for_job_batches_to_finish_for(3.seconds) wait_for_jobs_to_finish_for(3.seconds) - job_batch1 = SolidQueue::BatchRecord.find_by(batch_id: batch1.batch_id) - job_batch2 = SolidQueue::BatchRecord.find_by(batch_id: batch2.batch_id) + job_batch1 = SolidQueue::Batch.find_by(batch_id: batch1.batch_id) + job_batch2 = SolidQueue::Batch.find_by(batch_id: batch2.batch_id) - assert_equal 2, SolidQueue::BatchRecord.count - assert_equal 2, SolidQueue::BatchRecord.finished.count + assert_equal 2, SolidQueue::Batch.count + assert_equal 2, SolidQueue::Batch.finished.count - assert_equal 3, job_batch1.total_jobs - assert_equal 1, job_batch1.failed_jobs - assert_equal 2, job_batch1.completed_jobs + assert_equal 3, job_batch1.total_jobs # 1 original + 2 retries + assert_equal 1, job_batch1.failed_jobs # Final failure + assert_equal 2, job_batch1.completed_jobs # 2 retries marked as 
"finished" assert_equal 0, job_batch1.pending_jobs - assert_equal 3, job_batch2.total_jobs - assert_equal 1, job_batch2.failed_jobs - assert_equal 2, job_batch2.completed_jobs + assert_equal 3, job_batch2.total_jobs # 1 original + 2 retries + assert_equal 1, job_batch2.failed_jobs # Final failure + assert_equal 2, job_batch2.completed_jobs # 2 retries marked as "finished" assert_equal 0, job_batch2.pending_jobs - assert_equal [ "failed", "failed" ].sort, SolidQueue::BatchRecord.all.pluck(:status) + assert_equal [ "failed", "failed" ].sort, SolidQueue::Batch.all.pluck(:status) assert_equal [ "0: 1 jobs failed!", "1: 1 jobs failed!" ], JobBuffer.values.sort assert_finished_in_order(batch2, batch1) end @@ -211,11 +211,11 @@ def perform wait_for_job_batches_to_finish_for(3.seconds) wait_for_jobs_to_finish_for(3.seconds) - job_batch1 = SolidQueue::BatchRecord.find_by(batch_id: batch1.batch_id) - job_batch2 = SolidQueue::BatchRecord.find_by(batch_id: batch2.batch_id) + job_batch1 = SolidQueue::Batch.find_by(batch_id: batch1.batch_id) + job_batch2 = SolidQueue::Batch.find_by(batch_id: batch2.batch_id) - assert_equal 2, SolidQueue::BatchRecord.count - assert_equal 2, SolidQueue::BatchRecord.finished.count + assert_equal 2, SolidQueue::Batch.count + assert_equal 2, SolidQueue::Batch.finished.count assert_equal 1, job_batch1.total_jobs assert_equal 0, job_batch1.failed_jobs @@ -227,7 +227,7 @@ def perform assert_equal 1, job_batch2.completed_jobs assert_equal 0, job_batch2.pending_jobs - assert_equal [ "completed", "completed" ].sort, SolidQueue::BatchRecord.all.pluck(:status) + assert_equal [ "completed", "completed" ].sort, SolidQueue::Batch.all.pluck(:status) assert_equal [ "0: 1 jobs succeeded!", "1: 1 jobs succeeded!" 
], JobBuffer.values.sort assert_finished_in_order(batch2, batch1) end diff --git a/test/models/solid_queue/batch_record_test.rb b/test/models/solid_queue/batch_test.rb similarity index 72% rename from test/models/solid_queue/batch_record_test.rb rename to test/models/solid_queue/batch_test.rb index d0ffbf6f..ee00bfd1 100644 --- a/test/models/solid_queue/batch_record_test.rb +++ b/test/models/solid_queue/batch_test.rb @@ -1,11 +1,11 @@ require "test_helper" -class SolidQueue::BatchRecordTest < ActiveSupport::TestCase +class SolidQueue::BatchTest < ActiveSupport::TestCase self.use_transactional_tests = false teardown do SolidQueue::Job.destroy_all - SolidQueue::BatchRecord.destroy_all + SolidQueue::Batch.destroy_all end class BatchWithArgumentsJob < ApplicationJob @@ -24,14 +24,14 @@ def perform(arg) test "batch will be completed on success" do batch = SolidQueue::Batch.enqueue(on_finish: BatchCompletionJob) { } - job_batch = SolidQueue::BatchRecord.find_by(batch_id: batch.batch_id) + job_batch = SolidQueue::Batch.find_by(batch_id: batch.batch_id) assert_not_nil job_batch.on_finish assert_equal BatchCompletionJob.name, job_batch.on_finish["job_class"] end test "batch will be completed on finish" do batch = SolidQueue::Batch.enqueue(on_success: BatchCompletionJob) { } - job_batch = SolidQueue::BatchRecord.find_by(batch_id: batch.batch_id) + job_batch = SolidQueue::Batch.find_by(batch_id: batch.batch_id) assert_not_nil job_batch.on_success assert_equal BatchCompletionJob.name, job_batch.on_success["job_class"] end @@ -61,9 +61,9 @@ def perform(arg) NiceJob.perform_later("world") end - assert_not_nil SolidQueue::BatchRecord.last.on_finish["arguments"] - assert_equal SolidQueue::BatchRecord.last.on_finish["arguments"], [ 1, 2 ] - assert_equal SolidQueue::BatchRecord.last.on_finish["queue_name"], "batch" + assert_not_nil SolidQueue::Batch.last.on_finish["arguments"] + assert_equal SolidQueue::Batch.last.on_finish["arguments"], [ 1, 2 ] + assert_equal 
SolidQueue::Batch.last.on_finish["queue_name"], "batch" end test "creates batch with metadata" do @@ -73,9 +73,9 @@ def perform(arg) NiceJob.perform_later("world") end - assert_not_nil SolidQueue::BatchRecord.last.metadata - assert_equal SolidQueue::BatchRecord.last.metadata["source"], "test" - assert_equal SolidQueue::BatchRecord.last.metadata["priority"], "high" - assert_equal SolidQueue::BatchRecord.last.metadata["user_id"], 123 + assert_not_nil SolidQueue::Batch.last.metadata + assert_equal SolidQueue::Batch.last.metadata["source"], "test" + assert_equal SolidQueue::Batch.last.metadata["priority"], "high" + assert_equal SolidQueue::Batch.last.metadata["user_id"], 123 end end diff --git a/test/test_helpers/jobs_test_helper.rb b/test/test_helpers/jobs_test_helper.rb index 1a8a205c..62913f08 100644 --- a/test/test_helpers/jobs_test_helper.rb +++ b/test/test_helpers/jobs_test_helper.rb @@ -20,7 +20,7 @@ def wait_for_jobs_to_be_released_for(timeout = 1.second) def wait_for_job_batches_to_finish_for(timeout = 1.second) wait_while_with_timeout(timeout) do skip_active_record_query_cache do - SolidQueue::BatchRecord.where(finished_at: nil).any? + SolidQueue::Batch.where(finished_at: nil).any? end end end From bd9a7819153be0aeb9c9ef0d16c50a505d2a4c13 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 9 Sep 2025 01:08:01 +0200 Subject: [PATCH 16/38] Reduce complexity of batches implementation * Use BatchPreparable module to add batch executions and update the batch totals after creating ready/scheduled records * Remove buffer logic - this makes it harder to implement empty batches, but results in code that is much simpler to understand.
Also batches will always exist now, whether the jobs inside ever get enqueued or not * Move logic for getting status and querying by status scopes to Trackable module * total_child_batches wasn't being used for much, so remove it --- app/models/solid_queue/batch.rb | 122 +----------------- app/models/solid_queue/batch/buffer.rb | 47 ------- app/models/solid_queue/batch/trackable.rb | 35 +++++ app/models/solid_queue/batch_execution.rb | 24 ++-- .../solid_queue/execution/batch_preparable.rb | 25 ++++ app/models/solid_queue/ready_execution.rb | 2 + app/models/solid_queue/scheduled_execution.rb | 2 +- lib/active_job/batch_id.rb | 8 -- .../queue_adapters/solid_queue_adapter.rb | 29 +---- .../install/templates/db/queue_schema.rb | 1 - test/dummy/db/queue_schema.rb | 1 - test/integration/batch_lifecycle_test.rb | 89 ++++++++++--- test/test_helpers/jobs_test_helper.rb | 2 +- 13 files changed, 158 insertions(+), 229 deletions(-) delete mode 100644 app/models/solid_queue/batch/buffer.rb create mode 100644 app/models/solid_queue/batch/trackable.rb create mode 100644 app/models/solid_queue/execution/batch_preparable.rb diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index c36a0836..c5b64637 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -4,6 +4,8 @@ module SolidQueue class Batch < Record STATUSES = %w[pending processing completed failed] + include Trackable + belongs_to :parent_batch, foreign_key: :parent_batch_id, class_name: "SolidQueue::Batch", optional: true has_many :jobs, foreign_key: :batch_id, primary_key: :batch_id has_many :batch_executions, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchExecution" @@ -16,34 +18,16 @@ class Batch < Record validates :status, inclusion: { in: STATUSES } - scope :pending, -> { where(status: "pending") } - scope :processing, -> { where(status: "processing") } - scope :completed, -> { where(status: "completed") } - scope :failed, -> { 
where(status: "failed") } - scope :finished, -> { where(status: %w[completed failed]) } - scope :unfinished, -> { where(status: %w[pending processing]) } - after_initialize :set_batch_id before_create :set_parent_batch_id def enqueue(&block) raise "You cannot enqueue a batch that is already finished" if finished? - SolidQueue::Batch::Buffer.capture_child_batch(self) if new_record? - - buffer = SolidQueue::Batch::Buffer.new - buffer.capture do - Batch.wrap_in_batch_context(batch_id) do - block.call(self) - end - end + save! if new_record? - if enqueue_after_transaction_commit? - ActiveRecord.after_all_transactions_commit do - enqueue_batch(buffer) - end - else - enqueue_batch(buffer) + Batch.wrap_in_batch_context(batch_id) do + block.call(self) end end @@ -68,7 +52,7 @@ def check_completion! if pending_jobs == 0 unfinished_children = child_batches.where.not(status: %w[completed failed]).count - if total_child_batches == 0 || unfinished_children == 0 + if unfinished_children == 0 new_status = failed_jobs > 0 ? "failed" : "completed" update!(status: new_status, finished_at: Time.current) execute_callbacks @@ -80,93 +64,8 @@ def check_completion! end end - def finished? - status.in?(%w[completed failed]) - end - - def processing? - status == "processing" - end - - def pending? - status == "pending" - end - - def progress_percentage - return 0 if total_jobs == 0 - ((completed_jobs + failed_jobs) * 100.0 / total_jobs).round(2) - end - private - def enqueue_after_transaction_commit? - return false unless defined?(ApplicationJob.enqueue_after_transaction_commit) - - case ApplicationJob.enqueue_after_transaction_commit - when :always, true - true - when :never, false - false - when :default - true - end - end - - def enqueue_batch(buffer) - if new_record? - enqueue_new_batch(buffer) - else - enqueue_existing_batch(buffer) - end - end - - def enqueue_new_batch(buffer) - SolidQueue::Batch.transaction do - save! 
- - # If batch has no jobs, enqueue an EmptyJob - # This ensures callbacks always execute, even for empty batches - jobs = buffer.jobs.values - if jobs.empty? - empty_job = SolidQueue::Batch::EmptyJob.new - empty_job.batch_id = batch_id - jobs = [ empty_job ] - end - - # Enqueue jobs - this handles creation and preparation - enqueued_count = SolidQueue::Job.enqueue_all(jobs) - - persisted_jobs = jobs.select { |job| job.provider_job_id.present? } - SolidQueue::BatchExecution.track_job_creation(persisted_jobs, batch_id) - - # Update batch record with counts - update!( - total_jobs: enqueued_count, - pending_jobs: enqueued_count, - total_child_batches: buffer.child_batches.size - ) - end - end - - def enqueue_existing_batch(buffer) - jobs = buffer.jobs.values - new_child_batches = buffer.child_batches.size - - SolidQueue::Batch.transaction do - enqueued_count = SolidQueue::Job.enqueue_all(jobs) - - persisted_jobs = jobs.select(&:successfully_enqueued?) - SolidQueue::BatchExecution.track_job_creation(persisted_jobs, batch_id) - - Batch.where(batch_id: batch_id).update_all([ - "total_jobs = total_jobs + ?, pending_jobs = pending_jobs + ?, total_child_batches = total_child_batches + ?", - enqueued_count, enqueued_count, new_child_batches - ]) - end - - jobs.count(&:successfully_enqueued?) - end - def set_parent_batch_id self.parent_batch_id ||= Batch.current_batch_id if Batch.current_batch_id.present? 
end @@ -237,13 +136,6 @@ def enqueue(on_success: nil, on_failure: nil, on_finish: nil, metadata: nil, &bl end end - def update_job_count(batch_id, count) - count = count.to_i - Batch.where(batch_id: batch_id).update_all( - "total_jobs = total_jobs + #{count}, pending_jobs = pending_jobs + #{count}", - ) - end - def current_batch_id ActiveSupport::IsolatedExecutionState[:current_batch_id] end @@ -258,5 +150,3 @@ def wrap_in_batch_context(batch_id) end end end - -require_relative "batch/buffer" diff --git a/app/models/solid_queue/batch/buffer.rb b/app/models/solid_queue/batch/buffer.rb deleted file mode 100644 index a9eb1e97..00000000 --- a/app/models/solid_queue/batch/buffer.rb +++ /dev/null @@ -1,47 +0,0 @@ -# frozen_string_literal: true - -module SolidQueue - class Batch - class Buffer - attr_reader :jobs, :child_batches - - def initialize - @jobs = {} - @child_batches = [] - end - - def add(job) - @jobs[job.job_id] = job - job - end - - def add_child_batch(batch) - @child_batches << batch - batch - end - - def capture - previous_buffer = ActiveSupport::IsolatedExecutionState[:solid_queue_batch_buffer] - ActiveSupport::IsolatedExecutionState[:solid_queue_batch_buffer] = self - - yield - - @jobs - ensure - ActiveSupport::IsolatedExecutionState[:solid_queue_batch_buffer] = previous_buffer - end - - def self.current - ActiveSupport::IsolatedExecutionState[:solid_queue_batch_buffer] - end - - def self.capture_job(job) - current&.add(job) - end - - def self.capture_child_batch(batch) - current&.add_child_batch(batch) - end - end - end -end diff --git a/app/models/solid_queue/batch/trackable.rb b/app/models/solid_queue/batch/trackable.rb new file mode 100644 index 00000000..806025f0 --- /dev/null +++ b/app/models/solid_queue/batch/trackable.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module SolidQueue + class Batch + module Trackable + extend ActiveSupport::Concern + + included do + scope :pending, -> { where(status: "pending") } + scope :processing, -> { 
where(status: "processing") } + scope :completed, -> { where(status: "completed") } + scope :failed, -> { where(status: "failed") } + scope :finished, -> { where(status: %w[completed failed]) } + scope :unfinished, -> { where(status: %w[pending processing]) } + end + + def finished? + status.in?(%w[completed failed]) + end + + def processing? + status == "processing" + end + + def pending? + status == "pending" + end + + def progress_percentage + return 0 if total_jobs == 0 + ((completed_jobs + failed_jobs) * 100.0 / total_jobs).round(2) + end + end + end +end diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index 51ef6050..29c4aabc 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -6,15 +6,23 @@ class BatchExecution < Record belongs_to :batch, foreign_key: :batch_id, primary_key: :batch_id class << self - def track_job_creation(active_jobs, batch_id) - execution_data = Array.wrap(active_jobs).map do |active_job| - { - job_id: active_job.provider_job_id, - batch_id: batch_id - } + def create_all_from_jobs(jobs) + batch_jobs = jobs.select { |job| job.batch_id.present? } + return if batch_jobs.empty? + + batch_jobs.group_by(&:batch_id).each do |batch_id, jobs| + BatchExecution.insert_all!(jobs.map { |job| + { batch_id:, job_id: job.respond_to?(:provider_job_id) ? 
job.provider_job_id : job.id } + }) + + total = jobs.size + SolidQueue::Batch.upsert( + { batch_id:, total_jobs: total, pending_jobs: total }, + on_duplicate: Arel.sql( + "total_jobs = total_jobs + #{total}, pending_jobs = pending_jobs + #{total}" + ) + ) end - - SolidQueue::BatchExecution.insert_all(execution_data) end def process_job_completion(job, status) diff --git a/app/models/solid_queue/execution/batch_preparable.rb b/app/models/solid_queue/execution/batch_preparable.rb new file mode 100644 index 00000000..e1df26b0 --- /dev/null +++ b/app/models/solid_queue/execution/batch_preparable.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module SolidQueue + class Execution + module BatchPreparable + extend ActiveSupport::Concern + + included do + after_create :create_batch_execution + end + + def create_batch_execution + BatchExecution.create_all_from_jobs([ job ]) + end + + class_methods do + def create_all_from_jobs(jobs) + super.tap do + BatchExecution.create_all_from_jobs(jobs) + end + end + end + end + end +end diff --git a/app/models/solid_queue/ready_execution.rb b/app/models/solid_queue/ready_execution.rb index 35a11292..11748e6a 100644 --- a/app/models/solid_queue/ready_execution.rb +++ b/app/models/solid_queue/ready_execution.rb @@ -2,6 +2,8 @@ module SolidQueue class ReadyExecution < Execution + include BatchPreparable + scope :queued_as, ->(queue_name) { where(queue_name: queue_name) } assumes_attributes_from_job diff --git a/app/models/solid_queue/scheduled_execution.rb b/app/models/solid_queue/scheduled_execution.rb index f2159422..c6d82951 100644 --- a/app/models/solid_queue/scheduled_execution.rb +++ b/app/models/solid_queue/scheduled_execution.rb @@ -2,7 +2,7 @@ module SolidQueue class ScheduledExecution < Execution - include Dispatching + include Dispatching, BatchPreparable scope :due, -> { where(scheduled_at: ..Time.current) } scope :ordered, -> { order(scheduled_at: :asc, priority: :asc, job_id: :asc) } diff --git 
a/lib/active_job/batch_id.rb b/lib/active_job/batch_id.rb index b1afb4a5..fe57755c 100644 --- a/lib/active_job/batch_id.rb +++ b/lib/active_job/batch_id.rb @@ -15,14 +15,6 @@ def initialize(*arguments, **kwargs) self.batch_id = SolidQueue::Batch.current_batch_id if solid_queue_job? end - def enqueue(options = {}) - super.tap do |job| - if solid_queue_job? - SolidQueue::Batch::Buffer.capture_job(self) - end - end - end - def serialize super.merge("batch_id" => batch_id) end diff --git a/lib/active_job/queue_adapters/solid_queue_adapter.rb b/lib/active_job/queue_adapters/solid_queue_adapter.rb index 7a9b505d..fe556042 100644 --- a/lib/active_job/queue_adapters/solid_queue_adapter.rb +++ b/lib/active_job/queue_adapters/solid_queue_adapter.rb @@ -16,41 +16,16 @@ def enqueue_after_transaction_commit? end def enqueue(active_job) # :nodoc: - return if in_batch?(active_job) - - SolidQueue::Job.enqueue(active_job).tap do |enqueued_job| - increment_job_count(active_job, enqueued_job) - end + SolidQueue::Job.enqueue(active_job) end def enqueue_at(active_job, timestamp) # :nodoc: - return if in_batch?(active_job) - - SolidQueue::Job.enqueue(active_job, scheduled_at: Time.at(timestamp)).tap do |enqueued_job| - increment_job_count(active_job, enqueued_job) - end + SolidQueue::Job.enqueue(active_job, scheduled_at: Time.at(timestamp)) end def enqueue_all(active_jobs) # :nodoc: SolidQueue::Job.enqueue_all(active_jobs) end - - private - - def in_batch?(active_job) - active_job.batch_id.present? && active_job.executions <= 0 - end - - def in_batch_retry?(active_job) - active_job.batch_id.present? && active_job.executions > 0 - end - - def increment_job_count(active_job, enqueued_job) - if enqueued_job.persisted? 
&& in_batch_retry?(active_job) - SolidQueue::BatchExecution.track_job_creation(active_job, active_job.batch_id) - SolidQueue::Batch.update_job_count(active_job.batch_id, 1) - end - end end end end diff --git a/lib/generators/solid_queue/install/templates/db/queue_schema.rb b/lib/generators/solid_queue/install/templates/db/queue_schema.rb index 55c3156d..93331a80 100644 --- a/lib/generators/solid_queue/install/templates/db/queue_schema.rb +++ b/lib/generators/solid_queue/install/templates/db/queue_schema.rb @@ -37,7 +37,6 @@ t.integer "pending_jobs", default: 0, null: false t.integer "completed_jobs", default: 0, null: false t.integer "failed_jobs", default: 0, null: false - t.integer "total_child_batches", default: 0, null: false t.string "status", default: "pending", null: false t.datetime "finished_at" t.datetime "created_at", null: false diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index cc2e1a11..9b92ddf7 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -49,7 +49,6 @@ t.integer "pending_jobs", default: 0, null: false t.integer "completed_jobs", default: 0, null: false t.integer "failed_jobs", default: 0, null: false - t.integer "total_child_batches", default: 0, null: false t.string "status", default: "pending", null: false t.datetime "finished_at" t.datetime "created_at", null: false diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index 5514fbcd..cf8a567d 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -5,11 +5,9 @@ class BatchLifecycleTest < ActiveSupport::TestCase FailingJobError = Class.new(RuntimeError) - def assert_finished_in_order(*batches) - job_batches = batches.map { |batch| SolidQueue::Batch.find_by(batch_id: batch.batch_id) } - - job_batches.each_cons(2) do |batch1, batch2| - assert_equal batch1.reload.finished_at < batch2.reload.finished_at, true + def 
assert_finished_in_order(*finishables) + finishables.each_cons(2) do |finished1, finished2| + assert_equal finished1.reload.finished_at < finished2.reload.finished_at, true end end @@ -84,20 +82,48 @@ def perform end end + test "empty batches never finish" do + # `enqueue_after_transaction_commit` makes it difficult to tell if a batch is empty, or if the + # jobs are waiting to be run after commit. + # If we could tell deterministically, we could enqueue an EmptyJob to make sure the batches + # don't hang forever. + SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("3")) do + SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("2")) do + SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } + SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } + end + end + + @dispatcher.start + @worker.start + + wait_for_batches_to_finish_for(2.seconds) + wait_for_jobs_to_finish_for(1.second) + + assert_equal [], JobBuffer.values + assert_equal 4, SolidQueue::Batch.pending.count + end + test "nested batches finish from the inside out" do batch2 = batch3 = batch4 = nil batch1 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("3")) do + SolidQueue::Batch::EmptyJob.perform_later batch2 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("2")) do - batch3 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } - batch4 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } + SolidQueue::Batch::EmptyJob.perform_later + batch3 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) do + SolidQueue::Batch::EmptyJob.perform_later + end + batch4 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) do + SolidQueue::Batch::EmptyJob.perform_later + end end end @dispatcher.start @worker.start - wait_for_job_batches_to_finish_for(2.seconds) - wait_for_jobs_to_finish_for(2.seconds) + wait_for_batches_to_finish_for(2.seconds) + 
wait_for_jobs_to_finish_for(1.second) expected_values = [ "1: 1 jobs succeeded!", "1.1: 1 jobs succeeded!", "2: 1 jobs succeeded!", "3: 1 jobs succeeded!" ] assert_equal expected_values.sort, JobBuffer.values.sort @@ -119,7 +145,7 @@ def perform @dispatcher.start @worker.start - wait_for_job_batches_to_finish_for(2.seconds) + wait_for_batches_to_finish_for(2.seconds) assert_equal [ "added from inside 1", "added from inside 2", "added from inside 3", "hey", "ho" ], JobBuffer.values.sort assert_equal 3, SolidQueue::Batch.finished.count @@ -151,8 +177,7 @@ def perform @dispatcher.start @worker.start - wait_for_job_batches_to_finish_for(2.seconds) - wait_for_jobs_to_finish_for(2.seconds) + wait_for_batches_to_finish_for(2.seconds) assert_equal 3, SolidQueue::Batch.finished.count assert_equal 3, SolidQueue::Job.finished.count @@ -172,8 +197,8 @@ def perform @dispatcher.start @worker.start - wait_for_job_batches_to_finish_for(3.seconds) - wait_for_jobs_to_finish_for(3.seconds) + wait_for_batches_to_finish_for(3.seconds) + wait_for_jobs_to_finish_for(1.second) job_batch1 = SolidQueue::Batch.find_by(batch_id: batch1.batch_id) job_batch2 = SolidQueue::Batch.find_by(batch_id: batch2.batch_id) @@ -208,8 +233,8 @@ def perform @dispatcher.start @worker.start - wait_for_job_batches_to_finish_for(3.seconds) - wait_for_jobs_to_finish_for(3.seconds) + wait_for_batches_to_finish_for(3.seconds) + wait_for_jobs_to_finish_for(1.second) job_batch1 = SolidQueue::Batch.find_by(batch_id: batch1.batch_id) job_batch2 = SolidQueue::Batch.find_by(batch_id: batch2.batch_id) @@ -245,7 +270,7 @@ def perform @dispatcher.start @worker.start - wait_for_job_batches_to_finish_for(2.seconds) + wait_for_batches_to_finish_for(2.seconds) wait_for_jobs_to_finish_for(2.seconds) assert_equal true, batch1.reload.finished? 
@@ -265,8 +290,8 @@ def perform @dispatcher.start @worker.start - wait_for_job_batches_to_finish_for(2.seconds) - wait_for_jobs_to_finish_for(2.seconds) + wait_for_batches_to_finish_for(2.seconds) + wait_for_jobs_to_finish_for(1.second) assert_equal [ "Hi finish #{batch.batch_id}!", "Hi success #{batch.batch_id}!", "hey" ].sort, JobBuffer.values.sort assert_equal 1, batch.reload.completed_jobs @@ -275,6 +300,32 @@ def perform assert_equal 1, batch.total_jobs end + # test "batch finishes without firing callbacks if no jobs enqueue properly" do + # ApplicationJob.enqueue_after_transaction_commit = true + # batch1 = nil + # JobResult.transaction do + # batch1 = SolidQueue::Batch.enqueue( + # metadata: { source: "test", priority: "high", user_id: 123 }, + # on_finish: OnFinishJob, + # on_success: OnSuccessJob, + # on_failure: OnFailureJob + # ) do + # AddToBufferJob.perform_later "hey" + # end + + # raise ActiveRecord::Rollback + # end + + # assert_equal [].sort, JobBuffer.values.sort + # assert_equal 0, batch1.reload.completed_jobs + # assert_equal 0, batch1.failed_jobs + # assert_equal 0, batch1.pending_jobs + # assert_equal 0, batch1.total_jobs + # batch1.reload + # assert_equal true, batch1.finished? + # assert_equal "completed", batch1.status + # end + class OnFinishJob < ApplicationJob queue_as :background diff --git a/test/test_helpers/jobs_test_helper.rb b/test/test_helpers/jobs_test_helper.rb index 62913f08..314f531a 100644 --- a/test/test_helpers/jobs_test_helper.rb +++ b/test/test_helpers/jobs_test_helper.rb @@ -17,7 +17,7 @@ def wait_for_jobs_to_be_released_for(timeout = 1.second) end end - def wait_for_job_batches_to_finish_for(timeout = 1.second) + def wait_for_batches_to_finish_for(timeout = 1.second) wait_while_with_timeout(timeout) do skip_active_record_query_cache do SolidQueue::Batch.where(finished_at: nil).any? 
From af0c583683badb46d1cc79e407034e8632133a54 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 9 Sep 2025 01:13:26 +0200 Subject: [PATCH 17/38] Test updates --- test/integration/batch_lifecycle_test.rb | 65 ++++++++++-------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index cf8a567d..1b321a77 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -7,10 +7,14 @@ class BatchLifecycleTest < ActiveSupport::TestCase def assert_finished_in_order(*finishables) finishables.each_cons(2) do |finished1, finished2| - assert_equal finished1.reload.finished_at < finished2.reload.finished_at, true + assert_equal finished1.finished_at < finished2.finished_at, true end end + def job!(active_job) + SolidQueue::Job.find_by!(active_job_id: active_job.job_id) + end + setup do @_on_thread_error = SolidQueue.on_thread_error SolidQueue.on_thread_error = silent_on_thread_error_for([ FailingJobError ], @_on_thread_error) @@ -128,17 +132,18 @@ def perform expected_values = [ "1: 1 jobs succeeded!", "1.1: 1 jobs succeeded!", "2: 1 jobs succeeded!", "3: 1 jobs succeeded!" 
] assert_equal expected_values.sort, JobBuffer.values.sort assert_equal 4, SolidQueue::Batch.finished.count - assert_finished_in_order(batch4, batch2, batch1) - assert_finished_in_order(batch3, batch2, batch1) + assert_finished_in_order(batch4.reload, batch2.reload, batch1.reload) + assert_finished_in_order(batch3.reload, batch2, batch1) end test "all jobs are run, including jobs enqueued inside of other jobs" do batch2 = nil + job1 = job2 = job3 = nil batch1 = SolidQueue::Batch.enqueue do - AddToBufferJob.perform_later "hey" + job1 = AddToBufferJob.perform_later "hey" batch2 = SolidQueue::Batch.enqueue do - AddToBufferJob.perform_later "ho" - AddsMoreJobsJob.perform_later + job2 = AddToBufferJob.perform_later "ho" + job3 = AddsMoreJobsJob.perform_later end end @@ -149,7 +154,10 @@ def perform assert_equal [ "added from inside 1", "added from inside 2", "added from inside 3", "hey", "ho" ], JobBuffer.values.sort assert_equal 3, SolidQueue::Batch.finished.count - assert_finished_in_order(batch2, batch1) + assert_finished_in_order(batch2.reload, batch1.reload) + assert_finished_in_order(job!(job3), batch2) + assert_finished_in_order(job!(job2), batch2) + assert_finished_in_order(job!(job1), batch1) end test "when self.enqueue_after_transaction_commit = true" do @@ -157,17 +165,18 @@ def perform ApplicationJob.enqueue_after_transaction_commit = true batch1 = batch2 = batch3 = nil + job1 = job2 = job3 = nil JobResult.transaction do JobResult.create!(queue_name: "default", status: "") batch1 = SolidQueue::Batch.enqueue do - AddToBufferJob.perform_later "hey" + job1 = AddToBufferJob.perform_later "hey" JobResult.transaction(requires_new: true) do JobResult.create!(queue_name: "default", status: "") batch2 = SolidQueue::Batch.enqueue do - AddToBufferJob.perform_later "ho" + job2 = AddToBufferJob.perform_later "ho" batch3 = SolidQueue::Batch.enqueue do - AddToBufferJob.perform_later "let's go" + job3 = AddToBufferJob.perform_later "let's go" end end end @@ -179,10 +188,14 
@@ def perform wait_for_batches_to_finish_for(2.seconds) + assert_equal [ "hey", "ho", "let's go" ], JobBuffer.values.sort assert_equal 3, SolidQueue::Batch.finished.count assert_equal 3, SolidQueue::Job.finished.count assert_equal 3, SolidQueue::Job.count - assert_finished_in_order(batch3, batch2, batch1) + assert_finished_in_order(batch3.reload, batch2.reload, batch1.reload) + assert_finished_in_order(job!(job3), batch3) + assert_finished_in_order(job!(job2), batch2) + assert_finished_in_order(job!(job1), batch1) end test "failed jobs fire properly" do @@ -218,7 +231,7 @@ def perform assert_equal [ "failed", "failed" ].sort, SolidQueue::Batch.all.pluck(:status) assert_equal [ "0: 1 jobs failed!", "1: 1 jobs failed!" ], JobBuffer.values.sort - assert_finished_in_order(batch2, batch1) + assert_finished_in_order(batch2.reload, batch1.reload) end test "discarded jobs fire properly" do @@ -254,7 +267,7 @@ def perform assert_equal [ "completed", "completed" ].sort, SolidQueue::Batch.all.pluck(:status) assert_equal [ "0: 1 jobs succeeded!", "1: 1 jobs succeeded!" 
], JobBuffer.values.sort - assert_finished_in_order(batch2, batch1) + assert_finished_in_order(batch2.reload, batch1.reload) end test "preserve_finished_jobs = false" do @@ -300,32 +313,6 @@ def perform assert_equal 1, batch.total_jobs end - # test "batch finishes without firing callbacks if no jobs enqueue properly" do - # ApplicationJob.enqueue_after_transaction_commit = true - # batch1 = nil - # JobResult.transaction do - # batch1 = SolidQueue::Batch.enqueue( - # metadata: { source: "test", priority: "high", user_id: 123 }, - # on_finish: OnFinishJob, - # on_success: OnSuccessJob, - # on_failure: OnFailureJob - # ) do - # AddToBufferJob.perform_later "hey" - # end - - # raise ActiveRecord::Rollback - # end - - # assert_equal [].sort, JobBuffer.values.sort - # assert_equal 0, batch1.reload.completed_jobs - # assert_equal 0, batch1.failed_jobs - # assert_equal 0, batch1.pending_jobs - # assert_equal 0, batch1.total_jobs - # batch1.reload - # assert_equal true, batch1.finished? - # assert_equal "completed", batch1.status - # end - class OnFinishJob < ApplicationJob queue_as :background From 55abeaf44c463b50fb3ea673837b37236b4fa9b9 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Wed, 10 Sep 2025 01:34:26 +0200 Subject: [PATCH 18/38] Create batch executions alongside ready and scheduled executions * Making it explicit is the easiest option, and the most in alignment with solid queue * Fix errors around upserting across providers. 
SQLite and Postgres share identical syntax (at least for this use-case) and mysql works differently --- app/models/solid_queue/batch_execution.rb | 25 ++++++++++++++++--- .../solid_queue/execution/batch_preparable.rb | 25 ------------------- app/models/solid_queue/job/batchable.rb | 11 ++++++++ app/models/solid_queue/job/executable.rb | 4 ++- app/models/solid_queue/ready_execution.rb | 2 -- app/models/solid_queue/scheduled_execution.rb | 2 +- test/integration/batch_lifecycle_test.rb | 24 ++++++++++++++++++ 7 files changed, 61 insertions(+), 32 deletions(-) delete mode 100644 app/models/solid_queue/execution/batch_preparable.rb diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index 29c4aabc..9114e763 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -18,9 +18,7 @@ def create_all_from_jobs(jobs) total = jobs.size SolidQueue::Batch.upsert( { batch_id:, total_jobs: total, pending_jobs: total }, - on_duplicate: Arel.sql( - "total_jobs = total_jobs + #{total}, pending_jobs = pending_jobs + #{total}" - ) + **provider_upsert_options ) end end @@ -48,6 +46,27 @@ def process_job_completion(job, status) batch = Batch.find_by(batch_id: batch_id) batch&.check_completion! 
end + + private + + def provider_upsert_options + case connection.adapter_name + when "PostgreSQL", "SQLite" + { + unique_by: :batch_id, + on_duplicate: Arel.sql( + "total_jobs = solid_queue_batches.total_jobs + excluded.total_jobs, " \ + "pending_jobs = solid_queue_batches.pending_jobs + excluded.pending_jobs" + ) + } + else + { + on_duplicate: Arel.sql( + "total_jobs = total_jobs + VALUES(total_jobs), pending_jobs = pending_jobs + VALUES(pending_jobs)" + ) + } + end + end end end end diff --git a/app/models/solid_queue/execution/batch_preparable.rb b/app/models/solid_queue/execution/batch_preparable.rb deleted file mode 100644 index e1df26b0..00000000 --- a/app/models/solid_queue/execution/batch_preparable.rb +++ /dev/null @@ -1,25 +0,0 @@ -# frozen_string_literal: true - -module SolidQueue - class Execution - module BatchPreparable - extend ActiveSupport::Concern - - included do - after_create :create_batch_execution - end - - def create_batch_execution - BatchExecution.create_all_from_jobs([ job ]) - end - - class_methods do - def create_all_from_jobs(jobs) - super.tap do - BatchExecution.create_all_from_jobs(jobs) - end - end - end - end - end -end diff --git a/app/models/solid_queue/job/batchable.rb b/app/models/solid_queue/job/batchable.rb index e7ab5a0e..a81870e6 100644 --- a/app/models/solid_queue/job/batchable.rb +++ b/app/models/solid_queue/job/batchable.rb @@ -9,10 +9,21 @@ module Batchable belongs_to :batch, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::Batch", optional: true has_one :batch_execution, foreign_key: :job_id, dependent: :destroy + after_create :create_batch_execution, if: :batch_id? after_update :update_batch_progress, if: :batch_id? end + class_methods do + def batch_all(jobs) + BatchExecution.create_all_from_jobs(jobs) + end + end + private + def create_batch_execution + BatchExecution.create_all_from_jobs([ self ]) + end + def update_batch_progress return unless saved_change_to_finished_at? 
&& finished_at.present? return unless batch_id.present? diff --git a/app/models/solid_queue/job/executable.rb b/app/models/solid_queue/job/executable.rb index 31d21a00..b56c3e03 100644 --- a/app/models/solid_queue/job/executable.rb +++ b/app/models/solid_queue/job/executable.rb @@ -19,7 +19,9 @@ module Executable class_methods do def prepare_all_for_execution(jobs) due, not_yet_due = jobs.partition(&:due?) - dispatch_all(due) + schedule_all(not_yet_due) + (dispatch_all(due) + schedule_all(not_yet_due)).tap do |jobs| + batch_all(jobs.select { |job| job.batch_id.present? }) + end end def dispatch_all(jobs) diff --git a/app/models/solid_queue/ready_execution.rb b/app/models/solid_queue/ready_execution.rb index 11748e6a..35a11292 100644 --- a/app/models/solid_queue/ready_execution.rb +++ b/app/models/solid_queue/ready_execution.rb @@ -2,8 +2,6 @@ module SolidQueue class ReadyExecution < Execution - include BatchPreparable - scope :queued_as, ->(queue_name) { where(queue_name: queue_name) } assumes_attributes_from_job diff --git a/app/models/solid_queue/scheduled_execution.rb b/app/models/solid_queue/scheduled_execution.rb index c6d82951..f2159422 100644 --- a/app/models/solid_queue/scheduled_execution.rb +++ b/app/models/solid_queue/scheduled_execution.rb @@ -2,7 +2,7 @@ module SolidQueue class ScheduledExecution < Execution - include Dispatching, BatchPreparable + include Dispatching scope :due, -> { where(scheduled_at: ..Time.current) } scope :ordered, -> { order(scheduled_at: :asc, priority: :asc, job_id: :asc) } diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index 1b321a77..ee99a475 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -234,6 +234,30 @@ def perform assert_finished_in_order(batch2.reload, batch1.reload) end + test "executes the same with perform_all_later as it does a normal enqueue" do + batch2 = nil + batch1 = SolidQueue::Batch.enqueue do + 
ActiveJob.perform_all_later([ FailingJob.new, FailingJob.new ]) + batch2 = SolidQueue::Batch.enqueue do + ActiveJob.perform_all_later([ AddToBufferJob.new("ok"), AddToBufferJob.new("ok2") ]) + end + end + + @dispatcher.start + @worker.start + + wait_for_batches_to_finish_for(3.seconds) + wait_for_jobs_to_finish_for(1.second) + + assert_equal 6, batch1.reload.jobs.count + assert_equal 6, batch1.total_jobs + assert_equal 2, SolidQueue::Batch.finished.count + assert_equal "failed", batch1.status + assert_equal 2, batch2.reload.jobs.count + assert_equal 2, batch2.total_jobs + assert_equal "completed", batch2.status + end + test "discarded jobs fire properly" do batch2 = nil batch1 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("0")) do From 3e243580e3237cb725b7e3be775b76f80b7230d1 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Wed, 10 Sep 2025 10:02:56 +0200 Subject: [PATCH 19/38] Leftover from previous implementation --- test/dummy/db/queue_schema.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index 9b92ddf7..283acedc 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -76,7 +76,6 @@ t.datetime "created_at", null: false t.datetime "updated_at", null: false t.string "batch_id" - t.datetime "batch_processed_at" t.index ["active_job_id"], name: "index_solid_queue_jobs_on_active_job_id" t.index ["batch_id"], name: "index_solid_queue_jobs_on_batch_id" t.index ["class_name"], name: "index_solid_queue_jobs_on_class_name" From 0a8598a637eb0b224000479546622e6bc7cb3754 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 12 Sep 2025 00:46:21 +0200 Subject: [PATCH 20/38] Move batch completion checks to job * Reduce load from each callback, and make checks less susceptible to race conditions * Make sure monitor jobs can run, even in the absence of an ApplicationJob * Allow setting the queue on the maintenance jobs * Bring back EmptyJob for empty batches --- README.md | 16 ++++++ 
app/jobs/solid_queue/batch/empty_job.rb | 4 +- app/jobs/solid_queue/batch_monitor_job.rb | 38 +++++++++++++ app/models/solid_queue/batch.rb | 20 +++++++ app/models/solid_queue/batch_execution.rb | 3 - test/integration/batch_lifecycle_test.rb | 67 ++++++++--------------- 6 files changed, 98 insertions(+), 50 deletions(-) create mode 100644 app/jobs/solid_queue/batch_monitor_job.rb diff --git a/README.md b/README.md index ed1bfdc7..99723697 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Solid Queue can be used with SQL databases such as MySQL, PostgreSQL, or SQLite, - [Performance considerations](#performance-considerations) - [Failed jobs and retries](#failed-jobs-and-retries) - [Error reporting on jobs](#error-reporting-on-jobs) +- [Batch jobs](#batch-jobs) - [Puma plugin](#puma-plugin) - [Jobs and transactional integrity](#jobs-and-transactional-integrity) - [Recurring tasks](#recurring-tasks) @@ -650,6 +651,21 @@ SolidQueue::Batch.enqueue( end ``` +### Batch options + +As part of the processing of a batch, some jobs are automatically enqueued: + +- A `SolidQueue::Batch::BatchMonitorJob` is enqueued for every `Batch` being processed +- In the case of an empty batch, a `SolidQueue::Batch::EmptyJob` is enqueued + +By default, these jobs run on the `default` queue. You can specify an alternative queue for them in an initializer: + +```rb +Rails.application.config.after_initialize do # or to_prepare + SolidQueue::Batch.maintenance_queue_name = "my_batch_queue" +end +``` + ## Puma plugin We provide a Puma plugin if you want to run the Solid Queue's supervisor together with Puma and have Puma monitor and manage it. 
You just need to add diff --git a/app/jobs/solid_queue/batch/empty_job.rb b/app/jobs/solid_queue/batch/empty_job.rb index f457eabe..d29e1ad0 100644 --- a/app/jobs/solid_queue/batch/empty_job.rb +++ b/app/jobs/solid_queue/batch/empty_job.rb @@ -2,9 +2,7 @@ module SolidQueue class Batch - class EmptyJob < ApplicationJob - queue_as :background - + class EmptyJob < (defined?(ApplicationJob) ? ApplicationJob : ActiveJob::Base) def perform # This job does nothing - it just exists to trigger batch completion # The batch completion will be handled by the normal job_finished! flow diff --git a/app/jobs/solid_queue/batch_monitor_job.rb b/app/jobs/solid_queue/batch_monitor_job.rb new file mode 100644 index 00000000..e4b2e770 --- /dev/null +++ b/app/jobs/solid_queue/batch_monitor_job.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module SolidQueue + class BatchMonitorJob < (defined?(ApplicationJob) ? ApplicationJob : ActiveJob::Base) + POLLING_INTERVAL = 1.seconds + + def perform(batch_id:) + batch = Batch.find_by(batch_id: batch_id) + return unless batch + + return if batch.finished? + + loop do + batch.reload + + break if batch.finished? + + if check_completion?(batch) + batch.check_completion! + break if batch.reload.finished? + end + + sleep(POLLING_INTERVAL) + end + rescue => e + Rails.logger.error "[SolidQueue] BatchMonitorJob error for batch #{batch_id}: #{e.message}" + # Only re-enqueue on error, with a delay + self.class.set(wait: 30.seconds).perform_later(batch_id: batch_id) + end + + private + + def check_completion?(batch) + has_incomplete_children = batch.child_batches.where(finished_at: nil).exists? 
+ !has_incomplete_children && batch.pending_jobs <= 0 && batch.total_jobs > 0 + end + end +end diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index c5b64637..4733416e 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -21,6 +21,9 @@ class Batch < Record after_initialize :set_batch_id before_create :set_parent_batch_id + mattr_accessor :maintenance_queue_name + self.maintenance_queue_name = "default" + def enqueue(&block) raise "You cannot enqueue a batch that is already finished" if finished? @@ -29,6 +32,11 @@ def enqueue(&block) Batch.wrap_in_batch_context(batch_id) do block.call(self) end + + ActiveRecord.after_all_transactions_commit do + enqueue_empty_job if reload.total_jobs == 0 + enqueue_monitor_job + end end def on_success=(value) @@ -121,6 +129,18 @@ def check_parent_completion! end end + def enqueue_empty_job + Batch.wrap_in_batch_context(batch_id) do + EmptyJob.set(queue: self.class.maintenance_queue_name || "default").perform_later + end + end + + def enqueue_monitor_job + Batch.wrap_in_batch_context(nil) do + BatchMonitorJob.set(queue: self.class.maintenance_queue_name || "default").perform_later(batch_id: batch_id) + end + end + class << self def enqueue(on_success: nil, on_failure: nil, on_finish: nil, metadata: nil, &block) new.tap do |batch| diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index 9114e763..951c0f29 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -42,9 +42,6 @@ def process_job_completion(job, status) ) end end - - batch = Batch.find_by(batch_id: batch_id) - batch&.check_completion! 
end private diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index ee99a475..fe5df63f 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -5,21 +5,12 @@ class BatchLifecycleTest < ActiveSupport::TestCase FailingJobError = Class.new(RuntimeError) - def assert_finished_in_order(*finishables) - finishables.each_cons(2) do |finished1, finished2| - assert_equal finished1.finished_at < finished2.finished_at, true - end - end - - def job!(active_job) - SolidQueue::Job.find_by!(active_job_id: active_job.job_id) - end - setup do @_on_thread_error = SolidQueue.on_thread_error SolidQueue.on_thread_error = silent_on_thread_error_for([ FailingJobError ], @_on_thread_error) @worker = SolidQueue::Worker.new(queues: "background", threads: 3) @dispatcher = SolidQueue::Dispatcher.new(batch_size: 10, polling_interval: 0.2) + SolidQueue::Batch.maintenance_queue_name = "background" end teardown do @@ -34,6 +25,7 @@ def job!(active_job) ApplicationJob.enqueue_after_transaction_commit = false if defined?(ApplicationJob.enqueue_after_transaction_commit) SolidQueue.preserve_finished_jobs = true + SolidQueue::Batch.maintenance_queue_name = nil end class BatchOnSuccessJob < ApplicationJob @@ -86,40 +78,12 @@ def perform end end - test "empty batches never finish" do - # `enqueue_after_transaction_commit` makes it difficult to tell if a batch is empty, or if the - # jobs are waiting to be run after commit. - # If we could tell deterministically, we could enqueue an EmptyJob to make sure the batches - # don't hang forever. 
- SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("3")) do - SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("2")) do - SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } - SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } - end - end - - @dispatcher.start - @worker.start - - wait_for_batches_to_finish_for(2.seconds) - wait_for_jobs_to_finish_for(1.second) - - assert_equal [], JobBuffer.values - assert_equal 4, SolidQueue::Batch.pending.count - end - test "nested batches finish from the inside out" do batch2 = batch3 = batch4 = nil batch1 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("3")) do - SolidQueue::Batch::EmptyJob.perform_later batch2 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("2")) do - SolidQueue::Batch::EmptyJob.perform_later - batch3 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) do - SolidQueue::Batch::EmptyJob.perform_later - end - batch4 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) do - SolidQueue::Batch::EmptyJob.perform_later - end + batch3 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } + batch4 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } end end @@ -188,10 +152,11 @@ def perform wait_for_batches_to_finish_for(2.seconds) + jobs = batch_jobs(batch1, batch2, batch3) assert_equal [ "hey", "ho", "let's go" ], JobBuffer.values.sort assert_equal 3, SolidQueue::Batch.finished.count - assert_equal 3, SolidQueue::Job.finished.count - assert_equal 3, SolidQueue::Job.count + assert_equal 3, jobs.finished.count + assert_equal 3, jobs.count assert_finished_in_order(batch3.reload, batch2.reload, batch1.reload) assert_finished_in_order(job!(job3), batch3) assert_finished_in_order(job!(job2), batch2) @@ -301,8 +266,8 @@ def perform end assert_equal false, batch1.reload.finished? 
- assert_equal 1, SolidQueue::Job.count - assert_equal 0, SolidQueue::Job.finished.count + assert_equal 1, batch1.jobs.count + assert_equal 0, batch1.jobs.finished.count @dispatcher.start @worker.start @@ -360,4 +325,18 @@ def perform(batch) JobBuffer.add "Hi failure #{batch.batch_id}!" end end + + def assert_finished_in_order(*finishables) + finishables.each_cons(2) do |finished1, finished2| + assert_equal finished1.finished_at < finished2.finished_at, true + end + end + + def job!(active_job) + SolidQueue::Job.find_by!(active_job_id: active_job.job_id) + end + + def batch_jobs(*batches) + SolidQueue::Job.where(batch_id: batches.map(&:batch_id)) + end end From 0761fd2b1a04a2c7292fe48b165271ab93072bc6 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 12 Sep 2025 01:02:02 +0200 Subject: [PATCH 21/38] Support rails versions that don't have after_all_transactions_commit --- app/models/solid_queue/batch.rb | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index 4733416e..e0672c1f 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -33,9 +33,12 @@ def enqueue(&block) block.call(self) end - ActiveRecord.after_all_transactions_commit do - enqueue_empty_job if reload.total_jobs == 0 - enqueue_monitor_job + if ActiveSupport.respond_to?(:after_all_transactions_commit) + ActiveRecord.after_all_transactions_commit do + start_monitoring + end + else + start_monitoring end end @@ -141,6 +144,11 @@ def enqueue_monitor_job end end + def start_monitoring + enqueue_empty_job if reload.total_jobs == 0 + enqueue_monitor_job + end + class << self def enqueue(on_success: nil, on_failure: nil, on_finish: nil, metadata: nil, &block) new.tap do |batch| From 64c3dda609c3432024b9a255461bbaea1d6c13ae Mon Sep 17 00:00:00 2001 From: JP Camara Date: Sun, 14 Sep 2025 00:38:46 +0200 Subject: [PATCH 22/38] Remove support for nested batches for now * We still track 
it, but it was causing a lot of race conditions while trying to keep it exclusively in callbacks. Running in a job or worker/dispatcher it works easily, but adds more overhead to the code and processing * Move to explicit timestamp fields instead of status fields so it's easier to track specifics of batch transitions * Move batches lower in the schema, after current models --- CLAUDE.md | 729 ++++++++++++++++++ app/jobs/solid_queue/batch/cleanup_job.rb | 4 +- app/jobs/solid_queue/batch_monitor_job.rb | 38 - app/models/solid_queue/batch.rb | 56 +- app/models/solid_queue/batch/trackable.rb | 26 +- app/models/solid_queue/batch_execution.rb | 3 + .../install/templates/db/queue_schema.rb | 53 +- test/dummy/db/queue_schema.rb | 53 +- test/integration/batch_lifecycle_test.rb | 35 +- 9 files changed, 830 insertions(+), 167 deletions(-) create mode 100644 CLAUDE.md delete mode 100644 app/jobs/solid_queue/batch_monitor_job.rb diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..729f180a --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,729 @@ +# Solid Queue Codebase Analysis + +Co-authored-by: Mikael Henriksson + +## Project Overview + +Solid Queue is a database-backed queuing backend for Active Job in Ruby on Rails. It's designed as a modern, performant alternative to Redis-based solutions, leveraging SQL databases for job storage and processing. + +### Key Characteristics +- **Version**: Currently in development on `batch-poc` branch +- **Ruby Version**: >= 3.1.6 +- **Rails Version**: >= 7.2 +- **Database Support**: MySQL 8+, PostgreSQL 9.5+, SQLite +- **License**: MIT + +## Architecture Overview + +### Core Components + +1. **Workers**: Process jobs from queues + - Multi-threaded execution using thread pools + - Configurable polling intervals and batch sizes + - Queue prioritization support + +2. **Dispatchers**: Move scheduled jobs to ready state + - Handle future-scheduled jobs + - Manage concurrency controls + - Perform maintenance tasks + +3. 
**Scheduler**: Manages recurring tasks + - Cron-like job scheduling + - Supports job classes and eval commands + +4. **Supervisor**: Orchestrates all processes + - Process lifecycle management + - Signal handling (TERM, INT, QUIT) + - Heartbeat monitoring + +## Current Development Focus: Batch Processing + +The `batch-poc` branch implements job batching functionality, allowing: +- Grouping related jobs together +- Tracking collective progress +- Triggering callbacks on batch completion/failure +- Nested batch support with parent-child relationships +- Dynamic job addition within running batches + +### Batch Implementation Architecture + +#### Core Components + +**SolidQueue::Batch** (app/models/solid_queue/batch.rb) +- Primary ActiveRecord model for batch persistence +- Manages batch context using `ActiveSupport::IsolatedExecutionState` +- Handles batch lifecycle and job enqueueing +- Tracks: `total_jobs`, `pending_jobs`, `completed_jobs`, `failed_jobs` +- Manages callback execution and status transitions (pending → processing → completed/failed) +- Handles parent-child batch relationships via `parent_batch_id` +- Serializes callback jobs as JSON for later execution +- Key instance methods: `enqueue`, `check_completion!`, `execute_callbacks` +- Key class methods: `enqueue`, `wrap_in_batch_context`, `current_batch_id` +- Automatically enqueues EmptyJob for empty batches to ensure callbacks fire +- Enqueues BatchMonitorJob for completion monitoring + +**SolidQueue::Batch::Trackable** (app/models/solid_queue/batch/trackable.rb) +- Concern that provides status tracking and query scopes +- Scopes: `pending`, `processing`, `completed`, `failed`, `finished`, `unfinished` +- Helper methods: `finished?`, `processing?`, `pending?`, `progress_percentage` +- Calculates progress based on completed and failed jobs + +**SolidQueue::BatchExecution** (app/models/solid_queue/batch_execution.rb) +- Lightweight tracking record that exists only while a job is pending +- Deleted 
atomically when job completes to trigger counter updates +- Presence indicates job hasn't been processed yet +- Key class methods: + - `create_all_from_jobs`: Bulk creates executions and updates batch counters + - `process_job_completion`: Handles atomic deletion and counter updates +- Uses database-specific upsert strategies for atomic counter increments + +**SolidQueue::Job::Batchable** (app/models/solid_queue/job/batchable.rb) +- Concern mixed into Job model for batch support +- Creates BatchExecution records after job creation +- Tracks job completion via `after_update` callback +- Fires when `finished_at` is set (jobs being retried, not final failures) +- Handles batch progress updates when jobs complete + +**SolidQueue::Execution::Batchable** (app/models/solid_queue/execution/batchable.rb) +- Concern mixed into Execution model for batch support +- Tracks final job failures via `after_create` callback on FailedExecution +- Only fires when job exhausts all retries +- Updates batch failure counter for permanently failed jobs + +**ActiveJob::BatchId** (lib/active_job/batch_id.rb) +- ActiveJob extension for batch context +- Auto-assigns batch_id from context during job initialization +- Serializes/deserializes batch_id with job data +- Provides `batch` helper method to access current batch +- Only activates for SolidQueue adapter + +**SolidQueue::Batch::CleanupJob** (app/jobs/solid_queue/batch/cleanup_job.rb) +- Internal job for cleaning up finished jobs in a batch +- Respects `preserve_finished_jobs` configuration +- Automatically enqueued after batch completion +- Discards on RecordNotFound to handle already-deleted batches gracefully + +**SolidQueue::Batch::EmptyJob** (app/jobs/solid_queue/batch/empty_job.rb) +- Ensures batch callbacks fire even when no jobs are enqueued +- Does nothing in its perform method - exists solely to trigger completion +- Enables patterns where jobs are conditionally enqueued + +**SolidQueue::BatchMonitorJob** 
(app/jobs/solid_queue/batch_monitor_job.rb) +- Monitors batch completion status with a 1-second polling interval +- Checks for completion when all child batches are finished and pending_jobs is 0 +- Re-enqueues itself on error with a 30-second delay +- Automatically stops monitoring when batch is finished + +#### Batch Lifecycle + +1. **Creation Phase**: + ```ruby + batch = SolidQueue::Batch.enqueue(on_success: SuccessJob) do |batch| + MyJob.perform_later(arg1) + AnotherJob.perform_later(arg2) + end + ``` + - Creates UUID-identified batch record + - Sets batch context using `ActiveSupport::IsolatedExecutionState` + - Jobs automatically pick up batch_id from context + - Batch persisted before jobs are enqueued + - Parent batch relationship established if nested + +2. **Job Enqueuing**: + - ActiveJob::BatchId mixin captures batch_id during job initialization + - Jobs created with batch_id foreign key + - BatchExecution records created via `after_create` callback + - Batch counters updated atomically using database-specific upserts + - Total and pending job counts incremented together + - BatchMonitorJob automatically enqueued to monitor completion + +3. **Execution Phase**: + - Jobs processed normally by workers + - Job::Batchable `after_update` callback fires when `finished_at` is set + - For retrying jobs: marked as finished, batch gets "completed" update + - For final failures: FailedExecution created, triggers Execution::Batchable callback + - BatchExecution.process_job_completion handles atomic counter updates + +4. **Progress Tracking**: + - BatchExecution deletion happens in transaction with counter updates + - Atomic SQL: `pending_jobs = pending_jobs - 1, completed_jobs = completed_jobs + 1` + - No locking needed for counter updates (atomic SQL operations) + - Status changes from "pending" to "processing" on first job completion + - Real-time progress via `progress_percentage` method + +5. 
**Completion Detection**: + - `check_completion!` called after each job finishes + - Also monitored by BatchMonitorJob with 1-second polling + - Uses pessimistic locking to prevent race conditions + - Checks: `pending_jobs == 0` AND no unfinished child batches + - Determines final status: "completed" if `failed_jobs == 0`, otherwise "failed" + - Sets `finished_at` timestamp and updates status + - Status transitions from "pending" to "processing" on first job completion + +6. **Callback Execution**: + - Callbacks deserialized and enqueued as regular jobs + - Batch passed as first argument to callback job + - Execution order: on_failure/on_success, then on_finish + - Parent batch completion checked after callbacks + - CleanupJob enqueued if jobs shouldn't be preserved + +#### Batch Callbacks + +**Callback Types:** +- `on_finish`: Always fires when batch completes (success or failure) +- `on_success`: Fires only when all jobs succeed (failed_jobs == 0) +- `on_failure`: Fires on first job failure after all retries exhausted + +**Callback Execution:** +- Callbacks are ActiveJob instances serialized in the database +- Batch passed as first argument: `perform(batch, *original_args)` +- Executed asynchronously after batch completion +- Support for callback chaining in nested batches + +#### Special Features + +**Empty Batch Handling**: +- EmptyJob ensures callbacks fire even with no jobs +- Allows for conditional job enqueueing patterns +- Automatically enqueued when batch.total_jobs == 0 after enqueue block + +**Dynamic Job Addition**: +```ruby +class MyJob < ApplicationJob + def perform + batch.enqueue do # Add more jobs to current batch + AnotherJob.perform_later + end + end +end +``` + +**Nested Batches**: +- Full parent-child relationship tracking +- Children must complete before parent can complete +- Callbacks execute from innermost to outermost + +**Transaction Safety**: +- Full support for `enqueue_after_transaction_commit` +- Handles both synchronous and 
asynchronous enqueueing modes +- Prevents partial batch creation on rollback + +**Cleanup**: +- CleanupJob removes finished jobs when `preserve_finished_jobs` is false +- Maintains batch records for audit trail + +#### Database Schema + +**solid_queue_batches table:** +- `batch_id`: UUID identifier (unique index) +- `parent_batch_id`: For nested batches (indexed) +- `status`: pending, processing, completed, failed (default: "pending") +- `total_jobs`: Total number of jobs in batch (default: 0) +- `pending_jobs`: Jobs not yet completed (default: 0) +- `completed_jobs`: Successfully completed jobs (default: 0) +- `failed_jobs`: Permanently failed jobs (default: 0) +- `on_finish`: Serialized ActiveJob for finish callback (TEXT) +- `on_success`: Serialized ActiveJob for success callback (TEXT) +- `on_failure`: Serialized ActiveJob for failure callback (TEXT) +- `metadata`: JSON field for custom data (TEXT) +- `finished_at`: Completion timestamp +- `created_at`, `updated_at`: Rails timestamps + +**solid_queue_batch_executions table:** +- `job_id`: Foreign key to jobs table (unique index) +- `batch_id`: UUID reference to batch (STRING) +- `created_at`: Record creation timestamp +- Acts as presence indicator - deleted when job completes + +**solid_queue_jobs table additions:** +- `batch_id`: UUID reference to batch (STRING, indexed) + +## Development Approach + +### Database Schema Management +**IMPORTANT**: This project uses direct schema files (`db/queue_schema.rb`) rather than Rails migrations during development. Changes to the database structure should be made directly in: +- `lib/generators/solid_queue/install/templates/db/queue_schema.rb` - Template schema +- `test/dummy/db/queue_schema.rb` - Test database schema + +The schema is loaded fresh for tests, so schema changes can be made directly without migration files during development. 
+ +## Directory Structure + +``` +solid_queue/ + app/ +  jobs/ # Internal job implementations +   solid_queue/ +   batch_update_job.rb +   recurring_job.rb +  models/ # Core ActiveRecord models +  solid_queue/ +  job.rb # Main job model +  job_batch.rb # Batch tracking +  execution.rb family (claimed, ready, failed, etc.) +  process.rb # Worker/dispatcher processes + lib/ +  active_job/ # ActiveJob integration +   job_batch_id.rb +   queue_adapters/solid_queue_adapter.rb +  solid_queue/ # Core library code +   batch.rb # Batch implementation +   worker.rb # Worker implementation +   dispatcher.rb # Dispatcher implementation +   supervisor.rb # Process supervisor +  generators/ # Rails generator for installation + test/ +  integration/ # Integration tests +   batch_lifecycle_test.rb # Batch-specific tests +  models/ # Model tests +  unit/ # Unit tests + config/ +  routes.rb # Engine routes (if any) +``` + +## Key Design Patterns + +### 1. Polling with FOR UPDATE SKIP LOCKED +- Prevents lock contention between workers +- Ensures efficient job claiming +- Falls back gracefully on older databases + +### 2. Semaphore-based Concurrency Control +- Limits concurrent executions per key +- Supports blocking and discarding strategies +- Configurable duration limits + +### 3. Transactional Job Enqueueing +- Jobs and batch records created atomically +- Support for `enqueue_after_transaction_commit` +- Handles bulk enqueuing efficiently + +### 4. 
Process Heartbeats +- Regular heartbeat updates from all processes +- Automatic cleanup of dead processes +- Configurable thresholds + +## Testing Approach + +- **73 test files** covering unit, integration, and model tests +- Test dummy Rails application in `test/dummy/` +- Custom test helpers for: + - Process lifecycle testing + - Job execution verification + - Configuration testing +- Uses fixtures for test data + +## Configuration Files + +### Primary Configuration +- `config/queue.yml`: Worker and dispatcher configuration +- `config/recurring.yml`: Scheduled job definitions +- Database config requires separate `queue` database connection + +### Important Settings +- `process_heartbeat_interval`: Default 60 seconds +- `process_alive_threshold`: Default 5 minutes +- `shutdown_timeout`: Default 5 seconds +- `preserve_finished_jobs`: Default true +- `clear_finished_jobs_after`: Default 1 day + +## Development Workflow + +### Current Git Status +- Branch: `batch-poc` +- Modified files indicate active batch development +- Tests being added/modified for batch functionality + +### Key Modified Files +- `app/models/solid_queue/job/batchable.rb`: Job batch integration +- `app/models/solid_queue/execution/batchable.rb`: Execution batch handling +- `lib/solid_queue/batch.rb`: Core batch logic +- `test/integration/batch_lifecycle_test.rb`: Batch testing + +## Performance Considerations + +1. **Database Indexing**: Critical for polling performance + - Index on `(queue_name, priority, job_id)` + - Covering indexes for skip locked queries + +2. **Batch Overhead**: Batching adds transactional overhead + - Avoid mixing with bulk enqueuing + - Consider impact on concurrency controls + +3. 
**Thread Pool Sizing**: + - Should be less than database connection pool - 2 + - Account for polling and heartbeat connections + +## ActiveJob Layer: Understanding the Division of Responsibilities + +Solid Queue is intentionally designed as a **backend adapter** for ActiveJob, not a complete job processing framework. This architectural decision means many critical features are handled by ActiveJob itself, not Solid Queue. Understanding this layering is crucial for working with the codebase. + +### Features Handled by ActiveJob (NOT in Solid Queue) + +1. **Retry Logic** (`retry_on`) + - ActiveJob manages retry attempts, backoff strategies, and max attempts + - Solid Queue only stores failed executions for manual intervention + - No retry mechanism exists in Solid Queue itself + +2. **Error Handling** (`discard_on`, `rescue_from`) + - ActiveJob decides whether to retry, discard, or handle errors + - Solid Queue just captures and stores the error information + - Custom error handling logic lives in job classes, not the queue + +3. **Callbacks** (`before_enqueue`, `after_perform`, etc.) + - All job lifecycle callbacks are ActiveJob features + - Solid Queue doesn't know about or manage these callbacks + - Exception: Batch callbacks are Solid Queue-specific + +4. **Serialization/Deserialization** + - ActiveJob handles argument serialization (GlobalID, etc.) + - Solid Queue stores the serialized job data as JSON + - Complex argument types are ActiveJob's responsibility + +5. **Job Configuration** + - `queue_as`, `priority`, and other DSL methods are ActiveJob + - Solid Queue reads these values after ActiveJob sets them + - Job class inheritance and configuration is pure ActiveJob + +6. **Timeouts and Deadlines** + - No built-in job timeout mechanism in Solid Queue + - Must be implemented at the job level using ActiveJob patterns + - Process-level timeouts handled via signals only + +### Features Solid Queue DOES Provide + +1. 
**Storage and Retrieval** + - Database schema for jobs and executions + - Efficient polling with `FOR UPDATE SKIP LOCKED` + - Transaction-safe job claiming + +2. **Process Management** + - Worker processes with thread pools + - Dispatcher for scheduled jobs + - Supervisor for process lifecycle + +3. **Concurrency Controls** (Extended ActiveJob) + - Semaphore-based limiting + - Blocking/discarding on conflicts + - Duration-based expiry + +4. **Batch Processing** (Extended ActiveJob) + - Job grouping and tracking + - Batch-specific callbacks + - Progress monitoring + +5. **Recurring Jobs** + - Cron-like scheduling + - Separate from ActiveJob's scheduling + +### The Adapter Pattern + +The `SolidQueueAdapter` is minimal by design: +```ruby +class SolidQueueAdapter + def enqueue(active_job) + SolidQueue::Job.enqueue(active_job) + end + + def enqueue_at(active_job, timestamp) + SolidQueue::Job.enqueue(active_job, scheduled_at: Time.at(timestamp)) + end +end +``` + +This thin adapter means: +- Solid Queue doesn't parse job classes +- It doesn't understand job arguments beyond storage +- It doesn't execute business logic, only job invocation + +### Implications for Development + +When working with Solid Queue: + +1. **Don't look for retry logic here** - It's in ActiveJob +2. **Don't implement job-level features** - Use ActiveJob patterns +3. **Focus on infrastructure** - Storage, retrieval, process management +4. **Extend via ActiveJob** - Custom job classes, not queue modifications +5. **Batch features are special** - One of the few job-level features in Solid Queue + +### Error Flow Example + +1. Job raises exception during `perform` +2. ActiveJob's `retry_on` catches it +3. ActiveJob decides: retry now, retry later, or discard +4. If retrying later: ActiveJob calls `enqueue_at` on adapter +5. Solid Queue stores the job with new scheduled time +6. If final failure: Solid Queue creates `failed_execution` record +7. 
Manual intervention needed via `failed_execution.retry` + +### CRITICAL: Job Retry Behavior in Solid Queue + +**Each retry attempt creates a new job with a new job_id but same active_job_id** + +When a job fails and will be retried: +1. The current job is marked as `finished` (finished_at is set) +2. A new job is created for the retry with a new job_id +3. The jobs share the same active_job_id + +When a job exhausts all retries (final failure): +1. The final job is NOT marked as finished (finished_at remains nil) +2. A FailedExecution record is created +3. No new job is created + +**Example with 3 retry attempts:** +- Job 1 (id: 100) fails → marked as finished → Job 2 created +- Job 2 (id: 101) fails → marked as finished → Job 3 created +- Job 3 (id: 102) fails → NOT marked as finished → FailedExecution created + +This means: +- Jobs that are retried DO have finished_at set +- Jobs with FailedExecutions do NOT have finished_at set +- The Job::Batchable callback fires for jobs being retried (they're "finished") +- The Execution::Batchable callback fires for final failures (FailedExecution created) + +This separation keeps Solid Queue focused on being a robust, database-backed storage and execution engine while ActiveJob handles the higher-level job processing semantics. + +## Integration Points + +### Rails Integration +- Engine-based architecture +- Automatic configuration in Rails 8 +- Generator for easy setup + +### Database Adapters +- Adapter-specific optimizations +- Automatic skip locked detection +- Connection pool management + +## Security Considerations + +- No secrets/credentials in job arguments +- Careful with eval in recurring tasks +- Database permissions for queue database + +## Common Patterns for Extension + +### Adding New Job Types +1. Inherit from `ApplicationJob` +2. Use batch context for batch jobs +3. Implement proper error handling + +### Custom Callbacks +1. Use lifecycle hooks for process events +2. 
Implement batch callbacks for completion logic +3. Consider transactional boundaries + +### Performance Monitoring +1. Hook into instrumentation API +2. Monitor heartbeat intervals +3. Track queue depths and processing times + +## Debugging Tips + +1. **Check heartbeats**: Ensure processes are alive +2. **Review failed_executions**: Inspect error details +3. **Monitor semaphores**: Check for concurrency blocks +4. **Batch status**: Use `JobBatch` model to track progress +5. **Enable query logs**: Set `silence_polling: false` + +## Known Limitations + +1. Phased restarts not supported with Puma plugin +2. Queue order not preserved in concurrency unblocking +3. Batch callbacks execute asynchronously +4. No automatic retry mechanism (relies on ActiveJob) + +## Future Considerations + +- Batch lifecycle improvements in progress +- Potential for distributed locking mechanisms +- Enhanced monitoring and metrics +- Dashboard UI integration improvements + +## Code Style and Conventions + +### Model Structure +- **Base Class**: All models inherit from `SolidQueue::Record` (not directly from `ActiveRecord::Base`) +- **Concerns**: Extract shared behavior into concerns under `app/models/solid_queue/{model}/` +- **Associations**: Define clear relationships with foreign keys and dependent options +- **Scopes**: Use descriptive names, chain simple scopes for complex queries + +### Naming Conventions +```ruby +# Classes +class SolidQueue::ReadyExecution < Execution # Descriptive, namespaced + +# Methods +def dispatch_batch # Action verb for operations +def finished? # Predicate with ? +def finish! 
# Bang for state changes +def with_lock # Preposition for context methods +def after_commit_on_finish # Lifecycle callbacks clearly named + +# Constants +DEFAULT_BATCH_SIZE = 500 # SCREAMING_SNAKE_CASE +STATUSES = %w[ pending processing completed failed ] # Arrays for enums +``` + +### Database Operations + +#### Transaction Patterns +```ruby +# Always wrap multi-step operations +transaction do + job = create!(job_attributes) + job.prepare_for_execution + job +end + +# Use with_lock for pessimistic locking +batch_record.with_lock do + batch_record.update!(pending_jobs: batch_record.pending_jobs - 1) + batch_record.check_completion! +end +``` + +#### Bulk Operations +```ruby +# Prefer insert_all for bulk creates +insert_all(execution_rows, returning: %w[ id job_id ]) + +# Use update_all for batch updates +where(id: job_ids).update_all(finished_at: Time.current) + +# Chain scopes for complex queries +ready.by_priority.limit(batch_size) +``` + +#### SQL Safety +```ruby +# Parameterized queries +where("scheduled_at <= ?", Time.current) + +# Arel for complex SQL +lock(Arel.sql("FOR UPDATE SKIP LOCKED")) + +# Avoid string interpolation +# BAD: where("status = '#{status}'") +# GOOD: where(status: status) +``` + +### Concern Organization + +```ruby +module SolidQueue + module Job + module Batchable + extend ActiveSupport::Concern + + included do + # Associations + belongs_to :batch_record, optional: true + + # Callbacks + after_update :track_batch_progress, if: :batch_id? + + # Scopes + scope :in_batch, ->(batch_id) { where(batch_id: batch_id) } + end + + class_methods do + # Class-level functionality + end + + # Instance methods grouped by purpose + private + def track_batch_progress + # Implementation + end + end + end +end +``` + +### Callback Patterns + +```ruby +# Use conditional callbacks +after_create :dispatch, if: :ready? +after_destroy :unblock_next, if: -> { concurrency_limited? && ready? 
} + +# Separate callback methods +private + def dispatch + ReadyExecution.create_from_job(self) + end +``` + +### Error Handling + +```ruby +# Custom exceptions with context +class BatchCompletionError < StandardError + attr_reader :batch_id + + def initialize(batch_id, message) + @batch_id = batch_id + super("Batch #{batch_id}: #{message}") + end +end + +# Wrap and re-raise with context +rescue ActiveRecord::RecordNotUnique => e + raise EnqueueError.new("Duplicate job: #{e.message}").tap { |error| + error.set_backtrace(e.backtrace) + } +end + +# Silent rescue for non-critical operations +def optional_cleanup + # cleanup code +rescue => e + Rails.logger.error "[SolidQueue] Cleanup failed: #{e.message}" +end +``` + +### Instrumentation + +```ruby +# Always instrument important operations +SolidQueue.instrument(:batch_update, batch_id: batch_id) do |payload| + result = perform_update + payload[:jobs_updated] = result.count + result +end +``` + +### Testing Patterns + +```ruby +# Use transactional tests sparingly +self.use_transactional_tests = false # For integration tests + +# Custom assertions +def assert_batch_completed(batch_id) + batch = SolidQueue::BatchRecord.find(batch_id) + assert_equal "completed", batch.status + assert_equal 0, batch.pending_jobs +end + +# Wait helpers for async operations +wait_for_jobs_to_finish_for(2.seconds) +``` + +### Key Principles + +1. **Composition over Inheritance**: Use concerns for shared behavior +2. **Fail Fast**: Validate early, use bang methods for critical operations +3. **Idempotency**: Design operations to be safely retryable +4. **Instrumentation**: Measure everything important +5. **Clear Boundaries**: Models handle persistence, jobs handle business logic +6. **Defensive Coding**: Handle nil cases, use safe navigation (`&.`) +7. **Explicit over Implicit**: Clear method names over clever shortcuts +8. **Transaction Safety**: Always consider rollback scenarios +9. 
**Performance First**: Use bulk operations, avoid N+1 queries +10. **Rails Conventions**: Follow Rails patterns unless there's a good reason not to + +### IMPORTANT + +- Always utilize ActiveSupport::IsolatedExecutionState instead of Thread.current \ No newline at end of file diff --git a/app/jobs/solid_queue/batch/cleanup_job.rb b/app/jobs/solid_queue/batch/cleanup_job.rb index eb381908..3e697962 100644 --- a/app/jobs/solid_queue/batch/cleanup_job.rb +++ b/app/jobs/solid_queue/batch/cleanup_job.rb @@ -2,9 +2,7 @@ module SolidQueue class Batch - class CleanupJob < ApplicationJob - queue_as :background - + class CleanupJob < (defined?(ApplicationJob) ? ApplicationJob : ActiveJob::Base) discard_on ActiveRecord::RecordNotFound def perform(job_batch) diff --git a/app/jobs/solid_queue/batch_monitor_job.rb b/app/jobs/solid_queue/batch_monitor_job.rb deleted file mode 100644 index e4b2e770..00000000 --- a/app/jobs/solid_queue/batch_monitor_job.rb +++ /dev/null @@ -1,38 +0,0 @@ -# frozen_string_literal: true - -module SolidQueue - class BatchMonitorJob < (defined?(ApplicationJob) ? ApplicationJob : ActiveJob::Base) - POLLING_INTERVAL = 1.seconds - - def perform(batch_id:) - batch = Batch.find_by(batch_id: batch_id) - return unless batch - - return if batch.finished? - - loop do - batch.reload - - break if batch.finished? - - if check_completion?(batch) - batch.check_completion! - break if batch.reload.finished? - end - - sleep(POLLING_INTERVAL) - end - rescue => e - Rails.logger.error "[SolidQueue] BatchMonitorJob error for batch #{batch_id}: #{e.message}" - # Only re-enqueue on error, with a delay - self.class.set(wait: 30.seconds).perform_later(batch_id: batch_id) - end - - private - - def check_completion?(batch) - has_incomplete_children = batch.child_batches.where(finished_at: nil).exists? 
- !has_incomplete_children && batch.pending_jobs <= 0 && batch.total_jobs > 0 - end - end -end diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index e0672c1f..093ee32f 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -2,11 +2,9 @@ module SolidQueue class Batch < Record - STATUSES = %w[pending processing completed failed] - include Trackable - belongs_to :parent_batch, foreign_key: :parent_batch_id, class_name: "SolidQueue::Batch", optional: true + belongs_to :parent_batch, foreign_key: :parent_batch_id, primary_key: :batch_id, class_name: "SolidQueue::Batch", optional: true has_many :jobs, foreign_key: :batch_id, primary_key: :batch_id has_many :batch_executions, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchExecution" has_many :child_batches, foreign_key: :parent_batch_id, primary_key: :batch_id, class_name: "SolidQueue::Batch" @@ -16,10 +14,9 @@ class Batch < Record serialize :on_failure, coder: JSON serialize :metadata, coder: JSON - validates :status, inclusion: { in: STATUSES } - after_initialize :set_batch_id before_create :set_parent_batch_id + after_commit :start_batch, on: :create, unless: -> { ActiveRecord.respond_to?(:after_all_transactions_commit) } mattr_accessor :maintenance_queue_name self.maintenance_queue_name = "default" @@ -33,12 +30,10 @@ def enqueue(&block) block.call(self) end - if ActiveSupport.respond_to?(:after_all_transactions_commit) + if ActiveRecord.respond_to?(:after_all_transactions_commit) ActiveRecord.after_all_transactions_commit do - start_monitoring + start_batch end - else - start_monitoring end end @@ -55,22 +50,16 @@ def on_finish=(value) end def check_completion! - return if finished? + return if finished? || !ready? with_lock do - return if finished_at? + return if finished_at? || !ready? 
if pending_jobs == 0 - unfinished_children = child_batches.where.not(status: %w[completed failed]).count - - if unfinished_children == 0 - new_status = failed_jobs > 0 ? "failed" : "completed" - update!(status: new_status, finished_at: Time.current) - execute_callbacks - end - elsif status == "pending" && (completed_jobs > 0 || failed_jobs > 0) - # Move from pending to processing once any job completes - update!(status: "processing") + finished_attributes = { finished_at: Time.current } + finished_attributes[:failed_at] = Time.current if failed_jobs > 0 + update!(finished_attributes) + execute_callbacks end end end @@ -108,28 +97,19 @@ def perform_completion_job(job_field, attrs) end def execute_callbacks - if status == "failed" + if failed_at? perform_completion_job(:on_failure, {}) if on_failure.present? - elsif status == "completed" + else perform_completion_job(:on_success, {}) if on_success.present? end perform_completion_job(:on_finish, {}) if on_finish.present? clear_unpreserved_jobs - - check_parent_completion! end def clear_unpreserved_jobs - SolidQueue::Batch::CleanupJob.perform_later(self) unless SolidQueue.preserve_finished_jobs? - end - - def check_parent_completion! - if parent_batch_id.present? - parent = Batch.find_by(batch_id: parent_batch_id) - parent&.check_completion! unless parent&.finished? - end + SolidQueue::Batch::CleanupJob.set(queue: self.class.maintenance_queue_name || "default").perform_later(self) unless SolidQueue.preserve_finished_jobs? 
end def enqueue_empty_job @@ -138,15 +118,9 @@ def enqueue_empty_job end end - def enqueue_monitor_job - Batch.wrap_in_batch_context(nil) do - BatchMonitorJob.set(queue: self.class.maintenance_queue_name || "default").perform_later(batch_id: batch_id) - end - end - - def start_monitoring + def start_batch enqueue_empty_job if reload.total_jobs == 0 - enqueue_monitor_job + update!(enqueued_at: Time.current) end class << self diff --git a/app/models/solid_queue/batch/trackable.rb b/app/models/solid_queue/batch/trackable.rb index 806025f0..20c27f1c 100644 --- a/app/models/solid_queue/batch/trackable.rb +++ b/app/models/solid_queue/batch/trackable.rb @@ -6,24 +6,26 @@ module Trackable extend ActiveSupport::Concern included do - scope :pending, -> { where(status: "pending") } - scope :processing, -> { where(status: "processing") } - scope :completed, -> { where(status: "completed") } - scope :failed, -> { where(status: "failed") } - scope :finished, -> { where(status: %w[completed failed]) } - scope :unfinished, -> { where(status: %w[pending processing]) } + scope :finished, -> { where.not(finished_at: nil) } + scope :succeeded, -> { finished.where(failed_at: nil) } + scope :unfinished, -> { where(finished_at: nil) } + scope :failed, -> { where.not(failed_at: nil) } end - def finished? - status.in?(%w[completed failed]) + def failed? + failed_at.present? + end + + def succeeded? + finished? && !failed? end - def processing? - status == "processing" + def finished? + finished_at.present? end - def pending? - status == "pending" + def ready? + enqueued_at.present? end def progress_percentage diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index 951c0f29..ef448567 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -42,6 +42,9 @@ def process_job_completion(job, status) ) end end + + batch = Batch.find_by(batch_id: batch_id) + batch.check_completion! if batch.present? 
end private diff --git a/lib/generators/solid_queue/install/templates/db/queue_schema.rb b/lib/generators/solid_queue/install/templates/db/queue_schema.rb index 93331a80..bcdbe741 100644 --- a/lib/generators/solid_queue/install/templates/db/queue_schema.rb +++ b/lib/generators/solid_queue/install/templates/db/queue_schema.rb @@ -26,32 +26,6 @@ t.index [ "job_id" ], name: "index_solid_queue_failed_executions_on_job_id", unique: true end - create_table "solid_queue_batches", force: :cascade do |t| - t.string "batch_id", null: false - t.string "parent_batch_id" - t.text "on_finish" - t.text "on_success" - t.text "on_failure" - t.text "metadata" - t.integer "total_jobs", default: 0, null: false - t.integer "pending_jobs", default: 0, null: false - t.integer "completed_jobs", default: 0, null: false - t.integer "failed_jobs", default: 0, null: false - t.string "status", default: "pending", null: false - t.datetime "finished_at" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - t.index [ "batch_id" ], name: "index_solid_queue_batches_on_batch_id", unique: true - t.index [ "parent_batch_id" ], name: "index_solid_queue_batches_on_parent_batch_id" - end - - create_table "solid_queue_batch_executions", force: :cascade do |t| - t.bigint "job_id", null: false - t.string "batch_id", null: false - t.datetime "created_at", null: false - t.index [ "job_id" ], name: "index_solid_queue_batch_executions_on_job_id", unique: true - end - create_table "solid_queue_jobs", force: :cascade do |t| t.string "queue_name", null: false t.string "class_name", null: false @@ -148,6 +122,33 @@ t.index [ "key" ], name: "index_solid_queue_semaphores_on_key", unique: true end + create_table "solid_queue_batches", force: :cascade do |t| + t.string "batch_id" + t.string "parent_batch_id" + t.text "on_finish" + t.text "on_success" + t.text "on_failure" + t.text "metadata" + t.integer "total_jobs", default: 0, null: false + t.integer "pending_jobs", default: 0, null: false + 
t.integer "completed_jobs", default: 0, null: false + t.integer "failed_jobs", default: 0, null: false + t.datetime "enqueued_at" + t.datetime "finished_at" + t.datetime "failed_at" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["batch_id"], name: "index_solid_queue_batches_on_batch_id", unique: true + t.index ["parent_batch_id"], name: "index_solid_queue_batches_on_parent_batch_id" + end + + create_table "solid_queue_batch_executions", force: :cascade do |t| + t.bigint "job_id", null: false + t.string "batch_id", null: false + t.datetime "created_at", null: false + t.index [ "job_id" ], name: "index_solid_queue_batch_executions_on_job_id", unique: true + end + add_foreign_key "solid_queue_blocked_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_claimed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_failed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index 283acedc..70fdb2ff 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -38,32 +38,6 @@ t.index ["job_id"], name: "index_solid_queue_failed_executions_on_job_id", unique: true end - create_table "solid_queue_batches", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| - t.string "batch_id" - t.string "parent_batch_id" - t.text "on_finish" - t.text "on_success" - t.text "on_failure" - t.text "metadata" - t.integer "total_jobs", default: 0, null: false - t.integer "pending_jobs", default: 0, null: false - t.integer "completed_jobs", default: 0, null: false - t.integer "failed_jobs", default: 0, null: false - t.string "status", default: "pending", null: false - t.datetime "finished_at" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - t.index ["batch_id"], name: 
"index_solid_queue_batches_on_batch_id", unique: true - t.index ["parent_batch_id"], name: "index_solid_queue_batches_on_parent_batch_id" - end - - create_table "solid_queue_batch_executions", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| - t.bigint "job_id", null: false - t.string "batch_id", null: false - t.datetime "created_at", null: false - t.index ["job_id"], name: "index_solid_queue_batch_executions_on_job_id", unique: true - end - create_table "solid_queue_jobs", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.string "queue_name", null: false t.string "class_name", null: false @@ -160,6 +134,33 @@ t.index ["key"], name: "index_solid_queue_semaphores_on_key", unique: true end + create_table "solid_queue_batches", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| + t.string "batch_id" + t.string "parent_batch_id" + t.text "on_finish" + t.text "on_success" + t.text "on_failure" + t.text "metadata" + t.integer "total_jobs", default: 0, null: false + t.integer "pending_jobs", default: 0, null: false + t.integer "completed_jobs", default: 0, null: false + t.integer "failed_jobs", default: 0, null: false + t.datetime "enqueued_at" + t.datetime "finished_at" + t.datetime "failed_at" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["batch_id"], name: "index_solid_queue_batches_on_batch_id", unique: true + t.index ["parent_batch_id"], name: "index_solid_queue_batches_on_parent_batch_id" + end + + create_table "solid_queue_batch_executions", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| + t.bigint "job_id", null: false + t.string "batch_id", null: false + t.datetime "created_at", null: false + t.index ["job_id"], name: "index_solid_queue_batch_executions_on_job_id", unique: true + end + add_foreign_key "solid_queue_blocked_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key 
"solid_queue_claimed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade add_foreign_key "solid_queue_failed_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index fe5df63f..808477da 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -78,12 +78,11 @@ def perform end end - test "nested batches finish from the inside out" do - batch2 = batch3 = batch4 = nil - batch1 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("3")) do - batch2 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("2")) do - batch3 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } - batch4 = SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } + test "empty batches fire callbacks" do + SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("3")) do + SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("2")) do + SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1")) { } + SolidQueue::Batch.enqueue(on_success: BatchOnSuccessJob.new("1.1")) { } end end @@ -96,8 +95,6 @@ def perform expected_values = [ "1: 1 jobs succeeded!", "1.1: 1 jobs succeeded!", "2: 1 jobs succeeded!", "3: 1 jobs succeeded!" 
] assert_equal expected_values.sort, JobBuffer.values.sort assert_equal 4, SolidQueue::Batch.finished.count - assert_finished_in_order(batch4.reload, batch2.reload, batch1.reload) - assert_finished_in_order(batch3.reload, batch2, batch1) end test "all jobs are run, including jobs enqueued inside of other jobs" do @@ -118,10 +115,9 @@ def perform assert_equal [ "added from inside 1", "added from inside 2", "added from inside 3", "hey", "ho" ], JobBuffer.values.sort assert_equal 3, SolidQueue::Batch.finished.count - assert_finished_in_order(batch2.reload, batch1.reload) - assert_finished_in_order(job!(job3), batch2) + assert_finished_in_order(job!(job3), batch2.reload) assert_finished_in_order(job!(job2), batch2) - assert_finished_in_order(job!(job1), batch1) + assert_finished_in_order(job!(job1), batch1.reload) end test "when self.enqueue_after_transaction_commit = true" do @@ -157,10 +153,9 @@ def perform assert_equal 3, SolidQueue::Batch.finished.count assert_equal 3, jobs.finished.count assert_equal 3, jobs.count - assert_finished_in_order(batch3.reload, batch2.reload, batch1.reload) - assert_finished_in_order(job!(job3), batch3) - assert_finished_in_order(job!(job2), batch2) - assert_finished_in_order(job!(job1), batch1) + assert_finished_in_order(job!(job3), batch3.reload) + assert_finished_in_order(job!(job2), batch2.reload) + assert_finished_in_order(job!(job1), batch1.reload) end test "failed jobs fire properly" do @@ -194,9 +189,8 @@ def perform assert_equal 2, job_batch2.completed_jobs # 2 retries marked as "finished" assert_equal 0, job_batch2.pending_jobs - assert_equal [ "failed", "failed" ].sort, SolidQueue::Batch.all.pluck(:status) + assert_equal [ true, true ].sort, SolidQueue::Batch.all.map(&:failed?) assert_equal [ "0: 1 jobs failed!", "1: 1 jobs failed!" 
], JobBuffer.values.sort - assert_finished_in_order(batch2.reload, batch1.reload) end test "executes the same with perform_all_later as it does a normal enqueue" do @@ -217,10 +211,10 @@ def perform assert_equal 6, batch1.reload.jobs.count assert_equal 6, batch1.total_jobs assert_equal 2, SolidQueue::Batch.finished.count - assert_equal "failed", batch1.status + assert_equal true, batch1.failed? assert_equal 2, batch2.reload.jobs.count assert_equal 2, batch2.total_jobs - assert_equal "completed", batch2.status + assert_equal true, batch2.succeeded? end test "discarded jobs fire properly" do @@ -254,9 +248,8 @@ def perform assert_equal 1, job_batch2.completed_jobs assert_equal 0, job_batch2.pending_jobs - assert_equal [ "completed", "completed" ].sort, SolidQueue::Batch.all.pluck(:status) + assert_equal [ true, true ].sort, SolidQueue::Batch.all.map(&:succeeded?) assert_equal [ "0: 1 jobs succeeded!", "1: 1 jobs succeeded!" ], JobBuffer.values.sort - assert_finished_in_order(batch2.reload, batch1.reload) end test "preserve_finished_jobs = false" do From 60424d94e1dae3f8a2e3058cd3c2476968ba1b8c Mon Sep 17 00:00:00 2001 From: JP Camara Date: Sun, 14 Sep 2025 01:12:35 +0200 Subject: [PATCH 23/38] Fix starting batch in rails 7.1 --- app/models/solid_queue/batch.rb | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index 093ee32f..18e156a3 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -24,15 +24,17 @@ class Batch < Record def enqueue(&block) raise "You cannot enqueue a batch that is already finished" if finished? - save! if new_record? + transaction do + save! if new_record? 
- Batch.wrap_in_batch_context(batch_id) do - block.call(self) - end + Batch.wrap_in_batch_context(batch_id) do + block.call(self) + end - if ActiveRecord.respond_to?(:after_all_transactions_commit) - ActiveRecord.after_all_transactions_commit do - start_batch + if ActiveRecord.respond_to?(:after_all_transactions_commit) + ActiveRecord.after_all_transactions_commit do + start_batch + end end end end From 58a236ff8467bcbe1e8e5224d0022848ab4b59cb Mon Sep 17 00:00:00 2001 From: JP Camara Date: Mon, 15 Sep 2025 11:40:12 +0200 Subject: [PATCH 24/38] Helper status method --- app/models/solid_queue/batch/trackable.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/app/models/solid_queue/batch/trackable.rb b/app/models/solid_queue/batch/trackable.rb index 20c27f1c..4c8bab63 100644 --- a/app/models/solid_queue/batch/trackable.rb +++ b/app/models/solid_queue/batch/trackable.rb @@ -12,6 +12,16 @@ module Trackable scope :failed, -> { where.not(failed_at: nil) } end + def status + if finished? + failed? ? "failed" : "completed" + elsif enqueued_at.present? + "processing" + else + "pending" + end + end + def failed? failed_at.present? 
end From 6ad1be1790664dd6b6a14bafdd05677392487f80 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 16 Sep 2025 01:20:40 +0200 Subject: [PATCH 25/38] Remove parent/child batch relationship, which simplifies the logic --- app/models/solid_queue/batch.rb | 10 +--------- .../solid_queue/install/templates/db/queue_schema.rb | 3 +-- test/dummy/db/queue_schema.rb | 3 +-- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index 18e156a3..5ee91feb 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -4,10 +4,8 @@ module SolidQueue class Batch < Record include Trackable - belongs_to :parent_batch, foreign_key: :parent_batch_id, primary_key: :batch_id, class_name: "SolidQueue::Batch", optional: true has_many :jobs, foreign_key: :batch_id, primary_key: :batch_id has_many :batch_executions, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchExecution" - has_many :child_batches, foreign_key: :parent_batch_id, primary_key: :batch_id, class_name: "SolidQueue::Batch" serialize :on_finish, coder: JSON serialize :on_success, coder: JSON @@ -15,7 +13,6 @@ class Batch < Record serialize :metadata, coder: JSON after_initialize :set_batch_id - before_create :set_parent_batch_id after_commit :start_batch, on: :create, unless: -> { ActiveRecord.respond_to?(:after_all_transactions_commit) } mattr_accessor :maintenance_queue_name @@ -68,10 +65,6 @@ def check_completion! private - def set_parent_batch_id - self.parent_batch_id ||= Batch.current_batch_id if Batch.current_batch_id.present? 
- end - def set_batch_id self.batch_id ||= SecureRandom.uuid end @@ -132,8 +125,7 @@ def enqueue(on_success: nil, on_failure: nil, on_finish: nil, metadata: nil, &bl on_success: on_success, on_failure: on_failure, on_finish: on_finish, - metadata: metadata, - parent_batch_id: current_batch_id + metadata: metadata ) batch.enqueue(&block) diff --git a/lib/generators/solid_queue/install/templates/db/queue_schema.rb b/lib/generators/solid_queue/install/templates/db/queue_schema.rb index bcdbe741..5914164a 100644 --- a/lib/generators/solid_queue/install/templates/db/queue_schema.rb +++ b/lib/generators/solid_queue/install/templates/db/queue_schema.rb @@ -124,7 +124,6 @@ create_table "solid_queue_batches", force: :cascade do |t| t.string "batch_id" - t.string "parent_batch_id" t.text "on_finish" t.text "on_success" t.text "on_failure" @@ -139,7 +138,6 @@ t.datetime "created_at", null: false t.datetime "updated_at", null: false t.index ["batch_id"], name: "index_solid_queue_batches_on_batch_id", unique: true - t.index ["parent_batch_id"], name: "index_solid_queue_batches_on_parent_batch_id" end create_table "solid_queue_batch_executions", force: :cascade do |t| @@ -147,6 +145,7 @@ t.string "batch_id", null: false t.datetime "created_at", null: false t.index [ "job_id" ], name: "index_solid_queue_batch_executions_on_job_id", unique: true + t.index ["batch_id"], name: "index_solid_queue_batch_executions_on_batch_id" end add_foreign_key "solid_queue_blocked_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index 70fdb2ff..a673f303 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -136,7 +136,6 @@ create_table "solid_queue_batches", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.string "batch_id" - t.string "parent_batch_id" t.text "on_finish" t.text "on_success" t.text "on_failure" @@ -151,7 +150,6 @@ t.datetime 
"created_at", null: false t.datetime "updated_at", null: false t.index ["batch_id"], name: "index_solid_queue_batches_on_batch_id", unique: true - t.index ["parent_batch_id"], name: "index_solid_queue_batches_on_parent_batch_id" end create_table "solid_queue_batch_executions", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| @@ -159,6 +157,7 @@ t.string "batch_id", null: false t.datetime "created_at", null: false t.index ["job_id"], name: "index_solid_queue_batch_executions_on_job_id", unique: true + t.index ["batch_id"], name: "index_solid_queue_batch_executions_on_batch_id" end add_foreign_key "solid_queue_blocked_executions", "solid_queue_jobs", column: "job_id", on_delete: :cascade From 7b8462a8f7dcef561524db073ae43e3a5731dad9 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 16 Sep 2025 09:09:25 +0200 Subject: [PATCH 26/38] Performance improvements * By querying batch executions remaining, the query times remain very fast. * When we are constantly updating the single batch row counts, it becomes a hotspot. Fast executing jobs quickly accumulate and slow down overall job processing (processing a few thousand jobs goes for 10ish seconds to 40ish seconds). This still adds a bit of overhead, but significantly less (10ish seconds to 15ish seconds) * Handle batch completion in an after_commit to make sure the transaction is visible before checking executions. 
This may mean we need to introduce some monitoring in cases where an after_commit fails to fire due to network issues or a database issue --- app/models/solid_queue/batch.rb | 26 +++++++++++---- app/models/solid_queue/batch/trackable.rb | 22 +++++++++++++ app/models/solid_queue/batch_execution.rb | 32 +++++-------------- app/models/solid_queue/execution/batchable.rb | 4 +-- app/models/solid_queue/job/batchable.rb | 4 +-- 5 files changed, 52 insertions(+), 36 deletions(-) diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index 5ee91feb..e313de20 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -50,16 +50,28 @@ def on_finish=(value) def check_completion! return if finished? || !ready? + return if batch_executions.limit(1).exists? - with_lock do - return if finished_at? || !ready? + rows = Batch + .by_batch_id(batch_id) + .unfinished + .empty_executions + .update_all(finished_at: Time.current) + + return if rows.zero? - if pending_jobs == 0 - finished_attributes = { finished_at: Time.current } - finished_attributes[:failed_at] = Time.current if failed_jobs > 0 - update!(finished_attributes) - execute_callbacks + with_lock do + failed = jobs.joins(:failed_execution).count + finished_attributes = {} + if failed > 0 + finished_attributes[:failed_at] = Time.current + finished_attributes[:failed_jobs] = failed end + finished_attributes[:completed_jobs] = total_jobs - failed + finished_attributes[:pending_jobs] = 0 + + update!(finished_attributes) + execute_callbacks end end diff --git a/app/models/solid_queue/batch/trackable.rb b/app/models/solid_queue/batch/trackable.rb index 4c8bab63..0eab82a7 100644 --- a/app/models/solid_queue/batch/trackable.rb +++ b/app/models/solid_queue/batch/trackable.rb @@ -10,6 +10,16 @@ module Trackable scope :succeeded, -> { finished.where(failed_at: nil) } scope :unfinished, -> { where(finished_at: nil) } scope :failed, -> { where.not(failed_at: nil) } + scope :by_batch_id, 
->(batch_id) { where(batch_id:) } + scope :empty_executions, -> { + where(<<~SQL) + NOT EXISTS ( + SELECT 1 FROM solid_queue_batch_executions + WHERE solid_queue_batch_executions.batch_id = solid_queue_batches.batch_id + LIMIT 1 + ) + SQL + } end def status @@ -38,6 +48,18 @@ def ready? enqueued_at.present? end + def completed_jobs + finished? ? self[:completed_jobs] : total_jobs - batch_executions.count + end + + def failed_jobs + finished? ? self[:failed_jobs] : jobs.joins(:failed_execution).count + end + + def pending_jobs + finished? ? self[:pending_jobs] : batch_executions.count + end + def progress_percentage return 0 if total_jobs == 0 ((completed_jobs + failed_jobs) * 100.0 / total_jobs).round(2) diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index ef448567..2e7d7d90 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -5,6 +5,14 @@ class BatchExecution < Record belongs_to :job, optional: true belongs_to :batch, foreign_key: :batch_id, primary_key: :batch_id + after_commit :check_completion, on: :destroy + + private + def check_completion + batch = Batch.find_by(batch_id: batch_id) + batch.check_completion! if batch.present? + end + class << self def create_all_from_jobs(jobs) batch_jobs = jobs.select { |job| job.batch_id.present? } @@ -23,30 +31,6 @@ def create_all_from_jobs(jobs) end end - def process_job_completion(job, status) - batch_id = job.batch_id - batch_execution = job.batch_execution - - return if batch_execution.blank? - - transaction do - batch_execution.destroy! - - if status == "failed" - Batch.where(batch_id: batch_id).update_all( - "pending_jobs = pending_jobs - 1, failed_jobs = failed_jobs + 1" - ) - else - Batch.where(batch_id: batch_id).update_all( - "pending_jobs = pending_jobs - 1, completed_jobs = completed_jobs + 1" - ) - end - end - - batch = Batch.find_by(batch_id: batch_id) - batch.check_completion! if batch.present? 
- end - private def provider_upsert_options diff --git a/app/models/solid_queue/execution/batchable.rb b/app/models/solid_queue/execution/batchable.rb index 7b031d4f..fe9aa6ad 100644 --- a/app/models/solid_queue/execution/batchable.rb +++ b/app/models/solid_queue/execution/batchable.rb @@ -11,9 +11,9 @@ module Batchable private def update_batch_progress - # FailedExecutions are only created when the job is done retrying if is_a?(FailedExecution) - BatchExecution.process_job_completion(job, "failed") + # FailedExecutions are only created when the job is done retrying + job.batch_execution&.destroy! end rescue => e Rails.logger.error "[SolidQueue] Failed to notify batch #{job.batch_id} about job #{job.id} failure: #{e.message}" diff --git a/app/models/solid_queue/job/batchable.rb b/app/models/solid_queue/job/batchable.rb index a81870e6..46fb0ff9 100644 --- a/app/models/solid_queue/job/batchable.rb +++ b/app/models/solid_queue/job/batchable.rb @@ -28,9 +28,7 @@ def update_batch_progress return unless saved_change_to_finished_at? && finished_at.present? return unless batch_id.present? - # Jobs marked as finished are considered completed - # (even if they failed and are being retried - we don't know that here) - BatchExecution.process_job_completion(self, "completed") + batch_execution&.destroy! 
rescue => e Rails.logger.error "[SolidQueue] Failed to update batch #{batch_id} progress for job #{id}: #{e.message}" end From 6effa16789a4a4236d51957201dcc15ae7b929d8 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 16 Sep 2025 13:09:19 +0200 Subject: [PATCH 27/38] We no longer need to keep jobs * Batch execution is managed through the BatchExecution model, which is dependent destroyed when jobs are destroyed * Since it checks batch completion in an after_commit on: :destroy, it already gets checked, even when the job is not preserved * Because we rely on batch executions and counts, we don't need the jobs to stick around to properly run a batch --- app/jobs/solid_queue/batch/cleanup_job.rb | 15 --------------- app/models/solid_queue/batch.rb | 6 ------ app/models/solid_queue/job/executable.rb | 2 +- 3 files changed, 1 insertion(+), 22 deletions(-) delete mode 100644 app/jobs/solid_queue/batch/cleanup_job.rb diff --git a/app/jobs/solid_queue/batch/cleanup_job.rb b/app/jobs/solid_queue/batch/cleanup_job.rb deleted file mode 100644 index 3e697962..00000000 --- a/app/jobs/solid_queue/batch/cleanup_job.rb +++ /dev/null @@ -1,15 +0,0 @@ -# frozen_string_literal: true - -module SolidQueue - class Batch - class CleanupJob < (defined?(ApplicationJob) ? ApplicationJob : ActiveJob::Base) - discard_on ActiveRecord::RecordNotFound - - def perform(job_batch) - return if SolidQueue.preserve_finished_jobs? - - job_batch.jobs.finished.destroy_all - end - end - end -end diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index e313de20..1992697f 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -111,12 +111,6 @@ def execute_callbacks end perform_completion_job(:on_finish, {}) if on_finish.present? - - clear_unpreserved_jobs - end - - def clear_unpreserved_jobs - SolidQueue::Batch::CleanupJob.set(queue: self.class.maintenance_queue_name || "default").perform_later(self) unless SolidQueue.preserve_finished_jobs? 
end def enqueue_empty_job diff --git a/app/models/solid_queue/job/executable.rb b/app/models/solid_queue/job/executable.rb index b56c3e03..32b070d9 100644 --- a/app/models/solid_queue/job/executable.rb +++ b/app/models/solid_queue/job/executable.rb @@ -78,7 +78,7 @@ def dispatch_bypassing_concurrency_limits end def finished! - if SolidQueue.preserve_finished_jobs? || batch_id.present? # We clear jobs after the batch finishes + if SolidQueue.preserve_finished_jobs? update!(finished_at: Time.current) else destroy! From 554afd53af37a45db2c6a9d4ec8179b9a76741ff Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 16 Sep 2025 23:02:01 +0200 Subject: [PATCH 28/38] Removing pending_jobs column * Without always updating it on the fly, it's always the same as total_jobs, or is 0. So it's not really useful as a distinct column --- app/models/solid_queue/batch.rb | 1 - app/models/solid_queue/batch/trackable.rb | 2 +- app/models/solid_queue/batch_execution.rb | 7 +++---- .../solid_queue/install/templates/db/queue_schema.rb | 1 - test/dummy/db/queue_schema.rb | 1 - 5 files changed, 4 insertions(+), 8 deletions(-) diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index 1992697f..215f3048 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -68,7 +68,6 @@ def check_completion! finished_attributes[:failed_jobs] = failed end finished_attributes[:completed_jobs] = total_jobs - failed - finished_attributes[:pending_jobs] = 0 update!(finished_attributes) execute_callbacks diff --git a/app/models/solid_queue/batch/trackable.rb b/app/models/solid_queue/batch/trackable.rb index 0eab82a7..9897b191 100644 --- a/app/models/solid_queue/batch/trackable.rb +++ b/app/models/solid_queue/batch/trackable.rb @@ -57,7 +57,7 @@ def failed_jobs end def pending_jobs - finished? ? self[:pending_jobs] : batch_executions.count + finished? ? 
0 : batch_executions.count end def progress_percentage diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index 2e7d7d90..b61c67de 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -25,7 +25,7 @@ def create_all_from_jobs(jobs) total = jobs.size SolidQueue::Batch.upsert( - { batch_id:, total_jobs: total, pending_jobs: total }, + { batch_id:, total_jobs: total }, **provider_upsert_options ) end @@ -39,14 +39,13 @@ def provider_upsert_options { unique_by: :batch_id, on_duplicate: Arel.sql( - "total_jobs = solid_queue_batches.total_jobs + excluded.total_jobs, " \ - "pending_jobs = solid_queue_batches.pending_jobs + excluded.pending_jobs" + "total_jobs = solid_queue_batches.total_jobs + excluded.total_jobs" ) } else { on_duplicate: Arel.sql( - "total_jobs = total_jobs + VALUES(total_jobs), pending_jobs = pending_jobs + VALUES(pending_jobs)" + "total_jobs = total_jobs + VALUES(total_jobs)" ) } end diff --git a/lib/generators/solid_queue/install/templates/db/queue_schema.rb b/lib/generators/solid_queue/install/templates/db/queue_schema.rb index 5914164a..3a3d517f 100644 --- a/lib/generators/solid_queue/install/templates/db/queue_schema.rb +++ b/lib/generators/solid_queue/install/templates/db/queue_schema.rb @@ -129,7 +129,6 @@ t.text "on_failure" t.text "metadata" t.integer "total_jobs", default: 0, null: false - t.integer "pending_jobs", default: 0, null: false t.integer "completed_jobs", default: 0, null: false t.integer "failed_jobs", default: 0, null: false t.datetime "enqueued_at" diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index a673f303..1255a971 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -141,7 +141,6 @@ t.text "on_failure" t.text "metadata" t.integer "total_jobs", default: 0, null: false - t.integer "pending_jobs", default: 0, null: false t.integer "completed_jobs", default: 0, null: false 
t.integer "failed_jobs", default: 0, null: false t.datetime "enqueued_at" From 80af4e01b6956c8865b78a345da890b2ce755759 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 16 Sep 2025 23:04:09 +0200 Subject: [PATCH 29/38] Update doc to reflect current feature state * Remove multi step job example since we don't handle hierarchy anymore for the time being --- README.md | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 99723697..c67a3d18 100644 --- a/README.md +++ b/README.md @@ -598,7 +598,6 @@ and optionally trigger callbacks based on their status. It supports the followin - `on_success`: Fired when all jobs have succeeded, including retries. Will not fire if any jobs have failed, but will fire if jobs have been discarded using `discard_on` - `on_failure`: Fired when all jobs have finished, including retries. Will only fire if one or more jobs have failed. - If a job is part of a batch, it can enqueue more jobs for that batch using `batch#enqueue` -- Batches can be nested within other batches, creating a hierarchy. Outer batches will not fire callbacks until all nested jobs have finished. 
- Attaching arbitrary metadata to a batch ```rb @@ -609,20 +608,6 @@ class SleepyJob < ApplicationJob end end -class MultiStepJob < ApplicationJob - def perform - batch.enqueue do - SleepyJob.perform_later(5) - # Because of this nested batch, the top-level batch won't finish until the inner, - # 10 second job finishes - # Both jobs will still run simultaneously - SolidQueue::Batch.enqueue do - SleepyJob.perform_later(10) - end - end - end -end - class BatchFinishJob < ApplicationJob def perform(batch) # batch is always the default first argument Rails.logger.info "Good job finishing all jobs" @@ -653,12 +638,9 @@ end ### Batch options -As part of the processing of a batch, some jobs are automatically enqueued: - -- A `SolidQueue::Batch::BatchMonitorJob` is enqueued for every `Batch` being processed -- In the case of an empty batch, a `SolidQueue::Batch::EmptyJob` is enqueued +In the case of an empty batch, a `SolidQueue::Batch::EmptyJob` is enqueued. -By default, these jobs run on the `default` queue. You can specify an alternative queue for them in an initializer: +By default, this jobs run on the `default` queue. 
You can specify an alternative queue for it in an initializer: ```rb Rails.application.config.after_initialize do # or to_prepare From a195e25a92d629205230b3d6386db4cde32fc98c Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 16 Sep 2025 23:19:22 +0200 Subject: [PATCH 30/38] We always save the batch first now, so we don't need to upsert --- app/models/solid_queue/batch_execution.rb | 25 +---------------------- 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index b61c67de..733c1a5b 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -24,32 +24,9 @@ def create_all_from_jobs(jobs) }) total = jobs.size - SolidQueue::Batch.upsert( - { batch_id:, total_jobs: total }, - **provider_upsert_options - ) + SolidQueue::Batch.where(batch_id:).update_all(["total_jobs = total_jobs + ?", total]) end end - - private - - def provider_upsert_options - case connection.adapter_name - when "PostgreSQL", "SQLite" - { - unique_by: :batch_id, - on_duplicate: Arel.sql( - "total_jobs = solid_queue_batches.total_jobs + excluded.total_jobs" - ) - } - else - { - on_duplicate: Arel.sql( - "total_jobs = total_jobs + VALUES(total_jobs)" - ) - } - end - end end end end From 6da0e9ff1305213680064c33b5abf7749b421f92 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Tue, 16 Sep 2025 23:45:16 +0200 Subject: [PATCH 31/38] Rubocop --- app/models/solid_queue/batch_execution.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index 733c1a5b..e74e3dc2 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -24,7 +24,7 @@ def create_all_from_jobs(jobs) }) total = jobs.size - SolidQueue::Batch.where(batch_id:).update_all(["total_jobs = total_jobs + ?", total]) + SolidQueue::Batch.where(batch_id:).update_all([ 
"total_jobs = total_jobs + ?", total ]) end end end From 8e583f13ffc0f76aed18f2f746d5aed090f6e51d Mon Sep 17 00:00:00 2001 From: JP Camara Date: Wed, 17 Sep 2025 00:40:24 +0200 Subject: [PATCH 32/38] Accidental claude.md --- CLAUDE.md | 729 ------------------------------------------------------ 1 file changed, 729 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 729f180a..00000000 --- a/CLAUDE.md +++ /dev/null @@ -1,729 +0,0 @@ -# Solid Queue Codebase Analysis - -Co-authored-by: Mikael Henriksson - -## Project Overview - -Solid Queue is a database-backed queuing backend for Active Job in Ruby on Rails. It's designed as a modern, performant alternative to Redis-based solutions, leveraging SQL databases for job storage and processing. - -### Key Characteristics -- **Version**: Currently in development on `batch-poc` branch -- **Ruby Version**: >= 3.1.6 -- **Rails Version**: >= 7.2 -- **Database Support**: MySQL 8+, PostgreSQL 9.5+, SQLite -- **License**: MIT - -## Architecture Overview - -### Core Components - -1. **Workers**: Process jobs from queues - - Multi-threaded execution using thread pools - - Configurable polling intervals and batch sizes - - Queue prioritization support - -2. **Dispatchers**: Move scheduled jobs to ready state - - Handle future-scheduled jobs - - Manage concurrency controls - - Perform maintenance tasks - -3. **Scheduler**: Manages recurring tasks - - Cron-like job scheduling - - Supports job classes and eval commands - -4. 
**Supervisor**: Orchestrates all processes - - Process lifecycle management - - Signal handling (TERM, INT, QUIT) - - Heartbeat monitoring - -## Current Development Focus: Batch Processing - -The `batch-poc` branch implements job batching functionality, allowing: -- Grouping related jobs together -- Tracking collective progress -- Triggering callbacks on batch completion/failure -- Nested batch support with parent-child relationships -- Dynamic job addition within running batches - -### Batch Implementation Architecture - -#### Core Components - -**SolidQueue::Batch** (app/models/solid_queue/batch.rb) -- Primary ActiveRecord model for batch persistence -- Manages batch context using `ActiveSupport::IsolatedExecutionState` -- Handles batch lifecycle and job enqueueing -- Tracks: `total_jobs`, `pending_jobs`, `completed_jobs`, `failed_jobs` -- Manages callback execution and status transitions (pending → processing → completed/failed) -- Handles parent-child batch relationships via `parent_batch_id` -- Serializes callback jobs as JSON for later execution -- Key instance methods: `enqueue`, `check_completion!`, `execute_callbacks` -- Key class methods: `enqueue`, `wrap_in_batch_context`, `current_batch_id` -- Automatically enqueues EmptyJob for empty batches to ensure callbacks fire -- Enqueues BatchMonitorJob for completion monitoring - -**SolidQueue::Batch::Trackable** (app/models/solid_queue/batch/trackable.rb) -- Concern that provides status tracking and query scopes -- Scopes: `pending`, `processing`, `completed`, `failed`, `finished`, `unfinished` -- Helper methods: `finished?`, `processing?`, `pending?`, `progress_percentage` -- Calculates progress based on completed and failed jobs - -**SolidQueue::BatchExecution** (app/models/solid_queue/batch_execution.rb) -- Lightweight tracking record that exists only while a job is pending -- Deleted atomically when job completes to trigger counter updates -- Presence indicates job hasn't been processed yet -- Key class 
methods: - - `create_all_from_jobs`: Bulk creates executions and updates batch counters - - `process_job_completion`: Handles atomic deletion and counter updates -- Uses database-specific upsert strategies for atomic counter increments - -**SolidQueue::Job::Batchable** (app/models/solid_queue/job/batchable.rb) -- Concern mixed into Job model for batch support -- Creates BatchExecution records after job creation -- Tracks job completion via `after_update` callback -- Fires when `finished_at` is set (jobs being retried, not final failures) -- Handles batch progress updates when jobs complete - -**SolidQueue::Execution::Batchable** (app/models/solid_queue/execution/batchable.rb) -- Concern mixed into Execution model for batch support -- Tracks final job failures via `after_create` callback on FailedExecution -- Only fires when job exhausts all retries -- Updates batch failure counter for permanently failed jobs - -**ActiveJob::BatchId** (lib/active_job/batch_id.rb) -- ActiveJob extension for batch context -- Auto-assigns batch_id from context during job initialization -- Serializes/deserializes batch_id with job data -- Provides `batch` helper method to access current batch -- Only activates for SolidQueue adapter - -**SolidQueue::Batch::CleanupJob** (app/jobs/solid_queue/batch/cleanup_job.rb) -- Internal job for cleaning up finished jobs in a batch -- Respects `preserve_finished_jobs` configuration -- Automatically enqueued after batch completion -- Discards on RecordNotFound to handle already-deleted batches gracefully - -**SolidQueue::Batch::EmptyJob** (app/jobs/solid_queue/batch/empty_job.rb) -- Ensures batch callbacks fire even when no jobs are enqueued -- Does nothing in its perform method - exists solely to trigger completion -- Enables patterns where jobs are conditionally enqueued - -**SolidQueue::BatchMonitorJob** (app/jobs/solid_queue/batch_monitor_job.rb) -- Monitors batch completion status with a 1-second polling interval -- Checks for completion when all 
child batches are finished and pending_jobs is 0 -- Re-enqueues itself on error with a 30-second delay -- Automatically stops monitoring when batch is finished - -#### Batch Lifecycle - -1. **Creation Phase**: - ```ruby - batch = SolidQueue::Batch.enqueue(on_success: SuccessJob) do |batch| - MyJob.perform_later(arg1) - AnotherJob.perform_later(arg2) - end - ``` - - Creates UUID-identified batch record - - Sets batch context using `ActiveSupport::IsolatedExecutionState` - - Jobs automatically pick up batch_id from context - - Batch persisted before jobs are enqueued - - Parent batch relationship established if nested - -2. **Job Enqueuing**: - - ActiveJob::BatchId mixin captures batch_id during job initialization - - Jobs created with batch_id foreign key - - BatchExecution records created via `after_create` callback - - Batch counters updated atomically using database-specific upserts - - Total and pending job counts incremented together - - BatchMonitorJob automatically enqueued to monitor completion - -3. **Execution Phase**: - - Jobs processed normally by workers - - Job::Batchable `after_update` callback fires when `finished_at` is set - - For retrying jobs: marked as finished, batch gets "completed" update - - For final failures: FailedExecution created, triggers Execution::Batchable callback - - BatchExecution.process_job_completion handles atomic counter updates - -4. **Progress Tracking**: - - BatchExecution deletion happens in transaction with counter updates - - Atomic SQL: `pending_jobs = pending_jobs - 1, completed_jobs = completed_jobs + 1` - - No locking needed for counter updates (atomic SQL operations) - - Status changes from "pending" to "processing" on first job completion - - Real-time progress via `progress_percentage` method - -5. 
**Completion Detection**: - - `check_completion!` called after each job finishes - - Also monitored by BatchMonitorJob with 1-second polling - - Uses pessimistic locking to prevent race conditions - - Checks: `pending_jobs == 0` AND no unfinished child batches - - Determines final status: "completed" if `failed_jobs == 0`, otherwise "failed" - - Sets `finished_at` timestamp and updates status - - Status transitions from "pending" to "processing" on first job completion - -6. **Callback Execution**: - - Callbacks deserialized and enqueued as regular jobs - - Batch passed as first argument to callback job - - Execution order: on_failure/on_success, then on_finish - - Parent batch completion checked after callbacks - - CleanupJob enqueued if jobs shouldn't be preserved - -#### Batch Callbacks - -**Callback Types:** -- `on_finish`: Always fires when batch completes (success or failure) -- `on_success`: Fires only when all jobs succeed (failed_jobs == 0) -- `on_failure`: Fires on first job failure after all retries exhausted - -**Callback Execution:** -- Callbacks are ActiveJob instances serialized in the database -- Batch passed as first argument: `perform(batch, *original_args)` -- Executed asynchronously after batch completion -- Support for callback chaining in nested batches - -#### Special Features - -**Empty Batch Handling**: -- EmptyJob ensures callbacks fire even with no jobs -- Allows for conditional job enqueueing patterns -- Automatically enqueued when batch.total_jobs == 0 after enqueue block - -**Dynamic Job Addition**: -```ruby -class MyJob < ApplicationJob - def perform - batch.enqueue do # Add more jobs to current batch - AnotherJob.perform_later - end - end -end -``` - -**Nested Batches**: -- Full parent-child relationship tracking -- Children must complete before parent can complete -- Callbacks execute from innermost to outermost - -**Transaction Safety**: -- Full support for `enqueue_after_transaction_commit` -- Handles both synchronous and 
asynchronous enqueueing modes -- Prevents partial batch creation on rollback - -**Cleanup**: -- CleanupJob removes finished jobs when `preserve_finished_jobs` is false -- Maintains batch records for audit trail - -#### Database Schema - -**solid_queue_batches table:** -- `batch_id`: UUID identifier (unique index) -- `parent_batch_id`: For nested batches (indexed) -- `status`: pending, processing, completed, failed (default: "pending") -- `total_jobs`: Total number of jobs in batch (default: 0) -- `pending_jobs`: Jobs not yet completed (default: 0) -- `completed_jobs`: Successfully completed jobs (default: 0) -- `failed_jobs`: Permanently failed jobs (default: 0) -- `on_finish`: Serialized ActiveJob for finish callback (TEXT) -- `on_success`: Serialized ActiveJob for success callback (TEXT) -- `on_failure`: Serialized ActiveJob for failure callback (TEXT) -- `metadata`: JSON field for custom data (TEXT) -- `finished_at`: Completion timestamp -- `created_at`, `updated_at`: Rails timestamps - -**solid_queue_batch_executions table:** -- `job_id`: Foreign key to jobs table (unique index) -- `batch_id`: UUID reference to batch (STRING) -- `created_at`: Record creation timestamp -- Acts as presence indicator - deleted when job completes - -**solid_queue_jobs table additions:** -- `batch_id`: UUID reference to batch (STRING, indexed) - -## Development Approach - -### Database Schema Management -**IMPORTANT**: This project uses direct schema files (`db/queue_schema.rb`) rather than Rails migrations during development. Changes to the database structure should be made directly in: -- `lib/generators/solid_queue/install/templates/db/queue_schema.rb` - Template schema -- `test/dummy/db/queue_schema.rb` - Test database schema - -The schema is loaded fresh for tests, so schema changes can be made directly without migration files during development. 
- -## Directory Structure - -``` -solid_queue/ - app/ -  jobs/ # Internal job implementations -   solid_queue/ -   batch_update_job.rb -   recurring_job.rb -  models/ # Core ActiveRecord models -  solid_queue/ -  job.rb # Main job model -  job_batch.rb # Batch tracking -  execution.rb family (claimed, ready, failed, etc.) -  process.rb # Worker/dispatcher processes - lib/ -  active_job/ # ActiveJob integration -   job_batch_id.rb -   queue_adapters/solid_queue_adapter.rb -  solid_queue/ # Core library code -   batch.rb # Batch implementation -   worker.rb # Worker implementation -   dispatcher.rb # Dispatcher implementation -   supervisor.rb # Process supervisor -  generators/ # Rails generator for installation - test/ -  integration/ # Integration tests -   batch_lifecycle_test.rb # Batch-specific tests -  models/ # Model tests -  unit/ # Unit tests - config/ -  routes.rb # Engine routes (if any) -``` - -## Key Design Patterns - -### 1. Polling with FOR UPDATE SKIP LOCKED -- Prevents lock contention between workers -- Ensures efficient job claiming -- Falls back gracefully on older databases - -### 2. Semaphore-based Concurrency Control -- Limits concurrent executions per key -- Supports blocking and discarding strategies -- Configurable duration limits - -### 3. Transactional Job Enqueueing -- Jobs and batch records created atomically -- Support for `enqueue_after_transaction_commit` -- Handles bulk enqueuing efficiently - -### 4. 
Process Heartbeats -- Regular heartbeat updates from all processes -- Automatic cleanup of dead processes -- Configurable thresholds - -## Testing Approach - -- **73 test files** covering unit, integration, and model tests -- Test dummy Rails application in `test/dummy/` -- Custom test helpers for: - - Process lifecycle testing - - Job execution verification - - Configuration testing -- Uses fixtures for test data - -## Configuration Files - -### Primary Configuration -- `config/queue.yml`: Worker and dispatcher configuration -- `config/recurring.yml`: Scheduled job definitions -- Database config requires separate `queue` database connection - -### Important Settings -- `process_heartbeat_interval`: Default 60 seconds -- `process_alive_threshold`: Default 5 minutes -- `shutdown_timeout`: Default 5 seconds -- `preserve_finished_jobs`: Default true -- `clear_finished_jobs_after`: Default 1 day - -## Development Workflow - -### Current Git Status -- Branch: `batch-poc` -- Modified files indicate active batch development -- Tests being added/modified for batch functionality - -### Key Modified Files -- `app/models/solid_queue/job/batchable.rb`: Job batch integration -- `app/models/solid_queue/execution/batchable.rb`: Execution batch handling -- `lib/solid_queue/batch.rb`: Core batch logic -- `test/integration/batch_lifecycle_test.rb`: Batch testing - -## Performance Considerations - -1. **Database Indexing**: Critical for polling performance - - Index on `(queue_name, priority, job_id)` - - Covering indexes for skip locked queries - -2. **Batch Overhead**: Batching adds transactional overhead - - Avoid mixing with bulk enqueuing - - Consider impact on concurrency controls - -3. 
**Thread Pool Sizing**: - - Should be less than the database connection pool size minus 2 - - Account for polling and heartbeat connections - -## ActiveJob Layer: Understanding the Division of Responsibilities - -Solid Queue is intentionally designed as a **backend adapter** for ActiveJob, not a complete job processing framework. This architectural decision means many critical features are handled by ActiveJob itself, not Solid Queue. Understanding this layering is crucial for working with the codebase. - -### Features Handled by ActiveJob (NOT in Solid Queue) - -1. **Retry Logic** (`retry_on`) - - ActiveJob manages retry attempts, backoff strategies, and max attempts - - Solid Queue only stores failed executions for manual intervention - - No retry mechanism exists in Solid Queue itself - -2. **Error Handling** (`discard_on`, `rescue_from`) - - ActiveJob decides whether to retry, discard, or handle errors - - Solid Queue just captures and stores the error information - - Custom error handling logic lives in job classes, not the queue - -3. **Callbacks** (`before_enqueue`, `after_perform`, etc.) - - All job lifecycle callbacks are ActiveJob features - - Solid Queue doesn't know about or manage these callbacks - - Exception: Batch callbacks are Solid Queue-specific - -4. **Serialization/Deserialization** - - ActiveJob handles argument serialization (GlobalID, etc.) - - Solid Queue stores the serialized job data as JSON - - Complex argument types are ActiveJob's responsibility - -5. **Job Configuration** - - `queue_as`, `priority`, and other DSL methods are ActiveJob - - Solid Queue reads these values after ActiveJob sets them - - Job class inheritance and configuration is pure ActiveJob - -6. **Timeouts and Deadlines** - - No built-in job timeout mechanism in Solid Queue - - Must be implemented at the job level using ActiveJob patterns - - Process-level timeouts handled via signals only - -### Features Solid Queue DOES Provide - -1.
**Storage and Retrieval** - - Database schema for jobs and executions - - Efficient polling with `FOR UPDATE SKIP LOCKED` - - Transaction-safe job claiming - -2. **Process Management** - - Worker processes with thread pools - - Dispatcher for scheduled jobs - - Supervisor for process lifecycle - -3. **Concurrency Controls** (Extended ActiveJob) - - Semaphore-based limiting - - Blocking/discarding on conflicts - - Duration-based expiry - -4. **Batch Processing** (Extended ActiveJob) - - Job grouping and tracking - - Batch-specific callbacks - - Progress monitoring - -5. **Recurring Jobs** - - Cron-like scheduling - - Separate from ActiveJob's scheduling - -### The Adapter Pattern - -The `SolidQueueAdapter` is minimal by design: -```ruby -class SolidQueueAdapter - def enqueue(active_job) - SolidQueue::Job.enqueue(active_job) - end - - def enqueue_at(active_job, timestamp) - SolidQueue::Job.enqueue(active_job, scheduled_at: Time.at(timestamp)) - end -end -``` - -This thin adapter means: -- Solid Queue doesn't parse job classes -- It doesn't understand job arguments beyond storage -- It doesn't execute business logic, only job invocation - -### Implications for Development - -When working with Solid Queue: - -1. **Don't look for retry logic here** - It's in ActiveJob -2. **Don't implement job-level features** - Use ActiveJob patterns -3. **Focus on infrastructure** - Storage, retrieval, process management -4. **Extend via ActiveJob** - Custom job classes, not queue modifications -5. **Batch features are special** - One of the few job-level features in Solid Queue - -### Error Flow Example - -1. Job raises exception during `perform` -2. ActiveJob's `retry_on` catches it -3. ActiveJob decides: retry now, retry later, or discard -4. If retrying later: ActiveJob calls `enqueue_at` on adapter -5. Solid Queue stores the job with new scheduled time -6. If final failure: Solid Queue creates `failed_execution` record -7. 
Manual intervention needed via `failed_execution.retry` - -### CRITICAL: Job Retry Behavior in Solid Queue - -**Each retry attempt creates a new job with a new job_id but same active_job_id** - -When a job fails and will be retried: -1. The current job is marked as `finished` (finished_at is set) -2. A new job is created for the retry with a new job_id -3. The jobs share the same active_job_id - -When a job exhausts all retries (final failure): -1. The final job is NOT marked as finished (finished_at remains nil) -2. A FailedExecution record is created -3. No new job is created - -**Example with 3 retry attempts:** -- Job 1 (id: 100) fails → marked as finished → Job 2 created -- Job 2 (id: 101) fails → marked as finished → Job 3 created -- Job 3 (id: 102) fails → NOT marked as finished → FailedExecution created - -This means: -- Jobs that are retried DO have finished_at set -- Jobs with FailedExecutions do NOT have finished_at set -- The Job::Batchable callback fires for jobs being retried (they're "finished") -- The Execution::Batchable callback fires for final failures (FailedExecution created) - -This separation keeps Solid Queue focused on being a robust, database-backed storage and execution engine while ActiveJob handles the higher-level job processing semantics. - -## Integration Points - -### Rails Integration -- Engine-based architecture -- Automatic configuration in Rails 8 -- Generator for easy setup - -### Database Adapters -- Adapter-specific optimizations -- Automatic skip locked detection -- Connection pool management - -## Security Considerations - -- No secrets/credentials in job arguments -- Careful with eval in recurring tasks -- Database permissions for queue database - -## Common Patterns for Extension - -### Adding New Job Types -1. Inherit from `ApplicationJob` -2. Use batch context for batch jobs -3. Implement proper error handling - -### Custom Callbacks -1. Use lifecycle hooks for process events -2. 
Implement batch callbacks for completion logic -3. Consider transactional boundaries - -### Performance Monitoring -1. Hook into instrumentation API -2. Monitor heartbeat intervals -3. Track queue depths and processing times - -## Debugging Tips - -1. **Check heartbeats**: Ensure processes are alive -2. **Review failed_executions**: Inspect error details -3. **Monitor semaphores**: Check for concurrency blocks -4. **Batch status**: Use the `SolidQueue::Batch` model to track progress -5. **Enable query logs**: Set `silence_polling: false` - -## Known Limitations - -1. Phased restarts not supported with Puma plugin -2. Queue order not preserved in concurrency unblocking -3. Batch callbacks execute asynchronously -4. No automatic retry mechanism (relies on ActiveJob) - -## Future Considerations - -- Batch lifecycle improvements in progress -- Potential for distributed locking mechanisms -- Enhanced monitoring and metrics -- Dashboard UI integration improvements - -## Code Style and Conventions - -### Model Structure -- **Base Class**: All models inherit from `SolidQueue::Record` (not directly from `ActiveRecord::Base`) -- **Concerns**: Extract shared behavior into concerns under `app/models/solid_queue/{model}/` -- **Associations**: Define clear relationships with foreign keys and dependent options -- **Scopes**: Use descriptive names, chain simple scopes for complex queries - -### Naming Conventions -```ruby -# Classes -class SolidQueue::ReadyExecution < Execution # Descriptive, namespaced - -# Methods -def dispatch_batch # Action verb for operations -def finished? # Predicate with ? -def finish!
# Bang for state changes -def with_lock # Preposition for context methods -def after_commit_on_finish # Lifecycle callbacks clearly named - -# Constants -DEFAULT_BATCH_SIZE = 500 # SCREAMING_SNAKE_CASE -STATUSES = %w[ pending processing completed failed ] # Arrays for enums -``` - -### Database Operations - -#### Transaction Patterns -```ruby -# Always wrap multi-step operations -transaction do - job = create!(job_attributes) - job.prepare_for_execution - job -end - -# Use with_lock for pessimistic locking -batch_record.with_lock do - batch_record.update!(pending_jobs: batch_record.pending_jobs - 1) - batch_record.check_completion! -end -``` - -#### Bulk Operations -```ruby -# Prefer insert_all for bulk creates -insert_all(execution_rows, returning: %w[ id job_id ]) - -# Use update_all for batch updates -where(id: job_ids).update_all(finished_at: Time.current) - -# Chain scopes for complex queries -ready.by_priority.limit(batch_size) -``` - -#### SQL Safety -```ruby -# Parameterized queries -where("scheduled_at <= ?", Time.current) - -# Arel for complex SQL -lock(Arel.sql("FOR UPDATE SKIP LOCKED")) - -# Avoid string interpolation -# BAD: where("status = '#{status}'") -# GOOD: where(status: status) -``` - -### Concern Organization - -```ruby -module SolidQueue - module Job - module Batchable - extend ActiveSupport::Concern - - included do - # Associations - belongs_to :batch_record, optional: true - - # Callbacks - after_update :track_batch_progress, if: :batch_id? - - # Scopes - scope :in_batch, ->(batch_id) { where(batch_id: batch_id) } - end - - class_methods do - # Class-level functionality - end - - # Instance methods grouped by purpose - private - def track_batch_progress - # Implementation - end - end - end -end -``` - -### Callback Patterns - -```ruby -# Use conditional callbacks -after_create :dispatch, if: :ready? -after_destroy :unblock_next, if: -> { concurrency_limited? && ready? 
} - -# Separate callback methods -private - def dispatch - ReadyExecution.create_from_job(self) - end -``` - -### Error Handling - -```ruby -# Custom exceptions with context -class BatchCompletionError < StandardError - attr_reader :batch_id - - def initialize(batch_id, message) - @batch_id = batch_id - super("Batch #{batch_id}: #{message}") - end -end - -# Wrap and re-raise with context -rescue ActiveRecord::RecordNotUnique => e - raise EnqueueError.new("Duplicate job: #{e.message}").tap { |error| - error.set_backtrace(e.backtrace) - } -end - -# Silent rescue for non-critical operations -def optional_cleanup - # cleanup code -rescue => e - Rails.logger.error "[SolidQueue] Cleanup failed: #{e.message}" -end -``` - -### Instrumentation - -```ruby -# Always instrument important operations -SolidQueue.instrument(:batch_update, batch_id: batch_id) do |payload| - result = perform_update - payload[:jobs_updated] = result.count - result -end -``` - -### Testing Patterns - -```ruby -# Use transactional tests sparingly -self.use_transactional_tests = false # For integration tests - -# Custom assertions -def assert_batch_completed(batch_id) - batch = SolidQueue::BatchRecord.find(batch_id) - assert_equal "completed", batch.status - assert_equal 0, batch.pending_jobs -end - -# Wait helpers for async operations -wait_for_jobs_to_finish_for(2.seconds) -``` - -### Key Principles - -1. **Composition over Inheritance**: Use concerns for shared behavior -2. **Fail Fast**: Validate early, use bang methods for critical operations -3. **Idempotency**: Design operations to be safely retryable -4. **Instrumentation**: Measure everything important -5. **Clear Boundaries**: Models handle persistence, jobs handle business logic -6. **Defensive Coding**: Handle nil cases, use safe navigation (`&.`) -7. **Explicit over Implicit**: Clear method names over clever shortcuts -8. **Transaction Safety**: Always consider rollback scenarios -9. 
**Performance First**: Use bulk operations, avoid N+1 queries -10. **Rails Conventions**: Follow Rails patterns unless there's a good reason not to - -### IMPORTANT - -- Always utilize ActiveSupport::IsolatedExecutionState instead of Thread.current \ No newline at end of file From 46e117c0c7e6bc8ac34958bbd78f005e42e52cdd Mon Sep 17 00:00:00 2001 From: JP Camara Date: Wed, 17 Sep 2025 18:55:06 +0200 Subject: [PATCH 33/38] Allow omitting a block, which will just enqueue an empty job --- app/models/solid_queue/batch.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index 215f3048..0584e0cb 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -5,7 +5,8 @@ class Batch < Record include Trackable has_many :jobs, foreign_key: :batch_id, primary_key: :batch_id - has_many :batch_executions, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchExecution" + has_many :batch_executions, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchExecution", + dependent: :destroy serialize :on_finish, coder: JSON serialize :on_success, coder: JSON @@ -25,7 +26,7 @@ def enqueue(&block) save! if new_record? 
Batch.wrap_in_batch_context(batch_id) do - block.call(self) + block&.call(self) end if ActiveRecord.respond_to?(:after_all_transactions_commit) From fc2f2276e87a7bedc55812899610435d5470c740 Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 10 Oct 2025 21:24:20 -0400 Subject: [PATCH 34/38] Switch batch_id to active_job_batch_id * This is a placeholder for a later rails-based abstraction * Use bigint references like everything else and directly relate to the record --- app/models/solid_queue/batch.rb | 17 ++++++++--------- app/models/solid_queue/batch/trackable.rb | 3 +-- app/models/solid_queue/batch_execution.rb | 6 +++--- app/models/solid_queue/job/batchable.rb | 2 +- lib/active_job/batch_id.rb | 2 +- .../install/templates/db/queue_schema.rb | 8 ++++---- test/dummy/db/queue_schema.rb | 8 ++++---- test/integration/batch_lifecycle_test.rb | 18 +++++++++--------- 8 files changed, 31 insertions(+), 33 deletions(-) diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index 0584e0cb..4a6d8d8f 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -4,16 +4,15 @@ module SolidQueue class Batch < Record include Trackable - has_many :jobs, foreign_key: :batch_id, primary_key: :batch_id - has_many :batch_executions, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::BatchExecution", - dependent: :destroy + has_many :jobs + has_many :batch_executions, class_name: "SolidQueue::BatchExecution", dependent: :destroy serialize :on_finish, coder: JSON serialize :on_success, coder: JSON serialize :on_failure, coder: JSON serialize :metadata, coder: JSON - after_initialize :set_batch_id + after_initialize :set_active_job_batch_id after_commit :start_batch, on: :create, unless: -> { ActiveRecord.respond_to?(:after_all_transactions_commit) } mattr_accessor :maintenance_queue_name @@ -25,7 +24,7 @@ def enqueue(&block) transaction do save! if new_record? 
- Batch.wrap_in_batch_context(batch_id) do + Batch.wrap_in_batch_context(id) do block&.call(self) end @@ -54,7 +53,7 @@ def check_completion! return if batch_executions.limit(1).exists? rows = Batch - .by_batch_id(batch_id) + .where(id: id) .unfinished .empty_executions .update_all(finished_at: Time.current) @@ -77,8 +76,8 @@ def check_completion! private - def set_batch_id - self.batch_id ||= SecureRandom.uuid + def set_active_job_batch_id + self.active_job_batch_id ||= SecureRandom.uuid end def as_active_job(active_job_klass) @@ -114,7 +113,7 @@ def execute_callbacks end def enqueue_empty_job - Batch.wrap_in_batch_context(batch_id) do + Batch.wrap_in_batch_context(id) do EmptyJob.set(queue: self.class.maintenance_queue_name || "default").perform_later end end diff --git a/app/models/solid_queue/batch/trackable.rb b/app/models/solid_queue/batch/trackable.rb index 9897b191..61dca290 100644 --- a/app/models/solid_queue/batch/trackable.rb +++ b/app/models/solid_queue/batch/trackable.rb @@ -10,12 +10,11 @@ module Trackable scope :succeeded, -> { finished.where(failed_at: nil) } scope :unfinished, -> { where(finished_at: nil) } scope :failed, -> { where.not(failed_at: nil) } - scope :by_batch_id, ->(batch_id) { where(batch_id:) } scope :empty_executions, -> { where(<<~SQL) NOT EXISTS ( SELECT 1 FROM solid_queue_batch_executions - WHERE solid_queue_batch_executions.batch_id = solid_queue_batches.batch_id + WHERE solid_queue_batch_executions.batch_id = solid_queue_batches.id LIMIT 1 ) SQL diff --git a/app/models/solid_queue/batch_execution.rb b/app/models/solid_queue/batch_execution.rb index e74e3dc2..f0f5acea 100644 --- a/app/models/solid_queue/batch_execution.rb +++ b/app/models/solid_queue/batch_execution.rb @@ -3,13 +3,13 @@ module SolidQueue class BatchExecution < Record belongs_to :job, optional: true - belongs_to :batch, foreign_key: :batch_id, primary_key: :batch_id + belongs_to :batch after_commit :check_completion, on: :destroy private def check_completion - 
batch = Batch.find_by(batch_id: batch_id) + batch = Batch.find_by(id: batch_id) batch.check_completion! if batch.present? end @@ -24,7 +24,7 @@ def create_all_from_jobs(jobs) }) total = jobs.size - SolidQueue::Batch.where(batch_id:).update_all([ "total_jobs = total_jobs + ?", total ]) + SolidQueue::Batch.where(id: batch_id).update_all([ "total_jobs = total_jobs + ?", total ]) end end end diff --git a/app/models/solid_queue/job/batchable.rb b/app/models/solid_queue/job/batchable.rb index 46fb0ff9..5ab1bae4 100644 --- a/app/models/solid_queue/job/batchable.rb +++ b/app/models/solid_queue/job/batchable.rb @@ -6,7 +6,7 @@ module Batchable extend ActiveSupport::Concern included do - belongs_to :batch, foreign_key: :batch_id, primary_key: :batch_id, class_name: "SolidQueue::Batch", optional: true + belongs_to :batch, optional: true has_one :batch_execution, foreign_key: :job_id, dependent: :destroy after_create :create_batch_execution, if: :batch_id? diff --git a/lib/active_job/batch_id.rb b/lib/active_job/batch_id.rb index fe57755c..1bd03bfc 100644 --- a/lib/active_job/batch_id.rb +++ b/lib/active_job/batch_id.rb @@ -25,7 +25,7 @@ def deserialize(job_data) end def batch - @batch ||= SolidQueue::Batch.find_by(batch_id: batch_id) + @batch ||= SolidQueue::Batch.find_by(id: batch_id) end private diff --git a/lib/generators/solid_queue/install/templates/db/queue_schema.rb b/lib/generators/solid_queue/install/templates/db/queue_schema.rb index 3a3d517f..25f87688 100644 --- a/lib/generators/solid_queue/install/templates/db/queue_schema.rb +++ b/lib/generators/solid_queue/install/templates/db/queue_schema.rb @@ -37,7 +37,7 @@ t.string "concurrency_key" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.string "batch_id" + t.bigint "batch_id" t.index [ "active_job_id" ], name: "index_solid_queue_jobs_on_active_job_id" t.index [ "batch_id" ], name: "index_solid_queue_jobs_on_batch_id" t.index [ "class_name" ], name: 
"index_solid_queue_jobs_on_class_name" @@ -123,7 +123,7 @@ end create_table "solid_queue_batches", force: :cascade do |t| - t.string "batch_id" + t.string "active_job_batch_id" t.text "on_finish" t.text "on_success" t.text "on_failure" @@ -136,12 +136,12 @@ t.datetime "failed_at" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index ["batch_id"], name: "index_solid_queue_batches_on_batch_id", unique: true + t.index ["active_job_batch_id"], name: "index_solid_queue_batches_on_active_job_batch_id", unique: true end create_table "solid_queue_batch_executions", force: :cascade do |t| t.bigint "job_id", null: false - t.string "batch_id", null: false + t.bigint "batch_id", null: false t.datetime "created_at", null: false t.index [ "job_id" ], name: "index_solid_queue_batch_executions_on_job_id", unique: true t.index ["batch_id"], name: "index_solid_queue_batch_executions_on_batch_id" diff --git a/test/dummy/db/queue_schema.rb b/test/dummy/db/queue_schema.rb index 1255a971..050f1220 100644 --- a/test/dummy/db/queue_schema.rb +++ b/test/dummy/db/queue_schema.rb @@ -49,7 +49,7 @@ t.string "concurrency_key" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.string "batch_id" + t.bigint "batch_id" t.index ["active_job_id"], name: "index_solid_queue_jobs_on_active_job_id" t.index ["batch_id"], name: "index_solid_queue_jobs_on_batch_id" t.index ["class_name"], name: "index_solid_queue_jobs_on_class_name" @@ -135,7 +135,7 @@ end create_table "solid_queue_batches", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| - t.string "batch_id" + t.string "active_job_batch_id" t.text "on_finish" t.text "on_success" t.text "on_failure" @@ -148,12 +148,12 @@ t.datetime "failed_at" t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index ["batch_id"], name: "index_solid_queue_batches_on_batch_id", unique: true + t.index ["active_job_batch_id"], name: 
"index_solid_queue_batches_on_active_job_batch_id", unique: true end create_table "solid_queue_batch_executions", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.bigint "job_id", null: false - t.string "batch_id", null: false + t.bigint "batch_id", null: false t.datetime "created_at", null: false t.index ["job_id"], name: "index_solid_queue_batch_executions_on_job_id", unique: true t.index ["batch_id"], name: "index_solid_queue_batch_executions_on_batch_id" diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index 808477da..b1f058ff 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -173,8 +173,8 @@ def perform wait_for_batches_to_finish_for(3.seconds) wait_for_jobs_to_finish_for(1.second) - job_batch1 = SolidQueue::Batch.find_by(batch_id: batch1.batch_id) - job_batch2 = SolidQueue::Batch.find_by(batch_id: batch2.batch_id) + job_batch1 = SolidQueue::Batch.find_by(id: batch1.id) + job_batch2 = SolidQueue::Batch.find_by(id: batch2.id) assert_equal 2, SolidQueue::Batch.count assert_equal 2, SolidQueue::Batch.finished.count @@ -232,8 +232,8 @@ def perform wait_for_batches_to_finish_for(3.seconds) wait_for_jobs_to_finish_for(1.second) - job_batch1 = SolidQueue::Batch.find_by(batch_id: batch1.batch_id) - job_batch2 = SolidQueue::Batch.find_by(batch_id: batch2.batch_id) + job_batch1 = SolidQueue::Batch.find_by(id: batch1.id) + job_batch2 = SolidQueue::Batch.find_by(id: batch2.id) assert_equal 2, SolidQueue::Batch.count assert_equal 2, SolidQueue::Batch.finished.count @@ -288,7 +288,7 @@ def perform wait_for_batches_to_finish_for(2.seconds) wait_for_jobs_to_finish_for(1.second) - assert_equal [ "Hi finish #{batch.batch_id}!", "Hi success #{batch.batch_id}!", "hey" ].sort, JobBuffer.values.sort + assert_equal [ "Hi finish #{batch.id}!", "Hi success #{batch.id}!", "hey" ].sort, JobBuffer.values.sort assert_equal 1, batch.reload.completed_jobs 
assert_equal 0, batch.failed_jobs assert_equal 0, batch.pending_jobs @@ -299,7 +299,7 @@ class OnFinishJob < ApplicationJob queue_as :background def perform(batch) - JobBuffer.add "Hi finish #{batch.batch_id}!" + JobBuffer.add "Hi finish #{batch.id}!" end end @@ -307,7 +307,7 @@ class OnSuccessJob < ApplicationJob queue_as :background def perform(batch) - JobBuffer.add "Hi success #{batch.batch_id}!" + JobBuffer.add "Hi success #{batch.id}!" end end @@ -315,7 +315,7 @@ class OnFailureJob < ApplicationJob queue_as :background def perform(batch) - JobBuffer.add "Hi failure #{batch.batch_id}!" + JobBuffer.add "Hi failure #{batch.id}!" end end @@ -330,6 +330,6 @@ def job!(active_job) end def batch_jobs(*batches) - SolidQueue::Job.where(batch_id: batches.map(&:batch_id)) + SolidQueue::Job.where(id: batches.map(&:id)) end end From 437a78075ad009367615a82b5dfcb17c298812df Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 10 Oct 2025 21:33:45 -0400 Subject: [PATCH 35/38] Make it so metadata is more ergonomic to include * GoodJob and Sidekiq do not require an explicit keyword, they just accept any other arguments as metadata and set them that way * Also make it so we always return a hash for metadata, and that hash is with_indifferent_access --- app/models/solid_queue/batch.rb | 6 +++++- test/integration/batch_lifecycle_test.rb | 4 ++-- test/models/solid_queue/batch_test.rb | 8 ++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/app/models/solid_queue/batch.rb b/app/models/solid_queue/batch.rb index 4a6d8d8f..6292f328 100644 --- a/app/models/solid_queue/batch.rb +++ b/app/models/solid_queue/batch.rb @@ -48,6 +48,10 @@ def on_finish=(value) super(serialize_callback(value)) end + def metadata + (super || {}).with_indifferent_access + end + def check_completion! return if finished? || !ready? return if batch_executions.limit(1).exists? 
@@ -124,7 +128,7 @@ def start_batch end class << self - def enqueue(on_success: nil, on_failure: nil, on_finish: nil, metadata: nil, &block) + def enqueue(on_success: nil, on_failure: nil, on_finish: nil, **metadata, &block) new.tap do |batch| batch.assign_attributes( on_success: on_success, diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index b1f058ff..cbbc77da 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -274,10 +274,10 @@ def perform test "batch interface" do batch = SolidQueue::Batch.enqueue( - metadata: { source: "test", priority: "high", user_id: 123 }, on_finish: OnFinishJob, on_success: OnSuccessJob, - on_failure: OnFailureJob + on_failure: OnFailureJob, + source: "test", priority: "high", user_id: 123 ) do AddToBufferJob.perform_later "hey" end diff --git a/test/models/solid_queue/batch_test.rb b/test/models/solid_queue/batch_test.rb index ee00bfd1..d5e09a58 100644 --- a/test/models/solid_queue/batch_test.rb +++ b/test/models/solid_queue/batch_test.rb @@ -24,14 +24,14 @@ def perform(arg) test "batch will be completed on success" do batch = SolidQueue::Batch.enqueue(on_finish: BatchCompletionJob) { } - job_batch = SolidQueue::Batch.find_by(batch_id: batch.batch_id) + job_batch = SolidQueue::Batch.find_by(id: batch.id) assert_not_nil job_batch.on_finish assert_equal BatchCompletionJob.name, job_batch.on_finish["job_class"] end test "batch will be completed on finish" do batch = SolidQueue::Batch.enqueue(on_success: BatchCompletionJob) { } - job_batch = SolidQueue::Batch.find_by(batch_id: batch.batch_id) + job_batch = SolidQueue::Batch.find_by(id: batch.id) assert_not_nil job_batch.on_success assert_equal BatchCompletionJob.name, job_batch.on_success["job_class"] end @@ -43,7 +43,7 @@ def perform(arg) end assert_equal 2, SolidQueue::Job.count - assert_equal [ batch.batch_id ] * 2, SolidQueue::Job.last(2).map(&:batch_id) + assert_equal [ batch.id ] * 2, 
SolidQueue::Job.last(2).map(&:batch_id) end test "batch id is present inside the block" do @@ -68,7 +68,7 @@ def perform(arg) test "creates batch with metadata" do SolidQueue::Batch.enqueue( - metadata: { source: "test", priority: "high", user_id: 123 } + source: "test", priority: "high", user_id: 123 ) do NiceJob.perform_later("world") end From ca61ca516a4e4ac8784b2e52181f916c788f4c9b Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 10 Oct 2025 21:44:07 -0400 Subject: [PATCH 36/38] Bad query field --- test/integration/batch_lifecycle_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index cbbc77da..25e78c58 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -330,6 +330,6 @@ def job!(active_job) end def batch_jobs(*batches) - SolidQueue::Job.where(id: batches.map(&:id)) + SolidQueue::Job.where(batch_id: batches.map(&:id)) end end From cde32c3994bf8c424ea6143c7f3b66681c1989de Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 10 Oct 2025 21:49:24 -0400 Subject: [PATCH 37/38] Update metadata interface --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c67a3d18..d03ecf10 100644 --- a/README.md +++ b/README.md @@ -630,7 +630,7 @@ SolidQueue::Batch.enqueue( on_finish: BatchFinishJob, on_success: BatchSuccessJob, on_failure: BatchFailureJob, - metadata: { user_id: 123 } + user_id: 123 ) do 5.times.map { |i| SleepyJob.perform_later(i) } end From 130ea3b815fc0e5ebdbeac78a0bc8815e547c16b Mon Sep 17 00:00:00 2001 From: JP Camara Date: Fri, 10 Oct 2025 22:07:07 -0400 Subject: [PATCH 38/38] Give more breathing room for CI test runs --- test/integration/batch_lifecycle_test.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/integration/batch_lifecycle_test.rb b/test/integration/batch_lifecycle_test.rb index 
25e78c58..90d06690 100644 --- a/test/integration/batch_lifecycle_test.rb +++ b/test/integration/batch_lifecycle_test.rb @@ -146,7 +146,7 @@ def perform @dispatcher.start @worker.start - wait_for_batches_to_finish_for(2.seconds) + wait_for_batches_to_finish_for(5.seconds) jobs = batch_jobs(batch1, batch2, batch3) assert_equal [ "hey", "ho", "let's go" ], JobBuffer.values.sort @@ -170,8 +170,8 @@ def perform @dispatcher.start @worker.start - wait_for_batches_to_finish_for(3.seconds) - wait_for_jobs_to_finish_for(1.second) + wait_for_batches_to_finish_for(5.seconds) + wait_for_jobs_to_finish_for(5.second) job_batch1 = SolidQueue::Batch.find_by(id: batch1.id) job_batch2 = SolidQueue::Batch.find_by(id: batch2.id) @@ -205,8 +205,8 @@ def perform @dispatcher.start @worker.start - wait_for_batches_to_finish_for(3.seconds) - wait_for_jobs_to_finish_for(1.second) + wait_for_batches_to_finish_for(5.seconds) + wait_for_jobs_to_finish_for(5.second) assert_equal 6, batch1.reload.jobs.count assert_equal 6, batch1.total_jobs @@ -229,8 +229,8 @@ def perform @dispatcher.start @worker.start - wait_for_batches_to_finish_for(3.seconds) - wait_for_jobs_to_finish_for(1.second) + wait_for_batches_to_finish_for(5.seconds) + wait_for_jobs_to_finish_for(5.second) job_batch1 = SolidQueue::Batch.find_by(id: batch1.id) job_batch2 = SolidQueue::Batch.find_by(id: batch2.id) @@ -265,8 +265,8 @@ def perform @dispatcher.start @worker.start - wait_for_batches_to_finish_for(2.seconds) - wait_for_jobs_to_finish_for(2.seconds) + wait_for_batches_to_finish_for(5.seconds) + wait_for_jobs_to_finish_for(5.seconds) assert_equal true, batch1.reload.finished? assert_equal 0, SolidQueue::Job.count