Skip to content

Commit

Permalink
Fix timing issue during scraping
Browse files Browse the repository at this point in the history
Files uploaded while a scrape was running were previously skipped over, because last_scraped_at was set to the time the scrape finished rather than the time it started.
  • Loading branch information
Earlopain committed Sep 23, 2023
1 parent 823bbee commit 48f8721
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion app/helpers/artist_url_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ def scraper_status(artist_url, prefix: "")
return "" if artist_url.scraper_status.blank?

beginning = "#{prefix}: " if prefix.present?
"#{beginning}#{artist_url.scraper_status.to_json}"
"#{beginning}#{artist_url.scraper_status.except('started_at').to_json}"
end
end
5 changes: 3 additions & 2 deletions app/jobs/scrape_artist_url_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ class ScrapeArtistUrlJob < ApplicationJob
queue_as :scraping
good_job_control_concurrency_with(total_limit: 1, key: -> { arguments.first.id })

def perform(artist_url)
def perform(artist_url) # rubocop:disable Metrics/CyclomaticComplexity
return unless artist_url.scraper_enabled?

scraper = artist_url.scraper
scraper.jumpstart(artist_url.scraper_status[scraper.class.state.to_s]) if artist_url.scraper_status.present?
artist_url.scraper_status["started_at"] ||= Time.current

while scraper.more?
submissions = scraper.fetch_and_save_next_submissions
Expand All @@ -18,8 +19,8 @@ def perform(artist_url)
stop_marker = artist_url.last_scraped_at
break if stop_marker.present? && submissions.any? { |submission| submission.created_at.before? stop_marker }
end
artist_url.last_scraped_at = artist_url.scraper_status["started_at"]
artist_url.scraper_status = {}
artist_url.last_scraped_at = Time.current
artist_url.save
end
end

0 comments on commit 48f8721

Please sign in to comment.