Skip to content

Commit

Permalink
Merge pull request huginn#819 from dsander/agent-runner
Browse files Browse the repository at this point in the history
Add AgentRunner and LongRunnable to support long running agents
  • Loading branch information
dsander committed Sep 9, 2015
2 parents 065a35c + e0f1469 commit 06b022f
Show file tree
Hide file tree
Showing 18 changed files with 997 additions and 240 deletions.
124 changes: 124 additions & 0 deletions app/concerns/long_runnable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
=begin
Usage Example:
class Agents::ExampleAgent < Agent
include LongRunnable
# Optional
# Override this method if you need to group multiple agents based on an API key,
# or server they connect to.
# Have a look at the TwitterStreamAgent for an example.
def self.setup_worker; end
class Worker < LongRunnable::Worker
# Optional
# Called after initialization of the Worker class, use this method as an initializer.
def setup; end
# Required
# Put your agent logic in here, it must not return. If it does your agent will be restarted.
def run; end
# Optional
# Use this method to gracefully stop your agent, but make sure the run method returns, or
# terminate the thread.
def stop; end
end
end
=end
module LongRunnable
extend ActiveSupport::Concern

# Hook run when the concern is included: hands the including class to
# AgentRunner.register.
included { |base| AgentRunner.register(base) }

# Whether a worker should be created for this agent. Always true here.
def start_worker?; true; end

# Builds the worker identifier for this agent: the class name, the agent id
# and a SHA1 digest of the JSON-serialized configuration, joined by dashes.
#
# config - configuration to fingerprint; when it is not present the agent's
#          own options are used instead.
#
# Returns the identifier String.
def worker_id(config = nil)
  fingerprint_source = config.presence || options
  digest = Digest::SHA1.hexdigest(fingerprint_source.to_json)
  [self.class.to_s, id, digest].join('-')
end

module ClassMethods
  # Default worker factory: instantiates the including class's nested Worker
  # once for every active agent whose start_worker? is truthy.
  #
  # Returns an Array of workers (empty when no agent qualifies).
  def setup_worker
    active.each_with_object([]) do |agent, workers|
      next unless agent.start_worker?

      workers << self::Worker.new(id: agent.worker_id, agent: agent)
    end
  end
end

# Base class for the background workers of LongRunnable agents. A worker wraps
# the agent logic (#run) in its own thread and tags every scheduler job it
# creates with its id so the jobs can be removed again when it is stopped.
class Worker
  attr_reader :thread, :id, :agent, :config, :mutex, :scheduler

  # options - Hash with the optional keys :id, :agent and :config.
  def initialize(options = {})
    @id = options[:id]
    @agent = options[:agent]
    @config = options[:config]
  end

  # Agent logic; subclasses must override this method.
  #
  # Raises StandardError when it has not been overridden.
  def run
    raise StandardError, 'Override LongRunnable::Worker#run in your agent Worker subclass.'
  end

  # Starts #run in a new thread and remembers it in #thread.
  #
  # Signals and exit requests raised inside the thread stop the worker; any
  # other StandardError is reported through agent.error (message plus the
  # first ten backtrace lines) inside AgentRunner.with_connection.
  #
  # Returns the new Thread.
  def run!
    @thread = Thread.new do
      begin
        run
      rescue SignalException, SystemExit
        stop!
      rescue StandardError => e
        message = "#{id} Exception #{e.message}:\n#{e.backtrace.first(10).join("\n")}"
        AgentRunner.with_connection do
          agent.error(message)
        end
      end
    end
  end

  # Stores the scheduler and mutex, then calls the subclass's optional #setup.
  def setup!(scheduler, mutex)
    @scheduler = scheduler
    @mutex = mutex
    setup if respond_to?(:setup)
  end

  # Unschedules every job tagged with this worker's id, then stops the worker:
  # through the subclass's #stop when it defines one, otherwise by terminating
  # the worker thread.
  def stop!
    @scheduler.jobs(tag: id).each(&:unschedule)

    if respond_to?(:stop)
      stop
    elsif thread
      # The thread only exists after #run!; a worker that was set up but never
      # started has nothing to terminate.
      thread.terminate
    end
  end

  # Stops the worker, repeats the setup with the current scheduler and mutex,
  # and starts a fresh thread.
  def restart!
    stop!
    setup!(scheduler, mutex)
    run!
  end

  # Schedules a job via the scheduler's #every, tagged with this worker's id.
  def every(*args, &blk)
    schedule(:every, args, &blk)
  end

  # Schedules a job via the scheduler's #cron, tagged with this worker's id.
  def cron(*args, &blk)
    schedule(:cron, args, &blk)
  end

  # Schedules a job via the scheduler's #schedule_in, tagged with this worker's id.
  def schedule_in(*args, &blk)
    schedule(:schedule_in, args, &blk)
  end

  # Delegates to the agent's boolify method (invoked through send).
  def boolify(value)
    agent.send(:boolify, value)
  end

  private

  # Forwards to the given scheduler method, appending `tag: id` so that #stop!
  # can find the job again.
  def schedule(method, args, &blk)
    @scheduler.send(method, *args, tag: id, &blk)
  end
end
end
86 changes: 85 additions & 1 deletion app/models/agents/jabber_agent.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
module Agents
class JabberAgent < Agent
include LongRunnable
include FormConfigurable

cannot_be_scheduled!
cannot_create_events!

gem_dependency_check { defined?(Jabber) }

Expand All @@ -16,9 +18,22 @@ class JabberAgent < Agent
can contain any keys found in the source's payload, escaped using double curly braces.
ex: `"News Story: {{title}}: {{url}}"`
When `connect_to_receiver` is set to true, the JabberAgent will emit an event for every message it receives.
Have a look at the [Wiki](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) to learn more about liquid templating.
MD

# Sample payload for the events this agent emits; the keys match the payload
# built in Worker#message_handler (event, time, nick, message).
event_description <<-MD
`event` will be set to either `on_join`, `on_leave`, `on_message`, `on_room_message` or `on_subject`
{
"event": "on_message",
"time": null,
"nick": "Dominik Sander",
"message": "Hello from huginn."
}
MD

def default_options
{
'jabber_server' => '127.0.0.1',
Expand All @@ -31,6 +46,15 @@ def default_options
}
end

# Form field declarations, one per agent option.
# NOTE(review): form_configurable presumably comes from the FormConfigurable
# concern included above and defaults to a plain string field — confirm.
form_configurable :jabber_server
form_configurable :jabber_port
form_configurable :jabber_sender
form_configurable :jabber_receiver
form_configurable :jabber_password
form_configurable :message, type: :text
form_configurable :connect_to_receiver, type: :boolean
form_configurable :expected_receive_period_in_days

# The agent counts as working when it has received something within the
# configured `expected_receive_period_in_days` and has no recent error logs.
# Returns the falsy last_receive_at itself when nothing was received yet.
def working?
  received_at = last_receive_at
  return received_at unless received_at

  period_in_days = interpolated['expected_receive_period_in_days'].to_i
  received_at > period_in_days.days.ago && !recent_error_logs?
end
Expand All @@ -50,6 +74,10 @@ def deliver(text)
client.send Jabber::Message::new(interpolated['jabber_receiver'], text).set_type(:chat)
end

# A worker is only started when the interpolated `connect_to_receiver` option
# boolifies to true.
def start_worker?
  connect_flag = interpolated[:connect_to_receiver]
  boolify(connect_flag)
end

private

def client
Expand All @@ -66,5 +94,61 @@ def credentials_present?
# Returns the `message` option interpolated against the given event.
def body(event)
  rendered_options = interpolated(event)
  rendered_options['message']
end

# Long-running worker that joins the configured `jabber_receiver` through a
# Jabber::MUC::SimpleMUCClient and turns the client's callbacks into agent
# events.
class Worker < LongRunnable::Worker
  # Seconds after start-up during which incoming callbacks are dropped.
  # NOTE(review): presumably skips history replayed right after joining — confirm.
  IGNORE_MESSAGES_FOR = 5

  # Loads the MUC helper lazily, when the worker is set up.
  def setup
    require 'xmpp4r/muc/helper/simplemucclient'
  end

  # Connects, registers a handler for every supported MUC callback, joins the
  # receiver and then polls once per second until the client disconnects.
  def run
    @started_at = Time.now
    @client = client
    muc = Jabber::MUC::SimpleMUCClient.new(@client)

    [:on_join, :on_leave, :on_message, :on_room_message, :on_subject].each do |event|
      muc.__send__(event) do |*args|
        message_handler(event, args)
      end
    end

    muc.join(agent.interpolated['jabber_receiver'])

    sleep(1) while @client.is_connected?
  end

  # Creates an agent event for one MUC callback.
  #
  # event - Symbol name of the callback (e.g. :on_message).
  # args  - Array of the arguments the callback was invoked with.
  def message_handler(event, args)
    return if Time.now - @started_at < IGNORE_MESSAGES_FOR

    time, nick, message = normalize_args(event, args)

    AgentRunner.with_connection do
      agent.create_event(payload: {event: event, time: time, nick: nick, message: message})
    end
  end

  # Closes the client connection and terminates the worker thread.
  #
  # Both are guarded: the client is only assigned once #run has started and the
  # thread only exists after #run!, so stopping a worker that never got that
  # far must not raise. The thread is terminated even when closing fails.
  def stop
    if @client
      @client.close
      @client.stop
    end
  ensure
    thread.terminate if thread
  end

  # Returns the agent's (private) Jabber client.
  def client
    agent.send(:client)
  end

  private

  # Maps the callback arguments onto [time, nick, message]; positions that a
  # callback does not supply are left nil.
  def normalize_args(event, args)
    case event
    when :on_join, :on_leave
      [args[0], args[1]]
    when :on_message, :on_subject
      args
    when :on_room_message
      [args[0], nil, args[1]]
    end
  end
end
end
end
121 changes: 121 additions & 0 deletions app/models/agents/twitter_stream_agent.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module Agents
class TwitterStreamAgent < Agent
include TwitterConcern
include LongRunnable

cannot_receive_events!

Expand Down Expand Up @@ -122,5 +123,125 @@ def remove_unused_keys!(agent, base)
end
end
end

# Builds one Worker per distinct Twitter OAuth token, so agents sharing
# credentials share a single stream.
#
# Each worker receives a map of filter => agents interested in that filter.
# Filters are stripped *before* de-duplication, so an agent listing both "a"
# and " a" is registered only once under "a". A missing `filters` option is
# treated as an empty list.
#
# Returns an Array of workers, or false (after printing the reason to STDERR)
# when the Twitter dependencies are missing.
def self.setup_worker
  if Agents::TwitterStreamAgent.dependencies_missing?
    STDERR.puts Agents::TwitterStreamAgent.twitter_dependencies_missing
    STDERR.flush
    return false
  end

  Agents::TwitterStreamAgent.active.group_by { |agent| agent.twitter_oauth_token }.map do |oauth_token, agents|
    filter_to_agent_map = {}

    agents.each do |agent|
      filters = Array(agent.options[:filters]).flatten.compact.map(&:strip).uniq
      filters.each do |filter|
        (filter_to_agent_map[filter] ||= []) << agent
      end
    end

    # The worker id is derived from the filters, the subscribed agent ids and
    # the token, so it changes whenever any of them changes.
    config_hash = filter_to_agent_map.map { |filter, subscribers| [filter, subscribers.map(&:id)] }
    config_hash.push(oauth_token)

    Worker.new(id: agents.first.worker_id(config_hash),
               config: {filter_to_agent_map: filter_to_agent_map},
               agent: agents.first)
  end
end

# Streams tweets for one set of credentials inside an EventMachine reactor and
# hands every matching tweet to the agents subscribed to the matching filter.
class Worker < LongRunnable::Worker
  RELOAD_TIMEOUT = 60.minutes
  DUPLICATE_DETECTION_LENGTH = 1000
  SEPARATOR = /[^\w_\-]+/

  # Loads the streaming library, picks up the filter map from the worker
  # config and schedules a restart after RELOAD_TIMEOUT.
  def setup
    require 'twitter/json_stream'
    @filter_to_agent_map = @config[:filter_to_agent_map]

    schedule_in(RELOAD_TIMEOUT) do
      puts "--> Restarting TwitterStream #{id}"
      restart!
    end
  end

  # Runs the reactor with a stream for all known filters; once the reactor
  # returns, the thread is put to sleep instead of returning.
  def run
    @recent_tweets = []
    EventMachine.run do
      stream!(@filter_to_agent_map.keys, @agent) { |status| handle_status(status) }
    end
    Thread.stop
  end

  # Stops the reactor when it is running, then terminates the worker thread.
  def stop
    EventMachine.stop_event_loop if EventMachine.reactor_running?
    thread.terminate
  end

  private

  # Opens the stream (filtered when there are filters, the sample stream
  # otherwise) using the agent's credentials and wires up its callbacks.
  def stream!(filters, agent, &block)
    tracked = filters.map(&:downcase).uniq
    tracking = !tracked.empty?
    endpoint = tracking ? 'filter' : 'sample'
    query = "?track=#{tracked.map {|f| CGI::escape(f) }.join(",")}" if tracking

    credentials = {
      :consumer_key => agent.twitter_consumer_key,
      :consumer_secret => agent.twitter_consumer_secret,
      :access_key => agent.twitter_oauth_token,
      :access_secret => agent.twitter_oauth_token_secret
    }

    stream = Twitter::JSONStream.connect(
      :path => "/1/statuses/#{endpoint}.json#{query}",
      :ssl => true,
      :oauth => credentials
    )

    stream.each_item { |status| block.call(status) }

    stream.on_error { |message| STDERR.puts " --> Twitter error: #{message} <--" }

    stream.on_no_data do |_message|
      STDERR.puts " --> Got no data for awhile; trying to reconnect."
      restart!
    end

    stream.on_max_reconnects do |_timeout, _retries|
      STDERR.puts " --> Oops, tried too many times! <--"
      sleep 60
      restart!
    end
  end

  # Parses one stream item, drops deletions, retweets and recently seen
  # tweets, and passes the rest to every agent whose filter words all occur
  # in the tweet text.
  def handle_status(status)
    status = JSON.parse(status) if status.is_a?(String)
    return if !status || status.has_key?('delete') || !status['text']

    status['text'] = status['text']
      .gsub(/&lt;/, "<")
      .gsub(/&gt;/, ">")
      .gsub(/[\t\n\r]/, ' ')

    retweeted = status["retweeted_status"]
    if retweeted.present? && retweeted.is_a?(Hash)
      puts "Skipping retweet: #{status["text"]}"
      return
    end

    if @recent_tweets.include?(status["id_str"])
      puts "Skipping duplicate tweet: #{status["text"]}"
      return
    end

    # Bounded list of recent tweet ids used for duplicate detection.
    @recent_tweets << status["id_str"]
    @recent_tweets.shift if @recent_tweets.length > DUPLICATE_DETECTION_LENGTH
    puts status["text"]

    @filter_to_agent_map.each do |filter, subscribers|
      # A filter matches when none of its words is missing from the tweet.
      unmatched = filter.downcase.split(SEPARATOR) - status["text"].downcase.split(SEPARATOR)
      next unless unmatched.reject(&:empty?).empty?

      subscribers.each do |agent|
        puts " -> #{agent.name}"
        AgentRunner.with_connection { agent.process_tweet(filter, status) }
      end
    end
  end
end
end
end
9 changes: 9 additions & 0 deletions bin/agent_runner.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env ruby

# This process is used to maintain Huginn's upkeep behavior, automatically running scheduled Agents and
# periodically propagating and expiring Events. It also runs TwitterStreamAgents and Agents that support
# long-running background jobs.

require_relative './pre_runner_boot'

# NOTE(review): `except: DelayedJobWorker` presumably keeps the delayed job worker out of this
# process — confirm against AgentRunner.
AgentRunner.new(except: DelayedJobWorker).run
Loading

0 comments on commit 06b022f

Please sign in to comment.