From c6b0311b8626b42bc7e79e0195047a50e5b64dd1 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 26 Sep 2016 16:42:38 +0200 Subject: [PATCH] Fix #54 - Fetch remote accounts by URL from mentions Fetching atom extracted from FetchRemoteAccountService and FetchRemoteStatusService into FetchAtomService. Mentions of the constant "http://activityschema.org/collection/public" skipped as it's not a real URL/user. --- app/services/fan_out_on_write_service.rb | 2 +- app/services/fetch_atom_service.rb | 46 +++++++++++++++++++ app/services/fetch_feed_service.rb | 17 ------- app/services/fetch_remote_account_service.rb | 22 +++++++++ app/services/fetch_remote_status_service.rb | 42 ++--------------- app/services/process_feed_service.rb | 10 +++- .../api/subscriptions_controller_spec.rb | 7 ++- spec/services/fetch_atom_service_spec.rb | 4 ++ spec/services/fetch_feed_service_spec.rb | 8 ---- .../fetch_remote_account_service_spec.rb | 4 ++ .../fetch_remote_status_service_spec.rb | 4 ++ 11 files changed, 99 insertions(+), 67 deletions(-) create mode 100644 app/services/fetch_atom_service.rb delete mode 100644 app/services/fetch_feed_service.rb create mode 100644 app/services/fetch_remote_account_service.rb create mode 100644 spec/services/fetch_atom_service_spec.rb delete mode 100644 spec/services/fetch_feed_service_spec.rb create mode 100644 spec/services/fetch_remote_account_service_spec.rb create mode 100644 spec/services/fetch_remote_status_service_spec.rb diff --git a/app/services/fan_out_on_write_service.rb b/app/services/fan_out_on_write_service.rb index b8e2f5c2..2d18709d 100644 --- a/app/services/fan_out_on_write_service.rb +++ b/app/services/fan_out_on_write_service.rb @@ -23,7 +23,7 @@ class FanOutOnWriteService < BaseService def deliver_to_mentioned(status) status.mentions.each do |mention| mentioned_account = mention.account - next unless mentioned_account.local? + next if !mentioned_account.local? || mentioned_account.id == status.account_id FeedManager.instance.push(:mentions, mentioned_account, status) end end diff --git a/app/services/fetch_atom_service.rb b/app/services/fetch_atom_service.rb new file mode 100644 index 00000000..57f789ad --- /dev/null +++ b/app/services/fetch_atom_service.rb @@ -0,0 +1,46 @@ +class FetchAtomService < BaseService + def call(url) + response = http_client.head(url) + + Rails.logger.debug "Remote status HEAD request returned code #{response.code}" + return nil if response.code != 200 + + if response.mime_type == 'application/atom+xml' + return [url, fetch(url)] + elsif !response['Link'].blank? + return process_headers(response) + else + return process_html(fetch(url)) + end + end + + private + + def process_html(body) + Rails.logger.debug "Processing HTML" + + page = Nokogiri::HTML(body) + alternate_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == 'application/atom+xml' } + + return nil if alternate_link.nil? + return [alternate_link['href'], fetch(alternate_link['href'])] + end + + def process_headers(response) + Rails.logger.debug "Processing link header" + + link_header = LinkHeader.parse(response['Link']) + alternate_link = link_header.find_link(['rel', 'alternate'], ['type', 'application/atom+xml']) + + return nil if alternate_link.nil? + return [alternate_link.href, fetch(alternate_link.href)] + end + + def fetch(url) + http_client.get(url).to_s + end + + def http_client + HTTP.timeout(:per_operation, write: 20, connect: 20, read: 50) + end +end diff --git a/app/services/fetch_feed_service.rb b/app/services/fetch_feed_service.rb deleted file mode 100644 index f18e9fc0..00000000 --- a/app/services/fetch_feed_service.rb +++ /dev/null @@ -1,17 +0,0 @@ -class FetchFeedService < BaseService - # Fetch an account's feed and process it - # @param [Account] account - def call(account) - process_service.(http_client.get(account.remote_url), account) - end - - private - - def process_service - @process_service ||= ProcessFeedService.new - end - - def http_client - HTTP - end -end diff --git a/app/services/fetch_remote_account_service.rb b/app/services/fetch_remote_account_service.rb new file mode 100644 index 00000000..d24ac2a3 --- /dev/null +++ b/app/services/fetch_remote_account_service.rb @@ -0,0 +1,22 @@ +class FetchRemoteAccountService < BaseService + def call(url) + atom_url, body = FetchAtomService.new.(url) + + return nil if atom_url.nil? + return process_atom(atom_url, body) + end + + private + + def process_atom(url, body) + url_parts = Addressable::URI.parse(url) + username = xml.at_xpath('//xmlns:author/xmlns:name').try(:content) + domain = url_parts.host + + return nil if username.nil? + + Rails.logger.debug "Going to webfinger #{username}@#{domain}" + + return FollowRemoteAccountService.new.("#{username}@#{domain}") + end +end diff --git a/app/services/fetch_remote_status_service.rb b/app/services/fetch_remote_status_service.rb index c872cb38..7613607b 100644 --- a/app/services/fetch_remote_status_service.rb +++ b/app/services/fetch_remote_status_service.rb @@ -1,17 +1,9 @@ class FetchRemoteStatusService < BaseService def call(url) - response = http_client.head(url) + atom_url, body = FetchAtomService.new.(url) - Rails.logger.debug "Remote status HEAD request returned code #{response.code}" - return nil if response.code != 200 - - if response.mime_type == 'application/atom+xml' - return process_atom(url, fetch(url)) - elsif !response['Link'].blank? - return process_headers(response) - else - return process_html(fetch(url)) - end + return nil if atom_url.nil? + return process_atom(atom_url, body) end private @@ -29,26 +21,6 @@ class FetchRemoteStatusService < BaseService return statuses.first end - def process_html(body) - Rails.logger.debug "Processing HTML for remote status" - - page = Nokogiri::HTML(body) - alternate_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == 'application/atom+xml' } - - return nil if alternate_link.nil? - return process_atom(alternate_link['href'], fetch(alternate_link['href'])) - end - - def process_headers(response) - Rails.logger.debug "Processing link header for remote status" - - link_header = LinkHeader.parse(response['Link']) - alternate_link = link_header.find_link(['rel', 'alternate'], ['type', 'application/atom+xml']) - - return nil if alternate_link.nil? - return process_atom(alternate_link.href, fetch(alternate_link.href)) - end - def extract_author(url, xml) url_parts = Addressable::URI.parse(url) username = xml.at_xpath('//xmlns:author/xmlns:name').try(:content) @@ -60,12 +32,4 @@ class FetchRemoteStatusService < BaseService return FollowRemoteAccountService.new.("#{username}@#{domain}") end - - def fetch(url) - http_client.get(url).to_s - end - - def http_client - HTTP.timeout(:per_operation, write: 20, connect: 20, read: 50) - end end diff --git a/app/services/process_feed_service.rb b/app/services/process_feed_service.rb index 5e760bc7..47992b24 100644 --- a/app/services/process_feed_service.rb +++ b/app/services/process_feed_service.rb @@ -57,7 +57,11 @@ class ProcessFeedService < BaseService # and tidier links.each do |mention_link| - href = Addressable::URI.parse(mention_link.attribute('href').value) + href_val = mention_link.attribute('href').value + + next if href_val == 'http://activityschema.org/collection/public' + + href = Addressable::URI.parse(href_val) if href.host == Rails.configuration.x.local_domain # A local user is mentioned @@ -72,6 +76,10 @@ class ProcessFeedService < BaseService # This is kinda dodgy because URLs could change, we don't index them mentioned_account = Account.find_by(url: href.to_s) + if mentioned_account.nil? + mentioned_account = FetchRemoteAccountService.new.(href) + end + unless mentioned_account.nil? mentioned_account.mentions.where(status: status).first_or_create(status: status) end diff --git a/spec/controllers/api/subscriptions_controller_spec.rb b/spec/controllers/api/subscriptions_controller_spec.rb index e0ae8d48..2af6cb72 100644 --- a/spec/controllers/api/subscriptions_controller_spec.rb +++ b/spec/controllers/api/subscriptions_controller_spec.rb @@ -32,7 +32,12 @@ RSpec.describe Api::SubscriptionsController, type: :controller do stub_request(:head, "https://social.umeahackerspace.se/notice/424348").to_return(status: 404) stub_request(:head, "https://community.highlandarrow.com/notice/50467").to_return(status: 404) stub_request(:head, "https://quitter.no/notice/1243309").to_return(status: 404) - + stub_request(:head, "https://quitter.no/user/7477").to_return(status: 404) + stub_request(:head, "https://community.highlandarrow.com/user/1").to_return(status: 404) + stub_request(:head, "https://social.umeahackerspace.se/user/2").to_return(status: 404) + stub_request(:head, "https://gs.kawa-kun.com/user/2").to_return(status: 404) + stub_request(:head, "https://mastodon.social/users/Gargron").to_return(status: 404) + request.env['HTTP_X_HUB_SIGNATURE'] = "sha1=#{OpenSSL::HMAC.hexdigest('sha1', 'abc', feed)}" request.env['RAW_POST_DATA'] = feed diff --git a/spec/services/fetch_atom_service_spec.rb b/spec/services/fetch_atom_service_spec.rb new file mode 100644 index 00000000..5491fd02 --- /dev/null +++ b/spec/services/fetch_atom_service_spec.rb @@ -0,0 +1,4 @@ +require 'rails_helper' + +RSpec.describe FetchAtomService do +end diff --git a/spec/services/fetch_feed_service_spec.rb b/spec/services/fetch_feed_service_spec.rb deleted file mode 100644 index a28333fe..00000000 --- a/spec/services/fetch_feed_service_spec.rb +++ /dev/null @@ -1,8 +0,0 @@ -require 'rails_helper' - -RSpec.describe FetchFeedService do - subject { FetchFeedService.new } - - it 'fetches remote user\'s feed' - it 'processes the feed' -end diff --git a/spec/services/fetch_remote_account_service_spec.rb b/spec/services/fetch_remote_account_service_spec.rb new file mode 100644 index 00000000..bb1877c7 --- /dev/null +++ b/spec/services/fetch_remote_account_service_spec.rb @@ -0,0 +1,4 @@ +require 'rails_helper' + +RSpec.describe FetchRemoteAccountService do +end diff --git a/spec/services/fetch_remote_status_service_spec.rb b/spec/services/fetch_remote_status_service_spec.rb new file mode 100644 index 00000000..cbdecbf2 --- /dev/null +++ b/spec/services/fetch_remote_status_service_spec.rb @@ -0,0 +1,4 @@ +require 'rails_helper' + +RSpec.describe FetchRemoteStatusService do +end