Add collection fetching module
This commit is contained in:
parent
0a3a552696
commit
05081cd81b
|
@ -363,7 +363,8 @@
|
||||||
follow_handshake_timeout: 500,
|
follow_handshake_timeout: 500,
|
||||||
note_replies_output_limit: 5,
|
note_replies_output_limit: 5,
|
||||||
sign_object_fetches: true,
|
sign_object_fetches: true,
|
||||||
authorized_fetch_mode: false
|
authorized_fetch_mode: false,
|
||||||
|
max_collection_objects: 50
|
||||||
|
|
||||||
config :pleroma, :streamer,
|
config :pleroma, :streamer,
|
||||||
workers: 3,
|
workers: 3,
|
||||||
|
|
68
lib/pleroma/collections/fetcher.ex
Normal file
68
lib/pleroma/collections/fetcher.ex
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
# Akkoma: The cooler fediverse server
|
||||||
|
# Copyright © 2022- Akkoma Authors <https://akkoma.dev/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Akkoma.Collections.Fetcher do
  @moduledoc """
  ActivityPub Collections fetching functions
  see: https://www.w3.org/TR/activitystreams-core/#paging

  Paged collections are walked page by page through their `first` / `next`
  links. The number of objects gathered that way is capped by the
  `[:activitypub, :max_collection_objects]` setting. Items embedded directly
  in the collection document are returned as-is, without applying the cap.
  """
  alias Pleroma.Object.Fetcher
  alias Pleroma.Config

  @collection_types ["Collection", "OrderedCollection"]

  # Used when the setting is missing or is not a non-negative integer, so a
  # misconfiguration cannot turn the page walk into an unbounded crawl.
  @default_max_collection_objects 50

  @doc """
  Fetches the collection identified by `ap_id` and returns its items.

  Returns `{:ok, items}` on success. Any non-`{:ok, _}` result from
  `Pleroma.Object.Fetcher.fetch_and_contain_remote_object_from_id/1` -- for
  the collection itself or for any of its pages -- is returned unchanged.
  A fetched document that is not a `Collection` / `OrderedCollection` yields
  `{:error, :invalid_collection}`.
  """
  def fetch_collection_by_ap_id(ap_id) when is_binary(ap_id) do
    fetch_collection(ap_id)
  end

  defp fetch_collection(ap_id) do
    with {:ok, collection} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id) do
      objects_from_collection(collection)
    end
  end

  # Items carried by a single page document. Anything that does not look like
  # a page with a list of items counts as an empty page, which ends the walk.
  defp items_in_page(%{"type" => type, "orderedItems" => items})
       when is_list(items) and type in ["OrderedCollection", "OrderedCollectionPage"],
       do: items

  defp items_in_page(%{"type" => type, "items" => items})
       when is_list(items) and type in ["Collection", "CollectionPage"],
       do: items

  defp items_in_page(_page), do: []

  # Items embedded directly in the collection document.
  defp objects_from_collection(%{"type" => "OrderedCollection", "orderedItems" => items})
       when is_list(items),
       do: {:ok, items}

  defp objects_from_collection(%{"type" => "Collection", "items" => items}) when is_list(items),
    do: {:ok, items}

  # Paged collection whose first page is given by reference.
  defp objects_from_collection(%{"type" => type, "first" => first})
       when is_binary(first) and type in @collection_types do
    fetch_page_items(first)
  end

  # Paged collection whose first page is embedded; the page is still fetched
  # by its id rather than read from the embedded copy.
  defp objects_from_collection(%{"type" => type, "first" => %{"id" => id}})
       when is_binary(id) and type in @collection_types do
    fetch_page_items(id)
  end

  # A collection with neither embedded items nor a usable first page is empty.
  defp objects_from_collection(%{"type" => type}) when type in @collection_types,
    do: {:ok, []}

  defp objects_from_collection(_not_a_collection), do: {:error, :invalid_collection}

  # Walks pages starting at `id`, appending to `items`, until the limit is
  # reached, a page yields no items, or there is no further `next` link.
  defp fetch_page_items(id, items \\ []) do
    limit = max_collection_objects()

    if length(items) >= limit do
      {:ok, items}
    else
      with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(id) do
        case items_in_page(page) do
          [] ->
            {:ok, items}

          objects ->
            # Truncate on every append so a single oversized page cannot push
            # the result past the configured limit.
            maybe_next_page(page, Enum.take(items ++ objects, limit))
        end
      end
    end
  end

  defp maybe_next_page(%{"next" => id}, items) when is_binary(id) do
    fetch_page_items(id, items)
  end

  defp maybe_next_page(_page, items), do: {:ok, items}

  defp max_collection_objects do
    case Config.get([:activitypub, :max_collection_objects]) do
      limit when is_integer(limit) and limit >= 0 -> limit
      _ -> @default_max_collection_objects
    end
  end
end
|
19
test/fixtures/collections/ordered_array.json
vendored
Normal file
19
test/fixtures/collections/ordered_array.json
vendored
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
{
|
||||||
|
"@context": "https://www.w3.org/ns/activitystreams",
|
||||||
|
"id": "https://example.com/collection/ordered_array",
|
||||||
|
"summary": "Object history",
|
||||||
|
"type": "OrderedCollection",
|
||||||
|
"totalItems": 2,
|
||||||
|
"orderedItems": [
|
||||||
|
{
|
||||||
|
"type": "Create",
|
||||||
|
"actor": "http://www.test.example/sally",
|
||||||
|
"object": "http://example.org/foo"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "Like",
|
||||||
|
"actor": "http://www.test.example/joe",
|
||||||
|
"object": "http://example.org/foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
19
test/fixtures/collections/unordered_array.json
vendored
Normal file
19
test/fixtures/collections/unordered_array.json
vendored
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
{
|
||||||
|
"@context": "https://www.w3.org/ns/activitystreams",
|
||||||
|
"id": "https://example.com/collection/unordered_array",
|
||||||
|
"summary": "Object history",
|
||||||
|
"type": "Collection",
|
||||||
|
"totalItems": 2,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "Create",
|
||||||
|
"actor": "http://www.test.example/sally",
|
||||||
|
"object": "http://example.org/foo"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "Like",
|
||||||
|
"actor": "http://www.test.example/joe",
|
||||||
|
"object": "http://example.org/foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
20
test/fixtures/collections/unordered_page_embedded.json
vendored
Normal file
20
test/fixtures/collections/unordered_page_embedded.json
vendored
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"@context": "https://www.w3.org/ns/activitystreams",
|
||||||
|
"summary": "Sally's recent activities",
|
||||||
|
"type": "Collection",
|
||||||
|
"id": "http://example.org/foo",
|
||||||
|
"totalItems": 10,
|
||||||
|
"first": {
|
||||||
|
"type": "CollectionPage",
|
||||||
|
"id": "http://example.org/foo?page=1",
|
||||||
|
"partOf": "http://example.org/foo",
|
||||||
|
"next": "http://example.org/foo?page=2",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "Create",
|
||||||
|
"actor": "http://www.test.example/sally",
|
||||||
|
"object": "http://example.org/foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
13
test/fixtures/collections/unordered_page_first.json
vendored
Normal file
13
test/fixtures/collections/unordered_page_first.json
vendored
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"type": "CollectionPage",
|
||||||
|
"id": "https://example.com/collection/unordered_page_reference?page=1",
|
||||||
|
"partOf": "https://example.com/collection/unordered_page_reference",
|
||||||
|
"next": "https://example.com/collection/unordered_page_reference?page=2",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "Create",
|
||||||
|
"actor": "http://www.test.example/sally",
|
||||||
|
"object": "http://example.org/foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
8
test/fixtures/collections/unordered_page_reference.json
vendored
Normal file
8
test/fixtures/collections/unordered_page_reference.json
vendored
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
{
|
||||||
|
"@context": "https://www.w3.org/ns/activitystreams",
|
||||||
|
"summary": "Sally's recent activities",
|
||||||
|
"type": "Collection",
|
||||||
|
"id": "https://example.com/collection/unordered_page_reference",
|
||||||
|
"totalItems": 10,
|
||||||
|
"first": "https://example.com/collection/unordered_page_reference?page=1"
|
||||||
|
}
|
12
test/fixtures/collections/unordered_page_second.json
vendored
Normal file
12
test/fixtures/collections/unordered_page_second.json
vendored
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
{
|
||||||
|
"type": "CollectionPage",
|
||||||
|
"id": "https://example.com/collection/unordered_page_reference?page=2",
|
||||||
|
"partOf": "https://example.com/collection/unordered_page_reference",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "Like",
|
||||||
|
"actor": "http://www.test.example/sally",
|
||||||
|
"object": "http://example.org/foo"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
167
test/pleroma/collections/collections_fetcher_test.exs
Normal file
167
test/pleroma/collections/collections_fetcher_test.exs
Normal file
|
@ -0,0 +1,167 @@
|
||||||
|
defmodule Akkoma.Collections.FetcherTest do
  use Pleroma.DataCase
  use Oban.Testing, repo: Pleroma.Repo

  alias Akkoma.Collections.Fetcher

  import Tesla.Mock

  @fixture_dir "test/fixtures/collections"

  setup do
    # Default mock; every test below replaces it with its own fixture set.
    mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
    :ok
  end

  test "it should extract items from an embedded array in a Collection" do
    # The URL matches the `id` declared inside the fixture.
    ap_id = "https://example.com/collection/unordered_array"

    mock_collection_fixtures(%{ap_id => "unordered_array.json"})

    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
  end

  test "it should extract items from an embedded array in an OrderedCollection" do
    ap_id = "https://example.com/collection/ordered_array"

    mock_collection_fixtures(%{ap_id => "ordered_array.json"})

    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
  end

  test "it should extract items from a referenced first page in a Collection" do
    ap_id = "https://example.com/collection/unordered_page_reference"
    first_page_id = "https://example.com/collection/unordered_page_reference?page=1"
    second_page_id = "https://example.com/collection/unordered_page_reference?page=2"

    mock_collection_fixtures(%{
      ap_id => "unordered_page_reference.json",
      first_page_id => "unordered_page_first.json",
      second_page_id => "unordered_page_second.json"
    })

    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
  end

  test "it should stop fetching when we hit :max_collection_objects" do
    clear_config([:activitypub, :max_collection_objects], 1)

    ap_id = "https://example.com/collection/unordered_page_reference"
    first_page_id = "https://example.com/collection/unordered_page_reference?page=1"
    second_page_id = "https://example.com/collection/unordered_page_reference?page=2"

    # The second page stays mocked so that exceeding the limit shows up as a
    # failed assertion on the result, not as a missing-mock error.
    mock_collection_fixtures(%{
      ap_id => "unordered_page_reference.json",
      first_page_id => "unordered_page_first.json",
      second_page_id => "unordered_page_second.json"
    })

    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
    assert [%{"type" => "Create"}] = objects
  end

  # Serves each `url => fixture file name` pair as a 200 response with an
  # ActivityPub content type. A GET for a URL that is not in the map raises
  # (via `Map.fetch!/2`), so unexpected requests fail the test loudly.
  defp mock_collection_fixtures(fixtures_by_url) do
    bodies =
      Map.new(fixtures_by_url, fn {url, file_name} ->
        {url, File.read!(Path.join(@fixture_dir, file_name))}
      end)

    mock(fn %{method: :get, url: url} ->
      %Tesla.Env{
        status: 200,
        body: Map.fetch!(bodies, url),
        headers: [{"content-type", "application/activity+json"}]
      }
    end)
  end
end
|
Loading…
Reference in a new issue