akkoma/test/mix/tasks/pleroma/database_test.exs

639 lines
20 KiB
Elixir
Raw Permalink Normal View History

# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Mix.Tasks.Pleroma.DatabaseTest do
use Pleroma.DataCase, async: true
2020-08-22 17:46:01 +00:00
use Oban.Testing, repo: Pleroma.Repo
alias Pleroma.Activity
alias Pleroma.Bookmark
alias Pleroma.Object
alias Pleroma.Repo
alias Pleroma.User
alias Pleroma.Web.CommonAPI
import Pleroma.Factory
setup_all do
Mix.shell(Mix.Shell.Process)
on_exit(fn ->
Mix.shell(Mix.Shell.IO)
end)
:ok
end
describe "running remove_embedded_objects" do
test "it replaces objects with references" do
user = insert(:user)
2020-05-12 19:59:26 +00:00
{:ok, activity} = CommonAPI.post(user, %{status: "test"})
new_data = Map.put(activity.data, "object", activity.object.data)
{:ok, activity} =
activity
|> Activity.change(%{data: new_data})
|> Repo.update()
assert is_map(activity.data["object"])
Mix.Tasks.Pleroma.Database.run(["remove_embedded_objects"])
activity = Activity.get_by_id_with_object(activity.id)
assert is_binary(activity.data["object"])
end
end
describe "prune_objects" do
setup do
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
old_insert_date =
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
Timex.now()
|> Timex.shift(days: -deadline)
|> Timex.to_naive_datetime()
|> NaiveDateTime.truncate(:second)
%{old_insert_date: old_insert_date}
end
test "it prunes old objects from the database", %{old_insert_date: old_insert_date} do
insert(:note)
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
%{id: note_remote_public_id} =
:note
|> insert()
|> Ecto.Changeset.change(%{updated_at: old_insert_date})
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
|> Repo.update!()
note_remote_non_public =
%{id: note_remote_non_public_id, data: note_remote_non_public_data} =
:note
|> insert()
note_remote_non_public
|> Ecto.Changeset.change(%{
updated_at: old_insert_date,
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end)
})
|> Repo.update!()
assert length(Repo.all(Object)) == 3
Mix.Tasks.Pleroma.Database.run(["prune_objects"])
assert length(Repo.all(Object)) == 1
refute Object.get_by_id(note_remote_public_id)
refute Object.get_by_id(note_remote_non_public_id)
end
test "it cleans up bookmarks", %{old_insert_date: old_insert_date} do
user = insert(:user)
{:ok, old_object_activity} = CommonAPI.post(user, %{status: "yadayada"})
Repo.one(Object)
|> Ecto.Changeset.change(%{updated_at: old_insert_date})
|> Repo.update!()
{:ok, new_object_activity} = CommonAPI.post(user, %{status: "yadayada"})
{:ok, _} = Bookmark.create(user.id, old_object_activity.id)
{:ok, _} = Bookmark.create(user.id, new_object_activity.id)
assert length(Repo.all(Object)) == 2
assert length(Repo.all(Bookmark)) == 2
Mix.Tasks.Pleroma.Database.run(["prune_objects"])
assert length(Repo.all(Object)) == 1
assert length(Repo.all(Bookmark)) == 1
refute Bookmark.get(user.id, old_object_activity.id)
end
test "with the --keep-non-public option it still keeps non-public posts even if they are not local",
%{old_insert_date: old_insert_date} do
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
insert(:note)
%{id: note_remote_id} =
:note
|> insert()
|> Ecto.Changeset.change(%{updated_at: old_insert_date})
|> Repo.update!()
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
note_remote_non_public =
%{data: note_remote_non_public_data} =
:note
|> insert()
note_remote_non_public
|> Ecto.Changeset.change(%{
updated_at: old_insert_date,
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end)
})
|> Repo.update!()
assert length(Repo.all(Object)) == 3
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-non-public"])
assert length(Repo.all(Object)) == 2
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
refute Object.get_by_id(note_remote_id)
end
test "with the --keep-threads and --keep-non-public option it keeps old threads with non-public replies even if the interaction is not local",
%{old_insert_date: old_insert_date} do
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
# For non-public we only check Create Activities because only these are relevant for threads
# Flags are always non-public, Announces from relays can be non-public...
remote_user1 = insert(:user, local: false)
remote_user2 = insert(:user, local: false)
# Old remote non-public reply (should be kept)
{:ok, old_remote_post1_activity} =
CommonAPI.post(remote_user1, %{status: "some thing", local: false})
old_remote_post1_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_remote_non_public_reply_activity} =
CommonAPI.post(remote_user2, %{
status: "some reply",
in_reply_to_status_id: old_remote_post1_activity.id
})
old_remote_non_public_reply_activity
|> Ecto.Changeset.change(%{
local: false,
updated_at: old_insert_date,
data: old_remote_non_public_reply_activity.data |> update_in(["to"], fn _ -> [] end)
})
|> Repo.update!()
# Old remote non-public Announce (should be removed)
{:ok, old_remote_post2_activity = %{data: %{"object" => old_remote_post2_id}}} =
CommonAPI.post(remote_user1, %{status: "some thing", local: false})
old_remote_post2_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_remote_non_public_repeat_activity} =
CommonAPI.repeat(old_remote_post2_activity.id, remote_user2)
old_remote_non_public_repeat_activity
|> Ecto.Changeset.change(%{
local: false,
updated_at: old_insert_date,
data: old_remote_non_public_repeat_activity.data |> update_in(["to"], fn _ -> [] end)
})
|> Repo.update!()
assert length(Repo.all(Object)) == 3
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads", "--keep-non-public"])
Repo.all(Pleroma.Activity)
assert length(Repo.all(Object)) == 2
refute Object.get_by_ap_id(old_remote_post2_id)
end
test "with the --keep-threads option it still keeps non-old threads even with no local interactions" do
remote_user = insert(:user, local: false)
remote_user2 = insert(:user, local: false)
{:ok, remote_post_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
{:ok, remote_post_reply_activity} =
CommonAPI.post(remote_user2, %{
status: "some reply",
in_reply_to_status_id: remote_post_activity.id
})
remote_post_activity
|> Ecto.Changeset.change(%{local: false})
|> Repo.update!()
remote_post_reply_activity
|> Ecto.Changeset.change(%{local: false})
|> Repo.update!()
assert length(Repo.all(Object)) == 2
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert length(Repo.all(Object)) == 2
end
test "with the --keep-threads option it deletes old threads with no local interaction", %{
old_insert_date: old_insert_date
} do
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
remote_user = insert(:user, local: false)
remote_user2 = insert(:user, local: false)
{:ok, old_remote_post_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_remote_post_reply_activity} =
CommonAPI.post(remote_user2, %{
status: "some reply",
in_reply_to_status_id: old_remote_post_activity.id
})
old_remote_post_reply_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_favourite_activity} =
CommonAPI.favorite(remote_user2, old_remote_post_activity.id)
old_favourite_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post_activity.id, remote_user2)
old_repeat_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
assert length(Repo.all(Object)) == 2
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert length(Repo.all(Object)) == 0
end
test "with the --keep-threads option it keeps old threads with local interaction", %{
old_insert_date: old_insert_date
} do
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
remote_user = insert(:user, local: false)
local_user = insert(:user, local: true)
# local reply
{:ok, old_remote_post1_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post1_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_local_post2_reply_activity} =
CommonAPI.post(local_user, %{
status: "some reply",
in_reply_to_status_id: old_remote_post1_activity.id
})
old_local_post2_reply_activity
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|> Repo.update!()
# local Like
{:ok, old_remote_post3_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post3_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_favourite_activity} = CommonAPI.favorite(local_user, old_remote_post3_activity.id)
old_favourite_activity
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|> Repo.update!()
# local Announce
{:ok, old_remote_post4_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post4_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post4_activity.id, local_user)
old_repeat_activity
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|> Repo.update!()
assert length(Repo.all(Object)) == 4
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert length(Repo.all(Object)) == 4
end
test "with the --keep-threads option it keeps old threads with bookmarked posts", %{
old_insert_date: old_insert_date
} do
Prune Objects --keep-threads option (#350) This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * ~~Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted.~~ fixed in https://akkoma.dev/AkkomaGang/akkoma/pulls/379 * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still kept old remote non-public posts. I added an option to keep this behaviour, but this also means that you now have to explicitly provide that option. **This could be considered a breaking change!** * ~~Note that this removes from the objects table, but not from the activities.~~ See https://akkoma.dev/AkkomaGang/akkoma/pulls/427 for that. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms *** **TODO** 1. [x] **Question:** Is it OK to keep it like this in regard to quote posts? If not (ie post quoted by local users should also be kept), should we give quotes the same context as the post they are quoting? (If we don't want to give them the same context, I'll have to see how/if I can do it without being too costly) * See https://akkoma.dev/AkkomaGang/akkoma/pulls/379 2. [x] **Question:** the "original" query only deletes public posts (this is undocumented, but you can check the code). This new one doesn't care for scope. From the docs I get that the idea is that posts can be refetched when needed. But I have from a trusted source that Pleroma can't refetch non-public posts. I assume that's the reason why they are kept here. I see different options to deal with this 1. ~~We keep it as currently implemented and just don't care about scope with this option~~ 2. ~~We add logic to not delete non-public posts either (I'll have to see how costly that becomes)~~ 3. We add an extra --keep-non-public parameter. This is technically speaking breakage (you didn't have to provide a param before for this, now you do), but I'm inclined to not care much because it wasn't documented nor tested in the first place. 3. [x] See if we can do the query using Elixir 4. [x] Test on a bigger DB to see that we don't run into a timeout 5. [x] Add docs Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/350 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
2023-01-09 22:15:41 +00:00
remote_user = insert(:user, local: false)
local_user = insert(:user, local: true)
{:ok, old_remote_post_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
Pleroma.Bookmark.create(local_user.id, old_remote_post_activity.id)
assert length(Repo.all(Object)) == 1
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert length(Repo.all(Object)) == 1
end
test "We don't have unexpected tables which may contain objects that are referenced by activities" do
# We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table.
# If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we
# add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities.
# So when someone adds (or removes) a table, this test will fail.
# Either the table contains objects which can be referenced from the activities table
# => in that case the prune_objects job should be adapted so we don't delete activities who still have the referenced object.
# Or it doesn't contain objects which can be referenced from the activities table
# => in that case you can add/remove the table to/from this (sorted) list.
assert Repo.query!(
"SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';"
).rows
|> Enum.sort() == [
["activities"],
["announcement_read_relationships"],
["announcements"],
["apps"],
["backups"],
["bookmarks"],
["chat_message_references"],
["chats"],
["config"],
["conversation_participation_recipient_ships"],
["conversation_participations"],
["conversations"],
["counter_cache"],
["data_migration_failed_ids"],
["data_migrations"],
["deliveries"],
["filters"],
["following_relationships"],
["hashtags"],
["hashtags_objects"],
["instances"],
["lists"],
["markers"],
["mfa_tokens"],
["moderation_log"],
["notifications"],
["oauth_authorizations"],
["oauth_tokens"],
["oban_jobs"],
["oban_peers"],
["objects"],
["password_reset_tokens"],
["push_subscriptions"],
["registrations"],
["report_notes"],
["scheduled_activities"],
["schema_migrations"],
["thread_mutes"],
["user_follows_hashtag"],
["user_frontend_setting_profiles"],
["user_invite_tokens"],
["user_notes"],
["user_relationships"],
["users"]
]
end
test "it prunes orphaned activities with the --prune-orphaned-activities" do
%Object{} |> Map.merge(%{data: %{"id" => "object_for_activity"}}) |> Repo.insert()
%Activity{}
|> Map.merge(%{
local: false,
data: %{"id" => "remote_activity_with_object", "object" => "object_for_activity"}
})
|> Repo.insert()
%Activity{}
|> Map.merge(%{
local: false,
data: %{
"id" => "remote_activity_with_activity",
"object" => "remote_activity_with_object"
}
})
|> Repo.insert()
%User{} |> Map.merge(%{ap_id: "actor"}) |> Repo.insert()
%Activity{}
|> Map.merge(%{
local: false,
data: %{"id" => "remote_activity_with_actor", "object" => "actor"}
})
|> Repo.insert()
%Activity{}
|> Map.merge(%{
local: false,
data: %{
"id" => "remote_activity_without_existing_referenced_object",
"object" => "non_existing"
}
})
|> Repo.insert()
%Activity{}
|> Map.merge(%{
local: true,
data: %{"id" => "local_activity_with_actor", "object" => "non_existing"}
})
|> Repo.insert()
assert length(Repo.all(Activity)) == 5
Mix.Tasks.Pleroma.Database.run(["prune_objects"])
assert length(Repo.all(Activity)) == 5
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
activities = Repo.all(Activity)
assert "remote_activity_without_existing_referenced_object" not in Enum.map(
activities,
fn a -> a.data["id"] end
)
assert length(activities) == 4
end
test "it prunes orphaned activities with the --prune-orphaned-activities when the objects are referenced from an array" do
%Object{} |> Map.merge(%{data: %{"id" => "existing_object"}}) |> Repo.insert()
%User{} |> Map.merge(%{ap_id: "existing_actor"}) |> Repo.insert()
%Activity{}
|> Map.merge(%{
local: false,
data: %{
"id" => "remote_activity_existing_object",
"object" => ["non_ existing_object", "existing_object"]
}
})
|> Repo.insert()
%Activity{}
|> Map.merge(%{
local: false,
data: %{
"id" => "remote_activity_existing_actor",
"object" => ["non_ existing_object", "existing_actor"]
}
})
|> Repo.insert()
%Activity{}
|> Map.merge(%{
local: false,
data: %{
"id" => "remote_activity_existing_activity",
"object" => ["non_ existing_object", "remote_activity_existing_actor"]
}
})
|> Repo.insert()
%Activity{}
|> Map.merge(%{
local: false,
data: %{
"id" => "remote_activity_without_existing_referenced_object",
"object" => ["owo", "whats_this"]
}
})
|> Repo.insert()
assert length(Repo.all(Activity)) == 4
Mix.Tasks.Pleroma.Database.run(["prune_objects"])
assert length(Repo.all(Activity)) == 4
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
activities = Repo.all(Activity)
assert length(activities) == 3
assert "remote_activity_without_existing_referenced_object" not in Enum.map(
activities,
fn a -> a.data["id"] end
)
assert length(activities) == 3
end
end
describe "running update_users_following_followers_counts" do
test "following and followers count are updated" do
[user, user2] = insert_pair(:user)
2020-12-01 20:17:52 +00:00
{:ok, %User{} = user, _user2} = User.follow(user, user2)
following = User.following(user)
assert length(following) == 2
assert user.follower_count == 0
{:ok, user} =
user
|> Ecto.Changeset.change(%{follower_count: 3})
|> Repo.update()
assert user.follower_count == 3
assert {:ok, :ok} ==
Mix.Tasks.Pleroma.Database.run(["update_users_following_followers_counts"])
user = User.get_by_id(user.id)
assert length(User.following(user)) == 2
assert user.follower_count == 0
end
end
describe "running fix_likes_collections" do
test "it turns OrderedCollection likes into empty arrays" do
[user, user2] = insert_pair(:user)
2020-05-12 19:59:26 +00:00
{:ok, %{id: id, object: object}} = CommonAPI.post(user, %{status: "test"})
{:ok, %{object: object2}} = CommonAPI.post(user, %{status: "test test"})
CommonAPI.favorite(user2, id)
likes = %{
"first" =>
"http://mastodon.example.org/objects/dbdbc507-52c8-490d-9b7c-1e1d52e5c132/likes?page=1",
"id" => "http://mastodon.example.org/objects/dbdbc507-52c8-490d-9b7c-1e1d52e5c132/likes",
"totalItems" => 3,
"type" => "OrderedCollection"
}
new_data = Map.put(object2.data, "likes", likes)
object2
|> Ecto.Changeset.change(%{data: new_data})
|> Repo.update()
assert length(Object.get_by_id(object.id).data["likes"]) == 1
assert is_map(Object.get_by_id(object2.id).data["likes"])
assert :ok == Mix.Tasks.Pleroma.Database.run(["fix_likes_collections"])
assert length(Object.get_by_id(object.id).data["likes"]) == 1
assert Enum.empty?(Object.get_by_id(object2.id).data["likes"])
end
end
describe "ensure_expiration" do
test "it adds to expiration old statuses" do
2020-08-22 17:46:01 +00:00
activity1 = insert(:note_activity)
2020-08-22 17:46:01 +00:00
{:ok, inserted_at, 0} = DateTime.from_iso8601("2015-01-23T23:50:07Z")
activity2 = insert(:note_activity, %{inserted_at: inserted_at})
2020-08-22 17:46:01 +00:00
%{id: activity_id3} = insert(:note_activity)
2020-08-22 17:46:01 +00:00
expires_at = DateTime.add(DateTime.utc_now(), 60 * 61)
2020-08-22 17:46:01 +00:00
Pleroma.Workers.PurgeExpiredActivity.enqueue(%{
activity_id: activity_id3,
expires_at: expires_at
})
Mix.Tasks.Pleroma.Database.run(["ensure_expiration"])
2020-08-22 17:46:01 +00:00
assert_enqueued(
worker: Pleroma.Workers.PurgeExpiredActivity,
args: %{activity_id: activity1.id},
scheduled_at:
activity1.inserted_at
|> DateTime.from_naive!("Etc/UTC")
|> Timex.shift(days: 365)
)
assert_enqueued(
worker: Pleroma.Workers.PurgeExpiredActivity,
args: %{activity_id: activity2.id},
scheduled_at:
activity2.inserted_at
|> DateTime.from_naive!("Etc/UTC")
|> Timex.shift(days: 365)
)
assert_enqueued(
worker: Pleroma.Workers.PurgeExpiredActivity,
args: %{activity_id: activity_id3},
scheduled_at: expires_at
)
end
end
end