Automatic status translation (#187)

Fixes #115

Co-authored-by: FloatingGhost <hannah@coffee-and-dreams.uk>
Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/187
This commit is contained in:
floatingghost 2022-08-29 19:42:22 +00:00
parent 722e56b308
commit df39cab9c1
15 changed files with 543 additions and 7 deletions

View file

@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- support for setting instance languages in metadata - support for setting instance languages in metadata
- support for reusing oauth tokens, and not requiring new authorizations - support for reusing oauth tokens, and not requiring new authorizations
- the ability to obfuscate domains in your MRF descriptions - the ability to obfuscate domains in your MRF descriptions
- automatic translation of statuses via DeepL or LibreTranslate
### Changed ### Changed
- MFM parsing is now done on the backend by a modified version of ilja's parser -> https://akkoma.dev/AkkomaGang/mfm-parser - MFM parsing is now done on the backend by a modified version of ilja's parser -> https://akkoma.dev/AkkomaGang/mfm-parser

View file

@ -843,6 +843,19 @@
} }
} }
# Status translation is off by default. `module` selects the translation
# backend and must be the full name of a module implementing the
# Pleroma.Akkoma.Translator behaviour.
config :pleroma, :translator,
  enabled: false,
  module: Pleroma.Akkoma.Translators.DeepL

config :pleroma, :deepl,
  # either :free or :pro
  tier: :free,
  api_key: ""

config :pleroma, :libre_translate,
  url: "http://127.0.0.1:5000",
  api_key: nil
# Import environment specific config. This must remain at the bottom # Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above. # of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs" import_config "#{Mix.env()}.exs"

View file

@ -3226,13 +3226,14 @@
group: :pleroma, group: :pleroma,
key: Pleroma.Search, key: Pleroma.Search,
type: :group, type: :group,
label: "Search",
description: "General search settings.", description: "General search settings.",
children: [ children: [
%{ %{
key: :module, key: :module,
type: :keyword, type: :module,
description: "Selected search module.", description: "Selected search module.",
suggestion: [Pleroma.Search.DatabaseSearch, Pleroma.Search.Meilisearch] suggestions: {:list_behaviour_implementations, Pleroma.Search.SearchBackend}
} }
] ]
}, },
@ -3257,7 +3258,7 @@
}, },
%{ %{
key: :initial_indexing_chunk_size, key: :initial_indexing_chunk_size,
type: :int, type: :integer,
description: description:
"Amount of posts in a batch when running the initial indexing operation. Should probably not be more than 100000" <> "Amount of posts in a batch when running the initial indexing operation. Should probably not be more than 100000" <>
" since there's a limit on maximum insert size", " since there's a limit on maximum insert size",
@ -3268,6 +3269,7 @@
%{ %{
group: :pleroma, group: :pleroma,
key: Pleroma.Search.Elasticsearch.Cluster, key: Pleroma.Search.Elasticsearch.Cluster,
label: "Elasticsearch",
type: :group, type: :group,
description: "Elasticsearch settings.", description: "Elasticsearch settings.",
children: [ children: [
@ -3334,13 +3336,13 @@
}, },
%{ %{
key: :bulk_page_size, key: :bulk_page_size,
type: :int, type: :integer,
description: "Size for bulk put requests, mostly used on building the index", description: "Size for bulk put requests, mostly used on building the index",
suggestion: [5000] suggestion: [5000]
}, },
%{ %{
key: :bulk_wait_interval, key: :bulk_wait_interval,
type: :int, type: :integer,
description: "Time to wait between bulk put requests (in ms)", description: "Time to wait between bulk put requests (in ms)",
suggestion: [15_000] suggestion: [15_000]
} }
@ -3349,5 +3351,66 @@
] ]
} }
] ]
},
# Description of the `config :pleroma, :translator` settings group.
%{
  group: :pleroma,
  key: :translator,
  type: :group,
  description: "Translation Settings",
  children: [
    %{
      key: :enabled,
      type: :boolean,
      description: "Is translation enabled?",
      # The key is `suggestions` (plural), as used by the other entries in this file.
      suggestions: [true, false]
    },
    %{
      key: :module,
      type: :module,
      description: "Translation module.",
      suggestions: {:list_behaviour_implementations, Pleroma.Akkoma.Translator}
    }
  ]
},
# Description of the `config :pleroma, :deepl` settings group.
%{
  group: :pleroma,
  key: :deepl,
  label: "DeepL",
  type: :group,
  description: "DeepL Settings.",
  children: [
    %{
      # Selects between the two DeepL API tiers.
      key: :tier,
      type: {:dropdown, :atom},
      description: "API Tier",
      suggestions: [:free, :pro]
    },
    %{
      key: :api_key,
      type: :string,
      description: "API key for DeepL",
      suggestions: [nil]
    }
  ]
},
%{
group: :pleroma,
key: :libre_translate,
type: :group,
description: "LibreTranslate Settings.",
# The key is `suggestions` (plural), as used by the other entries in this file.
children: [
  %{
    key: :url,
    type: :string,
    description: "URL for libretranslate",
    suggestions: ["http://127.0.0.1:5000"]
  },
  %{
    key: :api_key,
    type: :string,
    description: "API key for libretranslate",
    suggestions: [nil]
  }
]
} }
] ]

View file

@ -1159,3 +1159,28 @@ Each job has these settings:
* `:max_running` - max concurrently runnings jobs * `:max_running` - max concurrently runnings jobs
* `:max_waiting` - max waiting jobs * `:max_waiting` - max waiting jobs
### Translation Settings
Settings to automatically translate statuses for end users. Currently supported
translation services are DeepL and LibreTranslate.
Translations are available at `/api/v1/statuses/:id/translations/:language`, where
`language` is the target language code (e.g. `en`).
### `:translator`
- `:enabled` - enables translation
- `:module` - Sets module to be used
- Either `Pleroma.Akkoma.Translators.DeepL` or `Pleroma.Akkoma.Translators.LibreTranslate`
### `:deepl`
- `:api_key` - API key for DeepL
- `:tier` - API tier
- either `:free` or `:pro`
### `:libre_translate`
- `:url` - URL of LibreTranslate instance
- `:api_key` - API key for LibreTranslate

View file

@ -0,0 +1,58 @@
defmodule Pleroma.Akkoma.Translators.DeepL do
  @moduledoc """
  `Pleroma.Akkoma.Translator` implementation that calls the DeepL HTTP API.

  Settings are read from the `:deepl` config group:

    * `:api_key` - sent in the `authorization` header as `DeepL-Auth-Key <key>`
    * `:tier` - `:free` or `:pro`; selects which DeepL host is used
  """

  @behaviour Pleroma.Akkoma.Translator

  alias Pleroma.Config
  alias Pleroma.HTTP

  require Logger

  # Only the two documented tiers are accepted; any other value raises.
  defp base_url(:free), do: "https://api-free.deepl.com/v2/"
  defp base_url(:pro), do: "https://api.deepl.com/v2/"

  defp api_key, do: Config.get([:deepl, :api_key])

  defp tier, do: Config.get([:deepl, :tier])

  @doc """
  Translates `string` into `to_language` using DeepL.

  Returns `{:ok, detected_source_language, translated_text}` on success.
  Returns `{:error, reason}` when the request fails, when DeepL answers with a
  non-200 status, or when the response body is not valid JSON or lacks the
  expected `translations` entry.
  """
  @impl Pleroma.Akkoma.Translator
  def translate(string, to_language) do
    with {:ok, %{status: 200} = response} <- do_request(api_key(), tier(), string, to_language),
         {:ok, body} <- Jason.decode(response.body) do
      extract_translation(body)
    else
      {:ok, %{status: status} = response} ->
        Logger.warning("DeepL: Request rejected: #{inspect(response)}")
        {:error, "DeepL request failed (code #{status})"}

      {:error, reason} ->
        {:error, reason}
    end
  end

  # Picks the first translation out of a decoded DeepL response body.
  defp extract_translation(%{
         "translations" => [
           %{"text" => translated, "detected_source_language" => detected} | _
         ]
       }) do
    {:ok, detected, translated}
  end

  # A 200 response with an unexpected shape is reported as an error tuple
  # rather than crashing the caller with a MatchError.
  defp extract_translation(body) do
    Logger.warning("DeepL: Unexpected response body: #{inspect(body)}")
    {:error, "DeepL returned an unexpected response"}
  end

  # POSTs the form-encoded translation request to the tier-specific endpoint.
  defp do_request(api_key, tier, string, to_language) do
    HTTP.post(
      base_url(tier) <> "translate",
      URI.encode_query(
        %{
          text: string,
          target_lang: to_language
        },
        :rfc3986
      ),
      [
        {"authorization", "DeepL-Auth-Key #{api_key}"},
        {"content-type", "application/x-www-form-urlencoded"}
      ]
    )
  end
end

View file

@ -0,0 +1,51 @@
defmodule Pleroma.Akkoma.Translators.LibreTranslate do
  @moduledoc """
  `Pleroma.Akkoma.Translator` implementation that calls a LibreTranslate instance.

  Settings are read from the `:libre_translate` config group:

    * `:url` - base URL of the instance; requests go to its `/translate` path
    * `:api_key` - sent in the request body as `api_key` (may be `nil`)
  """

  @behaviour Pleroma.Akkoma.Translator

  alias Pleroma.Config
  alias Pleroma.HTTP

  require Logger

  defp api_key, do: Config.get([:libre_translate, :api_key])

  defp url, do: Config.get([:libre_translate, :url])

  @doc """
  Translates `string` into `to_language` using LibreTranslate.

  Returns `{:ok, detected_language, translated_text}` on success.
  Returns `{:error, reason}` when the request fails, when the instance answers
  with a non-200 status, or when the response body is not valid JSON or lacks
  the `translatedText` / `detectedLanguage` fields.
  """
  @impl Pleroma.Akkoma.Translator
  def translate(string, to_language) do
    with {:ok, %{status: 200} = response} <- do_request(string, to_language),
         {:ok, body} <- Jason.decode(response.body) do
      extract_translation(body)
    else
      {:ok, %{status: status} = response} ->
        Logger.warning("libre_translate: request failed, #{inspect(response)}")
        {:error, "libre_translate: request failed (code #{status})"}

      {:error, reason} ->
        {:error, reason}
    end
  end

  # Pulls the translated text and detected language out of a decoded body.
  defp extract_translation(%{
         "translatedText" => translated,
         "detectedLanguage" => %{"language" => detected}
       }) do
    {:ok, detected, translated}
  end

  # A 200 response with an unexpected shape is reported as an error tuple
  # rather than crashing the caller with a MatchError.
  defp extract_translation(body) do
    Logger.warning("libre_translate: unexpected response body, #{inspect(body)}")
    {:error, "libre_translate: unexpected response"}
  end

  # POSTs the JSON translation request with automatic source-language detection.
  defp do_request(string, to_language) do
    # Whatever path the configured URL carries is replaced by "/translate".
    endpoint = %{URI.parse(url()) | path: "/translate"}

    HTTP.post(
      to_string(endpoint),
      Jason.encode!(%{
        q: string,
        source: "auto",
        target: to_language,
        format: "html",
        api_key: api_key()
      }),
      [
        {"content-type", "application/json"}
      ]
    )
  end
end

View file

@ -0,0 +1,3 @@
defmodule Pleroma.Akkoma.Translator do
  @moduledoc """
  Behaviour for status translation backends.
  """

  @doc """
  Translates a text into a target language.

  The first argument is the text to translate, the second the target language
  code. The bundled implementations return
  `{:ok, detected_source_language, translated_text}` on success and
  `{:error, reason}` otherwise.
  """
  @callback translate(String.t(), String.t()) :: {:ok, String.t(), String.t()} | {:error, any()}
end

View file

@ -154,7 +154,8 @@ defp cachex_children do
build_cachex("web_resp", limit: 2500), build_cachex("web_resp", limit: 2500),
build_cachex("emoji_packs", expiration: emoji_packs_expiration(), limit: 10), build_cachex("emoji_packs", expiration: emoji_packs_expiration(), limit: 10),
build_cachex("failed_proxy_url", limit: 2500), build_cachex("failed_proxy_url", limit: 2500),
build_cachex("banned_urls", default_ttl: :timer.hours(24 * 30), limit: 5_000) build_cachex("banned_urls", default_ttl: :timer.hours(24 * 30), limit: 5_000),
build_cachex("translations", default_ttl: :timer.hours(24 * 30), limit: 2500)
] ]
end end

View file

@ -406,6 +406,22 @@ def bookmarks_operation do
} }
end end
# OpenAPI description of the status translation endpoint.
# Takes the status id and the target language as path parameters and
# documents a 200 response (see translation/0) plus 400 and 404 errors.
def translate_operation do
  %Operation{
    tags: ["Retrieve status translation"],
    summary: "Translate status",
    description: "View the translation of a given status",
    operationId: "StatusController.translation",
    security: [%{"oAuth" => ["read:statuses"]}],
    parameters: [id_param(), language_param()],
    responses: %{
      200 => Operation.response("Translation", "application/json", translation()),
      400 => Operation.response("Error", "application/json", ApiError),
      404 => Operation.response("Not Found", "application/json", ApiError)
    }
  }
end
def array_of_statuses do def array_of_statuses do
%Schema{type: :array, items: Status, example: [Status.schema().example]} %Schema{type: :array, items: Status, example: [Status.schema().example]}
end end
@ -552,6 +568,10 @@ def id_param do
) )
end end
# Path parameter `:language` used by translate_operation/0.
defp language_param do
  Operation.parameter(:language, :path, :string, "ISO 639 language code", example: "en")
end
defp status_response do defp status_response do
Operation.response("Status", "application/json", Status) Operation.response("Status", "application/json", Status)
end end
@ -573,4 +593,20 @@ defp context do
} }
} }
end end
# Schema of the translation response: an object with the two required
# string fields `detected_language` and `text`.
defp translation do
  %Schema{
    title: "StatusTranslation",
    description: "The translation of a status.",
    type: :object,
    required: [:detected_language, :text],
    properties: %{
      detected_language: %Schema{
        type: :string,
        description: "The detected language of the text"
      },
      text: %Schema{type: :string, description: "The translated text"}
    }
  }
end
end end

View file

@ -14,6 +14,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusController do
alias Pleroma.Bookmark alias Pleroma.Bookmark
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.Config
alias Pleroma.ScheduledActivity alias Pleroma.ScheduledActivity
alias Pleroma.User alias Pleroma.User
alias Pleroma.Web.ActivityPub.ActivityPub alias Pleroma.Web.ActivityPub.ActivityPub
@ -30,6 +31,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusController do
plug(:skip_public_check when action in [:index, :show]) plug(:skip_public_check when action in [:index, :show])
@unauthenticated_access %{fallback: :proceed_unauthenticated, scopes: []} @unauthenticated_access %{fallback: :proceed_unauthenticated, scopes: []}
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
plug( plug(
OAuthScopesPlug, OAuthScopesPlug,
@ -37,7 +39,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusController do
when action in [ when action in [
:index, :index,
:show, :show,
:context :context,
:translate
] ]
) )
@ -418,6 +421,46 @@ def bookmarks(%{assigns: %{user: user}} = conn, params) do
) )
end end
@doc """
GET /api/v1/statuses/:id/translations/:language

Responds with `detected_language` and `text` for the translated status.
Responds with a 400 error when translation is not enabled. Returns
`{:error, :not_found}` when the status does not exist or is not visible to
the requesting user. Any other failure (e.g. an `{:error, reason}` from the
translation module) is returned unchanged.
"""
def translate(%{assigns: %{user: user}} = conn, %{id: id, language: language}) do
  with {:enabled, true} <- {:enabled, Config.get([:translator, :enabled])},
       %Activity{} = activity <- Activity.get_by_id_with_object(id),
       {:visible, true} <- {:visible, Visibility.visible_for_user?(activity, user)},
       translation_module <- Config.get([:translator, :module]),
       {:ok, detected, translation} <-
         fetch_or_translate(
           activity.id,
           activity.object.data["content"],
           language,
           translation_module
         ) do
    json(conn, %{detected_language: detected, text: translation})
  else
    # Anything other than `true` (including an unset value) counts as disabled.
    {:enabled, _} ->
      conn
      |> put_status(:bad_request)
      |> json(%{"error" => "Translation is not enabled"})

    # Unknown status id: treat it the same as a status the user cannot see.
    nil ->
      {:error, :not_found}

    {:visible, false} ->
      {:error, :not_found}

    e ->
      e
  end
end
# Returns the translation of `text` into `language`, computing it with
# `translation_module.translate/2` only when no cached entry exists for this
# status/language pair. Only successful `{:ok, _, _}` results are cached;
# failures are returned to the caller but not stored, so they can be retried.
#
# NOTE(review): uses the dedicated cache registered in the application via
# build_cachex("translations", ...) instead of the user cache. The name
# :translations_cache is assumed to follow the "<name>_cache" convention —
# confirm against build_cachex/2.
defp fetch_or_translate(status_id, text, language, translation_module) do
  @cachex.fetch!(:translations_cache, "translations:#{status_id}:#{language}", fn _ ->
    case translation_module.translate(text, language) do
      {:ok, _, _} = translation -> {:commit, translation}
      failure -> {:ignore, failure}
    end
  end)
end
defp put_application(params, %{assigns: %{token: %Token{user: %User{} = user} = token}} = _conn) do defp put_application(params, %{assigns: %{token: %Token{user: %User{} = user} = token}} = _conn) do
if user.disclose_client do if user.disclose_client do
%{client_name: client_name, website: website} = Repo.preload(token, :app).app %{client_name: client_name, website: website} = Repo.preload(token, :app).app

View file

@ -81,6 +81,9 @@ def features do
if Config.get([:instance, :profile_directory]) do if Config.get([:instance, :profile_directory]) do
"profile_directory" "profile_directory"
end, end,
if Config.get([:translator, :enabled], false) do
"akkoma:machine_translation"
end,
"custom_emoji_reactions" "custom_emoji_reactions"
] ]
|> Enum.filter(& &1) |> Enum.filter(& &1)

View file

@ -553,6 +553,7 @@ defmodule Pleroma.Web.Router do
post("/statuses/:id/unbookmark", StatusController, :unbookmark) post("/statuses/:id/unbookmark", StatusController, :unbookmark)
post("/statuses/:id/mute", StatusController, :mute_conversation) post("/statuses/:id/mute", StatusController, :mute_conversation)
post("/statuses/:id/unmute", StatusController, :unmute_conversation) post("/statuses/:id/unmute", StatusController, :unmute_conversation)
get("/statuses/:id/translations/:language", StatusController, :translate)
post("/push/subscription", SubscriptionController, :create) post("/push/subscription", SubscriptionController, :create)
get("/push/subscription", SubscriptionController, :show) get("/push/subscription", SubscriptionController, :show)

View file

@ -0,0 +1,75 @@
defmodule Pleroma.Akkoma.Translators.DeepLTest do
  use Pleroma.DataCase, async: true

  alias Pleroma.Akkoma.Translators.DeepL

  @source_text "ギュギュ握りつぶしちゃうぞ"

  describe "translating with deepl" do
    setup do
      clear_config([:deepl, :api_key], "deepl_api_key")
    end

    test "should work with the free tier" do
      clear_config([:deepl, :tier], :free)
      mock_successful_translation("https://api-free.deepl.com/v2/translate")

      assert {:ok, "ja", "I will crush you"} = DeepL.translate(@source_text, "en")
    end

    test "should work with the pro tier" do
      clear_config([:deepl, :tier], :pro)
      mock_successful_translation("https://api.deepl.com/v2/translate")

      assert {:ok, "ja", "I will crush you"} = DeepL.translate(@source_text, "en")
    end

    test "should gracefully fail if the API errors" do
      clear_config([:deepl, :tier], :free)

      Tesla.Mock.mock(fn
        %{method: :post, url: "https://api-free.deepl.com/v2/translate"} ->
          %Tesla.Env{status: 403, body: ""}
      end)

      assert {:error, "DeepL request failed (code 403)"} = DeepL.translate(@source_text, "en")
    end
  end

  # Mocks a successful DeepL response for POSTs to `expected_url` and checks
  # that the configured API key is sent in the authorization header.
  defp mock_successful_translation(expected_url) do
    Tesla.Mock.mock(fn
      %{method: :post, url: ^expected_url} = env ->
        auth_header = Enum.find(env.headers, fn {name, _value} -> name == "authorization" end)
        assert {"authorization", "DeepL-Auth-Key deepl_api_key"} = auth_header

        payload = %{
          translations: [
            %{
              "text" => "I will crush you",
              "detected_source_language" => "ja"
            }
          ]
        }

        %Tesla.Env{status: 200, body: Jason.encode!(payload)}
    end)
  end
end

View file

@ -0,0 +1,91 @@
defmodule Pleroma.Akkoma.Translators.LibreTranslateTest do
  use Pleroma.DataCase, async: true

  alias Pleroma.Akkoma.Translators.LibreTranslate

  @translate_url "http://libre.translate/translate"
  @source_text "ギュギュ握りつぶしちゃうぞ"

  describe "translating with libre translate" do
    setup do
      clear_config([:libre_translate, :url], "http://libre.translate/translate")
    end

    test "should work without an API key" do
      mock_successful_translation(nil)

      assert {:ok, "ja", "I will crush you"} = LibreTranslate.translate(@source_text, "en")
    end

    test "should work with an API key" do
      clear_config([:libre_translate, :api_key], "libre_translate_api_key")
      mock_successful_translation("libre_translate_api_key")

      assert {:ok, "ja", "I will crush you"} = LibreTranslate.translate(@source_text, "en")
    end

    test "should gracefully handle API key errors" do
      clear_config([:libre_translate, :api_key], "")
      mock_error(403, "Please contact the server operator to obtain an API key")

      assert {:error, "libre_translate: request failed (code 403)"} =
               LibreTranslate.translate(@source_text, "en")
    end

    test "should gracefully handle an unsupported language" do
      clear_config([:libre_translate, :api_key], "")
      mock_error(400, "zoop is not supported")

      assert {:error, "libre_translate: request failed (code 400)"} =
               LibreTranslate.translate(@source_text, "zoop")
    end
  end

  # Mocks a successful translation and checks that the request body carries
  # `expected_api_key` (which may be nil) under "api_key".
  defp mock_successful_translation(expected_api_key) do
    Tesla.Mock.mock(fn
      %{method: :post, url: @translate_url} = env ->
        assert {:ok, %{"api_key" => ^expected_api_key}} = Jason.decode(env.body)

        payload = %{
          detectedLanguage: %{
            confidence: 83,
            language: "ja"
          },
          translatedText: "I will crush you"
        }

        %Tesla.Env{status: 200, body: Jason.encode!(payload)}
    end)
  end

  # Mocks an error response with the given HTTP status and error message.
  defp mock_error(status, message) do
    Tesla.Mock.mock(fn
      %{method: :post, url: @translate_url} ->
        %Tesla.Env{status: status, body: Jason.encode!(%{error: message})}
    end)
  end
end

View file

@ -2071,4 +2071,76 @@ test "posting a quote of a status that doesn't exist", %{conn: conn} do
|> json_response_and_validate_schema(422) |> json_response_and_validate_schema(422)
end end
end end
describe "translating statuses" do
  setup do
    clear_config([:translator, :enabled], true)
    clear_config([:translator, :module], Pleroma.Akkoma.Translators.DeepL)
    clear_config([:deepl, :api_key], "deepl_api_key")
    oauth_access(["read:statuses"])
  end

  # Both tests use the free tier and the same successful DeepL response.
  setup do
    clear_config([:deepl, :tier], :free)

    Tesla.Mock.mock_global(fn
      %{method: :post, url: "https://api-free.deepl.com/v2/translate"} ->
        payload = %{
          translations: [
            %{
              "text" => "Tell me, for whom do you fight?",
              "detected_source_language" => "ja"
            }
          ]
        }

        %Tesla.Env{status: 200, body: Jason.encode!(payload)}
    end)

    :ok
  end

  test "should return text and detected language", %{conn: conn} do
    author = insert(:user)
    {:ok, status} = CommonAPI.post(author, %{status: "何のために闘う?"})

    response =
      conn
      |> put_req_header("content-type", "application/json")
      |> get("/api/v1/statuses/#{status.id}/translations/en")
      |> json_response_and_validate_schema(200)

    assert response["text"] == "Tell me, for whom do you fight?"
    assert response["detected_language"] == "ja"
  end

  test "should not allow translating of statuses you cannot see", %{conn: conn} do
    author = insert(:user)
    {:ok, status} = CommonAPI.post(author, %{status: "何のために闘う?", visibility: "private"})

    conn
    |> put_req_header("content-type", "application/json")
    |> get("/api/v1/statuses/#{status.id}/translations/en")
    |> json_response_and_validate_schema(404)
  end
end
end end