Add translation module for Argos Translate (#351)

Argos Translate is a Python module for translation and can be used as a command line tool.

This is also the engine for LibreTranslate, for which we already have a module.
Here we can use the engine directly from our server without doing requests to a third party or having to install our own LibreTranslate webservice (obviously you do have to install Argos Translate).

One thing that's currently still missing from Argos Translate is auto-detection of languages (see <https://github.com/argosopentech/argos-translate/issues/9>). For now, when no source language is provided, we just return the text unchanged, supposedly translated from the target language. That way you get a near immediate response in pleroma-fe when clicking Translate, after which you can select the source language from a dropdown.

Argos Translate also doesn't seem to handle html very well. Therefore we give admins the option to strip the html before translating. I made this an option because I'm unsure if/how this will change in the future.

Co-authored-by: ilja <git@ilja.space>
Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/351
Co-authored-by: ilja <akkoma.dev@ilja.space>
Co-committed-by: ilja <akkoma.dev@ilja.space>
This commit is contained in:
ilja 2022-12-19 13:06:39 +00:00 committed by floatingghost
parent 233c4bb3ba
commit c092fc9fd6
6 changed files with 241 additions and 2 deletions

View file

@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Added
- Prometheus metrics exporting from `/api/v1/akkoma/metrics`
- Ability to alter http pool size
- Translation of statuses via ArgosTranslate
### Removed
- Non-finch HTTP adapters

View file

@ -882,6 +882,11 @@
url: "http://127.0.0.1:5000",
api_key: nil
config :pleroma, :argos_translate,
command_argos_translate: "argos-translate",
command_argospm: "argospm",
strip_html: true
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs"

View file

@ -3442,5 +3442,30 @@
suggestion: [nil]
}
]
},
%{
group: :pleroma,
key: :argos_translate,
type: :group,
description: "ArgosTranslate Settings.",
children: [
%{
key: :command_argos_translate,
type: :string,
description: "command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file.",
suggestion: ["argos-translate"]
},
%{
key: :command_argospm,
type: :string,
description: "command for `argospm`. Can be the command if it's in your PATH, or the full path to the file.",
suggestion: ["argospm"]
},
%{
key: :strip_html,
type: :boolean,
description: "Strip html from the post before translating it."
}
]
}
]

View file

@ -1119,7 +1119,7 @@ Each job has these settings:
### Translation Settings
Settings to automatically translate statuses for end users. Currently supported
translation services are DeepL and LibreTranslate.
translation services are DeepL and LibreTranslate. The supported command line tool is [Argos Translate](https://github.com/argosopentech/argos-translate).
Translations are available at `/api/v1/statuses/:id/translations/:language`, where
`language` is the target language code (e.g `en`)
@ -1128,7 +1128,7 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe
- `:enabled` - enables translation
- `:module` - Sets module to be used
- Either `Pleroma.Akkoma.Translators.DeepL` or `Pleroma.Akkoma.Translators.LibreTranslate`
- Either `Pleroma.Akkoma.Translators.DeepL`, `Pleroma.Akkoma.Translators.LibreTranslate`, or `Pleroma.Akkoma.Translators.ArgosTranslate`
### `:deepl`
@ -1140,3 +1140,9 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe
- `:url` - URL of LibreTranslate instance
- `:api_key` - API key for LibreTranslate
### `:argos_translate`
- `:command_argos_translate` - command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file (default: `argos-translate`).
- `:command_argospm` - command for `argospm`. Can be the command if it's in your PATH, or the full path to the file (default: `argospm`).
- `:strip_html` - Strip html from the post before translating it (default: `true`).

View file

@ -0,0 +1,109 @@
defmodule Pleroma.Akkoma.Translators.ArgosTranslate do
@behaviour Pleroma.Akkoma.Translator
alias Pleroma.Config
defp argos_translate do
Config.get([:argos_translate, :command_argos_translate])
end
defp argospm do
Config.get([:argos_translate, :command_argospm])
end
defp strip_html? do
Config.get([:argos_translate, :strip_html])
end
defp safe_languages() do
try do
System.cmd(argospm(), ["list"], stderr_to_stdout: true, parallelism: true)
rescue
_ -> {"Command #{argospm()} not found", 1}
end
end
@impl Pleroma.Akkoma.Translator
def languages do
with {response, 0} <- safe_languages() do
langs =
response
|> String.split("\n", trim: true)
|> Enum.map(fn
"translate-" <> l -> String.split(l, "_")
end)
source_langs =
langs
|> Enum.map(fn [l, _] -> %{code: l, name: l} end)
|> Enum.uniq()
dest_langs =
langs
|> Enum.map(fn [_, l] -> %{code: l, name: l} end)
|> Enum.uniq()
{:ok, source_langs, dest_langs}
else
{response, _} -> {:error, "ArgosTranslate failed to fetch languages (#{response})"}
end
end
defp safe_translate(string, from_language, to_language) do
try do
System.cmd(
argos_translate(),
["--from-lang", from_language, "--to-lang", to_language, string],
stderr_to_stdout: true,
parallelism: true
)
rescue
_ -> {"Command #{argos_translate()} not found", 1}
end
end
defp clean_string(string, true) do
string
|> String.replace("<p>", "\n")
|> String.replace("</p>", "\n")
|> String.replace("<br>", "\n")
|> String.replace("<br/>", "\n")
|> String.replace("<li>", "\n")
|> Pleroma.HTML.strip_tags()
|> HtmlEntities.decode()
end
defp clean_string(string, _), do: string
defp htmlify_response(string, true) do
string
|> HtmlEntities.encode()
|> String.replace("\n", "<br/>")
end
defp htmlify_response(string, _), do: string
@impl Pleroma.Akkoma.Translator
def translate(string, nil, to_language) do
# Akkoma's Pleroma-fe expects us to detect the source language automatically.
# Argos-translate doesn't have that option (yet?)
# see <https://github.com/argosopentech/argos-translate/issues/9>
# For now we return the text unchanged, supposedly translated from the target language.
# Afterwards people get the option to overwrite the source language from a dropdown.
{:ok, to_language, string}
end
def translate(string, from_language, to_language) do
# Argos Translate doesn't properly translate HTML (yet?)
# For now we give admins the option to strip the html before translating
# Note that we have to add some html back to the response afterwards
string = clean_string(string, strip_html?())
with {translated, 0} <-
safe_translate(string, from_language, to_language) do
{:ok, from_language, translated |> htmlify_response(strip_html?())}
else
{response, _} -> {:error, "ArgosTranslate failed to translate (#{response})"}
end
end
end

View file

@ -0,0 +1,93 @@
defmodule Pleroma.Akkoma.Translators.ArgosTranslateTest do
alias Pleroma.Akkoma.Translators.ArgosTranslate
import Mock
use Pleroma.DataCase, async: true
setup do
clear_config([:argos_translate, :command_argos_translate], "argos-translate_test")
clear_config([:argos_translate, :command_argospm], "argospm_test")
end
test "it lists available languages" do
languages =
with_mock System, [:passthrough],
cmd: fn "argospm_test", ["list"], _ ->
{"translate-nl_en\ntranslate-en_nl\ntranslate-ja_en\n", 0}
end do
ArgosTranslate.languages()
end
assert {:ok, source_langs, dest_langs} = languages
assert [%{code: "en", name: "en"}, %{code: "ja", name: "ja"}, %{code: "nl", name: "nl"}] =
source_langs |> Enum.sort()
assert [%{code: "en", name: "en"}, %{code: "nl", name: "nl"}] = dest_langs |> Enum.sort()
end
test "it translates from the to language when no language is set and returns the text unchanged" do
assert {:ok, "nl", "blabla"} = ArgosTranslate.translate("blabla", nil, "nl")
end
test "it translates from the provided language if provided" do
translation_response =
with_mock System, [:passthrough],
cmd: fn "argos-translate_test", ["--from-lang", "nl", "--to-lang", "en", "blabla"], _ ->
{"yadayada", 0}
end do
ArgosTranslate.translate("blabla", "nl", "en")
end
assert {:ok, "nl", "yadayada"} = translation_response
end
test "it returns a proper error when the executable can't be found" do
non_existing_command = "sfqsfgqsefd"
clear_config([:argos_translate, :command_argos_translate], non_existing_command)
clear_config([:argos_translate, :command_argospm], non_existing_command)
assert nil == System.find_executable(non_existing_command)
assert {:error, "ArgosTranslate failed to fetch languages" <> _} = ArgosTranslate.languages()
assert {:error, "ArgosTranslate failed to translate" <> _} =
ArgosTranslate.translate("blabla", "nl", "en")
end
test "it can strip html" do
content =
~s[<p>What&#39;s up my fellow fedizens?</p><p>So anyway</p><ul><li><a class="hashtag" data-tag="cofe" href="https://suya.space/tag/cofe">#cofe</a></li><li><a class="hashtag" data-tag="suya" href="https://cofe.space/tag/suya">#Suya</a></li></ul><p>ammiright!<br/>:ablobfoxhyper:</p>]
stripped_content =
"\nWhat's up my fellow fedizens?\n\nSo anyway\n\n#cofe\n#Suya\nammiright!\n:ablobfoxhyper:\n"
expected_response_strip_html =
"<br/>What&#39;s up my fellow fedizens?<br/><br/>So anyway<br/><br/>#cofe<br/>#Suya<br/>ammiright!<br/>:ablobfoxhyper:<br/>"
response_strip_html =
with_mock System, [:passthrough],
cmd: fn "argos-translate_test",
["--from-lang", _, "--to-lang", _, ^stripped_content],
_ ->
{stripped_content, 0}
end do
ArgosTranslate.translate(content, "nl", "en")
end
clear_config([:argos_translate, :strip_html], false)
response_no_strip_html =
with_mock System, [:passthrough],
cmd: fn "argos-translate_test", ["--from-lang", _, "--to-lang", _, string], _ ->
{string, 0}
end do
ArgosTranslate.translate(content, "nl", "en")
end
assert {:ok, "nl", content} == response_no_strip_html
assert {:ok, "nl", expected_response_strip_html} == response_strip_html
end
end