Adjust content indexing to skip more unneeded stuff
This commit is contained in:
parent
14ef6ce80f
commit
117f525fd6
|
@ -52,13 +52,6 @@ def run(["index"]) do
|
||||||
timeout: :infinity
|
timeout: :infinity
|
||||||
)
|
)
|
||||||
|> Stream.chunk_every(chunk_size)
|
|> Stream.chunk_every(chunk_size)
|
||||||
|> Stream.transform(0, fn objects, acc ->
|
|
||||||
new_acc = acc + Enum.count(objects)
|
|
||||||
|
|
||||||
IO.puts("Indexed #{new_acc} entries")
|
|
||||||
|
|
||||||
{[objects], new_acc}
|
|
||||||
end)
|
|
||||||
|> Stream.map(fn objects ->
|
|> Stream.map(fn objects ->
|
||||||
Enum.map(objects, fn object ->
|
Enum.map(objects, fn object ->
|
||||||
data = object.data
|
data = object.data
|
||||||
|
@ -70,15 +63,34 @@ def run(["index"]) do
|
||||||
end
|
end
|
||||||
|
|
||||||
{:ok, published, _} = DateTime.from_iso8601(data["published"])
|
{:ok, published, _} = DateTime.from_iso8601(data["published"])
|
||||||
{:ok, content} = FastSanitize.strip_tags(content_str)
|
|
||||||
|
|
||||||
|
content =
|
||||||
|
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
|
||||||
|
trimmed <- String.trim(scrubbed) do
|
||||||
|
trimmed
|
||||||
|
end
|
||||||
|
|
||||||
|
# Only index if there is anything in the string. If there is a single symbol,
|
||||||
|
# it's probably a dot from mastodon posts with just the picture
|
||||||
|
if String.length(content) > 1 do
|
||||||
%{
|
%{
|
||||||
id: object.id,
|
id: object.id,
|
||||||
content: content,
|
content: content,
|
||||||
ap: data["id"],
|
ap: data["id"],
|
||||||
published: published |> DateTime.to_unix()
|
published: published |> DateTime.to_unix()
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
nil
|
||||||
|
end
|
||||||
end)
|
end)
|
||||||
|
|> Enum.filter(fn o -> not is_nil(o) end)
|
||||||
|
end)
|
||||||
|
|> Stream.transform(0, fn objects, acc ->
|
||||||
|
new_acc = acc + Enum.count(objects)
|
||||||
|
|
||||||
|
IO.puts("Indexed #{new_acc} entries")
|
||||||
|
|
||||||
|
{[objects], new_acc}
|
||||||
end)
|
end)
|
||||||
|> Stream.each(fn objects ->
|
|> Stream.each(fn objects ->
|
||||||
{:ok, result} =
|
{:ok, result} =
|
||||||
|
@ -102,6 +114,9 @@ def run(["clear"]) do
|
||||||
|
|
||||||
endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
|
endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
|
||||||
|
|
||||||
{:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects", "", [], [])
|
{:ok, _} =
|
||||||
|
Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [],
|
||||||
|
timeout: :infinity
|
||||||
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue