diff --git a/activities/migrations/0017_post_language.py b/activities/migrations/0017_post_language.py new file mode 100644 index 000000000..4774d0d77 --- /dev/null +++ b/activities/migrations/0017_post_language.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.1 on 2023-05-15 09:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("activities", "0016_index_together_migration"), + ] + + operations = [ + migrations.AddField( + model_name="post", + name="language", + field=models.CharField(default=""), + ), + ] diff --git a/activities/models/post.py b/activities/models/post.py index 045233e99..6138ad652 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -31,6 +31,7 @@ from core.ld import ( canonicalise, format_ld_date, + get_language, get_list, get_value_or_map, parse_ld_date, @@ -252,6 +253,9 @@ class Types(models.TextChoices): # The main (HTML) content content = models.TextField() + # The language of the content + language = models.CharField(default="") + type = models.CharField( max_length=20, choices=Types.choices, @@ -474,6 +478,7 @@ def create_local( reply_to: Optional["Post"] = None, attachments: list | None = None, question: dict | None = None, + language: str | None = None, ) -> "Post": with transaction.atomic(): # Find mentions in this post @@ -492,6 +497,9 @@ def create_local( sorted([tag[: Hashtag.MAXIMUM_LENGTH] for tag in parser.hashtags]) or None ) + if language is None or language == "": + language = author.config_identity.preferred_posting_language + # Make the Post object post = cls.objects.create( author=author, @@ -502,6 +510,7 @@ def create_local( visibility=visibility, hashtags=hashtags, in_reply_to=reply_to.object_uri if reply_to else None, + language=language, ) post.object_uri = post.urls.object_uri post.url = post.absolute_object_uri() @@ -526,6 +535,7 @@ def edit_local( visibility: int = Visibilities.public, attachments: list | None = None, 
attachment_attributes: list | None = None, + language: str | None = None, ): with transaction.atomic(): # Strip all HTML and apply linebreaks filter @@ -538,6 +548,9 @@ def edit_local( self.summary = summary or None self.sensitive = bool(summary) if sensitive is None else sensitive self.visibility = visibility + if language is None or language == "": + language = self.author.config_identity.preferred_posting_language + self.language = language self.edited = timezone.now() self.mentions.set(self.mentions_from_content(content, self.author)) self.emojis.set(Emoji.emojis_from_content(content, None)) @@ -649,6 +662,10 @@ def to_ap(self) -> dict: "tag": [], "attachment": [], } + if self.language != "": + value["contentMap"] = { + self.language: value["content"], + } if self.type == Post.Types.question and self.type_data: value[self.type_data.mode] = [ { @@ -872,6 +889,7 @@ def by_ap(cls, data, create=False, update=False, fetch_author=False) -> "Post": post.published = parse_ld_date(data.get("published")) post.edited = parse_ld_date(data.get("updated")) post.in_reply_to = data.get("inReplyTo") + post.language = get_language(data) or "" # Mentions and hashtags post.hashtags = [] for tag in get_list(data, "tag"): @@ -1106,12 +1124,16 @@ def to_mastodon_json(self, interactions=None, bookmarks=None, identity=None): self.Visibilities.mentioned: "direct", self.Visibilities.local_only: "public", } + language = self.language + if self.language == "": + language = None value = { "id": self.pk, "uri": self.object_uri, "created_at": format_ld_date(self.published), "account": self.author.to_mastodon_json(include_counts=False), "content": self.safe_content_remote(), + "language": language, "visibility": visibility_mapping[self.visibility], "sensitive": self.sensitive, "spoiler_text": self.summary or "", @@ -1152,7 +1174,6 @@ def to_mastodon_json(self, interactions=None, bookmarks=None, identity=None): if isinstance(self.type_data, QuestionData) else None, "card": None, - "language": 
None, "text": self.safe_content_remote(), "edited_at": format_ld_date(self.edited) if self.edited else None, } diff --git a/api/schemas.py b/api/schemas.py index 3be6b059b..f2996b728 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -151,7 +151,7 @@ class Status(Schema): reblog: Optional["Status"] = Field(...) poll: Poll | None = Field(...) card: None = Field(...) - language: None = Field(...) + language: str | None = Field(...) text: str | None = Field(...) edited_at: str | None favourited: bool = False @@ -422,13 +422,19 @@ def from_identity( activities_models.Post.Visibilities.mentioned: "direct", activities_models.Post.Visibilities.local_only: "public", } + preferred_posting_language = None + if identity.config_identity.preferred_posting_language != "": + preferred_posting_language = ( + identity.config_identity.preferred_posting_language + ) + return cls.parse_obj( { "posting:default:visibility": visibility_mapping[ identity.config_identity.default_post_visibility ], "posting:default:sensitive": False, - "posting:default:language": None, + "posting:default:language": preferred_posting_language, "reading:expand:media": "default", "reading:expand:spoilers": identity.config_identity.expand_content_warnings, } diff --git a/api/views/statuses.py b/api/views/statuses.py index 4e20993cd..e9db890ee 100644 --- a/api/views/statuses.py +++ b/api/views/statuses.py @@ -110,6 +110,7 @@ def post_status(request, details: PostStatusSchema) -> schemas.Status: reply_to=reply_post, attachments=attachments, question=details.poll.dict() if details.poll else None, + language=details.language, ) # Add their own timeline event for immediate visibility TimelineEvent.add_post(request.identity, post) @@ -141,6 +142,7 @@ def edit_status(request, id: str, details: EditStatusSchema) -> schemas.Status: sensitive=details.sensitive, attachments=attachments, attachment_attributes=details.media_attributes, + language=details.language, ) return schemas.Status.from_post(post) diff --git 
a/core/ld.py b/core/ld.py index 8bbc08894..91294ed6e 100644 --- a/core/ld.py +++ b/core/ld.py @@ -1,5 +1,6 @@ import datetime import os +import re import urllib.parse as urllib_parse from dateutil import parser @@ -692,3 +693,37 @@ def media_type_from_filename(filename): return "image/webp" else: return "application/octet-stream"
+
+
+def get_language(data) -> str | None:
+    """
+    Returns the primary language subtag of a document, or None.
+
+    The first of "contentMap", "nameMap" and "summaryMap" that is present
+    in `data` is consulted, and the first key of that map is taken as the
+    language tag. The tag is cut at the first "-" or "_" and lowercased,
+    so "en-GB" and "en_GB" both become "en".
+
+    None is returned when no map is present, when the map is empty or is
+    not a dict (the value may come from a remote server), or when the
+    primary subtag is empty or "und" (undetermined) in any letter case.
+    """
+    for key in ("contentMap", "nameMap", "summaryMap"):
+        if key in data:
+            map_ = data[key]
+            break
+    else:
+        return None
+
+    # Only a non-empty dict can carry a language tag; never call .keys() blindly
+    if not isinstance(map_, dict) or not map_:
+        return None
+
+    tag = next(iter(map_))
+    if not isinstance(tag, str):
+        return None
+
+    primary = re.split(r"[-_]", tag, maxsplit=1)[0].lower()
+    if not primary or primary == "und":
+        return None
+    return primary
diff --git a/core/models/config.py b/core/models/config.py index 544478b79..89f706102 100644 --- a/core/models/config.py +++ b/core/models/config.py @@ -286,6 +286,7 @@ class IdentityOptions(pydantic.BaseModel): visible_reaction_counts: bool = True expand_content_warnings: bool = False boosts_on_profile: bool = True + preferred_posting_language: str = "" class DomainOptions(pydantic.BaseModel): site_name: str = "" diff --git a/requirements.txt b/requirements.txt index 04b966d40..927b2b44e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ httpx~=0.23 markdown_it_py~=2.1.0 pillow~=9.3.0 psycopg~=3.1.8 +pycountry~=22.3.5 pydantic~=1.10.2 pyld~=2.0.3 pylibmc~=1.6.3 diff --git a/templates/activities/_post.html b/templates/activities/_post.html index 6834ffbf4..cbdf35228 100644 --- a/templates/activities/_post.html +++ b/templates/activities/_post.html @@ -32,7 +32,7 @@ {% endif %} -
+
{{ post.safe_content_local }} {% if post.attachments.exists %} diff --git a/tests/activities/models/test_post.py b/tests/activities/models/test_post.py index 3b28b14a4..4a13fee0f 100644 --- a/tests/activities/models/test_post.py +++ b/tests/activities/models/test_post.py @@ -259,6 +259,7 @@ def test_content_map(remote_identity): create=True, ) assert post.content == "Hi World" + assert post.language == "" post2 = Post.by_ap( data={ @@ -271,6 +272,7 @@ def test_content_map(remote_identity): create=True, ) assert post2.content == "Hey World" + assert post2.language == "" post3 = Post.by_ap( data={ @@ -283,6 +285,7 @@ def test_content_map(remote_identity): create=True, ) assert post3.content == "Hello World" + assert post3.language == "en" @pytest.mark.django_db diff --git a/tests/core/test_ld.py b/tests/core/test_ld.py index 4f123173b..fb409a773 100644 --- a/tests/core/test_ld.py +++ b/tests/core/test_ld.py @@ -2,7 +2,7 @@ from dateutil.tz import tzutc -from core.ld import parse_ld_date +from core.ld import get_language, parse_ld_date def test_parse_ld_date(): @@ -41,3 +41,41 @@ def test_parse_ld_date(): tzinfo=tzutc(), ) assert difference.total_seconds() == 0 + + +def test_get_language(): + assert ( + get_language( + { + "contentMap": { + "en": "

Hello

", + "es": "

hola

", + }, + "nameMap": {"de": "Hallo"}, + "summaryMap": {"fr": "Bonjour"}, + } + ) + == "en" + ) + assert ( + get_language( + { + "nameMap": {"de": "Hallo"}, + "summaryMap": {"fr": "Bonjour"}, + } + ) + == "de" + ) + assert ( + get_language( + { + "summaryMap": {"fr": "Bonjour"}, + } + ) + == "fr" + ) + assert get_language({"contentMap": {"en-gb": "

Hello

"}}) == "en" + assert get_language({"contentMap": {"en_GB": "

Hello

"}}) == "en" + assert get_language({"contentMap": {"EN": "

Hello

"}}) == "en" + assert get_language({"contentMap": {"und": "

Hello

"}}) is None + assert get_language({}) is None diff --git a/users/views/settings/posting.py b/users/views/settings/posting.py index 3916f1e6b..de393577b 100644 --- a/users/views/settings/posting.py +++ b/users/views/settings/posting.py @@ -1,3 +1,5 @@ +import pycountry + from activities.models.post import Post from users.views.settings.settings_page import SettingsPage @@ -15,8 +17,29 @@ class PostingPage(SettingsPage): "title": "Expand content warnings", "help_text": "If content warnings should be expanded by default (not honoured by all clients)", }, + "preferred_posting_language": { + "title": "Default posting language", + "help_text": "", + "choices": sorted( + ( + [ + ("", ""), + ] + + [ + (lang.alpha_2, lang.name) + for lang in pycountry.languages + if hasattr(lang, "alpha_2") + ] + ), + key=lambda lang: lang[1], + ), + }, } layout = { - "Posting": ["default_post_visibility", "expand_content_warnings"], + "Posting": [ + "default_post_visibility", + "expand_content_warnings", + "preferred_posting_language", + ], } diff --git a/users/views/settings/settings_page.py b/users/views/settings/settings_page.py index ab5d4b2f1..5b305cbd0 100644 --- a/users/views/settings/settings_page.py +++ b/users/views/settings/settings_page.py @@ -23,7 +23,7 @@ class SettingsPage(FormView): options_class = Config.IdentityOptions template_name = "settings/settings.html" section: ClassVar[str] - options: dict[str, dict[str, str | int]] + options: dict[str, dict[str, str | int | list[tuple[int | str, str]]]] layout: dict[str, list[str]] def get_form_class(self): @@ -42,7 +42,11 @@ def get_form_class(self): elif config_field.type_ is UploadedImage: form_field = forms.ImageField elif config_field.type_ is str: - if details.get("display") == "textarea": + choices = details.get("choices") + if choices: + field_kwargs["widget"] = forms.Select(choices=choices) + form_field = forms.CharField + elif details.get("display") == "textarea": form_field = partial( forms.CharField, 
widget=forms.Textarea,