From 32216315aa0c20f1f3ea520b72121366eeccf6df Mon Sep 17 00:00:00 2001 From: Christof Dorner Date: Sun, 14 May 2023 22:36:36 +0200 Subject: [PATCH 1/6] Add identity setting for default post language --- api/schemas.py | 2 +- core/models/config.py | 1 + requirements.txt | 1 + users/views/settings/posting.py | 20 +++++++++++++++++++- users/views/settings/settings_page.py | 8 ++++++-- 5 files changed, 28 insertions(+), 4 deletions(-) diff --git a/api/schemas.py b/api/schemas.py index 3be6b059b..f25bab6b6 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -428,7 +428,7 @@ def from_identity( identity.config_identity.default_post_visibility ], "posting:default:sensitive": False, - "posting:default:language": None, + "posting:default:language": identity.config_identity.preferred_posting_language, "reading:expand:media": "default", "reading:expand:spoilers": identity.config_identity.expand_content_warnings, } diff --git a/core/models/config.py b/core/models/config.py index 544478b79..b3e322cb7 100644 --- a/core/models/config.py +++ b/core/models/config.py @@ -286,6 +286,7 @@ class IdentityOptions(pydantic.BaseModel): visible_reaction_counts: bool = True expand_content_warnings: bool = False boosts_on_profile: bool = True + preferred_posting_language: str | None = None class DomainOptions(pydantic.BaseModel): site_name: str = "" diff --git a/requirements.txt b/requirements.txt index 04b966d40..927b2b44e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ httpx~=0.23 markdown_it_py~=2.1.0 pillow~=9.3.0 psycopg~=3.1.8 +pycountry~=22.3.5 pydantic~=1.10.2 pyld~=2.0.3 pylibmc~=1.6.3 diff --git a/users/views/settings/posting.py b/users/views/settings/posting.py index 3916f1e6b..9bef87061 100644 --- a/users/views/settings/posting.py +++ b/users/views/settings/posting.py @@ -1,3 +1,5 @@ +import pycountry + from activities.models.post import Post from users.views.settings.settings_page import SettingsPage @@ -15,8 +17,24 @@ class PostingPage(SettingsPage): "title": "Expand content warnings", "help_text": "If content warnings should be expanded by default (not honoured by all clients)", }, + "preferred_posting_language": { + "title": "Default posting language", + "help_text": "", + "choices": sorted( + [ + (lang.alpha_2, lang.name) + for lang in pycountry.languages + if hasattr(lang, "alpha_2") + ], + key=lambda lang: lang[1], + ), + }, } layout = { - "Posting": ["default_post_visibility", "expand_content_warnings"], + "Posting": [ + "default_post_visibility", + "expand_content_warnings", + "preferred_posting_language", + ], } diff --git a/users/views/settings/settings_page.py b/users/views/settings/settings_page.py index ab5d4b2f1..5b305cbd0 100644 --- a/users/views/settings/settings_page.py +++ b/users/views/settings/settings_page.py @@ -23,7 +23,7 @@ class SettingsPage(FormView): options_class = Config.IdentityOptions template_name = "settings/settings.html" section: ClassVar[str] - options: dict[str, dict[str, str | int]] + options: dict[str, dict[str, str | int | list[tuple[int | str, str]]]] layout: dict[str, list[str]] def get_form_class(self): @@ -42,7 +42,11 @@ def get_form_class(self): elif config_field.type_ is UploadedImage: form_field = forms.ImageField elif config_field.type_ is str: - if details.get("display") == "textarea": + choices = details.get("choices") + if choices: + field_kwargs["widget"] = forms.Select(choices=choices) + form_field = forms.CharField + elif details.get("display") == "textarea": form_field = partial( forms.CharField, widget=forms.Textarea, From 808838707a02d04ed1bf0012166ce31b65c1452b Mon Sep 17 00:00:00 2001 From: Christof Dorner Date: Mon, 15 May 2023 12:08:11 +0200 Subject: [PATCH 2/6] Add language support to posts --- activities/migrations/0017_post_language.py | 18 ++++++++++ activities/models/post.py | 20 ++++++++++- api/schemas.py | 2 +- api/views/statuses.py | 2 ++ core/ld.py | 22 ++++++++++++ tests/activities/models/test_post.py | 3 ++ tests/core/test_ld.py | 40 ++++++++++++++++++++- 7 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 activities/migrations/0017_post_language.py diff --git a/activities/migrations/0017_post_language.py b/activities/migrations/0017_post_language.py new file mode 100644 index 000000000..ec21634ba --- /dev/null +++ b/activities/migrations/0017_post_language.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.1 on 2023-05-15 09:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("activities", "0016_index_together_migration"), + ] + + operations = [ + migrations.AddField( + model_name="post", + name="language", + field=models.CharField(max_length=2, null=True), + ), + ] diff --git a/activities/models/post.py b/activities/models/post.py index 045233e99..d770a9b70 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -31,6 +31,7 @@ from core.ld import ( canonicalise, format_ld_date, + get_language, get_list, get_value_or_map, parse_ld_date, @@ -252,6 +253,9 @@ class Types(models.TextChoices): # The main (HTML) content content = models.TextField() + # The language of the content + language = models.CharField(max_length=2, null=True) + type = models.CharField( max_length=20, choices=Types.choices, @@ -474,6 +478,7 @@ def create_local( reply_to: Optional["Post"] = None, attachments: list | None = None, question: dict | None = None, + language: str | None = None, ) -> "Post": with transaction.atomic(): # Find mentions in this post @@ -492,6 +497,9 @@ def create_local( sorted([tag[: Hashtag.MAXIMUM_LENGTH] for tag in parser.hashtags]) or None ) + if language is None: + language = author.config_identity.preferred_posting_language + # Make the Post object post = cls.objects.create( author=author, @@ -502,6 +510,7 @@ def create_local( visibility=visibility, hashtags=hashtags, in_reply_to=reply_to.object_uri if reply_to else None, + language=language, ) post.object_uri = post.urls.object_uri post.url = post.absolute_object_uri() @@ -526,6 +535,7 @@ def edit_local( visibility: int = Visibilities.public, attachments: list | None = None, attachment_attributes: list | None = None, + language: str | None = None, ): with transaction.atomic(): # Strip all HTML and apply linebreaks filter @@ -538,6 +548,9 @@ def edit_local( self.summary = summary or None self.sensitive = bool(summary) if sensitive is None else sensitive self.visibility = visibility + if language is None: + language = self.author.config_identity.preferred_posting_language + self.language = language self.edited = timezone.now() self.mentions.set(self.mentions_from_content(content, self.author)) self.emojis.set(Emoji.emojis_from_content(content, None)) @@ -649,6 +662,10 @@ def to_ap(self) -> dict: "tag": [], "attachment": [], } + if self.language is not None: + value["contentMap"] = { + self.language: value["content"], + } if self.type == Post.Types.question and self.type_data: value[self.type_data.mode] = [ { @@ -872,6 +889,7 @@ def by_ap(cls, data, create=False, update=False, fetch_author=False) -> "Post": post.published = parse_ld_date(data.get("published")) post.edited = parse_ld_date(data.get("updated")) post.in_reply_to = data.get("inReplyTo") + post.language = get_language(data) # Mentions and hashtags post.hashtags = [] for tag in get_list(data, "tag"): @@ -1112,6 +1130,7 @@ def to_mastodon_json(self, interactions=None, bookmarks=None, identity=None): "created_at": format_ld_date(self.published), "account": self.author.to_mastodon_json(include_counts=False), "content": self.safe_content_remote(), + "language": self.language, "visibility": visibility_mapping[self.visibility], "sensitive": self.sensitive, "spoiler_text": self.summary or "", @@ -1152,7 +1171,6 @@ def to_mastodon_json(self, interactions=None, bookmarks=None, identity=None): if isinstance(self.type_data, QuestionData) else None, "card": None, - "language": None, "text": self.safe_content_remote(), "edited_at": format_ld_date(self.edited) if self.edited else None, } diff --git a/api/schemas.py b/api/schemas.py index f25bab6b6..55e9ce863 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -151,7 +151,7 @@ class Status(Schema): reblog: Optional["Status"] = Field(...) poll: Poll | None = Field(...) card: None = Field(...) - language: None = Field(...) + language: str | None = Field(...) text: str | None = Field(...) edited_at: str | None favourited: bool = False diff --git a/api/views/statuses.py b/api/views/statuses.py index 4e20993cd..e9db890ee 100644 --- a/api/views/statuses.py +++ b/api/views/statuses.py @@ -110,6 +110,7 @@ def post_status(request, details: PostStatusSchema) -> schemas.Status: reply_to=reply_post, attachments=attachments, question=details.poll.dict() if details.poll else None, + language=details.language, ) # Add their own timeline event for immediate visibility TimelineEvent.add_post(request.identity, post) @@ -141,6 +142,7 @@ def edit_status(request, id: str, details: EditStatusSchema) -> schemas.Status: sensitive=details.sensitive, attachments=attachments, attachment_attributes=details.media_attributes, + language=details.language, ) return schemas.Status.from_post(post) diff --git a/core/ld.py b/core/ld.py index 8bbc08894..91294ed6e 100644 --- a/core/ld.py +++ b/core/ld.py @@ -1,5 +1,6 @@ import datetime import os +import re import urllib.parse as urllib_parse from dateutil import parser @@ -692,3 +693,24 @@ def media_type_from_filename(filename): return "image/webp" else: return "application/octet-stream" + + +def get_language(data) -> str | None: + """Detects and returns a document's language""" + map_ = None + if "contentMap" in data: + map_ = data["contentMap"] + elif "nameMap" in data: + map_ = data["nameMap"] + elif "summaryMap" in data: + map_ = data["summaryMap"] + + if not map_: + return None + + lang = list(map_.keys())[0] + if not lang or lang == "und": + return None + + lang = re.split("-|_", lang)[0] + return lang.lower() diff --git a/tests/activities/models/test_post.py b/tests/activities/models/test_post.py index 3b28b14a4..4aa6d9bc5 100644 --- a/tests/activities/models/test_post.py +++ b/tests/activities/models/test_post.py @@ -259,6 +259,7 @@ def test_content_map(remote_identity): create=True, ) assert post.content == "Hi World" + assert post.language is None post2 = Post.by_ap( data={ @@ -271,6 +272,7 @@ def test_content_map(remote_identity): create=True, ) assert post2.content == "Hey World" + assert post2.language is None post3 = Post.by_ap( data={ @@ -283,6 +285,7 @@ def test_content_map(remote_identity): create=True, ) assert post3.content == "Hello World" + assert post3.language == "en" @pytest.mark.django_db diff --git a/tests/core/test_ld.py b/tests/core/test_ld.py index 4f123173b..fb409a773 100644 --- a/tests/core/test_ld.py +++ b/tests/core/test_ld.py @@ -2,7 +2,7 @@ from dateutil.tz import tzutc -from core.ld import parse_ld_date +from core.ld import get_language, parse_ld_date def test_parse_ld_date(): @@ -41,3 +41,41 @@ def test_parse_ld_date(): tzinfo=tzutc(), ) assert difference.total_seconds() == 0 + + +def test_get_language(): + assert ( + get_language( + { + "contentMap": { + "en": "

Hello

", + "es": "

hola

", + }, + "nameMap": {"de": "Hallo"}, + "summaryMap": {"fr": "Bonjour"}, + } + ) + == "en" + ) + assert ( + get_language( + { + "nameMap": {"de": "Hallo"}, + "summaryMap": {"fr": "Bonjour"}, + } + ) + == "de" + ) + assert ( + get_language( + { + "summaryMap": {"fr": "Bonjour"}, + } + ) + == "fr" + ) + assert get_language({"contentMap": {"en-gb": "

Hello

"}}) == "en" + assert get_language({"contentMap": {"en_GB": "

Hello

"}}) == "en" + assert get_language({"contentMap": {"EN": "

Hello

"}}) == "en" + assert get_language({"contentMap": {"und": "

Hello

"}}) is None + assert get_language({}) is None From b963fbcbfd390f0296f3f86bb2d39e9000e0ff2e Mon Sep 17 00:00:00 2001 From: Christof Dorner Date: Mon, 15 May 2023 12:58:44 +0200 Subject: [PATCH 3/6] Help screenreaders with `lang` attribute when rendering a post --- templates/activities/_post.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/activities/_post.html b/templates/activities/_post.html index 6834ffbf4..cbdf35228 100644 --- a/templates/activities/_post.html +++ b/templates/activities/_post.html @@ -32,7 +32,7 @@ {% endif %} -
+
{{ post.safe_content_local }} {% if post.attachments.exists %} From 1a11a1810e1c0b3425433b31ec70816a093105b3 Mon Sep 17 00:00:00 2001 From: Christof Dorner Date: Thu, 1 Jun 2023 18:41:24 +0200 Subject: [PATCH 4/6] Increase language max chars to 50 --- activities/migrations/0017_post_language.py | 2 +- activities/models/post.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/activities/migrations/0017_post_language.py b/activities/migrations/0017_post_language.py index ec21634ba..64966fc04 100644 --- a/activities/migrations/0017_post_language.py +++ b/activities/migrations/0017_post_language.py @@ -13,6 +13,6 @@ class Migration(migrations.Migration): migrations.AddField( model_name="post", name="language", - field=models.CharField(max_length=2, null=True), + field=models.CharField(max_length=50, null=True), ), ] diff --git a/activities/models/post.py b/activities/models/post.py index d770a9b70..827a6de22 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -254,7 +254,7 @@ class Types(models.TextChoices): content = models.TextField() # The language of the content - language = models.CharField(max_length=2, null=True) + language = models.CharField(max_length=50, null=True) type = models.CharField( max_length=20, From 710b7b4caab06843aa20312d9ef5a5161a934d40 Mon Sep 17 00:00:00 2001 From: Christof Dorner Date: Tue, 6 Jun 2023 19:51:01 +0200 Subject: [PATCH 5/6] allow any length language and default to empty string --- activities/migrations/0017_post_language.py | 2 +- activities/models/post.py | 15 +++++++++------ api/schemas.py | 8 +++++++- core/models/config.py | 2 +- tests/activities/models/test_post.py | 4 ++-- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/activities/migrations/0017_post_language.py b/activities/migrations/0017_post_language.py index 64966fc04..4774d0d77 100644 --- a/activities/migrations/0017_post_language.py +++ b/activities/migrations/0017_post_language.py @@ -13,6 +13,6 @@ class Migration(migrations.Migration): migrations.AddField( model_name="post", name="language", - field=models.CharField(max_length=50, null=True), + field=models.CharField(default=""), ), ] diff --git a/activities/models/post.py b/activities/models/post.py index 827a6de22..6138ad652 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -254,7 +254,7 @@ class Types(models.TextChoices): content = models.TextField() # The language of the content - language = models.CharField(max_length=50, null=True) + language = models.CharField(default="") type = models.CharField( max_length=20, @@ -497,7 +497,7 @@ def create_local( sorted([tag[: Hashtag.MAXIMUM_LENGTH] for tag in parser.hashtags]) or None ) - if language is None: + if language is None or language == "": language = author.config_identity.preferred_posting_language # Make the Post object @@ -548,7 +548,7 @@ def edit_local( self.summary = summary or None self.sensitive = bool(summary) if sensitive is None else sensitive self.visibility = visibility - if language is None: + if language is None or language == "": language = self.author.config_identity.preferred_posting_language self.language = language self.edited = timezone.now() @@ -662,7 +662,7 @@ def to_ap(self) -> dict: "tag": [], "attachment": [], } - if self.language is not None: + if self.language != "": value["contentMap"] = { self.language: value["content"], } @@ -889,7 +889,7 @@ def by_ap(cls, data, create=False, update=False, fetch_author=False) -> "Post": post.published = parse_ld_date(data.get("published")) post.edited = parse_ld_date(data.get("updated")) post.in_reply_to = data.get("inReplyTo") - post.language = get_language(data) + post.language = get_language(data) or "" # Mentions and hashtags post.hashtags = [] for tag in get_list(data, "tag"): @@ -1124,13 +1124,16 @@ def to_mastodon_json(self, interactions=None, bookmarks=None, identity=None): self.Visibilities.mentioned: "direct", self.Visibilities.local_only: "public", } + language = self.language + if self.language == "": + language = None value = { "id": self.pk, "uri": self.object_uri, "created_at": format_ld_date(self.published), "account": self.author.to_mastodon_json(include_counts=False), "content": self.safe_content_remote(), - "language": self.language, + "language": language, "visibility": visibility_mapping[self.visibility], "sensitive": self.sensitive, "spoiler_text": self.summary or "", diff --git a/api/schemas.py b/api/schemas.py index 55e9ce863..f2996b728 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -422,13 +422,19 @@ def from_identity( activities_models.Post.Visibilities.mentioned: "direct", activities_models.Post.Visibilities.local_only: "public", } + preferred_posting_language = None + if identity.config_identity.preferred_posting_language != "": + preferred_posting_language = ( + identity.config_identity.preferred_posting_language + ) + return cls.parse_obj( { "posting:default:visibility": visibility_mapping[ identity.config_identity.default_post_visibility ], "posting:default:sensitive": False, - "posting:default:language": identity.config_identity.preferred_posting_language, + "posting:default:language": preferred_posting_language, "reading:expand:media": "default", "reading:expand:spoilers": identity.config_identity.expand_content_warnings, } diff --git a/core/models/config.py b/core/models/config.py index b3e322cb7..89f706102 100644 --- a/core/models/config.py +++ b/core/models/config.py @@ -286,7 +286,7 @@ class IdentityOptions(pydantic.BaseModel): visible_reaction_counts: bool = True expand_content_warnings: bool = False boosts_on_profile: bool = True - preferred_posting_language: str | None = None + preferred_posting_language: str = "" class DomainOptions(pydantic.BaseModel): site_name: str = "" diff --git a/tests/activities/models/test_post.py b/tests/activities/models/test_post.py index 4aa6d9bc5..4a13fee0f 100644 --- a/tests/activities/models/test_post.py +++ b/tests/activities/models/test_post.py @@ -259,7 +259,7 @@ def test_content_map(remote_identity): create=True, ) assert post.content == "Hi World" - assert post.language is None + assert post.language == "" post2 = Post.by_ap( data={ @@ -272,7 +272,7 @@ def test_content_map(remote_identity): create=True, ) assert post2.content == "Hey World" - assert post2.language is None + assert post2.language == "" post3 = Post.by_ap( data={ From 9204ae5a93dac66663b8533cb7db353e6c2e1887 Mon Sep 17 00:00:00 2001 From: Christof Dorner Date: Tue, 6 Jun 2023 20:24:50 +0200 Subject: [PATCH 6/6] allow user to unselect a preferred posting language --- users/views/settings/posting.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/users/views/settings/posting.py b/users/views/settings/posting.py index 9bef87061..de393577b 100644 --- a/users/views/settings/posting.py +++ b/users/views/settings/posting.py @@ -21,11 +21,16 @@ class PostingPage(SettingsPage): "title": "Default posting language", "help_text": "", "choices": sorted( - [ - (lang.alpha_2, lang.name) - for lang in pycountry.languages - if hasattr(lang, "alpha_2") - ], + ( + [ + ("", ""), + ] + + [ + (lang.alpha_2, lang.name) + for lang in pycountry.languages + if hasattr(lang, "alpha_2") + ] + ), key=lambda lang: lang[1], ), },