Skip to content

Commit

Permalink
32-transform-to-and-from-xliff-2 (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
VladislavsPerkanuks authored Mar 9, 2023
1 parent 5e11325 commit 69f6865
Show file tree
Hide file tree
Showing 2 changed files with 299 additions and 0 deletions.
96 changes: 96 additions & 0 deletions pkg/convert/xliff2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package convert

import (
"encoding/xml"
"fmt"

"go.expect.digital/translate/pkg/model"
"golang.org/x/text/language"
)

// XLIFF 2 Specification: https://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html
// XLIFF 2 Example: https://localizely.com/xliff-file/?tab=xliff-20

// xliff2 is the root <xliff> element of an XLIFF 2 document as handled by this package.
// Only the parts of the document that FromXliff2 and ToXliff2 use are modelled.
type xliff2 struct {
// XMLName binds the element to the name "xliff" in the XLIFF 2.0 namespace.
XMLName xml.Name `xml:"urn:oasis:names:tc:xliff:document:2.0 xliff"`
// Version is the "version" attribute; ToXliff2 writes "2.0".
Version string `xml:"version,attr"`
// SrcLang is the "srcLang" attribute, i.e. the language of the <source> texts.
SrcLang language.Tag `xml:"srcLang,attr"`
// File maps the <file> child element.
File file `xml:"file"`
}
// file is the <file> element; it only carries the list of translation units.
type file struct {
// Units collects every <unit> child element.
Units []unit `xml:"unit"`
}

// unit is a single <unit> element: one translatable message with optional notes.
type unit struct {
// ID is the "id" attribute of the unit.
ID string `xml:"id,attr"`
// Notes holds the <note> elements nested inside <notes>.
// It is nil when the unit has no notes, so callers must check before dereferencing.
Notes *[]note `xml:"notes>note"` // Set as pointer to avoid empty <notes></notes> when marshalling.
// Source is the text of <source> nested inside <segment>.
Source string `xml:"segment>source"`
}

// note is a <note> element attached to a unit.
type note struct {
// Category is the "category" attribute; this package reads and writes the value "description".
Category string `xml:"category,attr"`
// Content is the character data of the note.
Content string `xml:",chardata"`
}

// FromXliff2 converts serialized data from the XML data in the XLIFF 2 format into a model.Messages struct.
//
// The srcLang attribute becomes the language of the result. Every <unit> yields one
// model.Message: the unit id is used as ID, the <segment><source> text as Message and the
// content of the first note with category "description" as Description. Units without
// notes, or without a "description" note, get an empty Description.
//
// An error is returned when data cannot be unmarshalled into the XLIFF 2 structure.
func FromXliff2(data []byte) (model.Messages, error) {
	var xlf xliff2
	if err := xml.Unmarshal(data, &xlf); err != nil {
		return model.Messages{}, fmt.Errorf("unmarshal XLIFF 2 formatted XML into xliff2 struct: %w", err)
	}

	messages := model.Messages{Language: xlf.SrcLang, Messages: make([]model.Message, 0, len(xlf.File.Units))}

	findDescription := func(u unit) string {
		// Notes stays nil when the unit has no <notes>/<note> children;
		// dereferencing it unconditionally would panic on such units.
		if u.Notes == nil {
			return ""
		}

		for _, n := range *u.Notes {
			if n.Category == "description" {
				return n.Content
			}
		}

		return ""
	}

	for _, u := range xlf.File.Units {
		messages.Messages = append(messages.Messages, model.Message{
			ID:          u.ID,
			Message:     u.Source,
			Description: findDescription(u),
		})
	}

	return messages, nil
}

// ToXliff2 serializes model.Messages as an XLIFF 2 document.
//
// Each message becomes a <unit> whose id is the message ID and whose <segment><source>
// is the message text. A non-empty Description is written as a single note with
// category "description"; messages without a description get no <notes> element.
// The generic XML header is placed in front of the marshalled document.
func ToXliff2(messages model.Messages) ([]byte, error) {
	units := make([]unit, 0, len(messages.Messages))

	for i := range messages.Messages {
		m := messages.Messages[i]

		u := unit{ID: m.ID, Source: m.Message}
		if m.Description != "" {
			// Leave Notes nil otherwise so that no empty <notes></notes> is emitted.
			u.Notes = &[]note{{Category: "description", Content: m.Description}}
		}

		units = append(units, u)
	}

	doc := xliff2{
		Version: "2.0",
		SrcLang: messages.Language,
		File:    file{Units: units},
	}

	body, err := xml.Marshal(&doc)
	if err != nil {
		return nil, fmt.Errorf("marshal xliff2 struct to XLIFF 2 formatted XML: %w", err)
	}

	// Assemble header + body in one pre-sized buffer.
	out := make([]byte, 0, len(xml.Header)+len(body))
	out = append(out, xml.Header...)
	out = append(out, body...)

	return out, nil
}
203 changes: 203 additions & 0 deletions pkg/convert/xliff2_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package convert

import (
"fmt"
"regexp"
"testing"

"github.com/stretchr/testify/assert"
"go.expect.digital/translate/pkg/model"
"golang.org/x/text/language"
)

// assertEqualXml asserts that two XML documents are equal once the whitespace that sits
// between adjacent tags has been removed, so indentation differences are ignored.
// It reports whether the assertion succeeded.
func assertEqualXml(t *testing.T, expected, actual []byte) bool {
	t.Helper()

	// Any run of whitespace (possibly empty) enclosed by '>' and '<'.
	betweenTags := regexp.MustCompile(`>(\s*)<`)

	squash := func(doc []byte) string {
		return betweenTags.ReplaceAllString(string(doc), "><")
	}

	return assert.Equal(t, squash(expected), squash(actual))
}

// TestFromXliff2 checks that FromXliff2 parses a well-formed XLIFF 2 document into the
// expected messages and that an unknown source language tag is reported as an error.
func TestFromXliff2(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name    string
		wantErr error
		data    []byte
		want    model.Messages
	}

	cases := []testCase{
		{
			name: "All OK",
			data: []byte(`<?xml version="1.0" encoding="UTF-8"?>
<xliff version="2.0" xmlns="urn:oasis:names:tc:xliff:document:2.0" srcLang="en" trgLang="fr">
<file id="ngi18n" original="ng.template">
<unit id="common.welcome">
<notes>
<note category="location">src/app/app.component.html:16</note>
</notes>
<segment>
<source>Welcome!</source>
<target>Bienvenue!</target>
</segment>
</unit>
<unit id="common.app.title">
<notes>
<note category="location">src/app/app.component.html:4</note>
<note category="description">App title</note>
</notes>
<segment>
<source>Diary</source>
<target>Agenda</target>
</segment>
</unit>
</file>
</xliff>`),
			want: model.Messages{
				Language: language.English,
				Messages: []model.Message{
					{ID: "common.welcome", Message: "Welcome!"},
					{ID: "common.app.title", Message: "Diary", Description: "App title"},
				},
			},
		},
		{
			name: "Malformed language tag",
			data: []byte(`<?xml version="1.0" encoding="UTF-8"?>
<xliff version="2.0" xmlns="urn:oasis:names:tc:xliff:document:2.0" srcLang="xyz-ZY-Latn" trgLang="fr">
<file id="ngi18n" original="ng.template">
<unit id="common.welcome">
<notes>
<note category="location">src/app/app.component.html:16</note>
</notes>
<segment>
<source>Welcome!</source>
<target>Bienvenue!</target>
</segment>
</unit>
<unit id="common.app.title">
<notes>
<note category="location">src/app/app.component.html:4</note>
<note category="description">App title</note>
</notes>
<segment>
<source>Diary</source>
<target>Agenda</target>
</segment>
</unit>
</file>
</xliff>`),
			wantErr: fmt.Errorf("language: subtag \"xyz\" is well-formed but unknown"),
		},
	}

	for _, tc := range cases {
		tc := tc // capture the per-iteration value for the parallel subtest
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got, err := FromXliff2(tc.data)

			// Error cases only verify the error text; the result is not inspected.
			if tc.wantErr != nil {
				assert.ErrorContains(t, err, tc.wantErr.Error())
				return
			}

			if assert.NoError(t, err) {
				assert.Equal(t, tc.want.Language, got.Language)
				assert.ElementsMatch(t, tc.want.Messages, got.Messages)
			}
		})
	}
}

// Test_ToXliff2 checks that ToXliff2 renders messages as the expected XLIFF 2 document,
// including the omission of <notes> for a message without a description.
func Test_ToXliff2(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name     string
		want     []byte
		wantErr  error
		messages model.Messages
	}

	cases := []testCase{
		{
			name: "All OK",
			messages: model.Messages{
				Language: language.English,
				Messages: []model.Message{
					{
						ID:          "Welcome",
						Message:     "Welcome to our website!",
						Description: "To welcome a new visitor",
					},
					{
						ID:          "Error",
						Message:     "Something went wrong. Please try again later.",
						Description: "To inform the user of an error",
					},
					{
						ID:      "Feedback",
						Message: "We appreciate your feedback. Thank you for using our service.",
					},
				},
			},
			want: []byte(`<?xml version="1.0" encoding="UTF-8"?>
<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en">
<file>
<unit id="Welcome">
<notes>
<note category="description">To welcome a new visitor</note>
</notes>
<segment>
<source>Welcome to our website!</source>
</segment>
</unit>
<unit id="Error">
<notes>
<note category="description">To inform the user of an error</note>
</notes>
<segment>
<source>Something went wrong. Please try again later.</source>
</segment>
</unit>
<unit id="Feedback">
<segment>
<source>We appreciate your feedback. Thank you for using our service.</source>
</segment>
</unit>
</file>
</xliff>`),
		},
	}

	for _, tc := range cases {
		tc := tc // capture the per-iteration value for the parallel subtest
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got, err := ToXliff2(tc.messages)

			// Error cases only verify the error text; the output is not inspected.
			if tc.wantErr != nil {
				assert.ErrorContains(t, err, tc.wantErr.Error())
				return
			}

			if assert.NoError(t, err) {
				assertEqualXml(t, tc.want, got)
			}
		})
	}
}

0 comments on commit 69f6865

Please sign in to comment.