-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
32-transform-to-and-from-xliff-2 (#33)
- Loading branch information
1 parent
5e11325
commit 69f6865
Showing
2 changed files
with
299 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
package convert | ||
|
||
import ( | ||
"encoding/xml" | ||
"fmt" | ||
|
||
"go.expect.digital/translate/pkg/model" | ||
"golang.org/x/text/language" | ||
) | ||
|
||
// XLIFF 2 Specification: https://docs.oasis-open.org/xliff/xliff-core/v2.0/os/xliff-core-v2.0-os.html | ||
// XLIFF 2 Example: https://localizely.com/xliff-file/?tab=xliff-20 | ||
|
||
// xliff2 maps the root <xliff> element of a document in the
// urn:oasis:names:tc:xliff:document:2.0 namespace.
type xliff2 struct {
	// XMLName pins the element name and namespace used when (un)marshalling.
	XMLName xml.Name `xml:"urn:oasis:names:tc:xliff:document:2.0 xliff"`
	// Version is the value of the "version" attribute.
	Version string `xml:"version,attr"`
	// SrcLang is the value of the "srcLang" attribute, held as a language tag.
	SrcLang language.Tag `xml:"srcLang,attr"`
	// File maps the <file> child element.
	File file `xml:"file"`
}
// file maps a <file> element; only its <unit> children are captured.
type file struct {
	// Units collects every <unit> child element.
	Units []unit `xml:"unit"`
}
|
||
// unit maps a <unit> element: its "id" attribute, the <note> elements nested
// under <notes>, and the <source> text nested under <segment>.
type unit struct {
	// ID is the value of the "id" attribute.
	ID string `xml:"id,attr"`
	// Notes points to the <notes>/<note> entries and is nil when there are none.
	Notes *[]note `xml:"notes>note"` // Set as pointer to avoid empty <notes></notes> when marshalling.
	// Source is the text of <segment>/<source>.
	Source string `xml:"segment>source"`
}
|
||
// note maps a single <note> element.
type note struct {
	// Category is the value of the "category" attribute (e.g. "description").
	Category string `xml:"category,attr"`
	// Content is the character data enclosed by the element.
	Content string `xml:",chardata"`
}
|
||
// FromXliff2 converts serialized data from the XML data in the XLIFF 2 format into a model.Messages struct. | ||
func FromXliff2(data []byte) (model.Messages, error) { | ||
var xlf xliff2 | ||
if err := xml.Unmarshal(data, &xlf); err != nil { | ||
return model.Messages{}, fmt.Errorf("unmarshal XLIFF 2 formatted XML into xliff2 struct: %w", err) | ||
} | ||
|
||
messages := model.Messages{Language: xlf.SrcLang, Messages: make([]model.Message, 0, len(xlf.File.Units))} | ||
|
||
findDescription := func(u unit) string { | ||
for _, note := range *u.Notes { | ||
if note.Category == "description" { | ||
return note.Content | ||
} | ||
} | ||
|
||
return "" | ||
} | ||
|
||
for _, unit := range xlf.File.Units { | ||
messages.Messages = append(messages.Messages, model.Message{ | ||
ID: unit.ID, | ||
Message: unit.Source, | ||
Description: findDescription(unit), | ||
}) | ||
} | ||
|
||
return messages, nil | ||
} | ||
|
||
// ToXliff2 converts a model.Messages struct into a byte slice in the XLIFF 2 format. | ||
func ToXliff2(messages model.Messages) ([]byte, error) { | ||
xlf := xliff2{ | ||
Version: "2.0", | ||
SrcLang: messages.Language, | ||
File: file{ | ||
Units: make([]unit, 0, len(messages.Messages)), | ||
}, | ||
} | ||
|
||
for _, msg := range messages.Messages { | ||
var notes *[]note | ||
if msg.Description != "" { | ||
notes = &[]note{{Category: "description", Content: msg.Description}} | ||
} | ||
|
||
xlf.File.Units = append(xlf.File.Units, unit{ | ||
ID: msg.ID, | ||
Source: msg.Message, | ||
Notes: notes, | ||
}) | ||
} | ||
|
||
data, err := xml.Marshal(&xlf) | ||
if err != nil { | ||
return nil, fmt.Errorf("marshal xliff2 struct to XLIFF 2 formatted XML: %w", err) | ||
} | ||
|
||
dataWithHeader := append([]byte(xml.Header), data...) // prepend generic XML header | ||
|
||
return dataWithHeader, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
package convert | ||
|
||
import ( | ||
"fmt" | ||
"regexp" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"go.expect.digital/translate/pkg/model" | ||
"golang.org/x/text/language" | ||
) | ||
|
||
func assertEqualXml(t *testing.T, expected, actual []byte) bool { | ||
t.Helper() | ||
|
||
// Matches a substring that starts with > and ends with < with zero or more whitespace in between. | ||
re := regexp.MustCompile(`>(\s*)<`) | ||
expectedTrimmed := re.ReplaceAllString(string(expected), "><") | ||
actualTrimmed := re.ReplaceAllString(string(actual), "><") | ||
|
||
return assert.Equal(t, expectedTrimmed, actualTrimmed) | ||
} | ||
|
||
func TestFromXliff2(t *testing.T) { | ||
t.Parallel() | ||
|
||
tests := []struct { | ||
name string | ||
wantErr error | ||
data []byte | ||
want model.Messages | ||
}{ | ||
{ | ||
name: "All OK", | ||
data: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||
<xliff version="2.0" xmlns="urn:oasis:names:tc:xliff:document:2.0" srcLang="en" trgLang="fr"> | ||
<file id="ngi18n" original="ng.template"> | ||
<unit id="common.welcome"> | ||
<notes> | ||
<note category="location">src/app/app.component.html:16</note> | ||
</notes> | ||
<segment> | ||
<source>Welcome!</source> | ||
<target>Bienvenue!</target> | ||
</segment> | ||
</unit> | ||
<unit id="common.app.title"> | ||
<notes> | ||
<note category="location">src/app/app.component.html:4</note> | ||
<note category="description">App title</note> | ||
</notes> | ||
<segment> | ||
<source>Diary</source> | ||
<target>Agenda</target> | ||
</segment> | ||
</unit> | ||
</file> | ||
</xliff>`), | ||
want: model.Messages{ | ||
Language: language.English, | ||
Messages: []model.Message{ | ||
{ | ||
ID: "common.welcome", | ||
Message: "Welcome!", | ||
}, | ||
{ | ||
ID: "common.app.title", | ||
Message: "Diary", | ||
Description: "App title", | ||
}, | ||
}, | ||
}, | ||
wantErr: nil, | ||
}, | ||
{ | ||
name: "Malformed language tag", | ||
data: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||
<xliff version="2.0" xmlns="urn:oasis:names:tc:xliff:document:2.0" srcLang="xyz-ZY-Latn" trgLang="fr"> | ||
<file id="ngi18n" original="ng.template"> | ||
<unit id="common.welcome"> | ||
<notes> | ||
<note category="location">src/app/app.component.html:16</note> | ||
</notes> | ||
<segment> | ||
<source>Welcome!</source> | ||
<target>Bienvenue!</target> | ||
</segment> | ||
</unit> | ||
<unit id="common.app.title"> | ||
<notes> | ||
<note category="location">src/app/app.component.html:4</note> | ||
<note category="description">App title</note> | ||
</notes> | ||
<segment> | ||
<source>Diary</source> | ||
<target>Agenda</target> | ||
</segment> | ||
</unit> | ||
</file> | ||
</xliff>`), | ||
wantErr: fmt.Errorf("language: subtag \"xyz\" is well-formed but unknown"), | ||
}, | ||
} | ||
for _, tt := range tests { | ||
tt := tt | ||
t.Run(tt.name, func(t *testing.T) { | ||
t.Parallel() | ||
|
||
result, err := FromXliff2(tt.data) | ||
if tt.wantErr != nil { | ||
assert.ErrorContains(t, err, tt.wantErr.Error()) | ||
return | ||
} | ||
|
||
if !assert.NoError(t, err) { | ||
return | ||
} | ||
|
||
assert.Equal(t, tt.want.Language, result.Language) | ||
assert.ElementsMatch(t, tt.want.Messages, result.Messages) | ||
}) | ||
} | ||
} | ||
|
||
func Test_ToXliff2(t *testing.T) { | ||
t.Parallel() | ||
|
||
tests := []struct { | ||
name string | ||
want []byte | ||
wantErr error | ||
messages model.Messages | ||
}{ | ||
{ | ||
name: "All OK", | ||
want: []byte(`<?xml version="1.0" encoding="UTF-8"?> | ||
<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en"> | ||
<file> | ||
<unit id="Welcome"> | ||
<notes> | ||
<note category="description">To welcome a new visitor</note> | ||
</notes> | ||
<segment> | ||
<source>Welcome to our website!</source> | ||
</segment> | ||
</unit> | ||
<unit id="Error"> | ||
<notes> | ||
<note category="description">To inform the user of an error</note> | ||
</notes> | ||
<segment> | ||
<source>Something went wrong. Please try again later.</source> | ||
</segment> | ||
</unit> | ||
<unit id="Feedback"> | ||
<segment> | ||
<source>We appreciate your feedback. Thank you for using our service.</source> | ||
</segment> | ||
</unit> | ||
</file> | ||
</xliff>`), | ||
wantErr: nil, | ||
messages: model.Messages{ | ||
Language: language.English, | ||
Messages: []model.Message{ | ||
{ | ||
ID: "Welcome", | ||
Message: "Welcome to our website!", | ||
Description: "To welcome a new visitor", | ||
}, | ||
{ | ||
ID: "Error", | ||
Message: "Something went wrong. Please try again later.", | ||
Description: "To inform the user of an error", | ||
}, | ||
{ | ||
ID: "Feedback", | ||
Message: "We appreciate your feedback. Thank you for using our service.", | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
for _, tt := range tests { | ||
tt := tt | ||
t.Run(tt.name, func(t *testing.T) { | ||
t.Parallel() | ||
|
||
result, err := ToXliff2(tt.messages) | ||
|
||
if tt.wantErr != nil { | ||
assert.ErrorContains(t, err, tt.wantErr.Error()) | ||
return | ||
} | ||
|
||
if !assert.NoError(t, err) { | ||
return | ||
} | ||
|
||
assertEqualXml(t, tt.want, result) | ||
}) | ||
} | ||
} |