diff --git a/go.mod b/go.mod
index 102baef..bfee221 100644
--- a/go.mod
+++ b/go.mod
@@ -8,6 +8,7 @@ require (
github.com/rs/zerolog v1.33.0
github.com/stretchr/testify v1.9.0
go.mau.fi/util v0.8.2
+ golang.org/x/net v0.31.0
google.golang.org/protobuf v1.35.2
maunium.net/go/mautrix v0.22.1-0.20241126202918-4b970e0ea7e6
)
@@ -34,7 +35,6 @@ require (
go.mau.fi/zeroconfig v0.1.3 // indirect
golang.org/x/crypto v0.29.0 // indirect
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect
- golang.org/x/net v0.31.0 // indirect
golang.org/x/sync v0.9.0 // indirect
golang.org/x/sys v0.27.0 // indirect
golang.org/x/text v0.20.0 // indirect
diff --git a/pkg/connector/handlematrix.go b/pkg/connector/handlematrix.go
index 6d61f4c..5485174 100644
--- a/pkg/connector/handlematrix.go
+++ b/pkg/connector/handlematrix.go
@@ -40,7 +40,9 @@ func (c *GChatClient) HandleMatrixMessage(ctx context.Context, msg *bridgev2.Mat
}
var annotations []*proto.Annotation
- var messageInfo *proto.MessageInfo
+ messageInfo := &proto.MessageInfo{
+ AcceptFormatAnnotations: true,
+ }
if msg.Content.MsgType.IsMedia() {
data, err := c.userLogin.Bridge.Bot.DownloadMedia(ctx, msg.Content.URL, msg.Content.File)
@@ -68,35 +70,35 @@ func (c *GChatClient) HandleMatrixMessage(ctx context.Context, msg *bridgev2.Mat
if msg.ThreadRoot != nil {
topicId = string(msg.ThreadRoot.ID)
}
- messageInfo = &proto.MessageInfo{
- AcceptFormatAnnotations: true,
- ReplyTo: &proto.SendReplyTarget{
- Id: &proto.MessageId{
- ParentId: &proto.MessageParentId{
- Parent: &proto.MessageParentId_TopicId{
- TopicId: &proto.TopicId{
- GroupId: groupId,
- TopicId: topicId,
- },
+ messageInfo.ReplyTo = &proto.SendReplyTarget{
+ Id: &proto.MessageId{
+ ParentId: &proto.MessageParentId{
+ Parent: &proto.MessageParentId_TopicId{
+ TopicId: &proto.TopicId{
+ GroupId: groupId,
+ TopicId: topicId,
},
},
- MessageId: replyToId,
},
- CreateTime: msg.ReplyTo.Timestamp.UnixMicro(),
+ MessageId: replyToId,
},
+ CreateTime: msg.ReplyTo.Timestamp.UnixMicro(),
}
}
var msgID string
var timestamp int64
+ textBody := msg.Content.Body
+ text, entities := c.msgConv.ToGChat(ctx, msg.Content)
+
+ if entities != nil {
+ textBody = text
+ annotations = entities
+ }
+
if msg.ThreadRoot != nil {
threadId := string(msg.ThreadRoot.ID)
- if messageInfo == nil {
- messageInfo = &proto.MessageInfo{
- AcceptFormatAnnotations: true,
- }
- }
req := &proto.CreateMessageRequest{
ParentId: &proto.MessageParentId{
Parent: &proto.MessageParentId_TopicId{
@@ -107,7 +109,7 @@ func (c *GChatClient) HandleMatrixMessage(ctx context.Context, msg *bridgev2.Mat
},
},
LocalId: string(msg.Event.ID),
- TextBody: msg.Content.Body,
+ TextBody: textBody,
Annotations: annotations,
MessageInfo: messageInfo,
}
@@ -120,7 +122,7 @@ func (c *GChatClient) HandleMatrixMessage(ctx context.Context, msg *bridgev2.Mat
} else {
req := &proto.CreateTopicRequest{
GroupId: groupId,
- TextBody: msg.Content.Body,
+ TextBody: textBody,
Annotations: annotations,
MessageInfo: messageInfo,
}
diff --git a/pkg/msgconv/from-matrix.go b/pkg/msgconv/from-matrix.go
new file mode 100644
index 0000000..726a184
--- /dev/null
+++ b/pkg/msgconv/from-matrix.go
@@ -0,0 +1,19 @@
+package msgconv
+
+import (
+ "context"
+
+ "maunium.net/go/mautrix/event"
+
+ "go.mau.fi/mautrix-googlechat/pkg/gchatmeow/proto"
+ "go.mau.fi/mautrix-googlechat/pkg/msgconv/matrixfmt"
+)
+
+// ToGChat converts the content of a Matrix message event into a text body and
+// a list of annotations.
+//
+// The work is delegated to matrixfmt.Parse using a freshly created
+// HTMLParser; its results are returned unchanged.
+func (mc *MessageConverter) ToGChat(
+	ctx context.Context,
+	content *event.MessageEventContent,
+) (string, []*proto.Annotation) {
+	return matrixfmt.Parse(ctx, &matrixfmt.HTMLParser{}, content)
+}
diff --git a/pkg/msgconv/gchatfmt/convert_test.go b/pkg/msgconv/gchatfmt/convert_test.go
index a9956fa..448370f 100644
--- a/pkg/msgconv/gchatfmt/convert_test.go
+++ b/pkg/msgconv/gchatfmt/convert_test.go
@@ -10,23 +10,7 @@ import (
"go.mau.fi/mautrix-googlechat/pkg/msgconv/gchatfmt"
)
-func makeAnnotation(start, length int32, format proto.FormatMetadata_FormatType) *proto.Annotation {
- return &proto.Annotation{
- Type: proto.AnnotationType_FORMAT_DATA,
- StartIndex: start,
- Length: length,
- ChipRenderType: proto.Annotation_DO_NOT_RENDER,
- Metadata: &proto.Annotation_FormatMetadata{
- FormatMetadata: &proto.FormatMetadata{
- FormatType: format,
- },
- },
- }
-}
-
func TestParse(t *testing.T) {
- assert.Equal(t, 1, 1)
-
tests := []struct {
name string
ins string
@@ -43,10 +27,10 @@ func TestParse(t *testing.T) {
name: "bold italic strike underline",
ins: "a b i s u z",
ine: []*proto.Annotation{
- makeAnnotation(2, 1, proto.FormatMetadata_BOLD),
- makeAnnotation(4, 1, proto.FormatMetadata_ITALIC),
- makeAnnotation(6, 1, proto.FormatMetadata_STRIKE),
- makeAnnotation(8, 1, proto.FormatMetadata_UNDERLINE),
+ gchatfmt.MakeAnnotation(2, 1, proto.FormatMetadata_BOLD),
+ gchatfmt.MakeAnnotation(4, 1, proto.FormatMetadata_ITALIC),
+ gchatfmt.MakeAnnotation(6, 1, proto.FormatMetadata_STRIKE),
+ gchatfmt.MakeAnnotation(8, 1, proto.FormatMetadata_UNDERLINE),
},
body: "a b i s u z",
html: "a b i s u z",
@@ -55,7 +39,7 @@ func TestParse(t *testing.T) {
name: "emoji",
ins: "🎆 a b z",
ine: []*proto.Annotation{
- makeAnnotation(5, 1, proto.FormatMetadata_BOLD),
+ gchatfmt.MakeAnnotation(5, 1, proto.FormatMetadata_BOLD),
},
body: "🎆 a b z",
html: "🎆 a b z",
diff --git a/pkg/msgconv/gchatfmt/utils.go b/pkg/msgconv/gchatfmt/utils.go
new file mode 100644
index 0000000..9781867
--- /dev/null
+++ b/pkg/msgconv/gchatfmt/utils.go
@@ -0,0 +1,18 @@
+package gchatfmt
+
+import "go.mau.fi/mautrix-googlechat/pkg/gchatmeow/proto"
+
+// MakeAnnotation builds a FORMAT_DATA annotation that applies the given format
+// type to the range beginning at start and spanning length. The chip render
+// type is always DO_NOT_RENDER.
+func MakeAnnotation(start, length int32, format proto.FormatMetadata_FormatType) *proto.Annotation {
+	metadata := &proto.Annotation_FormatMetadata{
+		FormatMetadata: &proto.FormatMetadata{FormatType: format},
+	}
+	return &proto.Annotation{
+		Type:           proto.AnnotationType_FORMAT_DATA,
+		StartIndex:     start,
+		Length:         length,
+		ChipRenderType: proto.Annotation_DO_NOT_RENDER,
+		Metadata:       metadata,
+	}
+}
diff --git a/pkg/msgconv/matrixfmt/convert.go b/pkg/msgconv/matrixfmt/convert.go
new file mode 100644
index 0000000..002e1dc
--- /dev/null
+++ b/pkg/msgconv/matrixfmt/convert.go
@@ -0,0 +1,29 @@
+package matrixfmt
+
+import (
+ "context"
+
+ "maunium.net/go/mautrix/event"
+
+ "go.mau.fi/mautrix-googlechat/pkg/gchatmeow/proto"
+)
+
+// Parse converts the formatted body of a Matrix message into text plus a list
+// of format annotations.
+//
+// Content whose format is not HTML is passed through: its plain Body is
+// returned with nil annotations. If the parser yields no result, an empty
+// string and nil annotations are returned. The annotation slice is nil
+// whenever the parsed result carries no entities.
+func Parse(ctx context.Context, parser *HTMLParser, content *event.MessageEventContent) (string, []*proto.Annotation) {
+	if content.Format != event.FormatHTML {
+		return content.Body, nil
+	}
+
+	htmlCtx := NewContext(ctx)
+	htmlCtx.AllowedMentions = content.Mentions
+
+	result := parser.Parse(content.FormattedBody, htmlCtx)
+	if result == nil {
+		return "", nil
+	}
+	if len(result.Entities) == 0 {
+		return result.String.String(), nil
+	}
+
+	annotations := make([]*proto.Annotation, 0, len(result.Entities))
+	for _, entity := range result.Entities {
+		annotations = append(annotations, entity.Proto())
+	}
+	return result.String.String(), annotations
+}
diff --git a/pkg/msgconv/matrixfmt/convert_test.go b/pkg/msgconv/matrixfmt/convert_test.go
new file mode 100644
index 0000000..d97e78d
--- /dev/null
+++ b/pkg/msgconv/matrixfmt/convert_test.go
@@ -0,0 +1,46 @@
+package matrixfmt_test
+
+import (
+ "context"
+ "fmt"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "maunium.net/go/mautrix/event"
+
+ "go.mau.fi/mautrix-googlechat/pkg/gchatmeow/proto"
+ "go.mau.fi/mautrix-googlechat/pkg/msgconv/gchatfmt"
+ "go.mau.fi/mautrix-googlechat/pkg/msgconv/matrixfmt"
+)
+
+// TestParse checks that matrixfmt.Parse turns Matrix HTML into the expected
+// text and format annotations.
+func TestParse(t *testing.T) {
+	tests := []struct {
+		name string
+		in   string
+		out  string
+		ent  []*proto.Annotation
+	}{
+		{name: "Plain", in: "Hello, World!", out: "Hello, World!"},
+		{
+			name: "Bold",
+			// The input has to contain bold markup; without it the parser
+			// has nothing to derive the expected annotation from.
+			in:  "a <b>b</b> c",
+			out: "a b c",
+			ent: []*proto.Annotation{
+				gchatfmt.MakeAnnotation(2, 1, proto.FormatMetadata_BOLD),
+			},
+		},
+	}
+
+	parser := &matrixfmt.HTMLParser{}
+	// Send parser trace output to stdout while this test runs, and put the
+	// previous logger back afterwards so other tests are not affected.
+	prevDebugLog := matrixfmt.DebugLog
+	matrixfmt.DebugLog = func(format string, args ...any) {
+		fmt.Printf(format, args...)
+	}
+	t.Cleanup(func() { matrixfmt.DebugLog = prevDebugLog })
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			parsed, entities := matrixfmt.Parse(context.TODO(), parser, &event.MessageEventContent{
+				Format:        event.FormatHTML,
+				FormattedBody: test.in,
+			})
+			assert.Equal(t, test.out, parsed)
+			assert.Equal(t, test.ent, entities)
+		})
+	}
+}
diff --git a/pkg/msgconv/matrixfmt/html.go b/pkg/msgconv/matrixfmt/html.go
new file mode 100644
index 0000000..d3974c0
--- /dev/null
+++ b/pkg/msgconv/matrixfmt/html.go
@@ -0,0 +1,454 @@
+package matrixfmt
+
+import (
+ "context"
+ "fmt"
+ "math"
+ "strconv"
+ "strings"
+
+ "golang.org/x/net/html"
+ "maunium.net/go/mautrix/event"
+
+ "go.mau.fi/mautrix-googlechat/pkg/gchatmeow"
+)
+
+// EntityString pairs a gchatmeow.UTF16String with the formatting ranges that
+// apply to it.
+type EntityString struct {
+ // String is the text content.
+ String gchatmeow.UTF16String
+ // Entities holds the formatting ranges; the helpers in this file treat each
+ // range's Start and Length as indexes into String.
+ Entities BodyRangeList
+}
+
+// DebugLog receives printf-style trace messages from the EntityString helpers.
+// It does nothing by default and can be replaced, for example by tests.
+var DebugLog = func(format string, args ...any) {}
+
+// NewEntityString wraps val in an EntityString that has no formatting
+// entities. The value is reported through DebugLog first.
+func NewEntityString(val string) *EntityString {
+	DebugLog("NEW %q\n", val)
+	es := new(EntityString)
+	es.String = gchatmeow.NewUTF16String(val)
+	return es
+}
+
+// Split cuts the string at every occurrence of the code unit at and returns
+// the pieces in order, without the delimiters. Entities are clipped to the
+// piece they fall into and re-based so their Start is relative to that piece;
+// an entity spanning several pieces is carried into each of them.
+//
+// Special cases:
+//   - at must be ASCII (<= 0x7F); anything else panics.
+//   - A nil receiver yields an empty, non-nil slice.
+//   - If the delimiter never occurs, the result is a one-element slice holding
+//     the receiver itself (not a copy).
+//   - A delimiter at the very end does not produce a trailing empty piece.
+func (es *EntityString) Split(at uint16) []*EntityString {
+	if at > 0x7F {
+		panic("cannot split at non-ASCII character")
+	}
+	if es == nil {
+		return []*EntityString{}
+	}
+	DebugLog("SPLIT %q %q %+v\n", es.String, rune(at), es.Entities)
+	var output []*EntityString
+	prevSplit := 0
+	// doSplit builds the piece covering [prevSplit, i) together with the
+	// entities that overlap that window.
+	doSplit := func(i int) *EntityString {
+		newES := &EntityString{
+			String: es.String[prevSplit:i],
+		}
+		for _, entity := range es.Entities {
+			// Skip ranges that lie entirely outside the window. The previous
+			// condition here was always true, so every entity was truncated
+			// and only the Length check below filtered anything out.
+			if entity.End() <= prevSplit || entity.Start >= i {
+				continue
+			}
+			entity = *entity.TruncateStart(prevSplit).TruncateEnd(i).Offset(-prevSplit)
+			if entity.Length > 0 {
+				newES.Entities = append(newES.Entities, entity)
+			}
+		}
+		return newES
+	}
+	for i, chr := range es.String {
+		if chr != at {
+			continue
+		}
+		newES := doSplit(i)
+		output = append(output, newES)
+		DebugLog("  -> %q %+v\n", newES.String, newES.Entities)
+		prevSplit = i + 1
+	}
+	if prevSplit == 0 {
+		DebugLog("  -> NOOP\n")
+		return []*EntityString{es}
+	}
+	// Emit whatever follows the last delimiter, unless the string ended on it.
+	if prevSplit != len(es.String) {
+		newES := doSplit(len(es.String))
+		output = append(output, newES)
+		DebugLog("  -> %q %+v\n", newES.String, newES.Entities)
+	}
+	DebugLog("SPLITEND\n")
+	return output
+}
+
+// TrimSpace removes leading and trailing whitespace code units (tab, LF, VT,
+// FF, CR, space, 0x85 and 0xA0) and adjusts the entities to match.
+//
+// A nil receiver returns nil. If nothing but whitespace remains, a new empty
+// EntityString is returned. Otherwise the receiver is modified in place and
+// returned; entities that end up empty after trimming are dropped.
+func (es *EntityString) TrimSpace() *EntityString {
+	if es == nil {
+		return nil
+	}
+	DebugLog("TRIMSPACE %q %+v\n", es.String, es.Entities)
+	cutStart := 0
+	for ; cutStart < len(es.String); cutStart++ {
+		switch es.String[cutStart] {
+		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
+			continue
+		}
+		break
+	}
+	cutEnd := len(es.String)
+	for ; cutEnd > cutStart; cutEnd-- {
+		switch es.String[cutEnd-1] {
+		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
+			continue
+		}
+		break
+	}
+	if cutEnd == cutStart {
+		DebugLog("  -> EMPTY\n")
+		return NewEntityString("")
+	}
+	if cutStart == 0 && cutEnd == len(es.String) {
+		DebugLog("  -> NOOP\n")
+		return es
+	}
+	// Filter in place, reusing the backing array of the existing list.
+	newEntities := es.Entities[:0]
+	for _, ent := range es.Entities {
+		// Clip to [cutStart, cutEnd) while still in the old coordinates, then
+		// re-base. Shifting first would leave ranges that begin inside the
+		// trimmed prefix with a negative Start, and would compare the shifted
+		// end against an unshifted cutEnd.
+		ent = *ent.TruncateStart(cutStart).TruncateEnd(cutEnd).Offset(-cutStart)
+		if ent.Length > 0 {
+			newEntities = append(newEntities, ent)
+		}
+	}
+	es.String = es.String[cutStart:cutEnd]
+	es.Entities = newEntities
+	DebugLog("  -> %q %+v\n", es.String, es.Entities)
+	return es
+}
+
+// JoinEntityString concatenates the given strings, writing the separator with
+// after every element that is kept (including the last one). Nil and empty
+// elements are skipped entirely. Entities are carried over with their Start
+// shifted to the element's position in the joined string.
+//
+// The result is never nil, even when there is nothing to join.
+func JoinEntityString(with string, strings ...*EntityString) *EntityString {
+	withUTF16 := gchatmeow.NewUTF16String(with)
+	totalLen := 0
+	totalEntities := 0
+	for _, s := range strings {
+		// Nil elements are legal input (the join loop below skips them), so
+		// they must not be dereferenced while sizing the buffers either.
+		if s == nil {
+			continue
+		}
+		totalLen += len(s.String)
+		totalEntities += len(s.Entities)
+	}
+	str := make(gchatmeow.UTF16String, 0, totalLen+len(strings)*len(withUTF16))
+	entities := make(BodyRangeList, 0, totalEntities)
+	DebugLog("JOIN %q %d\n", with, len(strings))
+	for _, s := range strings {
+		if s == nil || len(s.String) == 0 {
+			continue
+		}
+		DebugLog("  + %q %+v\n", s.String, s.Entities)
+		for _, entity := range s.Entities {
+			entity.Start += len(str)
+			entities = append(entities, entity)
+		}
+		str = append(str, s.String...)
+		str = append(str, withUTF16...)
+	}
+	DebugLog("  -> %q %+v\n", str, entities)
+	return &EntityString{
+		String:   str,
+		Entities: entities,
+	}
+}
+
+// Format marks the whole string with value by inserting a range covering
+// [0, len(String)) in front of the existing entities. The receiver is updated
+// in place and returned; a nil receiver returns nil.
+func (es *EntityString) Format(value BodyRangeValue) *EntityString {
+	if es == nil {
+		return nil
+	}
+	merged := make(BodyRangeList, 0, len(es.Entities)+1)
+	merged = append(merged, BodyRange{Start: 0, Length: len(es.String), Value: value})
+	merged = append(merged, es.Entities...)
+	es.Entities = merged
+	DebugLog("FORMAT %v %q %+v\n", value, es.String, es.Entities)
+	return es
+}
+
+// Append adds other to the end of the receiver, shifting other's entities by
+// the receiver's current length. The receiver is modified in place and
+// returned. If the receiver is nil, other is returned as-is; if other is nil,
+// the receiver is returned untouched.
+func (es *EntityString) Append(other *EntityString) *EntityString {
+	switch {
+	case es == nil:
+		return other
+	case other == nil:
+		return es
+	}
+	DebugLog("APPEND %q %+v\n   + %q %+v\n", es.String, es.Entities, other.String, other.Entities)
+	shift := len(es.String)
+	for _, entity := range other.Entities {
+		entity.Start += shift
+		es.Entities = append(es.Entities, entity)
+	}
+	es.String = append(es.String, other.String...)
+	DebugLog("  -> %q %+v\n", es.String, es.Entities)
+	return es
+}
+
+// AppendString adds plain, unformatted text to the end of the receiver and
+// returns it. A nil receiver produces a new EntityString holding other; an
+// empty other leaves the receiver unchanged.
+func (es *EntityString) AppendString(other string) *EntityString {
+	if es == nil {
+		return NewEntityString(other)
+	}
+	if other == "" {
+		return es
+	}
+	DebugLog("APPENDSTRING %q %+v\n   + %q\n", es.String, es.Entities, other)
+	suffix := gchatmeow.NewUTF16String(other)
+	es.String = append(es.String, suffix...)
+	DebugLog("  -> %q %+v\n", es.String, es.Entities)
+	return es
+}
+
+// TagStack is an ordered list of tag names; Context.WithTag appends to it.
+type TagStack []string
+
+// Index returns the position of the last occurrence of tag in the stack, or
+// -1 if the tag is not present.
+func (ts TagStack) Index(tag string) int {
+	pos := len(ts)
+	for pos > 0 {
+		pos--
+		if ts[pos] == tag {
+			return pos
+		}
+	}
+	return -1
+}
+
+// Has reports whether tag occurs anywhere in the stack.
+func (ts TagStack) Has(tag string) bool {
+	return ts.Index(tag) != -1
+}
+
+// Context carries per-parse state through the HTML conversion functions. It is
+// passed by value, so the With* helpers return modified copies.
+type Context struct {
+ // Ctx is the context.Context given to NewContext.
+ Ctx context.Context
+ // AllowedMentions is filled in by callers; it is not read in this file.
+ AllowedMentions *event.Mentions
+ // TagStack is extended by WithTag for every element that is entered.
+ TagStack TagStack
+ // PreserveWhitespace disables the removal of newlines from text nodes.
+ PreserveWhitespace bool
+}
+
+// NewContext returns a Context wrapping ctx with an empty tag stack that has
+// room for four tags before it needs to grow.
+func NewContext(ctx context.Context) Context {
+	var parseCtx Context
+	parseCtx.Ctx = ctx
+	parseCtx.TagStack = make(TagStack, 0, 4)
+	return parseCtx
+}
+
+// WithTag returns a copy of the context whose tag stack has tag appended.
+//
+// The copy may share its backing array with the receiver's stack, as with any
+// append on a copied slice header.
+func (ctx Context) WithTag(tag string) Context {
+	extended := append(ctx.TagStack, tag)
+	ctx.TagStack = extended
+	return ctx
+}
+
+// WithWhitespace returns a copy of the context with PreserveWhitespace set.
+func (ctx Context) WithWhitespace() Context {
+	preserved := ctx
+	preserved.PreserveWhitespace = true
+	return preserved
+}
+
+// HTMLParser is a somewhat customizable Matrix HTML parser.
+//
+// It currently has no fields; the hook below is commented out and inactive.
+type HTMLParser struct {
+ // GetUUIDFromMXID func(context.Context, id.UserID) uuid.UUID
+}
+
+// TaggedString is a string that also contains a HTML tag.
+//
+// For element nodes the tag is the element name; singleNodeToString uses the
+// pseudo-tags "text", "html" and "unknown" for other node types.
+type TaggedString struct {
+ *EntityString
+ tag string
+}
+
+// maybeGetAttribute looks up the first attribute on node whose key equals
+// attribute. It returns the value and true when found, or "" and false.
+func (parser *HTMLParser) maybeGetAttribute(node *html.Node, attribute string) (string, bool) {
+	for i := range node.Attr {
+		if candidate := &node.Attr[i]; candidate.Key == attribute {
+			return candidate.Val, true
+		}
+	}
+	return "", false
+}
+
+// getAttribute returns the value of the named attribute on node, or an empty
+// string when the attribute is absent.
+func (parser *HTMLParser) getAttribute(node *html.Node, attribute string) string {
+	if val, found := parser.maybeGetAttribute(node, attribute); found {
+		return val
+	}
+	return ""
+}
+
+// Digits counts the number of digits (and the sign, if negative) in an integer.
+//
+// The count is done with integer arithmetic so it is exact for every int,
+// including values too large for float64 to represent precisely. math.MinInt
+// is handled explicitly because negating it overflows back to itself, which
+// would otherwise never reach a positive value.
+func Digits(num int) int {
+	if num == 0 {
+		return 1
+	}
+	count := 0
+	if num < 0 {
+		count = 1 // the minus sign
+		if num == math.MinInt {
+			// |MinInt| and MaxInt have the same number of decimal digits.
+			num = math.MaxInt
+		} else {
+			num = -num
+		}
+	}
+	for ; num > 0; num /= 10 {
+		count++
+	}
+	return count
+}
+
+// listToString renders an <ol> or <ul> element as plain text, one item per
+// line. Unordered items are prefixed with "* ", ordered items with their
+// number followed by ". ", padded so that all item texts line up.
+// Continuation lines of a multi-line item are indented to align with the item
+// text. Children other than <li> are ignored.
+//
+// For ordered lists numbering begins at the element's "start" attribute when
+// it holds a valid integer, and at 1 otherwise.
+func (parser *HTMLParser) listToString(node *html.Node, ctx Context) *EntityString {
+	ordered := node.Data == "ol"
+	taggedChildren := parser.nodeToTaggedStrings(node.FirstChild, ctx)
+	counter := 1
+	indentLength := 0
+	if ordered {
+		if start := parser.getAttribute(node, "start"); len(start) > 0 {
+			// Only accept the attribute when it parses; a malformed value
+			// must not silently reset the numbering to 0.
+			if parsed, err := strconv.Atoi(start); err == nil {
+				counter = parsed
+			}
+		}
+
+		// Size the number column from the actual list items. Text nodes
+		// between the <li> elements do not get a number.
+		itemCount := 0
+		for _, child := range taggedChildren {
+			if child.tag == "li" {
+				itemCount++
+			}
+		}
+		longestIndex := (counter - 1) + itemCount
+		indentLength = Digits(longestIndex)
+	}
+	indent := strings.Repeat(" ", indentLength+2)
+	var children []*EntityString
+	for _, child := range taggedChildren {
+		if child.tag != "li" {
+			continue
+		}
+		var prefix string
+		if ordered {
+			indexPadding := indentLength - Digits(counter)
+			if indexPadding < 0 {
+				// This will happen on negative start indexes where longestIndex is usually wrong, otherwise shouldn't happen
+				indexPadding = 0
+			}
+			prefix = fmt.Sprintf("%d. %s", counter, strings.Repeat(" ", indexPadding))
+		} else {
+			prefix = "* "
+		}
+		es := NewEntityString(prefix).Append(child.EntityString)
+		counter++
+		parts := es.Split('\n')
+		// Every line after the first is indented to sit under the item text.
+		for i, part := range parts[1:] {
+			parts[i+1] = NewEntityString(indent).Append(part)
+		}
+		children = append(children, parts...)
+	}
+	return JoinEntityString("\n", children...)
+}
+
+// basicFormatToString converts an inline formatting element by rendering its
+// children and marking the whole result with the style matching the tag:
+// bold, italic, strikethrough, underline or monospace. Tags without a mapping
+// return the rendered children unchanged.
+func (parser *HTMLParser) basicFormatToString(node *html.Node, ctx Context) *EntityString {
+	str := parser.nodeToTagAwareString(node.FirstChild, ctx)
+	switch node.Data {
+	case "b", "strong":
+		return str.Format(StyleBold)
+	case "i", "em":
+		return str.Format(StyleItalic)
+	case "s", "del", "strike":
+		return str.Format(StyleStrikethrough)
+	case "u", "ins":
+		// Underline has its own style; previously this branch dropped the
+		// formatting and returned the text unmarked.
+		return str.Format(StyleUnderline)
+	case "tt", "code":
+		return str.Format(StyleMonospace)
+	}
+	return str
+}
+
+// spanToString renders the children of a <span> or <font> element; the
+// element itself contributes no formatting.
+func (parser *HTMLParser) spanToString(node *html.Node, ctx Context) *EntityString {
+	inner := parser.nodeToTagAwareString(node.FirstChild, ctx)
+	return inner
+}
+
+// headerToString renders an <h1>..<h6> element as bold text prefixed with as
+// many '#' characters as the heading level, followed by a space.
+func (parser *HTMLParser) headerToString(node *html.Node, ctx Context) *EntityString {
+	// The heading level is the digit after the 'h' in the tag name.
+	level := int(node.Data[1] - '0')
+	heading := NewEntityString(strings.Repeat("#", level) + " ")
+	heading = heading.Append(parser.nodeToString(node.FirstChild, ctx))
+	return heading.Format(StyleBold)
+}
+
+// blockquoteToString renders the children of a <blockquote>, trims the result
+// and prefixes each of its lines with "> ".
+func (parser *HTMLParser) blockquoteToString(node *html.Node, ctx Context) *EntityString {
+	quoted := parser.nodeToTagAwareString(node.FirstChild, ctx).TrimSpace().Split('\n')
+	for i := range quoted {
+		quoted[i] = NewEntityString("> ").Append(quoted[i])
+	}
+	return JoinEntityString("\n", quoted...)
+}
+
+// linkToString renders an <a> element. The link text is returned unchanged
+// when there is no href or when the text already equals the href; otherwise
+// the target is appended as " (href)".
+func (parser *HTMLParser) linkToString(node *html.Node, ctx Context) *EntityString {
+	str := parser.nodeToTagAwareString(node.FirstChild, ctx)
+	href := parser.getAttribute(node, "href")
+	if len(href) == 0 {
+		return str
+	}
+	// str is nil for a link without any content; guard the dereference so an
+	// empty <a href="..."></a> cannot crash the parser. AppendString accepts
+	// a nil receiver and then yields just the " (href)" suffix.
+	if str != nil && str.String.String() == href {
+		return str
+	}
+	return str.AppendString(fmt.Sprintf(" (%s)", href))
+}
+
+// tagToString converts a single element node by dispatching on its tag name.
+// The tag is appended to the context's tag stack before any children are
+// rendered. <p> and all tags without special handling simply render their
+// children.
+func (parser *HTMLParser) tagToString(node *html.Node, ctx Context) *EntityString {
+	ctx = ctx.WithTag(node.Data)
+	switch node.Data {
+	case "br":
+		return NewEntityString("\n")
+	case "hr":
+		return NewEntityString("---")
+	case "a":
+		return parser.linkToString(node, ctx)
+	case "blockquote":
+		return parser.blockquoteToString(node, ctx)
+	case "ul", "ol":
+		return parser.listToString(node, ctx)
+	case "h1", "h2", "h3", "h4", "h5", "h6":
+		return parser.headerToString(node, ctx)
+	case "span", "font":
+		return parser.spanToString(node, ctx)
+	case "b", "strong", "i", "em", "s", "strike", "del", "u", "ins", "tt", "code":
+		return parser.basicFormatToString(node, ctx)
+	case "pre":
+		// When the block is wrapped as <pre><code>…</code></pre>, render the
+		// contents of the inner <code> element directly.
+		content := node.FirstChild
+		if content != nil && content.Type == html.ElementNode && content.Data == "code" {
+			content = content.FirstChild
+		}
+		return parser.nodeToString(content, ctx.WithWhitespace()).Format(StyleMonospace)
+	}
+	return parser.nodeToTagAwareString(node.FirstChild, ctx)
+}
+
+// singleNodeToString converts one node, ignoring its siblings, and labels the
+// result with a tag: "text" for text nodes, the element name for elements,
+// "html" for the document node and "unknown" for everything else.
+//
+// Unless the context preserves whitespace, newlines are removed from text
+// nodes; the stripped text is written back to node.Data.
+func (parser *HTMLParser) singleNodeToString(node *html.Node, ctx Context) TaggedString {
+	switch node.Type {
+	case html.DocumentNode:
+		return TaggedString{EntityString: parser.nodeToTagAwareString(node.FirstChild, ctx), tag: "html"}
+	case html.ElementNode:
+		return TaggedString{EntityString: parser.tagToString(node, ctx), tag: node.Data}
+	case html.TextNode:
+		if !ctx.PreserveWhitespace {
+			node.Data = strings.ReplaceAll(node.Data, "\n", "")
+		}
+		return TaggedString{EntityString: NewEntityString(node.Data), tag: "text"}
+	}
+	return TaggedString{EntityString: &EntityString{}, tag: "unknown"}
+}
+
+// nodeToTaggedStrings converts node and each of its following siblings, in
+// document order. The result is nil when node is nil.
+func (parser *HTMLParser) nodeToTaggedStrings(node *html.Node, ctx Context) (strs []TaggedString) {
+	for cur := node; cur != nil; cur = cur.NextSibling {
+		strs = append(strs, parser.singleNodeToString(cur, ctx))
+	}
+	return strs
+}
+
+// BlockTags lists the tags that isBlockTag reports as block-level;
+// nodeToTagAwareString surrounds their output with newlines.
+var BlockTags = []string{"p", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "pre", "blockquote", "div", "hr", "table"}
+
+// isBlockTag reports whether tag is listed in BlockTags.
+func (parser *HTMLParser) isBlockTag(tag string) bool {
+	for i := 0; i < len(BlockTags); i++ {
+		if BlockTags[i] == tag {
+			return true
+		}
+	}
+	return false
+}
+
+// nodeToTagAwareString converts node and its following siblings into a single
+// string, wrapping the output of block-level tags in newlines, and trims the
+// combined result. It returns nil when there is nothing to convert.
+func (parser *HTMLParser) nodeToTagAwareString(node *html.Node, ctx Context) *EntityString {
+	var output *EntityString
+	for _, tagged := range parser.nodeToTaggedStrings(node, ctx) {
+		piece := tagged.EntityString
+		if parser.isBlockTag(tagged.tag) {
+			piece = NewEntityString("\n").Append(piece).AppendString("\n")
+		}
+		// Append on a nil receiver returns its argument, so the first piece
+		// seeds the output without a separate nil check.
+		output = output.Append(piece)
+	}
+	return output.TrimSpace()
+}
+
+// nodeToStrings converts node and each of its following siblings and returns
+// the resulting strings without their tag labels. Elements may be nil.
+func (parser *HTMLParser) nodeToStrings(node *html.Node, ctx Context) (strs []*EntityString) {
+	for cur := node; cur != nil; cur = cur.NextSibling {
+		tagged := parser.singleNodeToString(cur, ctx)
+		strs = append(strs, tagged.EntityString)
+	}
+	return strs
+}
+
+// nodeToString converts node and its following siblings and concatenates the
+// results without a separator and without trimming.
+func (parser *HTMLParser) nodeToString(node *html.Node, ctx Context) *EntityString {
+	parts := parser.nodeToStrings(node, ctx)
+	return JoinEntityString("", parts...)
+}
+
+// Parse converts Matrix HTML into text and formatting entities using the
+// settings in this parser. The result may be nil when the document produces
+// no content.
+func (parser *HTMLParser) Parse(htmlData string, ctx Context) *EntityString {
+	// The parse error is intentionally discarded; a nil root simply results
+	// in a nil return value from the conversion below.
+	root, _ := html.Parse(strings.NewReader(htmlData))
+	return parser.nodeToTagAwareString(root, ctx)
+}
diff --git a/pkg/msgconv/matrixfmt/tags.go b/pkg/msgconv/matrixfmt/tags.go
new file mode 100644
index 0000000..8f45c26
--- /dev/null
+++ b/pkg/msgconv/matrixfmt/tags.go
@@ -0,0 +1,40 @@
+package matrixfmt
+
+import (
+ "fmt"
+
+ "go.mau.fi/mautrix-googlechat/pkg/gchatmeow/proto"
+)
+
+// BodyRangeValue is the kind of formatting attached to a BodyRange.
+type BodyRangeValue interface {
+ // String returns a textual representation of the value.
+ String() string
+ // Format takes the message text covered by the range and returns a string.
+ Format(message string) string
+ // Proto returns the protobuf format type corresponding to the value.
+ Proto() proto.FormatMetadata_FormatType
+}
+
+// Style is a text style that can be applied to a range of a message.
+type Style int
+
+// The Style values below are numbered with iota starting at zero. Style.Proto
+// converts them to proto.FormatMetadata_FormatType by plain numeric
+// conversion, so the order of this list must not change.
+const (
+ StyleNone Style = iota
+ StyleBold
+ StyleItalic
+ StyleStrikethrough
+ StyleSourceCode
+ StyleMonospace // 5
+ StyleHidden
+ StyleMonospaceBlock
+ StyleUnderline
+ StyleFontColor
+)
+
+// Proto returns the protobuf format type that has the same numeric value as
+// the style.
+func (s Style) Proto() proto.FormatMetadata_FormatType {
+	formatType := proto.FormatMetadata_FormatType(s)
+	return formatType
+}
+
+// String renders the style as "Style(N)", where N is its numeric value.
+func (s Style) String() string {
+	number := int(s)
+	return fmt.Sprintf("Style(%d)", number)
+}
+
+// Format returns message unchanged; a Style does not alter the text itself.
+func (s Style) Format(message string) string {
+ return message
+}
diff --git a/pkg/msgconv/matrixfmt/tree.go b/pkg/msgconv/matrixfmt/tree.go
new file mode 100644
index 0000000..70a58b2
--- /dev/null
+++ b/pkg/msgconv/matrixfmt/tree.go
@@ -0,0 +1,101 @@
+package matrixfmt
+
+import (
+ "fmt"
+ "sort"
+
+ "go.mau.fi/mautrix-googlechat/pkg/gchatmeow/proto"
+ "go.mau.fi/mautrix-googlechat/pkg/msgconv/gchatfmt"
+)
+
+// BodyRange is a formatting value applied to a contiguous range of a string.
+type BodyRange struct {
+ // Start is the index at which the range begins.
+ Start int
+ // Length is the number of units covered, so the range ends at Start+Length.
+ Length int
+ // Value is the formatting applied to the range.
+ Value BodyRangeValue
+}
+
+// BodyRangeList is a list of body ranges that can be sorted with package sort.
+type BodyRangeList []BodyRange
+
+// Compile-time check that BodyRangeList implements sort.Interface.
+var _ sort.Interface = BodyRangeList(nil)
+
+// Len returns the number of ranges in the list (sort.Interface).
+func (b BodyRangeList) Len() int {
+	count := len(b)
+	return count
+}
+
+// Less orders ranges by ascending Start; ranges with the same Start are
+// ordered by descending Length, so an enclosing range sorts before the ranges
+// it contains (sort.Interface).
+//
+// Length is only consulted on equal Start. Comparing it unconditionally would
+// let both Less(i, j) and Less(j, i) be true, which package sort does not
+// support.
+func (b BodyRangeList) Less(i, j int) bool {
+	if b[i].Start != b[j].Start {
+		return b[i].Start < b[j].Start
+	}
+	return b[i].Length > b[j].Length
+}
+
+// Swap exchanges the ranges at positions i and j (sort.Interface).
+func (b BodyRangeList) Swap(i, j int) {
+	held := b[i]
+	b[i] = b[j]
+	b[j] = held
+}
+
+// String renders the range as "start:length:value".
+func (b BodyRange) String() string {
+	const layout = "%d:%d:%v"
+	return fmt.Sprintf(layout, b.Start, b.Length, b.Value)
+}
+
+// End returns the index just past the last unit covered by the range, i.e.
+// Start plus Length.
+func (b BodyRange) End() int {
+	end := b.Start
+	end += b.Length
+	return end
+}
+
+// Offset returns a copy of the range whose Start is moved by offset; the
+// Length is unchanged and the receiver is not modified.
+func (b BodyRange) Offset(offset int) *BodyRange {
+	shifted := b
+	shifted.Start += offset
+	return &shifted
+}
+
+// TruncateStart returns a copy of the range that begins no earlier than
+// startAt while keeping the same end index. A range that already starts at or
+// after startAt is returned unchanged. The receiver is not modified.
+func (b BodyRange) TruncateStart(startAt int) *BodyRange {
+	if overshoot := startAt - b.Start; overshoot > 0 {
+		b.Length -= overshoot
+		b.Start = startAt
+	}
+	return &b
+}
+
+// TruncateEnd returns a copy of the range that ends at or before maxEnd while
+// keeping the same Start. The resulting Length can be zero or negative when
+// the range starts at or after maxEnd. The receiver is not modified.
+func (b BodyRange) TruncateEnd(maxEnd int) *BodyRange {
+	if maxEnd < b.Start+b.Length {
+		b.Length = maxEnd - b.Start
+	}
+	return &b
+}
+
+// Proto converts the range into an annotation via gchatfmt.MakeAnnotation,
+// narrowing Start and Length to int32.
+func (b BodyRange) Proto() *proto.Annotation {
+	start, length := int32(b.Start), int32(b.Length)
+	return gchatfmt.MakeAnnotation(start, length, b.Value.Proto())
+}
+
+// LinkedRangeTree is a linked tree of formatting entities.
+//
+// It's meant to turn a list of body ranges into nodes that either overlap completely or not at all,
+// which enables more natural conversion to HTML.
+type LinkedRangeTree struct {
+ // Node is the range stored at this position; nil until the first Add.
+ Node *BodyRange
+ // Sibling holds ranges (or parts of ranges) located after the end of Node.
+ Sibling *LinkedRangeTree
+ // Child holds ranges (or parts of ranges) that begin before the end of Node.
+ Child *LinkedRangeTree
+}
+
+// ptrAdd adds r to the tree that to points at, allocating the tree first if
+// the pointer is still nil.
+func ptrAdd(to **LinkedRangeTree, r *BodyRange) {
+	tree := *to
+	if tree == nil {
+		tree = new(LinkedRangeTree)
+		*to = tree
+	}
+	tree.Add(r)
+}
+
+// Add adds the given formatting entity to this tree.
+//
+// An empty tree stores r as its own node. Otherwise a range starting at or
+// after the end of the node goes to the sibling, re-based to the node's end.
+// A range starting earlier goes to the child, clipped to the node's end and
+// re-based to the node's start; any part that extends past the node's end is
+// additionally added to the sibling.
+func (lrt *LinkedRangeTree) Add(r *BodyRange) {
+	if lrt.Node == nil {
+		lrt.Node = r
+		return
+	}
+	nodeEnd := lrt.Node.End()
+	switch {
+	case r.Start >= nodeEnd:
+		ptrAdd(&lrt.Sibling, r.Offset(-nodeEnd))
+		return
+	case r.End() > nodeEnd:
+		// Straddles the node boundary: the overhang goes to the sibling and
+		// the remainder falls through to the child below.
+		ptrAdd(&lrt.Sibling, r.TruncateStart(nodeEnd).Offset(-nodeEnd))
+	}
+	ptrAdd(&lrt.Child, r.TruncateEnd(nodeEnd).Offset(-lrt.Node.Start))
+}