diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go index 8c6342013d3304..9d6045c916a920 100644 --- a/src/encoding/xml/marshal.go +++ b/src/encoding/xml/marshal.go @@ -179,17 +179,24 @@ var ( ) // EncodeToken writes the given XML token to the stream. -// It returns an error if StartElement and EndElement tokens are not properly matched. +// It returns an error if StartElement and EndElement tokens are not +// properly matched. // -// EncodeToken does not call Flush, because usually it is part of a larger operation -// such as Encode or EncodeElement (or a custom Marshaler's MarshalXML invoked -// during those), and those will call Flush when finished. -// Callers that create an Encoder and then invoke EncodeToken directly, without -// using Encode or EncodeElement, need to call Flush when finished to ensure -// that the XML is written to the underlying writer. +// EncodeToken does not call Flush, because usually it is part of a +// larger operation such as Encode or EncodeElement (or a custom +// Marshaler's MarshalXML invoked during those), and those will call +// Flush when finished. Callers that create an Encoder and then invoke +// EncodeToken directly, without using Encode or EncodeElement, need to +// call Flush when finished to ensure that the XML is written to the +// underlying writer. // -// EncodeToken allows writing a ProcInst with Target set to "xml" only as the first token -// in the stream. +// EncodeToken allows writing a ProcInst with Target set to "xml" only +// as the first token in the stream. +// +// When encoding a StartElement holding an XML namespace prefix +// declaration for a prefix that is not already declared, contained +// elements (including the StartElement itself) will use the declared +// prefix when encoding names with matching namespace URIs. func (enc *Encoder) EncodeToken(t Token) error { p := &enc.p switch t := t.(type) { @@ -256,19 +263,27 @@ type printer struct { depth int indentedIn bool putNewline bool + defaultNS string attrNS map[string]string // map prefix -> name space attrPrefix map[string]string // map name space -> prefix - prefixes []string + prefixes []printerPrefix tags []Name } -// createAttrPrefix finds the name space prefix attribute to use for the given name space, -// defining a new prefix if necessary. It returns the prefix. -func (p *printer) createAttrPrefix(url string) string { - if prefix := p.attrPrefix[url]; prefix != "" { - return prefix - } +// printerPrefix holds a namespace undo record. +// When an element is popped, the prefix record +// is set back to the recorded URL. The empty +// prefix records the URL for the default name space. +// +// The start of an element is recorded with an element +// that has mark=true. +type printerPrefix struct { + prefix string + url string + mark bool +} +func (p *printer) prefixForNS(url string, isAttr bool) string { // The "http://www.w3.org/XML/1998/namespace" name space is predefined as "xml" // and must be referred to that way. // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns", @@ -276,12 +291,97 @@ func (p *printer) createAttrPrefix(url string) string { if url == xmlURL { return "xml" } + if !isAttr && url == p.defaultNS { + // We can use the default name space. + return "" + } + return p.attrPrefix[url] +} - // Need to define a new name space. - if p.attrPrefix == nil { - p.attrPrefix = make(map[string]string) - p.attrNS = make(map[string]string) +// defineNS pushes any namespace definition found in the given attribute. +// If ignoreNonEmptyDefault is true, an xmlns="nonempty" +// attribute will be ignored. +func (p *printer) defineNS(attr Attr, ignoreNonEmptyDefault bool) error { + var prefix string + if attr.Name.Local == "xmlns" { + if attr.Name.Space != "" && attr.Name.Space != "xml" && attr.Name.Space != xmlURL { + return fmt.Errorf("xml: cannot redefine xmlns attribute prefix") + } + } else if attr.Name.Space == "xmlns" && attr.Name.Local != "" { + prefix = attr.Name.Local + if attr.Value == "" { + // Technically, an empty XML namespace is allowed for an attribute. + // From http://www.w3.org/TR/xml-names11/#scoping-defaulting: + // + // The attribute value in a namespace declaration for a prefix may be + // empty. This has the effect, within the scope of the declaration, of removing + // any association of the prefix with a namespace name. + // + // However our namespace prefixes here are used only as hints. There's + // no need to respect the removal of a namespace prefix, so we ignore it. + return nil + } + } else { + // Ignore: it's not a namespace definition + return nil + } + if prefix == "" { + if attr.Value == p.defaultNS { + // No need for redefinition. + return nil + } + if attr.Value != "" && ignoreNonEmptyDefault { + // We have an xmlns="..." value but + // it can't define a name space in this context, + // probably because the element has an empty + // name space. In this case, we just ignore + // the name space declaration. + return nil + } + } else if _, ok := p.attrPrefix[attr.Value]; ok { + // There's already a prefix for the given name space, + // so use that. This prevents us from + // having two prefixes for the same name space + // so attrNS and attrPrefix can remain bijective. + return nil } + p.pushPrefix(prefix, attr.Value) + return nil +} + +// createNSPrefix creates a name space prefix attribute +// to use for the given name space, defining a new prefix +// if necessary. +// If isAttr is true, the prefix is to be created for an attribute +// prefix, which means that the default name space cannot +// be used. +func (p *printer) createNSPrefix(url string, isAttr bool) { + if _, ok := p.attrPrefix[url]; ok { + // We already have a prefix for the given URL. + return + } + switch { + case !isAttr && url == p.defaultNS: + // We can use the default name space. + return + case url == "": + // The only way we can encode names in the empty + // name space is by using the default name space, + // so we must use that. + if p.defaultNS != "" { + // The default namespace is non-empty, so we + // need to set it to empty. + p.pushPrefix("", "") + } + return + case url == xmlURL: + return + } + // TODO If the URL is an existing prefix, we could + // use it as is. That would enable the + // marshaling of elements that had been unmarshaled + // and with a name space prefix that was not found. + // although technically it would be incorrect. // Pick a name. We try to use the final element of the path // but fall back to _. @@ -306,39 +406,98 @@ func (p *printer) createAttrPrefix(url string) string { } } - p.attrPrefix[url] = prefix - p.attrNS[prefix] = url - - p.WriteString(`xmlns:`) - p.WriteString(prefix) - p.WriteString(`="`) - EscapeText(p, []byte(url)) - p.WriteString(`" `) + p.pushPrefix(prefix, url) +} - p.prefixes = append(p.prefixes, prefix) +// writeNamespaces writes xmlns attributes for all the +// namespace prefixes that have been defined in +// the current element. +func (p *printer) writeNamespaces() { + for i := len(p.prefixes) - 1; i >= 0; i-- { + prefix := p.prefixes[i] + if prefix.mark { + return + } + p.WriteString(" ") + if prefix.prefix == "" { + // Default name space. + p.WriteString(`xmlns="`) + } else { + p.WriteString("xmlns:") + p.WriteString(prefix.prefix) + p.WriteString(`="`) + } + EscapeText(p, []byte(p.nsForPrefix(prefix.prefix))) + p.WriteString(`"`) + } +} - return prefix +// pushPrefix pushes a new prefix on the prefix stack +// without checking to see if it is already defined. +func (p *printer) pushPrefix(prefix, url string) { + p.prefixes = append(p.prefixes, printerPrefix{ + prefix: prefix, + url: p.nsForPrefix(prefix), + }) + p.setAttrPrefix(prefix, url) } -// deleteAttrPrefix removes an attribute name space prefix. -func (p *printer) deleteAttrPrefix(prefix string) { - delete(p.attrPrefix, p.attrNS[prefix]) - delete(p.attrNS, prefix) +// nsForPrefix returns the name space for the given +// prefix. Note that this is not valid for the +// empty attribute prefix, which always has an empty +// name space. +func (p *printer) nsForPrefix(prefix string) string { + if prefix == "" { + return p.defaultNS + } + return p.attrNS[prefix] } +// markPrefix marks the start of an element on the prefix +// stack. func (p *printer) markPrefix() { - p.prefixes = append(p.prefixes, "") + p.prefixes = append(p.prefixes, printerPrefix{ + mark: true, + }) } +// popPrefix pops all defined prefixes for the current +// element. func (p *printer) popPrefix() { for len(p.prefixes) > 0 { prefix := p.prefixes[len(p.prefixes)-1] p.prefixes = p.prefixes[:len(p.prefixes)-1] - if prefix == "" { + if prefix.mark { break } - p.deleteAttrPrefix(prefix) + p.setAttrPrefix(prefix.prefix, prefix.url) + } +} + +// setAttrPrefix sets an attribute name space prefix. +// If url is empty, the attribute is removed. +// If prefix is empty, the default name space is set. +func (p *printer) setAttrPrefix(prefix, url string) { + if prefix == "" { + p.defaultNS = url + return + } + if url == "" { + delete(p.attrPrefix, p.attrNS[prefix]) + delete(p.attrNS, prefix) + return + } + if p.attrPrefix == nil { + // Need to define a new name space. + p.attrPrefix = make(map[string]string) + p.attrNS = make(map[string]string) } + // Remove any old prefix value. This is OK because we maintain a + // strict one-to-one mapping between prefix and URL (see + // defineNS) + delete(p.attrPrefix, p.attrNS[prefix]) + p.attrPrefix[url] = prefix + p.attrNS[prefix] = url } var ( @@ -376,23 +535,23 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplat // Check for marshaler. if val.CanInterface() && typ.Implements(marshalerType) { - return p.marshalInterface(val.Interface().(Marshaler), defaultStart(typ, finfo, startTemplate)) + return p.marshalInterface(val.Interface().(Marshaler), p.defaultStart(typ, finfo, startTemplate)) } if val.CanAddr() { pv := val.Addr() if pv.CanInterface() && pv.Type().Implements(marshalerType) { - return p.marshalInterface(pv.Interface().(Marshaler), defaultStart(pv.Type(), finfo, startTemplate)) + return p.marshalInterface(pv.Interface().(Marshaler), p.defaultStart(pv.Type(), finfo, startTemplate)) } } // Check for text marshaler. if val.CanInterface() && typ.Implements(textMarshalerType) { - return p.marshalTextInterface(val.Interface().(encoding.TextMarshaler), defaultStart(typ, finfo, startTemplate)) + return p.marshalTextInterface(val.Interface().(encoding.TextMarshaler), p.defaultStart(typ, finfo, startTemplate)) } if val.CanAddr() { pv := val.Addr() if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { - return p.marshalTextInterface(pv.Interface().(encoding.TextMarshaler), defaultStart(pv.Type(), finfo, startTemplate)) + return p.marshalTextInterface(pv.Interface().(encoding.TextMarshaler), p.defaultStart(pv.Type(), finfo, startTemplate)) } } @@ -419,6 +578,10 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplat // 3. type name var start StartElement + // Historic behaviour: elements use the default name space + // they are contained in by default. + start.Name.Space = p.defaultNS + if startTemplate != nil { start.Name = startTemplate.Name start.Attr = append(start.Attr, startTemplate.Attr...) @@ -431,7 +594,10 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplat } } if start.Name.Local == "" && finfo != nil { - start.Name.Space, start.Name.Local = finfo.xmlns, finfo.name + start.Name.Local = finfo.name + if finfo.xmlns != "" { + start.Name.Space = finfo.xmlns + } } if start.Name.Local == "" { name := typ.Name() @@ -440,6 +606,9 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplat } start.Name.Local = name } + // Historic behaviour: an element that's in a namespace sets + // the default namespace for all elements contained within it. + start.setDefaultNamespace() // Attributes for i := range tinfo.fields { @@ -552,7 +721,7 @@ func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplat // defaultStart returns the default start element to use, // given the reflect type, field info, and start template. -func defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElement) StartElement { +func (p *printer) defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElement) StartElement { var start StartElement // Precedence for the XML element name is as above, // except that we do not look inside structs for the first field. @@ -569,6 +738,12 @@ func defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElemen // since it has the Marshaler methods. start.Name.Local = typ.Elem().Name() } + // Historic behaviour: elements use the name space of + // the element they are contained in by default. + if start.Name.Space == "" { + start.Name.Space = p.defaultNS + } + start.setDefaultNamespace() return start } @@ -613,29 +788,44 @@ func (p *printer) writeStart(start *StartElement) error { p.tags = append(p.tags, start.Name) p.markPrefix() + // Define any name spaces explicitly declared in the attributes. + // We do this as a separate pass so that explicitly declared prefixes + // will take precedence over implicitly declared prefixes + // regardless of the order of the attributes. + ignoreNonEmptyDefault := start.Name.Space == "" + for _, attr := range start.Attr { + if err := p.defineNS(attr, ignoreNonEmptyDefault); err != nil { + return err + } + } + // Define any new name spaces implied by the attributes. + for _, attr := range start.Attr { + name := attr.Name + // From http://www.w3.org/TR/xml-names11/#defaulting + // "Default namespace declarations do not apply directly + // to attribute names; the interpretation of unprefixed + // attributes is determined by the element on which they + // appear." + // This means we don't need to create a new namespace + // when an attribute name space is empty. + if name.Space != "" && !name.isNamespace() { + p.createNSPrefix(name.Space, true) + } + } + p.createNSPrefix(start.Name.Space, false) p.writeIndent(1) p.WriteByte('<') - p.WriteString(start.Name.Local) - - if start.Name.Space != "" { - p.WriteString(` xmlns="`) - p.EscapeString(start.Name.Space) - p.WriteByte('"') - } - - // Attributes + p.writeName(start.Name, false) + p.writeNamespaces() for _, attr := range start.Attr { name := attr.Name - if name.Local == "" { + if name.Local == "" || name.isNamespace() { + // Namespaces have already been written by writeNamespaces above. continue } p.WriteByte(' ') - if name.Space != "" { - p.WriteString(p.createAttrPrefix(name.Space)) - p.WriteByte(':') - } - p.WriteString(name.Local) + p.writeName(name, true) p.WriteString(`="`) p.EscapeString(attr.Value) p.WriteByte('"') @@ -644,6 +834,16 @@ func (p *printer) writeStart(start *StartElement) error { return nil } +// writeName writes the given name. It assumes +// that p.createNSPrefix(name) has already been called. +func (p *printer) writeName(name Name, isAttr bool) { + if prefix := p.prefixForNS(name.Space, isAttr); prefix != "" { + p.WriteString(prefix) + p.WriteByte(':') + } + p.WriteString(name.Local) +} + func (p *printer) writeEnd(name Name) error { if name.Local == "" { return fmt.Errorf("xml: end tag with no name") @@ -662,7 +862,7 @@ func (p *printer) writeEnd(name Name) error { p.writeIndent(-1) p.WriteByte('<') p.WriteByte('/') - p.WriteString(name.Local) + p.writeName(name, false) p.WriteByte('>') p.popPrefix() return nil diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go index d8507ce81401e5..cc6994338da760 100644 --- a/src/encoding/xml/marshal_test.go +++ b/src/encoding/xml/marshal_test.go @@ -1203,7 +1203,7 @@ var encodeTokenTests = []struct { toks: []Token{ StartElement{Name{"space", "local"}, nil}, }, - want: ``, + want: ``, }, { desc: "start element with no name", toks: []Token{ @@ -1291,7 +1291,7 @@ var encodeTokenTests = []struct { EndElement{Name{"another", "foo"}}, }, err: "xml: end tag in namespace another does not match start tag in namespace space", - want: ``, + want: ``, }, { desc: "start element with explicit namespace", toks: []Token{ @@ -1300,7 +1300,7 @@ var encodeTokenTests = []struct { {Name{"space", "foo"}, "value"}, }}, }, - want: ``, + want: ``, }, { desc: "start element with explicit namespace and colliding prefix", toks: []Token{ @@ -1310,7 +1310,7 @@ var encodeTokenTests = []struct { {Name{"x", "bar"}, "other"}, }}, }, - want: ``, + want: ``, }, { desc: "start element using previously defined namespace", toks: []Token{ @@ -1321,7 +1321,7 @@ var encodeTokenTests = []struct { {Name{"space", "x"}, "y"}, }}, }, - want: ``, + want: ``, }, { desc: "nested name space with same prefix", toks: []Token{ @@ -1342,7 +1342,7 @@ var encodeTokenTests = []struct { {Name{"space2", "b"}, "space2 value"}, }}, }, - want: ``, + want: ``, }, { desc: "start element defining several prefixes for the same name space", toks: []Token{ @@ -1352,7 +1352,7 @@ var encodeTokenTests = []struct { {Name{"space", "x"}, "value"}, }}, }, - want: ``, + want: ``, }, { desc: "nested element redefines name space", toks: []Token{ @@ -1364,7 +1364,7 @@ var encodeTokenTests = []struct { {Name{"space", "a"}, "value"}, }}, }, - want: ``, + want: ``, }, { desc: "nested element creates alias for default name space", toks: []Token{ @@ -1376,7 +1376,7 @@ var encodeTokenTests = []struct { {Name{"space", "a"}, "value"}, }}, }, - want: ``, + want: ``, }, { desc: "nested element defines default name space with existing prefix", toks: []Token{ @@ -1388,7 +1388,7 @@ var encodeTokenTests = []struct { {Name{"space", "a"}, "value"}, }}, }, - want: ``, + want: ``, }, { desc: "nested element uses empty attribute name space when default ns defined", toks: []Token{ @@ -1399,7 +1399,7 @@ var encodeTokenTests = []struct { {Name{"", "attr"}, "value"}, }}, }, - want: ``, + want: ``, }, { desc: "redefine xmlns", toks: []Token{ @@ -1407,7 +1407,7 @@ var encodeTokenTests = []struct { {Name{"foo", "xmlns"}, "space"}, }}, }, - want: ``, + err: `xml: cannot redefine xmlns attribute prefix`, }, { desc: "xmlns with explicit name space #1", toks: []Token{ @@ -1415,7 +1415,7 @@ var encodeTokenTests = []struct { {Name{"xml", "xmlns"}, "space"}, }}, }, - want: ``, + want: ``, }, { desc: "xmlns with explicit name space #2", toks: []Token{ @@ -1423,7 +1423,7 @@ var encodeTokenTests = []struct { {Name{xmlURL, "xmlns"}, "space"}, }}, }, - want: ``, + want: ``, }, { desc: "empty name space declaration is ignored", toks: []Token{ @@ -1431,7 +1431,7 @@ var encodeTokenTests = []struct { {Name{"xmlns", "foo"}, ""}, }}, }, - want: ``, + want: ``, }, { desc: "attribute with no name is ignored", toks: []Token{ @@ -1447,7 +1447,7 @@ var encodeTokenTests = []struct { {Name{"/34", "x"}, "value"}, }}, }, - want: ``, + want: `<_:foo xmlns:_="/34" _:x="value">`, }, { desc: "nested element resets default namespace to empty", toks: []Token{ @@ -1460,7 +1460,7 @@ var encodeTokenTests = []struct { {Name{"space", "x"}, "value"}, }}, }, - want: ``, + want: ``, }, { desc: "nested element requires empty default name space", toks: []Token{ @@ -1469,7 +1469,7 @@ var encodeTokenTests = []struct { }}, StartElement{Name{"", "foo"}, nil}, }, - want: ``, + want: ``, }, { desc: "attribute uses name space from xmlns", toks: []Token{ @@ -1478,7 +1478,7 @@ var encodeTokenTests = []struct { {Name{"some/space", "other"}, "other value"}, }}, }, - want: ``, + want: ``, }, { desc: "default name space should not be used by attributes", toks: []Token{ @@ -1491,7 +1491,7 @@ var encodeTokenTests = []struct { EndElement{Name{"space", "baz"}}, EndElement{Name{"space", "foo"}}, }, - want: ``, + want: ``, }, { desc: "default name space not used by attributes, not explicitly defined", toks: []Token{ @@ -1503,7 +1503,7 @@ var encodeTokenTests = []struct { EndElement{Name{"space", "baz"}}, EndElement{Name{"space", "foo"}}, }, - want: ``, + want: ``, }, { desc: "impossible xmlns declaration", toks: []Token{ @@ -1514,7 +1514,7 @@ var encodeTokenTests = []struct { {Name{"space", "attr"}, "value"}, }}, }, - want: ``, + want: ``, }} func TestEncodeToken(t *testing.T) { diff --git a/src/encoding/xml/read_test.go b/src/encoding/xml/read_test.go index 7d004dc488cdfe..02f1e10c330add 100644 --- a/src/encoding/xml/read_test.go +++ b/src/encoding/xml/read_test.go @@ -5,6 +5,8 @@ package xml import ( + "bytes" + "fmt" "io" "reflect" "strings" @@ -484,6 +486,34 @@ func TestUnmarshalNS(t *testing.T) { } } +func TestRoundTrip(t *testing.T) { + // From issue 7535 + const s = `` + in := bytes.NewBufferString(s) + for i := 0; i < 10; i++ { + out := &bytes.Buffer{} + d := NewDecoder(in) + e := NewEncoder(out) + + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + fmt.Println("failed:", err) + return + } + e.EncodeToken(t) + } + e.Flush() + in = out + } + if got := in.String(); got != s { + t.Errorf("have: %q\nwant: %q\n", got, s) + } +} + func TestMarshalNS(t *testing.T) { dst := Tables{"hello", "world"} data, err := Marshal(&dst) @@ -607,7 +637,7 @@ func TestMarshalNSAttr(t *testing.T) { if err != nil { t.Fatalf("Marshal: %v", err) } - want := `` + want := `` str := string(data) if str != want { t.Errorf("Marshal:\nhave: %#q\nwant: %#q\n", str, want) diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go index e9535d7b55f88f..0c64cd730d6b42 100644 --- a/src/encoding/xml/xml.go +++ b/src/encoding/xml/xml.go @@ -35,15 +35,24 @@ func (e *SyntaxError) Error() string { return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg } -// A Name represents an XML name (Local) annotated -// with a name space identifier (Space). -// In tokens returned by Decoder.Token, the Space identifier -// is given as a canonical URL, not the short prefix used -// in the document being parsed. +// A Name represents an XML name (Local) annotated with a name space +// identifier (Space). In tokens returned by Decoder.Token, the Space +// identifier is given as a canonical URL, not the short prefix used in +// the document being parsed. +// +// As a special case, XML namespace declarations will use the literal +// string "xmlns" for the Space field instead of the fully resolved URL. +// See Encoder.EncodeToken for more information on namespace encoding +// behaviour. type Name struct { Space, Local string } +// isNamespace reports whether the name is a namespace-defining name. +func (name Name) isNamespace() bool { + return name.Local == "xmlns" || name.Space == "xmlns" +} + // An Attr represents an attribute in an XML element (Name=Value). type Attr struct { Name Name @@ -72,6 +81,24 @@ func (e StartElement) End() EndElement { return EndElement{e.Name} } +// setDefaultNamespace sets the namespace of the element +// as the default for all elements contained within it. +func (e *StartElement) setDefaultNamespace() { + if e.Name.Space == "" { + // If there's no namespace on the element, don't + // set the default. Strictly speaking this might be wrong, as + // we can't tell if the element had no namespace set + // or was just using the default namespace. + return + } + e.Attr = append(e.Attr, Attr{ + Name: Name{ + Local: "xmlns", + }, + Value: e.Name.Space, + }) +} + // An EndElement represents an XML end element. type EndElement struct { Name Name