pax_global_header00006660000000000000000000000064131314416600014511gustar00rootroot0000000000000052 comment=652b277a9313806ef04f7c0cb0205dd455c4f344 goxml2json-1.1.0/000077500000000000000000000000001313144166000136125ustar00rootroot00000000000000goxml2json-1.1.0/.gitignore000066400000000000000000000004211313144166000155770ustar00rootroot00000000000000# Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.prof /.tags goxml2json-1.1.0/LICENSE000066400000000000000000000020711313144166000146170ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2016 Bastien Gysler Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. goxml2json-1.1.0/README.md000066400000000000000000000030341313144166000150710ustar00rootroot00000000000000# goxml2json [![CircleCI](https://circleci.com/gh/basgys/goxml2json.svg?style=svg)](https://circleci.com/gh/basgys/goxml2json) Go package that converts XML to JSON ### Install go get -u github.com/basgys/goxml2json ### Importing import github.com/basgys/goxml2json ### Usage **Code example** ```go package main import ( "fmt" "strings" xj "github.com/basgys/goxml2json" ) func main() { // xml is an io.Reader xml := strings.NewReader(`world`) json, err := xj.Convert(xml) if err != nil { panic("That's embarrassing...") } fmt.Println(json.String()) // {"hello": "world"} } ``` **Input** ```xml bar ``` **Output** ```json { "osm": { "-version": "0.6", "-generator": "CGImap 0.0.2", "bounds": { "-minlat": "54.0889580", "-minlon": "12.2487570", "-maxlat": "54.0913900", "-maxlon": "12.2524800" }, "foo": "bar" } } ``` ### Contributing Feel free to contribute to this project if you want to fix/extend/improve it. ### Contributors - [DirectX](https://github.com/directx) - [samuelhug](https://github.com/samuelhug) ### TODO * Extract data types in JSON (numbers, boolean, ...) * Categorise errors * Option to prettify the JSON output * Benchmark goxml2json-1.1.0/converter.go000066400000000000000000000006311313144166000161500ustar00rootroot00000000000000package xml2json import ( "bytes" "io" ) // Convert converts the given XML document to JSON func Convert(r io.Reader) (*bytes.Buffer, error) { // Decode XML document root := &Node{} err := NewDecoder(r).Decode(root) if err != nil { return nil, err } // Then encode it in JSON buf := new(bytes.Buffer) err = NewEncoder(buf).Encode(root) if err != nil { return nil, err } return buf, nil } goxml2json-1.1.0/converter_test.go000066400000000000000000000132601313144166000172110ustar00rootroot00000000000000package xml2json import ( "strings" "testing" sj "github.com/bitly/go-simplejson" "github.com/stretchr/testify/assert" ) // TestConvert ensures that the whole process works correctly // It takes an XML document and outputs a JSON document func TestConvert(t *testing.T) { assert := assert.New(t) s := ` bar content ` // Build SimpleJSON json, err := sj.NewJson([]byte(`{ "osm": { "-version": "0.6", "-generator": "CGImap 0.0.2", "bounds": { "-minlat": "54.0889580", "-minlon": "12.2487570", "-maxlat": "54.0913900", "-maxlon": "12.2524800" }, "node": [ { "-id": "298884269", "-lat": "54.0901746", "-lon": "12.2482632", "-user": "SvenHRO", "-uid": "46882", "-visible": "true", "-version": "1", "-changeset": "676636", "-timestamp": "2008-09-21T21:37:45Z" }, { "-id": "261728686", "-lat": "54.0906309", "-lon": "12.2441924", "-user": "PikoWinter", "-uid": "36744", "-visible": "true", "-version": "1", "-changeset": "323878", "-timestamp": "2008-05-03T13:39:23Z" }, { "-id": "1831881213", "-version": "1", "-changeset": "12370172", "-lat": "54.0900666", "-lon": "12.2539381", "-user": "lafkor", "-uid": "75625", "-visible": "true", "-timestamp": "2012-07-20T09:43:19Z", "tag": [ { "-k": "name", "-v": "Neu Broderstorf" }, { "-k": "traffic_sign", "-v": "city_limit" } ] } ], "foo": "bar", "mixed": { "-attr": "attribute", "#content": "content" } } }`)) assert.NoError(err) expected, err := json.MarshalJSON() assert.NoError(err) // Then encode it in JSON res, err := Convert(strings.NewReader(s)) assert.NoError(err) // Assertion assert.JSONEq(string(expected), res.String(), "Drumroll") } func TestConvertWithNewLines(t *testing.T) { assert := assert.New(t) s := ` foo bar ` // Build SimpleJSON json, err := sj.NewJson([]byte(`{ "osm": { "foo": "foo\n\n\t\tbar" } }`)) assert.NoError(err) expected, err := json.MarshalJSON() assert.NoError(err) // Then encode it in JSON res, err := Convert(strings.NewReader(s)) assert.NoError(err) // Assertion assert.JSONEq(string(expected), res.String(), "Drumroll") } func TestConvertWithMixedTags(t *testing.T) { assert := assert.New(t) s := ` Shared/IDL:IceSess\/SessMgr:1\.0.IDL/Common/!ICESMS\/ACPCRTC!ICESMSLB\/CRT.LB!-3379045898978075261!1563026!0 ` // Build SimpleJSON json, err := sj.NewJson([]byte(`{ "Envelope": { "Header": { "Security": { "-wsse": "http://schemas.xmlsoap.org/ws/2002/12/secext", "BinarySecurityToken": { "#content": "Shared/IDL:IceSess\\/SessMgr:1\\.0.IDL/Common/!ICESMS\\/ACPCRTC!ICESMSLB\\/CRT.LB!-3379045898978075261!1563026!0", "-EncodingType": "wsse:Base64Binary", "-valueType": "String" } } }, "-soap-env": "http://schemas.xmlsoap.org/soap/envelope/" } }`)) assert.NoError(err) expected, err := json.MarshalJSON() assert.NoError(err) // Then encode it in JSON res, err := Convert(strings.NewReader(s)) assert.NoError(err) // Assertion assert.JSONEq(string(expected), res.String(), "Drumroll") } // TestConvertISO ensures that other charsets can be converted func TestConvertISO(t *testing.T) { assert := assert.New(t) s := []byte{0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x20, 0x65, 0x6E, 0x63, 0x6F, 0x64, 0x69, 0x6E, 0x67, 0x3D, 0x22, 0x49, 0x53, 0x4F, 0x2D, 0x38, 0x38, 0x35, 0x39, 0x2D, 0x31, 0x22, 0x3F, 0x3E, 0x3C, 0x63, 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x3E, 0xFC, 0x62, 0x65, 0x72, 0x20, 0x63, 0x6F, 0x6D, 0x70, 0x6C, 0x65, 0x78, 0x3C, 0x2F, 0x63, 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x3E} // Build SimpleJSON json, err := sj.NewJson([]byte(`{ "charset": "über complex" }`)) assert.NoError(err) expected, err := json.MarshalJSON() assert.NoError(err) // Then encode it in JSON res, err := Convert(strings.NewReader(string(s))) assert.NoError(err) // Assertion assert.JSONEq(string(expected), res.String(), "Drumroll") } goxml2json-1.1.0/decoder.go000066400000000000000000000056621313144166000155570ustar00rootroot00000000000000package xml2json import ( "encoding/xml" "io" "unicode" "golang.org/x/net/html/charset" ) const ( attrPrefix = "-" contentPrefix = "#" ) // A Decoder reads and decodes XML objects from an input stream. type Decoder struct { r io.Reader err error attributePrefix string contentPrefix string } type element struct { parent *element n *Node label string } func (dec *Decoder) SetAttributePrefix(prefix string) { dec.attributePrefix = prefix } func (dec *Decoder) SetContentPrefix(prefix string) { dec.contentPrefix = prefix } func (dec *Decoder) DecodeWithCustomPrefixes(root *Node, contentPrefix string, attributePrefix string) error { dec.contentPrefix = contentPrefix dec.attributePrefix = attributePrefix return dec.Decode(root) } // NewDecoder returns a new decoder that reads from r. func NewDecoder(r io.Reader) *Decoder { return &Decoder{r: r} } // Decode reads the next JSON-encoded value from its // input and stores it in the value pointed to by v. func (dec *Decoder) Decode(root *Node) error { if dec.contentPrefix == "" { dec.contentPrefix = contentPrefix } if dec.attributePrefix == "" { dec.attributePrefix = attrPrefix } xmlDec := xml.NewDecoder(dec.r) // That will convert the charset if the provided XML is non-UTF-8 xmlDec.CharsetReader = charset.NewReaderLabel // Create first element from the root node elem := &element{ parent: nil, n: root, } for { t, _ := xmlDec.Token() if t == nil { break } switch se := t.(type) { case xml.StartElement: // Build new a new current element and link it to its parent elem = &element{ parent: elem, n: &Node{}, label: se.Name.Local, } // Extract attributes as children for _, a := range se.Attr { elem.n.AddChild(dec.attributePrefix+a.Name.Local, &Node{Data: a.Value}) } case xml.CharData: // Extract XML data (if any) elem.n.Data = trimNonGraphic(string(xml.CharData(se))) case xml.EndElement: // And add it to its parent list if elem.parent != nil { elem.parent.n.AddChild(elem.label, elem.n) } // Then change the current element to its parent elem = elem.parent } } return nil } // trimNonGraphic returns a slice of the string s, with all leading and trailing // non graphic characters and spaces removed. // // Graphic characters include letters, marks, numbers, punctuation, symbols, // and spaces, from categories L, M, N, P, S, Zs. // Spacing characters are set by category Z and property Pattern_White_Space. func trimNonGraphic(s string) string { if s == "" { return s } var first *int var last int for i, r := range []rune(s) { if !unicode.IsGraphic(r) || unicode.IsSpace(r) { continue } if first == nil { f := i // copy i first = &f last = i } else { last = i } } // If first is nil, it means there are no graphic characters if first == nil { return "" } return string([]rune(s)[*first : last+1]) } goxml2json-1.1.0/decoder_test.go000066400000000000000000000037761313144166000166220ustar00rootroot00000000000000package xml2json import ( "strings" "testing" "github.com/stretchr/testify/assert" ) // TestDecode ensures that decode does not return any errors (not that useful) func TestDecode(t *testing.T) { assert := assert.New(t) s := ` bar ` // Decode XML document root := &Node{} var err error var dec *Decoder dec = NewDecoder(strings.NewReader(s)) err = dec.Decode(root) assert.NoError(err) dec.SetAttributePrefix("test") dec.SetContentPrefix("test2") err = dec.DecodeWithCustomPrefixes(root, "test3", "test4") assert.NoError(err) } func TestTrim(t *testing.T) { table := []struct { in string expected string }{ {in: "foo", expected: "foo"}, {in: " foo", expected: "foo"}, {in: "foo ", expected: "foo"}, {in: " foo ", expected: "foo"}, {in: " foo ", expected: "foo"}, {in: "foo bar", expected: "foo bar"}, {in: "\n\tfoo\n\t", expected: "foo"}, {in: "\n\tfoo\n\tbar\n\t", expected: "foo\n\tbar"}, {in: "", expected: ""}, {in: "\n", expected: ""}, {in: "\n\v", expected: ""}, {in: "ending with ä", expected: "ending with ä"}, {in: "ä and ä", expected: "ä and ä"}, } for _, scenario := range table { got := trimNonGraphic(scenario.in) assert.Equal(t, scenario.expected, got) } } goxml2json-1.1.0/doc.go000066400000000000000000000001011313144166000146760ustar00rootroot00000000000000// Package xml2json is an XML to JSON converter package xml2json goxml2json-1.1.0/encoder.go000066400000000000000000000103121313144166000155550ustar00rootroot00000000000000package xml2json import ( "bytes" "io" "unicode/utf8" ) // An Encoder writes JSON objects to an output stream. type Encoder struct { w io.Writer err error contentPrefix string attributePrefix string } // NewEncoder returns a new encoder that writes to w. func NewEncoder(w io.Writer) *Encoder { return &Encoder{w: w} } func (enc *Encoder) SetAttributePrefix(prefix string) { enc.attributePrefix = prefix } func (enc *Encoder) SetContentPrefix(prefix string) { enc.contentPrefix = prefix } func (enc *Encoder) EncodeWithCustomPrefixes(root *Node, contentPrefix string, attributePrefix string) error { enc.contentPrefix = contentPrefix enc.attributePrefix = attributePrefix return enc.Encode(root) } // Encode writes the JSON encoding of v to the stream func (enc *Encoder) Encode(root *Node) error { if enc.err != nil { return enc.err } if root == nil { return nil } if enc.contentPrefix == "" { enc.contentPrefix = contentPrefix } if enc.attributePrefix == "" { enc.attributePrefix = attrPrefix } enc.err = enc.format(root, 0) // Terminate each value with a newline. // This makes the output look a little nicer // when debugging, and some kind of space // is required if the encoded value was a number, // so that the reader knows there aren't more // digits coming. enc.write("\n") return enc.err } func (enc *Encoder) format(n *Node, lvl int) error { if n.IsComplex() { enc.write("{") // Add data as an additional attibute (if any) if len(n.Data) > 0 { enc.write("\"") enc.write(enc.contentPrefix) enc.write("content") enc.write("\": ") enc.write(sanitiseString(n.Data)) enc.write(", ") } i := 0 tot := len(n.Children) for label, children := range n.Children { enc.write("\"") enc.write(label) enc.write("\": ") if len(children) > 1 { // Array enc.write("[") for j, c := range children { enc.format(c, lvl+1) if j < len(children)-1 { enc.write(", ") } } enc.write("]") } else { // Map enc.format(children[0], lvl+1) } if i < tot-1 { enc.write(", ") } i++ } enc.write("}") } else { // TODO : Extract data type enc.write(sanitiseString(n.Data)) } return nil } func (enc *Encoder) write(s string) { enc.w.Write([]byte(s)) } // https://golang.org/src/encoding/json/encode.go?s=5584:5627#L788 var hex = "0123456789abcdef" func sanitiseString(s string) string { var buf bytes.Buffer buf.WriteByte('"') start := 0 for i := 0; i < len(s); { if b := s[i]; b < utf8.RuneSelf { if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' { i++ continue } if start < i { buf.WriteString(s[start:i]) } switch b { case '\\', '"': buf.WriteByte('\\') buf.WriteByte(b) case '\n': buf.WriteByte('\\') buf.WriteByte('n') case '\r': buf.WriteByte('\\') buf.WriteByte('r') case '\t': buf.WriteByte('\\') buf.WriteByte('t') default: // This encodes bytes < 0x20 except for \n and \r, // as well as <, > and &. The latter are escaped because they // can lead to security holes when user-controlled strings // are rendered into JSON and served to some browsers. buf.WriteString(`\u00`) buf.WriteByte(hex[b>>4]) buf.WriteByte(hex[b&0xF]) } i++ start = i continue } c, size := utf8.DecodeRuneInString(s[i:]) if c == utf8.RuneError && size == 1 { if start < i { buf.WriteString(s[start:i]) } buf.WriteString(`\ufffd`) i += size start = i continue } // U+2028 is LINE SEPARATOR. // U+2029 is PARAGRAPH SEPARATOR. // They are both technically valid characters in JSON strings, // but don't work in JSONP, which has to be evaluated as JavaScript, // and can lead to security holes there. It is valid JSON to // escape them, so we do so unconditionally. // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. if c == '\u2028' || c == '\u2029' { if start < i { buf.WriteString(s[start:i]) } buf.WriteString(`\u202`) buf.WriteByte(hex[c&0xF]) i += size start = i continue } i += size } if start < len(s) { buf.WriteString(s[start:]) } buf.WriteByte('"') return buf.String() } goxml2json-1.1.0/encoder_test.go000066400000000000000000000040511313144166000166170ustar00rootroot00000000000000package xml2json import ( "bytes" "fmt" "testing" sj "github.com/bitly/go-simplejson" "github.com/stretchr/testify/assert" ) type bio struct { Firstname string Lastname string Hobbies []string Misc map[string]string } // TestEncode ensures that encode outputs the expected JSON document. func TestEncode(t *testing.T) { var err error assert := assert.New(t) author := bio{ Firstname: "Bastien", Lastname: "Gysler", Hobbies: []string{"DJ", "Running", "Tennis"}, Misc: map[string]string{ "lineSeparator": "\u2028", "Nationality": "Swiss", "City": "Zürich", "foo": "", "bar": "\"quoted text\"", "esc": "escaped \\ sanitized", "r": "\r return line", "default": "< >", "runeError": "\uFFFD", }, } // Build document root := &Node{} root.AddChild("firstname", &Node{ Data: author.Firstname, }) root.AddChild("lastname", &Node{ Data: author.Lastname, }) for _, h := range author.Hobbies { root.AddChild("hobbies", &Node{ Data: h, }) } misc := &Node{} for k, v := range author.Misc { misc.AddChild(k, &Node{ Data: v, }) } root.AddChild("misc", misc) var enc *Encoder // Convert to JSON string buf := new(bytes.Buffer) enc = NewEncoder(buf) err = enc.Encode(nil) assert.NoError(err) enc.SetAttributePrefix("test") enc.SetContentPrefix("test2") err = enc.EncodeWithCustomPrefixes(root, "test3", "test4") assert.NoError(err) err = enc.Encode(root) assert.NoError(err) // Build SimpleJSON sj, err := sj.NewJson(buf.Bytes()) res, err := sj.Map() assert.NoError(err) // Assertions assert.Equal(author.Firstname, res["firstname"]) assert.Equal(author.Lastname, res["lastname"]) resHobbies, err := sj.Get("hobbies").StringArray() assert.NoError(err) assert.Equal(author.Hobbies, resHobbies) resMisc, err := sj.Get("misc").Map() assert.NoError(err) for k, v := range resMisc { assert.Equal(author.Misc[k], v) } enc.err = fmt.Errorf("Testing if error provided is returned") assert.Error(enc.Encode(nil)) } goxml2json-1.1.0/struct.go000066400000000000000000000010021313144166000154560ustar00rootroot00000000000000package xml2json // Node is a data element on a tree type Node struct { Children map[string]Nodes Data string } // Nodes is a list of nodes type Nodes []*Node // AddChild appends a node to the list of children func (n *Node) AddChild(s string, c *Node) { // Lazy lazy if n.Children == nil { n.Children = map[string]Nodes{} } n.Children[s] = append(n.Children[s], c) } // IsComplex returns whether it is a complex type (has children) func (n *Node) IsComplex() bool { return len(n.Children) > 0 } goxml2json-1.1.0/struct_test.go000066400000000000000000000011371313144166000165260ustar00rootroot00000000000000package xml2json import ( "testing" "github.com/stretchr/testify/assert" ) func TestAddChild(t *testing.T) { assert := assert.New(t) n := Node{} assert.Len(n.Children, 0) n.AddChild("a", &Node{}) assert.Len(n.Children, 1) n.AddChild("b", &Node{}) assert.Len(n.Children, 2) } func TestIsComplex(t *testing.T) { assert := assert.New(t) n := Node{} assert.False(n.IsComplex(), "nodes with no children are not complex") n.AddChild("b", &Node{}) assert.True(n.IsComplex(), "nodes with children are complex") n.Data = "foo" assert.True(n.IsComplex(), "data does not impact IsComplex") }