pax_global_header 0000666 0000000 0000000 00000000064 13131441660 0014511 g ustar 00root root 0000000 0000000 52 comment=652b277a9313806ef04f7c0cb0205dd455c4f344
goxml2json-1.1.0/ 0000775 0000000 0000000 00000000000 13131441660 0013612 5 ustar 00root root 0000000 0000000 goxml2json-1.1.0/.gitignore 0000664 0000000 0000000 00000000421 13131441660 0015577 0 ustar 00root root 0000000 0000000 # Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
/.tags
goxml2json-1.1.0/LICENSE 0000664 0000000 0000000 00000002071 13131441660 0014617 0 ustar 00root root 0000000 0000000 The MIT License (MIT)
Copyright (c) 2016 Bastien Gysler
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
goxml2json-1.1.0/README.md 0000664 0000000 0000000 00000003034 13131441660 0015071 0 ustar 00root root 0000000 0000000 # goxml2json [](https://circleci.com/gh/basgys/goxml2json)
Go package that converts XML to JSON
### Install
go get -u github.com/basgys/goxml2json
### Importing
import github.com/basgys/goxml2json
### Usage
**Code example**
```go
package main
import (
"fmt"
"strings"
xj "github.com/basgys/goxml2json"
)
func main() {
// xml is an io.Reader
xml := strings.NewReader(`<?xml version="1.0" encoding="UTF-8"?><hello>world</hello>`)
json, err := xj.Convert(xml)
if err != nil {
panic("That's embarrassing...")
}
fmt.Println(json.String())
// {"hello": "world"}
}
```
**Input**
```xml
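<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="CGImap 0.0.2">
  <bounds minlat="54.0889580" minlon="12.2487570" maxlat="54.0913900" maxlon="12.2524800"/>
  <foo>bar</foo>
</osm>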
```
**Output**
```json
{
"osm": {
"-version": "0.6",
"-generator": "CGImap 0.0.2",
"bounds": {
"-minlat": "54.0889580",
"-minlon": "12.2487570",
"-maxlat": "54.0913900",
"-maxlon": "12.2524800"
},
"foo": "bar"
}
}
```
### Contributing
Feel free to contribute to this project if you want to fix/extend/improve it.
### Contributors
- [DirectX](https://github.com/directx)
- [samuelhug](https://github.com/samuelhug)
### TODO
* Extract data types in JSON (numbers, boolean, ...)
* Categorise errors
* Option to prettify the JSON output
* Benchmark
goxml2json-1.1.0/converter.go 0000664 0000000 0000000 00000000631 13131441660 0016150 0 ustar 00root root 0000000 0000000 package xml2json
import (
"bytes"
"io"
)
// Convert reads an XML document from r and returns its JSON representation.
//
// The document is first decoded into a Node tree, which is then encoded as
// JSON into a freshly allocated buffer. If either step fails, the buffer is
// nil and the error of the failing step is returned.
func Convert(r io.Reader) (*bytes.Buffer, error) {
	// Step 1: XML -> Node tree.
	tree := &Node{}
	if err := NewDecoder(r).Decode(tree); err != nil {
		return nil, err
	}

	// Step 2: Node tree -> JSON.
	out := &bytes.Buffer{}
	if err := NewEncoder(out).Encode(tree); err != nil {
		return nil, err
	}
	return out, nil
}
goxml2json-1.1.0/converter_test.go 0000664 0000000 0000000 00000013260 13131441660 0017211 0 ustar 00root root 0000000 0000000 package xml2json
import (
"strings"
"testing"
sj "github.com/bitly/go-simplejson"
"github.com/stretchr/testify/assert"
)
// TestConvert ensures that the whole process works correctly
// It takes an XML document and outputs a JSON document
//
// The expected JSON covers attributes (keys prefixed with "-"), repeated
// elements rendered as arrays ("node", "tag"), a plain text element ("foo")
// and an element carrying both an attribute and text ("mixed", whose text is
// stored under "#content").
func TestConvert(t *testing.T) {
assert := assert.New(t)
// NOTE(review): the XML fixture below contains no markup, while the expected
// JSON describes an <osm> document with attributes and nested elements; the
// tags appear to have been lost from this copy of the file — confirm against
// the upstream repository.
s := `
bar
content
`
// Build SimpleJSON
json, err := sj.NewJson([]byte(`{
"osm": {
"-version": "0.6",
"-generator": "CGImap 0.0.2",
"bounds": {
"-minlat": "54.0889580",
"-minlon": "12.2487570",
"-maxlat": "54.0913900",
"-maxlon": "12.2524800"
},
"node": [
{
"-id": "298884269",
"-lat": "54.0901746",
"-lon": "12.2482632",
"-user": "SvenHRO",
"-uid": "46882",
"-visible": "true",
"-version": "1",
"-changeset": "676636",
"-timestamp": "2008-09-21T21:37:45Z"
},
{
"-id": "261728686",
"-lat": "54.0906309",
"-lon": "12.2441924",
"-user": "PikoWinter",
"-uid": "36744",
"-visible": "true",
"-version": "1",
"-changeset": "323878",
"-timestamp": "2008-05-03T13:39:23Z"
},
{
"-id": "1831881213",
"-version": "1",
"-changeset": "12370172",
"-lat": "54.0900666",
"-lon": "12.2539381",
"-user": "lafkor",
"-uid": "75625",
"-visible": "true",
"-timestamp": "2012-07-20T09:43:19Z",
"tag": [
{
"-k": "name",
"-v": "Neu Broderstorf"
},
{
"-k": "traffic_sign",
"-v": "city_limit"
}
]
}
],
"foo": "bar",
"mixed": {
"-attr": "attribute",
"#content": "content"
}
}
}`))
assert.NoError(err)
// Re-marshal the expected document so it is compared as normalised JSON.
expected, err := json.MarshalJSON()
assert.NoError(err)
// Then encode it in JSON
res, err := Convert(strings.NewReader(s))
assert.NoError(err)
// Assertion
// JSONEq compares the documents structurally, so key order does not matter.
assert.JSONEq(string(expected), res.String(), "Drumroll")
}
// TestConvertWithNewLines checks that line breaks and tabs inside an element's
// text survive the conversion: the expected value of "foo" keeps the inner
// "\n\n\t\t" between the two words.
func TestConvertWithNewLines(t *testing.T) {
assert := assert.New(t)
// NOTE(review): the fixture below has no XML tags and no inner whitespace,
// although the expected JSON implies an <osm><foo>…</foo></osm> document with
// a blank line and tabs between "foo" and "bar" — the markup seems to have
// been lost from this copy; confirm against the upstream repository.
s := `
foo
bar
`
// Build SimpleJSON
json, err := sj.NewJson([]byte(`{
"osm": {
"foo": "foo\n\n\t\tbar"
}
}`))
assert.NoError(err)
// Re-marshal the expected document so it is compared as normalised JSON.
expected, err := json.MarshalJSON()
assert.NoError(err)
// Then encode it in JSON
res, err := Convert(strings.NewReader(s))
assert.NoError(err)
// Assertion
assert.JSONEq(string(expected), res.String(), "Drumroll")
}
// TestConvertWithMixedTags checks the conversion of namespaced elements whose
// leaf carries both attributes and text: the expected JSON stores the text of
// BinarySecurityToken under "#content" next to its "-"-prefixed attributes.
func TestConvertWithMixedTags(t *testing.T) {
assert := assert.New(t)
// NOTE(review): only the text of the token is left in the fixture below; the
// surrounding Envelope/Header/Security markup implied by the expected JSON
// appears to have been lost from this copy — confirm against upstream.
s := `
Shared/IDL:IceSess\/SessMgr:1\.0.IDL/Common/!ICESMS\/ACPCRTC!ICESMSLB\/CRT.LB!-3379045898978075261!1563026!0
`
// Build SimpleJSON
json, err := sj.NewJson([]byte(`{
"Envelope": {
"Header": {
"Security": {
"-wsse": "http://schemas.xmlsoap.org/ws/2002/12/secext",
"BinarySecurityToken": {
"#content": "Shared/IDL:IceSess\\/SessMgr:1\\.0.IDL/Common/!ICESMS\\/ACPCRTC!ICESMSLB\\/CRT.LB!-3379045898978075261!1563026!0",
"-EncodingType": "wsse:Base64Binary",
"-valueType": "String"
}
}
},
"-soap-env": "http://schemas.xmlsoap.org/soap/envelope/"
}
}`))
assert.NoError(err)
// Re-marshal the expected document so it is compared as normalised JSON.
expected, err := json.MarshalJSON()
assert.NoError(err)
// Then encode it in JSON
res, err := Convert(strings.NewReader(s))
assert.NoError(err)
// Assertion
assert.JSONEq(string(expected), res.String(), "Drumroll")
}
// TestConvertISO ensures that other charsets can be converted
func TestConvertISO(t *testing.T) {
assert := assert.New(t)
// Raw bytes of an ISO-8859-1 document, spelled out so the source file itself
// stays valid UTF-8. They read:
//   <?xml version="1.0" encoding="ISO-8859-1"?><charset>\xFCber complex</charset>
// where 0xFC is "ü" in ISO-8859-1.
s := []byte{0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x20, 0x65, 0x6E, 0x63, 0x6F, 0x64, 0x69, 0x6E, 0x67, 0x3D, 0x22, 0x49, 0x53, 0x4F, 0x2D, 0x38, 0x38, 0x35, 0x39, 0x2D, 0x31, 0x22, 0x3F, 0x3E, 0x3C, 0x63, 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x3E, 0xFC, 0x62, 0x65, 0x72, 0x20, 0x63, 0x6F, 0x6D, 0x70, 0x6C, 0x65, 0x78, 0x3C, 0x2F, 0x63, 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x3E}
// Build SimpleJSON
json, err := sj.NewJson([]byte(`{
"charset": "über complex"
}`))
assert.NoError(err)
// Re-marshal the expected document so it is compared as normalised JSON.
expected, err := json.MarshalJSON()
assert.NoError(err)
// Then encode it in JSON
res, err := Convert(strings.NewReader(string(s)))
assert.NoError(err)
// Assertion
assert.JSONEq(string(expected), res.String(), "Drumroll")
}
goxml2json-1.1.0/decoder.go 0000664 0000000 0000000 00000005662 13131441660 0015557 0 ustar 00root root 0000000 0000000 package xml2json
import (
"encoding/xml"
"io"
"unicode"
"golang.org/x/net/html/charset"
)
// Default key prefixes, used by Decode and Encode when no custom prefix is set.
const (
// attrPrefix is the default prefix put in front of an XML attribute's name
// when the attribute is stored as a child node.
attrPrefix = "-"
// contentPrefix is the default prefix of the "content" key under which the
// encoder writes the text of a node that also has children.
contentPrefix = "#"
)
// A Decoder reads and decodes XML objects from an input stream.
type Decoder struct {
// r is the XML input stream.
r io.Reader
// err is not read or written by any Decoder method in this file.
err error
// attributePrefix is prepended to attribute names when they are added as
// child nodes; Decode replaces an empty value with attrPrefix.
attributePrefix string
// contentPrefix is the prefix for text content; Decode replaces an empty
// value with the package default but does not otherwise read it.
contentPrefix string
}
// element is one entry of the stack of currently open XML elements that
// Decode maintains while walking the token stream.
type element struct {
// parent is the enclosing element, or nil for the entry wrapping the root node.
parent *element
// n is the node being populated for this element.
n *Node
// label is the element's local name, used as the key when n is added to its parent.
label string
}
// SetAttributePrefix sets the prefix that Decode puts in front of attribute
// names. An empty prefix makes Decode fall back to the package default.
func (dec *Decoder) SetAttributePrefix(prefix string) {
dec.attributePrefix = prefix
}
// SetContentPrefix sets the decoder's content prefix. An empty prefix makes
// Decode fall back to the package default.
func (dec *Decoder) SetContentPrefix(prefix string) {
dec.contentPrefix = prefix
}
// DecodeWithCustomPrefixes stores the given content and attribute prefixes on
// the decoder and then decodes the input into root, returning Decode's result.
// The prefixes stay set on the decoder for later calls.
func (dec *Decoder) DecodeWithCustomPrefixes(root *Node, contentPrefix string, attributePrefix string) error {
dec.contentPrefix = contentPrefix
dec.attributePrefix = attributePrefix
return dec.Decode(root)
}
// NewDecoder returns a new decoder that reads from r.
// Both prefixes start out empty, so Decode uses the package defaults unless
// they are set beforehand.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{r: r}
}
// Decode reads the XML document from the decoder's input and stores the
// resulting tree in root.
//
// Every element becomes a Node that is added to its parent under the
// element's local name. Every attribute becomes a child Node keyed by the
// attribute prefix followed by the attribute's local name. Character data is
// trimmed with trimNonGraphic and stored in the Data field of the element it
// appears in.
//
// Empty prefixes are replaced by the package defaults. Decode returns nil once
// the input is exhausted, or the first error reported by the XML tokenizer
// (malformed markup, unsupported charset, read failure). In the error case
// root may already have been partially populated.
func (dec *Decoder) Decode(root *Node) error {
	if dec.contentPrefix == "" {
		dec.contentPrefix = contentPrefix
	}
	if dec.attributePrefix == "" {
		dec.attributePrefix = attrPrefix
	}

	xmlDec := xml.NewDecoder(dec.r)

	// That will convert the charset if the provided XML is non-UTF-8
	xmlDec.CharsetReader = charset.NewReaderLabel

	// Create first element from the root node
	elem := &element{
		parent: nil,
		n:      root,
	}

	for {
		t, err := xmlDec.Token()
		if err == io.EOF {
			// Clean end of input.
			break
		}
		if err != nil {
			// Surface tokenizer errors instead of silently returning a
			// truncated tree.
			return err
		}

		switch se := t.(type) {
		case xml.StartElement:
			// Build a new current element and link it to its parent
			elem = &element{
				parent: elem,
				n:      &Node{},
				label:  se.Name.Local,
			}

			// Extract attributes as children
			for _, a := range se.Attr {
				elem.n.AddChild(dec.attributePrefix+a.Name.Local, &Node{Data: a.Value})
			}
		case xml.CharData:
			// Extract XML data (if any). Each character-data token replaces
			// whatever text was previously stored on the current element.
			elem.n.Data = trimNonGraphic(string(se))
		case xml.EndElement:
			// Add the finished element to its parent and make the parent
			// current again. The entry wrapping root has no parent and is
			// never popped, so elem cannot become nil.
			if elem.parent != nil {
				elem.parent.n.AddChild(elem.label, elem.n)
				elem = elem.parent
			}
		}
	}

	return nil
}
// trimNonGraphic returns s with all leading and trailing non-graphic
// characters and spaces removed. Characters in the interior of the string,
// including whitespace, are left untouched. If s holds nothing but such
// characters, the result is the empty string.
//
// Graphic characters include letters, marks, numbers, punctuation, symbols,
// and spaces, from categories L, M, N, P, S, Zs.
// Spacing characters are set by category Z and property Pattern_White_Space.
func trimNonGraphic(s string) string {
	if s == "" {
		return s
	}

	// Work on runes so that multi-byte characters are handled as a unit.
	runes := []rune(s)
	trimmable := func(r rune) bool {
		return !unicode.IsGraphic(r) || unicode.IsSpace(r)
	}

	// Shrink the [start, end) window from both sides until it is bounded by
	// characters that must be kept (or becomes empty).
	start, end := 0, len(runes)
	for start < end && trimmable(runes[start]) {
		start++
	}
	for end > start && trimmable(runes[end-1]) {
		end--
	}

	return string(runes[start:end])
}
goxml2json-1.1.0/decoder_test.go 0000664 0000000 0000000 00000003776 13131441660 0016622 0 ustar 00root root 0000000 0000000 package xml2json
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// TestDecode ensures that decode does not return any errors (not that useful)
// It also exercises the prefix setters and DecodeWithCustomPrefixes.
func TestDecode(t *testing.T) {
assert := assert.New(t)
// NOTE(review): the fixture below contains no XML tags; the markup appears to
// have been lost from this copy of the file — confirm against upstream.
s := `
bar
`
// Decode XML document
root := &Node{}
var err error
var dec *Decoder
dec = NewDecoder(strings.NewReader(s))
err = dec.Decode(root)
assert.NoError(err)
// The values set here are overwritten by DecodeWithCustomPrefixes below.
dec.SetAttributePrefix("test")
dec.SetContentPrefix("test2")
// Second decode on the same decoder, whose reader was already consumed by
// the first Decode call.
err = dec.DecodeWithCustomPrefixes(root, "test3", "test4")
assert.NoError(err)
}
// TestTrim runs trimNonGraphic over a table of inputs: surrounding spaces,
// tabs and newlines must be removed, inner whitespace must be kept, and
// whitespace-only input must yield the empty string.
func TestTrim(t *testing.T) {
table := []struct {
in string
expected string
}{
{in: "foo", expected: "foo"},
{in: " foo", expected: "foo"},
{in: "foo ", expected: "foo"},
{in: " foo ", expected: "foo"},
{in: " foo ", expected: "foo"},
{in: "foo bar", expected: "foo bar"},
{in: "\n\tfoo\n\t", expected: "foo"},
{in: "\n\tfoo\n\tbar\n\t", expected: "foo\n\tbar"},
{in: "", expected: ""},
{in: "\n", expected: ""},
{in: "\n\v", expected: ""},
// Multi-byte characters at the edges must not be cut.
{in: "ending with ä", expected: "ending with ä"},
{in: "ä and ä", expected: "ä and ä"},
}
for _, scenario := range table {
got := trimNonGraphic(scenario.in)
assert.Equal(t, scenario.expected, got)
}
}
goxml2json-1.1.0/doc.go 0000664 0000000 0000000 00000000101 13131441660 0014676 0 ustar 00root root 0000000 0000000 // Package xml2json is an XML to JSON converter
package xml2json
goxml2json-1.1.0/encoder.go 0000664 0000000 0000000 00000010312 13131441660 0015555 0 ustar 00root root 0000000 0000000 package xml2json
import (
"bytes"
"io"
"unicode/utf8"
)
// An Encoder writes JSON objects to an output stream.
type Encoder struct {
// w is the output stream the JSON is written to.
w io.Writer
// err, once non-nil, is returned by every subsequent call to Encode.
err error
// contentPrefix is put in front of the "content" key that holds the text of
// a node with children; Encode replaces an empty value with the default.
contentPrefix string
// attributePrefix is defaulted by Encode; the formatting code in this file
// does not read it.
attributePrefix string
}
// NewEncoder returns a new encoder that writes to w.
// Both prefixes start out empty, so Encode uses the package defaults unless
// they are set beforehand.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w: w}
}
// SetAttributePrefix stores the attribute prefix on the encoder. An empty
// prefix makes Encode fall back to the package default.
func (enc *Encoder) SetAttributePrefix(prefix string) {
enc.attributePrefix = prefix
}
// SetContentPrefix sets the prefix of the "content" key under which the text
// of a node with children is written. An empty prefix makes Encode fall back
// to the package default.
func (enc *Encoder) SetContentPrefix(prefix string) {
enc.contentPrefix = prefix
}
// EncodeWithCustomPrefixes stores the given content and attribute prefixes on
// the encoder and then encodes root, returning Encode's result. The prefixes
// stay set on the encoder for later calls.
func (enc *Encoder) EncodeWithCustomPrefixes(root *Node, contentPrefix string, attributePrefix string) error {
enc.contentPrefix = contentPrefix
enc.attributePrefix = attributePrefix
return enc.Encode(root)
}
// Encode writes the JSON encoding of the tree rooted at root to the stream,
// followed by a newline.
//
// A nil root writes nothing and returns nil. Empty prefixes are replaced by
// the package defaults. If an error has already been recorded on the encoder,
// Encode returns it immediately without writing anything; an error that
// occurs during this call is recorded and returned.
func (enc *Encoder) Encode(root *Node) error {
	if enc.err != nil {
		return enc.err
	}
	if root == nil {
		return nil
	}
	if enc.contentPrefix == "" {
		enc.contentPrefix = contentPrefix
	}
	if enc.attributePrefix == "" {
		enc.attributePrefix = attrPrefix
	}

	enc.err = enc.format(root, 0)

	// Terminate the document with a newline so that documents written by
	// consecutive Encode calls stay separated. The write goes straight to the
	// underlying writer so that a failure is reported instead of dropped.
	if enc.err == nil {
		_, enc.err = enc.w.Write([]byte("\n"))
	}

	return enc.err
}
// format writes the JSON representation of n to the underlying writer.
//
// A node with children is written as an object: its own text, if any, goes
// under the key contentPrefix+"content", and every child label maps to the
// formatted child, or to an array when several children share that label.
// A node without children is written as a JSON string holding its Data.
// Keys are emitted in Go map iteration order, which is unspecified, and are
// escaped with sanitiseString so that any label yields valid JSON.
//
// lvl is the nesting depth of n; it is passed down to recursive calls but
// does not affect the output. format returns the first error reported by the
// writer, if any.
func (enc *Encoder) format(n *Node, lvl int) error {
	if !n.IsComplex() {
		// TODO : Extract data type
		enc.write(sanitiseString(n.Data))
		return enc.err
	}

	enc.write("{")

	// Add data as an additional attribute (if any)
	if len(n.Data) > 0 {
		enc.write(sanitiseString(enc.contentPrefix + "content"))
		enc.write(": ")
		enc.write(sanitiseString(n.Data))
		enc.write(", ")
	}

	i := 0
	tot := len(n.Children)
	for label, children := range n.Children {
		enc.write(sanitiseString(label))
		enc.write(": ")

		if len(children) > 1 {
			// Several children share this label: emit an array.
			enc.write("[")
			for j, c := range children {
				if err := enc.format(c, lvl+1); err != nil {
					return err
				}
				if j < len(children)-1 {
					enc.write(", ")
				}
			}
			enc.write("]")
		} else {
			// Single child: emit it directly.
			if err := enc.format(children[0], lvl+1); err != nil {
				return err
			}
		}

		if i < tot-1 {
			enc.write(", ")
		}
		i++
	}

	enc.write("}")
	return enc.err
}

// write sends s to the underlying writer. The first write error is stored in
// enc.err; once an error is stored, further calls do nothing, so callers can
// issue a sequence of writes and check enc.err once at the end.
func (enc *Encoder) write(s string) {
	if enc.err != nil {
		return
	}
	_, enc.err = io.WriteString(enc.w, s)
}
// https://golang.org/src/encoding/json/encode.go?s=5584:5627#L788
// hex holds the lower-case hexadecimal digits used to build \uXXXX escapes.
var hex = "0123456789abcdef"

// sanitiseString returns s as a double-quoted JSON string literal.
//
// Escaping rules:
//   - backslash and double quote are prefixed with a backslash;
//   - newline, carriage return and tab become \n, \r and \t;
//   - all other bytes below 0x20, as well as <, > and &, become \u00XX
//     (the latter three because they can lead to security holes when
//     user-controlled strings are rendered into JSON and served to some
//     browsers);
//   - bytes that are not valid UTF-8 become \ufffd;
//   - U+2028 and U+2029 become \u2028 and \u2029: they are valid in JSON but
//     not in JSONP, which has to be evaluated as JavaScript (see
//     http://timelessrepo.com/json-isnt-a-javascript-subset).
//
// Everything else is copied through unchanged.
func sanitiseString(s string) string {
	var out bytes.Buffer
	out.WriteByte('"')

	// pending marks the start of the run of bytes that needs no escaping and
	// has not been copied to out yet.
	pending := 0
	flush := func(upTo int) {
		if pending < upTo {
			out.WriteString(s[pending:upTo])
		}
	}

	for pos := 0; pos < len(s); {
		// Decode the character at pos; ASCII bytes take the fast path.
		c, width := rune(s[pos]), 1
		if c >= utf8.RuneSelf {
			c, width = utf8.DecodeRuneInString(s[pos:])
		}

		// esc stays empty when the character can be copied verbatim.
		var esc string
		switch {
		case c == '\\' || c == '"':
			esc = `\` + string(c)
		case c == '\n':
			esc = `\n`
		case c == '\r':
			esc = `\r`
		case c == '\t':
			esc = `\t`
		case c < 0x20 || c == '<' || c == '>' || c == '&':
			esc = `\u00` + string(hex[c>>4]) + string(hex[c&0xF])
		case c == utf8.RuneError && width == 1:
			// Invalid UTF-8 byte.
			esc = `\ufffd`
		case c == '\u2028' || c == '\u2029':
			esc = `\u202` + string(hex[c&0xF])
		}

		if esc != "" {
			flush(pos)
			out.WriteString(esc)
			pending = pos + width
		}
		pos += width
	}

	flush(len(s))
	out.WriteByte('"')
	return out.String()
}
goxml2json-1.1.0/encoder_test.go 0000664 0000000 0000000 00000004051 13131441660 0016617 0 ustar 00root root 0000000 0000000 package xml2json
import (
"bytes"
"fmt"
"testing"
sj "github.com/bitly/go-simplejson"
"github.com/stretchr/testify/assert"
)
// bio is the fixture type used by TestEncode; its fields are turned into a
// Node tree and compared against the decoded JSON output.
type bio struct {
// Firstname and Lastname are encoded as single text children.
Firstname string
Lastname string
// Hobbies is encoded as several children sharing one label.
Hobbies []string
// Misc is encoded as a nested node with one child per key.
Misc map[string]string
}
// TestEncode ensures that encode outputs the expected JSON document.
//
// It builds a Node tree from a bio fixture (single values, a repeated label
// and a nested node whose values exercise the string escaping), encodes it,
// parses the output back and compares it with the fixture. It also checks
// that encoding a nil root succeeds and that an error stored on the encoder
// is returned by Encode.
func TestEncode(t *testing.T) {
	var err error
	assert := assert.New(t)

	author := bio{
		Firstname: "Bastien",
		Lastname:  "Gysler",
		Hobbies:   []string{"DJ", "Running", "Tennis"},
		Misc: map[string]string{
			"lineSeparator": "\u2028",
			"Nationality":   "Swiss",
			"City":          "Zürich",
			"foo":           "",
			"bar":           "\"quoted text\"",
			"esc":           "escaped \\ sanitized",
			"r":             "\r return line",
			"default":       "< >",
			"runeError":     "\uFFFD",
		},
	}

	// Build document
	root := &Node{}
	root.AddChild("firstname", &Node{
		Data: author.Firstname,
	})
	root.AddChild("lastname", &Node{
		Data: author.Lastname,
	})

	for _, h := range author.Hobbies {
		root.AddChild("hobbies", &Node{
			Data: h,
		})
	}

	misc := &Node{}
	for k, v := range author.Misc {
		misc.AddChild(k, &Node{
			Data: v,
		})
	}
	root.AddChild("misc", misc)

	var enc *Encoder

	// Convert to JSON string
	buf := new(bytes.Buffer)
	enc = NewEncoder(buf)

	// A nil root must be accepted.
	err = enc.Encode(nil)
	assert.NoError(err)

	// The values set here are overwritten by EncodeWithCustomPrefixes below.
	enc.SetAttributePrefix("test")
	enc.SetContentPrefix("test2")
	err = enc.EncodeWithCustomPrefixes(root, "test3", "test4")
	assert.NoError(err)

	err = enc.Encode(root)
	assert.NoError(err)

	// Build SimpleJSON. The parse error is checked before the result is used,
	// and the local name no longer shadows the sj package alias.
	doc, err := sj.NewJson(buf.Bytes())
	assert.NoError(err)
	res, err := doc.Map()
	assert.NoError(err)

	// Assertions
	assert.Equal(author.Firstname, res["firstname"])
	assert.Equal(author.Lastname, res["lastname"])

	resHobbies, err := doc.Get("hobbies").StringArray()
	assert.NoError(err)
	assert.Equal(author.Hobbies, resHobbies)

	resMisc, err := doc.Get("misc").Map()
	assert.NoError(err)
	for k, v := range resMisc {
		assert.Equal(author.Misc[k], v)
	}

	// An error stored on the encoder must be returned by Encode.
	enc.err = fmt.Errorf("Testing if error provided is returned")
	assert.Error(enc.Encode(nil))
}
goxml2json-1.1.0/struct.go 0000664 0000000 0000000 00000001002 13131441660 0015456 0 ustar 00root root 0000000 0000000 package xml2json
// Node is one element of the document tree. It carries the element's own
// text in Data and its sub-elements in Children, grouped by label.
type Node struct {
	Children map[string]Nodes
	Data     string
}

// Nodes is an ordered list of nodes that share the same label.
type Nodes []*Node

// AddChild registers c as a child of n under the label s. Children added
// under the same label are kept in insertion order. The Children map is
// allocated on first use.
func (n *Node) AddChild(s string, c *Node) {
	if n.Children == nil {
		n.Children = make(map[string]Nodes)
	}

	siblings := n.Children[s]
	n.Children[s] = append(siblings, c)
}

// IsComplex reports whether n has at least one child. The node's own Data
// plays no part in the answer.
func (n *Node) IsComplex() bool {
	return len(n.Children) != 0
}
goxml2json-1.1.0/struct_test.go 0000664 0000000 0000000 00000001137 13131441660 0016526 0 ustar 00root root 0000000 0000000 package xml2json
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestAddChild checks that each AddChild call with a new label adds one entry
// to the node's Children map, starting from an empty node.
func TestAddChild(t *testing.T) {
assert := assert.New(t)
n := Node{}
assert.Len(n.Children, 0)
n.AddChild("a", &Node{})
assert.Len(n.Children, 1)
n.AddChild("b", &Node{})
assert.Len(n.Children, 2)
}
// TestIsComplex checks that IsComplex depends only on the presence of
// children and is not affected by the node's Data.
func TestIsComplex(t *testing.T) {
assert := assert.New(t)
n := Node{}
assert.False(n.IsComplex(), "nodes with no children are not complex")
n.AddChild("b", &Node{})
assert.True(n.IsComplex(), "nodes with children are complex")
n.Data = "foo"
assert.True(n.IsComplex(), "data does not impact IsComplex")
}