pax_global_header 0000666 0000000 0000000 00000000064 15162405644 0014521 g ustar 00root root 0000000 0000000 52 comment=a25303ba7bafdf441c40f865222c4955027c337b
golang-github-goware-urlx-0.3.2/ 0000775 0000000 0000000 00000000000 15162405644 0016524 5 ustar 00root root 0000000 0000000 golang-github-goware-urlx-0.3.2/.travis.yml 0000664 0000000 0000000 00000001101 15162405644 0020626 0 ustar 00root root 0000000 0000000 language: go
go:
- 1.11.x
- tip
env:
global:
- GO111MODULE=on
before_install:
- curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $GOPATH/bin latest
install:
- go mod download
script:
- go mod tidy && git diff --exit-code; code=$?; git checkout -- .; (exit $code)
- go test -race -cover -coverprofile=coverage.txt -covermode=atomic ./...
- golangci-lint run
after_success:
- bash <(curl -s https://codecov.io/bash)
matrix:
fast_finish: true
allow_failures:
- go: tip
notifications:
email: false
golang-github-goware-urlx-0.3.2/LICENSE 0000664 0000000 0000000 00000002075 15162405644 0017535 0 ustar 00root root 0000000 0000000 MIT License
Copyright (c) 2014 Pressly Inc. www.pressly.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
golang-github-goware-urlx-0.3.2/README.md 0000664 0000000 0000000 00000007465 15162405644 0020017 0 ustar 00root root 0000000 0000000 # URLx
[Golang](http://golang.org/) pkg for URL parsing and normalization.
1. [Parsing URL](#parsing-url) ([GoDoc](https://godoc.org/github.com/goware/urlx#Parse))
2. [Normalizing URL](#normalizing-url) ([GoDoc](https://godoc.org/github.com/goware/urlx#Normalize))
3. [Splitting host:port from URL](#splitting-hostport-from-url) ([GoDoc](https://godoc.org/github.com/goware/urlx#SplitHostPort))
4. [Resolving IP address from URL](#resolving-ip-address-from-url) ([GoDoc](https://godoc.org/github.com/goware/urlx#Resolve))
[GoDoc](https://godoc.org/github.com/goware/urlx)
[Build Status](https://travis-ci.org/goware/urlx)
## Parsing URL
The [urlx.Parse()](https://godoc.org/github.com/goware/urlx#Parse) is compatible with the same function from [net/url](https://golang.org/pkg/net/url/#Parse) pkg, but has slightly different behavior. It enforces default scheme and favors absolute URLs over relative paths.
### Difference between [urlx](https://godoc.org/github.com/goware/urlx#Parse) and [net/url](https://golang.org/pkg/net/url/#Parse)
<table>
<tr>
<th>github.com/goware/urlx</th>
<th>net/url</th>
</tr>
<tr>
<td>
<pre>
urlx.Parse("example.com")

&url.URL{
    Scheme: "http",
    Host:   "example.com",
    Path:   "",
}
</pre>
</td>
<td>
<pre>
url.Parse("example.com")

&url.URL{
    Scheme: "",
    Host:   "",
    Path:   "example.com",
}
</pre>
</td>
</tr>
<tr>
<td>
<pre>
urlx.Parse("localhost:8080")

&url.URL{
    Scheme: "http",
    Host:   "localhost:8080",
    Path:   "",
    Opaque: "",
}
</pre>
</td>
<td>
<pre>
url.Parse("localhost:8080")

&url.URL{
    Scheme: "localhost",
    Host:   "",
    Path:   "",
    Opaque: "8080",
}
</pre>
</td>
</tr>
<tr>
<td>
<pre>
urlx.Parse("user.local:8000/path")

&url.URL{
    Scheme: "http",
    Host:   "user.local:8000",
    Path:   "/path",
    Opaque: "",
}
</pre>
</td>
<td>
<pre>
url.Parse("user.local:8000/path")

&url.URL{
    Scheme: "user.local",
    Host:   "",
    Path:   "",
    Opaque: "8000/path",
}
</pre>
</td>
</tr>
</table>
### Usage
```go
import "github.com/goware/urlx"
func main() {
url, _ := urlx.Parse("example.com")
// url.Scheme == "http"
// url.Host == "example.com"
fmt.Print(url)
// Prints http://example.com
}
```
## Normalizing URL
The [urlx.Normalize()](https://godoc.org/github.com/goware/urlx#Normalize) function normalizes the URL using the predefined subset of [Purell](https://github.com/PuerkitoBio/purell) flags.
### Usage
```go
import "github.com/goware/urlx"
func main() {
url, _ := urlx.Parse("localhost:80///x///y/z/../././index.html?b=y&a=x#t=20")
normalized, _ := urlx.Normalize(url)
fmt.Print(normalized)
// Prints http://localhost/x/y/index.html?a=x&b=y#t=20
}
```
## Splitting host:port from URL
The [urlx.SplitHostPort()](https://godoc.org/github.com/goware/urlx#SplitHostPort) is compatible with the same function from [net](https://golang.org/pkg/net/) pkg, but has slightly different behavior. It doesn't remove brackets from `[IPv6]` host.
### Usage
```go
import "github.com/goware/urlx"
func main() {
url, _ := urlx.Parse("localhost:80")
host, port, _ := urlx.SplitHostPort(url)
fmt.Print(host)
// Prints localhost
fmt.Print(port)
// Prints 80
}
```
## Resolving IP address from URL
The [urlx.Resolve()](https://godoc.org/github.com/goware/urlx#Resolve) is compatible with [ResolveIPAddr()](https://golang.org/pkg/net/#ResolveIPAddr) from [net](https://golang.org/pkg/net/).
### Usage
```go
url, _ := urlx.Parse("localhost")
ip, _ := urlx.Resolve(url)
fmt.Print(ip)
// Prints 127.0.0.1
```
## License
URLx is licensed under the [MIT License](./LICENSE).
golang-github-goware-urlx-0.3.2/go.mod 0000664 0000000 0000000 00000000372 15162405644 0017634 0 ustar 00root root 0000000 0000000 module github.com/goware/urlx
require (
github.com/PuerkitoBio/purell v1.1.1
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd
golang.org/x/text v0.3.0 // indirect
)
golang-github-goware-urlx-0.3.2/go.sum 0000664 0000000 0000000 00000001402 15162405644 0017654 0 ustar 00root root 0000000 0000000 github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd h1:HuTn7WObtcDo9uEEU7rEqL0jYthdXAmZ6PP+meazmaU=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang-github-goware-urlx-0.3.2/urlx.go 0000664 0000000 0000000 00000012512 15162405644 0020046 0 ustar 00root root 0000000 0000000 // Package urlx parses and normalizes URLs. It can also resolve hostname to an IP address.
package urlx
import (
"errors"
"net"
"net/url"
"regexp"
"strconv"
"strings"
"github.com/PuerkitoBio/purell"
"golang.org/x/net/idna"
)
// Parse turns a raw URL string into a net/url URL struct, falling back to
// the "http" scheme when rawURL does not carry one.
//
// It is a thin wrapper around ParseWithDefaultScheme, so compared to plain
// url.Parse() it:
//  1. Prepends the default scheme ("http") when none is given.
//  2. Favors absolute URLs over relative paths, thus "example.com"
//     is parsed into url.Host instead of url.Path.
//  3. Lowercases the Host (not only the Scheme).
func Parse(rawURL string) (*url.URL, error) {
	const fallbackScheme = "http"
	return ParseWithDefaultScheme(rawURL, fallbackScheme)
}
// ParseWithDefaultScheme parses rawURL into a net/url URL struct, using the
// given scheme when rawURL has no scheme of its own.
//
// After url.Parse() succeeds, the host (without the port) is validated with
// checkHost and the port with SplitHostPort; any failure is returned as an
// error together with a nil URL. On success both the Scheme and the Host of
// the returned URL are lowercased.
func ParseWithDefaultScheme(rawURL string, scheme string) (*url.URL, error) {
	parsed, err := url.Parse(defaultScheme(rawURL, scheme))
	if err != nil {
		return nil, err
	}

	// Validate the host part (port stripped) before accepting the URL.
	hostname, _, err := SplitHostPort(parsed)
	if err != nil {
		return nil, err
	}
	if err = checkHost(hostname); err != nil {
		return nil, err
	}

	parsed.Scheme = strings.ToLower(parsed.Scheme)
	parsed.Host = strings.ToLower(parsed.Host)
	return parsed, nil
}
// defaultScheme returns rawURL with the given scheme prepended when rawURL
// does not start with a scheme of its own, so that net/url.Parse() doesn't
// put both host and path into the (relative) path.
//
//   - "//host/path" (scheme-relative) becomes scheme + "://host/path".
//   - "host/path" becomes scheme + "://host/path".
//   - "other://host/path" is returned unchanged.
//
// A "://" that only shows up after the first '/', '?' or '#' belongs to the
// path, query or fragment (e.g. "example.com/?next=http://foo.com") and is
// therefore not mistaken for the URL's own scheme separator.
func defaultScheme(rawURL, scheme string) string {
	if strings.HasPrefix(rawURL, "//") {
		// Leading double slashes (any scheme). Force the default scheme.
		return scheme + ":" + rawURL
	}

	sep := strings.Index(rawURL, "://")
	if sep == -1 || strings.ContainsAny(rawURL[:sep], "/?#") {
		// Missing scheme. Force the default scheme.
		return scheme + "://" + rawURL
	}
	return rawURL
}
// Patterns used by checkHost to validate the host part of a URL.
var (
// domainRegexp matches a dot-separated host name: zero or more labels of
// 1-63 characters (letters, digits, '-' or '_'), each followed by a dot,
// and a final label of 1-63 letters, digits or '-' (no underscore).
domainRegexp = regexp.MustCompile(`^([a-zA-Z0-9-_]{1,63}\.)*([a-zA-Z0-9-]{1,63})$`)
// ipv4Regexp matches four dot-separated groups of 1-3 decimal digits.
// It does not check that each group is within 0-255.
ipv4Regexp = regexp.MustCompile(`^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$`)
// ipv6Regexp matches a bracketed run of hex digits and colons, such as
// "[2001:db8::1]". It does not verify that the address is well-formed.
ipv6Regexp = regexp.MustCompile(`^\[[a-fA-F0-9:]+\]$`)
)
// checkHost reports whether host (without a port) is acceptable.
//
// The lowercased host passes when it matches domainRegexp directly, when its
// IDNA ASCII (Punycode) form matches domainRegexp, or when it matches
// ipv4Regexp or ipv6Regexp. An empty or otherwise non-matching host yields a
// *url.Error with Op "host"; an error from the IDNA conversion is returned
// as-is.
func checkHost(host string) error {
	if len(host) == 0 {
		return &url.Error{Op: "host", URL: host, Err: errors.New("empty host")}
	}

	lowered := strings.ToLower(host)

	// Plain ASCII domain names need no IDNA conversion.
	if domainRegexp.MatchString(lowered) {
		return nil
	}

	ascii, err := idna.ToASCII(lowered)
	if err != nil {
		return err
	}

	switch {
	case domainRegexp.MatchString(ascii),
		// IPv4 and IPv6.
		ipv4Regexp.MatchString(lowered),
		ipv6Regexp.MatchString(lowered):
		return nil
	}

	return &url.Error{Op: "host", URL: lowered, Err: errors.New("invalid host")}
}
// SplitHostPort splits the network address in u.Host, of the form
// "host:port", into host and port. Unlike net.SplitHostPort(), it doesn't
// remove brackets from [IPv6] host and it accepts net/url.URL struct instead
// of a string.
//
// The returned port is empty when u.Host carries none. A *url.Error is
// returned when u is nil, when a trailing colon is followed by no port, or
// when the port is not an unsigned decimal number in the range 0-65535.
func SplitHostPort(u *url.URL) (host, port string, err error) {
	if u == nil {
		return "", "", &url.Error{Op: "host", URL: "", Err: errors.New("empty url")}
	}
	host = u.Host

	// Find last colon.
	i := strings.LastIndex(host, ":")
	if i == -1 {
		// No port found.
		return host, "", nil
	}

	// Return if the last colon is inside [IPv6] brackets.
	if strings.HasPrefix(host, "[") && strings.Contains(host[i:], "]") {
		// No port found.
		return host, "", nil
	}

	if i == len(host)-1 {
		return "", "", &url.Error{Op: "port", URL: u.String(), Err: errors.New("empty port")}
	}

	port = host[i+1:]
	host = host[:i]

	// ParseUint with a 16-bit size rejects what Atoi would let through:
	// signed values ("+80", "-1") and numbers above 65535.
	if _, perr := strconv.ParseUint(port, 10, 16); perr != nil {
		return "", "", &url.Error{Op: "port", URL: u.String(), Err: perr}
	}

	return host, port, nil
}
// normalizeFlags is the set of purell normalization flags that Normalize
// passes to purell.NormalizeURL: default-port removal, DWORD/octal/hex host
// decoding, removal of unnecessary host dots, dot segments and duplicate
// slashes, escape handling (uppercase, decode unnecessary, encode
// necessary) and query sorting.
const normalizeFlags purell.NormalizationFlags = purell.FlagRemoveDefaultPort |
purell.FlagDecodeDWORDHost | purell.FlagDecodeOctalHost | purell.FlagDecodeHexHost |
purell.FlagRemoveUnnecessaryHostDots | purell.FlagRemoveDotSegments | purell.FlagRemoveDuplicateSlashes |
purell.FlagUppercaseEscapes | purell.FlagDecodeUnnecessaryEscapes | purell.FlagEncodeNecessaryEscapes |
purell.FlagSortQuery
// Normalize returns the normalized string form of u.
//
// The host and port are validated first (SplitHostPort, checkHost); any
// failure is returned with an empty string. Note that u itself is updated:
// its Host is replaced by the lowercased Unicode form (plus the port, if
// any) and its Scheme is lowercased before purell does the rest.
//
// Behavior:
//  1. Remove unnecessary host dots.
//  2. Remove default port (http://localhost:80 becomes http://localhost).
//  3. Remove duplicate slashes.
//  4. Remove unnecessary dots from path.
//  5. Sort query parameters.
//  6. Decode host IP into decimal numbers.
//  7. Handle escape values.
//  8. Decode Punycode domains into UTF8 representation.
func Normalize(u *url.URL) (string, error) {
	hostname, port, err := SplitHostPort(u)
	if err != nil {
		return "", err
	}
	if err = checkHost(hostname); err != nil {
		return "", err
	}

	// Decode Punycode.
	unicodeHost, err := idna.ToUnicode(hostname)
	if err != nil {
		return "", err
	}

	// Rebuild host[:port] from the decoded, lowercased host.
	rebuilt := strings.ToLower(unicodeHost)
	if port != "" {
		rebuilt += ":" + port
	}
	u.Host = rebuilt
	u.Scheme = strings.ToLower(u.Scheme)

	return purell.NormalizeURL(u, normalizeFlags), nil
}
// NormalizeString parses rawURL and returns its normalized string form.
// It's a shortcut for calling Parse() and then Normalize(); an error from
// either step is returned together with an empty string.
func NormalizeString(rawURL string) (string, error) {
	parsed, err := Parse(rawURL)
	if err != nil {
		return "", err
	}

	return Normalize(parsed)
}
// Resolve resolves the URL host to its IP address.
//
// The port, if any, is ignored. Brackets around an IPv6 literal host such as
// "[2001:db8::1]" are removed before the lookup: SplitHostPort keeps them,
// but net.ResolveIPAddr() expects the bare address. An error is returned
// when the host/port cannot be split or when the lookup fails.
func Resolve(u *url.URL) (*net.IPAddr, error) {
	host, _, err := SplitHostPort(u)
	if err != nil {
		return nil, err
	}

	// Strip the [IPv6] brackets, which are URL syntax and not part of the address.
	if n := len(host); n >= 2 && host[0] == '[' && host[n-1] == ']' {
		host = host[1 : n-1]
	}

	addr, err := net.ResolveIPAddr("ip", host)
	if err != nil {
		return nil, err
	}

	return addr, nil
}
// ResolveString parses rawURL and resolves its host to an IP address.
// It's a shortcut for calling Parse() and then Resolve(); an error from
// either step is returned together with a nil address.
func ResolveString(rawURL string) (*net.IPAddr, error) {
	parsed, err := Parse(rawURL)
	if err != nil {
		return nil, err
	}

	return Resolve(parsed)
}
// URIEncode parses uri with net/url.Parse() and returns the result of the
// parsed URL's String() method, i.e. the URI in its escaped form. When uri
// cannot be parsed, the parse error is returned with an empty string.
func URIEncode(uri string) (string, error) {
	parsed, parseErr := url.Parse(uri)
	if parseErr != nil {
		return "", parseErr
	}

	encoded := parsed.String()
	return encoded, nil
}
golang-github-goware-urlx-0.3.2/urlx_test.go 0000664 0000000 0000000 00000024114 15162405644 0021106 0 ustar 00root root 0000000 0000000 package urlx_test
import (
"fmt"
"strings"
"testing"
"github.com/goware/urlx"
)
func TestParse(t *testing.T) {
tests := []struct {
in string
out string
err bool
}{
// Error out on missing host:
{in: "", err: true},
{in: "/", err: true},
{in: "//", err: true},
// Test schemes:
{in: "http://example.com", out: "http://example.com"},
{in: "HTTP://x.example.com", out: "http://x.example.com"},
{in: "http://localhost", out: "http://localhost"},
{in: "http://user.local", out: "http://user.local"},
{in: "http://kubernetes-service", out: "http://kubernetes-service"},
{in: "https://example.com", out: "https://example.com"},
{in: "HTTPS://example.com", out: "https://example.com"},
{in: "ssh://example.com:22", out: "ssh://example.com:22"},
{in: "jabber://example.com:5222", out: "jabber://example.com:5222"},
// Leading double slashes (any scheme) defaults to http:
{in: "//example.com", out: "http://example.com"},
// Empty scheme defaults to http:
{in: "localhost", out: "http://localhost"},
{in: "LOCALHOST", out: "http://localhost"},
{in: "localhost:80", out: "http://localhost:80"},
{in: "localhost:8080", out: "http://localhost:8080"},
{in: "user.local", out: "http://user.local"},
{in: "user.local:80", out: "http://user.local:80"},
{in: "user.local:8080", out: "http://user.local:8080"},
{in: "kubernetes-service", out: "http://kubernetes-service"},
{in: "kubernetes-service:80", out: "http://kubernetes-service:80"},
{in: "kubernetes-service:8080", out: "http://kubernetes-service:8080"},
{in: "127.0.0.1", out: "http://127.0.0.1"},
{in: "127.0.0.1:80", out: "http://127.0.0.1:80"},
{in: "127.0.0.1:8080", out: "http://127.0.0.1:8080"},
{in: "[2001:db8:a0b:12f0::1]", out: "http://[2001:db8:a0b:12f0::1]"},
{in: "[2001:db8:a0b:12f0::80]", out: "http://[2001:db8:a0b:12f0::80]"},
// Keep the port even on matching scheme:
{in: "http://localhost:80", out: "http://localhost:80"},
{in: "http://localhost:8080", out: "http://localhost:8080"},
{in: "http://x.example.io:8080", out: "http://x.example.io:8080"},
{in: "[2001:db8:a0b:12f0::80]:80", out: "http://[2001:db8:a0b:12f0::80]:80"},
{in: "[2001:db8:a0b:12f0::1]:8080", out: "http://[2001:db8:a0b:12f0::1]:8080"},
// Test domains, subdomains etc.:
{in: "example.com", out: "http://example.com"},
{in: "1.example.com", out: "http://1.example.com"},
{in: "1.example.io", out: "http://1.example.io"},
{in: "subsub.sub.example.com", out: "http://subsub.sub.example.com"},
{in: "subdomain_test.example.com", out: "http://subdomain_test.example.com"},
// Test userinfo:
{in: "user@example.com", out: "http://user@example.com"},
{in: "user:passwd@example.com", out: "http://user:passwd@example.com"},
{in: "https://user:passwd@subsub.sub.example.com", out: "https://user:passwd@subsub.sub.example.com"},
// Lowercase scheme and host by default. Let net/url normalize URL by default:
{in: "hTTp://subSUB.sub.EXAMPLE.COM/x//////y///foo.mp3?c=z&a=x&b=y#t=20", out: "http://subsub.sub.example.com/x//////y///foo.mp3?c=z&a=x&b=y#t=20"},
// IDNA Punycode domains.
// TODO: net/url escapes all the fields in String() method. Should we fix it?
{in: "http://www.žluťoučký-kůň.cz/úpěl-ďábelské-ódy", out: "http://www.%C5%BElu%C5%A5ou%C4%8Dk%C3%BD-k%C5%AF%C5%88.cz/%C3%BAp%C4%9Bl-%C4%8F%C3%A1belsk%C3%A9-%C3%B3dy"},
{in: "http://www.xn--luouk-k-z2a6lsyxjlexh.cz/úpěl-ďábelské-ódy", out: "http://www.xn--luouk-k-z2a6lsyxjlexh.cz/%C3%BAp%C4%9Bl-%C4%8F%C3%A1belsk%C3%A9-%C3%B3dy"},
{in: "http://żółć.pl/żółć.html", out: "http://%C5%BC%C3%B3%C5%82%C4%87.pl/%C5%BC%C3%B3%C5%82%C4%87.html"},
{in: "http://xn--kda4b0koi.pl/żółć.html", out: "http://xn--kda4b0koi.pl/%C5%BC%C3%B3%C5%82%C4%87.html"},
// IANA TLDs.
// TODO: net/url escapes all the fields in String() method. Should we fix it?
{in: "https://pressly.餐厅", out: "https://pressly.%E9%A4%90%E5%8E%85"},
{in: "https://pressly.组织机构", out: "https://pressly.%E7%BB%84%E7%BB%87%E6%9C%BA%E6%9E%84"},
// Some obviously wrong data:
{in: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==", err: true},
{in: "javascript:evilFunction()", err: true},
{in: "otherscheme:garbage", err: true},
{in: "", err: true},
{in: "http://www.google.com", out: "http://www.google.com"},
{in: "https://www.google.com", out: "https://www.google.com"},
{in: "HTTP://WWW.GOOGLE.COM", out: "http://www.google.com"},
{in: "HTTPS://WWW.google.COM", out: "https://www.google.com"},
{in: "http:/www.google.com", err: true},
{in: "http:///www.google.com", err: true},
{in: "javascript:void(0)", err: true},
{in: "