123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573 |
- #
- #
- # Nim's Runtime Library
- # (c) Copyright 2015 Dominik Picheta
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- ## This module implements URI parsing as specified by RFC 3986.
- ##
- ## A Uniform Resource Identifier (URI) provides a simple and extensible
- ## means for identifying a resource. A URI can be further classified
- ## as a locator, a name, or both. The term "Uniform Resource Locator"
- ## (URL) refers to the subset of URIs.
- ##
- ## .. warning:: URI parsers in this module do not perform security validation.
- ##
- ## # Basic usage
- ## ## Combine URIs
runnableExamples:
  # `/` joins a path onto a parsed URI regardless of leading slashes.
  let host = parseUri("https://nim-lang.org")
  assert $host == "https://nim-lang.org"
  let
    blog = host / "/blog.html"
    blog2 = host / "blog2.html"
  assert $blog == "https://nim-lang.org/blog.html"
  assert $blog2 == "https://nim-lang.org/blog2.html"
## ## Access URI item
runnableExamples:
  # Every component of a parsed URI is available as a string field.
  let res = parseUri("sftp://127.0.0.1:4343")
  assert res.isAbsolute
  assert res.port == "4343"
## ## Data URI Base64
runnableExamples:
  # `getDataUri` Base64-encodes the payload and prefixes the media type.
  const prefix = "data:text/plain;charset=utf-8;base64,"
  assert getDataUri("Hello World", "text/plain") == prefix & "SGVsbG8gV29ybGQ="
  assert getDataUri("Nim", "text/plain") == prefix & "Tmlt"
- import std/[strutils, parseutils, base64]
- import std/private/[since, decode_helpers]
- when defined(nimPreviewSlimSystem):
- import std/assertions
type
  Url* = distinct string
    ## A string that is kept distinct from ordinary strings by the type system.

  Uri* = object
    ## The components of a parsed URI. All textual components are stored as
    ## plain strings; an absent component is the empty string.
    scheme*: string   ## Text before the first `:`, e.g. `https`.
    username*: string ## User part of the authority (before `@`).
    password*: string ## Text after `:` in the user info.
    hostname*: string ## Host part; IPv6 literals are stored without brackets.
    port*: string     ## Port, kept as the string found in the URI.
    path*: string     ## Everything up to `?` or `#` after the authority.
    query*: string    ## Text between `?` and `#`, without the `?`.
    anchor*: string   ## Text after `#`, without the `#`.
    opaque*: bool     ## Set by `parseUri` when no `//` follows the scheme;
                      ## `$` then omits the `//`.
    isIpv6*: bool     ## Set when the authority contained `[`; `$` then wraps
                      ## the hostname in `[` and `]`.

  UriParseError* = object of ValueError
    ## Raised by `uriParseError`.
proc uriParseError*(msg: string) {.noreturn.} =
  ## Never returns: always raises a `UriParseError` whose message is `msg`.
  let err = newException(UriParseError, msg)
  raise err
func encodeUrl*(s: string, usePlus = true): string =
  ## Percent-encodes `s` following RFC3986.
  ##
  ## The unreserved characters
  ## `{'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}` are copied
  ## unchanged. Every other byte becomes `%xx`, where `xx` is its
  ## two-digit hexadecimal value.
  ##
  ## The space character is special: it is written as `+` when `usePlus`
  ## is true and as `%20` otherwise.
  ##
  ## **See also:**
  ## * `decodeUrl func<#decodeUrl,string>`_
  runnableExamples:
    assert encodeUrl("https://nim-lang.org") == "https%3A%2F%2Fnim-lang.org"
    assert encodeUrl("https://nim-lang.org/this is a test") == "https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test"
    assert encodeUrl("https://nim-lang.org/this is a test", false) == "https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test"
  # https://tools.ietf.org/html/rfc3986#section-2.3
  const unreserved = {'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}
  let spaceRepr = if usePlus: "+" else: "%20"
  # Reserve a quarter more than the input length as headroom for escapes.
  result = newStringOfCap(s.len + s.len shr 2)
  for ch in s:
    if ch in unreserved:
      result.add ch
    elif ch == ' ':
      result.add spaceRepr
    else:
      result.add '%'
      result.add toHex(ord(ch), 2)
func decodeUrl*(s: string, decodePlus = true): string =
  ## Reverses the percent-encoding of `s` following RFC3986.
  ##
  ## Each `%xx` sequence (`xx` being a hexadecimal value) is replaced by
  ## the character with ordinal `xx`; all other characters are copied.
  ## A `%` that is not followed by a valid hexadecimal value is kept as is.
  ##
  ## When `decodePlus` is true, every `+` is turned into a space.
  ##
  ## **See also:**
  ## * `encodeUrl func<#encodeUrl,string>`_
  runnableExamples:
    assert decodeUrl("https%3A%2F%2Fnim-lang.org") == "https://nim-lang.org"
    assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test") == "https://nim-lang.org/this is a test"
    assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test",
        false) == "https://nim-lang.org/this is a test"
    assert decodeUrl("abc%xyz") == "abc%xyz"
  # The decoded text is never longer than the input.
  result = newStringOfCap(s.len)
  var pos = 0
  while pos < s.len:
    let ch = s[pos]
    if ch == '%':
      # `decodePercent` moves `pos` past the hex digits it consumed.
      result.add decodePercent(s, pos)
    elif ch == '+' and decodePlus:
      result.add ' '
    else:
      result.add ch
    inc pos
func encodeQuery*(query: openArray[(string, string)], usePlus = true,
                  omitEq = true, sep = '&'): string =
  ## Builds a URL query string out of `(key, value)` pairs.
  ##
  ## Keys and values are URL-encoded with `encodeUrl` (which receives
  ## `usePlus`) and written as `key=value`. For an empty value the `=` is
  ## left out unless `omitEq` is false.
  ## Consecutive pairs are separated by the `sep` character.
  ##
  ## **See also:**
  ## * `encodeUrl func<#encodeUrl,string>`_
  runnableExamples:
    assert encodeQuery({: }) == ""
    assert encodeQuery({"a": "1", "b": "2"}) == "a=1&b=2"
    assert encodeQuery({"a": "1", "b": ""}) == "a=1&b"
    assert encodeQuery({"a": "1", "b": ""}, omitEq = false, sep = ';') == "a=1;b="
  for pair in query:
    # A separator is only written once something has already been emitted.
    if result.len > 0:
      result.add sep
    result.add encodeUrl(pair[0], usePlus)
    # The "=value" part is dropped for empty values when `omitEq` is set.
    let writeValue = not omitEq or pair[1].len > 0
    if writeValue:
      result.add '='
      result.add encodeUrl(pair[1], usePlus)
iterator decodeQuery*(data: string, sep = '&'): tuple[key, value: string] =
  ## Reads and decodes the query string `data` and yields the `(key, value)` pairs
  ## the data consists of. Pairs are separated by `sep`; a `&` always ends a
  ## pair as well. `%xx` sequences are decoded and `+` becomes a space.
  ##
  ## A key without a value (no `=`) yields an empty value. If compiled with
  ## `-d:nimLegacyParseQueryStrict`,
  ## a `UriParseError` is raised when there is an unencoded `=` character in a decoded
  ## value, which was the behavior in Nim < 1.5.1.
  runnableExamples:
    import std/sequtils
    assert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")]
    assert toSeq(decodeQuery("foo=1;bar=2=3", ';')) == @[("foo", "1"), ("bar", "2=3")]
    assert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")]
    assert toSeq(decodeQuery("a;b=1", ';')) == @[("a", ""), ("b", "1")]
  proc parseData(data: string, i: int, field: var string,
                 stops: set[char]): int =
    # Appends decoded characters to `field`, starting at index `i`, until a
    # '&' or a character in `stops` is found. Returns the index where
    # scanning stopped (the terminator itself is not consumed).
    result = i
    while result < data.len:
      let c = data[result]
      case c
      of '%': add(field, decodePercent(data, result))
      of '+': add(field, ' ')
      of '&': break
      else:
        if c in stops: break
        else: add(field, c)
      inc(result)
  var i = 0
  var name = ""
  var value = ""
  # decode everything in one pass:
  while i < data.len:
    setLen(name, 0) # reuse memory
    when defined(nimLegacyParseQueryStrict):
      # Legacy mode keeps its historical key scanning untouched.
      i = parseData(data, i, name, {'='})
    else:
      # A key ends at '=' or at the pair separator; without the latter a
      # value-less key would swallow a custom `sep` and the following key.
      i = parseData(data, i, name, {'=', sep})
    setLen(value, 0) # reuse memory
    if i < data.len and data[i] == '=':
      inc(i) # skip '='
      when defined(nimLegacyParseQueryStrict):
        i = parseData(data, i, value, {'='})
      else:
        i = parseData(data, i, value, {sep})
    yield (name, value)
    if i < data.len:
      when defined(nimLegacyParseQueryStrict):
        if data[i] != '&':
          uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
      inc(i) # skip the separator that ended this pair
func parseAuthority(authority: string, result: var Uri) =
  ## Splits `authority` (`user:password@host:port`) into the matching fields
  ## of `result`, one character at a time.
  ##
  ## Text is first collected as hostname and port. When an `@` is met, what
  ## was collected so far turns out to be the user info, so it is moved to
  ## `username`/`password` and collection starts over.
  var
    collectingPort = false
    insideBrackets = false
  for ch in authority:
    case ch
    of '@':
      # What looked like "host:port" was really "user:password".
      swap result.password, result.port
      result.port.setLen(0)
      swap result.username, result.hostname
      result.hostname.setLen(0)
      collectingPort = false
    of ':':
      # Inside `[...]` a colon belongs to the IPv6 literal itself.
      if insideBrackets:
        result.hostname.add ch
      else:
        collectingPort = true
    of '[':
      insideBrackets = true
      result.isIpv6 = true
    of ']':
      insideBrackets = false
    else:
      if collectingPort:
        result.port.add ch
      else:
        result.hostname.add ch
func parsePath(uri: string, i: var int, result: var Uri) =
  ## Parses path, query and anchor of `uri` starting at index `i` and stores
  ## them in `result`. `i` is advanced past everything that was consumed.
  i += parseUntil(uri, result.path, {'?', '#'}, i)
  # For the 'mailto' scheme the PATH actually holds the username/hostname,
  # so it is re-read as an authority and then cleared.
  if cmpIgnoreCase(result.scheme, "mailto") == 0:
    parseAuthority(result.path, result)
    result.path.setLen(0)
  if i < uri.len and uri[i] == '?':
    inc i # step over '?'
    i += parseUntil(uri, result.query, {'#'}, i)
  if i < uri.len and uri[i] == '#':
    inc i # step over '#'
    # An empty terminator set reads up to the end of `uri`.
    i += parseUntil(uri, result.anchor, {}, i)
func initUri*(isIpv6 = false): Uri =
  ## Creates a `Uri` whose `scheme`, `username`, `password`, `hostname`,
  ## `port`, `path`, `query` and `anchor` are all empty and whose `isIpv6`
  ## flag is set to the given value.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  runnableExamples:
    var uri2 = initUri(isIpv6 = true)
    uri2.scheme = "tcp"
    uri2.hostname = "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
    uri2.port = "8080"
    assert $uri2 == "tcp://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080"
  # String fields default to "" and `opaque` to false, so only the flag
  # needs to be spelled out.
  Uri(isIpv6: isIpv6)
func resetUri(uri: var Uri) =
  ## Clears `uri` in place: boolean fields become `false` and string fields
  ## are truncated to length 0.
  for field in fields(uri):
    when field is bool:
      field = false
    else:
      field.setLen(0)
func parseUri*(uri: string, result: var Uri) =
  ## Parses `uri` into `result`. Whatever `result` held before is cleared
  ## first.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  ## * `initUri func <#initUri>`_ for initializing a URI
  runnableExamples:
    var res = initUri()
    parseUri("https://nim-lang.org/docs/manual.html", res)
    assert res.scheme == "https"
    assert res.hostname == "nim-lang.org"
    assert res.path == "/docs/manual.html"
  resetUri(result)
  var pos = 0
  let startsWithDoubleSlash = uri.startsWith("//")

  # A single leading '/' means a reference (relative) URI made of a path only.
  if uri.len > 0 and uri[0] == '/' and not startsWithDoubleSlash:
    parsePath(uri, pos, result)
    return

  # Scheme
  pos += parseWhile(uri, result.scheme, Letters + Digits + {'+', '-', '.'}, pos)
  if not startsWithDoubleSlash:
    if pos >= uri.len or uri[pos] != ':':
      # No ':' after the candidate scheme, so this is a reference (relative)
      # URI: discard the scheme and read everything as a path.
      pos = 0
      result.scheme.setLen(0)
      parsePath(uri, pos, result)
      return
    inc pos # step over ':'

  # Authority
  if pos + 1 < uri.len and uri[pos] == '/' and uri[pos + 1] == '/':
    pos += 2 # step over "//"
    var authority = ""
    pos += parseUntil(uri, authority, {'/', '?', '#'}, pos)
    if authority.len > 0:
      parseAuthority(authority, result)
  else:
    # No "//" after the scheme.
    result.opaque = true

  # Path
  parsePath(uri, pos, result)
func parseUri*(uri: string): Uri =
  ## Parses `uri` and returns the resulting `Uri` object.
  ##
  ## **See also:**
  ## * `Uri type <#Uri>`_ for available fields in the URI type
  runnableExamples:
    let res = parseUri("ftp://Username:Password@Hostname")
    assert res.username == "Username"
    assert res.password == "Password"
    assert res.scheme == "ftp"
  # Start from an empty URI and let the in-place overload fill it.
  result = initUri()
  uri.parseUri(result)
func removeDotSegments(path: string): string =
  ## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath`.
  ##
  ## * `./` at the start of a segment is dropped.
  ## * `../` at the start of a segment drops the previous segment. The empty
  ##   segment that stands for the leading `/` of an absolute path is never
  ##   dropped, and a `../` with nothing before it is discarded.
  ## * Dots in the middle or at the end of a segment (`b..`, `www.ai.`) are
  ##   ordinary characters.
  ## * A trailing `.` or `..` that is not followed by `/` is kept as is.
  ##
  ## Caution: empty segments (`//`) are kept, not collapsed.
  runnableExamples:
    assert removeDotSegments("a1/a2/../a3/a4/a5/./a6/a7/././") == "a1/a3/a4/a5/a6/a7/"
    assert removeDotSegments("http://www.ai.") == "http://www.ai."
    assert removeDotSegments("a/b../c") == "a/b../c"
    assert removeDotSegments("/a/../../b") == "/b"
  # xxx adapt or reuse `pathnorm.normalizePath(path, '/')` to make this more reliable, but
  # taking into account url specificities such as not collapsing leading `//` in scheme
  # `https://`.
  if path.len == 0: return ""
  var collection: seq[string] = @[]
  let endsWithSlash = path.endsWith '/'
  var i = 0
  var currentSegment = ""
  while i < path.len:
    case path[i]
    of '/':
      collection.add(currentSegment)
      currentSegment = ""
    of '.':
      # Only a dot that *starts* a segment can introduce "./" or "../".
      if currentSegment.len == 0:
        if i+2 < path.len and path[i+1] == '.' and path[i+2] == '/':
          # A lone "" entry is the root of an absolute path; keep it so the
          # result does not lose its leading '/'.
          let onlyRoot = collection.len == 1 and collection[0].len == 0
          if collection.len > 0 and not onlyRoot:
            discard collection.pop()
          i.inc 3
          continue
        elif i+1 < path.len and path[i+1] == '/':
          i.inc 2
          continue
      currentSegment.add path[i]
    else:
      currentSegment.add path[i]
    i.inc
  if currentSegment != "":
    collection.add currentSegment
  result = collection.join("/")
  # `join` does not emit the final separator of a path ending in '/'.
  if endsWithSlash: result.add '/'
func merge(base, reference: Uri): string =
  ## Merges the path of `reference` with the path of `base`, see
  ## http://tools.ietf.org/html/rfc3986#section-5.2.3
  # A base with a host but no path: the merged path hangs off the root.
  if base.hostname.len > 0 and base.path.len == 0:
    return '/' & reference.path
  # Otherwise the last segment of the base path (everything after its final
  # '/') is replaced by the reference path.
  let slashIdx = base.path.rfind('/')
  result =
    if slashIdx < 0: reference.path
    else: base.path[0 .. slashIdx] & reference.path
func combine*(base: Uri, reference: Uri): Uri =
  ## Combines a base URI with a reference URI.
  ##
  ## This uses the algorithm specified in
  ## `section 5.2.2 of RFC 3986 <http://tools.ietf.org/html/rfc3986#section-5.2.2>`_.
  ##
  ## This means that the slashes inside the base URIs path as well as reference
  ## URIs path affect the resulting URI.
  ##
  ## The authority of the result (including whether its host is an IPv6
  ## literal) comes from `reference` if it has a hostname, otherwise from
  ## `base`. The anchor always comes from `reference`.
  ##
  ## **See also:**
  ## * `/ func <#/,Uri,string>`_ for building URIs
  runnableExamples:
    let foo = combine(parseUri("https://nim-lang.org/foo/bar"), parseUri("/baz"))
    assert foo.path == "/baz"
    let bar = combine(parseUri("https://nim-lang.org/foo/bar"), parseUri("baz"))
    assert bar.path == "/foo/baz"
    let qux = combine(parseUri("https://nim-lang.org/foo/bar/"), parseUri("baz"))
    assert qux.path == "/foo/bar/baz"
    let v6 = combine(parseUri("http://[::1]:8080/foo/bar"), parseUri("baz"))
    assert $v6 == "http://[::1]:8080/foo/baz"
  template setAuthority(dest, src): untyped =
    dest.hostname = src.hostname
    dest.username = src.username
    dest.port = src.port
    dest.password = src.password
    # The flag belongs to the host: without it `$` would render an IPv6
    # hostname without its surrounding brackets.
    dest.isIpv6 = src.isIpv6
  result = initUri()
  if reference.scheme != base.scheme and reference.scheme != "":
    # The reference names a different scheme: it is used as is, apart from
    # normalizing its path.
    result = reference
    result.path = removeDotSegments(result.path)
  else:
    if reference.hostname != "":
      setAuthority(result, reference)
      result.path = removeDotSegments(reference.path)
      result.query = reference.query
    else:
      if reference.path == "":
        # Same document: keep the base path; the query of the reference wins
        # only when it has one.
        result.path = base.path
        if reference.query != "":
          result.query = reference.query
        else:
          result.query = base.query
      else:
        if reference.path.startsWith("/"):
          result.path = removeDotSegments(reference.path)
        else:
          result.path = removeDotSegments(merge(base, reference))
        result.query = reference.query
      setAuthority(result, base)
    result.scheme = base.scheme
  result.anchor = reference.anchor
func combine*(uris: varargs[Uri]): Uri =
  ## Combines several URIs from left to right: the first one is the base and
  ## every following one is applied as a reference to the result so far.
  ##
  ## **See also:**
  ## * `/ func <#/,Uri,string>`_ for building URIs
  runnableExamples:
    let foo = combine(parseUri("https://nim-lang.org/"), parseUri("docs/"),
                      parseUri("manual.html"))
    assert foo.hostname == "nim-lang.org"
    assert foo.path == "/docs/manual.html"
  result = uris[0]
  for idx in 1 .. uris.high:
    result = combine(result, uris[idx])
func isAbsolute*(uri: Uri): bool =
  ## Tells whether `uri` is absolute: it must have a scheme and, in
  ## addition, a hostname or a path.
  runnableExamples:
    assert parseUri("https://nim-lang.org").isAbsolute
    assert not parseUri("nim-lang").isAbsolute
  let hasLocation = uri.hostname.len > 0 or uri.path.len > 0
  result = uri.scheme.len > 0 and hasLocation
func `/`*(x: Uri, path: string): Uri =
  ## Appends `path` to the path of `x` and returns the new URI.
  ##
  ## Unlike the `combine func <#combine,Uri,Uri>`_ this takes care of the
  ## slashes itself: exactly one `/` ends up between the existing path and
  ## `path`, whether or not either side already supplies one.
  ##
  ## **See also:**
  ## * `combine func <#combine,Uri,Uri>`_
  runnableExamples:
    let foo = parseUri("https://nim-lang.org/foo/bar") / "/baz"
    assert foo.path == "/foo/bar/baz"
    let bar = parseUri("https://nim-lang.org/foo/bar") / "baz"
    assert bar.path == "/foo/bar/baz"
    let qux = parseUri("https://nim-lang.org/foo/bar/") / "baz"
    assert qux.path == "/foo/bar/baz"
  result = x
  let pathHasLeadingSlash = path.startsWith('/')
  if result.path.len == 0:
    # Nothing to join with: the new path simply has to start with '/'.
    if not pathHasLeadingSlash:
      result.path = "/"
    result.path.add path
  elif result.path.endsWith('/'):
    # The separator is already there; do not duplicate it.
    if pathHasLeadingSlash:
      result.path.add path[1 .. ^1]
    else:
      result.path.add path
  else:
    if not pathHasLeadingSlash:
      result.path.add '/'
    result.path.add path
func `?`*(u: Uri, query: openArray[(string, string)]): Uri =
  ## Returns a copy of `u` whose query is set to the `(key, value)` pairs of
  ## `query`, encoded with `encodeQuery`. A query already present on `u` is
  ## replaced.
  runnableExamples:
    let foo = parseUri("https://example.com") / "foo" ? {"bar": "qux"}
    assert $foo == "https://example.com/foo?bar=qux"
  let encoded = encodeQuery(query)
  result = u
  result.query = encoded
func `$`*(u: Uri): string =
  ## Renders the URI object `u` as a string.
  runnableExamples:
    assert $parseUri("https://nim-lang.org") == "https://nim-lang.org"
  # Upper bound for all punctuation that can be inserted between the parts.
  const maxPunctuationLen = 12
  # Reserve room for every part plus the punctuation up front.
  result = newStringOfCap(
    u.scheme.len + u.username.len + u.password.len + u.hostname.len +
    u.port.len + u.path.len + u.query.len + u.anchor.len + maxPunctuationLen
  )

  # scheme, followed by "//" unless the URI is opaque
  if u.scheme.len > 0:
    result.add u.scheme
    result.add ':'
    if not u.opaque:
      result.add "//"

  # user info
  if u.username.len > 0:
    result.add u.username
    if u.password.len > 0:
      result.add ':'
      result.add u.password
    result.add '@'

  # host: a trailing '/' stored in the hostname is not written out, and
  # IPv6 literals get their brackets back
  let host =
    if u.hostname.endsWith('/'): u.hostname[0 .. ^2]
    else: u.hostname
  if u.isIpv6:
    result.add '['
    result.add host
    result.add ']'
  else:
    result.add host

  if u.port.len > 0:
    result.add ':'
    result.add u.port

  # path, separated from the host by a '/' if it does not bring its own
  if u.path.len > 0:
    if u.hostname.len > 0 and u.path[0] != '/':
      result.add '/'
    result.add u.path

  if u.query.len > 0:
    result.add '?'
    result.add u.query

  if u.anchor.len > 0:
    result.add '#'
    result.add u.anchor
proc getDataUri*(data, mime: string, encoding = "utf-8"): string {.since: (1, 3).} =
  ## Returns `data` as a standard Base64 Data URI (RFC-2397) of the form
  ## `data:<mime>;charset=<encoding>;base64,<payload>`, where the payload is
  ## produced by `base64.encode`.
  ##
  ## **See also:**
  ## * `mimetypes <mimetypes.html>`_ for `mime` argument
  ## * https://tools.ietf.org/html/rfc2397
  ## * https://en.wikipedia.org/wiki/Data_URI_scheme
  runnableExamples: static: assert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt"
  assert encoding.len > 0 and mime.len > 0 # Must *not* be URL-Safe, see RFC-2397
  const
    schemePart = "data:"
    charsetPart = ";charset="
    base64Part = ";base64,"
  let payload = base64.encode(data)
  result = schemePart & mime & charsetPart & encoding & base64Part & payload
|