web.texi 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040
  1. @c -*-texinfo-*-
  2. @c This is part of the GNU Guile Reference Manual.
  3. @c Copyright (C) 2010, 2011, 2012, 2013, 2015, 2018, 2019, 2020 Free Software Foundation, Inc.
  4. @c See the file guile.texi for copying conditions.
  5. @node Web
  6. @section @acronym{HTTP}, the Web, and All That
  7. @cindex Web
  8. @cindex WWW
  9. @cindex HTTP
  10. It has always been possible to connect computers together and share
  11. information between them, but the rise of the World Wide Web over the
  12. last couple of decades has made it much easier to do so. The result is
  13. a richly connected network of computation, in which Guile forms a part.
  14. By ``the web'', we mean the HTTP protocol@footnote{Yes, the P is for
  15. protocol, but this phrase appears repeatedly in RFC 2616.} as handled by
  16. servers, clients, proxies, caches, and the various kinds of messages and
  17. message components that can be sent and received by that protocol,
  18. notably HTML.
  19. On one level, the web is text in motion: the protocols themselves are
  20. textual (though the payload may be binary), and it's possible to create
  21. a socket and speak text to the web. But such an approach is obviously
  22. primitive. This section details the higher-level data types and
  23. operations provided by Guile: URIs, HTTP request and response records,
  24. and a conventional web server implementation.
  25. The material in this section is arranged in ascending order, in which
  26. later concepts build on previous ones. If you prefer to start with the
  27. highest-level perspective, @pxref{Web Examples}, and work your way
  28. back.
  29. @menu
  30. * Types and the Web:: Types prevent bugs and security problems.
  31. * URIs:: Uniform Resource Identifiers.
  32. * HTTP:: The Hyper-Text Transfer Protocol.
  33. * HTTP Headers:: How Guile represents specific header values.
  34. * Transfer Codings:: HTTP Transfer Codings.
  35. * Requests:: HTTP requests.
  36. * Responses:: HTTP responses.
  37. * Web Client:: Accessing web resources over HTTP.
  38. * Web Server:: Serving HTTP to the internet.
  39. * Web Examples:: How to use this thing.
  40. @end menu
  41. @node Types and the Web
  42. @subsection Types and the Web
  43. It is a truth universally acknowledged, that a program with good use of
  44. data types, will be free from many common bugs. Unfortunately, the
  45. common practice in web programming seems to ignore this maxim. This
  46. subsection makes the case for expressive data types in web programming.
  47. By ``expressive data types'', we mean that the data types @emph{say}
  48. something about how a program solves a problem. For example, if we
  49. choose to represent dates using SRFI 19 date records (@pxref{SRFI-19}),
  50. this indicates that there is a part of the program that will always have
  51. valid dates. Error handling for a number of basic cases, like invalid
  52. dates, occurs on the boundary in which we produce a SRFI 19 date record
  53. from other types, like strings.
  54. With regard to the web, data types are helpful in the two broad phases
  55. of HTTP messages: parsing and generation.
  56. Consider a server, which has to parse a request, and produce a response.
  57. Guile will parse the request into an HTTP request object
  58. (@pxref{Requests}), with each header parsed into an appropriate Scheme
  59. data type. This transition from an incoming stream of characters to
  60. typed data is a state change in a program---the strings might parse, or
  61. they might not, and something has to happen if they do not. (Guile
  62. throws an error in this case.) But after you have the parsed request,
  63. ``client'' code (code built on top of the Guile web framework) will not
  64. have to check for syntactic validity. The types already make this
  65. information manifest.
  66. This state change on the parsing boundary makes programs more robust,
  67. as they themselves are freed from the need to do a number of common
  68. error checks, and they can use normal Scheme procedures to handle a
  69. request instead of ad-hoc string parsers.
  70. The need for types on the response generation side (in a server) is more
  71. subtle, though not less important. Consider the example of a POST
  72. handler, which prints out the text that a user submits from a form.
  73. Such a handler might include a procedure like this:
  74. @example
  75. ;; First, a helper procedure
  76. (define (para . contents)
  77. (string-append "<p>" (string-concatenate contents) "</p>"))
  78. ;; Now the meat of our simple web application
  79. (define (you-said text)
  80. (para "You said: " text))
  81. (display (you-said "Hi!"))
  82. @print{} <p>You said: Hi!</p>
  83. @end example
  84. This is a perfectly valid implementation, provided that the incoming
  85. text does not contain the special HTML characters @samp{<}, @samp{>}, or
  86. @samp{&}. But this provision of a restricted character set is not
  87. reflected anywhere in the program itself: we must @emph{assume} that the
  88. programmer understands this, and performs the check elsewhere.
  89. Unfortunately, the short history of the practice of programming does not
  90. bear out this assumption. A @dfn{cross-site scripting} (@acronym{XSS})
  91. vulnerability is just such a common error in which unfiltered user input
  92. is allowed into the output. A user could submit a crafted comment to
  93. your web site which results in visitors running malicious JavaScript,
  94. within the security context of your domain:
  95. @example
  96. (display (you-said "<script src=\"http://bad.com/nasty.js\" />"))
  97. @print{} <p>You said: <script src="http://bad.com/nasty.js" /></p>
  98. @end example
  99. The fundamental problem here is that both user data and the program
  100. template are represented using strings. This identity means that types
  101. can't help the programmer to make a distinction between these two, so
  102. they get confused.
  103. There are a number of possible solutions, but perhaps the best is to
  104. treat HTML not as strings, but as native s-expressions: as SXML. The
  105. basic idea is that HTML is either text, represented by a string, or an
  106. element, represented as a tagged list. So @samp{foo} becomes
  107. @samp{"foo"}, and @samp{<b>foo</b>} becomes @samp{(b "foo")}.
  108. Attributes, if present, go in a tagged list headed by @samp{@@}, like
  109. @samp{(img (@@ (src "http://example.com/foo.png")))}. @xref{SXML}, for
  110. more information.
  111. The good thing about SXML is that HTML elements cannot be confused with
  112. text. Let's make a new definition of @code{para}:
  113. @example
  114. (define (para . contents)
  115. `(p ,@@contents))
  116. (use-modules (sxml simple))
  117. (sxml->xml (you-said "Hi!"))
  118. @print{} <p>You said: Hi!</p>
  119. (sxml->xml (you-said "<i>Rats, foiled again!</i>"))
  120. @print{} <p>You said: &lt;i&gt;Rats, foiled again!&lt;/i&gt;</p>
  121. @end example
  122. So we see in the second example that HTML elements cannot be unwittingly
  123. introduced into the output. However it is now perfectly acceptable to
  124. pass SXML to @code{you-said}; in fact, that is the big advantage of SXML
  125. over everything-as-a-string.
  126. @example
  127. (sxml->xml (you-said (you-said "<Hi!>")))
  128. @print{} <p>You said: <p>You said: &lt;Hi!&gt;</p></p>
  129. @end example
  130. The SXML types allow procedures to @emph{compose}. The types make
  131. manifest which parts are HTML elements, and which are text. So you
  132. needn't worry about escaping user input; the type transition back to a
  133. string handles that for you. @acronym{XSS} vulnerabilities are a thing
  134. of the past.
  135. Well. That's all very nice and opinionated and such, but how do I use
  136. the thing? Read on!
  137. @node URIs
  138. @subsection Uniform Resource Identifiers
  139. Guile provides a standard data type for Uniform Resource Identifiers
  140. (URIs), as defined in RFC 3986.
  141. The generic URI syntax is as follows:
  142. @example
  143. URI-reference := [scheme ":"] ["//" [userinfo "@@"] host [":" port]] path \
  144. [ "?" query ] [ "#" fragment ]
  145. @end example
  146. For example, in the URI, @indicateurl{http://www.gnu.org/help/}, the
  147. scheme is @code{http}, the host is @code{www.gnu.org}, the path is
  148. @code{/help/}, and there is no userinfo, port, query, or fragment.
  149. Userinfo is something of an abstraction, as some legacy URI schemes
  150. allowed userinfo of the form @code{@var{username}:@var{passwd}}. But
  151. since passwords do not belong in URIs, the RFC does not want to condone
  152. this practice, so it calls anything before the @code{@@} sign
  153. @dfn{userinfo}.
  154. @example
  155. (use-modules (web uri))
  156. @end example
  157. The following procedures can be found in the @code{(web uri)}
  158. module. Load it into your Guile, using a form like the above, to have
  159. access to them.
  160. The most common way to build a URI from Scheme is with the
  161. @code{build-uri} function.
  162. @deffn {Scheme Procedure} build-uri scheme @
  163. [#:userinfo=@code{#f}] [#:host=@code{#f}] [#:port=@code{#f}] @
  164. [#:path=@code{""}] [#:query=@code{#f}] [#:fragment=@code{#f}] @
  165. [#:validate?=@code{#t}]
  166. Construct a URI. @var{scheme} should be a symbol, @var{port} either a
  167. positive, exact integer or @code{#f}, and the rest of the fields are
  168. either strings or @code{#f}. If @var{validate?} is true, also run some
  169. consistency checks to make sure that the constructed URI is valid.
  170. @end deffn
  171. @deffn {Scheme Procedure} uri? obj
  172. Return @code{#t} if @var{obj} is a URI.
  173. @end deffn
  174. In Guile, URIs are represented as URI records, with a number of associated
  175. accessors.
  176. @deffn {Scheme Procedure} uri-scheme uri
  177. @deffnx {Scheme Procedure} uri-userinfo uri
  178. @deffnx {Scheme Procedure} uri-host uri
  179. @deffnx {Scheme Procedure} uri-port uri
  180. @deffnx {Scheme Procedure} uri-path uri
  181. @deffnx {Scheme Procedure} uri-query uri
  182. @deffnx {Scheme Procedure} uri-fragment uri
  183. Field accessors for the URI record type. The URI scheme will be a
  184. symbol, or @code{#f} if the object is a relative-ref (see below). The
  185. port will be either a positive, exact integer or @code{#f}, and the rest
  186. of the fields will be either strings or @code{#f} if not present.
  187. @end deffn
  188. @deffn {Scheme Procedure} string->uri string
  189. Parse @var{string} into a URI object. Return @code{#f} if the string
  190. could not be parsed.
  191. @end deffn
  192. @deffn {Scheme Procedure} uri->string uri [#:include-fragment?=@code{#t}]
  193. Serialize @var{uri} to a string. If the URI has a port that is the
  194. default port for its scheme, the port is not included in the
  195. serialization. If @var{include-fragment?} is given as false, the
  196. resulting string will omit the fragment (if any).
  197. @end deffn
  198. @deffn {Scheme Procedure} declare-default-port! scheme port
  199. Declare a default port for the given URI scheme.
  200. @end deffn
  201. @deffn {Scheme Procedure} uri-decode str [#:encoding=@code{"utf-8"}] [#:decode-plus-to-space?=@code{#t}]
  202. Percent-decode the given @var{str}, according to @var{encoding}, which
  203. should be the name of a character encoding.
  204. Note that this function should not generally be applied to a full URI
  205. string. For paths, use @code{split-and-decode-uri-path} instead. For
  206. query strings, split the query on @code{&} and @code{=} boundaries, and
  207. decode the components separately.
  208. Note also that percent-encoded strings encode @emph{bytes}, not
  209. characters. There is no guarantee that a given byte sequence is a valid
  210. string encoding. Therefore this routine may signal an error if the
  211. decoded bytes are not valid for the given encoding. Pass @code{#f} for
  212. @var{encoding} if you want decoded bytes as a bytevector directly.
  213. @xref{Ports, @code{set-port-encoding!}}, for more information on
  214. character encodings.
  215. If @var{decode-plus-to-space?} is true, which is the default, also
  216. replace instances of the plus character @samp{+} with a space character.
  217. This is needed when parsing @code{application/x-www-form-urlencoded}
  218. data.
  219. Returns a string of the decoded characters, or a bytevector if
  220. @var{encoding} was @code{#f}.
  221. @end deffn
  222. @deffn {Scheme Procedure} uri-encode str [#:encoding=@code{"utf-8"}] [#:unescaped-chars]
  223. Percent-encode any character not in the character set,
  224. @var{unescaped-chars}.
  225. The default character set includes alphanumerics from ASCII, as well as
  226. the special characters @samp{-}, @samp{.}, @samp{_}, and @samp{~}. Any
  227. other character will be percent-encoded, by writing out the character to
  228. a bytevector within the given @var{encoding}, then encoding each byte as
  229. @code{%@var{HH}}, where @var{HH} is the hexadecimal representation of
  230. the byte.
  231. @end deffn
  232. @deffn {Scheme Procedure} split-and-decode-uri-path path
  233. Split @var{path} into its components, and decode each component,
  234. removing empty components.
  235. For example, @code{"/foo/bar%20baz/"} decodes to the two-element list,
  236. @code{("foo" "bar baz")}.
  237. @end deffn
  238. @deffn {Scheme Procedure} encode-and-join-uri-path parts
  239. URI-encode each element of @var{parts}, which should be a list of
  240. strings, and join the parts together with @code{/} as a delimiter.
  241. For example, the list @code{("scrambled eggs" "biscuits&gravy")} encodes
  242. as @code{"scrambled%20eggs/biscuits%26gravy"}.
  243. @end deffn
  244. @subsubheading Subtypes of URI
  245. As we noted above, not all URI objects have a scheme. You might have
  246. noted in the ``generic URI syntax'' example that the left-hand side of
  247. that grammar definition was URI-reference, not URI. A
  248. @dfn{URI-reference} is a generalization of a URI where the scheme is
  249. optional. If no scheme is specified, it is taken to be relative to some
  250. other related URI. A common use of URI references is when you want to
  251. be vague regarding the choice of HTTP or HTTPS---serving a web page
  252. referring to @code{/foo.css} will use HTTPS if loaded over HTTPS, or
  253. HTTP otherwise.
  254. @deffn {Scheme Procedure} build-uri-reference [#:scheme=@code{#f}]@
  255. [#:userinfo=@code{#f}] [#:host=@code{#f}] [#:port=@code{#f}] @
  256. [#:path=@code{""}] [#:query=@code{#f}] [#:fragment=@code{#f}] @
  257. [#:validate?=@code{#t}]
  258. Like @code{build-uri}, but with an optional scheme.
  259. @end deffn
  260. @deffn {Scheme Procedure} uri-reference? obj
  261. Return @code{#t} if @var{obj} is a URI-reference. This is the most
  262. general URI predicate, as it includes not only full URIs that have
  263. schemes (those that match @code{uri?}) but also URIs without schemes.
  264. @end deffn
  265. It's also possible to build a @dfn{relative-ref}: a URI-reference that
  266. explicitly lacks a scheme.
  267. @deffn {Scheme Procedure} build-relative-ref @
  268. [#:userinfo=@code{#f}] [#:host=@code{#f}] [#:port=@code{#f}] @
  269. [#:path=@code{""}] [#:query=@code{#f}] [#:fragment=@code{#f}] @
  270. [#:validate?=@code{#t}]
  271. Like @code{build-uri}, but with no scheme.
  272. @end deffn
  273. @deffn {Scheme Procedure} relative-ref? obj
  274. Return @code{#t} if @var{obj} is a ``relative-ref'': a URI-reference
  275. that has no scheme. Every URI-reference will either match @code{uri?}
  276. or @code{relative-ref?} (but not both).
  277. @end deffn
  278. In case it's not clear from the above, the most general of these URI
  279. types is the URI-reference, with @code{build-uri-reference} as the most
  280. general constructor. @code{build-uri} and @code{build-relative-ref}
  281. enforce specific restrictions on the URI-reference. The most
  282. generic URI parser is then @code{string->uri-reference}, and there is
  283. also a parser for when you know that you want a relative-ref.
  284. Note that @code{uri?} will only return @code{#t} for URI objects that
  285. have schemes; that is, it rejects relative-refs.
  286. @deffn {Scheme Procedure} string->uri-reference string
  287. Parse @var{string} into a URI object, while not requiring a scheme.
  288. Return @code{#f} if the string could not be parsed.
  289. @end deffn
  290. @deffn {Scheme Procedure} string->relative-ref string
  291. Parse @var{string} into a URI object, while asserting that no scheme is
  292. present. Return @code{#f} if the string could not be parsed.
  293. @end deffn
  294. @node HTTP
  295. @subsection The Hyper-Text Transfer Protocol
  296. The initial motivation for including web functionality in Guile, rather
  297. than rely on an external package, was to establish a standard base on
  298. which people can share code. To that end, we continue the focus on data
  299. types by providing a number of low-level parsers and unparsers for
  300. elements of the HTTP protocol.
  301. If you want to skip the low-level details for now and move on to web
  302. pages, @pxref{Web Client}, and @pxref{Web Server}. Otherwise, load the
  303. HTTP module, and read on.
  304. @example
  305. (use-modules (web http))
  306. @end example
  307. The focus of the @code{(web http)} module is to parse and unparse
  308. standard HTTP headers, representing them to Guile as native data
  309. structures. For example, a @code{Date:} header will be represented as a
  310. SRFI-19 date record (@pxref{SRFI-19}), rather than as a string.
  311. Guile tries to follow RFCs fairly strictly---the road to perdition being
  312. paved with compatibility hacks---though some allowances are made for
  313. not-too-divergent texts.
  314. Header names are represented as lower-case symbols.
  315. @deffn {Scheme Procedure} string->header name
  316. Parse @var{name} to a symbolic header name.
  317. @end deffn
  318. @deffn {Scheme Procedure} header->string sym
  319. Return the string form for the header named @var{sym}.
  320. @end deffn
  321. For example:
  322. @example
  323. (string->header "Content-Length")
  324. @result{} content-length
  325. (header->string 'content-length)
  326. @result{} "Content-Length"
  327. (string->header "FOO")
  328. @result{} foo
  329. (header->string 'foo)
  330. @result{} "Foo"
  331. @end example
  332. Guile keeps a registry of known headers, their string names, and some
  333. parsing and serialization procedures. If a header is unknown, its
  334. string name is simply its symbol name in title-case.
  335. @deffn {Scheme Procedure} known-header? sym
  336. Return @code{#t} if @var{sym} is a known header, with associated
  337. parsers and serialization procedures, or @code{#f} otherwise.
  338. @end deffn
  339. @deffn {Scheme Procedure} header-parser sym
  340. Return the value parser for headers named @var{sym}. The result is a
  341. procedure that takes one argument, a string, and returns the parsed
  342. value. If the header isn't known to Guile, a default parser is returned
  343. that passes through the string unchanged.
  344. @end deffn
  345. @deffn {Scheme Procedure} header-validator sym
  346. Return a predicate which returns @code{#t} if the given value is valid
  347. for headers named @var{sym}. The default validator for unknown headers
  348. is @code{string?}.
  349. @end deffn
  350. @deffn {Scheme Procedure} header-writer sym
  351. Return a procedure that writes values for headers named @var{sym} to a
  352. port. The resulting procedure takes two arguments: a value and a port.
  353. The default writer is @code{display}.
  354. @end deffn
  355. For more on the set of headers that Guile knows about out of the box,
  356. @pxref{HTTP Headers}. To add your own, use the @code{declare-header!}
  357. procedure:
  358. @deffn {Scheme Procedure} declare-header! name parser validator writer @
  359. [#:multiple?=@code{#f}]
  360. Declare a parser, validator, and writer for a given header.
  361. @end deffn
  362. For example, let's say you are running a web server behind some sort of
  363. proxy, and your proxy adds an @code{X-Client-Address} header, indicating
  364. the IPv4 address of the original client. You would like for the HTTP
  365. request record to parse out this header to a Scheme value, instead of
  366. leaving it as a string. You could register this header with Guile's
  367. HTTP stack like this:
  368. @example
  369. (declare-header! "X-Client-Address"
  370. (lambda (str)
  371. (inet-pton AF_INET str))
  372. (lambda (ip)
  373. (and (integer? ip) (exact? ip) (<= 0 ip #xffffffff)))
  374. (lambda (ip port)
  375. (display (inet-ntop AF_INET ip) port)))
  376. @end example
  377. @deffn {Scheme Procedure} declare-opaque-header! name
  378. A specialised version of @code{declare-header!} for the case in which
  379. you want a header's value to be returned/written ``as-is''.
  380. @end deffn
  381. @deffn {Scheme Procedure} valid-header? sym val
  382. Return a true value if @var{val} is a valid Scheme value for the header
  383. with name @var{sym}, or @code{#f} otherwise.
  384. @end deffn
  385. Now that we have a generic interface for reading and writing headers, we
  386. do just that.
  387. @deffn {Scheme Procedure} read-header port
  388. Read one HTTP header from @var{port}. Return two values: the header
  389. name and the parsed Scheme value. May raise an exception if the header
  390. was known but the value was invalid.
  391. Returns the end-of-file object for both values if the end of the message
  392. headers was reached (i.e., a blank line).
  393. @end deffn
  394. @deffn {Scheme Procedure} parse-header name val
  395. Parse @var{val}, a string, with the parser for the header named
  396. @var{name}. Returns the parsed value.
  397. @end deffn
  398. @deffn {Scheme Procedure} write-header name val port
  399. Write the given header name and value to @var{port}, using the writer
  400. from @code{header-writer}.
  401. @end deffn
  402. @deffn {Scheme Procedure} read-headers port
  403. Read the headers of an HTTP message from @var{port}, returning them
  404. as an ordered alist.
  405. @end deffn
  406. @deffn {Scheme Procedure} write-headers headers port
  407. Write the given header alist to @var{port}. Doesn't write the final
  408. @samp{\r\n}, as the user might want to add another header.
  409. @end deffn
  410. The @code{(web http)} module also has some utility procedures to read
  411. and write request and response lines.
  412. @deffn {Scheme Procedure} parse-http-method str [start] [end]
  413. Parse an HTTP method from @var{str}. The result is an upper-case symbol,
  414. like @code{GET}.
  415. @end deffn
  416. @deffn {Scheme Procedure} parse-http-version str [start] [end]
  417. Parse an HTTP version from @var{str}, returning it as a major--minor
  418. pair. For example, @code{HTTP/1.1} parses as the pair of integers,
  419. @code{(1 . 1)}.
  420. @end deffn
  421. @deffn {Scheme Procedure} parse-request-uri str [start] [end]
  422. Parse a URI from an HTTP request line. Note that URIs in requests do not
  423. have to have a scheme or host name. The result is a URI object.
  424. @end deffn
  425. @deffn {Scheme Procedure} read-request-line port
  426. Read the first line of an HTTP request from @var{port}, returning three
  427. values: the method, the URI, and the version.
  428. @end deffn
  429. @deffn {Scheme Procedure} write-request-line method uri version port
  430. Write the first line of an HTTP request to @var{port}.
  431. @end deffn
  432. @deffn {Scheme Procedure} read-response-line port
  433. Read the first line of an HTTP response from @var{port}, returning three
  434. values: the HTTP version, the response code, and the ``reason phrase''.
  435. @end deffn
  436. @deffn {Scheme Procedure} write-response-line version code reason-phrase port
  437. Write the first line of an HTTP response to @var{port}.
  438. @end deffn
  439. @node HTTP Headers
  440. @subsection HTTP Headers
  441. In addition to defining the infrastructure to parse headers, the
  442. @code{(web http)} module defines specific parsers and unparsers for all
  443. headers defined in the HTTP/1.1 standard.
  444. For example, if you receive a header named @samp{Accept-Language} with a
  445. value @samp{en, es;q=0.8}, Guile parses it as a quality list (defined
  446. below):
  447. @example
  448. (parse-header 'accept-language "en, es;q=0.8")
  449. @result{} ((1000 . "en") (800 . "es"))
  450. @end example
  451. The format of the value for @samp{Accept-Language} headers is defined
  452. below, along with all other headers defined in the HTTP standard. (If
  453. the header were unknown, the value would have been returned as a
  454. string.)
  455. For brevity, the header definitions below are given in the form,
  456. @var{Type} @code{@var{name}}, indicating that values for the header
  457. @code{@var{name}} will be of the given @var{Type}. Since Guile
  458. internally treats header names in lower case, in this document we give
  459. types title-cased names. A short description of each header's
  460. purpose and an example follow.
  461. For full details on the meanings of all of these headers, see the HTTP
  462. 1.1 standard, RFC 2616.
  463. @subsubsection HTTP Header Types
  464. Here we define the types that are used below, when defining headers.
  465. @deftp {HTTP Header Type} Date
  466. A SRFI-19 date.
  467. @end deftp
  468. @deftp {HTTP Header Type} KVList
  469. A list whose elements are keys or key-value pairs. Keys are parsed to
  470. symbols. Values are strings by default. Non-string values are the
  471. exception, and are mentioned explicitly below, as appropriate.
  472. @end deftp
  473. @deftp {HTTP Header Type} SList
  474. A list of strings.
  475. @end deftp
  476. @deftp {HTTP Header Type} Quality
  477. An exact integer between 0 and 1000. Qualities are used to express
  478. preference, given multiple options. An option with a quality of 870,
  479. for example, is preferred over an option with quality 500.
  480. (Qualities are written out over the wire as numbers between 0.0 and
  481. 1.0, but since the standard only allows three digits after the decimal,
  482. it's equivalent to integers between 0 and 1000, so that's what Guile
  483. uses.)
  484. @end deftp
  485. @deftp {HTTP Header Type} QList
  486. A quality list: a list of pairs, the car of which is a quality, and the
  487. cdr a string. Used to express a list of options, along with their
  488. qualities.
  489. @end deftp
  490. @deftp {HTTP Header Type} ETag
  491. An entity tag, represented as a pair. The car of the pair is an opaque
  492. string, and the cdr is @code{#t} if the entity tag is a ``strong'' entity
  493. tag, and @code{#f} otherwise.
  494. @end deftp
  495. @subsubsection General Headers
  496. General HTTP headers may be present in any HTTP message.
  497. @deftypevr {HTTP Header} KVList cache-control
  498. A key-value list of cache-control directives. See RFC 2616, for more
  499. details.
  500. If present, parameters to @code{max-age}, @code{max-stale},
  501. @code{min-fresh}, and @code{s-maxage} are all parsed as non-negative
  502. integers.
  503. If present, parameters to @code{private} and @code{no-cache} are parsed
  504. as lists of header names, as symbols.
  505. @example
  506. (parse-header 'cache-control "no-cache,no-store")
  507. @result{} (no-cache no-store)
  508. (parse-header 'cache-control "no-cache=\"Authorization,Date\",no-store")
  509. @result{} ((no-cache . (authorization date)) no-store)
  510. (parse-header 'cache-control "no-cache=\"Authorization,Date\",max-age=10")
  511. @result{} ((no-cache . (authorization date)) (max-age . 10))
  512. @end example
  513. @end deftypevr
  514. @deftypevr {HTTP Header} List connection
  515. A list of header names that apply only to this HTTP connection, as
  516. symbols. Additionally, the symbol @samp{close} may be present, to
  517. indicate that the server should close the connection after responding to
  518. the request.
  519. @example
  520. (parse-header 'connection "close")
  521. @result{} (close)
  522. @end example
  523. @end deftypevr
  524. @deftypevr {HTTP Header} Date date
  525. The date that a given HTTP message was originated.
  526. @example
  527. (parse-header 'date "Tue, 15 Nov 1994 08:12:31 GMT")
  528. @result{} #<date ...>
  529. @end example
  530. @end deftypevr
  531. @deftypevr {HTTP Header} KVList pragma
  532. A key-value list of implementation-specific directives.
  533. @example
  534. (parse-header 'pragma "no-cache, broccoli=tasty")
  535. @result{} (no-cache (broccoli . "tasty"))
  536. @end example
  537. @end deftypevr
  538. @deftypevr {HTTP Header} List trailer
  539. A list of header names which will appear after the message body, instead
  540. of with the message headers.
  541. @example
  542. (parse-header 'trailer "ETag")
  543. @result{} (etag)
  544. @end example
  545. @end deftypevr
  546. @deftypevr {HTTP Header} List transfer-encoding
  547. A list of transfer codings, expressed as key-value lists. The only
  548. transfer coding defined by the specification is @code{chunked}.
  549. @example
  550. (parse-header 'transfer-encoding "chunked")
  551. @result{} ((chunked))
  552. @end example
  553. @end deftypevr
  554. @deftypevr {HTTP Header} List upgrade
  555. A list of strings, indicating additional protocols that a server could use
  556. in response to a request.
  557. @example
  558. (parse-header 'upgrade "WebSocket")
  559. @result{} ("WebSocket")
  560. @end example
  561. @end deftypevr
  562. FIXME: parse out more fully?
  563. @deftypevr {HTTP Header} List via
  564. A list of strings, indicating the protocol versions and hosts of
  565. intermediate servers and proxies. There may be multiple @code{via}
  566. headers in one message.
  567. @example
  568. (parse-header 'via "1.0 venus, 1.1 mars")
  569. @result{} ("1.0 venus" "1.1 mars")
  570. @end example
  571. @end deftypevr
  572. @deftypevr {HTTP Header} List warning
  573. A list of warnings given by a server or intermediate proxy. Each
  574. warning is itself a list of four elements: a code, as an exact integer
  575. between 0 and 1000, a host as a string, the warning text as a string,
  576. and either @code{#f} or a SRFI-19 date.
  577. There may be multiple @code{warning} headers in one message.
  578. @example
  579. (parse-header 'warning "123 foo \"core breach imminent\"")
  580. @result{} ((123 "foo" "core breach imminent" #f))
  581. @end example
  582. @end deftypevr
  583. @subsubsection Entity Headers
  584. Entity headers may be present in any HTTP message, and refer to the
  585. resource referenced in the HTTP request or response.
  586. @deftypevr {HTTP Header} List allow
  587. A list of allowed methods on a given resource, as symbols.
  588. @example
  589. (parse-header 'allow "GET, HEAD")
  590. @result{} (GET HEAD)
  591. @end example
  592. @end deftypevr
  593. @deftypevr {HTTP Header} List content-encoding
  594. A list of content codings, as symbols.
  595. @example
  596. (parse-header 'content-encoding "gzip")
  597. @result{} (gzip)
  598. @end example
  599. @end deftypevr
  600. @deftypevr {HTTP Header} List content-language
  601. The languages that a resource is in, as strings.
  602. @example
  603. (parse-header 'content-language "en")
  604. @result{} ("en")
  605. @end example
  606. @end deftypevr
  607. @deftypevr {HTTP Header} UInt content-length
  608. The number of bytes in a resource, as an exact, non-negative integer.
  609. @example
  610. (parse-header 'content-length "300")
  611. @result{} 300
  612. @end example
  613. @end deftypevr
  614. @deftypevr {HTTP Header} URI content-location
  615. The canonical URI for a resource, in the case that it is also accessible
  616. from a different URI.
  617. @example
  618. (parse-header 'content-location "http://example.com/foo")
  619. @result{} #<<uri> ...>
  620. @end example
  621. @end deftypevr
  622. @deftypevr {HTTP Header} String content-md5
  623. The MD5 digest of a resource.
  624. @example
  625. (parse-header 'content-md5 "ffaea1a79810785575e29e2bd45e2fa5")
  626. @result{} "ffaea1a79810785575e29e2bd45e2fa5"
  627. @end example
  628. @end deftypevr
  629. @deftypevr {HTTP Header} List content-range
  630. Range specification as a list of three elements: the symbol
  631. @code{bytes}, either the symbol @code{*} or a pair of integers
  632. indicating the byte range, and either @code{*} or an integer indicating
  633. the instance length. Used to indicate that a response only includes
  634. part of a resource.
  635. @example
  636. (parse-header 'content-range "bytes 10-20/*")
  637. @result{} (bytes (10 . 20) *)
  638. @end example
  639. @end deftypevr
  640. @deftypevr {HTTP Header} List content-type
  641. The MIME type of a resource, as a symbol, along with any parameters.
  642. @example
  643. (parse-header 'content-type "text/plain")
  644. @result{} (text/plain)
  645. (parse-header 'content-type "text/plain;charset=utf-8")
  646. @result{} (text/plain (charset . "utf-8"))
  647. @end example
  648. Note that the @code{charset} parameter is something of a misnomer, and
  649. the HTTP specification admits this. It specifies the @emph{encoding} of
  650. the characters, not the character set.
  651. @end deftypevr
  652. @deftypevr {HTTP Header} Date expires
  653. The date/time after which the resource given in a response is considered
  654. stale.
  655. @example
  656. (parse-header 'expires "Tue, 15 Nov 1994 08:12:31 GMT")
  657. @result{} #<date ...>
  658. @end example
  659. @end deftypevr
  660. @deftypevr {HTTP Header} Date last-modified
  661. The date/time on which the resource given in a response was last
  662. modified.
  663. @example
  664. (parse-header 'last-modified "Tue, 15 Nov 1994 08:12:31 GMT")
  665. @result{} #<date ...>
  666. @end example
  667. @end deftypevr
  668. @subsubsection Request Headers
  669. Request headers may only appear in an HTTP request, not in a response.
  670. @deftypevr {HTTP Header} List accept
  671. A list of preferred media types for a response. Each element of the
  672. list is itself a list, in the same format as @code{content-type}.
  673. @example
  674. (parse-header 'accept "text/html,text/plain;charset=utf-8")
  675. @result{} ((text/html) (text/plain (charset . "utf-8")))
  676. @end example
  677. Preference is expressed with quality values:
  678. @example
  679. (parse-header 'accept "text/html;q=0.8,text/plain;q=0.6")
  680. @result{} ((text/html (q . 800)) (text/plain (q . 600)))
  681. @end example
  682. @end deftypevr
  683. @deftypevr {HTTP Header} QList accept-charset
  684. A quality list of acceptable charsets. Note again that what HTTP calls
  685. a ``charset'' is what Guile calls a ``character encoding''.
  686. @example
  687. (parse-header 'accept-charset "iso-8859-5, unicode-1-1;q=0.8")
  688. @result{} ((1000 . "iso-8859-5") (800 . "unicode-1-1"))
  689. @end example
  690. @end deftypevr
  691. @deftypevr {HTTP Header} QList accept-encoding
  692. A quality list of acceptable content codings.
  693. @example
  694. (parse-header 'accept-encoding "gzip,identity;q=0.8")
  695. @result{} ((1000 . "gzip") (800 . "identity"))
  696. @end example
  697. @end deftypevr
  698. @deftypevr {HTTP Header} QList accept-language
  699. A quality list of acceptable languages.
  700. @example
  701. (parse-header 'accept-language "cn,en;q=0.75")
  702. @result{} ((1000 . "cn") (750 . "en"))
  703. @end example
  704. @end deftypevr
  705. @deftypevr {HTTP Header} Pair authorization
  706. Authorization credentials. The car of the pair indicates the
  707. authentication scheme, like @code{basic}. For basic authentication, the
  708. cdr of the pair will be the base64-encoded @samp{@var{user}:@var{pass}}
  709. string. For other authentication schemes, like @code{digest}, the cdr
  710. will be a key-value list of credentials.
  711. @example
  712. (parse-header 'authorization "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==")
  713. @result{} (basic . "QWxhZGRpbjpvcGVuIHNlc2FtZQ==")
  714. @end example
  715. @end deftypevr
  716. @deftypevr {HTTP Header} List expect
  717. A list of expectations that a client has of a server. The expectations
  718. are key-value lists.
  719. @example
  720. (parse-header 'expect "100-continue")
  721. @result{} ((100-continue))
  722. @end example
  723. @end deftypevr
  724. @deftypevr {HTTP Header} String from
  725. The email address of a user making an HTTP request.
  726. @example
  727. (parse-header 'from "bob@@example.com")
  728. @result{} "bob@@example.com"
  729. @end example
  730. @end deftypevr
  731. @deftypevr {HTTP Header} Pair host
  732. The host for the resource being requested, as a hostname-port pair. If
  733. no port is given, the port is @code{#f}.
  734. @example
  735. (parse-header 'host "gnu.org:80")
  736. @result{} ("gnu.org" . 80)
  737. (parse-header 'host "gnu.org")
  738. @result{} ("gnu.org" . #f)
  739. @end example
  740. @end deftypevr
  741. @deftypevr {HTTP Header} *|List if-match
  742. A set of etags, indicating that the request should proceed if and only
  743. if the etag of the resource is in that set. Either the symbol @code{*},
  744. indicating any etag, or a list of entity tags.
  745. @example
  746. (parse-header 'if-match "*")
  747. @result{} *
  748. (parse-header 'if-match "asdfadf")
  749. @result{} (("asdfadf" . #t))
  750. (parse-header 'if-match "W/\"asdfadf\"")
  751. @result{} (("asdfadf" . #f))
  752. @end example
  753. @end deftypevr
  754. @deftypevr {HTTP Header} Date if-modified-since
  755. Indicates that a response should proceed if and only if the resource has
  756. been modified since the given date.
  757. @example
  758. (parse-header 'if-modified-since "Tue, 15 Nov 1994 08:12:31 GMT")
  759. @result{} #<date ...>
  760. @end example
  761. @end deftypevr
  762. @deftypevr {HTTP Header} *|List if-none-match
  763. A set of etags, indicating that the request should proceed if and only
  764. if the etag of the resource is not in the set. Either the symbol
  765. @code{*}, indicating any etag, or a list of entity tags.
  766. @example
  767. (parse-header 'if-none-match "*")
  768. @result{} *
  769. @end example
  770. @end deftypevr
  771. @deftypevr {HTTP Header} ETag|Date if-range
  772. Indicates that the range request should proceed if and only if the
  773. resource matches a modification date or an etag. Either an entity tag,
  774. or a SRFI-19 date.
  775. @example
  776. (parse-header 'if-range "\"original-etag\"")
  777. @result{} ("original-etag" . #t)
  778. @end example
  779. @end deftypevr
  780. @deftypevr {HTTP Header} Date if-unmodified-since
  781. Indicates that a response should proceed if and only if the resource has
  782. not been modified since the given date.
  783. @example
  784. (parse-header 'if-unmodified-since "Tue, 15 Nov 1994 08:12:31 GMT")
  785. @result{} #<date ...>
  786. @end example
  787. @end deftypevr
  788. @deftypevr {HTTP Header} UInt max-forwards
  789. The maximum number of proxy or gateway hops that a request should be
  790. subject to.
  791. @example
  792. (parse-header 'max-forwards "10")
  793. @result{} 10
  794. @end example
  795. @end deftypevr
  796. @deftypevr {HTTP Header} Pair proxy-authorization
  797. Authorization credentials for a proxy connection. See the documentation
  798. for @code{authorization} above for more information on the format.
  799. @example
  800. (parse-header 'proxy-authorization "Digest foo=bar,baz=qux")
  801. @result{} (digest (foo . "bar") (baz . "qux"))
  802. @end example
  803. @end deftypevr
  804. @deftypevr {HTTP Header} Pair range
  805. A range request, indicating that the client wants only part of a
  806. resource. The car of the pair is the symbol @code{bytes}, and the cdr
  807. is a list of pairs. Each element of the cdr indicates a range; the car
  808. is the first byte position and the cdr is the last byte position, as
  809. integers, or @code{#f} if not given.
  810. @example
  811. (parse-header 'range "bytes=10-30,50-")
  812. @result{} (bytes (10 . 30) (50 . #f))
  813. @end example
  814. @end deftypevr
  815. @deftypevr {HTTP Header} URI referer
  816. The URI of the resource that referred the user to this resource. The
  817. name of the header is a misspelling, but we are stuck with it.
  818. @example
  819. (parse-header 'referer "http://www.gnu.org/")
  820. @result{} #<uri ...>
  821. @end example
  822. @end deftypevr
  823. @deftypevr {HTTP Header} List te
  824. A list of transfer codings, expressed as key-value lists. A common
  825. transfer coding is @code{trailers}.
  826. @example
  827. (parse-header 'te "trailers")
  828. @result{} ((trailers))
  829. @end example
  830. @end deftypevr
  831. @deftypevr {HTTP Header} String user-agent
  832. A string indicating the user agent making the request. The
  833. specification defines a structured format for this header, but it is
  834. widely disregarded, so Guile does not attempt to parse strictly.
  835. @example
  836. (parse-header 'user-agent "Mozilla/5.0")
  837. @result{} "Mozilla/5.0"
  838. @end example
  839. @end deftypevr
  840. @subsubsection Response Headers
  841. @deftypevr {HTTP Header} List accept-ranges
  842. A list of range units that the server supports, as symbols.
  843. @example
  844. (parse-header 'accept-ranges "bytes")
  845. @result{} (bytes)
  846. @end example
  847. @end deftypevr
  848. @deftypevr {HTTP Header} UInt age
  849. The age of a cached response, in seconds.
  850. @example
  851. (parse-header 'age "3600")
  852. @result{} 3600
  853. @end example
  854. @end deftypevr
  855. @deftypevr {HTTP Header} ETag etag
  856. The entity-tag of the resource.
  857. @example
  858. (parse-header 'etag "\"foo\"")
  859. @result{} ("foo" . #t)
  860. @end example
  861. @end deftypevr
  862. @deftypevr {HTTP Header} URI-reference location
  863. A URI reference on which a request may be completed. Used in
  864. combination with a redirecting status code to perform client-side
  865. redirection.
  866. @example
  867. (parse-header 'location "http://example.com/other")
  868. @result{} #<uri ...>
  869. @end example
  870. @end deftypevr
  871. @deftypevr {HTTP Header} List proxy-authenticate
  872. A list of challenges to a proxy, indicating the need for authentication.
  873. @example
  874. (parse-header 'proxy-authenticate "Basic realm=\"foo\"")
  875. @result{} ((basic (realm . "foo")))
  876. @end example
  877. @end deftypevr
  878. @deftypevr {HTTP Header} UInt|Date retry-after
  879. Used in combination with a server-busy status code, like 503, to
  880. indicate that a client should retry later. Either a number of seconds,
  881. or a date.
  882. @example
  883. (parse-header 'retry-after "60")
  884. @result{} 60
  885. @end example
  886. @end deftypevr
  887. @deftypevr {HTTP Header} String server
  888. A string identifying the server.
  889. @example
  890. (parse-header 'server "My first web server")
  891. @result{} "My first web server"
  892. @end example
  893. @end deftypevr
  894. @deftypevr {HTTP Header} *|List vary
  895. A set of request headers that were used in computing this response.
  896. Used to indicate that server-side content negotiation was performed, for
  897. example in response to the @code{accept-language} header. Can also be
  898. the symbol @code{*}, indicating that all headers were considered.
  899. @example
  900. (parse-header 'vary "Accept-Language, Accept")
  901. @result{} (accept-language accept)
  902. @end example
  903. @end deftypevr
  904. @deftypevr {HTTP Header} List www-authenticate
  905. A list of challenges to a user, indicating the need for authentication.
  906. @example
  907. (parse-header 'www-authenticate "Basic realm=\"foo\"")
  908. @result{} ((basic (realm . "foo")))
  909. @end example
  910. @end deftypevr
  911. @node Transfer Codings
  912. @subsection Transfer Codings
  913. HTTP 1.1 allows for various transfer codings to be applied to message
  914. bodies. These include various types of compression, and HTTP chunked
  915. encoding. Currently, only chunked encoding is supported by Guile.
  916. Chunked coding is an optional coding that may be applied to message
  917. bodies, to allow messages whose length is not known beforehand to be
  918. returned. Such messages can be split into chunks, terminated by a final
  919. zero length chunk.
  920. In order to make dealing with encodings simpler, Guile provides
  921. procedures to create ports that ``wrap'' existing ports, applying
  922. transformations transparently under the hood.
  923. These procedures are in the @code{(web http)} module.
  924. @example
  925. (use-modules (web http))
  926. @end example
  927. @deffn {Scheme Procedure} make-chunked-input-port port [#:keep-alive?=#f]
  928. Returns a new port, that transparently reads and decodes chunk-encoded
  929. data from @var{port}. If no more chunk-encoded data is available, it
  930. returns the end-of-file object. When the port is closed, @var{port} will
  931. also be closed, unless @var{keep-alive?} is true.
  932. @end deffn
  933. @example
  934. (use-modules (ice-9 rdelim))
  935. (define s "5\r\nFirst\r\nA\r\n line\n Sec\r\n8\r\nond line\r\n0\r\n")
  936. (define p (make-chunked-input-port (open-input-string s)))
  937. (read-line p)
  938. @result{} "First line"
  939. (read-line p)
  940. @result{} "Second line"
  941. @end example
  942. @deffn {Scheme Procedure} make-chunked-output-port port [#:keep-alive?=#f]
  943. Returns a new port, which transparently encodes data as chunk-encoded
  944. before writing it to @var{port}. Whenever a write occurs on this port,
  945. it buffers it, until the port is flushed, at which point it writes a
  946. chunk containing all the data written so far. When the port is closed,
  947. the data remaining is written to @var{port}, as is the terminating zero
  948. chunk. It also causes @var{port} to be closed, unless @var{keep-alive?}
  949. is true.
  950. Note. Forcing a chunked output port when there is no data buffered
  951. does not write a zero chunk, as this would cause the data to be
  952. interpreted incorrectly by the client.
  953. @end deffn
  954. @example
  955. (call-with-output-string
  956. (lambda (out)
  957. (define out* (make-chunked-output-port out #:keep-alive? #t))
  958. (display "first chunk" out*)
  959. (force-output out*)
  960. (force-output out*) ; note this does not write a zero chunk
  961. (display "second chunk" out*)
  962. (close-port out*)))
  963. @result{} "b\r\nfirst chunk\r\nc\r\nsecond chunk\r\n0\r\n"
  964. @end example
  965. @node Requests
  966. @subsection HTTP Requests
  967. @example
  968. (use-modules (web request))
  969. @end example
  970. The request module contains a data type for HTTP requests.
  971. @subsubsection An Important Note on Character Sets
  972. HTTP requests consist of two parts: the request proper, consisting of a
  973. request line and a set of headers, and (optionally) a body. The body
  974. might have a binary content-type, and even in the textual case its
  975. length is specified in bytes, not characters.
  976. Therefore, HTTP is a fundamentally binary protocol. However the request
  977. line and headers are specified to be in a subset of ASCII, so they can
  978. be treated as text, provided that the port's encoding is set to an
  979. ASCII-compatible one-byte-per-character encoding. ISO-8859-1 (latin-1)
  980. is just such an encoding, and happens to be very efficient for Guile.
  981. So what Guile does when reading requests from the wire, or writing them
  982. out, is to set the port's encoding to latin-1, and treat the request
  983. headers as text.
  984. The request body is another issue. For binary data, the data is
  985. probably in a bytevector, so we use the R6RS binary output procedures to
  986. write out the binary payload. Textual data usually has to be written
  987. out to some character encoding, usually UTF-8, and then the resulting
  988. bytevector is written out to the port.
  989. In summary, Guile reads and writes HTTP over latin-1 sockets, without
  990. any loss of generality.
  991. @subsubsection Request API
  992. @deffn {Scheme Procedure} request? obj
  993. @deffnx {Scheme Procedure} request-method request
  994. @deffnx {Scheme Procedure} request-uri request
  995. @deffnx {Scheme Procedure} request-version request
  996. @deffnx {Scheme Procedure} request-headers request
  997. @deffnx {Scheme Procedure} request-meta request
  998. @deffnx {Scheme Procedure} request-port request
  999. A predicate and field accessors for the request type. The fields are as
  1000. follows:
  1001. @table @code
  1002. @item method
  1003. The HTTP method, for example, @code{GET}.
  1004. @item uri
  1005. The URI as a URI record.
  1006. @item version
  1007. The HTTP version pair, like @code{(1 . 1)}.
  1008. @item headers
  1009. The request headers, as an alist of parsed values.
  1010. @item meta
  1011. An arbitrary alist of other data, for example information returned in
  1012. the @code{sockaddr} from @code{accept} (@pxref{Network Sockets and
  1013. Communication}).
  1014. @item port
  1015. The port on which to read or write a request body, if any.
  1016. @end table
  1017. @end deffn
  1018. @deffn {Scheme Procedure} read-request port [meta='()]
  1019. Read an HTTP request from @var{port}, optionally attaching the given
  1020. metadata, @var{meta}.
  1021. As a side effect, sets the encoding on @var{port} to ISO-8859-1
  1022. (latin-1), so that reading one character reads one byte. See the
  1023. discussion of character sets above, for more information.
  1024. Note that the body is not part of the request. Once you have read a
  1025. request, you may read the body separately, and likewise for writing
  1026. requests.
  1027. @end deffn
  1028. @deffn {Scheme Procedure} build-request uri [#:method='GET] @
  1029. [#:version='(1 . 1)] [#:headers='()] [#:port=#f] [#:meta='()] @
  1030. [#:validate-headers?=#t]
  1031. Construct an HTTP request object. If @var{validate-headers?} is true,
  1032. the headers are each run through their respective validators.
  1033. @end deffn
  1034. @deffn {Scheme Procedure} write-request r port
  1035. Write the given HTTP request to @var{port}.
  1036. Return a new request, whose @code{request-port} will continue writing
  1037. on @var{port}, perhaps using some transfer encoding.
  1038. @end deffn
  1039. @deffn {Scheme Procedure} read-request-body r
  1040. Reads the request body from @var{r}, as a bytevector. Return @code{#f}
  1041. if there was no request body.
  1042. @end deffn
  1043. @deffn {Scheme Procedure} write-request-body r bv
  1044. Write @var{bv}, a bytevector, to the port corresponding to the HTTP
  1045. request @var{r}.
  1046. @end deffn
  1047. The various headers that are typically associated with HTTP requests may
  1048. be accessed with these dedicated accessors. @xref{HTTP Headers}, for
  1049. more information on the format of parsed headers.
  1050. @deffn {Scheme Procedure} request-accept request [default='()]
  1051. @deffnx {Scheme Procedure} request-accept-charset request [default='()]
  1052. @deffnx {Scheme Procedure} request-accept-encoding request [default='()]
  1053. @deffnx {Scheme Procedure} request-accept-language request [default='()]
  1054. @deffnx {Scheme Procedure} request-allow request [default='()]
  1055. @deffnx {Scheme Procedure} request-authorization request [default=#f]
  1056. @deffnx {Scheme Procedure} request-cache-control request [default='()]
  1057. @deffnx {Scheme Procedure} request-connection request [default='()]
  1058. @deffnx {Scheme Procedure} request-content-encoding request [default='()]
  1059. @deffnx {Scheme Procedure} request-content-language request [default='()]
  1060. @deffnx {Scheme Procedure} request-content-length request [default=#f]
  1061. @deffnx {Scheme Procedure} request-content-location request [default=#f]
  1062. @deffnx {Scheme Procedure} request-content-md5 request [default=#f]
  1063. @deffnx {Scheme Procedure} request-content-range request [default=#f]
  1064. @deffnx {Scheme Procedure} request-content-type request [default=#f]
  1065. @deffnx {Scheme Procedure} request-date request [default=#f]
  1066. @deffnx {Scheme Procedure} request-expect request [default='()]
  1067. @deffnx {Scheme Procedure} request-expires request [default=#f]
  1068. @deffnx {Scheme Procedure} request-from request [default=#f]
  1069. @deffnx {Scheme Procedure} request-host request [default=#f]
  1070. @deffnx {Scheme Procedure} request-if-match request [default=#f]
  1071. @deffnx {Scheme Procedure} request-if-modified-since request [default=#f]
  1072. @deffnx {Scheme Procedure} request-if-none-match request [default=#f]
  1073. @deffnx {Scheme Procedure} request-if-range request [default=#f]
  1074. @deffnx {Scheme Procedure} request-if-unmodified-since request [default=#f]
  1075. @deffnx {Scheme Procedure} request-last-modified request [default=#f]
  1076. @deffnx {Scheme Procedure} request-max-forwards request [default=#f]
  1077. @deffnx {Scheme Procedure} request-pragma request [default='()]
  1078. @deffnx {Scheme Procedure} request-proxy-authorization request [default=#f]
  1079. @deffnx {Scheme Procedure} request-range request [default=#f]
  1080. @deffnx {Scheme Procedure} request-referer request [default=#f]
  1081. @deffnx {Scheme Procedure} request-te request [default=#f]
  1082. @deffnx {Scheme Procedure} request-trailer request [default='()]
  1083. @deffnx {Scheme Procedure} request-transfer-encoding request [default='()]
  1084. @deffnx {Scheme Procedure} request-upgrade request [default='()]
  1085. @deffnx {Scheme Procedure} request-user-agent request [default=#f]
  1086. @deffnx {Scheme Procedure} request-via request [default='()]
  1087. @deffnx {Scheme Procedure} request-warning request [default='()]
  1088. Return the given request header, or @var{default} if none was present.
  1089. @end deffn
  1090. @deffn {Scheme Procedure} request-absolute-uri r [default-host=#f] @
  1091. [default-port=#f] [default-scheme=#f]
  1092. A helper routine to determine the absolute URI of a request, using the
  1093. @code{host} header and the default scheme, host and port. If there is
  1094. no default scheme and the URI is not itself absolute, an error is
  1095. signalled.
  1096. @end deffn
  1097. @node Responses
  1098. @subsection HTTP Responses
  1099. @example
  1100. (use-modules (web response))
  1101. @end example
  1102. As with requests (@pxref{Requests}), Guile offers a data type for HTTP
  1103. responses. Again, the body is represented separately from the response.
  1104. @deffn {Scheme Procedure} response? obj
  1105. @deffnx {Scheme Procedure} response-version response
  1106. @deffnx {Scheme Procedure} response-code response
  1107. @deffnx {Scheme Procedure} response-reason-phrase response
  1108. @deffnx {Scheme Procedure} response-headers response
  1109. @deffnx {Scheme Procedure} response-port response
  1110. A predicate and field accessors for the response type. The fields are as
  1111. follows:
  1112. @table @code
  1113. @item version
  1114. The HTTP version pair, like @code{(1 . 1)}.
  1115. @item code
  1116. The HTTP response code, like @code{200}.
  1117. @item reason-phrase
  1118. The reason phrase, or the standard reason phrase for the response's
  1119. code.
  1120. @item headers
  1121. The response headers, as an alist of parsed values.
  1122. @item port
  1123. The port on which to read or write a response body, if any.
  1124. @end table
  1125. @end deffn
  1126. @deffn {Scheme Procedure} read-response port
  1127. Read an HTTP response from @var{port}.
  1128. As a side effect, sets the encoding on @var{port} to ISO-8859-1
  1129. (latin-1), so that reading one character reads one byte. See the
  1130. discussion of character sets in @ref{Requests}, for more information.
  1131. @end deffn
  1132. @deffn {Scheme Procedure} build-response [#:version='(1 . 1)] [#:code=200] [#:reason-phrase=#f] [#:headers='()] [#:port=#f] [#:validate-headers?=#t]
  1133. Construct an HTTP response object. If @var{validate-headers?} is true,
  1134. the headers are each run through their respective validators.
  1135. @end deffn
  1136. @deffn {Scheme Procedure} adapt-response-version response version
  1137. Adapt the given response to a different HTTP version. Return a new HTTP
  1138. response.
  1139. The idea is that many applications might just build a response for the
  1140. default HTTP version, and this method could handle a number of
  1141. programmatic transformations to respond to older HTTP versions (0.9 and
  1142. 1.0). But currently this function is a bit heavy-handed, just updating
  1143. the version field.
  1144. @end deffn
  1145. @deffn {Scheme Procedure} write-response r port
  1146. Write the given HTTP response to @var{port}.
  1147. Return a new response, whose @code{response-port} will continue writing
  1148. on @var{port}, perhaps using some transfer encoding.
  1149. @end deffn
  1150. @deffn {Scheme Procedure} response-must-not-include-body? r
  1151. Some responses, like those with status code 304, are specified as never
  1152. having bodies. This predicate returns @code{#t} for those responses.
  1153. Note also, though, that responses to @code{HEAD} requests must also not
  1154. have a body.
  1155. @end deffn
  1156. @deffn {Scheme Procedure} response-body-port r [#:decode?=#t] [#:keep-alive?=#t]
  1157. Return an input port from which the body of @var{r} can be read. The encoding
  1158. of the returned port is set according to @var{r}'s @code{content-type} header,
  1159. when it's textual, except if @var{decode?} is @code{#f}. Return @code{#f}
  1160. when no body is available.
  1161. When @var{keep-alive?} is @code{#f}, closing the returned port also closes
  1162. @var{r}'s response port.
  1163. @end deffn
  1164. @deffn {Scheme Procedure} read-response-body r
  1165. Read the response body from @var{r}, as a bytevector. Returns @code{#f}
  1166. if there was no response body.
  1167. @end deffn
  1168. @deffn {Scheme Procedure} write-response-body r bv
  1169. Write @var{bv}, a bytevector, to the port corresponding to the HTTP
  1170. response @var{r}.
  1171. @end deffn
  1172. As with requests, the various headers that are typically associated with
  1173. HTTP responses may be accessed with these dedicated accessors.
  1174. @xref{HTTP Headers}, for more information on the format of parsed
  1175. headers.
  1176. @deffn {Scheme Procedure} response-accept-ranges response [default=#f]
  1177. @deffnx {Scheme Procedure} response-age response [default='()]
  1178. @deffnx {Scheme Procedure} response-allow response [default='()]
  1179. @deffnx {Scheme Procedure} response-cache-control response [default='()]
  1180. @deffnx {Scheme Procedure} response-connection response [default='()]
  1181. @deffnx {Scheme Procedure} response-content-encoding response [default='()]
  1182. @deffnx {Scheme Procedure} response-content-language response [default='()]
  1183. @deffnx {Scheme Procedure} response-content-length response [default=#f]
  1184. @deffnx {Scheme Procedure} response-content-location response [default=#f]
  1185. @deffnx {Scheme Procedure} response-content-md5 response [default=#f]
  1186. @deffnx {Scheme Procedure} response-content-range response [default=#f]
  1187. @deffnx {Scheme Procedure} response-content-type response [default=#f]
  1188. @deffnx {Scheme Procedure} response-date response [default=#f]
  1189. @deffnx {Scheme Procedure} response-etag response [default=#f]
  1190. @deffnx {Scheme Procedure} response-expires response [default=#f]
  1191. @deffnx {Scheme Procedure} response-last-modified response [default=#f]
  1192. @deffnx {Scheme Procedure} response-location response [default=#f]
  1193. @deffnx {Scheme Procedure} response-pragma response [default='()]
  1194. @deffnx {Scheme Procedure} response-proxy-authenticate response [default=#f]
  1195. @deffnx {Scheme Procedure} response-retry-after response [default=#f]
  1196. @deffnx {Scheme Procedure} response-server response [default=#f]
  1197. @deffnx {Scheme Procedure} response-trailer response [default='()]
  1198. @deffnx {Scheme Procedure} response-transfer-encoding response [default='()]
  1199. @deffnx {Scheme Procedure} response-upgrade response [default='()]
  1200. @deffnx {Scheme Procedure} response-vary response [default='()]
  1201. @deffnx {Scheme Procedure} response-via response [default='()]
  1202. @deffnx {Scheme Procedure} response-warning response [default='()]
  1203. @deffnx {Scheme Procedure} response-www-authenticate response [default=#f]
  1204. Return the given response header, or @var{default} if none was present.
  1205. @end deffn
  1206. @deffn {Scheme Procedure} text-content-type? @var{type}
  1207. Return @code{#t} if @var{type}, a symbol as returned by
  1208. @code{response-content-type}, represents a textual type such as
  1209. @code{text/plain}.
  1210. @end deffn
  1211. @node Web Client
  1212. @subsection Web Client
  1213. @code{(web client)} provides a simple, synchronous HTTP client, built on
  1214. the lower-level HTTP, request, and response modules.
  1215. @example
  1216. (use-modules (web client))
  1217. @end example
  1218. @deffn {Scheme Procedure} open-socket-for-uri uri [#:verify-certificate? #t]
  1219. Return an open input/output port for a connection to URI. Guile
  1220. dynamically loads GnuTLS for HTTPS support.
  1221. @xref{Guile Preparations,
  1222. how to install the GnuTLS bindings for Guile,, gnutls-guile,
  1223. GnuTLS-Guile}, for more information.
  1224. @cindex certificate verification, for HTTPS
  1225. When @var{verify-certificate?} is true, verify the server's X.509
  1226. certificates against those read from @code{x509-certificate-directory}.
  1227. When an error occurs---e.g., the server's certificate has expired, or
  1228. its host name does not match---raise a @code{tls-certificate-error}
  1229. exception. The arguments to the @code{tls-certificate-error} exception
  1230. are:
  1231. @enumerate
  1232. @item
  1233. a symbol indicating the failure cause, @code{host-mismatch} if the
  1234. certificate's host name does not match the server's host name, and
  1235. @code{invalid-certificate} for other causes;
  1236. @item
  1237. the server's X.509 certificate (@pxref{Guile Reference, GnuTLS Guile
  1238. reference,, gnutls-guile, GnuTLS-Guile});
  1239. @item
  1240. the server's host name (a string);
  1241. @item
  1242. in the case of @code{invalid-certificate} errors, a list of GnuTLS
  1243. certificate status values---one of the @code{certificate-status/}
  1244. constants, such as @code{certificate-status/signer-not-found} or
  1245. @code{certificate-status/revoked}.
  1246. @end enumerate
  1247. @end deffn
  1248. @anchor{http-request}@deffn {Scheme Procedure} http-request @var{uri} @var{arg}@dots{}
  1249. Connect to the server corresponding to @var{uri} and make a request over
  1250. HTTP, using @var{method} (@code{GET}, @code{HEAD}, @code{POST}, etc.).
  1251. The following keyword arguments allow you to modify the requests in
  1252. various ways, for example attaching a body to the request, or setting
  1253. specific headers. The following table lists the keyword arguments and
  1254. their default values.
  1255. @table @code
  1256. @item #:method 'GET
  1257. @item #:body #f
  1258. @item #:verify-certificate? #t
  1259. @item #:port (open-socket-for-uri @var{uri} #:verify-certificate? @var{verify-certificate?})
  1260. @item #:version '(1 . 1)
  1261. @item #:keep-alive? #f
  1262. @item #:headers '()
  1263. @item #:decode-body? #t
  1264. @item #:streaming? #f
  1265. @end table
  1266. If you already have a port open, pass it as @var{port}. Otherwise, a
  1267. connection will be opened to the server corresponding to @var{uri}. Any
  1268. extra headers in the alist @var{headers} will be added to the request.
  1269. If @var{body} is not @code{#f}, a message body will also be sent with
  1270. the HTTP request. If @var{body} is a string, it is encoded according to
  1271. the content-type in @var{headers}, defaulting to UTF-8. Otherwise
  1272. @var{body} should be a bytevector, or @code{#f} for no body. Although a
  1273. message body may be sent with any request, usually only @code{POST} and
  1274. @code{PUT} requests have bodies.
  1275. If @var{decode-body?} is true, as is the default, the body of the
  1276. response will be decoded to a string, if it is a textual content-type.
  1277. Otherwise it will be returned as a bytevector.
  1278. However, if @var{streaming?} is true, instead of eagerly reading the
  1279. response body from the server, this function only reads off the headers.
  1280. The response body will be returned as a port on which the data may be
  1281. read.
  1282. Unless @var{keep-alive?} is true, the port will be closed after the full
  1283. response body has been read.
  1284. If @var{port} is false, @var{uri} denotes an HTTPS URL, and @var{verify-certificate?} is
  1285. true, verify X.509 certificates against those available in
  1286. @code{x509-certificate-directory}.
  1287. Returns two values: the response read from the server, and the response
  1288. body as a string, a bytevector, @code{#f}, or as a port (if
  1289. @var{streaming?} is true).
  1290. @end deffn
  1291. @deffn {Scheme Procedure} http-get @var{uri} @var{arg}@dots{}
  1292. @deffnx {Scheme Procedure} http-head @var{uri} @var{arg}@dots{}
  1293. @deffnx {Scheme Procedure} http-post @var{uri} @var{arg}@dots{}
  1294. @deffnx {Scheme Procedure} http-put @var{uri} @var{arg}@dots{}
  1295. @deffnx {Scheme Procedure} http-delete @var{uri} @var{arg}@dots{}
  1296. @deffnx {Scheme Procedure} http-trace @var{uri} @var{arg}@dots{}
  1297. @deffnx {Scheme Procedure} http-options @var{uri} @var{arg}@dots{}
  1298. Connect to the server corresponding to @var{uri} and make a request over
  1299. HTTP, using the appropriate method (@code{GET}, @code{HEAD},
  1300. @code{POST}, etc.).
  1301. These procedures are variants of @code{http-request} specialized with a
  1302. specific @var{method} argument, and have the same prototype: a URI
  1303. followed by an optional sequence of keyword arguments.
  1304. @xref{http-request}, for full documentation on the various keyword
  1305. arguments.
  1306. @end deffn
  1307. @defvr {Scheme Parameter} x509-certificate-directory
  1308. @cindex X.509 certificate directory
  1309. @cindex HTTPS, X.509 certificates
  1310. @cindex certificates, for HTTPS
  1311. This parameter gives the name of the directory where X.509 certificates
  1312. for HTTPS connections should be looked for.
  1313. Its default value is one of:
  1314. @itemize
  1315. @item
  1316. @vindex GUILE_TLS_CERTIFICATE_DIRECTORY
  1317. the value of the @env{GUILE_TLS_CERTIFICATE_DIRECTORY} environment
  1318. variable;
  1319. @item
  1320. @vindex SSL_CERT_DIR
  1321. or the value of the @env{SSL_CERT_DIR} environment variable (also
  1322. honored by the OpenSSL library);
  1323. @item
  1324. or, as a last resort, @code{"/etc/ssl/certs"}.
  1325. @end itemize
  1326. X.509 certificates are used when authenticating the identity of a remote
  1327. site, when the @code{#:verify-certificate?} argument to
  1328. @code{open-socket-for-uri}, to @code{http-request}, or to related
  1329. procedures is true.
  1330. @end defvr
  1331. @code{http-get} is useful for making one-off requests to web sites. If
  1332. you are writing a web spider or some other client that needs to handle a
  1333. number of requests in parallel, it's better to build an event-driven URL
  1334. fetcher, similar in structure to the web server (@pxref{Web Server}).
  1335. Another option, good but not as performant, would be to use threads,
  1336. possibly via par-map or futures.
  1337. @deffn {Scheme Parameter} current-http-proxy
  1338. @deffnx {Scheme Parameter} current-https-proxy
  1339. Either @code{#f} or a non-empty string containing the URL of the HTTP
  1340. or HTTPS proxy server to be used by the procedures in the @code{(web client)}
  1341. module, including @code{open-socket-for-uri}. Their initial values are
  1342. based on the @env{http_proxy} and @env{https_proxy} environment variables, respectively.
  1343. @example
  1344. (current-http-proxy) @result{} "http://localhost:8123/"
  1345. (parameterize ((current-http-proxy #f))
  1346. (http-get "http://example.com/")) ; temporarily bypass proxy
  1347. (current-http-proxy) @result{} "http://localhost:8123/"
  1348. @end example
  1349. @end deffn
  1350. @node Web Server
  1351. @subsection Web Server
  1352. @code{(web server)} is a generic web server interface, along with a main
  1353. loop implementation for web servers controlled by Guile.
  1354. @example
  1355. (use-modules (web server))
  1356. @end example
  1357. The lowest layer is the @code{<server-impl>} object, which defines a set
  1358. of hooks to open a server, read a request from a client, write a
  1359. response to a client, and close a server. These hooks -- @code{open},
  1360. @code{read}, @code{write}, and @code{close}, respectively -- are bound
  1361. together in a @code{<server-impl>} object. Procedures in this module take a
  1362. @code{<server-impl>} object, if needed.
  1363. A @code{<server-impl>} may also be looked up by name. If you pass the
  1364. @code{http} symbol to @code{run-server}, Guile looks for a variable
  1365. named @code{http} in the @code{(web server http)} module, which should
  1366. be bound to a @code{<server-impl>} object. Such a binding is made by
  1367. instantiation of the @code{define-server-impl} syntax. In this way the
  1368. run-server loop can automatically load other backends if available.
  1369. The life cycle of a server goes as follows:
  1370. @enumerate
  1371. @item
  1372. The @code{open} hook is called, to open the server. @code{open} takes
  1373. zero or more arguments, depending on the backend, and returns an opaque
  1374. server socket object, or signals an error.
  1375. @item
  1376. The @code{read} hook is called, to read a request from a new client.
  1377. The @code{read} hook takes one argument, the server socket. It should
  1378. return three values: an opaque client socket, the request, and the
  1379. request body. The request should be a @code{<request>} object, from
  1380. @code{(web request)}. The body should be a string or a bytevector, or
  1381. @code{#f} if there is no body.
  1382. If the read failed, the @code{read} hook may return @code{#f} for the client
  1383. socket, request, and body.
  1384. @item
  1385. A user-provided handler procedure is called, with the request and body
  1386. as its arguments. The handler should return two values: the response,
  1387. as a @code{<response>} record from @code{(web response)}, and the
  1388. response body as bytevector, or @code{#f} if not present.
  1389. The response and response body are run through @code{sanitize-response},
  1390. documented below. This allows the handler writer to take some
  1391. convenient shortcuts: for example, instead of a @code{<response>}, the
  1392. handler can simply return an alist of headers, in which case a default
  1393. response object is constructed with those headers. Instead of a
  1394. bytevector for the body, the handler can return a string, which will be
  1395. serialized into an appropriate encoding; or it can return a procedure,
  1396. which will be called on a port to write out the data. See the
  1397. @code{sanitize-response} documentation, for more.
  1398. @item
  1399. The @code{write} hook is called with three arguments: the client
  1400. socket, the response, and the body. The @code{write} hook returns no
  1401. values.
  1402. @item
  1403. At this point the request handling is complete. For a loop, we
  1404. loop back and try to read a new request.
  1405. @item
  1406. If the user interrupts the loop, the @code{close} hook is called on
  1407. the server socket.
  1408. @end enumerate
  1409. A user may define a server implementation with the following form:
  1410. @deffn {Scheme Syntax} define-server-impl name open read write close
  1411. Make a @code{<server-impl>} object with the hooks @var{open},
  1412. @var{read}, @var{write}, and @var{close}, and bind it to the symbol
  1413. @var{name} in the current module.
  1414. @end deffn
  1415. @deffn {Scheme Procedure} lookup-server-impl impl
  1416. Look up a server implementation. If @var{impl} is a server
  1417. implementation already, it is returned directly. If it is a symbol, the
  1418. binding named @var{impl} in the @code{(web server @var{impl})} module is
  1419. looked up. Otherwise an error is signaled.
  1420. Currently a server implementation is a somewhat opaque type, useful only
  1421. for passing to other procedures in this module, like @code{read-client}.
  1422. @end deffn
  1423. The @code{(web server)} module defines a number of routines that use
  1424. @code{<server-impl>} objects to implement parts of a web server. Given
  1425. that we don't expose the accessors for the various fields of a
  1426. @code{<server-impl>}, indeed these routines are the only procedures with
  1427. any access to the impl objects.
  1428. @deffn {Scheme Procedure} open-server impl open-params
  1429. Open a server for the given implementation. Return one value, the new
  1430. server object. The implementation's @code{open} procedure is applied to
  1431. @var{open-params}, which should be a list.
  1432. @end deffn
  1433. @deffn {Scheme Procedure} read-client impl server
  1434. Read a new client from @var{server}, by applying the implementation's
  1435. @code{read} procedure to the server. If successful, return three
  1436. values: an object corresponding to the client, a request object, and the
  1437. request body. If any exception occurs, return @code{#f} for all three
  1438. values.
  1439. @end deffn
  1440. @deffn {Scheme Procedure} handle-request handler request body state
  1441. Handle a given request, returning the response and body.
  1442. The response and response body are produced by calling the given
  1443. @var{handler} with @var{request} and @var{body} as arguments.
  1444. The elements of @var{state} are also passed to @var{handler} as
  1445. arguments, and may be returned as additional values. The new
  1446. @var{state}, collected from the @var{handler}'s return values, is then
  1447. returned as a list. The idea is that a server loop receives a handler
  1448. from the user, along with whatever state values the user is interested
  1449. in, allowing the user's handler to explicitly manage its state.
  1450. @end deffn
  1451. @deffn {Scheme Procedure} sanitize-response request response body
  1452. ``Sanitize'' the given response and body, making them appropriate for
  1453. the given request.
  1454. As a convenience to web handler authors, @var{response} may be given as
  1455. an alist of headers, in which case it is used to construct a default
  1456. response. Ensures that the response version corresponds to the request
  1457. version. If @var{body} is a string, encodes the string to a bytevector,
  1458. in an encoding appropriate for @var{response}. Adds a
  1459. @code{content-length} and @code{content-type} header, as necessary.
  1460. If @var{body} is a procedure, it is called with a port as an argument,
  1461. and the output collected as a bytevector. In the future we might try to
  1462. instead use a compressing, chunk-encoded port, and call this procedure
  1463. later, in the write-client procedure. Authors are advised not to rely on
  1464. the procedure being called at any particular time.
  1465. @end deffn
  1466. @deffn {Scheme Procedure} write-client impl server client response body
  1467. Write an HTTP response and body to @var{client}. If the server and
  1468. client support persistent connections, it is the implementation's
  1469. responsibility to keep track of the client thereafter, presumably by
  1470. attaching it to the @var{server} argument somehow.
  1471. @end deffn
  1472. @deffn {Scheme Procedure} close-server impl server
  1473. Release resources allocated by a previous invocation of
  1474. @code{open-server}.
  1475. @end deffn
  1476. Given the procedures above, it is a small matter to make a web server:
  1477. @deffn {Scheme Procedure} serve-one-client handler impl server state
  1478. Read one request from @var{server}, call @var{handler} on the request
  1479. and body, and write the response to the client. Return the new state
  1480. produced by the handler procedure.
  1481. @end deffn
  1482. @deffn {Scheme Procedure} run-server handler @
  1483. [impl='http] [open-params='()] @
  1484. arg @dots{}
  1485. Run Guile's built-in web server.
  1486. @var{handler} should be a procedure that takes two or more arguments,
  1487. the HTTP request and request body, and returns two or more values, the
  1488. response and response body.
  1489. For examples, skip ahead to the next section, @ref{Web Examples}.
  1490. The response and body will be run through @code{sanitize-response}
  1491. before sending back to the client.
  1492. Additional arguments to @var{handler} are taken from @var{arg}
  1493. @enddots{}. These arguments comprise a @dfn{state}. Additional return
  1494. values are accumulated into a new state, which will be used for
  1495. subsequent requests. In this way a handler can explicitly manage its
  1496. state.
  1497. @end deffn
  1498. The default web server implementation is @code{http}, which binds to a
  1499. socket, listening for requests on that port.
  1500. @deffn {HTTP Implementation} http [#:host=#f] @
  1501. [#:family=AF_INET] @
  1502. [#:addr=INADDR_LOOPBACK] @
  1503. [#:port 8080] [#:socket]
  1504. The default HTTP implementation. We document it as a function with
  1505. keyword arguments, because that is precisely the way that it is -- all
  1506. of the @var{open-params} to @code{run-server} get passed to the
  1507. implementation's open function.
  1508. @example
  1509. ;; The defaults: localhost:8080
  1510. (run-server handler)
  1511. ;; Same thing
  1512. (run-server handler 'http '())
  1513. ;; On a different port
  1514. (run-server handler 'http '(#:port 8081))
  1515. ;; IPv6
  1516. (run-server handler 'http '(#:family AF_INET6 #:port 8081))
  1517. ;; Custom socket
  1518. (run-server handler 'http `(#:socket ,(sudo-make-me-a-socket)))
  1519. @end example
  1520. @end deffn
  1521. @node Web Examples
  1522. @subsection Web Examples
  1523. Well, enough about the tedious internals. Let's make a web application!
  1524. @subsubsection Hello, World!
  1525. The first program we have to write, of course, is ``Hello, World!''.
  1526. This means that we have to implement a web handler that does what we
  1527. want.
  1528. Now we define a handler, a function of two arguments and two return
  1529. values:
  1530. @example
  1531. (define (handler request request-body)
  1532. (values @var{response} @var{response-body}))
  1533. @end example
  1534. In this first example, we take advantage of a short-cut, returning an
  1535. alist of headers instead of a proper response object. The response body
  1536. is our payload:
  1537. @example
  1538. (define (hello-world-handler request request-body)
  1539. (values '((content-type . (text/plain)))
  1540. "Hello World!"))
  1541. @end example
  1542. Now let's test it, by running a server with this handler. Load up the
  1543. web server module if you haven't yet done so, and run a server with this
  1544. handler:
  1545. @example
  1546. (use-modules (web server))
  1547. (run-server hello-world-handler)
  1548. @end example
  1549. By default, the web server listens for requests on
  1550. @code{localhost:8080}. Visit that address in your web browser to
  1551. test. If you see the string, @code{Hello World!}, sweet!
  1552. @subsubsection Inspecting the Request
  1553. The Hello World program above is a general greeter, responding to all
  1554. URIs. To make a more exclusive greeter, we need to inspect the request
  1555. object, and conditionally produce different results. So let's load up
  1556. the request, response, and URI modules, and do just that.
  1557. @example
  1558. (use-modules (web server)) ; you probably did this already
  1559. (use-modules (web request)
  1560. (web response)
  1561. (web uri))
  1562. (define (request-path-components request)
  1563. (split-and-decode-uri-path (uri-path (request-uri request))))
  1564. (define (hello-hacker-handler request body)
  1565. (if (equal? (request-path-components request)
  1566. '("hacker"))
  1567. (values '((content-type . (text/plain)))
  1568. "Hello hacker!")
  1569. (not-found request)))
  1570. (run-server hello-hacker-handler)
  1571. @end example
  1572. Here we see that we have defined a helper to return the components of
  1573. the URI path as a list of strings, and used that to check for a request
  1574. to @code{/hacker/}. Then the success case is just as before -- visit
  1575. @code{http://localhost:8080/hacker/} in your browser to check.
  1576. You should always match against URI path components as decoded by
  1577. @code{split-and-decode-uri-path}. The above example will work for
  1578. @code{/hacker/}, @code{//hacker///}, and @code{/h%61ck%65r}.
  1579. But we forgot to define @code{not-found}! If you are pasting these
  1580. examples into a REPL, accessing any other URI in your web browser will
  1581. drop your Guile console into the debugger:
  1582. @example
  1583. <unnamed port>:38:7: In procedure module-lookup:
  1584. <unnamed port>:38:7: Unbound variable: not-found
  1585. Entering a new prompt. Type `,bt' for a backtrace or `,q' to continue.
  1586. scheme@@(guile-user) [1]>
  1587. @end example
  1588. So let's define the function, right there in the debugger. As you
  1589. probably know, we'll want to return a 404 response.
  1590. @example
  1591. ;; Paste this in your REPL
  1592. (define (not-found request)
  1593. (values (build-response #:code 404)
  1594. (string-append "Resource not found: "
  1595. (uri->string (request-uri request)))))
  1596. ;; Now paste this to let the web server keep going:
  1597. ,continue
  1598. @end example
  1599. Now if you access @code{http://localhost:8080/foo/}, you get this error
  1600. message. (Note that some popular web browsers won't show
  1601. server-generated 404 messages, showing their own instead, unless the 404
  1602. message body is long enough.)
  1603. @subsubsection Higher-Level Interfaces
  1604. The web handler interface is a common baseline that all kinds of Guile
  1605. web applications can use. You will usually want to build something on
  1606. top of it, however, especially when producing HTML. Here is a simple
  1607. example that builds up HTML output using SXML (@pxref{SXML}).
  1608. First, load up the modules:
  1609. @example
  1610. (use-modules (web server)
  1611. (web request)
  1612. (web response)
  1613. (sxml simple))
  1614. @end example
  1615. Now we define a simple templating function that takes a list of HTML
  1616. body elements, as SXML, and puts them in our super template:
  1617. @example
  1618. (define (templatize title body)
  1619. `(html (head (title ,title))
  1620. (body ,@@body)))
  1621. @end example
  1622. For example, the simplest Hello HTML can be produced like this:
  1623. @example
  1624. (sxml->xml (templatize "Hello!" '((b "Hi!"))))
  1625. @print{}
  1626. <html><head><title>Hello!</title></head><body><b>Hi!</b></body></html>
  1627. @end example
  1628. Much better to work with Scheme data types than to work with HTML as
  1629. strings. Now we define a little response helper:
  1630. @example
  1631. (define* (respond #:optional body #:key
  1632. (status 200)
  1633. (title "Hello hello!")
  1634. (doctype "<!DOCTYPE html>\n")
  1635. (content-type-params '((charset . "utf-8")))
  1636. (content-type 'text/html)
  1637. (extra-headers '())
  1638. (sxml (and body (templatize title body))))
  1639. (values (build-response
  1640. #:code status
  1641. #:headers `((content-type
  1642. . (,content-type ,@@content-type-params))
  1643. ,@@extra-headers))
  1644. (lambda (port)
  1645. (if sxml
  1646. (begin
  1647. (if doctype (display doctype port))
  1648. (sxml->xml sxml port))))))
  1649. @end example
  1650. Here we see the power of keyword arguments with default initializers. By
  1651. the time the arguments are fully parsed, the @code{sxml} local variable
  1652. will hold the templated SXML, ready for sending out to the client.
  1653. Also, instead of returning the body as a string, @code{respond} gives a
  1654. procedure, which will be called by the web server to write out the
  1655. response to the client.
  1656. Now, a simple example using this responder, which lays out the incoming
  1657. headers in an HTML table.
  1658. @example
  1659. (define (debug-page request body)
  1660. (respond
  1661. `((h1 "hello world!")
  1662. (table
  1663. (tr (th "header") (th "value"))
  1664. ,@@(map (lambda (pair)
  1665. `(tr (td (tt ,(with-output-to-string
  1666. (lambda () (display (car pair))))))
  1667. (td (tt ,(with-output-to-string
  1668. (lambda ()
  1669. (write (cdr pair))))))))
  1670. (request-headers request))))))
  1671. (run-server debug-page)
  1672. @end example
  1673. Now if you visit any local address in your web browser, you will actually see
  1674. some HTML, finally.
  1675. @subsubsection Conclusion
  1676. Well, this is about as far as Guile's built-in web support goes, for
  1677. now. There are many ways to make a web application, but hopefully by
  1678. standardizing the most fundamental data types, users will be able to
  1679. choose the approach that suits them best, while also being able to
  1680. switch between implementations of the server. This is a relatively new
  1681. part of Guile, so if you have feedback, let us know, and we can take it
  1682. into account. Happy hacking on the web!
  1683. @c Local Variables:
  1684. @c TeX-master: "guile.texi"
  1685. @c End: