123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117 |
- ;;; Web client
- ;; Copyright (C) 2011 Free Software Foundation, Inc.
- ;; This library is free software; you can redistribute it and/or
- ;; modify it under the terms of the GNU Lesser General Public
- ;; License as published by the Free Software Foundation; either
- ;; version 3 of the License, or (at your option) any later version.
- ;;
- ;; This library is distributed in the hope that it will be useful,
- ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;; Lesser General Public License for more details.
- ;;
- ;; You should have received a copy of the GNU Lesser General Public
- ;; License along with this library; if not, write to the Free Software
- ;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- ;; 02110-1301 USA
- ;;; Commentary:
- ;;;
- ;;; (web client) is a simple HTTP URL fetcher for Guile.
- ;;;
- ;;; In its current incarnation, (web client) is synchronous. If you
- ;;; want to fetch a number of URLs at once, probably the best thing to
- ;;; do is to write an event-driven URL fetcher, similar in structure to
- ;;; the web server.
- ;;;
- ;;; Another option, good but not as performant, would be to use threads,
- ;;; possibly via par-map or futures.
- ;;;
- ;;; Code:
- (define-module (web client)
- #:use-module (rnrs bytevectors)
- #:use-module (ice-9 binary-ports)
- #:use-module (ice-9 rdelim)
- #:use-module (web request)
- #:use-module (web response)
- #:use-module (web uri)
- #:export (open-socket-for-uri
- http-get))
- (define (open-socket-for-uri uri)
- (let* ((ai (car (getaddrinfo (uri-host uri)
- (cond
- ((uri-port uri) => number->string)
- (else (symbol->string (uri-scheme uri)))))))
- (s (socket (addrinfo:fam ai) (addrinfo:socktype ai)
- (addrinfo:protocol ai))))
- (set-port-encoding! s "ISO-8859-1")
- (connect s (addrinfo:addr ai))
- ;; Buffer input and output on this port.
- (setvbuf s _IOFBF)
- ;; Enlarge the receive buffer.
- (setsockopt s SOL_SOCKET SO_RCVBUF (* 12 1024))
- s))
- (define (decode-string bv encoding)
- (if (string-ci=? encoding "utf-8")
- (utf8->string bv)
- (let ((p (open-bytevector-input-port bv)))
- (set-port-encoding! p encoding)
- (let ((res (read-delimited "" p)))
- (close-port p)
- res))))
- (define (text-type? type)
- (let ((type (symbol->string type)))
- (or (string-prefix? "text/" type)
- (string-suffix? "/xml" type)
- (string-suffix? "+xml" type))))
- ;; Logically the inverse of (web server)'s `sanitize-response'.
- ;;
- (define (decode-response-body response body)
- ;; `body' is either #f or a bytevector.
- (cond
- ((not body) body)
- ((bytevector? body)
- (let ((rlen (response-content-length response))
- (blen (bytevector-length body)))
- (cond
- ((and rlen (not (= rlen blen)))
- (error "bad content-length" rlen blen))
- ((response-content-type response)
- => (lambda (type)
- (cond
- ((text-type? (car type))
- (decode-string body (or (assq-ref (cdr type) 'charset)
- "iso-8859-1")))
- (else body))))
- (else body))))
- (else
- (error "unexpected body type" body))))
- (define* (http-get uri #:key (port (open-socket-for-uri uri))
- (version '(1 . 1)) (keep-alive? #f) (extra-headers '())
- (decode-body? #t))
- (let ((req (build-request uri #:version version
- #:headers (if keep-alive?
- extra-headers
- (cons '(connection close)
- extra-headers)))))
- (write-request req port)
- (force-output port)
- (if (not keep-alive?)
- (shutdown port 1))
- (let* ((res (read-response port))
- (body (read-response-body res)))
- (if (not keep-alive?)
- (close-port port))
- (values res
- (if decode-body?
- (decode-response-body res body)
- body)))))
|