tokeniser.scm 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. ;; This file is part of scheme-GNUnet, a partial Scheme port of GNUnet.
  2. ;; Copyright (C) 2021 Maxime Devos <maximedevos@telenet.be>
  3. ;;
  4. ;; scheme-GNUnet is free software: you can redistribute it and/or modify it
  5. ;; under the terms of the GNU Affero General Public License as published
  6. ;; by the Free Software Foundation, either version 3 of the License,
  7. ;; or (at your option) any later version.
  8. ;;
  9. ;; scheme-GNUnet is distributed in the hope that it will be useful, but
  10. ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;; Affero General Public License for more details.
  13. ;;
  14. ;; You should have received a copy of the GNU Affero General Public License
  15. ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. ;;
  17. ;; SPDX-License-Identifier: AGPL-3.0-or-later
  18. (import (tests utils)
  19. (quickcheck)
  20. (quickcheck property)
  21. (quickcheck arbitrary)
  22. (quickcheck generator)
  23. (gnu gnunet utils tokeniser)
  24. (gnu gnunet utils bv-slice)
  25. (srfi srfi-1)
  26. (srfi srfi-8)
  27. (srfi srfi-43)
  28. (only (ice-9 control) let/ec)
  29. (ice-9 match)
  30. (only (system foreign)
  31. pointer->bytevector bytevector->pointer)
  32. (only (rnrs base) assert)
  33. (only (rnrs exceptions) guard)
  34. (only (rnrs conditions)
  35. assertion-violation? condition-who)
  36. (only (rnrs io ports)
  37. open-bytevector-input-port)
  38. (rnrs bytevectors)
  39. (gnu gnunet netstruct syntactic)
  40. (gnu gnunet util struct))
  41. (define (fluffed-bytevector %size %off fluff)
  42. ;; Returned bytevector is a complete message.
  43. (let* ((size (+ %size (sizeof /:message-header '())))
  44. (bv (make-bytevector (+ %off size)))
  45. (s (bv-slice/read-write bv)))
  46. (bytevector-copy! fluff 0 bv 0
  47. (min (bytevector-length fluff)
  48. (bytevector-length bv)))
  49. (set%! /:message-header '(size)
  50. (slice-slice s %off (sizeof /:message-header '()))
  51. size)
  52. (values bv %off size)))
  53. (test-begin "tokeniser")
  54. (define (no-return/overly-small . _)
  55. (error "unexpected call to return/overly-small"))
  56. (define (no-return/done . _)
  57. (error "unexpected call to return/done"))
  58. (define (no-return/done-eof . _)
  59. (error "unexpected call to return/done-eof"))
  60. (define (no-return/premature-eof . _)
  61. (error "unexpected call to return/premature-eof"))
  62. (define (no-handle/message . _)
  63. (error "unexpected call to handle/message"))
  64. ;; Some bugs this found:
  65. ;; * in some places, the 'offset' argument was ignored
  66. ;; and always the first or first two bytes of 'bv'
  67. ;; in 'continue' in 'add-bytevector!' would be used.
  68. ;; * some incorrect assertions in the tokeniser code
  69. ;; * when a message was fragmented (between header and data),
  70. ;; the data was not copied
  71. ;; * the type of a message was calculated incorrectly
  72. ;; whe ‘overly small message errors’ are reported
  73. ;; * the type of a message could not be calculated
  74. ;; for some fragmented messages, if the first 'length'
  75. ;; was 1 and the second 'length' was 3.
  76. (test-assert "[prop] complete messages are passed through"
  77. (quickcheck
  78. (property
  79. ((%size $natural)
  80. (%off $natural)
  81. (fluff $bytevector))
  82. (receive (bv offset size)
  83. (fluffed-bytevector %size %off fluff)
  84. (let ((handled? #f))
  85. (add-bytevector!
  86. (make-tokeniser)
  87. bv offset size
  88. (lambda (bv2 offset2 length)
  89. (assert (not handled?))
  90. (assert (eq? bv bv2))
  91. (assert (= offset offset2))
  92. (assert (= length size))
  93. (set! handled? #t))
  94. (lambda _ handled?)
  95. no-return/overly-small))))))
  96. ;; Test fragmented messages and multiple messages
  97. ;; are properly handled.
  98. (define choose-message
  99. (generator-let*
  100. ((length (choose-one/weighted
  101. ;; Very small
  102. `((1 . ,(choose-integer 4 5))
  103. (1 . ,(choose-integer 5 6))
  104. ;; Some length
  105. (2 . ,(choose-integer 4 9)))))
  106. ;; Arbitrary 'type' field and data
  107. (filler (choose-bytevector (- length 2))))
  108. (let ((bv (make-bytevector length)))
  109. (bytevector-u16-set! bv 0 length (endianness big))
  110. (bytevector-copy! filler 0 bv 2 (bytevector-length filler))
  111. (generator-return bv))))
  112. ;; Generate a list of message bytevectors
  113. (define choose-many-messages
  114. (sized-generator
  115. (cut choose-list choose-message <>)))
  116. (define (merge-bytevectors messages)
  117. (define size (reduce + 0 (map bytevector-length messages)))
  118. (define bv (make-bytevector size))
  119. (let loop ((offset 0) (messages messages))
  120. (if (null? messages)
  121. bv
  122. (let* ((head (car messages))
  123. (tail (cdr messages))
  124. (message-size (bytevector-length head)))
  125. (bytevector-copy! head 0 bv offset message-size)
  126. (loop (+ offset message-size) tail)))))
  127. ;; Try to occassionally split message in annoying places,
  128. ;; and avoid splitting at message boundaries.
  129. (define (choose-split-positions messages)
  130. (let loop ((offset 0) (messages messages))
  131. (if (null? messages)
  132. (generator-return '())
  133. (let* ((head (car messages))
  134. (tail (cdr messages))
  135. (message-size (bytevector-length head))
  136. (data-splittable? (> message-size 5)))
  137. (generator-let*
  138. ((rest-positions
  139. (loop (+ offset message-size) tail))
  140. (data-split-positions
  141. (if data-splittable?
  142. (generator-lift
  143. list
  144. (choose-integer 4 message-size))
  145. (generator-return '())))
  146. (end-split-positions
  147. (choose-one/weighted
  148. `((2 . ,(generator-return '()))
  149. (1 . ,(generator-return (list message-size))))))
  150. (head-split-positions
  151. (choose-one/weighted
  152. `((3 . ,(generator-return '())) ; don't split header
  153. (2 . ,(generator-return '(1))) ; split inside size field
  154. (2 . ,(generator-return '(2))) ; split between size field and type
  155. (1 . ,(generator-return '(1 2))))))) ; both of above
  156. (let* ((all-positions
  157. (append head-split-positions data-split-positions
  158. end-split-positions))
  159. (fixed-positions
  160. (map (cut + <> offset) all-positions)))
  161. (generator-return
  162. (append fixed-positions rest-positions))))))))
  163. ;; A list of (start . length).
  164. ;; Starts at the minimal 'start', and ends at 'end' (exclusive)
  165. (define* (positions->ranges positions end)
  166. (match positions
  167. (() `((,end . 0)))
  168. ((start) `((,start . ,(- end start))))
  169. ((start next . rest)
  170. `((,start . ,(- next start))
  171. ,@(positions->ranges `(,next ,@rest) end)))))
  172. (define $messages-and-ranges
  173. (arbitrary
  174. (gen (generator-let*
  175. ((messages choose-many-messages)
  176. (bv (generator-return
  177. (merge-bytevectors messages)))
  178. (split-positions
  179. (choose-split-positions messages))
  180. (ranges
  181. (generator-return
  182. (positions->ranges (cons 0 split-positions)
  183. (bytevector-length bv)))))
  184. (generator-return
  185. `#(,messages ,bv ,ranges))))
  186. (xform #f))) ; unneeded
  187. ;; A simplified test failure case of
  188. ;; "[prop] all fragmented & multiple messages received".
  189. ;; The issue was that (1 2 3 4) was not copied.
  190. (test-equal "message fragmented on header/data boundary reassembled"
  191. #vu8(0 8 50 50 1 2 3 4)
  192. (let ((tok (make-tokeniser))
  193. ;; Message size: 8
  194. (received? #f)
  195. (bv #vu8(0 8 50 50 1 2 3 4)))
  196. (add-bytevector! tok bv 0 4
  197. no-handle/message (const #t) no-return/overly-small)
  198. (add-bytevector! tok bv 4 4
  199. (lambda (bv offset length)
  200. ;; These two assertions are actually an implementation
  201. ;; detail, and test no overly large allocations are
  202. ;; made.
  203. (assert (= 0 offset))
  204. (assert (= length (bytevector-length bv)))
  205. (assert (not received?))
  206. (set! received? (bytevector-copy bv)))
  207. (const #t) no-return/overly-small)
  208. received?))
  209. ;; Found when debugging a test failure of
  210. ;; "[prop] all fragmented & multiple messages received".
  211. ;; The bug was a missing set-position! call.
  212. (test-equal "message fragmented in size field and after message header, some data"
  213. #vu8(0 6 236 197 216 19)
  214. (let ((tok (make-tokeniser))
  215. (received? #f)
  216. (bv #vu8(0 6 236 197 216 19)))
  217. ;; copy the zero
  218. (add-bytevector! tok bv 0 1
  219. no-handle/message (const #t) no-return/overly-small)
  220. ;; copy the rest of the message header
  221. (add-bytevector! tok bv 1 3
  222. no-handle/message (const #t) no-return/overly-small)
  223. ;; copy the data
  224. (add-bytevector! tok bv 4 2
  225. (lambda (bv offset length)
  226. ;; see previous test case
  227. (assert (= 0 offset))
  228. (assert (= length (bytevector-length bv)))
  229. (assert (not received?))
  230. (set! received? (bytevector-copy bv)))
  231. (const #t)
  232. no-return/overly-small)
  233. received?))
  234. ;; And return/done is called in tail position.
  235. (test-assert "[prop] all fragmented & multiple messages received"
  236. (quickcheck
  237. (property
  238. ((messages-and-ranges $messages-and-ranges))
  239. (match messages-and-ranges
  240. (#(messages bv ranges)
  241. (assert (= (apply + (map cdr ranges))
  242. (bytevector-length bv)))
  243. (guard (e ((assertion-violation? e)
  244. ;; 2: don't include 'make-stack' or
  245. ;; this guard
  246. (display-backtrace (make-stack #t 2) (current-error-port))
  247. (print-exception (current-error-port) #f '%exception (list e))
  248. #f))
  249. (let ((tok (make-tokeniser))
  250. (remove-message!
  251. (lambda (bv offset length)
  252. (define bv/range
  253. (pointer->bytevector
  254. (bytevector->pointer bv offset)
  255. length))
  256. ;; Sanity check
  257. (assert (<= 0 offset))
  258. (assert (<= (+ offset length) (bytevector-length bv)))
  259. (let/ec ec
  260. (pair-for-each
  261. (match-lambda
  262. (((and message (set! set-message!)) . rest)
  263. (when (and (bytevector? message)
  264. (bytevector=? message bv/range))
  265. (set-message! #f) ; mark it as received
  266. (ec))))
  267. messages) ; stop searching
  268. (assert (and #f
  269. "message not added but still received"))))))
  270. (for-each
  271. (match-lambda
  272. ((start . length)
  273. (assert
  274. (calls-in-tail-position?
  275. (lambda (return/done)
  276. (add-bytevector! tok bv start length
  277. remove-message!
  278. (lambda () (return/done))
  279. no-return/overly-small))))))
  280. ranges)))
  281. ;; All messages should have been received.
  282. (not (any identity messages)))))))
  283. ;; The type was read at an incorrect offset.
  284. (test-equal "overly small message error (complete header)"
  285. (map (lambda (n)
  286. `(#t ; in tail position
  287. ,(+ (* 256 n) (+ n 1)) ; message type
  288. ,n)) ; message size
  289. (iota 4))
  290. (map (lambda (n)
  291. (call-with-values
  292. (lambda ()
  293. (calls-in-tail-position?
  294. (lambda (return/overly-small)
  295. (add-bytevector! (make-tokeniser)
  296. (u8-list->bytevector
  297. ;; n (+ n 1): arbitrary message type.
  298. ;; Two separate values are used for
  299. ;; the two halves of the u16, to
  300. ;; detect little / big endianness issues.
  301. ;;
  302. ;; GNUnet usually (always?) uses
  303. ;; big-endian.
  304. (list 0 n n (+ n 1)))
  305. 0 4
  306. no-handle/message
  307. no-return/done
  308. return/overly-small))))
  309. list))
  310. ;; 4: size of message header
  311. ;; iota makes a list '(0 1 2 3)
  312. (iota 4)))
  313. ;; A bounds check at the call to return/overly-small
  314. ;; was overly strict, resulting in the message type being missing.
  315. (test-equal "overly small message error (header split in size field)"
  316. (map (lambda (n)
  317. `(#t ; in tail position
  318. ,(+ (* 256 (+ n 1)) n) ; message type
  319. ,n))
  320. (iota 4))
  321. (map (lambda (n)
  322. (let ((tok (make-tokeniser))
  323. (bv (u8-list->bytevector
  324. ;; see previous test case for why (+ n 1) n
  325. (list 0 n (+ n 1) n))))
  326. (add-bytevector! tok bv 0 1
  327. no-handle/message
  328. (const #t)
  329. no-return/overly-small)
  330. (call-with-values
  331. (lambda ()
  332. (calls-in-tail-position?
  333. (lambda (return/overly-small)
  334. (add-bytevector! tok bv 1 3
  335. no-handle/message
  336. no-return/done
  337. return/overly-small))))
  338. list)))
  339. (iota 4))) ; see previous test case for why (iota 4)
  340. ;; All the previous tests use 'small' messages. That is,
  341. ;; the message sizes were always < 256. However, messages
  342. ;; with size >= 256 definitely exist.
  343. ;;
  344. ;; This test detects the mutation
  345. ;; (bytevector-u8-ref bv offset) --> 0
  346. ;; in (! size/byte-0 [...]).
  347. (define huge-bv
  348. (let ((bv (make-bytevector #xfffe 17)))
  349. (bytevector-u16-set! bv 0 #xfffe (endianness big))
  350. bv))
  351. ;; Tests:
  352. ;; * the whole message is received
  353. ;; * return/done is called in tail position
  354. (test-equal "huge message, split early"
  355. (map (const #t) (iota 16))
  356. (map (lambda (split-position)
  357. (let ((tok (make-tokeniser))
  358. (received? #f))
  359. (receive (in-tail-position?)
  360. (calls-in-tail-position?
  361. (lambda (return/done)
  362. (add-bytevector! tok huge-bv 0 split-position
  363. no-handle/message
  364. return/done
  365. no-return/overly-small)))
  366. (assert in-tail-position?))
  367. (receive (in-tail-position?)
  368. (calls-in-tail-position?
  369. (lambda (return/done)
  370. (add-bytevector! tok huge-bv split-position
  371. (- #xfffe split-position)
  372. (lambda (bv offset length)
  373. (assert (not received?))
  374. ;; really an implementation detail,
  375. ;; but no bytevector-range-copy
  376. ;; exists.
  377. (assert (= 0 offset))
  378. (assert (= length (bytevector-length bv)))
  379. (set! received?
  380. (bytevector-copy bv)))
  381. return/done
  382. no-return/overly-small)))
  383. (assert in-tail-position?))
  384. (equal? huge-bv received?)))
  385. (iota 16)))
  386. (define (catch-errors thunk)
  387. (guard (e ((interrupted-tokeniser-violation? e)
  388. `(,(condition-who e) . interrupted))
  389. ((kaput-tokeniser-error? e)
  390. `(,(condition-who e) . kaput)))
  391. (thunk)))
  392. (test-equal "re-entrancy from message handler is detected (complete message)"
  393. '(add-bytevector! . interrupted)
  394. (let ((tok (make-tokeniser)))
  395. (catch-errors
  396. (lambda ()
  397. (add-bytevector! tok #vu8(0 4 0 0) 0 4
  398. (lambda (bv offset length)
  399. (add-bytevector! tok #vu8(0 4 1 1) 0 4
  400. no-handle/message
  401. no-return/done
  402. no-return/overly-small)
  403. (assert #f))
  404. no-return/done
  405. no-return/overly-small)))))
  406. (test-equal "tokeniser becomes kaput, split after size field"
  407. '(add-bytevector! . kaput)
  408. (let ((tok (make-tokeniser))
  409. (bv #vu8(0 3)))
  410. (receive (tail? type size)
  411. (calls-in-tail-position?
  412. (lambda (return/overly-small)
  413. (add-bytevector! tok bv 0 2 no-handle/message
  414. no-return/done
  415. return/overly-small)))
  416. (assert (eq? #f type))
  417. (assert (= size 3))
  418. (assert tail?))
  419. (catch-errors
  420. (lambda ()
  421. (add-bytevector! tok #vu8(0) 0 1
  422. no-handle/message no-return/done no-return/overly-small)
  423. (error "unreachable")))))
  424. (test-equal "tokeniser becomes kaput, split inside size field"
  425. '(add-bytevector! . kaput)
  426. (let ((tok (make-tokeniser))
  427. (bv #vu8(0 3 4 5)))
  428. (receive (tail?)
  429. (calls-in-tail-position?
  430. (lambda (return/done)
  431. (add-bytevector! tok bv 0 1 no-handle/message
  432. return/done
  433. no-return/overly-small)))
  434. (assert tail?))
  435. (receive (tail? type size)
  436. (calls-in-tail-position?
  437. (lambda (return/overly-small)
  438. (add-bytevector! tok bv 1 2 no-handle/message
  439. no-return/done
  440. return/overly-small)))
  441. (assert tail?)
  442. (assert (= size 3))
  443. (assert (eq? type #f)))
  444. (catch-errors
  445. (lambda ()
  446. (add-bytevector! tok bv 2 2
  447. no-handle/message no-return/done
  448. no-return/overly-small)
  449. (error "unreachable")))))
  450. (test-equal "eof detected"
  451. '(#t)
  452. (receive result
  453. (calls-in-tail-position?
  454. (lambda (return/done-eof)
  455. (add-from-port! (make-tokeniser) (%make-void-port "r")
  456. no-handle/message no-return/overly-small
  457. return/done-eof no-return/premature-eof)))
  458. result))
  459. (test-equal "eof detected (complete data)"
  460. '(#t)
  461. (receive result
  462. (calls-in-tail-position?
  463. (lambda (return/done-eof)
  464. (define handled? #f)
  465. (define (handle/message bv offset length)
  466. (assert (= length 4))
  467. ;; Verify the received message is correct
  468. (assert (= (bytevector-u32-ref bv offset (endianness big))
  469. (bytevector-u32-ref #vu8(0 4 0 0) 0 (endianness big))))
  470. (assert (not handled?))
  471. (set! handled? #t))
  472. (add-from-port! (make-tokeniser)
  473. (open-bytevector-input-port #vu8(0 4 0 0))
  474. handle/message no-return/overly-small return/done-eof
  475. no-return/done-eof)))
  476. result))
  477. (test-equal "premature eof detected"
  478. '(#t)
  479. (receive result
  480. (calls-in-tail-position?
  481. (lambda (return/premature-eof)
  482. ;; 4 bytes are expected, but only the stream only has 3.
  483. (add-from-port! (make-tokeniser) (open-bytevector-input-port #vu8(0 4 0))
  484. no-handle/message no-return/overly-small no-return/done-eof
  485. return/premature-eof)))
  486. result))
  487. (test-equal "add-from-port! and partial messages (split at header)"
  488. #vu8(0 8 2 3 4 5 6 7)
  489. (let ((tok (make-tokeniser))
  490. (message #f))
  491. (add-bytevector! tok #vu8(0 8 2 3) 0 4 no-handle/message
  492. (const #t) no-return/overly-small)
  493. (add-from-port! tok (open-bytevector-input-port #vu8(4 5 6 7))
  494. (lambda (bv offset length)
  495. (assert (not message))
  496. (let ((bv2 (make-bytevector length)))
  497. (bytevector-copy! bv offset bv2 0 length)
  498. (set! message bv2)))
  499. no-return/overly-small (lambda () message)
  500. no-return/premature-eof)))
  501. (test-equal "kaput tokeniser and add-from-port!"
  502. '(add-from-port! . kaput)
  503. (let ((tok (make-tokeniser))
  504. (bv #vu8(0 3 4 5)))
  505. ;; Make the tokeniser kaput (overly small message size)
  506. (add-bytevector! tok bv 0 4 no-handle/message no-return/done
  507. (const #t))
  508. ;; And feed it some bytes (with add-from-port!) anyway.
  509. (catch-errors
  510. (lambda ()
  511. (add-from-port! tok (open-bytevector-input-port #vu8(1 2 3 4))
  512. no-handle/message no-return/overly-small
  513. no-return/done-eof no-return/premature-eof)
  514. (error "unreachable")))))
  515. (test-end "tokeniser")