feedsnake.scm 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. ;;
  2. ;; Copyright 2022, Jaidyn Levesque <jadedctrl@posteo.at>
  3. ;;
  4. ;; This program is free software: you can redistribute it and/or
  5. ;; modify it under the terms of the GNU General Public License as
  6. ;; published by the Free Software Foundation, either version 3 of
  7. ;; the License, or (at your option) any later version.
  8. ;;
  9. ;; This program is distributed in the hope that it will be useful,
  10. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. ;; GNU General Public License for more details.
  13. ;;
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. ;;
  17. ;; Module for misc. helper functions used by both feedsnake & feedsnake-unix
  18. (module feedsnake-helpers
  19. (alist-car-ref date->utc-date current-date-utc)
  20. (import scheme
  21. (chicken base)
  22. srfi-19)
  23. ;; Just car's the value of alist-ref (if it exists)
  24. (define (alist-car-ref key alist)
  25. (let ([value (alist-ref key alist)])
  26. (if value
  27. (car value)
  28. #f)))
  29. ;; Convert a date of arbitrary timezone to UTC
  30. (define (date->utc-date date)
  31. (time-utc->date (date->time-utc date)))
  32. ;; The current date, with UTC (-0; Z) timezone
  33. (define (current-date-utc)
  34. (date->utc-date (current-date)))
  35. ) ;; feedsnake-helper module
  36. ;; The main feedsnake module; parses atom feeds into alists and strings
  37. (module feedsnake
  38. (updated-feed-string read-feed filter-entries write-entry update-feed-file
  39. write-entry-to-file write-entries-to-file all-entries entry->string
  40. *maildir-template* *mbox-template*)
  41. (import scheme
  42. (chicken base) (chicken condition) (chicken io) (chicken file)
  43. (chicken process-context) (chicken pathname) (chicken port)
  44. srfi-1 srfi-13 srfi-19 srfi-69
  45. date-strings
  46. feedsnake-helpers
  47. http-client
  48. named-format
  49. xattr
  50. atom rss)
  51. (define *maildir-template*
  52. `((entry-template
  53. ,(string-append
  54. "From: ~{{~A ||||from-name}}"
  55. "<~{{~A||feedsnake@localhost||FROM_ADDRESS||author-address||feed-address}}>"
  56. "\n"
  57. "To: ~{{~A ||You||TO_NAME||USER}}"
  58. "<~{{~A||you@localhost||TO_ADDRESS}}>"
  59. "\n"
  60. "Subject: ~{{~A||Unnamed post||title}}\n"
  61. "Date: ~{{~A||||updated-rfc228||published-rfc228}}\n"
  62. "\n"
  63. "~{{*** ~{~a~^ ~}~%||||urls}}\n"
  64. "~{{~A||||summary}}\n"))
  65. (multifile-output? #t)))
  66. (define *mbox-template*
  67. `((entry-template ,(string-append
  68. "From FEEDSNAKE ~{{~A||||updated-mbox||published-mbox}}\n"
  69. (car (alist-ref 'entry-template *maildir-template*))
  70. "\n"))
  71. (multifile-output? #f)))
  72. (define *default-template*
  73. (append *maildir-template*
  74. '((output-dir "./"))))
  75. (define *default-values*
  76. '((output-dir "./")))
  77. (define *default-multifile-values*
  78. '((filename-template "~{{~A||||updated||published}}.~{{~A||you||USER}}@~{{~A||localhost|HOSTNAME}}")
  79. (multifile-output? #t)))
  80. (define *default-singlefile-values*
  81. '((filename-template "feed.out")
  82. (multifile-output? #f)))
  83. ;; Read the given port into a feedsnake-feed (alist), no matter the format! c:<
  84. (define (read-feed in-port)
  85. (let (;;[rss (rss:read in-port)]
  86. [atom (read-atom-feed in-port)])
  87. (if atom
  88. (atom-doc->feedsnake-feed atom)
  89. #f)))
  90. ;; Construct a filter function for feeds, given the script's arguments
  91. (define (filter-entries feed filter)
  92. (let ([entry-date (lambda (entry) (car (alist-ref 'updated entry)))]
  93. [unfiltered (if feed
  94. (alist-car-ref 'entries feed)
  95. #f)]
  96. [entries '()])
  97. (if unfiltered
  98. (map
  99. (lambda (entry)
  100. (if (apply filter (list entry))
  101. (set! entries
  102. (append entries (list entry)))))
  103. unfiltered))
  104. entries))
  105. ;; Returns either the updated string of a feed (in comparison to old string),
  106. ;; or #f if literally nothing's changed
  107. (define (updated-feed-string url old-string)
  108. (let* ([new-string (fetch-http-string url)]
  109. [updated? (not (string=? old-string new-string))])
  110. (if updated?
  111. new-string
  112. #f)))
  113. (define (entry->string entry template)
  114. (named-format
  115. template
  116. (append entry
  117. (entry-templating-parameters entry template))))
  118. ;; Returns an alist of string replacements/parameters for a given entry
  119. ;; For use with named-format
  120. (define (entry-templating-parameters entry template)
  121. (append
  122. entry
  123. (entry-url-templating-parameters entry)
  124. (entry-author-templating-parameters entry)
  125. (entry-date-templating-parameters entry)))
  126. ;; URL-related named-format templating parameters for given entry
  127. (define (entry-url-templating-parameters entry)
  128. (let ([urls (alist-car-ref 'urls entry)])
  129. `((url ,(cond
  130. [(list? urls) (car urls)]
  131. [(string? urls) urls])))))
  132. ;; Author-related named-format templating parameters for given entry
  133. (define (entry-author-templating-parameters entry)
  134. (let* ([authors (alist-car-ref 'authors entry)]
  135. [author (if authors (car authors) (alist-car-ref 'feed-title entry))])
  136. `((author ,author))))
  137. ;; Date-related named-format templating parameters for given entry
  138. (define (entry-date-templating-parameters entry)
  139. (let* ([updated (or (alist-car-ref 'updated entry) (alist-car-ref 'published entry))]
  140. [published (or (alist-car-ref 'published entry) updated)])
  141. `((updated-rfc228 ,(if updated (date->rfc228-string updated)))
  142. (published-rfc228 ,(if published (date->rfc228-string published)))
  143. (updated-mbox ,(if updated (date->mbox-string updated)))
  144. (published-mbox ,(if published (date->mbox-string published))))))
  145. ;; Writes a given feed entry to the out-port, as per the feedsnake-unix-format template alist
  146. (define (write-entry entry template-alist out-port)
  147. (let ([env-variables
  148. (map (lambda (pair)
  149. (list (string->symbol (car pair))
  150. (cdr pair)))
  151. (get-environment-variables))])
  152. (write-string
  153. (entry->string (append env-variables entry)
  154. (alist-car-ref 'entry-template template-alist))
  155. #f
  156. out-port)))
  157. ;; Write an entry to the given file (directory for multifile; normal file otherwise)
  158. (define (write-entry-to-file entry template-alist out-path)
  159. (let* ([template (if (alist-car-ref 'multifile-output? template-alist)
  160. (append template-alist *default-multifile-values* *default-values*)
  161. (append template-alist *default-singlefile-values* *default-values*))]
  162. [file-mode (if (alist-car-ref 'multifile-output? template) #:text #:append)])
  163. (call-with-output-file
  164. (entry-output-path entry template out-path)
  165. (lambda (out-port)
  166. (write-entry entry template out-port))
  167. file-mode)))
  168. ;; Writes all entries in a list to an out-path (mere convenience function)
  169. (define (write-entries-to-file entries template-alist out-path)
  170. (map (lambda (entry)
  171. (write-entry-to-file entry template-alist out-path))
  172. entries))
  173. ;; Decides the correct output path for an entry, given the template's filename rules etc.
  174. (define (entry-output-path entry template-alist base-out-path)
  175. (let ([multifile? (alist-car-ref 'multifile-output? template-alist)])
  176. (if multifile?
  177. (multifile-entry-path entry template-alist base-out-path)
  178. (singlefile-entry-path entry template-alist base-out-path))))
  179. ;; Output path for entry with a single-file template
  180. (define (singlefile-entry-path entry template-alist base-out-path)
  181. (if (directory-exists? base-out-path)
  182. (signal
  183. (make-property-condition
  184. 'exn 'location 'file
  185. 'message (string-append base-out-path " shouldn't be a directory.")))
  186. base-out-path))
  187. ;; Output path for an entry w multifile template
  188. (define (multifile-entry-path entry template-alist base-out-path)
  189. (let* ([file-leaf (named-format (alist-car-ref 'filename-template template-alist) entry)]
  190. [new-out-path (string-append base-out-path "/" "new")])
  191. (if (and (create-directory base-out-path)
  192. (create-directory new-out-path)
  193. (create-directory (string-append base-out-path "/" "cur"))
  194. (create-directory (string-append base-out-path "/" "tmp")))
  195. (string-append new-out-path "/" file-leaf ":2,")
  196. (signal
  197. (make-property-condition
  198. 'exn 'location 'file
  199. 'message (string-append base-out-path " either isn't accessible or isn't a directory."))))))
  200. ;; Switch the cached version of the feed with a newer version, if available.
  201. ;; If the feed-path doesn't exist, then the feed will be downloaded fresh.
  202. (define (update-feed-file feed-path feed-url)
  203. (let* ([old-string (if (file-exists? feed-path)
  204. (call-with-input-file feed-path
  205. (lambda (in-port)
  206. (read-string #f in-port)))
  207. "")]
  208. [new-string (updated-feed-string
  209. feed-url
  210. old-string)])
  211. (if new-string
  212. (call-with-output-file feed-path
  213. (lambda (out-port)
  214. (write-string new-string #f out-port)))
  215. #f)))
  216. ;; List of all entries of the feed
  217. (define (all-entries feed)
  218. (alist-car-ref 'entries feed))
  219. ;; Atom parsing
  220. ;; ————————————————————————————————————————
  221. ;; Parse an atom feed into a feedsnake-friendly alist
  222. (define (atom-doc->feedsnake-feed atom)
  223. `((title ,(last (feed-title atom)))
  224. (url ,(atom-feed-preferred-url atom))
  225. (authors ,(map author-name (feed-authors atom)))
  226. (updated ,(feed-updated atom))
  227. (entry-updated ,(atom-feed-latest-entry-date atom))
  228. (entries ,(map
  229. (lambda (entry)
  230. (atom-entry->feedsnake-entry entry atom))
  231. (feed-entries atom)))))
  232. ;; Parse an atom entry into a feedsnake entry :>
  233. (define (atom-entry->feedsnake-entry entry atom)
  234. (let ([published (rfc339-string->date (entry-published entry))]
  235. [updated (rfc339-string->date (entry-updated entry))]
  236. [feed-authors (map author-name (feed-authors atom))]
  237. [entry-authors (map author-name (entry-authors entry))])
  238. `((title ,(last (entry-title entry)))
  239. (updated ,(or updated published))
  240. (published ,(or published updated))
  241. (summary ,(last (or (entry-summary entry)
  242. (entry-content entry))))
  243. (urls ,(map (lambda (link) (atom-link->string link atom))
  244. (entry-links entry)))
  245. (authors ,(if (null? entry-authors) feed-authors entry-authors))
  246. (feed-title ,(last (feed-title atom))))))
  247. ;; The preferred/given URL for an atom feed
  248. (define (atom-feed-preferred-url atom)
  249. (car
  250. (filter
  251. (lambda (link)
  252. (string=? (link-relation link) "self"))
  253. (feed-links atom))))
  254. ;; Get an atom feed's latest date for an entry's updating/publishing
  255. (define (atom-feed-latest-entry-date atom)
  256. (let ([entry-date
  257. (lambda (entry)
  258. (or (rfc339-string->date (entry-updated entry))
  259. (rfc339-string->date (entry-published entry))))])
  260. (reduce
  261. (lambda (a b)
  262. (if (date>=? a b) a b))
  263. #f
  264. (map entry-date (feed-entries atom)))))
  265. ;; Convert an atom-link into a proper, valid url
  266. (define (atom-link->string link atom)
  267. (if (string-contains (link-uri link) "://")
  268. (link-uri link)
  269. (string-append (pathname-directory (atom-feed-preferred-url atom))
  270. "/"
  271. (link-uri link))))
  272. ;; Misc. functions
  273. ;; ————————————————————————————————————————
  274. ;; Download a file over HTTP to the given port.
  275. (define (fetch-http url out-port)
  276. (call-with-input-request
  277. url #f
  278. (lambda (in-port) (copy-port in-port out-port))))
  279. ;; Download a feed (AKA fetch over HTTP to a string)
  280. (define (fetch-http-string url)
  281. (call-with-output-string
  282. (lambda (out) (fetch-http url out))))
  283. ) ;; feedsnake module
  284. ;; The UNIX-style frontend for feedsnake
  285. (module feedsnake-unix
  286. (main main)
  287. (import scheme
  288. (chicken base) (chicken condition) (chicken file) (chicken file posix)
  289. (chicken io) (chicken port) (chicken process-context)
  290. (chicken process-context posix)
  291. srfi-1 srfi-19
  292. date-strings
  293. feedsnake feedsnake-helpers
  294. getopt-long
  295. uri-common
  296. xattr)
;; Free-form usage text written by `help` ahead of the option summary that
;; getopt-long's `usage` generates from *opts*.
(define *help-msg*
  (string-append
   "usage: feedsnake [-hnuU] [-s|S] [-o|d] FILE...\n"
   " feedsnake [-hn] [-c] [-s] [-o|d] URL...\n"
   " feedsnake [-h] [-s] [-o|d]\n\n"
   "Feedsnake is a program for converting Atom feeds into mbox/maildir files.\n"
   "Any Atom feeds passed as input will be output in mbox or maildir format.\n\n"
   "If a FILE value is '-' or not provided, feedsnake will read a feed over standard\n"
   "input. --since-last and similar arguments have no impact on these feeds.\n\n"
   "If you want to subscribe to feeds with Feedsnake, you'll probably do something\n"
   "like so:\n"
   " feedsnake --cache ~/feeds/hacker_news.xml \\\n"
   " --output ~/feeds/hacker_news.mbox \\\n"
   " https://news.ycombinator.com/rss\n\n"
   "Then, to update your subscription, just run:\n"
   " feedsnake --update --since-last \\\n"
   " --output ~/feeds/hacker_news.mbox \\\n"
   " ~/feeds/hacker_news.xml\n\n"
   "For updating all feeds:\n"
   " feedsnake --update --since-last ~/feeds/*.xml > ~/feeds/all.mbox\n\n"
   "The FILE given as input can be any Atom/RSS file. If you'd like to update\n"
   "the FILE (with --update or --update-since), then it must have the\n"
   "'user.xdg.origin.url' extended attribute set as the feed URL. You can create\n"
   "such a file as in the above example, by passing a URL with a --cache file set.\n\n"))
;; Command-line option grammar, handed to getopt-long (in `main`) and to
;; `usage` (in `help`). Each entry is: option name, help text, then optional
;; properties -- a single-character alias and/or a required value.
(define *opts*
  '((help
     "Print a usage message"
     (single-char #\h))
    (outdir
     "Output directory, used for maildir output"
     (single-char #\d)
     (value (required DIR)))
    (output
     "Output file, used for mbox output. Default is stdout ('-')."
     (single-char #\o)
     (value (required FILE)))
    (cache
     "The cache file used if a URL is passed as argument."
     (single-char #\c)
     (value (required FILE)))
    (update
     "Update a feed FILE by downloading its newest version to the same path."
     (single-char #\u))
    (update-since
     "Alias for --update and --since-last. This is probably the option you want."
     (single-char #\U))
    (since
     "Output entries after the given date, in YYYY-MM-DD hh:mm:ss format."
     (single-char #\s)
     (value (required DATETIME)))
    (since-last
     "Output entries dating from the last saved parsing of the file."
     (single-char #\S))
    ;; The only option without a single-character alias
    (since-update
     "Output entries dating from the last update of the file.")
    (no-save-date
     "Don't save parse/update time of this operation, to avoid influencing --since-*."
     (single-char #\n))))
  355. ;; Prints cli usage to stderr.
  356. (define (help)
  357. (write-string *help-msg* #f (open-output-file* fileno/stderr))
  358. (write-string (usage *opts*) #f (open-output-file* fileno/stderr)))
  359. ;; Wrap around the main function, so that the user isn't scared off by exceptions
  360. (define-syntax exception-condom
  361. (syntax-rules ()
  362. ((exception-condom expr)
  363. (handle-exceptions exn
  364. (begin
  365. (write-string
  366. (string-append (get-condition-property exn 'exn 'message)
  367. " ("
  368. ;; (symbol->string (get-condition-property exn 'exn 'location))
  369. ")\n")
  370. #f (open-output-file* fileno/stderr))
  371. (exit 2))
  372. expr))))
  373. ;; Just ignore whatever exceptions the expression throws our way
  374. (define-syntax ignore-errors
  375. (syntax-rules ()
  376. ((ignore-errors expr)
  377. (handle-exceptions exn #f expr))))
  378. ;; The `main` procedure that should be called to run feedsnake-unix for use as script.
  379. (define (main)
  380. (exception-condom
  381. (let* ([args (getopt-long (command-line-arguments) *opts*)]
  382. [free-args (alist-ref '@ args)])
  383. (if (alist-ref 'help args)
  384. (help)
  385. (map (lambda (feed-pair)
  386. (process-feed args feed-pair))
  387. (get-feeds free-args args))))))
  388. ;; Turn the scripts free-args into parsed Feedsnake feed alists
  389. (define (get-feeds free-args args)
  390. (let ([feed-paths
  391. (if (eq? (length free-args) 0)
  392. '("-")
  393. free-args)])
  394. (map (lambda (path) (get-feed path args))
  395. feed-paths)))
;; Turn a given feed-path (free-arg) into a parsed Feedsnake feed, if possible.
;; FEED-PATH may be "-", a URL, or a file path; ARGS is the parsed option alist.
;; Returns a two-element list (out-path feed); feed is #f when reading or
;; parsing a file failed (those errors are swallowed by ignore-errors).
(define (get-feed feed-path args)
  (let*
      (;; #f whenever absolute-uri signals an error for feed-path
       ;; (presumably: feed-path is not an absolute URI)
       [uri (ignore-errors (absolute-uri feed-path))]
       ;; Where the feed lives on disk: the --cache file for a URL when one
       ;; was given, "-" for a URL without a cache file, else feed-path itself
       [out-path (cond
                  [(and uri (alist-ref 'cache args))
                   (alist-ref 'cache args)]
                  [uri "-"]
                  [#t feed-path])]
       [feed
        (cond
         ;; "-": parse whatever can be read from the current input port
         [(string=? feed-path "-")
          (call-with-input-string (read-string)
            read-feed)]
         ;; URL with a cache file: refresh the cache, then parse the cache
         [(and uri (not (string=? "-" out-path)))
          (begin
            (update-feed-file out-path (uri->string uri))
            (ignore-errors (call-with-input-file out-path read-feed)))]
         ;; URL without a cache file: download and parse in memory
         [uri
          (call-with-input-string (updated-feed-string (uri->string uri) "")
            read-feed)]
         ;; Plain file path
         [#t
          (ignore-errors (call-with-input-file out-path read-feed))])])
    ;; Set the origin URL, if newly-created cache file
    (if (and uri (not (string=? "-" out-path)))
        (set-xattr out-path "user.xdg.origin.url" (uri->string uri)))
    (list out-path feed)))
  423. ;; Process a parsed feed, given arguments passed to the script
  424. (define (process-feed args feed-pair)
  425. (let* ([feed (last feed-pair)]
  426. [feed-path (first feed-pair)]
  427. [update? (or (alist-ref 'update args) (alist-ref 'update-since args))])
  428. ;; Update the feed
  429. (if update?
  430. (begin
  431. (update-feed-file feed-path
  432. (get-xattr feed-path "user.xdg.origin.url"))
  433. (set! feed (call-with-input-file feed-path read-feed))))
  434. ;; Print all entries to stdout
  435. (output-entries args `(,feed-path ,feed))
  436. ;; Change file's update-date
  437. (if (and update?
  438. (not (alist-ref 'no-save-date args)))
  439. (set-xattr feed-path "user.feedsnake.updated"
  440. (date->rfc339-string (current-date-utc))))
  441. ;; Save the file's parsing date
  442. (if (and (file-exists? feed-path)
  443. (not (alist-ref 'no-save-date args)))
  444. (set-xattr feed-path "user.feedsnake.parsed"
  445. (date->rfc339-string (current-date-utc))))))
  446. ;; Output the appropriate entrise of the given feed, using script's args
  447. (define (output-entries args feed-pair)
  448. (let* ([feed (last feed-pair)]
  449. [output-dir (alist-ref 'outdir args)]
  450. [output (or (alist-ref 'output args) output-dir)]
  451. [template (if output-dir *maildir-template* *mbox-template*)]
  452. [filter (entry-filter feed-pair args)]
  453. [entries (filter-entries feed filter)])
  454. (cond
  455. [(not entries)
  456. #f]
  457. [output
  458. (write-entries-to-file entries template output)]
  459. [(not output)
  460. (map (lambda (entry)
  461. (write-entry entry template
  462. (open-output-file* fileno/stdout)))
  463. entries)])))
  464. ;; Construct a filter function for feeds, given the script's arguments
  465. (define (entry-filter feed-pair args)
  466. (let* ([since-string (alist-ref 'since args)]
  467. [since (if since-string
  468. (date->utc-date (string->date since-string "~Y-~m-~d ~H:~M:~S"))
  469. #f)]
  470. [entry-date (lambda (entry)
  471. (or (alist-car-ref 'updated entry)
  472. (alist-car-ref 'published entry)))]
  473. [last-parse-string (or (ignore-errors
  474. (get-xattr (first feed-pair) "user.feedsnake.parsed"))
  475. "1971-01-01T00:00:00Z")]
  476. [last-parse (rfc339-string->date last-parse-string)]
  477. [last-update-string (or (ignore-errors
  478. (get-xattr (first feed-pair) "user.feedsnake.updated"))
  479. "1971-01-01T00:00:00Z")]
  480. [last-update (rfc339-string->date last-update-string)])
  481. (lambda (entry)
  482. (cond [since
  483. (date>=? (entry-date entry) since)]
  484. [(or (alist-ref 'since-last args) (alist-ref 'update-since args))
  485. (date>=? (entry-date entry) (or last-parse last-update))]
  486. [(alist-ref 'since-update args)
  487. (date>=? (entry-date entry) last-update)]
  488. [#t
  489. #t]))))
  490. ;; Supposed config root of the user (as per XDG, or simple ~/.config)
  491. (define (config-directory)
  492. (or (get-environment-variable "XDG_CONFIG_HOME")
  493. (string-append (sixth (user-information (current-user-id))) "/.config")))
  494. ;; Path of the feedsnake config directory
  495. (define (feedsnake-directory)
  496. (create-directory (string-append (config-directory) "/feedsnake") #t))
  497. ;; Path of the feeds directory
  498. (define (feeds-directory)
  499. (create-directory (string-append (feedsnake-directory) "/feeds") #t))
  500. ;; Lists all configured feeds (files in feed directory)
  501. (define (feed-files)
  502. (map (lambda (relative-path)
  503. (string-append (feeds-directory) "/" relative-path))
  504. (directory (feeds-directory))))
  505. ) ;; feedsnake-unix module