lib.rs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698
  1. // -*- coding: utf-8 -*-
  2. //
  3. // Copyright (C) 2024 Michael Büsch <m@bues.ch>
  4. // Copyright (C) 2020 Marco Lochen
  5. //
  6. // This program is free software: you can redistribute it and/or modify
  7. // it under the terms of the GNU General Public License as published by
  8. // the Free Software Foundation, either version 2 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // This program is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU General Public License
  17. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  18. //
  19. // SPDX-License-Identifier: GPL-2.0-or-later
  20. #![forbid(unsafe_code)]
  21. mod error;
  22. use crate::error::Error;
  23. use anyhow::{self as ah, format_err as err, Context as _};
  24. use chrono::{DateTime, Utc};
  25. use rusqlite::{Connection, OpenFlags, Row};
  26. use sha2::{Digest as _, Sha256};
  27. use std::{
  28. path::{Path, PathBuf},
  29. sync::{Arc, Mutex},
  30. time::{Duration, Instant},
  31. };
  32. use tokio::task::spawn_blocking;
  /// Debug switch exported to users of this crate. It is not read anywhere in
  /// this file.
  pub const DEBUG: bool = true;

  /// Time budget (10 seconds) used for the busy-retry deadlines when opening
  /// the database and running transactions, and as the SQLite busy timeout
  /// of every opened connection.
  const TIMEOUT: Duration = Duration::from_secs(10);
  /// Returns the installation prefix.
  ///
  /// The value is taken from the `FEEDREADER_PREFIX` environment variable as
  /// it was set at compile time; if it was unset, the prefix is `/`.
  pub fn get_prefix() -> PathBuf {
      let prefix: &str = match option_env!("FEEDREADER_PREFIX") {
          Some(configured) => configured,
          None => "/",
      };
      PathBuf::from(prefix)
  }
  38. pub fn get_varlib() -> PathBuf {
  39. get_prefix().join("var/lib/feedreader")
  40. }
  41. fn sql_to_dt(timestamp: i64) -> DateTime<Utc> {
  42. DateTime::<Utc>::from_timestamp(timestamp, 0).unwrap_or_else(Utc::now)
  43. }
  44. fn dt_to_sql(dt: &DateTime<Utc>) -> i64 {
  45. dt.timestamp()
  46. }
  47. #[derive(Clone, Debug)]
  48. pub struct Feed {
  49. pub feed_id: Option<i64>,
  50. pub href: String,
  51. pub title: String,
  52. pub last_retrieval: DateTime<Utc>,
  53. pub next_retrieval: DateTime<Utc>,
  54. pub last_activity: DateTime<Utc>,
  55. pub disabled: bool,
  56. pub updated_items: i64,
  57. }
  58. impl Feed {
  59. fn from_sql_row(row: &Row<'_>) -> rusqlite::Result<Self> {
  60. Ok(Self {
  61. feed_id: Some(row.get(0)?),
  62. href: row.get(1)?,
  63. title: row.get(2)?,
  64. last_retrieval: sql_to_dt(row.get(3)?),
  65. next_retrieval: sql_to_dt(row.get(4)?),
  66. last_activity: sql_to_dt(row.get(5)?),
  67. disabled: row.get(6)?,
  68. updated_items: row.get(7)?,
  69. })
  70. }
  71. }
  72. #[derive(Clone, Debug)]
  73. pub struct Item {
  74. pub item_id: Option<String>,
  75. pub feed_id: Option<i64>,
  76. pub retrieved: DateTime<Utc>,
  77. pub seen: bool,
  78. pub author: String,
  79. pub title: String,
  80. pub feed_item_id: String,
  81. pub link: String,
  82. pub published: DateTime<Utc>,
  83. pub summary: String,
  84. }
  85. impl Item {
  86. fn from_sql_row(row: &Row<'_>) -> rusqlite::Result<Self> {
  87. Ok(Self {
  88. item_id: Some(row.get(0)?),
  89. feed_id: Some(row.get(1)?),
  90. retrieved: sql_to_dt(row.get(2)?),
  91. seen: row.get(3)?,
  92. author: row.get(4)?,
  93. title: row.get(5)?,
  94. feed_item_id: row.get(6)?,
  95. link: row.get(7)?,
  96. published: sql_to_dt(row.get(8)?),
  97. summary: row.get(9)?,
  98. })
  99. }
  100. fn from_sql_row_with_count(row: &Row<'_>) -> rusqlite::Result<(Self, i64)> {
  101. Ok((Self::from_sql_row(row)?, row.get(10)?))
  102. }
  103. pub async fn make_id(&self) -> String {
  104. let mut h = Sha256::new();
  105. h.update(&self.feed_item_id);
  106. h.update(&self.author);
  107. h.update(&self.title);
  108. h.update(&self.link);
  109. h.update(format!("{}", dt_to_sql(&self.published)));
  110. h.update(&self.summary);
  111. hex::encode(h.finalize())
  112. }
  113. }
  /// Result of comparing an item against the stored `items` rows, as
  /// computed by `DbConn::check_item_exists`.
  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  pub enum ItemStatus {
      /// Neither the item's `item_id` nor its `feed_item_id` is stored.
      New,
      /// The `feed_item_id` is stored, but not under this `item_id`.
      Updated,
      /// A row with this `item_id` is already stored.
      Exists,
  }
  120. async fn transaction<F, R>(conn: Arc<Mutex<Connection>>, mut f: F) -> ah::Result<R>
  121. where
  122. F: FnMut(rusqlite::Transaction) -> Result<R, Error> + Send + 'static,
  123. R: Send + 'static,
  124. {
  125. spawn_blocking(move || {
  126. let timeout = Instant::now() + TIMEOUT;
  127. loop {
  128. let mut conn = conn.lock().expect("Mutex poisoned");
  129. let trans = conn.transaction()?;
  130. match f(trans) {
  131. Ok(r) => {
  132. break Ok(r);
  133. }
  134. Err(Error::Sql(
  135. e @ rusqlite::Error::SqliteFailure(
  136. rusqlite::ffi::Error {
  137. code: rusqlite::ffi::ErrorCode::DatabaseBusy,
  138. ..
  139. },
  140. ..,
  141. ),
  142. )) => {
  143. drop(conn); // unlock
  144. if Instant::now() >= timeout {
  145. break Err(e.into());
  146. }
  147. std::thread::sleep(Duration::from_millis(20));
  148. }
  149. Err(e) => {
  150. break Err(e.into());
  151. }
  152. }
  153. }
  154. })
  155. .await?
  156. }
  /// An open database connection.
  ///
  /// The SQLite connection is kept behind `Arc<Mutex<..>>` so that it can be
  /// moved into blocking worker tasks, which lock it for the duration of one
  /// operation.
  pub struct DbConn {
      // Shared handle to the underlying SQLite connection.
      conn: Arc<Mutex<Connection>>,
  }
  160. impl DbConn {
  161. async fn new(path: &Path) -> ah::Result<Self> {
  162. let path = path.to_path_buf();
  163. let conn = spawn_blocking(move || -> ah::Result<Connection> {
  164. let timeout = Instant::now() + TIMEOUT;
  165. loop {
  166. let conn = match Connection::open_with_flags(
  167. &path,
  168. OpenFlags::SQLITE_OPEN_READ_WRITE
  169. | OpenFlags::SQLITE_OPEN_CREATE
  170. | OpenFlags::SQLITE_OPEN_NO_MUTEX,
  171. ) {
  172. Ok(conn) => conn,
  173. Err(
  174. e @ rusqlite::Error::SqliteFailure(
  175. rusqlite::ffi::Error {
  176. code: rusqlite::ffi::ErrorCode::DatabaseBusy,
  177. ..
  178. },
  179. ..,
  180. ),
  181. ) => {
  182. if Instant::now() >= timeout {
  183. break Err(e.into());
  184. }
  185. std::thread::sleep(Duration::from_millis(20));
  186. continue;
  187. }
  188. Err(e) => {
  189. break Err(e.into());
  190. }
  191. };
  192. conn.busy_timeout(TIMEOUT)?;
  193. conn.set_prepared_statement_cache_capacity(64);
  194. break Ok(conn);
  195. }
  196. })
  197. .await?
  198. .context("Open SQLite database")?;
  199. Ok(Self {
  200. conn: Arc::new(Mutex::new(conn)),
  201. })
  202. }
  203. #[rustfmt::skip]
  204. pub async fn init(&mut self) -> ah::Result<()> {
  205. transaction(Arc::clone(&self.conn), move |t| {
  206. t.execute(
  207. "\
  208. CREATE TABLE IF NOT EXISTS feeds (\
  209. feed_id INTEGER PRIMARY KEY, \
  210. href VARCHAR, \
  211. title VARCHAR, \
  212. last_retrieval TIMESTAMP, \
  213. next_retrieval TIMESTAMP, \
  214. last_activity TIMESTAMP, \
  215. disabled BOOLEAN, \
  216. updated_items INTEGER\
  217. )",
  218. [],
  219. )?;
  220. t.execute(
  221. "\
  222. CREATE TABLE IF NOT EXISTS items (\
  223. item_id VARCHAR PRIMARY KEY, \
  224. feed_id INTEGER, \
  225. retrieved TIMESTAMP, \
  226. seen BOOLEAN, \
  227. author VARCHAR, \
  228. title VARCHAR, \
  229. feed_item_id VARCHAR, \
  230. link VARCHAR, \
  231. published TIMESTAMP, \
  232. summary VARCHAR, \
  233. FOREIGN KEY(feed_id) REFERENCES feeds(feed_id)\
  234. )",
  235. [],
  236. )?;
  237. t.execute("CREATE INDEX IF NOT EXISTS feed_id ON feeds(feed_id)", [])?;
  238. t.execute("CREATE INDEX IF NOT EXISTS item_id ON items(item_id)", [])?;
  239. // Remove legacy table.
  240. t.execute("DROP TABLE IF EXISTS enclosures", [])?;
  241. // Remove dangling items.
  242. t.execute(
  243. "\
  244. DELETE FROM items \
  245. WHERE feed_id NOT IN (\
  246. SELECT feed_id FROM feeds\
  247. )\
  248. ",
  249. []
  250. )?;
  251. t.commit()?;
  252. Ok(())
  253. })
  254. .await
  255. }
  256. pub async fn vacuum(&mut self) -> ah::Result<()> {
  257. spawn_blocking({
  258. let conn = Arc::clone(&self.conn);
  259. move || {
  260. let conn = conn.lock().expect("Mutex poisoned");
  261. conn.execute("VACUUM", [])?;
  262. Ok(())
  263. }
  264. })
  265. .await?
  266. }
  267. pub async fn update_feed(
  268. &mut self,
  269. feed: &Feed,
  270. items: &[Item],
  271. gc_thres: Option<DateTime<Utc>>,
  272. ) -> ah::Result<()> {
  273. let feed = feed.clone();
  274. let items = items.to_vec();
  275. transaction(Arc::clone(&self.conn), move |t| {
  276. let Some(feed_id) = feed.feed_id else {
  277. return Err(Error::Ah(err!("update_feed(): Invalid feed. No feed_id.")));
  278. };
  279. t.prepare_cached(
  280. "\
  281. UPDATE feeds SET \
  282. href = ?, \
  283. title = ?, \
  284. last_retrieval = ?, \
  285. next_retrieval = ?, \
  286. last_activity = ?, \
  287. disabled = ?, \
  288. updated_items = ? \
  289. WHERE feed_id = ?\
  290. ",
  291. )?
  292. .execute((
  293. &feed.href,
  294. &feed.title,
  295. dt_to_sql(&feed.last_retrieval),
  296. dt_to_sql(&feed.next_retrieval),
  297. dt_to_sql(&feed.last_activity),
  298. feed.disabled,
  299. feed.updated_items,
  300. feed_id,
  301. ))?;
  302. for item in &items {
  303. let Some(item_id) = &item.item_id else {
  304. return Err(Error::Ah(err!("update_feed(): Invalid item. No item_id.")));
  305. };
  306. if item.feed_id.is_some() && item.feed_id != Some(feed_id) {
  307. return Err(Error::Ah(err!(
  308. "update_feed(): Invalid item. Invalid feed_id."
  309. )));
  310. }
  311. t.prepare_cached(
  312. "\
  313. INSERT INTO items \
  314. VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\
  315. ",
  316. )?
  317. .execute((
  318. item_id,
  319. feed_id,
  320. dt_to_sql(&item.retrieved),
  321. item.seen,
  322. &item.author,
  323. &item.title,
  324. &item.feed_item_id,
  325. &item.link,
  326. dt_to_sql(&item.published),
  327. &item.summary,
  328. ))?;
  329. }
  330. if let Some(gc_thres) = gc_thres.as_ref() {
  331. t.prepare_cached(
  332. "\
  333. DELETE FROM items \
  334. WHERE feed_id = ? AND published < ? AND seen = TRUE\
  335. ",
  336. )?
  337. .execute((feed_id, dt_to_sql(gc_thres)))?;
  338. }
  339. t.commit()?;
  340. Ok(())
  341. })
  342. .await
  343. }
  344. pub async fn add_feed(&mut self, href: &str) -> ah::Result<()> {
  345. let href = href.to_string();
  346. transaction(Arc::clone(&self.conn), move |t| {
  347. t.prepare_cached(
  348. "\
  349. INSERT INTO feeds \
  350. VALUES (?, ?, ?, ?, ?, ?, ?, ?)\
  351. ",
  352. )?
  353. .execute((
  354. None::<i64>,
  355. &href,
  356. "[New feed] Updating...",
  357. 0,
  358. 0,
  359. 0,
  360. false,
  361. 0,
  362. ))?;
  363. t.commit()?;
  364. Ok(())
  365. })
  366. .await
  367. }
  368. pub async fn delete_feeds(&mut self, feed_ids: &[i64]) -> ah::Result<()> {
  369. if !feed_ids.is_empty() {
  370. let feed_ids = feed_ids.to_vec();
  371. transaction(Arc::clone(&self.conn), move |t| {
  372. for feed_id in &feed_ids {
  373. t.prepare_cached(
  374. "\
  375. DELETE FROM items \
  376. WHERE feed_id = ?\
  377. ",
  378. )?
  379. .execute([feed_id])?;
  380. t.prepare_cached(
  381. "\
  382. DELETE FROM feeds \
  383. WHERE feed_id = ?\
  384. ",
  385. )?
  386. .execute([feed_id])?;
  387. }
  388. t.commit()?;
  389. Ok(())
  390. })
  391. .await
  392. } else {
  393. Ok(())
  394. }
  395. }
  396. pub async fn get_feeds_due(&mut self) -> ah::Result<Vec<Feed>> {
  397. let now = Utc::now();
  398. transaction(Arc::clone(&self.conn), move |t| {
  399. let feeds: Vec<Feed> = t
  400. .prepare_cached(
  401. "\
  402. SELECT * FROM feeds \
  403. WHERE next_retrieval < ? AND \
  404. disabled == FALSE\
  405. ",
  406. )?
  407. .query_map([dt_to_sql(&now)], Feed::from_sql_row)?
  408. .map(|f| f.unwrap())
  409. .collect();
  410. t.finish()?;
  411. Ok(feeds)
  412. })
  413. .await
  414. }
  415. pub async fn get_next_due_time(&mut self) -> ah::Result<DateTime<Utc>> {
  416. transaction(Arc::clone(&self.conn), move |t| {
  417. let next_retrieval = t
  418. .prepare_cached(
  419. "\
  420. SELECT min(next_retrieval) FROM feeds \
  421. WHERE disabled == FALSE\
  422. ",
  423. )?
  424. .query([])?
  425. .next()?
  426. .unwrap()
  427. .get(0)?;
  428. t.finish()?;
  429. Ok(sql_to_dt(next_retrieval))
  430. })
  431. .await
  432. }
  433. pub async fn get_feeds(&mut self, active_feed_id: Option<i64>) -> ah::Result<Vec<Feed>> {
  434. transaction(Arc::clone(&self.conn), move |t| {
  435. if let Some(active_feed_id) = active_feed_id {
  436. t.prepare_cached(
  437. "\
  438. UPDATE feeds \
  439. SET updated_items = 0 \
  440. WHERE feed_id = ?\
  441. ",
  442. )?
  443. .execute([active_feed_id])?;
  444. }
  445. let feeds: Vec<Feed> = t
  446. .prepare_cached(
  447. "\
  448. SELECT * FROM feeds \
  449. ORDER BY last_activity DESC\
  450. ",
  451. )?
  452. .query_map([], Feed::from_sql_row)?
  453. .map(|f| f.unwrap())
  454. .collect();
  455. if active_feed_id.is_some() {
  456. t.commit()?;
  457. } else {
  458. t.finish()?;
  459. }
  460. Ok(feeds)
  461. })
  462. .await
  463. }
  464. pub async fn get_feed_items(&mut self, feed_id: i64) -> ah::Result<Vec<(Item, i64)>> {
  465. transaction(Arc::clone(&self.conn), move |t| {
  466. let items: Vec<(Item, i64)> = t
  467. .prepare_cached(
  468. "\
  469. SELECT item_id, feed_id, max(retrieved), seen, \
  470. author, title, feed_item_id, link, published, \
  471. summary, count() as count \
  472. FROM items \
  473. WHERE feed_id = ? \
  474. GROUP BY feed_item_id \
  475. ORDER BY published DESC LIMIT 100\
  476. ",
  477. )?
  478. .query_map([feed_id], Item::from_sql_row_with_count)?
  479. .map(|i| i.unwrap())
  480. .collect();
  481. t.prepare_cached(
  482. "\
  483. UPDATE items \
  484. SET seen = TRUE \
  485. WHERE feed_id = ?\
  486. ",
  487. )?
  488. .execute([feed_id])?;
  489. t.commit()?;
  490. Ok(items)
  491. })
  492. .await
  493. }
  494. pub async fn get_feed_items_by_item_id(
  495. &mut self,
  496. feed_id: i64,
  497. item_id: &str,
  498. ) -> ah::Result<Vec<Item>> {
  499. let item_id = item_id.to_string();
  500. transaction(Arc::clone(&self.conn), move |t| {
  501. let items: Vec<Item> = t
  502. .prepare_cached(
  503. "\
  504. SELECT * FROM items \
  505. WHERE feed_id = ? AND feed_item_id IN (\
  506. SELECT feed_item_id FROM items \
  507. WHERE item_id = ?\
  508. ) \
  509. ORDER BY retrieved DESC\
  510. ",
  511. )?
  512. .query_map((feed_id, &item_id), Item::from_sql_row)?
  513. .map(|i| i.unwrap())
  514. .collect();
  515. t.prepare_cached(
  516. "\
  517. UPDATE items \
  518. SET seen = TRUE \
  519. WHERE feed_id = ?\
  520. ",
  521. )?
  522. .execute([feed_id])?;
  523. t.commit()?;
  524. Ok(items)
  525. })
  526. .await
  527. }
  528. pub async fn set_seen(&mut self, feed_id: Option<i64>) -> ah::Result<()> {
  529. transaction(Arc::clone(&self.conn), move |t| {
  530. if let Some(feed_id) = feed_id {
  531. t.prepare_cached(
  532. "\
  533. UPDATE items \
  534. SET seen = TRUE \
  535. WHERE feed_id = ?\
  536. ",
  537. )?
  538. .execute([feed_id])?;
  539. t.prepare_cached(
  540. "\
  541. UPDATE feeds \
  542. SET updated_items = 0 \
  543. WHERE feed_id = ?\
  544. ",
  545. )?
  546. .execute([feed_id])?;
  547. } else {
  548. t.prepare_cached(
  549. "\
  550. UPDATE items \
  551. SET seen = TRUE \
  552. ",
  553. )?
  554. .execute([])?;
  555. t.prepare_cached(
  556. "\
  557. UPDATE feeds \
  558. SET updated_items = 0 \
  559. ",
  560. )?
  561. .execute([])?;
  562. }
  563. t.commit()?;
  564. Ok(())
  565. })
  566. .await
  567. }
  568. pub async fn check_item_exists(&mut self, item: &Item) -> ah::Result<ItemStatus> {
  569. if let Some(item_id) = item.item_id.as_ref() {
  570. let item_id = item_id.clone();
  571. let feed_item_id = item.feed_item_id.clone();
  572. transaction(Arc::clone(&self.conn), move |t| {
  573. let feed_item_id_count: Vec<i64> = t
  574. .prepare_cached(
  575. "\
  576. SELECT count(feed_item_id) \
  577. FROM items \
  578. WHERE feed_item_id = ?\
  579. ",
  580. )?
  581. .query_map([&feed_item_id], |row| row.get(0))?
  582. .map(|c| c.unwrap())
  583. .collect();
  584. let item_id_count: Vec<i64> = t
  585. .prepare_cached(
  586. "\
  587. SELECT count(item_id) \
  588. FROM items \
  589. WHERE item_id = ?\
  590. ",
  591. )?
  592. .query_map([&item_id], |row| row.get(0))?
  593. .map(|c| c.unwrap())
  594. .collect();
  595. let feed_item_id_count = *feed_item_id_count.first().unwrap_or(&0);
  596. let item_id_count = *item_id_count.first().unwrap_or(&0);
  597. let status = if item_id_count == 0 && feed_item_id_count == 0 {
  598. ItemStatus::New
  599. } else if item_id_count == 0 {
  600. ItemStatus::Updated
  601. } else {
  602. ItemStatus::Exists
  603. };
  604. t.finish()?;
  605. Ok(status)
  606. })
  607. .await
  608. } else {
  609. Err(err!("check_item_exists(): Invalid item. No item_id."))
  610. }
  611. }
  612. }
  613. pub struct Db {
  614. path: PathBuf,
  615. }
  616. impl Db {
  617. pub async fn new(name: &str) -> ah::Result<Self> {
  618. if !name
  619. .chars()
  620. .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
  621. {
  622. return Err(err!("Invalid name"));
  623. }
  624. let path = get_varlib().join(format!("{name}.db"));
  625. Ok(Self { path })
  626. }
  627. pub async fn open(&self) -> ah::Result<DbConn> {
  628. DbConn::new(&self.path).await
  629. }
  630. }
  631. // vim: ts=4 sw=4 expandtab