// bebot // Copyright (C) 2023 Brian Tarricone // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . use std::{ fmt, io::{BufReader, BufWriter, ErrorKind}, path::PathBuf, time::{Duration, SystemTime}, }; use anyhow::Context; use chrono::{DateTime, Utc}; use futures::{future::join_all, FutureExt}; use matrix_sdk::{ ruma::{events::room::message::RoomMessageEventContent, OwnedRoomOrAliasId}, Client, }; use reqwest::redirect; use serde::de; use tokio::{fs::File, task::JoinHandle, time::sleep}; use crate::{ config::{MailArchiveConfig, MailListConfig}, matrix, }; #[derive(Clone, Copy, Serialize, Deserialize)] struct ListState { last_pub_date: DateTime, } #[derive(Debug, Deserialize)] struct RssPubDate { #[serde(rename = "$text", deserialize_with = "deser_rfc2616")] value: DateTime, } #[derive(Debug, Deserialize)] struct RssItem { title: String, link: String, #[serde(rename = "pubDate")] pub_date: RssPubDate, } #[derive(Debug, Deserialize)] struct RssChannel { #[serde(rename = "item")] items: Vec, } #[derive(Debug, Deserialize)] struct MailRss { channel: RssChannel, } async fn load_list_state(state_file: &PathBuf) -> anyhow::Result { match File::open(state_file).await { Err(err) if err.kind() == ErrorKind::NotFound => { // If we have no state, we probably don't want to blast out events // for every single item in the RSS feed, so pretend the last time // we published was right now. let list_state = ListState { last_pub_date: SystemTime::now().into(), }; save_list_state(list_state, state_file).await?; Ok(list_state) } Err(err) => Err(err)?, Ok(f) => { let r = BufReader::new(f.into_std().await); Ok(tokio::task::spawn_blocking(move || serde_yaml::from_reader(r)).await??) } } } async fn save_list_state(list_state: ListState, state_file: &PathBuf) -> anyhow::Result<()> { let f = File::options() .write(true) .truncate(true) .create(true) .open(state_file) .await?; let w = BufWriter::new(f.into_std().await); tokio::task::spawn_blocking(move || serde_yaml::to_writer(w, &list_state)).await??; Ok(()) } async fn handle_list( list: &MailListConfig, state_file: &PathBuf, http_client: &reqwest::Client, url: &String, matrix_client: &Client, room_ids: &[OwnedRoomOrAliasId], ) -> anyhow::Result<()> { let list_state = load_list_state(state_file).await?; let rooms_f = room_ids.iter().map(|room_id| { matrix::ensure_room_joined(matrix_client, room_id) .map(move |res| res.with_context(|| format!("Failed to join Matrix room '{}'", room_id))) }); let rooms = join_all(rooms_f) .await .into_iter() .flat_map(|room_res| match room_res { Err(err) => { warn!("{:#}", err); vec![] } Ok(room) => vec![room], }) .collect::>(); if rooms.is_empty() { return Err(anyhow!("Failed to join all rooms for list '{}'; skipping", list.name)); } let response = http_client .get(url) .send() .await .with_context(|| format!("Failed to fetch mail RSS feed from '{}'", url)) .and_then(|response| { if !response.status().is_success() { Err(anyhow!( "Failed to fetch mail RSS feed from '{}': server returned status {}", url, response.status().as_u16() )) } else { Ok(response) } })?; let body = response .text() .await .with_context(|| format!("Failed to decode RSS response body for '{}'", url))?; let mail_rss = tokio::task::spawn_blocking(move || quick_xml::de::from_str::(&body)) .await? .with_context(|| format!("Failed to parse RSS feed for '{}'", url))?; let items = mail_rss .channel .items .into_iter() .rev() .skip_while(|item| item.pub_date.value <= list_state.last_pub_date) .collect::>(); for room in rooms { for item in &items { let msg = RoomMessageEventContent::text_markdown(format!("\\[{}\\] [{}]({}]", list.name, item.title, item.link)); room.send(msg, None) .await .with_context(|| format!("Failed to send message to room '{}'", room.room_id()))?; save_list_state( ListState { last_pub_date: item.pub_date.value, }, state_file, ) .await?; } } Ok(()) } pub fn start_polling(config: MailArchiveConfig, matrix_client: Client) -> anyhow::Result>> { let http_client = reqwest::Client::builder() .user_agent(format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))) .gzip(true) .redirect(redirect::Policy::default()) .timeout(Duration::from_secs(8)) .build()?; Ok(config .lists .into_iter() .map(|list| { let room_ids = if list.rooms.is_empty() { &config.default_rooms } else { &list.rooms } .clone(); let list = list.clone(); let http_client = http_client.clone(); let matrix_client = matrix_client.clone(); let url = format!("https://www.mail-archive.com/{}/maillist.xml", list.name); let state_file = config.state_dir.join(format!("{}.state", list.name)); let update_interval = Duration::from_secs(config.update_interval); tokio::spawn(async move { if !room_ids.is_empty() { loop { if let Err(err) = handle_list(&list, &state_file, &http_client, &url, &matrix_client, &room_ids).await { warn!("{:#}", err); } sleep(update_interval).await; } } }) }) .collect()) } fn deser_rfc2616<'de, D>(deserializer: D) -> Result, D::Error> where D: de::Deserializer<'de>, { struct Rfc2616Visitor; impl<'de> de::Visitor<'de> for Rfc2616Visitor { type Value = DateTime; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("an RFC2616-formatted datetime") } fn visit_str(self, v: &str) -> Result where E: de::Error, { dateparser::parse(v).map_err(E::custom) } } deserializer.deserialize_any(Rfc2616Visitor) } #[cfg(test)] mod test { use std::{fs::File, io::BufReader}; use super::MailRss; #[test] pub fn test_feed_deser() -> anyhow::Result<()> { let f = File::open(format!("{}/test-data/maillist.xml", env!("CARGO_MANIFEST_DIR")))?; let r = BufReader::new(f); let mail_rss = quick_xml::de::from_reader::<_, MailRss>(r)?; println!("{:#?}", mail_rss); Ok(()) } }