bebot/src/mail_archive.rs

263 lines
7.9 KiB
Rust

// bebot
// Copyright (C) 2023 Brian Tarricone <brian@tarricone.org>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use std::{
fmt,
io::{BufReader, BufWriter, ErrorKind},
path::PathBuf,
time::{Duration, SystemTime},
};
use anyhow::Context;
use chrono::{DateTime, Utc};
use futures::{future::join_all, FutureExt};
use matrix_sdk::{
ruma::{events::room::message::RoomMessageEventContent, OwnedRoomOrAliasId},
Client,
};
use reqwest::redirect;
use serde::de;
use tokio::{fs::File, task::JoinHandle, time::sleep};
use crate::{
config::{MailArchiveConfig, MailListConfig},
matrix,
};
/// Per-list polling state, written to and read from the list's state file
/// as YAML.
#[derive(Clone, Copy, Serialize, Deserialize)]
struct ListState {
/// Publication date of the last feed item that was handled; feed items
/// dated at or before this instant are skipped on the next poll.
last_pub_date: DateTime<Utc>,
}
/// Publication date of a feed item, parsed into a UTC timestamp.
#[derive(Debug, Deserialize)]
struct RssPubDate {
/// Parsed timestamp. The raw string is converted by `deser_rfc2616`.
/// NOTE(review): `$text` is presumably quick-xml's name for the element's
/// text content -- confirm against the quick-xml serde documentation.
#[serde(rename = "$text", deserialize_with = "deser_rfc2616")]
value: DateTime<Utc>,
}
/// A single entry of the mailing-list RSS feed.
#[derive(Debug, Deserialize)]
struct RssItem {
/// Item title; used as the link text of the announcement message.
title: String,
/// Item URL; used as the link target of the announcement message.
link: String,
/// Publication date, read from the `pubDate` element.
#[serde(rename = "pubDate")]
pub_date: RssPubDate,
}
/// The `channel` part of the feed, holding the list of items.
#[derive(Debug, Deserialize)]
struct RssChannel {
/// Every `item` entry of the channel, in document order.
#[serde(rename = "item")]
items: Vec<RssItem>,
}
/// Top-level structure the mailing-list RSS document is deserialized into.
#[derive(Debug, Deserialize)]
struct MailRss {
/// The feed's `channel` element.
channel: RssChannel,
}
/// Reads the persisted [`ListState`] for a mailing list from `state_file`.
///
/// When the file does not exist, a fresh state stamped with the current time
/// is created, written to `state_file`, and returned.
///
/// # Errors
///
/// Fails if the file cannot be opened for a reason other than "not found",
/// if the initial state cannot be saved, or if the YAML cannot be parsed.
async fn load_list_state(state_file: &PathBuf) -> anyhow::Result<ListState> {
    let file = match File::open(state_file).await {
        Ok(file) => file,
        Err(err) if err.kind() == ErrorKind::NotFound => {
            // With no prior state we would otherwise announce every item
            // currently in the feed, so behave as if the last announcement
            // happened just now.
            let fresh = ListState {
                last_pub_date: SystemTime::now().into(),
            };
            save_list_state(fresh, state_file).await?;
            return Ok(fresh);
        }
        Err(err) => return Err(err.into()),
    };

    // serde_yaml reads synchronously, so do the parsing on the blocking pool.
    let reader = BufReader::new(file.into_std().await);
    let state = tokio::task::spawn_blocking(move || serde_yaml::from_reader::<_, ListState>(reader)).await??;
    Ok(state)
}
/// Writes `list_state` to `state_file` as YAML, creating the file if needed
/// and truncating any previous contents.
///
/// # Errors
///
/// Fails if the file cannot be opened, if serialization fails, or if the
/// buffered data cannot be flushed to the file.
async fn save_list_state(list_state: ListState, state_file: &PathBuf) -> anyhow::Result<()> {
    let f = File::options()
        .write(true)
        .truncate(true)
        .create(true)
        .open(state_file)
        .await?;
    let mut w = BufWriter::new(f.into_std().await);
    // serde_yaml writes synchronously, so run it on the blocking pool.
    tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
        serde_yaml::to_writer(&mut w, &list_state)?;
        // Flush explicitly: dropping a `BufWriter` also flushes, but it
        // discards any I/O error, which would make a failed write look like
        // a success.
        std::io::Write::flush(&mut w)?;
        Ok(())
    })
    .await??;
    Ok(())
}
/// Performs one polling pass for a single mailing list.
///
/// The pass loads the persisted state, joins the target Matrix rooms, fetches
/// and parses the RSS feed at `url`, and announces every item newer than the
/// stored publication date in each joined room. The state file is updated
/// after each item has been sent to all rooms.
///
/// # Errors
///
/// Fails if the state cannot be loaded or saved, if none of the rooms could
/// be joined, if the feed cannot be fetched (including a non-success HTTP
/// status) or parsed, or if sending a message fails. Rooms that fail to join
/// while at least one other succeeds are only logged.
async fn handle_list(
    list: &MailListConfig,
    state_file: &PathBuf,
    http_client: &reqwest::Client,
    url: &String,
    matrix_client: &Client,
    room_ids: &[OwnedRoomOrAliasId],
) -> anyhow::Result<()> {
    let list_state = load_list_state(state_file).await?;

    // Join all rooms concurrently; a room that fails to join is logged and
    // dropped rather than aborting the whole pass.
    let rooms_f = room_ids.iter().map(|room_id| {
        matrix::ensure_room_joined(matrix_client, room_id)
            .map(move |res| res.with_context(|| format!("Failed to join Matrix room '{}'", room_id)))
    });
    let rooms = join_all(rooms_f)
        .await
        .into_iter()
        .filter_map(|room_res| match room_res {
            Ok(room) => Some(room),
            Err(err) => {
                warn!("{:#}", err);
                None
            }
        })
        .collect::<Vec<_>>();
    if rooms.is_empty() {
        return Err(anyhow!("Failed to join all rooms for list '{}'; skipping", list.name));
    }

    let response = http_client
        .get(url)
        .send()
        .await
        .with_context(|| format!("Failed to fetch mail RSS feed from '{}'", url))
        .and_then(|response| {
            if !response.status().is_success() {
                Err(anyhow!(
                    "Failed to fetch mail RSS feed from '{}': server returned status {}",
                    url,
                    response.status().as_u16()
                ))
            } else {
                Ok(response)
            }
        })?;
    let body = response
        .text()
        .await
        .with_context(|| format!("Failed to decode RSS response body for '{}'", url))?;

    // XML parsing is synchronous, so keep it off the async executor.
    let mail_rss = tokio::task::spawn_blocking(move || quick_xml::de::from_str::<MailRss>(&body))
        .await?
        .with_context(|| format!("Failed to parse RSS feed for '{}'", url))?;

    // Walk the feed in reverse document order and drop the leading items
    // that are not newer than the stored publication date.
    let items = mail_rss
        .channel
        .items
        .into_iter()
        .rev()
        .skip_while(|item| item.pub_date.value <= list_state.last_pub_date)
        .collect::<Vec<_>>();

    // Items form the outer loop so the state is persisted once per item,
    // after every room has received it. A rooms-outer loop would rewind the
    // saved date for each later room and, on a mid-way failure, leave a
    // stale date behind that causes duplicate announcements.
    for item in &items {
        for room in &rooms {
            let msg =
                RoomMessageEventContent::text_markdown(format!("\\[{}\\] [{}]({})", list.name, item.title, item.link));
            room.send(msg, None)
                .await
                .with_context(|| format!("Failed to send message to room '{}'", room.room_id()))?;
        }
        save_list_state(
            ListState {
                last_pub_date: item.pub_date.value,
            },
            state_file,
        )
        .await?;
    }

    Ok(())
}
/// Spawns one background polling task per configured mailing list.
///
/// Every task repeatedly calls `handle_list` for its list, logs any error as
/// a warning, and then sleeps for the configured update interval. A list
/// whose effective room set is empty (neither its own rooms nor the default
/// rooms are configured) gets a task that finishes immediately.
///
/// # Errors
///
/// Fails if the shared HTTP client cannot be built.
pub fn start_polling(config: MailArchiveConfig, matrix_client: Client) -> anyhow::Result<Vec<JoinHandle<()>>> {
    let user_agent = format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
    let http_client = reqwest::Client::builder()
        .user_agent(user_agent)
        .gzip(true)
        .redirect(redirect::Policy::default())
        .timeout(Duration::from_secs(8))
        .build()?;

    let mut handles = Vec::new();
    for list in config.lists {
        // A list without rooms of its own falls back to the default rooms.
        let room_ids = if list.rooms.is_empty() {
            config.default_rooms.clone()
        } else {
            list.rooms.clone()
        };
        let url = format!("https://www.mail-archive.com/{}/maillist.xml", list.name);
        let state_file = config.state_dir.join(format!("{}.state", list.name));
        let update_interval = Duration::from_secs(config.update_interval);
        let http_client = http_client.clone();
        let matrix_client = matrix_client.clone();

        let handle = tokio::spawn(async move {
            if room_ids.is_empty() {
                return;
            }
            loop {
                let outcome = handle_list(&list, &state_file, &http_client, &url, &matrix_client, &room_ids).await;
                if let Err(err) = outcome {
                    warn!("{:#}", err);
                }
                sleep(update_interval).await;
            }
        });
        handles.push(handle);
    }

    Ok(handles)
}
/// Serde `deserialize_with` helper that reads a string and parses it into a
/// UTC timestamp.
///
/// The string is handed to `dateparser::parse`; a parse failure is reported
/// as a custom deserialization error.
fn deser_rfc2616<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
where
    D: de::Deserializer<'de>,
{
    /// Visitor that accepts string input and parses it as a timestamp.
    struct TimestampVisitor;

    impl<'de> de::Visitor<'de> for TimestampVisitor {
        type Value = DateTime<Utc>;

        fn expecting(&self, out: &mut fmt::Formatter) -> fmt::Result {
            out.write_str("an RFC2616-formatted datetime")
        }

        fn visit_str<E>(self, text: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            match dateparser::parse(text) {
                Ok(timestamp) => Ok(timestamp),
                Err(err) => Err(E::custom(err)),
            }
        }
    }

    deserializer.deserialize_any(TimestampVisitor)
}
#[cfg(test)]
mod test {
    use std::{fs::File, io::BufReader};

    use super::MailRss;

    /// Smoke test: the sample feed under `test-data/` must deserialize into
    /// `MailRss`. The parsed value is printed for manual inspection.
    #[test]
    pub fn test_feed_deser() -> anyhow::Result<()> {
        let fixture_path = format!("{}/test-data/maillist.xml", env!("CARGO_MANIFEST_DIR"));
        let reader = BufReader::new(File::open(fixture_path)?);
        let parsed: MailRss = quick_xml::de::from_reader(reader)?;
        println!("{:#?}", parsed);
        Ok(())
    }
}