Add support for reading mail-archive.com RSS feeds
This commit is contained in:
262
src/mail_archive.rs
Normal file
262
src/mail_archive.rs
Normal file
@@ -0,0 +1,262 @@
|
||||
// bebot
|
||||
// Copyright (C) 2023 Brian Tarricone <brian@tarricone.org>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use std::{
|
||||
fmt,
|
||||
io::{BufReader, BufWriter, ErrorKind},
|
||||
path::PathBuf,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::{future::join_all, FutureExt};
|
||||
use matrix_sdk::{
|
||||
ruma::{events::room::message::RoomMessageEventContent, OwnedRoomOrAliasId},
|
||||
Client,
|
||||
};
|
||||
use reqwest::redirect;
|
||||
use serde::de;
|
||||
use tokio::{fs::File, task::JoinHandle, time::sleep};
|
||||
|
||||
use crate::{
|
||||
config::{MailArchiveConfig, MailListConfig},
|
||||
matrix,
|
||||
};
|
||||
|
||||
#[derive(Clone, Copy, Serialize, Deserialize)]
|
||||
struct ListState {
|
||||
last_pub_date: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct RssPubDate {
|
||||
#[serde(rename = "$text", deserialize_with = "deser_rfc2616")]
|
||||
value: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct RssItem {
|
||||
title: String,
|
||||
link: String,
|
||||
#[serde(rename = "pubDate")]
|
||||
pub_date: RssPubDate,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct RssChannel {
|
||||
#[serde(rename = "item")]
|
||||
items: Vec<RssItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct MailRss {
|
||||
channel: RssChannel,
|
||||
}
|
||||
|
||||
async fn load_list_state(state_file: &PathBuf) -> anyhow::Result<ListState> {
|
||||
match File::open(state_file).await {
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => {
|
||||
// If we have no state, we probably don't want to blast out events
|
||||
// for every single item in the RSS feed, so pretend the last time
|
||||
// we published was right now.
|
||||
let list_state = ListState {
|
||||
last_pub_date: SystemTime::now().into(),
|
||||
};
|
||||
save_list_state(list_state, state_file).await?;
|
||||
Ok(list_state)
|
||||
}
|
||||
Err(err) => Err(err)?,
|
||||
Ok(f) => {
|
||||
let r = BufReader::new(f.into_std().await);
|
||||
Ok(tokio::task::spawn_blocking(move || serde_yaml::from_reader(r)).await??)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn save_list_state(list_state: ListState, state_file: &PathBuf) -> anyhow::Result<()> {
|
||||
let f = File::options()
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.create(true)
|
||||
.open(state_file)
|
||||
.await?;
|
||||
let w = BufWriter::new(f.into_std().await);
|
||||
tokio::task::spawn_blocking(move || serde_yaml::to_writer(w, &list_state)).await??;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_list(
|
||||
list: &MailListConfig,
|
||||
state_file: &PathBuf,
|
||||
http_client: &reqwest::Client,
|
||||
url: &String,
|
||||
matrix_client: &Client,
|
||||
room_ids: &[OwnedRoomOrAliasId],
|
||||
) -> anyhow::Result<()> {
|
||||
let list_state = load_list_state(state_file).await?;
|
||||
|
||||
let rooms_f = room_ids.iter().map(|room_id| {
|
||||
matrix::ensure_room_joined(matrix_client, room_id)
|
||||
.map(move |res| res.with_context(|| format!("Failed to join Matrix room '{}'", room_id)))
|
||||
});
|
||||
let rooms = join_all(rooms_f)
|
||||
.await
|
||||
.into_iter()
|
||||
.flat_map(|room_res| match room_res {
|
||||
Err(err) => {
|
||||
warn!("{:#}", err);
|
||||
vec![]
|
||||
}
|
||||
Ok(room) => vec![room],
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
if rooms.is_empty() {
|
||||
return Err(anyhow!("Failed to join all rooms for list '{}'; skipping", list.name));
|
||||
}
|
||||
|
||||
let response = http_client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("Failed to fetch mail RSS feed from '{}'", url))
|
||||
.and_then(|response| {
|
||||
if !response.status().is_success() {
|
||||
Err(anyhow!(
|
||||
"Failed to fetch mail RSS feed from '{}': server returned status {}",
|
||||
url,
|
||||
response.status().as_u16()
|
||||
))
|
||||
} else {
|
||||
Ok(response)
|
||||
}
|
||||
})?;
|
||||
let body = response
|
||||
.text()
|
||||
.await
|
||||
.with_context(|| format!("Failed to decode RSS response body for '{}'", url))?;
|
||||
let mail_rss = tokio::task::spawn_blocking(move || quick_xml::de::from_str::<MailRss>(&body))
|
||||
.await?
|
||||
.with_context(|| format!("Failed to parse RSS feed for '{}'", url))?;
|
||||
let items = mail_rss
|
||||
.channel
|
||||
.items
|
||||
.into_iter()
|
||||
.rev()
|
||||
.skip_while(|item| item.pub_date.value <= list_state.last_pub_date)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for room in rooms {
|
||||
for item in &items {
|
||||
let msg =
|
||||
RoomMessageEventContent::text_markdown(format!("\\[{}\\] [{}]({}]", list.name, item.title, item.link));
|
||||
room.send(msg, None)
|
||||
.await
|
||||
.with_context(|| format!("Failed to send message to room '{}'", room.room_id()))?;
|
||||
save_list_state(
|
||||
ListState {
|
||||
last_pub_date: item.pub_date.value,
|
||||
},
|
||||
state_file,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn start_polling(config: MailArchiveConfig, matrix_client: Client) -> anyhow::Result<Vec<JoinHandle<()>>> {
|
||||
let http_client = reqwest::Client::builder()
|
||||
.user_agent(format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")))
|
||||
.gzip(true)
|
||||
.redirect(redirect::Policy::default())
|
||||
.timeout(Duration::from_secs(8))
|
||||
.build()?;
|
||||
|
||||
Ok(config
|
||||
.lists
|
||||
.into_iter()
|
||||
.map(|list| {
|
||||
let room_ids = if list.rooms.is_empty() {
|
||||
&config.default_rooms
|
||||
} else {
|
||||
&list.rooms
|
||||
}
|
||||
.clone();
|
||||
let list = list.clone();
|
||||
let http_client = http_client.clone();
|
||||
let matrix_client = matrix_client.clone();
|
||||
let url = format!("https://www.mail-archive.com/{}/maillist.xml", list.name);
|
||||
let state_file = config.state_dir.join(format!("{}.state", list.name));
|
||||
let update_interval = Duration::from_secs(config.update_interval);
|
||||
|
||||
tokio::spawn(async move {
|
||||
if !room_ids.is_empty() {
|
||||
loop {
|
||||
if let Err(err) =
|
||||
handle_list(&list, &state_file, &http_client, &url, &matrix_client, &room_ids).await
|
||||
{
|
||||
warn!("{:#}", err);
|
||||
}
|
||||
|
||||
sleep(update_interval).await;
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn deser_rfc2616<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
|
||||
where
|
||||
D: de::Deserializer<'de>,
|
||||
{
|
||||
struct Rfc2616Visitor;
|
||||
|
||||
impl<'de> de::Visitor<'de> for Rfc2616Visitor {
|
||||
type Value = DateTime<Utc>;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("an RFC2616-formatted datetime")
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
dateparser::parse(v).map_err(E::custom)
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_any(Rfc2616Visitor)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::{fs::File, io::BufReader};

    use super::MailRss;

    /// Checks that the sample feed under `test-data/` deserializes into
    /// `MailRss` without error; the parsed value is printed for inspection.
    #[test]
    pub fn test_feed_deser() -> anyhow::Result<()> {
        let fixture = concat!(env!("CARGO_MANIFEST_DIR"), "/test-data/maillist.xml");
        let reader = BufReader::new(File::open(fixture)?);
        let parsed: MailRss = quick_xml::de::from_reader(reader)?;
        println!("{:#?}", parsed);
        Ok(())
    }
}
|
Reference in New Issue
Block a user