263 lines
7.9 KiB
Rust
263 lines
7.9 KiB
Rust
// bebot
|
|
// Copyright (C) 2023 Brian Tarricone <brian@tarricone.org>
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
use std::{
|
|
fmt,
|
|
io::{BufReader, BufWriter, ErrorKind},
|
|
path::PathBuf,
|
|
time::{Duration, SystemTime},
|
|
};
|
|
|
|
use anyhow::Context;
|
|
use chrono::{DateTime, Utc};
|
|
use futures::{future::join_all, FutureExt};
|
|
use matrix_sdk::{
|
|
ruma::{events::room::message::RoomMessageEventContent, OwnedRoomOrAliasId},
|
|
Client,
|
|
};
|
|
use reqwest::redirect;
|
|
use serde::de;
|
|
use tokio::{fs::File, task::JoinHandle, time::sleep};
|
|
|
|
use crate::{
|
|
config::{MailArchiveConfig, MailListConfig},
|
|
matrix,
|
|
};
|
|
|
|
#[derive(Clone, Copy, Serialize, Deserialize)]
|
|
struct ListState {
|
|
last_pub_date: DateTime<Utc>,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
struct RssPubDate {
|
|
#[serde(rename = "$text", deserialize_with = "deser_rfc2616")]
|
|
value: DateTime<Utc>,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
struct RssItem {
|
|
title: String,
|
|
link: String,
|
|
#[serde(rename = "pubDate")]
|
|
pub_date: RssPubDate,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
struct RssChannel {
|
|
#[serde(rename = "item")]
|
|
items: Vec<RssItem>,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
struct MailRss {
|
|
channel: RssChannel,
|
|
}
|
|
|
|
async fn load_list_state(state_file: &PathBuf) -> anyhow::Result<ListState> {
|
|
match File::open(state_file).await {
|
|
Err(err) if err.kind() == ErrorKind::NotFound => {
|
|
// If we have no state, we probably don't want to blast out events
|
|
// for every single item in the RSS feed, so pretend the last time
|
|
// we published was right now.
|
|
let list_state = ListState {
|
|
last_pub_date: SystemTime::now().into(),
|
|
};
|
|
save_list_state(list_state, state_file).await?;
|
|
Ok(list_state)
|
|
}
|
|
Err(err) => Err(err)?,
|
|
Ok(f) => {
|
|
let r = BufReader::new(f.into_std().await);
|
|
Ok(tokio::task::spawn_blocking(move || serde_yaml::from_reader(r)).await??)
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn save_list_state(list_state: ListState, state_file: &PathBuf) -> anyhow::Result<()> {
|
|
let f = File::options()
|
|
.write(true)
|
|
.truncate(true)
|
|
.create(true)
|
|
.open(state_file)
|
|
.await?;
|
|
let w = BufWriter::new(f.into_std().await);
|
|
tokio::task::spawn_blocking(move || serde_yaml::to_writer(w, &list_state)).await??;
|
|
Ok(())
|
|
}
|
|
|
|
async fn handle_list(
|
|
list: &MailListConfig,
|
|
state_file: &PathBuf,
|
|
http_client: &reqwest::Client,
|
|
url: &String,
|
|
matrix_client: &Client,
|
|
room_ids: &[OwnedRoomOrAliasId],
|
|
) -> anyhow::Result<()> {
|
|
let list_state = load_list_state(state_file).await?;
|
|
|
|
let rooms_f = room_ids.iter().map(|room_id| {
|
|
matrix::ensure_room_joined(matrix_client, room_id)
|
|
.map(move |res| res.with_context(|| format!("Failed to join Matrix room '{}'", room_id)))
|
|
});
|
|
let rooms = join_all(rooms_f)
|
|
.await
|
|
.into_iter()
|
|
.flat_map(|room_res| match room_res {
|
|
Err(err) => {
|
|
warn!("{:#}", err);
|
|
vec![]
|
|
}
|
|
Ok(room) => vec![room],
|
|
})
|
|
.collect::<Vec<_>>();
|
|
if rooms.is_empty() {
|
|
return Err(anyhow!("Failed to join all rooms for list '{}'; skipping", list.name));
|
|
}
|
|
|
|
let response = http_client
|
|
.get(url)
|
|
.send()
|
|
.await
|
|
.with_context(|| format!("Failed to fetch mail RSS feed from '{}'", url))
|
|
.and_then(|response| {
|
|
if !response.status().is_success() {
|
|
Err(anyhow!(
|
|
"Failed to fetch mail RSS feed from '{}': server returned status {}",
|
|
url,
|
|
response.status().as_u16()
|
|
))
|
|
} else {
|
|
Ok(response)
|
|
}
|
|
})?;
|
|
let body = response
|
|
.text()
|
|
.await
|
|
.with_context(|| format!("Failed to decode RSS response body for '{}'", url))?;
|
|
let mail_rss = tokio::task::spawn_blocking(move || quick_xml::de::from_str::<MailRss>(&body))
|
|
.await?
|
|
.with_context(|| format!("Failed to parse RSS feed for '{}'", url))?;
|
|
let items = mail_rss
|
|
.channel
|
|
.items
|
|
.into_iter()
|
|
.rev()
|
|
.skip_while(|item| item.pub_date.value <= list_state.last_pub_date)
|
|
.collect::<Vec<_>>();
|
|
|
|
for room in rooms {
|
|
for item in &items {
|
|
let msg =
|
|
RoomMessageEventContent::text_markdown(format!("\\[{}\\] [{}]({}]", list.name, item.title, item.link));
|
|
room.send(msg, None)
|
|
.await
|
|
.with_context(|| format!("Failed to send message to room '{}'", room.room_id()))?;
|
|
save_list_state(
|
|
ListState {
|
|
last_pub_date: item.pub_date.value,
|
|
},
|
|
state_file,
|
|
)
|
|
.await?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn start_polling(config: MailArchiveConfig, matrix_client: Client) -> anyhow::Result<Vec<JoinHandle<()>>> {
|
|
let http_client = reqwest::Client::builder()
|
|
.user_agent(format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")))
|
|
.gzip(true)
|
|
.redirect(redirect::Policy::default())
|
|
.timeout(Duration::from_secs(8))
|
|
.build()?;
|
|
|
|
Ok(config
|
|
.lists
|
|
.into_iter()
|
|
.map(|list| {
|
|
let room_ids = if list.rooms.is_empty() {
|
|
&config.default_rooms
|
|
} else {
|
|
&list.rooms
|
|
}
|
|
.clone();
|
|
let list = list.clone();
|
|
let http_client = http_client.clone();
|
|
let matrix_client = matrix_client.clone();
|
|
let url = format!("https://www.mail-archive.com/{}/maillist.xml", list.name);
|
|
let state_file = config.state_dir.join(format!("{}.state", list.name));
|
|
let update_interval = Duration::from_secs(config.update_interval);
|
|
|
|
tokio::spawn(async move {
|
|
if !room_ids.is_empty() {
|
|
loop {
|
|
if let Err(err) =
|
|
handle_list(&list, &state_file, &http_client, &url, &matrix_client, &room_ids).await
|
|
{
|
|
warn!("{:#}", err);
|
|
}
|
|
|
|
sleep(update_interval).await;
|
|
}
|
|
}
|
|
})
|
|
})
|
|
.collect())
|
|
}
|
|
|
|
fn deser_rfc2616<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
|
|
where
|
|
D: de::Deserializer<'de>,
|
|
{
|
|
struct Rfc2616Visitor;
|
|
|
|
impl<'de> de::Visitor<'de> for Rfc2616Visitor {
|
|
type Value = DateTime<Utc>;
|
|
|
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
|
formatter.write_str("an RFC2616-formatted datetime")
|
|
}
|
|
|
|
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
|
where
|
|
E: de::Error,
|
|
{
|
|
dateparser::parse(v).map_err(E::custom)
|
|
}
|
|
}
|
|
|
|
deserializer.deserialize_any(Rfc2616Visitor)
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use std::{fs::File, io::BufReader};

    use super::MailRss;

    /// Smoke test: the sample feed under `test-data/` must deserialize into
    /// `MailRss`. The parsed value is printed for manual inspection.
    #[test]
    pub fn test_feed_deser() -> anyhow::Result<()> {
        let path = concat!(env!("CARGO_MANIFEST_DIR"), "/test-data/maillist.xml");
        let reader = BufReader::new(File::open(path)?);
        let feed: MailRss = quick_xml::de::from_reader(reader)?;
        println!("{:#?}", feed);
        Ok(())
    }
}
|