Remove cache and send HEAD before parsing title

This commit is contained in:
Yash Karandikar 2022-02-20 11:17:52 -06:00
parent 9c6b18af93
commit 7b44a227be
Signed by: karx
GPG key ID: A794DA2529474BA5

View file

@ -1,5 +1,3 @@
use std::collections::HashMap;
use futures::prelude::*;
use htmlescape::decode_html;
use irc::client::prelude::*;
@ -83,7 +81,7 @@ async fn main() -> anyhow::Result<()> {
let title_regex = Regex::new(r"(?<=<title>)(.*)(?=</title>)").unwrap();
let spotify_regex = Regex::new(r"(?:https?|spotify):(?://open\.spotify\.com/)?(track|artist|album|playlist)[/:]([a-zA-Z0-9]*)").unwrap();
let mut cache: HashMap<String, String> = HashMap::new();
let hclient = reqwest::Client::new();
while let Some(message) = stream.next().await.transpose()? {
if vlog { print!("[IRC] {}", message) }
@ -101,18 +99,32 @@ async fn main() -> anyhow::Result<()> {
}
} else if let Some(m) = url_regex.find(&message) {
let url = &message[m.0..m.1];
if let Some(entry) = cache.get(&url.to_string()) {
client.send_privmsg(&channel, format!("\x039[Title]\x0311 {}", entry))?;
continue;
match hclient.head(url).send().await {
Ok(o) => {
let headers = o.headers();
let ctype = match headers.get("Content-Type") {
Some(c) => match c.to_str() {
Ok(s) => s,
Err(_) => continue,
},
None => continue,
};
if !ctype.starts_with("text/html") {
continue;
}
},
Err(e) => {
println!("[Title] Error! {}", e);
continue;
}
}
match reqwest::get(url).await {
match hclient.get(url).send().await {
Ok(o) => {
let body = o.text().await?;
if let Some(tm) = title_regex.find(&body) {
let title_match = &body[tm.0..tm.1];
let result = decode_html(title_match).unwrap_or_else(|_| title_match.to_string());
client.send_privmsg(&channel, format!("\x039[Title]\x0311 {}", result))?;
cache.insert(url.to_string(), result.to_string());
}
}
Err(e) => println!("[Title] Error! {}", e)