improve favicons fetching

This commit is contained in:
Bilal Elmoussaoui 2020-12-19 05:03:40 +01:00
parent 11196b6412
commit cd3a113a10
6 changed files with 96 additions and 44 deletions

1
Cargo.lock generated
View file

@ -235,6 +235,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"ashpd", "ashpd",
"async-std",
"byteorder", "byteorder",
"diesel", "diesel",
"diesel_migrations", "diesel_migrations",

View file

@ -31,4 +31,4 @@ serde_json = "1.0"
qrcode = {version ="0.12",features=["image"]} qrcode = {version ="0.12",features=["image"]}
gtk = { git = "https://github.com/gtk-rs/gtk4-rs", package = "gtk4"} gtk = { git = "https://github.com/gtk-rs/gtk4-rs", package = "gtk4"}
libhandy = { git = "https://gitlab.gnome.org/bilelmoussaoui/libhandy4-rs", package = "libhandy4"} libhandy = { git = "https://gitlab.gnome.org/bilelmoussaoui/libhandy4-rs", package = "libhandy4"}
async-std = "1.8"

View file

@ -223,7 +223,7 @@
</child> </child>
</template> </template>
<object class="GtkEntryCompletion" id="provider_completion"> <object class="GtkEntryCompletion" id="provider_completion">
<property name="minimum-key-length">2</property> <property name="minimum-key-length">1</property>
<property name="text-column">1</property> <property name="text-column">1</property>
<property name="inline-selection">True</property> <property name="inline-selection">True</property>
<child> <child>

View file

@ -15,7 +15,7 @@
<child> <child>
<object class="ProviderImage" id="image"> <object class="ProviderImage" id="image">
<property name="halign">start</property> <property name="halign">start</property>
<property name="size">48</property> <property name="size">32</property>
</object> </object>
</child> </child>
<child> <child>

View file

@ -1,6 +1,7 @@
use image::io::Reader as ImageReader;
use quick_xml::events::{attributes::Attribute, BytesStart, Event}; use quick_xml::events::{attributes::Attribute, BytesStart, Event};
use std::io::Cursor;
use url::Url; use url::Url;
const SUPPORTED_RELS: [&[u8]; 7] = [ const SUPPORTED_RELS: [&[u8]; 7] = [
b"icon", b"icon",
b"fluid-icon", b"fluid-icon",
@ -15,7 +16,6 @@ const SUPPORTED_RELS: [&[u8]; 7] = [
pub enum FaviconError { pub enum FaviconError {
Surf(surf::Error), Surf(surf::Error),
Url(url::ParseError), Url(url::ParseError),
GLib(gtk::glib::Error),
NoResults, NoResults,
} }
@ -25,51 +25,103 @@ impl From<surf::Error> for FaviconError {
} }
} }
impl From<gtk::glib::Error> for FaviconError {
fn from(e: gtk::glib::Error) -> Self {
Self::GLib(e)
}
}
impl From<url::ParseError> for FaviconError { impl From<url::ParseError> for FaviconError {
fn from(e: url::ParseError) -> Self { fn from(e: url::ParseError) -> Self {
Self::Url(e) Self::Url(e)
} }
} }
pub struct Favicon { impl std::error::Error for FaviconError {}
icons: Vec<Url>,
impl std::fmt::Display for FaviconError {
/// Writes a human-readable description of the error, prefixed with `FaviconError:`.
///
/// Each variant is matched explicitly and the *wrapped* error is what gets formatted.
/// Formatting `self` with `{}` from inside this method would call this method again
/// and recurse until the stack overflows.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    match self {
        FaviconError::NoResults => write!(f, "FaviconError: No results were found"),
        FaviconError::Surf(e) => write!(f, "FaviconError: {}", e),
        FaviconError::Url(e) => write!(f, "FaviconError: {}", e),
    }
}
} }
impl Favicon {}
#[derive(Debug)] #[derive(Debug)]
pub struct FaviconScrapper; pub struct Favicon(Vec<Url>, surf::Client);
impl FaviconScrapper { impl Favicon {
pub async fn from_url(url: Url) -> Result<Vec<Url>, FaviconError> { pub async fn find_best(&self) -> Option<&Url> {
let mut res = surf::get(&url).await?; let mut largest_size = 0;
let body = res.body_string().await?; let mut best = None;
let mut reader = quick_xml::Reader::from_str(&body); for url in self.0.iter() {
if let Some(size) = self.get_size(url).await {
let icons = Self::from_reader(&mut reader, &url); // Only store the width & assumes it has the same height here to simplify things
if size.0 > largest_size {
Ok(icons) largest_size = size.0;
best = Some(url);
}
}
}
best
} }
fn from_reader(reader: &mut quick_xml::Reader<&[u8]>, base_url: &Url) -> Vec<Url> { pub async fn get_size(&self, url: &Url) -> Option<(u32, u32)> {
let mut response = self.1.get(url).await.ok()?;
let ext = std::path::Path::new(url.path())
.extension()
.map(|e| e.to_str().unwrap())?;
// Assumes the svg is the best size we can find
if ext == "svg" {
return Some((1024, 1024));
}
let format = match ext {
"png" => image::ImageFormat::Png,
"ico" => image::ImageFormat::Ico,
_ => unreachable!(),
};
let bytes = response.body_bytes().await.ok()?;
let mut image = ImageReader::new(Cursor::new(bytes));
image.set_format(format);
image.into_dimensions().ok()
}
}
#[derive(Debug)]
pub struct FaviconScrapper(surf::Client);
impl FaviconScrapper {
/// Creates a scrapper whose HTTP client has surf's `Redirect` middleware installed
/// with its default settings.
pub fn new() -> Self {
    Self(surf::client().with(surf::middleware::Redirect::default()))
}
/// Downloads the page at `url` and collects the icon URLs found by `from_reader`.
///
/// The request carries a desktop Safari `User-Agent` header
/// (NOTE(review): presumably because some sites answer unknown clients differently; confirm).
///
/// # Errors
///
/// Returns [`FaviconError::NoResults`] when no icon URL was found. A failed request or
/// an unreadable body is propagated through `?` as a `FaviconError`.
pub async fn from_url(&self, url: Url) -> Result<Favicon, FaviconError> {
    const USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.2 Safari/605.1.15";

    let mut response = self.0.get(&url).header("User-Agent", USER_AGENT).await?;
    let html = response.body_string().await?;

    // The page is parsed with an XML reader, so end-tag name checking is switched off.
    let mut xml = quick_xml::Reader::from_str(&html);
    xml.check_end_names(false);
    xml.trim_markup_names_in_closing_tags(true);

    let found = self.from_reader(&mut xml, &url);
    if found.is_empty() {
        Err(FaviconError::NoResults)
    } else {
        // The returned `Favicon` gets a clone of this scrapper's client.
        Ok(Favicon(found, self.0.clone()))
    }
}
fn from_reader(&self, reader: &mut quick_xml::Reader<&[u8]>, base_url: &Url) -> Vec<Url> {
let mut buf = Vec::new(); let mut buf = Vec::new();
let mut urls = Vec::new(); let mut urls = Vec::new();
loop { loop {
match reader.read_event(&mut buf) { match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => { Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
if let b"link" = e.name() { if let b"link" = e.name() {
if let Some(url) = Self::from_link(e, base_url) { if let Some(url) = self.from_link(e, base_url) {
urls.push(url); urls.push(url);
} }
} }
} }
Ok(Event::Eof) => break, Ok(Event::Eof) => break,
Err(e) => warn!("Error at position {}: {:?}", reader.buffer_position(), e), Err(e) => debug!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (), _ => (),
} }
} }
@ -77,7 +129,7 @@ impl FaviconScrapper {
urls urls
} }
fn from_link(e: &BytesStart, base_url: &Url) -> Option<Url> { fn from_link(&self, e: &BytesStart, base_url: &Url) -> Option<Url> {
let mut url = None; let mut url = None;
let mut has_proper_rel = false; let mut has_proper_rel = false;
@ -87,7 +139,10 @@ impl FaviconScrapper {
key: b"href", key: b"href",
value, value,
}) => { }) => {
let href = String::from_utf8(value.into_owned()).unwrap(); let mut href = String::from_utf8(value.into_owned()).unwrap();
if href.starts_with("//") {
href = format!("https:{}", href);
}
url = match Url::parse(&href) { url = match Url::parse(&href) {
Ok(url) => Some(url), Ok(url) => Some(url),
Err(url::ParseError::RelativeUrlWithoutBase) => base_url.join(&href).ok(), Err(url::ParseError::RelativeUrlWithoutBase) => base_url.join(&href).ok(),

View file

@ -3,6 +3,7 @@ use crate::{
models::{database, Account, AccountsModel, FaviconError, FaviconScrapper}, models::{database, Account, AccountsModel, FaviconError, FaviconScrapper},
schema::providers, schema::providers,
}; };
use async_std::prelude::*;
use anyhow::Result; use anyhow::Result;
use core::cmp::Ordering; use core::cmp::Ordering;
use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl}; use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl};
@ -359,32 +360,27 @@ impl Provider {
.expect("Failed to create provider") .expect("Failed to create provider")
} }
pub async fn favicon(&self) -> Result<gio::File, FaviconError> { pub async fn favicon(&self) -> Result<gio::File, Box<dyn std::error::Error>> {
if let Some(ref website) = self.website() { if let Some(ref website) = self.website() {
let website_url = Url::parse(website)?; let website_url = Url::parse(website)?;
let favicons = FaviconScrapper::from_url(website_url).await?; let favicon = FaviconScrapper::new().from_url(website_url).await?;
let icon_name = format!("{}_{}", self.id(), self.name().replace(' ', "_")); let icon_name = format!("{}_{}", self.id(), self.name().replace(' ', "_"));
let cache_path = glib::get_user_cache_dir() let cache_path = glib::get_user_cache_dir()
.join("authenticator") .join("authenticator")
.join("favicons") .join("favicons")
.join(icon_name); .join(icon_name);
let dest = gio::File::new_for_path(cache_path); let mut dest = async_std::fs::File::create(cache_path.clone()).await?;
if let Some(favicon) = favicons.get(0) { if let Some(best_favicon) = favicon.find_best().await {
let mut res = surf::get(favicon).await?; let mut res = surf::get(best_favicon).await?;
let body = res.body_bytes().await?; let body = res.body_bytes().await?;
dest.replace_contents( dest.write_all(&body).await?;
&body,
None, return Ok(gio::File::new_for_path(cache_path));
false,
gio::FileCreateFlags::REPLACE_DESTINATION,
gio::NONE_CANCELLABLE,
)?;
return Ok(dest);
} }
} }
Err(FaviconError::NoResults) Err(Box::new(FaviconError::NoResults))
} }
pub fn id(&self) -> i32 { pub fn id(&self) -> i32 {