You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

476 lines
16KB

  1. #[macro_use]
  2. extern crate lazy_static;
  3. extern crate image;
  4. extern crate rayon;
  5. extern crate regex;
  6. extern crate errors;
  7. extern crate utils;
  8. use std::collections::hash_map::DefaultHasher;
  9. use std::collections::hash_map::Entry as HEntry;
  10. use std::collections::HashMap;
  11. use std::fs::{self, File};
  12. use std::hash::{Hash, Hasher};
  13. use std::path::{Path, PathBuf};
  14. use image::jpeg::JPEGEncoder;
  15. use image::png::PNGEncoder;
  16. use image::{FilterType, GenericImageView};
  17. use rayon::prelude::*;
  18. use regex::Regex;
  19. use errors::{Error, Result};
  20. use utils::fs as ufs;
  21. static RESIZED_SUBDIR: &str = "processed_images";
  22. lazy_static! {
  23. pub static ref RESIZED_FILENAME: Regex =
  24. Regex::new(r#"([0-9a-f]{16})([0-9a-f]{2})[.](jpg|png)"#).unwrap();
  25. }
  /// The kind of resize to perform, together with its target dimensions.
  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  pub enum ResizeOp {
      /// Plain scale to exactly `(width, height)`; the aspect ratio of the
      /// source image is not taken into account.
      Scale(u32, u32),
      /// Scale to the given width; the height is derived so that the aspect
      /// ratio is preserved.
      FitWidth(u32),
      /// Scale to the given height; the width is derived so that the aspect
      /// ratio is preserved.
      FitHeight(u32),
      /// Shrink the image, preserving aspect ratio, so that it fits inside
      /// `(width, height)`. Only applied when the image exceeds one of the two
      /// limits; either resulting dimension may be smaller than requested but
      /// never larger.
      Fit(u32, u32),
      /// Produce an image of exactly `(width, height)` that is completely
      /// covered by the source. Whatever does not fit because the aspect
      /// ratios differ is cropped away.
      Fill(u32, u32),
  }
  47. impl ResizeOp {
  48. pub fn from_args(op: &str, width: Option<u32>, height: Option<u32>) -> Result<ResizeOp> {
  49. use ResizeOp::*;
  50. // Validate args:
  51. match op {
  52. "fit_width" => {
  53. if width.is_none() {
  54. return Err("op=\"fit_width\" requires a `width` argument".to_string().into());
  55. }
  56. }
  57. "fit_height" => {
  58. if height.is_none() {
  59. return Err("op=\"fit_height\" requires a `height` argument"
  60. .to_string()
  61. .into());
  62. }
  63. }
  64. "scale" | "fit" | "fill" => {
  65. if width.is_none() || height.is_none() {
  66. return Err(
  67. format!("op={} requires a `width` and `height` argument", op).into()
  68. );
  69. }
  70. }
  71. _ => return Err(format!("Invalid image resize operation: {}", op).into()),
  72. };
  73. Ok(match op {
  74. "scale" => Scale(width.unwrap(), height.unwrap()),
  75. "fit_width" => FitWidth(width.unwrap()),
  76. "fit_height" => FitHeight(height.unwrap()),
  77. "fit" => Fit(width.unwrap(), height.unwrap()),
  78. "fill" => Fill(width.unwrap(), height.unwrap()),
  79. _ => unreachable!(),
  80. })
  81. }
  82. pub fn width(self) -> Option<u32> {
  83. use ResizeOp::*;
  84. match self {
  85. Scale(w, _) => Some(w),
  86. FitWidth(w) => Some(w),
  87. FitHeight(_) => None,
  88. Fit(w, _) => Some(w),
  89. Fill(w, _) => Some(w),
  90. }
  91. }
  92. pub fn height(self) -> Option<u32> {
  93. use ResizeOp::*;
  94. match self {
  95. Scale(_, h) => Some(h),
  96. FitWidth(_) => None,
  97. FitHeight(h) => Some(h),
  98. Fit(_, h) => Some(h),
  99. Fill(_, h) => Some(h),
  100. }
  101. }
  102. }
  103. impl From<ResizeOp> for u8 {
  104. fn from(op: ResizeOp) -> u8 {
  105. use ResizeOp::*;
  106. match op {
  107. Scale(_, _) => 1,
  108. FitWidth(_) => 2,
  109. FitHeight(_) => 3,
  110. Fit(_, _) => 4,
  111. Fill(_, _) => 5,
  112. }
  113. }
  114. }
  115. #[allow(clippy::derive_hash_xor_eq)]
  116. impl Hash for ResizeOp {
  117. fn hash<H: Hasher>(&self, hasher: &mut H) {
  118. hasher.write_u8(u8::from(*self));
  119. if let Some(w) = self.width() {
  120. hasher.write_u32(w);
  121. }
  122. if let Some(h) = self.height() {
  123. hasher.write_u32(h);
  124. }
  125. }
  126. }
  /// Output format of a processed image.
  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  pub enum Format {
      /// JPEG output; the `u8` is the encoder quality in percent.
      Jpeg(u8),
      /// PNG output.
      Png,
  }
  135. impl Format {
  136. pub fn from_args(source: &str, format: &str, quality: u8) -> Result<Format> {
  137. use Format::*;
  138. assert!(quality > 0 && quality <= 100, "Jpeg quality must be within the range [1; 100]");
  139. match format {
  140. "auto" => match Self::is_lossy(source) {
  141. Some(true) => Ok(Jpeg(quality)),
  142. Some(false) => Ok(Png),
  143. None => Err(format!("Unsupported image file: {}", source).into()),
  144. },
  145. "jpeg" | "jpg" => Ok(Jpeg(quality)),
  146. "png" => Ok(Png),
  147. _ => Err(format!("Invalid image format: {}", format).into()),
  148. }
  149. }
  150. /// Looks at file's extension and, if it's a supported image format, returns whether the format is lossless
  151. pub fn is_lossy<P: AsRef<Path>>(p: P) -> Option<bool> {
  152. p.as_ref()
  153. .extension()
  154. .and_then(std::ffi::OsStr::to_str)
  155. .map(|ext| match ext.to_lowercase().as_str() {
  156. "jpg" | "jpeg" => Some(true),
  157. "png" => Some(false),
  158. "gif" => Some(false),
  159. "bmp" => Some(false),
  160. _ => None,
  161. })
  162. .unwrap_or(None)
  163. }
  164. fn extension(&self) -> &str {
  165. // Kept in sync with RESIZED_FILENAME and op_filename
  166. use Format::*;
  167. match *self {
  168. Png => "png",
  169. Jpeg(_) => "jpg",
  170. }
  171. }
  172. }
  173. #[allow(clippy::derive_hash_xor_eq)]
  174. impl Hash for Format {
  175. fn hash<H: Hasher>(&self, hasher: &mut H) {
  176. use Format::*;
  177. let q = match *self {
  178. Png => 0,
  179. Jpeg(q) => q,
  180. };
  181. hasher.write_u8(q);
  182. }
  183. }
  184. /// Holds all data needed to perform a resize operation
  185. #[derive(Debug, PartialEq, Eq)]
  186. pub struct ImageOp {
  187. source: String,
  188. op: ResizeOp,
  189. format: Format,
  190. /// Hash of the above parameters
  191. hash: u64,
  192. /// If there is a hash collision with another ImageOp, this contains a sequential ID > 1
  193. /// identifying the collision in the order as encountered (which is essentially random).
  194. /// Therefore, ImageOps with collisions (ie. collision_id > 0) are always considered out of date.
  195. /// Note that this is very unlikely to happen in practice
  196. collision_id: u32,
  197. }
  198. impl ImageOp {
  199. pub fn new(source: String, op: ResizeOp, format: Format) -> ImageOp {
  200. let mut hasher = DefaultHasher::new();
  201. hasher.write(source.as_ref());
  202. op.hash(&mut hasher);
  203. format.hash(&mut hasher);
  204. let hash = hasher.finish();
  205. ImageOp { source, op, format, hash, collision_id: 0 }
  206. }
  207. pub fn from_args(
  208. source: String,
  209. op: &str,
  210. width: Option<u32>,
  211. height: Option<u32>,
  212. format: &str,
  213. quality: u8,
  214. ) -> Result<ImageOp> {
  215. let op = ResizeOp::from_args(op, width, height)?;
  216. let format = Format::from_args(&source, format, quality)?;
  217. Ok(Self::new(source, op, format))
  218. }
  219. fn perform(&self, content_path: &Path, target_path: &Path) -> Result<()> {
  220. use ResizeOp::*;
  221. let src_path = content_path.join(&self.source);
  222. if !ufs::file_stale(&src_path, target_path) {
  223. return Ok(());
  224. }
  225. let mut img = image::open(&src_path)?;
  226. let (img_w, img_h) = img.dimensions();
  227. const RESIZE_FILTER: FilterType = FilterType::Lanczos3;
  228. const RATIO_EPSILLION: f32 = 0.1;
  229. let img = match self.op {
  230. Scale(w, h) => img.resize_exact(w, h, RESIZE_FILTER),
  231. FitWidth(w) => img.resize(w, u32::max_value(), RESIZE_FILTER),
  232. FitHeight(h) => img.resize(u32::max_value(), h, RESIZE_FILTER),
  233. Fit(w, h) => {
  234. if img_w > w || img_h > h {
  235. img.resize(w, h, RESIZE_FILTER)
  236. } else {
  237. img
  238. }
  239. },
  240. Fill(w, h) => {
  241. let factor_w = img_w as f32 / w as f32;
  242. let factor_h = img_h as f32 / h as f32;
  243. if (factor_w - factor_h).abs() <= RATIO_EPSILLION {
  244. // If the horizontal and vertical factor is very similar,
  245. // that means the aspect is similar enough that there's not much point
  246. // in cropping, so just perform a simple scale in this case.
  247. img.resize_exact(w, h, RESIZE_FILTER)
  248. } else {
  249. // We perform the fill such that a crop is performed first
  250. // and then resize_exact can be used, which should be cheaper than
  251. // resizing and then cropping (smaller number of pixels to resize).
  252. let (crop_w, crop_h) = if factor_w < factor_h {
  253. (img_w, (factor_w * h as f32).round() as u32)
  254. } else {
  255. ((factor_h * w as f32).round() as u32, img_h)
  256. };
  257. let (offset_w, offset_h) = if factor_w < factor_h {
  258. (0, (img_h - crop_h) / 2)
  259. } else {
  260. ((img_w - crop_w) / 2, 0)
  261. };
  262. img.crop(offset_w, offset_h, crop_w, crop_h).resize_exact(w, h, RESIZE_FILTER)
  263. }
  264. }
  265. };
  266. let mut f = File::create(target_path)?;
  267. let (img_w, img_h) = img.dimensions();
  268. match self.format {
  269. Format::Png => {
  270. let enc = PNGEncoder::new(&mut f);
  271. enc.encode(&img.raw_pixels(), img_w, img_h, img.color())?;
  272. }
  273. Format::Jpeg(q) => {
  274. let mut enc = JPEGEncoder::new_with_quality(&mut f, q);
  275. enc.encode(&img.raw_pixels(), img_w, img_h, img.color())?;
  276. }
  277. }
  278. Ok(())
  279. }
  280. }
  281. /// A strcture into which image operations can be enqueued and then performed.
  282. /// All output is written in a subdirectory in `static_path`,
  283. /// taking care of file stale status based on timestamps and possible hash collisions.
  284. #[derive(Debug)]
  285. pub struct Processor {
  286. content_path: PathBuf,
  287. resized_path: PathBuf,
  288. resized_url: String,
  289. /// A map of a ImageOps by their stored hash.
  290. /// Note that this cannot be a HashSet, because hashset handles collisions and we don't want that,
  291. /// we need to be aware of and handle collisions ourselves.
  292. img_ops: HashMap<u64, ImageOp>,
  293. /// Hash collisions go here:
  294. img_ops_collisions: Vec<ImageOp>,
  295. }
  296. impl Processor {
  297. pub fn new(content_path: PathBuf, static_path: &Path, base_url: &str) -> Processor {
  298. Processor {
  299. content_path,
  300. resized_path: static_path.join(RESIZED_SUBDIR),
  301. resized_url: Self::resized_url(base_url),
  302. img_ops: HashMap::new(),
  303. img_ops_collisions: Vec::new(),
  304. }
  305. }
  306. fn resized_url(base_url: &str) -> String {
  307. if base_url.ends_with('/') {
  308. format!("{}{}", base_url, RESIZED_SUBDIR)
  309. } else {
  310. format!("{}/{}", base_url, RESIZED_SUBDIR)
  311. }
  312. }
  313. pub fn set_base_url(&mut self, base_url: &str) {
  314. self.resized_url = Self::resized_url(base_url);
  315. }
  316. pub fn source_exists(&self, source: &str) -> bool {
  317. self.content_path.join(source).exists()
  318. }
  319. pub fn num_img_ops(&self) -> usize {
  320. self.img_ops.len() + self.img_ops_collisions.len()
  321. }
  322. fn insert_with_collisions(&mut self, mut img_op: ImageOp) -> u32 {
  323. match self.img_ops.entry(img_op.hash) {
  324. HEntry::Occupied(entry) => {
  325. if *entry.get() == img_op {
  326. return 0;
  327. }
  328. }
  329. HEntry::Vacant(entry) => {
  330. entry.insert(img_op);
  331. return 0;
  332. }
  333. }
  334. // If we get here, that means a hash collision.
  335. // This is detected when there is an ImageOp with the same hash in the `img_ops`
  336. // map but which is not equal to this one.
  337. // To deal with this, all collisions get a (random) sequential ID number.
  338. // First try to look up this ImageOp in `img_ops_collisions`, maybe we've
  339. // already seen the same ImageOp.
  340. // At the same time, count IDs to figure out the next free one.
  341. // Start with the ID of 2, because we'll need to use 1 for the ImageOp
  342. // already present in the map:
  343. let mut collision_id = 2;
  344. for op in self.img_ops_collisions.iter().filter(|op| op.hash == img_op.hash) {
  345. if *op == img_op {
  346. // This is a colliding ImageOp, but we've already seen an equal one
  347. // (not just by hash, but by content too), so just return its ID:
  348. return collision_id;
  349. } else {
  350. collision_id += 1;
  351. }
  352. }
  353. // If we get here, that means this is a new colliding ImageOp and
  354. // `collision_id` is the next free ID
  355. if collision_id == 2 {
  356. // This is the first collision found with this hash, update the ID
  357. // of the matching ImageOp in the map.
  358. self.img_ops.get_mut(&img_op.hash).unwrap().collision_id = 1;
  359. }
  360. img_op.collision_id = collision_id;
  361. self.img_ops_collisions.push(img_op);
  362. collision_id
  363. }
  364. fn op_filename(hash: u64, collision_id: u32, format: Format) -> String {
  365. // Please keep this in sync with RESIZED_FILENAME
  366. assert!(collision_id < 256, "Unexpectedly large number of collisions: {}", collision_id);
  367. format!("{:016x}{:02x}.{}", hash, collision_id, format.extension())
  368. }
  369. fn op_url(&self, hash: u64, collision_id: u32, format: Format) -> String {
  370. format!("{}/{}", &self.resized_url, Self::op_filename(hash, collision_id, format))
  371. }
  372. pub fn insert(&mut self, img_op: ImageOp) -> String {
  373. let hash = img_op.hash;
  374. let format = img_op.format;
  375. let collision_id = self.insert_with_collisions(img_op);
  376. self.op_url(hash, collision_id, format)
  377. }
  378. pub fn prune(&self) -> Result<()> {
  379. // Do not create folders if they don't exist
  380. if !self.resized_path.exists() {
  381. return Ok(());
  382. }
  383. ufs::ensure_directory_exists(&self.resized_path)?;
  384. let entries = fs::read_dir(&self.resized_path)?;
  385. for entry in entries {
  386. let entry_path = entry?.path();
  387. if entry_path.is_file() {
  388. let filename = entry_path.file_name().unwrap().to_string_lossy();
  389. if let Some(capts) = RESIZED_FILENAME.captures(filename.as_ref()) {
  390. let hash = u64::from_str_radix(capts.get(1).unwrap().as_str(), 16).unwrap();
  391. let collision_id =
  392. u32::from_str_radix(capts.get(2).unwrap().as_str(), 16).unwrap();
  393. if collision_id > 0 || !self.img_ops.contains_key(&hash) {
  394. fs::remove_file(&entry_path)?;
  395. }
  396. }
  397. }
  398. }
  399. Ok(())
  400. }
  401. pub fn do_process(&mut self) -> Result<()> {
  402. if !self.img_ops.is_empty() {
  403. ufs::ensure_directory_exists(&self.resized_path)?;
  404. }
  405. self.img_ops
  406. .par_iter()
  407. .map(|(hash, op)| {
  408. let target =
  409. self.resized_path.join(Self::op_filename(*hash, op.collision_id, op.format));
  410. op.perform(&self.content_path, &target)
  411. .map_err(|e| Error::chain(format!("Failed to process image: {}", op.source), e))
  412. })
  413. .collect::<Result<()>>()
  414. }
  415. }