You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

468 lines
16KB

  1. #[macro_use]
  2. extern crate lazy_static;
  3. extern crate image;
  4. extern crate rayon;
  5. extern crate regex;
  6. extern crate errors;
  7. extern crate utils;
  8. use std::collections::hash_map::DefaultHasher;
  9. use std::collections::hash_map::Entry as HEntry;
  10. use std::collections::HashMap;
  11. use std::fs::{self, File};
  12. use std::hash::{Hash, Hasher};
  13. use std::path::{Path, PathBuf};
  14. use image::jpeg::JPEGEncoder;
  15. use image::png::PNGEncoder;
  16. use image::{FilterType, GenericImageView};
  17. use rayon::prelude::*;
  18. use regex::Regex;
  19. use errors::{Error, Result};
  20. use utils::fs as ufs;
  21. static RESIZED_SUBDIR: &'static str = "processed_images";
  22. lazy_static! {
  23. pub static ref RESIZED_FILENAME: Regex =
  24. Regex::new(r#"([0-9a-f]{16})([0-9a-f]{2})[.](jpg|png)"#).unwrap();
  25. }
  26. /// Describes the precise kind of a resize operation
  27. #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  28. pub enum ResizeOp {
  29. /// A simple scale operation that doesn't take aspect ratio into account
  30. Scale(u32, u32),
  31. /// Scales the image to a specified width with height computed such
  32. /// that aspect ratio is preserved
  33. FitWidth(u32),
  34. /// Scales the image to a specified height with width computed such
  35. /// that aspect ratio is preserved
  36. FitHeight(u32),
  37. /// Scales the image such that it fits within the specified width and
  38. /// height preserving aspect ratio.
  39. /// Either dimension may end up being smaller, but never larger than specified.
  40. Fit(u32, u32),
  41. /// Scales the image such that it fills the specified width and height.
  42. /// Output will always have the exact dimensions specified.
  43. /// The part of the image that doesn't fit in the thumbnail due to differing
  44. /// aspect ratio will be cropped away, if any.
  45. Fill(u32, u32),
  46. }
  47. impl ResizeOp {
  48. pub fn from_args(op: &str, width: Option<u32>, height: Option<u32>) -> Result<ResizeOp> {
  49. use ResizeOp::*;
  50. // Validate args:
  51. match op {
  52. "fit_width" => {
  53. if width.is_none() {
  54. return Err("op=\"fit_width\" requires a `width` argument".to_string().into());
  55. }
  56. }
  57. "fit_height" => {
  58. if height.is_none() {
  59. return Err("op=\"fit_height\" requires a `height` argument"
  60. .to_string()
  61. .into());
  62. }
  63. }
  64. "scale" | "fit" | "fill" => {
  65. if width.is_none() || height.is_none() {
  66. return Err(
  67. format!("op={} requires a `width` and `height` argument", op).into()
  68. );
  69. }
  70. }
  71. _ => return Err(format!("Invalid image resize operation: {}", op).into()),
  72. };
  73. Ok(match op {
  74. "scale" => Scale(width.unwrap(), height.unwrap()),
  75. "fit_width" => FitWidth(width.unwrap()),
  76. "fit_height" => FitHeight(height.unwrap()),
  77. "fit" => Fit(width.unwrap(), height.unwrap()),
  78. "fill" => Fill(width.unwrap(), height.unwrap()),
  79. _ => unreachable!(),
  80. })
  81. }
  82. pub fn width(self) -> Option<u32> {
  83. use ResizeOp::*;
  84. match self {
  85. Scale(w, _) => Some(w),
  86. FitWidth(w) => Some(w),
  87. FitHeight(_) => None,
  88. Fit(w, _) => Some(w),
  89. Fill(w, _) => Some(w),
  90. }
  91. }
  92. pub fn height(self) -> Option<u32> {
  93. use ResizeOp::*;
  94. match self {
  95. Scale(_, h) => Some(h),
  96. FitWidth(_) => None,
  97. FitHeight(h) => Some(h),
  98. Fit(_, h) => Some(h),
  99. Fill(_, h) => Some(h),
  100. }
  101. }
  102. }
  103. impl From<ResizeOp> for u8 {
  104. fn from(op: ResizeOp) -> u8 {
  105. use ResizeOp::*;
  106. match op {
  107. Scale(_, _) => 1,
  108. FitWidth(_) => 2,
  109. FitHeight(_) => 3,
  110. Fit(_, _) => 4,
  111. Fill(_, _) => 5,
  112. }
  113. }
  114. }
  115. impl Hash for ResizeOp {
  116. fn hash<H: Hasher>(&self, hasher: &mut H) {
  117. hasher.write_u8(u8::from(*self));
  118. if let Some(w) = self.width() {
  119. hasher.write_u32(w);
  120. }
  121. if let Some(h) = self.height() {
  122. hasher.write_u32(h);
  123. }
  124. }
  125. }
  126. /// Thumbnail image format
  127. #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  128. pub enum Format {
  129. /// JPEG, The `u8` argument is JPEG quality (in percent).
  130. Jpeg(u8),
  131. /// PNG
  132. Png,
  133. }
  134. impl Format {
  135. pub fn from_args(source: &str, format: &str, quality: u8) -> Result<Format> {
  136. use Format::*;
  137. assert!(quality > 0 && quality <= 100, "Jpeg quality must be within the range [1; 100]");
  138. match format {
  139. "auto" => match Self::is_lossy(source) {
  140. Some(true) => Ok(Jpeg(quality)),
  141. Some(false) => Ok(Png),
  142. None => Err(format!("Unsupported image file: {}", source).into()),
  143. },
  144. "jpeg" | "jpg" => Ok(Jpeg(quality)),
  145. "png" => Ok(Png),
  146. _ => Err(format!("Invalid image format: {}", format).into()),
  147. }
  148. }
  149. /// Looks at file's extension and, if it's a supported image format, returns whether the format is lossless
  150. pub fn is_lossy<P: AsRef<Path>>(p: P) -> Option<bool> {
  151. p.as_ref()
  152. .extension()
  153. .and_then(std::ffi::OsStr::to_str)
  154. .map(|ext| match ext.to_lowercase().as_str() {
  155. "jpg" | "jpeg" => Some(true),
  156. "png" => Some(false),
  157. "gif" => Some(false),
  158. "bmp" => Some(false),
  159. _ => None,
  160. })
  161. .unwrap_or(None)
  162. }
  163. fn extension(&self) -> &str {
  164. // Kept in sync with RESIZED_FILENAME and op_filename
  165. use Format::*;
  166. match *self {
  167. Png => "png",
  168. Jpeg(_) => "jpg",
  169. }
  170. }
  171. }
  172. impl Hash for Format {
  173. fn hash<H: Hasher>(&self, hasher: &mut H) {
  174. use Format::*;
  175. let q = match *self {
  176. Png => 0,
  177. Jpeg(q) => q,
  178. };
  179. hasher.write_u8(q);
  180. }
  181. }
  182. /// Holds all data needed to perform a resize operation
  183. #[derive(Debug, PartialEq, Eq)]
  184. pub struct ImageOp {
  185. source: String,
  186. op: ResizeOp,
  187. format: Format,
  188. /// Hash of the above parameters
  189. hash: u64,
  190. /// If there is a hash collision with another ImageOp, this contains a sequential ID > 1
  191. /// identifying the collision in the order as encountered (which is essentially random).
  192. /// Therefore, ImageOps with collisions (ie. collision_id > 0) are always considered out of date.
  193. /// Note that this is very unlikely to happen in practice
  194. collision_id: u32,
  195. }
  196. impl ImageOp {
  197. pub fn new(source: String, op: ResizeOp, format: Format) -> ImageOp {
  198. let mut hasher = DefaultHasher::new();
  199. hasher.write(source.as_ref());
  200. op.hash(&mut hasher);
  201. format.hash(&mut hasher);
  202. let hash = hasher.finish();
  203. ImageOp { source, op, format, hash, collision_id: 0 }
  204. }
  205. pub fn from_args(
  206. source: String,
  207. op: &str,
  208. width: Option<u32>,
  209. height: Option<u32>,
  210. format: &str,
  211. quality: u8,
  212. ) -> Result<ImageOp> {
  213. let op = ResizeOp::from_args(op, width, height)?;
  214. let format = Format::from_args(&source, format, quality)?;
  215. Ok(Self::new(source, op, format))
  216. }
  217. fn perform(&self, content_path: &Path, target_path: &Path) -> Result<()> {
  218. use ResizeOp::*;
  219. let src_path = content_path.join(&self.source);
  220. if !ufs::file_stale(&src_path, target_path) {
  221. return Ok(());
  222. }
  223. let mut img = image::open(&src_path)?;
  224. let (img_w, img_h) = img.dimensions();
  225. const RESIZE_FILTER: FilterType = FilterType::Lanczos3;
  226. const RATIO_EPSILLION: f32 = 0.1;
  227. let img = match self.op {
  228. Scale(w, h) => img.resize_exact(w, h, RESIZE_FILTER),
  229. FitWidth(w) => img.resize(w, u32::max_value(), RESIZE_FILTER),
  230. FitHeight(h) => img.resize(u32::max_value(), h, RESIZE_FILTER),
  231. Fit(w, h) => img.resize(w, h, RESIZE_FILTER),
  232. Fill(w, h) => {
  233. let factor_w = img_w as f32 / w as f32;
  234. let factor_h = img_h as f32 / h as f32;
  235. if (factor_w - factor_h).abs() <= RATIO_EPSILLION {
  236. // If the horizontal and vertical factor is very similar,
  237. // that means the aspect is similar enough that there's not much point
  238. // in cropping, so just perform a simple scale in this case.
  239. img.resize_exact(w, h, RESIZE_FILTER)
  240. } else {
  241. // We perform the fill such that a crop is performed first
  242. // and then resize_exact can be used, which should be cheaper than
  243. // resizing and then cropping (smaller number of pixels to resize).
  244. let (crop_w, crop_h) = if factor_w < factor_h {
  245. (img_w, (factor_w * h as f32).round() as u32)
  246. } else {
  247. ((factor_h * w as f32).round() as u32, img_h)
  248. };
  249. let (offset_w, offset_h) = if factor_w < factor_h {
  250. (0, (img_h - crop_h) / 2)
  251. } else {
  252. ((img_w - crop_w) / 2, 0)
  253. };
  254. img.crop(offset_w, offset_h, crop_w, crop_h).resize_exact(w, h, RESIZE_FILTER)
  255. }
  256. }
  257. };
  258. let mut f = File::create(target_path)?;
  259. let (img_w, img_h) = img.dimensions();
  260. match self.format {
  261. Format::Png => {
  262. let enc = PNGEncoder::new(&mut f);
  263. enc.encode(&img.raw_pixels(), img_w, img_h, img.color())?;
  264. }
  265. Format::Jpeg(q) => {
  266. let mut enc = JPEGEncoder::new_with_quality(&mut f, q);
  267. enc.encode(&img.raw_pixels(), img_w, img_h, img.color())?;
  268. }
  269. }
  270. Ok(())
  271. }
  272. }
  273. /// A strcture into which image operations can be enqueued and then performed.
  274. /// All output is written in a subdirectory in `static_path`,
  275. /// taking care of file stale status based on timestamps and possible hash collisions.
  276. #[derive(Debug)]
  277. pub struct Processor {
  278. content_path: PathBuf,
  279. resized_path: PathBuf,
  280. resized_url: String,
  281. /// A map of a ImageOps by their stored hash.
  282. /// Note that this cannot be a HashSet, because hashset handles collisions and we don't want that,
  283. /// we need to be aware of and handle collisions ourselves.
  284. img_ops: HashMap<u64, ImageOp>,
  285. /// Hash collisions go here:
  286. img_ops_collisions: Vec<ImageOp>,
  287. }
  288. impl Processor {
  289. pub fn new(content_path: PathBuf, static_path: &Path, base_url: &str) -> Processor {
  290. Processor {
  291. content_path,
  292. resized_path: static_path.join(RESIZED_SUBDIR),
  293. resized_url: Self::resized_url(base_url),
  294. img_ops: HashMap::new(),
  295. img_ops_collisions: Vec::new(),
  296. }
  297. }
  298. fn resized_url(base_url: &str) -> String {
  299. if base_url.ends_with('/') {
  300. format!("{}{}", base_url, RESIZED_SUBDIR)
  301. } else {
  302. format!("{}/{}", base_url, RESIZED_SUBDIR)
  303. }
  304. }
  305. pub fn set_base_url(&mut self, base_url: &str) {
  306. self.resized_url = Self::resized_url(base_url);
  307. }
  308. pub fn source_exists(&self, source: &str) -> bool {
  309. self.content_path.join(source).exists()
  310. }
  311. pub fn num_img_ops(&self) -> usize {
  312. self.img_ops.len() + self.img_ops_collisions.len()
  313. }
  314. fn insert_with_collisions(&mut self, mut img_op: ImageOp) -> u32 {
  315. match self.img_ops.entry(img_op.hash) {
  316. HEntry::Occupied(entry) => {
  317. if *entry.get() == img_op {
  318. return 0;
  319. }
  320. }
  321. HEntry::Vacant(entry) => {
  322. entry.insert(img_op);
  323. return 0;
  324. }
  325. }
  326. // If we get here, that means a hash collision.
  327. // This is detected when there is an ImageOp with the same hash in the `img_ops`
  328. // map but which is not equal to this one.
  329. // To deal with this, all collisions get a (random) sequential ID number.
  330. // First try to look up this ImageOp in `img_ops_collisions`, maybe we've
  331. // already seen the same ImageOp.
  332. // At the same time, count IDs to figure out the next free one.
  333. // Start with the ID of 2, because we'll need to use 1 for the ImageOp
  334. // already present in the map:
  335. let mut collision_id = 2;
  336. for op in self.img_ops_collisions.iter().filter(|op| op.hash == img_op.hash) {
  337. if *op == img_op {
  338. // This is a colliding ImageOp, but we've already seen an equal one
  339. // (not just by hash, but by content too), so just return its ID:
  340. return collision_id;
  341. } else {
  342. collision_id += 1;
  343. }
  344. }
  345. // If we get here, that means this is a new colliding ImageOp and
  346. // `collision_id` is the next free ID
  347. if collision_id == 2 {
  348. // This is the first collision found with this hash, update the ID
  349. // of the matching ImageOp in the map.
  350. self.img_ops.get_mut(&img_op.hash).unwrap().collision_id = 1;
  351. }
  352. img_op.collision_id = collision_id;
  353. self.img_ops_collisions.push(img_op);
  354. collision_id
  355. }
  356. fn op_filename(hash: u64, collision_id: u32, format: Format) -> String {
  357. // Please keep this in sync with RESIZED_FILENAME
  358. assert!(collision_id < 256, "Unexpectedly large number of collisions: {}", collision_id);
  359. format!("{:016x}{:02x}.{}", hash, collision_id, format.extension())
  360. }
  361. fn op_url(&self, hash: u64, collision_id: u32, format: Format) -> String {
  362. format!("{}/{}", &self.resized_url, Self::op_filename(hash, collision_id, format))
  363. }
  364. pub fn insert(&mut self, img_op: ImageOp) -> String {
  365. let hash = img_op.hash;
  366. let format = img_op.format;
  367. let collision_id = self.insert_with_collisions(img_op);
  368. self.op_url(hash, collision_id, format)
  369. }
  370. pub fn prune(&self) -> Result<()> {
  371. // Do not create folders if they don't exist
  372. if !self.resized_path.exists() {
  373. return Ok(());
  374. }
  375. ufs::ensure_directory_exists(&self.resized_path)?;
  376. let entries = fs::read_dir(&self.resized_path)?;
  377. for entry in entries {
  378. let entry_path = entry?.path();
  379. if entry_path.is_file() {
  380. let filename = entry_path.file_name().unwrap().to_string_lossy();
  381. if let Some(capts) = RESIZED_FILENAME.captures(filename.as_ref()) {
  382. let hash = u64::from_str_radix(capts.get(1).unwrap().as_str(), 16).unwrap();
  383. let collision_id =
  384. u32::from_str_radix(capts.get(2).unwrap().as_str(), 16).unwrap();
  385. if collision_id > 0 || !self.img_ops.contains_key(&hash) {
  386. fs::remove_file(&entry_path)?;
  387. }
  388. }
  389. }
  390. }
  391. Ok(())
  392. }
  393. pub fn do_process(&mut self) -> Result<()> {
  394. if !self.img_ops.is_empty() {
  395. ufs::ensure_directory_exists(&self.resized_path)?;
  396. }
  397. self.img_ops
  398. .par_iter()
  399. .map(|(hash, op)| {
  400. let target =
  401. self.resized_path.join(Self::op_filename(*hash, op.collision_id, op.format));
  402. op.perform(&self.content_path, &target)
  403. .map_err(|e| Error::chain(format!("Failed to process image: {}", op.source), e))
  404. })
  405. .collect::<Result<()>>()
  406. }
  407. }