398 lines

  1. #[macro_use]
  2. extern crate lazy_static;
  3. extern crate image;
  4. extern crate rayon;
  5. extern crate regex;
  6. extern crate errors;
  7. extern crate utils;
  8. use std::collections::hash_map::DefaultHasher;
  9. use std::collections::hash_map::Entry as HEntry;
  10. use std::collections::HashMap;
  11. use std::fs::{self, File};
  12. use std::hash::{Hash, Hasher};
  13. use std::path::{Path, PathBuf};
  14. use image::jpeg::JPEGEncoder;
  15. use image::{FilterType, GenericImageView};
  16. use rayon::prelude::*;
  17. use regex::Regex;
  18. use errors::{Result, ResultExt};
  19. use utils::fs as ufs;
  20. static RESIZED_SUBDIR: &'static str = "processed_images";
  21. lazy_static! {
  22. pub static ref RESIZED_FILENAME: Regex =
  23. Regex::new(r#"([0-9a-f]{16})([0-9a-f]{2})[.]jpg"#).unwrap();
  24. }
  /// The exact kind of resize to perform, together with its target dimensions
  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  pub enum ResizeOp {
      /// Plain scale to the given width and height; aspect ratio is ignored
      Scale(u32, u32),
      /// Scale to the given width; the height is derived so that
      /// the aspect ratio is preserved
      FitWidth(u32),
      /// Scale to the given height; the width is derived so that
      /// the aspect ratio is preserved
      FitHeight(u32),
      /// Scale so that the image fits inside the given width and height
      /// while preserving aspect ratio.
      /// Either dimension may come out smaller than requested, never larger.
      Fit(u32, u32),
      /// Scale so that the image fills the given width and height.
      /// The output always has exactly the requested dimensions;
      /// whatever does not fit because of a differing aspect ratio is cropped away.
      Fill(u32, u32),
  }
  46. impl ResizeOp {
  47. pub fn from_args(op: &str, width: Option<u32>, height: Option<u32>) -> Result<ResizeOp> {
  48. use ResizeOp::*;
  49. // Validate args:
  50. match op {
  51. "fit_width" => {
  52. if width.is_none() {
  53. return Err("op=\"fit_width\" requires a `width` argument".to_string().into());
  54. }
  55. }
  56. "fit_height" => {
  57. if height.is_none() {
  58. return Err("op=\"fit_height\" requires a `height` argument".to_string().into());
  59. }
  60. }
  61. "scale" | "fit" | "fill" => {
  62. if width.is_none() || height.is_none() {
  63. return Err(format!("op={} requires a `width` and `height` argument", op).into());
  64. }
  65. }
  66. _ => return Err(format!("Invalid image resize operation: {}", op).into()),
  67. };
  68. Ok(match op {
  69. "scale" => Scale(width.unwrap(), height.unwrap()),
  70. "fit_width" => FitWidth(width.unwrap()),
  71. "fit_height" => FitHeight(height.unwrap()),
  72. "fit" => Fit(width.unwrap(), height.unwrap()),
  73. "fill" => Fill(width.unwrap(), height.unwrap()),
  74. _ => unreachable!(),
  75. })
  76. }
  77. pub fn width(self) -> Option<u32> {
  78. use ResizeOp::*;
  79. match self {
  80. Scale(w, _) => Some(w),
  81. FitWidth(w) => Some(w),
  82. FitHeight(_) => None,
  83. Fit(w, _) => Some(w),
  84. Fill(w, _) => Some(w),
  85. }
  86. }
  87. pub fn height(self) -> Option<u32> {
  88. use ResizeOp::*;
  89. match self {
  90. Scale(_, h) => Some(h),
  91. FitWidth(_) => None,
  92. FitHeight(h) => Some(h),
  93. Fit(_, h) => Some(h),
  94. Fill(_, h) => Some(h),
  95. }
  96. }
  97. }
  98. impl From<ResizeOp> for u8 {
  99. fn from(op: ResizeOp) -> u8 {
  100. use ResizeOp::*;
  101. match op {
  102. Scale(_, _) => 1,
  103. FitWidth(_) => 2,
  104. FitHeight(_) => 3,
  105. Fit(_, _) => 4,
  106. Fill(_, _) => 5,
  107. }
  108. }
  109. }
  110. impl Hash for ResizeOp {
  111. fn hash<H: Hasher>(&self, hasher: &mut H) {
  112. hasher.write_u8(u8::from(*self));
  113. if let Some(w) = self.width() {
  114. hasher.write_u32(w);
  115. }
  116. if let Some(h) = self.height() {
  117. hasher.write_u32(h);
  118. }
  119. }
  120. }
  121. /// Holds all data needed to perform a resize operation
  122. #[derive(Debug, PartialEq, Eq)]
  123. pub struct ImageOp {
  124. source: String,
  125. op: ResizeOp,
  126. quality: u8,
  127. /// Hash of the above parameters
  128. hash: u64,
  129. /// If there is a hash collision with another ImageOp, this contains a sequential ID > 1
  130. /// identifying the collision in the order as encountered (which is essentially random).
  131. /// Therefore, ImageOps with collisions (ie. collision_id > 0) are always considered out of date.
  132. /// Note that this is very unlikely to happen in practice
  133. collision_id: u32,
  134. }
  135. impl ImageOp {
  136. pub fn new(source: String, op: ResizeOp, quality: u8) -> ImageOp {
  137. let mut hasher = DefaultHasher::new();
  138. hasher.write(source.as_ref());
  139. op.hash(&mut hasher);
  140. hasher.write_u8(quality);
  141. let hash = hasher.finish();
  142. ImageOp { source, op, quality, hash, collision_id: 0 }
  143. }
  144. pub fn from_args(
  145. source: String,
  146. op: &str,
  147. width: Option<u32>,
  148. height: Option<u32>,
  149. quality: u8,
  150. ) -> Result<ImageOp> {
  151. let op = ResizeOp::from_args(op, width, height)?;
  152. Ok(Self::new(source, op, quality))
  153. }
  154. fn perform(&self, content_path: &Path, target_path: &Path) -> Result<()> {
  155. use ResizeOp::*;
  156. let src_path = content_path.join(&self.source);
  157. if !ufs::file_stale(&src_path, target_path) {
  158. return Ok(());
  159. }
  160. let mut img = image::open(&src_path)?;
  161. let (img_w, img_h) = img.dimensions();
  162. const RESIZE_FILTER: FilterType = FilterType::Gaussian;
  163. const RATIO_EPSILLION: f32 = 0.1;
  164. let img = match self.op {
  165. Scale(w, h) => img.resize_exact(w, h, RESIZE_FILTER),
  166. FitWidth(w) => img.resize(w, u32::max_value(), RESIZE_FILTER),
  167. FitHeight(h) => img.resize(u32::max_value(), h, RESIZE_FILTER),
  168. Fit(w, h) => img.resize(w, h, RESIZE_FILTER),
  169. Fill(w, h) => {
  170. let factor_w = img_w as f32 / w as f32;
  171. let factor_h = img_h as f32 / h as f32;
  172. if (factor_w - factor_h).abs() <= RATIO_EPSILLION {
  173. // If the horizontal and vertical factor is very similar,
  174. // that means the aspect is similar enough that there's not much point
  175. // in cropping, so just perform a simple scale in this case.
  176. img.resize_exact(w, h, RESIZE_FILTER)
  177. } else {
  178. // We perform the fill such that a crop is performed first
  179. // and then resize_exact can be used, which should be cheaper than
  180. // resizing and then cropping (smaller number of pixels to resize).
  181. let (crop_w, crop_h) = if factor_w < factor_h {
  182. (img_w, (factor_w * h as f32).round() as u32)
  183. } else {
  184. ((factor_h * w as f32).round() as u32, img_h)
  185. };
  186. let (offset_w, offset_h) = if factor_w < factor_h {
  187. (0, (img_h - crop_h) / 2)
  188. } else {
  189. ((img_w - crop_w) / 2, 0)
  190. };
  191. img.crop(offset_w, offset_h, crop_w, crop_h).resize_exact(w, h, RESIZE_FILTER)
  192. }
  193. }
  194. };
  195. let mut f = File::create(target_path)?;
  196. let mut enc = JPEGEncoder::new_with_quality(&mut f, self.quality);
  197. let (img_w, img_h) = img.dimensions();
  198. enc.encode(&img.raw_pixels(), img_w, img_h, img.color())?;
  199. Ok(())
  200. }
  201. }
  202. /// A strcture into which image operations can be enqueued and then performed.
  203. /// All output is written in a subdirectory in `static_path`,
  204. /// taking care of file stale status based on timestamps and possible hash collisions.
  205. #[derive(Debug)]
  206. pub struct Processor {
  207. content_path: PathBuf,
  208. resized_path: PathBuf,
  209. resized_url: String,
  210. /// A map of a ImageOps by their stored hash.
  211. /// Note that this cannot be a HashSet, because hashset handles collisions and we don't want that,
  212. /// we need to be aware of and handle collisions ourselves.
  213. img_ops: HashMap<u64, ImageOp>,
  214. /// Hash collisions go here:
  215. img_ops_collisions: Vec<ImageOp>,
  216. }
  217. impl Processor {
  218. pub fn new(content_path: PathBuf, static_path: &Path, base_url: &str) -> Processor {
  219. Processor {
  220. content_path,
  221. resized_path: static_path.join(RESIZED_SUBDIR),
  222. resized_url: Self::resized_url(base_url),
  223. img_ops: HashMap::new(),
  224. img_ops_collisions: Vec::new(),
  225. }
  226. }
  227. fn resized_url(base_url: &str) -> String {
  228. if base_url.ends_with('/') {
  229. format!("{}{}", base_url, RESIZED_SUBDIR)
  230. } else {
  231. format!("{}/{}", base_url, RESIZED_SUBDIR)
  232. }
  233. }
  234. pub fn set_base_url(&mut self, base_url: &str) {
  235. self.resized_url = Self::resized_url(base_url);
  236. }
  237. pub fn source_exists(&self, source: &str) -> bool {
  238. self.content_path.join(source).exists()
  239. }
  240. pub fn num_img_ops(&self) -> usize {
  241. self.img_ops.len() + self.img_ops_collisions.len()
  242. }
  243. fn insert_with_collisions(&mut self, mut img_op: ImageOp) -> u32 {
  244. match self.img_ops.entry(img_op.hash) {
  245. HEntry::Occupied(entry) => {
  246. if *entry.get() == img_op {
  247. return 0;
  248. }
  249. }
  250. HEntry::Vacant(entry) => {
  251. entry.insert(img_op);
  252. return 0;
  253. }
  254. }
  255. // If we get here, that means a hash collision.
  256. // This is detected when there is an ImageOp with the same hash in the `img_ops`
  257. // map but which is not equal to this one.
  258. // To deal with this, all collisions get a (random) sequential ID number.
  259. // First try to look up this ImageOp in `img_ops_collisions`, maybe we've
  260. // already seen the same ImageOp.
  261. // At the same time, count IDs to figure out the next free one.
  262. // Start with the ID of 2, because we'll need to use 1 for the ImageOp
  263. // already present in the map:
  264. let mut collision_id = 2;
  265. for op in self.img_ops_collisions.iter().filter(|op| op.hash == img_op.hash) {
  266. if *op == img_op {
  267. // This is a colliding ImageOp, but we've already seen an equal one
  268. // (not just by hash, but by content too), so just return its ID:
  269. return collision_id;
  270. } else {
  271. collision_id += 1;
  272. }
  273. }
  274. // If we get here, that means this is a new colliding ImageOp and
  275. // `collision_id` is the next free ID
  276. if collision_id == 2 {
  277. // This is the first collision found with this hash, update the ID
  278. // of the matching ImageOp in the map.
  279. self.img_ops.get_mut(&img_op.hash).unwrap().collision_id = 1;
  280. }
  281. img_op.collision_id = collision_id;
  282. self.img_ops_collisions.push(img_op);
  283. collision_id
  284. }
  285. fn op_filename(hash: u64, collision_id: u32) -> String {
  286. // Please keep this in sync with RESIZED_FILENAME
  287. assert!(collision_id < 256, "Unexpectedly large number of collisions: {}", collision_id);
  288. format!("{:016x}{:02x}.jpg", hash, collision_id)
  289. }
  290. fn op_url(&self, hash: u64, collision_id: u32) -> String {
  291. format!("{}/{}", &self.resized_url, Self::op_filename(hash, collision_id))
  292. }
  293. pub fn insert(&mut self, img_op: ImageOp) -> String {
  294. let hash = img_op.hash;
  295. let collision_id = self.insert_with_collisions(img_op);
  296. self.op_url(hash, collision_id)
  297. }
  298. pub fn prune(&self) -> Result<()> {
  299. // Do not create folders if they don't exist
  300. if !self.resized_path.exists() {
  301. return Ok(());
  302. }
  303. ufs::ensure_directory_exists(&self.resized_path)?;
  304. let entries = fs::read_dir(&self.resized_path)?;
  305. for entry in entries {
  306. let entry_path = entry?.path();
  307. if entry_path.is_file() {
  308. let filename = entry_path.file_name().unwrap().to_string_lossy();
  309. if let Some(capts) = RESIZED_FILENAME.captures(filename.as_ref()) {
  310. let hash = u64::from_str_radix(capts.get(1).unwrap().as_str(), 16).unwrap();
  311. let collision_id =
  312. u32::from_str_radix(capts.get(2).unwrap().as_str(), 16).unwrap();
  313. if collision_id > 0 || !self.img_ops.contains_key(&hash) {
  314. fs::remove_file(&entry_path)?;
  315. }
  316. }
  317. }
  318. }
  319. Ok(())
  320. }
  321. pub fn do_process(&mut self) -> Result<()> {
  322. if !self.img_ops.is_empty() {
  323. ufs::ensure_directory_exists(&self.resized_path)?;
  324. }
  325. self.img_ops
  326. .par_iter()
  327. .map(|(hash, op)| {
  328. let target = self.resized_path.join(Self::op_filename(*hash, op.collision_id));
  329. op.perform(&self.content_path, &target)
  330. .chain_err(|| format!("Failed to process image: {}", op.source))
  331. })
  332. .collect::<Result<()>>()
  333. }
  334. }
  /// Tells whether the path's extension, compared case-insensitively,
  /// names one of the supported image formats (jpg, jpeg, png, gif, bmp)
  pub fn file_is_img<P: AsRef<Path>>(p: P) -> bool {
      // No extension, or one that is not valid UTF-8, can never be a supported format.
      let extension = match p.as_ref().extension().and_then(|raw| raw.to_str()) {
          Some(text) => text.to_lowercase(),
          None => return false,
      };

      match extension.as_str() {
          "jpg" | "jpeg" | "png" | "gif" | "bmp" => true,
          _ => false,
      }
  }