You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

385 lines
13KB

  1. #[macro_use]
  2. extern crate lazy_static;
  3. extern crate regex;
  4. extern crate image;
  5. extern crate rayon;
  6. extern crate utils;
  7. extern crate errors;
  8. use std::path::{Path, PathBuf};
  9. use std::hash::{Hash, Hasher};
  10. use std::collections::HashMap;
  11. use std::collections::hash_map::Entry as HEntry;
  12. use std::collections::hash_map::DefaultHasher;
  13. use std::fs::{self, File};
  14. use regex::Regex;
  15. use image::{GenericImage, FilterType};
  16. use image::jpeg::JPEGEncoder;
  17. use rayon::prelude::*;
  18. use utils::fs as ufs;
  19. use errors::{Result, ResultExt};
// Name of the subdirectory that holds the resized images. `Processor` joins it
// onto both the static output path and the base URL.
static RESIZED_SUBDIR: &'static str = "_processed_images";

lazy_static!{
    // Pattern of the file names produced by `Processor::op_filename`:
    // capture 1 is the 16 hex digit hash, capture 2 the 2 hex digit collision ID.
    // NOTE(review): the pattern is not anchored, so it also matches when the
    // hex run is only a part of a longer file name — confirm this is intended.
    pub static ref RESIZED_FILENAME: Regex = Regex::new(r#"([0-9a-f]{16})([0-9a-f]{2})[.]jpg"#).unwrap();
}
/// Describes the precise kind of a resize operation.
///
/// Every variant carries the target dimension(s) that are later handed to the
/// resize routines; the argument order is always `(width, height)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResizeOp {
    /// A simple scale operation that doesn't take aspect ratio into account
    Scale(u32, u32),
    /// Scales the image to a specified width with height computed such
    /// that aspect ratio is preserved
    FitWidth(u32),
    /// Scales the image to a specified height with width computed such
    /// that aspect ratio is preserved
    FitHeight(u32),
    /// Scales the image such that it fits within the specified width and
    /// height preserving aspect ratio.
    /// Either dimension may end up being smaller, but never larger than specified.
    Fit(u32, u32),
    /// Scales the image such that it fills the specified width and height.
    /// Output will always have the exact dimensions specified.
    /// The part of the image that doesn't fit in the thumbnail due to differing
    /// aspect ratio will be cropped away, if any.
    Fill(u32, u32),
}
  45. impl ResizeOp {
  46. pub fn from_args(op: &str, width: Option<u32>, height: Option<u32>) -> Result<ResizeOp> {
  47. use ResizeOp::*;
  48. // Validate args:
  49. match op {
  50. "fit_width" => if width.is_none() {
  51. return Err("op=\"fit_width\" requires a `width` argument".to_string().into())
  52. },
  53. "fit_height" => if height.is_none() {
  54. return Err("op=\"fit_height\" requires a `height` argument".to_string().into())
  55. },
  56. "scale" | "fit" | "fill" => if width.is_none() || height.is_none() {
  57. return Err(format!("op={} requires a `width` and `height` argument", op).into())
  58. },
  59. _ => return Err(format!("Invalid image resize operation: {}", op).into())
  60. };
  61. Ok(match op {
  62. "scale" => Scale(width.unwrap(), height.unwrap()),
  63. "fit_width" => FitWidth(width.unwrap()),
  64. "fit_height" => FitHeight(height.unwrap()),
  65. "fit" => Fit(width.unwrap(), height.unwrap()),
  66. "fill" => Fill(width.unwrap(), height.unwrap()),
  67. _ => unreachable!(),
  68. })
  69. }
  70. pub fn width(self) -> Option<u32> {
  71. use ResizeOp::*;
  72. match self {
  73. Scale(w, _) => Some(w),
  74. FitWidth(w) => Some(w),
  75. FitHeight(_) => None,
  76. Fit(w, _) => Some(w),
  77. Fill(w, _) => Some(w),
  78. }
  79. }
  80. pub fn height(self) -> Option<u32> {
  81. use ResizeOp::*;
  82. match self {
  83. Scale(_, h) => Some(h),
  84. FitWidth(_) => None,
  85. FitHeight(h) => Some(h),
  86. Fit(_, h) => Some(h),
  87. Fill(_, h) => Some(h),
  88. }
  89. }
  90. }
  91. impl From<ResizeOp> for u8 {
  92. fn from(op: ResizeOp) -> u8 {
  93. use ResizeOp::*;
  94. match op {
  95. Scale(_, _) => 1,
  96. FitWidth(_) => 2,
  97. FitHeight(_) => 3,
  98. Fit(_, _) => 4,
  99. Fill(_, _) => 5,
  100. }
  101. }
  102. }
  103. impl Hash for ResizeOp {
  104. fn hash<H: Hasher>(&self, hasher: &mut H) {
  105. hasher.write_u8(u8::from(*self));
  106. if let Some(w) = self.width() { hasher.write_u32(w); }
  107. if let Some(h) = self.height() { hasher.write_u32(h); }
  108. }
  109. }
/// Holds all data needed to perform a resize operation
#[derive(Debug, PartialEq, Eq)]
pub struct ImageOp {
    /// Path of the source image; it is joined onto the content path when the
    /// operation is performed.
    source: String,
    /// The kind of resize to perform, including the target dimensions.
    op: ResizeOp,
    /// Quality setting handed to the JPEG encoder.
    quality: u8,
    /// Hash of the above parameters
    hash: u64,
    /// If there is a hash collision with another ImageOp, this contains a sequential ID >= 1
    /// identifying the collision in the order as encountered (which is essentially random).
    /// Therefore, ImageOps with collisions (ie. collision_id > 0) are always considered out of date.
    /// Without a collision this stays 0.
    /// Note that this is very unlikely to happen in practice
    collision_id: u32,
}
  124. impl ImageOp {
  125. pub fn new(source: String, op: ResizeOp, quality: u8) -> ImageOp {
  126. let mut hasher = DefaultHasher::new();
  127. hasher.write(source.as_ref());
  128. op.hash(&mut hasher);
  129. hasher.write_u8(quality);
  130. let hash = hasher.finish();
  131. ImageOp { source, op, quality, hash, collision_id: 0 }
  132. }
  133. pub fn from_args(
  134. source: String,
  135. op: &str,
  136. width: Option<u32>,
  137. height: Option<u32>,
  138. quality: u8,
  139. ) -> Result<ImageOp> {
  140. let op = ResizeOp::from_args(op, width, height)?;
  141. Ok(Self::new(source, op, quality))
  142. }
  143. fn perform(&self, content_path: &Path, target_path: &Path) -> Result<()> {
  144. use ResizeOp::*;
  145. let src_path = content_path.join(&self.source);
  146. if !ufs::file_stale(&src_path, target_path) {
  147. return Ok(())
  148. }
  149. let mut img = image::open(&src_path)?;
  150. let (img_w, img_h) = img.dimensions();
  151. const RESIZE_FILTER: FilterType = FilterType::Gaussian;
  152. const RATIO_EPSILLION: f32 = 0.1;
  153. let img = match self.op {
  154. Scale(w, h) => img.resize_exact(w, h, RESIZE_FILTER),
  155. FitWidth(w) => img.resize(w, u32::max_value(), RESIZE_FILTER),
  156. FitHeight(h) => img.resize(u32::max_value(), h, RESIZE_FILTER),
  157. Fit(w, h) => img.resize(w, h, RESIZE_FILTER),
  158. Fill(w, h) => {
  159. let factor_w = img_w as f32 / w as f32;
  160. let factor_h = img_h as f32 / h as f32;
  161. if (factor_w - factor_h).abs() <= RATIO_EPSILLION {
  162. // If the horizontal and vertical factor is very similar,
  163. // that means the aspect is similar enough that there's not much point
  164. // in cropping, so just perform a simple scale in this case.
  165. img.resize_exact(w, h, RESIZE_FILTER)
  166. } else {
  167. // We perform the fill such that a crop is performed first
  168. // and then resize_exact can be used, which should be cheaper than
  169. // resizing and then cropping (smaller number of pixels to resize).
  170. let (crop_w, crop_h) = if factor_w < factor_h {
  171. (img_w, (factor_w * h as f32).round() as u32)
  172. } else {
  173. ((factor_h * w as f32).round() as u32, img_h)
  174. };
  175. let (offset_w, offset_h) = if factor_w < factor_h {
  176. (0, (img_h - crop_h) / 2)
  177. } else {
  178. ((img_w - crop_w) / 2, 0)
  179. };
  180. img.crop(offset_w, offset_h, crop_w, crop_h)
  181. .resize_exact(w, h, RESIZE_FILTER)
  182. }
  183. },
  184. };
  185. let mut f = File::create(target_path)?;
  186. let mut enc = JPEGEncoder::new_with_quality(&mut f, self.quality);
  187. let (img_w, img_h) = img.dimensions();
  188. enc.encode(&img.raw_pixels(), img_w, img_h, img.color())?;
  189. Ok(())
  190. }
  191. }
/// A structure into which image operations can be enqueued and then performed.
/// All output is written in a subdirectory in `static_path`,
/// taking care of file stale status based on timestamps and possible hash collisions.
#[derive(Debug)]
pub struct Processor {
    /// Directory against which the `source` of every ImageOp is resolved.
    content_path: PathBuf,
    /// Directory the resized images are written to (`static_path` joined with
    /// the resized-images subdirectory).
    resized_path: PathBuf,
    /// URL prefix of the resized images (the base URL followed by the
    /// resized-images subdirectory).
    resized_url: String,
    /// A map of a ImageOps by their stored hash.
    /// Note that this cannot be a HashSet, because hashset handles collisions and we don't want that,
    /// we need to be aware of and handle collisions ourselves.
    img_ops: HashMap<u64, ImageOp>,
    /// Hash collisions go here:
    img_ops_collisions: Vec<ImageOp>,
}
  207. impl Processor {
  208. pub fn new(content_path: PathBuf, static_path: &Path, base_url: &str) -> Processor {
  209. Processor {
  210. content_path,
  211. resized_path: static_path.join(RESIZED_SUBDIR),
  212. resized_url: Self::resized_url(base_url),
  213. img_ops: HashMap::new(),
  214. img_ops_collisions: Vec::new(),
  215. }
  216. }
  217. fn resized_url(base_url: &str) -> String {
  218. if base_url.ends_with('/') {
  219. format!("{}{}", base_url, RESIZED_SUBDIR)
  220. } else {
  221. format!("{}/{}", base_url, RESIZED_SUBDIR)
  222. }
  223. }
  224. pub fn set_base_url(&mut self, base_url: &str) {
  225. self.resized_url = Self::resized_url(base_url);
  226. }
  227. pub fn source_exists(&self, source: &str) -> bool {
  228. self.content_path.join(source).exists()
  229. }
  230. pub fn num_img_ops(&self) -> usize {
  231. self.img_ops.len() + self.img_ops_collisions.len()
  232. }
  233. fn insert_with_collisions(&mut self, mut img_op: ImageOp) -> u32 {
  234. match self.img_ops.entry(img_op.hash) {
  235. HEntry::Occupied(entry) => if *entry.get() == img_op { return 0; },
  236. HEntry::Vacant(entry) => {
  237. entry.insert(img_op);
  238. return 0;
  239. },
  240. }
  241. // If we get here, that means a hash collision.
  242. // This is detected when there is an ImageOp with the same hash in the `img_ops`
  243. // map but which is not equal to this one.
  244. // To deal with this, all collisions get a (random) sequential ID number.
  245. // First try to look up this ImageOp in `img_ops_collisions`, maybe we've
  246. // already seen the same ImageOp.
  247. // At the same time, count IDs to figure out the next free one.
  248. // Start with the ID of 2, because we'll need to use 1 for the ImageOp
  249. // already present in the map:
  250. let mut collision_id = 2;
  251. for op in self.img_ops_collisions.iter().filter(|op| op.hash == img_op.hash) {
  252. if *op == img_op {
  253. // This is a colliding ImageOp, but we've already seen an equal one
  254. // (not just by hash, but by content too), so just return its ID:
  255. return collision_id;
  256. } else {
  257. collision_id += 1;
  258. }
  259. }
  260. // If we get here, that means this is a new colliding ImageOp and
  261. // `collision_id` is the next free ID
  262. if collision_id == 2 {
  263. // This is the first collision found with this hash, update the ID
  264. // of the matching ImageOp in the map.
  265. self.img_ops.get_mut(&img_op.hash).unwrap().collision_id = 1;
  266. }
  267. img_op.collision_id = collision_id;
  268. self.img_ops_collisions.push(img_op);
  269. collision_id
  270. }
  271. fn op_filename(hash: u64, collision_id: u32) -> String {
  272. // Please keep this in sync with RESIZED_FILENAME
  273. assert!(collision_id < 256, "Unexpectedly large number of collisions: {}", collision_id);
  274. format!("{:016x}{:02x}.jpg", hash, collision_id)
  275. }
  276. fn op_url(&self, hash: u64, collision_id: u32) -> String {
  277. format!("{}/{}", &self.resized_url, Self::op_filename(hash, collision_id))
  278. }
  279. pub fn insert(&mut self, img_op: ImageOp) -> String {
  280. let hash = img_op.hash;
  281. let collision_id = self.insert_with_collisions(img_op);
  282. self.op_url(hash, collision_id)
  283. }
  284. pub fn prune(&self) -> Result<()> {
  285. // Do not create folders if they don't exist
  286. if !self.resized_path.exists() {
  287. return Ok(());
  288. }
  289. ufs::ensure_directory_exists(&self.resized_path)?;
  290. let entries = fs::read_dir(&self.resized_path)?;
  291. for entry in entries {
  292. let entry_path = entry?.path();
  293. if entry_path.is_file() {
  294. let filename = entry_path.file_name().unwrap().to_string_lossy();
  295. if let Some(capts) = RESIZED_FILENAME.captures(filename.as_ref()) {
  296. let hash = u64::from_str_radix(capts.get(1).unwrap().as_str(), 16).unwrap();
  297. let collision_id = u32::from_str_radix(
  298. capts.get(2).unwrap().as_str(), 16
  299. ).unwrap();
  300. if collision_id > 0 || !self.img_ops.contains_key(&hash) {
  301. fs::remove_file(&entry_path)?;
  302. }
  303. }
  304. }
  305. }
  306. Ok(())
  307. }
  308. pub fn do_process(&mut self) -> Result<()> {
  309. if !self.img_ops.is_empty() {
  310. ufs::ensure_directory_exists(&self.resized_path)?;
  311. }
  312. self.img_ops.par_iter().map(|(hash, op)| {
  313. let target = self.resized_path.join(Self::op_filename(*hash, op.collision_id));
  314. op.perform(&self.content_path, &target)
  315. .chain_err(|| format!("Failed to process image: {}", op.source))
  316. })
  317. .fold(|| Ok(()), Result::and)
  318. .reduce(|| Ok(()), Result::and)
  319. }
  320. }
  321. /// Looks at file's extension and returns whether it's a supported image format
  322. pub fn file_is_img<P: AsRef<Path>>(p: P) -> bool {
  323. p.as_ref().extension().and_then(|s| s.to_str()).map(|ext| {
  324. match ext.to_lowercase().as_str() {
  325. "jpg" | "jpeg" => true,
  326. "png" => true,
  327. "gif" => true,
  328. "bmp" => true,
  329. _ => false,
  330. }
  331. }).unwrap_or(false)
  332. }