noteshrink.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. #!/usr/bin/env python
  2. '''Converts sequence of images to compact PDF while removing speckles,
  3. bleedthrough, etc.
  4. '''
  5. # for some reason pylint complains about members being undefined :(
  6. # pylint: disable=E1101
  7. from __future__ import print_function
  8. import sys
  9. import os
  10. import re
  11. import subprocess
  12. import shlex
  13. from argparse import ArgumentParser
  14. import numpy as np
  15. from PIL import Image
  16. from scipy.cluster.vq import kmeans, vq
  17. ######################################################################
  18. def quantize(image, bits_per_channel=None):
  19. '''Reduces the number of bits per channel in the given image.'''
  20. if bits_per_channel is None:
  21. bits_per_channel = 6
  22. assert image.dtype == np.uint8
  23. shift = 8-bits_per_channel
  24. halfbin = (1 << shift) >> 1
  25. return ((image.astype(int) >> shift) << shift) + halfbin
  26. ######################################################################
  27. def pack_rgb(rgb):
  28. '''Packs a 24-bit RGB triples into a single integer,
  29. works on both arrays and tuples.'''
  30. orig_shape = None
  31. if isinstance(rgb, np.ndarray):
  32. assert rgb.shape[-1] == 3
  33. orig_shape = rgb.shape[:-1]
  34. else:
  35. assert len(rgb) == 3
  36. rgb = np.array(rgb)
  37. rgb = rgb.astype(int).reshape((-1, 3))
  38. packed = (rgb[:, 0] << 16 |
  39. rgb[:, 1] << 8 |
  40. rgb[:, 2])
  41. if orig_shape is None:
  42. return packed
  43. else:
  44. return packed.reshape(orig_shape)
  45. ######################################################################
  46. def unpack_rgb(packed):
  47. '''Unpacks a single integer or array of integers into one or more
  48. 24-bit RGB values.
  49. '''
  50. orig_shape = None
  51. if isinstance(packed, np.ndarray):
  52. assert packed.dtype == int
  53. orig_shape = packed.shape
  54. packed = packed.reshape((-1, 1))
  55. rgb = ((packed >> 16) & 0xff,
  56. (packed >> 8) & 0xff,
  57. (packed) & 0xff)
  58. if orig_shape is None:
  59. return rgb
  60. else:
  61. return np.hstack(rgb).reshape(orig_shape + (3,))
  62. ######################################################################
  63. def get_bg_color(image, bits_per_channel=None):
  64. '''Obtains the background color from an image or array of RGB colors
  65. by grouping similar colors into bins and finding the most frequent
  66. one.
  67. '''
  68. assert image.shape[-1] == 3
  69. quantized = quantize(image, bits_per_channel).astype(int)
  70. packed = pack_rgb(quantized)
  71. unique, counts = np.unique(packed, return_counts=True)
  72. packed_mode = unique[counts.argmax()]
  73. return unpack_rgb(packed_mode)
  74. ######################################################################
  75. def rgb_to_sv(rgb):
  76. '''Convert an RGB image or array of RGB colors to saturation and
  77. value, returning each one as a separate 32-bit floating point array or
  78. value.
  79. '''
  80. if not isinstance(rgb, np.ndarray):
  81. rgb = np.array(rgb)
  82. axis = len(rgb.shape)-1
  83. cmax = rgb.max(axis=axis).astype(np.float32)
  84. cmin = rgb.min(axis=axis).astype(np.float32)
  85. delta = cmax - cmin
  86. saturation = delta.astype(np.float32) / cmax.astype(np.float32)
  87. saturation = np.where(cmax == 0, 0, saturation)
  88. value = cmax/255.0
  89. return saturation, value
  90. ######################################################################
  91. def postprocess(output_filename, options):
  92. '''Runs the postprocessing command on the file provided.'''
  93. assert options.postprocess_cmd
  94. base, _ = os.path.splitext(output_filename)
  95. post_filename = base + options.postprocess_ext
  96. cmd = options.postprocess_cmd
  97. cmd = cmd.replace('%i', output_filename)
  98. cmd = cmd.replace('%o', post_filename)
  99. cmd = cmd.replace('%e', options.postprocess_ext)
  100. subprocess_args = shlex.split(cmd)
  101. if os.path.exists(post_filename):
  102. os.unlink(post_filename)
  103. if not options.quiet:
  104. print(' running "{}"...'.format(cmd), end=' ')
  105. sys.stdout.flush()
  106. try:
  107. result = subprocess.call(subprocess_args)
  108. before = os.stat(output_filename).st_size
  109. after = os.stat(post_filename).st_size
  110. except OSError:
  111. result = -1
  112. if result == 0:
  113. if not options.quiet:
  114. print('{:.1f}% reduction'.format(
  115. 100*(1.0-float(after)/before)))
  116. return post_filename
  117. else:
  118. sys.stderr.write('warning: postprocessing failed!\n')
  119. return None
  120. ######################################################################
  121. def percent(string):
  122. '''Convert a string (i.e. 85) to a fraction (i.e. .85).'''
  123. return float(string)/100.0
  124. ######################################################################
  125. def get_argument_parser():
  126. '''Parse the command-line arguments for this program.'''
  127. parser = ArgumentParser(
  128. description='convert scanned, hand-written notes to PDF')
  129. show_default = ' (default %(default)s)'
  130. parser.add_argument('filenames', metavar='IMAGE', nargs='+',
  131. help='files to convert')
  132. parser.add_argument('-q', dest='quiet', action='store_true',
  133. default=False,
  134. help='reduce program output')
  135. parser.add_argument('-b', dest='basename', metavar='BASENAME',
  136. default='page',
  137. help='output PNG filename base' + show_default)
  138. parser.add_argument('-o', dest='pdfname', metavar='PDF',
  139. default='output.pdf',
  140. help='output PDF filename' + show_default)
  141. parser.add_argument('-v', dest='value_threshold', metavar='PERCENT',
  142. type=percent, default='25',
  143. help='background value threshold %%'+show_default)
  144. parser.add_argument('-s', dest='sat_threshold', metavar='PERCENT',
  145. type=percent, default='20',
  146. help='background saturation '
  147. 'threshold %%'+show_default)
  148. parser.add_argument('-n', dest='num_colors', type=int,
  149. default='8',
  150. help='number of output colors '+show_default)
  151. parser.add_argument('-p', dest='sample_fraction',
  152. metavar='PERCENT',
  153. type=percent, default='5',
  154. help='%% of pixels to sample' + show_default)
  155. parser.add_argument('-w', dest='white_bg', action='store_true',
  156. default=False, help='make background white')
  157. parser.add_argument('-g', dest='global_palette',
  158. action='store_true', default=False,
  159. help='use one global palette for all pages')
  160. parser.add_argument('-S', dest='saturate', action='store_false',
  161. default=True, help='do not saturate colors')
  162. parser.add_argument('-K', dest='sort_numerically',
  163. action='store_false', default=True,
  164. help='keep filenames ordered as specified; '
  165. 'use if you *really* want IMG_10.png to '
  166. 'precede IMG_2.png')
  167. parser.add_argument('-P', dest='postprocess_cmd', default=None,
  168. help='set postprocessing command (see -O, -C, -Q)')
  169. parser.add_argument('-e', dest='postprocess_ext',
  170. default='_post.png',
  171. help='filename suffix/extension for '
  172. 'postprocessing command')
  173. parser.add_argument('-O', dest='postprocess_cmd',
  174. action='store_const',
  175. const='optipng -silent %i -out %o',
  176. help='same as -P "%(const)s"')
  177. parser.add_argument('-C', dest='postprocess_cmd',
  178. action='store_const',
  179. const='pngcrush -q %i %o',
  180. help='same as -P "%(const)s"')
  181. parser.add_argument('-Q', dest='postprocess_cmd',
  182. action='store_const',
  183. const='pngquant --ext %e %i',
  184. help='same as -P "%(const)s"')
  185. parser.add_argument('-c', dest='pdf_cmd', metavar="COMMAND",
  186. default='convert %i %o',
  187. help='PDF command (default "%(default)s")')
  188. return parser
  189. ######################################################################
  190. def get_filenames(options):
  191. '''Get the filenames from the command line, optionally sorted by
  192. number, so that IMG_10.png is re-arranged to come after IMG_9.png.
  193. This is a nice feature because some scanner programs (like Image
  194. Capture on Mac OS X) automatically number files without leading zeros,
  195. and this way you can supply files using a wildcard and still have the
  196. pages ordered correctly.
  197. '''
  198. if not options.sort_numerically:
  199. return options.filenames
  200. filenames = []
  201. for filename in options.filenames:
  202. basename = os.path.basename(filename)
  203. root, _ = os.path.splitext(basename)
  204. matches = re.findall(r'[0-9]+', root)
  205. if matches:
  206. num = int(matches[-1])
  207. else:
  208. num = -1
  209. filenames.append((num, filename))
  210. return [fn for (_, fn) in sorted(filenames)]
  211. ######################################################################
  212. def load(input_filename):
  213. '''Load an image with Pillow and convert it to numpy array. Also
  214. returns the image DPI in x and y as a tuple.'''
  215. try:
  216. pil_img = Image.open(input_filename)
  217. except IOError:
  218. sys.stderr.write('warning: error opening {}\n'.format(
  219. input_filename))
  220. return None, None
  221. if pil_img.mode != 'RGB':
  222. pil_img = pil_img.convert('RGB')
  223. if 'dpi' in pil_img.info:
  224. dpi = pil_img.info['dpi']
  225. else:
  226. dpi = (300, 300)
  227. img = np.array(pil_img)
  228. return img, dpi
  229. ######################################################################
  230. def sample_pixels(img, options):
  231. '''Pick a fixed percentage of pixels in the image, returned in random
  232. order.'''
  233. pixels = img.reshape((-1, 3))
  234. num_pixels = pixels.shape[0]
  235. num_samples = int(num_pixels*options.sample_fraction)
  236. idx = np.arange(num_pixels)
  237. np.random.shuffle(idx)
  238. return pixels[idx[:num_samples]]
  239. ######################################################################
  240. def get_fg_mask(bg_color, samples, options):
  241. '''Determine whether each pixel in a set of samples is foreground by
  242. comparing it to the background color. A pixel is classified as a
  243. foreground pixel if either its value or saturation differs from the
  244. background by a threshold.'''
  245. s_bg, v_bg = rgb_to_sv(bg_color)
  246. s_samples, v_samples = rgb_to_sv(samples)
  247. s_diff = np.abs(s_bg - s_samples)
  248. v_diff = np.abs(v_bg - v_samples)
  249. return ((v_diff >= options.value_threshold) |
  250. (s_diff >= options.sat_threshold))
  251. ######################################################################
  252. def get_palette(samples, options, return_mask=False, kmeans_iter=40):
  253. '''Extract the palette for the set of sampled RGB values. The first
  254. palette entry is always the background color; the rest are determined
  255. from foreground pixels by running K-means clustering. Returns the
  256. palette, as well as a mask corresponding to the foreground pixels.
  257. '''
  258. if not options.quiet:
  259. print(' getting palette...')
  260. bg_color = get_bg_color(samples, 6)
  261. fg_mask = get_fg_mask(bg_color, samples, options)
  262. centers, _ = kmeans(samples[fg_mask].astype(np.float32),
  263. options.num_colors-1,
  264. iter=kmeans_iter)
  265. palette = np.vstack((bg_color, centers)).astype(np.uint8)
  266. if not return_mask:
  267. return palette
  268. else:
  269. return palette, fg_mask
  270. ######################################################################
  271. def apply_palette(img, palette, options):
  272. '''Apply the pallete to the given image. The first step is to set all
  273. background pixels to the background color; then, nearest-neighbor
  274. matching is used to map each foreground color to the closest one in
  275. the palette.
  276. '''
  277. if not options.quiet:
  278. print(' applying palette...')
  279. bg_color = palette[0]
  280. fg_mask = get_fg_mask(bg_color, img, options)
  281. orig_shape = img.shape
  282. pixels = img.reshape((-1, 3))
  283. fg_mask = fg_mask.flatten()
  284. num_pixels = pixels.shape[0]
  285. labels = np.zeros(num_pixels, dtype=np.uint8)
  286. labels[fg_mask], _ = vq(pixels[fg_mask], palette)
  287. return labels.reshape(orig_shape[:-1])
  288. ######################################################################
  289. def save(output_filename, labels, palette, dpi, options):
  290. '''Save the label/palette pair out as an indexed PNG image. This
  291. optionally saturates the pallete by mapping the smallest color
  292. component to zero and the largest one to 255, and also optionally sets
  293. the background color to pure white.
  294. '''
  295. if not options.quiet:
  296. print(' saving {}...'.format(output_filename))
  297. if options.saturate:
  298. palette = palette.astype(np.float32)
  299. pmin = palette.min()
  300. pmax = palette.max()
  301. palette = 255 * (palette - pmin)/(pmax-pmin)
  302. palette = palette.astype(np.uint8)
  303. if options.white_bg:
  304. palette = palette.copy()
  305. palette[0] = (255, 255, 255)
  306. output_img = Image.fromarray(labels, 'P')
  307. output_img.putpalette(palette.flatten())
  308. output_img.save(output_filename, dpi=dpi)
  309. ######################################################################
  310. def get_global_palette(filenames, options):
  311. '''Fetch the global palette for a series of input files by merging
  312. their samples together into one large array.
  313. '''
  314. input_filenames = []
  315. all_samples = []
  316. if not options.quiet:
  317. print('building global palette...')
  318. for input_filename in filenames:
  319. img, _ = load(input_filename)
  320. if img is None:
  321. continue
  322. if not options.quiet:
  323. print(' processing {}...'.format(input_filename))
  324. samples = sample_pixels(img, options)
  325. input_filenames.append(input_filename)
  326. all_samples.append(samples)
  327. num_inputs = len(input_filenames)
  328. all_samples = [s[:int(round(float(s.shape[0])/num_inputs))]
  329. for s in all_samples]
  330. all_samples = np.vstack(tuple(all_samples))
  331. global_palette = get_palette(all_samples, options)
  332. if not options.quiet:
  333. print(' done\n')
  334. return input_filenames, global_palette
  335. ######################################################################
  336. def emit_pdf(outputs, options):
  337. '''Runs the PDF conversion command to generate the PDF.'''
  338. cmd = options.pdf_cmd
  339. cmd = cmd.replace('%o', options.pdfname)
  340. if len(outputs) > 2:
  341. cmd_print = cmd.replace('%i', ' '.join(outputs[:2] + ['...']))
  342. else:
  343. cmd_print = cmd.replace('%i', ' '.join(outputs))
  344. cmd = cmd.replace('%i', ' '.join(outputs))
  345. if not options.quiet:
  346. print('running PDF command "{}"...'.format(cmd_print))
  347. try:
  348. result = subprocess.call(shlex.split(cmd))
  349. except OSError:
  350. result = -1
  351. if result == 0:
  352. if not options.quiet:
  353. print(' wrote', options.pdfname)
  354. else:
  355. sys.stderr.write('warning: PDF command failed\n')
  356. ######################################################################
  357. def notescan_main(options):
  358. '''Main function for this program when run as script.'''
  359. filenames = get_filenames(options)
  360. outputs = []
  361. do_global = options.global_palette and len(filenames) > 1
  362. if do_global:
  363. filenames, palette = get_global_palette(filenames, options)
  364. do_postprocess = bool(options.postprocess_cmd)
  365. for input_filename in filenames:
  366. img, dpi = load(input_filename)
  367. if img is None:
  368. continue
  369. output_filename = '{}{:04d}.png'.format(
  370. options.basename, len(outputs))
  371. if not options.quiet:
  372. print('opened', input_filename)
  373. if not do_global:
  374. samples = sample_pixels(img, options)
  375. palette = get_palette(samples, options)
  376. labels = apply_palette(img, palette, options)
  377. save(output_filename, labels, palette, dpi, options)
  378. if do_postprocess:
  379. post_filename = postprocess(output_filename, options)
  380. if post_filename:
  381. output_filename = post_filename
  382. else:
  383. do_postprocess = False
  384. outputs.append(output_filename)
  385. if not options.quiet:
  386. print(' done\n')
  387. emit_pdf(outputs, options)
  388. ######################################################################
  389. def main():
  390. '''Parse args and call notescan_main().'''
  391. notescan_main(options=get_argument_parser().parse_args())
  392. if __name__ == '__main__':
  393. main()