123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588 |
- #!/usr/bin/env python
- '''Converts sequence of images to compact PDF while removing speckles,
- bleedthrough, etc.
- '''
- # for some reason pylint complains about members being undefined :(
- # pylint: disable=E1101
- from __future__ import print_function
- import sys
- import os
- import re
- import subprocess
- import shlex
- from argparse import ArgumentParser
- import numpy as np
- from PIL import Image
- from scipy.cluster.vq import kmeans, vq
- ######################################################################
- def quantize(image, bits_per_channel=None):
- '''Reduces the number of bits per channel in the given image.'''
- if bits_per_channel is None:
- bits_per_channel = 6
- assert image.dtype == np.uint8
- shift = 8-bits_per_channel
- halfbin = (1 << shift) >> 1
- return ((image.astype(int) >> shift) << shift) + halfbin
- ######################################################################
- def pack_rgb(rgb):
- '''Packs a 24-bit RGB triples into a single integer,
- works on both arrays and tuples.'''
- orig_shape = None
- if isinstance(rgb, np.ndarray):
- assert rgb.shape[-1] == 3
- orig_shape = rgb.shape[:-1]
- else:
- assert len(rgb) == 3
- rgb = np.array(rgb)
- rgb = rgb.astype(int).reshape((-1, 3))
- packed = (rgb[:, 0] << 16 |
- rgb[:, 1] << 8 |
- rgb[:, 2])
- if orig_shape is None:
- return packed
- else:
- return packed.reshape(orig_shape)
- ######################################################################
- def unpack_rgb(packed):
- '''Unpacks a single integer or array of integers into one or more
- 24-bit RGB values.
- '''
- orig_shape = None
- if isinstance(packed, np.ndarray):
- assert packed.dtype == int
- orig_shape = packed.shape
- packed = packed.reshape((-1, 1))
- rgb = ((packed >> 16) & 0xff,
- (packed >> 8) & 0xff,
- (packed) & 0xff)
- if orig_shape is None:
- return rgb
- else:
- return np.hstack(rgb).reshape(orig_shape + (3,))
- ######################################################################
- def get_bg_color(image, bits_per_channel=None):
- '''Obtains the background color from an image or array of RGB colors
- by grouping similar colors into bins and finding the most frequent
- one.
- '''
- assert image.shape[-1] == 3
- quantized = quantize(image, bits_per_channel).astype(int)
- packed = pack_rgb(quantized)
- unique, counts = np.unique(packed, return_counts=True)
- packed_mode = unique[counts.argmax()]
- return unpack_rgb(packed_mode)
- ######################################################################
- def rgb_to_sv(rgb):
- '''Convert an RGB image or array of RGB colors to saturation and
- value, returning each one as a separate 32-bit floating point array or
- value.
- '''
- if not isinstance(rgb, np.ndarray):
- rgb = np.array(rgb)
- axis = len(rgb.shape)-1
- cmax = rgb.max(axis=axis).astype(np.float32)
- cmin = rgb.min(axis=axis).astype(np.float32)
- delta = cmax - cmin
- saturation = delta.astype(np.float32) / cmax.astype(np.float32)
- saturation = np.where(cmax == 0, 0, saturation)
- value = cmax/255.0
- return saturation, value
- ######################################################################
- def postprocess(output_filename, options):
- '''Runs the postprocessing command on the file provided.'''
- assert options.postprocess_cmd
- base, _ = os.path.splitext(output_filename)
- post_filename = base + options.postprocess_ext
- cmd = options.postprocess_cmd
- cmd = cmd.replace('%i', output_filename)
- cmd = cmd.replace('%o', post_filename)
- cmd = cmd.replace('%e', options.postprocess_ext)
- subprocess_args = shlex.split(cmd)
- if os.path.exists(post_filename):
- os.unlink(post_filename)
- if not options.quiet:
- print(' running "{}"...'.format(cmd), end=' ')
- sys.stdout.flush()
- try:
- result = subprocess.call(subprocess_args)
- before = os.stat(output_filename).st_size
- after = os.stat(post_filename).st_size
- except OSError:
- result = -1
- if result == 0:
- if not options.quiet:
- print('{:.1f}% reduction'.format(
- 100*(1.0-float(after)/before)))
- return post_filename
- else:
- sys.stderr.write('warning: postprocessing failed!\n')
- return None
- ######################################################################
- def percent(string):
- '''Convert a string (i.e. 85) to a fraction (i.e. .85).'''
- return float(string)/100.0
- ######################################################################
- def get_argument_parser():
- '''Parse the command-line arguments for this program.'''
- parser = ArgumentParser(
- description='convert scanned, hand-written notes to PDF')
- show_default = ' (default %(default)s)'
- parser.add_argument('filenames', metavar='IMAGE', nargs='+',
- help='files to convert')
- parser.add_argument('-q', dest='quiet', action='store_true',
- default=False,
- help='reduce program output')
- parser.add_argument('-b', dest='basename', metavar='BASENAME',
- default='page',
- help='output PNG filename base' + show_default)
- parser.add_argument('-o', dest='pdfname', metavar='PDF',
- default='output.pdf',
- help='output PDF filename' + show_default)
- parser.add_argument('-v', dest='value_threshold', metavar='PERCENT',
- type=percent, default='25',
- help='background value threshold %%'+show_default)
- parser.add_argument('-s', dest='sat_threshold', metavar='PERCENT',
- type=percent, default='20',
- help='background saturation '
- 'threshold %%'+show_default)
- parser.add_argument('-n', dest='num_colors', type=int,
- default='8',
- help='number of output colors '+show_default)
- parser.add_argument('-p', dest='sample_fraction',
- metavar='PERCENT',
- type=percent, default='5',
- help='%% of pixels to sample' + show_default)
- parser.add_argument('-w', dest='white_bg', action='store_true',
- default=False, help='make background white')
- parser.add_argument('-g', dest='global_palette',
- action='store_true', default=False,
- help='use one global palette for all pages')
- parser.add_argument('-S', dest='saturate', action='store_false',
- default=True, help='do not saturate colors')
- parser.add_argument('-K', dest='sort_numerically',
- action='store_false', default=True,
- help='keep filenames ordered as specified; '
- 'use if you *really* want IMG_10.png to '
- 'precede IMG_2.png')
- parser.add_argument('-P', dest='postprocess_cmd', default=None,
- help='set postprocessing command (see -O, -C, -Q)')
- parser.add_argument('-e', dest='postprocess_ext',
- default='_post.png',
- help='filename suffix/extension for '
- 'postprocessing command')
- parser.add_argument('-O', dest='postprocess_cmd',
- action='store_const',
- const='optipng -silent %i -out %o',
- help='same as -P "%(const)s"')
- parser.add_argument('-C', dest='postprocess_cmd',
- action='store_const',
- const='pngcrush -q %i %o',
- help='same as -P "%(const)s"')
- parser.add_argument('-Q', dest='postprocess_cmd',
- action='store_const',
- const='pngquant --ext %e %i',
- help='same as -P "%(const)s"')
- parser.add_argument('-c', dest='pdf_cmd', metavar="COMMAND",
- default='convert %i %o',
- help='PDF command (default "%(default)s")')
- return parser
- ######################################################################
- def get_filenames(options):
- '''Get the filenames from the command line, optionally sorted by
- number, so that IMG_10.png is re-arranged to come after IMG_9.png.
- This is a nice feature because some scanner programs (like Image
- Capture on Mac OS X) automatically number files without leading zeros,
- and this way you can supply files using a wildcard and still have the
- pages ordered correctly.
- '''
- if not options.sort_numerically:
- return options.filenames
- filenames = []
- for filename in options.filenames:
- basename = os.path.basename(filename)
- root, _ = os.path.splitext(basename)
- matches = re.findall(r'[0-9]+', root)
- if matches:
- num = int(matches[-1])
- else:
- num = -1
- filenames.append((num, filename))
- return [fn for (_, fn) in sorted(filenames)]
- ######################################################################
- def load(input_filename):
- '''Load an image with Pillow and convert it to numpy array. Also
- returns the image DPI in x and y as a tuple.'''
- try:
- pil_img = Image.open(input_filename)
- except IOError:
- sys.stderr.write('warning: error opening {}\n'.format(
- input_filename))
- return None, None
- if pil_img.mode != 'RGB':
- pil_img = pil_img.convert('RGB')
- if 'dpi' in pil_img.info:
- dpi = pil_img.info['dpi']
- else:
- dpi = (300, 300)
- img = np.array(pil_img)
- return img, dpi
- ######################################################################
- def sample_pixels(img, options):
- '''Pick a fixed percentage of pixels in the image, returned in random
- order.'''
- pixels = img.reshape((-1, 3))
- num_pixels = pixels.shape[0]
- num_samples = int(num_pixels*options.sample_fraction)
- idx = np.arange(num_pixels)
- np.random.shuffle(idx)
- return pixels[idx[:num_samples]]
- ######################################################################
- def get_fg_mask(bg_color, samples, options):
- '''Determine whether each pixel in a set of samples is foreground by
- comparing it to the background color. A pixel is classified as a
- foreground pixel if either its value or saturation differs from the
- background by a threshold.'''
- s_bg, v_bg = rgb_to_sv(bg_color)
- s_samples, v_samples = rgb_to_sv(samples)
- s_diff = np.abs(s_bg - s_samples)
- v_diff = np.abs(v_bg - v_samples)
- return ((v_diff >= options.value_threshold) |
- (s_diff >= options.sat_threshold))
- ######################################################################
- def get_palette(samples, options, return_mask=False, kmeans_iter=40):
- '''Extract the palette for the set of sampled RGB values. The first
- palette entry is always the background color; the rest are determined
- from foreground pixels by running K-means clustering. Returns the
- palette, as well as a mask corresponding to the foreground pixels.
- '''
- if not options.quiet:
- print(' getting palette...')
- bg_color = get_bg_color(samples, 6)
- fg_mask = get_fg_mask(bg_color, samples, options)
- centers, _ = kmeans(samples[fg_mask].astype(np.float32),
- options.num_colors-1,
- iter=kmeans_iter)
- palette = np.vstack((bg_color, centers)).astype(np.uint8)
- if not return_mask:
- return palette
- else:
- return palette, fg_mask
- ######################################################################
- def apply_palette(img, palette, options):
- '''Apply the pallete to the given image. The first step is to set all
- background pixels to the background color; then, nearest-neighbor
- matching is used to map each foreground color to the closest one in
- the palette.
- '''
- if not options.quiet:
- print(' applying palette...')
- bg_color = palette[0]
- fg_mask = get_fg_mask(bg_color, img, options)
- orig_shape = img.shape
- pixels = img.reshape((-1, 3))
- fg_mask = fg_mask.flatten()
- num_pixels = pixels.shape[0]
- labels = np.zeros(num_pixels, dtype=np.uint8)
- labels[fg_mask], _ = vq(pixels[fg_mask], palette)
- return labels.reshape(orig_shape[:-1])
- ######################################################################
- def save(output_filename, labels, palette, dpi, options):
- '''Save the label/palette pair out as an indexed PNG image. This
- optionally saturates the pallete by mapping the smallest color
- component to zero and the largest one to 255, and also optionally sets
- the background color to pure white.
- '''
- if not options.quiet:
- print(' saving {}...'.format(output_filename))
- if options.saturate:
- palette = palette.astype(np.float32)
- pmin = palette.min()
- pmax = palette.max()
- palette = 255 * (palette - pmin)/(pmax-pmin)
- palette = palette.astype(np.uint8)
- if options.white_bg:
- palette = palette.copy()
- palette[0] = (255, 255, 255)
- output_img = Image.fromarray(labels, 'P')
- output_img.putpalette(palette.flatten())
- output_img.save(output_filename, dpi=dpi)
- ######################################################################
- def get_global_palette(filenames, options):
- '''Fetch the global palette for a series of input files by merging
- their samples together into one large array.
- '''
- input_filenames = []
- all_samples = []
- if not options.quiet:
- print('building global palette...')
- for input_filename in filenames:
- img, _ = load(input_filename)
- if img is None:
- continue
- if not options.quiet:
- print(' processing {}...'.format(input_filename))
- samples = sample_pixels(img, options)
- input_filenames.append(input_filename)
- all_samples.append(samples)
- num_inputs = len(input_filenames)
- all_samples = [s[:int(round(float(s.shape[0])/num_inputs))]
- for s in all_samples]
- all_samples = np.vstack(tuple(all_samples))
- global_palette = get_palette(all_samples, options)
- if not options.quiet:
- print(' done\n')
- return input_filenames, global_palette
- ######################################################################
- def emit_pdf(outputs, options):
- '''Runs the PDF conversion command to generate the PDF.'''
- cmd = options.pdf_cmd
- cmd = cmd.replace('%o', options.pdfname)
- if len(outputs) > 2:
- cmd_print = cmd.replace('%i', ' '.join(outputs[:2] + ['...']))
- else:
- cmd_print = cmd.replace('%i', ' '.join(outputs))
- cmd = cmd.replace('%i', ' '.join(outputs))
- if not options.quiet:
- print('running PDF command "{}"...'.format(cmd_print))
- try:
- result = subprocess.call(shlex.split(cmd))
- except OSError:
- result = -1
- if result == 0:
- if not options.quiet:
- print(' wrote', options.pdfname)
- else:
- sys.stderr.write('warning: PDF command failed\n')
- ######################################################################
- def notescan_main(options):
- '''Main function for this program when run as script.'''
- filenames = get_filenames(options)
- outputs = []
- do_global = options.global_palette and len(filenames) > 1
- if do_global:
- filenames, palette = get_global_palette(filenames, options)
- do_postprocess = bool(options.postprocess_cmd)
- for input_filename in filenames:
- img, dpi = load(input_filename)
- if img is None:
- continue
- output_filename = '{}{:04d}.png'.format(
- options.basename, len(outputs))
- if not options.quiet:
- print('opened', input_filename)
- if not do_global:
- samples = sample_pixels(img, options)
- palette = get_palette(samples, options)
- labels = apply_palette(img, palette, options)
- save(output_filename, labels, palette, dpi, options)
- if do_postprocess:
- post_filename = postprocess(output_filename, options)
- if post_filename:
- output_filename = post_filename
- else:
- do_postprocess = False
- outputs.append(output_filename)
- if not options.quiet:
- print(' done\n')
- emit_pdf(outputs, options)
- ######################################################################
- def main():
- '''Parse args and call notescan_main().'''
- notescan_main(options=get_argument_parser().parse_args())
- if __name__ == '__main__':
- main()
|