#!/bin/bash
#
# Revised by Fred Weinhaus ...................... revised 8/24/2010
# Revised by Anthony Thyssen to add -b option ... revised 8/24/2010
# Developed by Fred Weinhaus 1/30/2010 .......... revised 7/7/2010
# 
# USAGE: multicrop [-c coords] [-b bcolor] [-f fuzzval] [-g grid] [-u unrotate] [-m mask] infile outfile
# USAGE: multicrop [-h or -help]
# 
# OPTIONS:
# 
# -c          coords         pixel coordinate to extract background color; 
#                            may be expressed as gravity value (NorthWest, etc)
#                            or as "x,y" value; default is NorthWest=(0,0)
# -b          bcolor         background color to use instead of option -c;
#                            any valid IM color; default is to use option -c
# -f          fuzzval        fuzz value for separating background color;
#                            expressed as (integer) percent 0 to 100; 
#                            default=10
# -g          grid           grid spacing in both x and y as percent of 
#                            image width and height; used to locate images;
#                            integer>0; default=10;
# -u          unrotate       unrotate method; choices are 1 for -deskew, 
#                            2 for unrotate script and 3 for no unrotate;
#                            default=1
# -m          mask           mask presentation method; choices are view, 
#                            save (to file) or output mask only; default 
#                            is none of the above, just output the images
# 
###
# 
# NAME: MULTICROP
# 
# PURPOSE: To crop and unrotate multiple images from a scanned image.
# 
# DESCRIPTION: MULTICROP crops and unrotates multiple images from a scanned image.
# The images must be well separated so that background color shows between them. 
# The process uses a floodfill technique based upon a seed coordinate and a fuzz 
# value to separate the individual images from the background of the scan. 
# The correct choice of fuzz factor is very important. If too small, the images 
# will not be separated. If too large, parts of the outer area of the image 
# containing similar colors will be lost and the image may be separated into 
# multiple parts. There are two unrotate methods. The first uses the IM deskew 
# function, but is limited to 5 degrees of rotate or less. The second uses my 
# unrotate script. It allows much larger rotations, but will be slower. If 
# using the second method, my unrotate script must be downloaded and installed.
# 
# IMPORTANT: The images in the scanned file must be well separated in x and y
# so that their bounding boxes do not overlap. This is especially important 
# if the images have a significant rotation.
# 
# The output images will be named from the specified outfile and -0, -1, 
# etc, will be appended before the .suffix.
# 
# Arguments: 
# 
# -c coords ... COORDS is any location within the background (non-image) area 
# for the algorithm to find the background color. It may be specified in terms  
# of gravity parameters (NorthWest, North, NorthEast, East, SouthEast, South, 
# SouthWest or West) or as a pixel coordinate "x,y". The default is the 
# upper left corner = NorthWest = "0,0".
# 
# -b bcolor ... BCOLOR is the background color to use for flood fill instead 
# of extracting this color from the image. This is useful when an image has 
# no borders with sub-images hard against the edges. Any valid IM color is 
# allowed. The default is to use option -c.
# 
# -f fuzzval ... FUZZVAL is the fuzz amount specified as an integer percent
# value between 0 and 100 (without the % sign). The correct choice of fuzz
# factor is very important. If too small, the images will not be separated.
# If too large, parts of the outer area of the image containing similar
# colors will be lost and the image may be separated into multiple parts.
# Typical values are probably between 5 and 20 percent. The default=10.
# 
# -g grid ... GRID is the grid spacing for testing points in the input image 
# to see if they are background or image. The grid value is specified as an 
# integer percent greater than 0 and less than 100 of the width and height 
# of the input image. The default=10.
# 
# -u unrotate ... UNROTATE is the unrotation method. Choices are: 1, 2 or 3.
# The default is unrotate=1, which is fast and uses the IM -deskew function, 
# but is limited to images that are rotated no more than 5 degrees in the scan. 
# Option unrotate=2 uses my unrotate script. It can handle larger rotations, 
# but is slower. If using the latter method, my unrotate script must be 
# downloaded and also installed so that it is available for this script to use.
# Option unrotate=3 makes no attempt to unrotate the images.
# 
# -m mask ... MASK provides several options for reviewing the initial mask that 
# is generated by the fuzz value. The choices are: view (display to X11 window),
# save (to disk) along with the images, or output (without processing the images).
# The default is to simply process the images without showing or saving the mask.
# If using the view mode, then processing will stop until the image is closed. 
# But this allows you to then kill the script if the mask is not appropriate. 
# A good approach is to use the output mode repeatedly with various fuzzvals 
# until a reasonable mask is created. Note that the mask must separate the 
# images, but the background can "eat" a little into the images so long as no 
# full edge is lost and no image is split into multiple parts.
# 
# NOTE: If using unrotate method 2, then my script, unrotate, is required 
# as well.
# 
# CAVEAT: No guarantee that this script will work on all platforms, 
# nor that trapping of inconsistent parameters is complete and 
# foolproof. Use At Your Own Risk. 
# 
######
#

# set default values 
coords="0,0"			# initial coord for finding background color
bcolor=""      			# initial background color
fuzzval=10 				# fuzz amount in percent for making background transparent
grid=10					# grid spacing in percent image
mask=""					# view, save, output
unrotate=1				# 1=deskew 2=unrotate

# set directory for temporary files
dir="."    # suggestions are dir="." or dir="/tmp"

# set up functions to report Usage and Usage with Description
PROGNAME=`type $0 | awk '{print $3}'`  # search for executable on path
PROGDIR=`dirname $PROGNAME`            # extract directory of program
PROGNAME=`basename $PROGNAME`          # base name of program
usage1() 
	{
	echo >&2 ""
	echo >&2 "$PROGNAME:" "$@"
	sed >&2 -n '/^###/q;  /^#/!q;  s/^#//;  s/^ //;  4,$p' "$PROGDIR/$PROGNAME"
	}
usage2() 
	{
	echo >&2 ""
	echo >&2 "$PROGNAME:" "$@"
	sed >&2 -n '/^######/q;  /^#/!q;  s/^#*//;  s/^ //;  4,$p' "$PROGDIR/$PROGNAME"
	}

# function to report error messages
errMsg()
	{
	echo ""
	echo $1
	echo ""
	usage1
	exit 1
	}

# function to test for minus at start of value of second part of option 1 or 2
checkMinus()
	{
	test=`echo "$1" | grep -c '^-.*$'`   # returns 1 if match; 0 otherwise
    [ $test -eq 1 ] && errMsg "$errorMsg"
	}

# test for correct number of arguments and get values
if [ $# -eq 0 ]
	then
	# help information
	echo ""
	usage2
	exit 0
elif [ $# -gt 14 ]
	then
	errMsg "--- TOO MANY ARGUMENTS WERE PROVIDED ---"
else
	while [ $# -gt 0 ]
		do
		# get parameters
		case "$1" in
	  -h|-help)    # help information
				   echo ""
				   usage2
				   ;;
			-f)    # fuzzval
				   shift  # to get the next parameter
				   # test if parameter starts with minus sign 
				   errorMsg="--- INVALID FUZZVAL SPECIFICATION ---"
				   checkMinus "$1"
				   fuzzval=`expr "$1" : '\([0-9]*\)'`
				   [ "$fuzzval" = "" ] && errMsg "--- FUZZVAL=$fuzzval MUST BE A NON-NEGATIVE INTEGER VALUE (with no sign) ---"
				   fuzzvaltestA=`echo "$fuzzval < 0" | bc`
				   fuzzvaltestB=`echo "$fuzzval > 100" | bc`
				   [ $fuzzvaltestA -eq 1 -a $fuzzvaltestB -eq 1 ] && errMsg "--- FUZZVAL=$fuzzval MUST BE A NON-NEGATIVE INTEGER VALUE BETWEEN 0 AND 100 ---"
				   ;;
			-c)    # coords
				   shift  # to get the next parameter
				   # test if parameter starts with minus sign 
				   errorMsg="--- INVALID COORDS SPECIFICATION ---"
				   checkMinus "$1"
				   coords=$1
				   # further testing done later
				   ;;
			-b)    # coords
				   shift  # to get the next parameter
				   # test if parameter starts with minus sign
				   errorMsg="--- INVALID COORDS SPECIFICATION ---"
				   checkMinus "$1"
				   bcolor=$1
				   ;;
			-g)    # grid
				   shift  # to get the next parameter
				   # test if parameter starts with minus sign 
				   errorMsg="--- INVALID GRID SPECIFICATION ---"
				   checkMinus "$1"
				   grid=`expr "$1" : '\([0-9]*\)'`
				   [ "$grid" = "" ] && errMsg "--- GRID=$grid MUST BE A NON-NEGATIVE INTEGER VALUE (with no sign) ---"
				   gridtestA=`echo "$grid <= 0" | bc`
				   gridtestB=`echo "$grid >= 100" | bc`
				   [ $gridtestA -eq 1 -a $gridtestB -eq 1 ] && errMsg "--- GRID=$grid MUST BE A NON-NEGATIVE INTEGER VALUE LARGER THAN 0 AND SMALLER THAN 100 ---"
				   ;;
			-u)    # unrotate
				   shift  # to get the next parameter
				   # test if parameter starts with minus sign 
				   errorMsg="--- INVALID UNROTATE SPECIFICATION ---"
				   checkMinus "$1"
				   unrotate=`expr "$1" : '\([0-9]\)'`
				   [ $unrotate -lt 1 -a $unrotate -gt 3 ] && errMsg "--- UNROTATE=$unrotate MUST BE EITHER 1, 2 OR 3 ---"
				   ;;
			-m)    # mask
				   shift  # to get the next parameter
				   # test if parameter starts with minus sign 
				   errorMsg="--- INVALID MASK SPECIFICATION ---"
				   checkMinus "$1"
				   mask=`echo "$1" | tr "[:upper:]" "[:lower:]"`
				   [ "$mask" != "view" -a "$mask" != "save" -a "$mask" != "output"  ] && errMsg "--- MASK=$mask MUST BE EITHER VIEW, SAVE OR OUTPUT ---"
				   ;;
			 -)    # STDIN and end of arguments
				   break
				   ;;
			-*)    # any other - argument
				   errMsg "--- UNKNOWN OPTION ---"
				   ;;
			*)     # end of arguments
				   break
				   ;;
		esac
		shift   # next option
	done
	# get infile and outfile
	infile=$1
	outfile=$2
fi

# test if both bcolor and coords specified at the same time
if [ "X$bcolor" != "X" -a "X$coods" != "X" ]; then
  errMsg "--- BACKGROUND COLOR AND COODINATES CAN NOT BE USED TOGETHER ---"
fi

# test that infile provided
[ "$infile" = "" ] && errMsg "NO INPUT FILE SPECIFIED"

# test that outfile provided
[ "$outfile" = "" ] && errMsg "NO OUTPUT FILE SPECIFIED"

# set up temp file
tmpA1="$dir/multicrop_1_$$.mpc"
tmpB1="$dir/multicrop_1_$$.cache"
tmpA2="$dir/multicrop_2_$$.mpc"
tmpB2="$dir/multicrop_2_$$.cache"
tmpA3="$dir/multicrop_3_$$.mpc"
tmpB3="$dir/multicrop_3_$$.cache"
tmpA4="$dir/multicrop_4_$$.mpc"
tmpB4="$dir/multicrop_4_$$.cache"
tmpA5="$dir/multicrop_5_$$.mpc"
tmpB5="$dir/multicrop_5_$$.cache"

trap "rm -f $tmpA1 $tmpB1 $tmpA2 $tmpB2 $tmpA3 $tmpB3 $tmpA4 $tmpB4 $tmpA5 $tmpB5; exit 0" 0
trap "rm -f $tmpA1 $tmpB1 $tmpA2 $tmpB2 $tmpA3 $tmpB3 $tmpA4 $tmpB4 $tmpA5 $tmpB5; exit 1" 1 2 3 15


# read the input image into the temp files and test validity.
convert -quiet -regard-warnings "$infile" +repage "$tmpA1" ||
	errMsg "--- FILE $infile1 DOES NOT EXIST OR IS NOT AN ORDINARY FILE, NOT READABLE OR HAS ZERO SIZE  ---"

# get output filename and suffix
outnameArr=(`echo "$outfile" | sed -n 's/^\(.*\)[.]\([^.]*\)$/\1 \2/p'`)
outname="${outnameArr[0]}"
suffix="${outnameArr[1]}"
#echo "outname=$outname"
#echo "suffix=$suffix"


# get image width and height
width=`identify -ping -format "%w" $tmpA1`
height=`identify -ping -format "%h" $tmpA1`


# get color at user specified location
if [ "X$bgcolor" != "X" ]; then
	coords="0,0"
else
	widthm1=`convert xc: -format "%[fx:$width-1]" info:`
	heightm1=`convert xc: -format "%[fx:$height-1]" info:`
	midwidth=`convert xc: -format "%[fx:round(($width-1))/2]" info:`
	midheight=`convert xc: -format "%[fx:round(($height-1))/2]" info:`
	coords=`echo "$coords" | tr "[:upper:]" "[:lower:]"`
	case "$coords" in
		''|nw|northwest) coords="0,0" ;;
		n|north)         coords="$midwidth,0" ;;
		ne|northeast)    coords="$widthm1,0" ;;
		e|east)          coords="$widthm1,$midheight" ;;
		se|southeast)    coords="$widthm1,$heightm1" ;;
		s|south)         coords="$midwidth,$heightm1" ;;
		sw|southwest)    coords="0,$heightm1" ;;
		w|west)          coords="0,$midheight" ;;
		[0-9]*,[0-9]*)   coords=$coords ;;
		*)  errMsg "--- INVALID COORDS ---" ;;
	esac
	bgcolor=`convert $tmpA1 -format "%[pixel:u.p{$coords}]" info:`
fi
#echo "bgcolor=$bgcolor"


# get grid spacing
wg=`convert xc: -format "%[fx:round($grid*$width/100)]" info:`
hg=`convert xc: -format "%[fx:round($grid*$height/100)]" info:`
num=`convert xc: -format "%[fx:round(100/$grid) - 2]" info:`
#echo "width=$width; height=$height; wg=$wg; hg=$hg; num=$num"


# add a border, and flood fill from all edges inward
convert $tmpA1 -fuzz ${fuzzval}% -fill none \
	-bordercolor $bgcolor -border 1x1 \
	-draw "matte $coords floodfill" \
	-shave 1x1 -fill red +opaque none \
	$tmpA2

if [ "$mask" = "view" ]; then
	display $tmpA2
elif [ "$mask" = "save" ]; then
	convert $tmpA2 ${outname}_mask.gif
elif [ "$mask" = "output" ]; then
	convert $tmpA2 ${outname}_mask.gif
	exit 0
fi

# set up for unrotate 1 or 3
if [ $unrotate -eq 1 ]; then
	derotate="-deskew 40%"
elif [ $unrotate -eq 3 ]; then
	derotate=""
fi


echo ""
# loop over grid and floodfill and trim to get individual mask for each image
k=0
y=0
for ((j=0;j<=$num;j++))
	do
 	x=0
 	y=$(($y + $hg))
	for ((i=0;i<=$num;i++))
		do
		x=$(($x + $wg))
		# test if found color other than "none" (i.e. red)
		testcolor=`convert $tmpA2 -channel rgba -alpha on -format \
			"%[fx:u.p{$x,$y}=="none"?0:1]" info:` 
#		echo "$x $y $testcolor"
		if [ $testcolor -eq 1 ]; then
			echo "Processing Image $k"
			# Take red and none mask.
			# Floodfill the local red region with white.
			convert $tmpA2 -channel rgba -alpha on -fill "white" \
				-draw "color $x,$y floodfill" $tmpA3
			# Fill anything not white with transparency and 
			# turn transparency off so black.
			# Then clone and trim to bounds of white.
			# Then fill any black with white.
			# Then flatten back onto white and black image so that any white 
			# areas eaten away are filled with white.
			# Note flatten uses the virtual canvas left by -trim so that it 
			# goes back into the right location.
			convert \( $tmpA3 -channel rgba -alpha on \
				-fill none +opaque white -alpha off \) \
				\( +clone -trim -fill white -opaque black -write $tmpA5 \) \
				-flatten $tmpA4
			# Print size and page geometry
			identify -ping -format "  Size: %wx%h\n  Page Geometry: %g" $tmpA5
			# Composite the black and white mask onto the original scan.
			# Then trim and deskew/unrotate to make the output.
			if [ $unrotate -eq 1 -o $unrotate -eq 3 ]; then
				convert $tmpA1 $tmpA4 -compose multiply -composite \
					-fuzz ${fuzzval}% -trim -background "$bgcolor" $derotate \
					-compose over -bordercolor "$bgcolor" -border 2 -trim +repage \
					${outname}-${k}.${suffix}
			elif [ $unrotate -eq 2 ]; then
				convert $tmpA1 $tmpA4 -compose multiply -composite \
					-fuzz ${fuzzval}% -trim miff:- | \
				unrotate -f ${fuzzval}% - ${outname}-${k}.${suffix}
			fi
			# Fill the selected photo area in the red/none mask with none 
			# for use with next coordinate so that it does not find it again.
			convert $tmpA3 -channel rgba -alpha on -fill none -opaque white $tmpA2
			k=$(($k + 1))
		fi
	done
done
echo ""
exit 0