#! /usr/bin/perl -w
# Xsane to Tesseract by EquinoxeFR (http://www.equinoxefr.org/wp-content/uploads/2008/07/xsane2tess.pl)
# (http://www.equinoxefr.org/post/2008/07/05/xsane-et-tesseract-locr-qui-marche-tres-bien-sous-linux/comment-page-1/#comment-28408)

# Deskew command added 4-7-2015, Dominique Meeùs <dominique@d-meeus.be>.
# (From http://galfar.vevb.net/wp/projects/deskew/).
# You may want to put a symlink to deskew in /usr/bin. (I installed Deskew in /opt.)
# Deskewing makes thin triangles appear on the sides. Option -b "FFFFFF" makes them white.
# Because deskew accepts raw inputfile, convert is not needed.
# Dominique Meeùs

# PDF output config option added 15-4-2017, rev. 23-4-2017.
# Tesseract 4 is slow: worked around by limiting it to one thread (OMP_THREAD_LIMIT=1), rev. 1-6-2018.

use strict;
use Getopt::Long;

# ---------------------------------------------------------------------------
# Xsane OCR helper: deskew the scanned image, then let Tesseract turn the
# deskewed image into a PDF.
#
# Options:
#   --log FILE  log file (default /tmp/tesseract.log), overwritten on each run
#   -i FILE     scanned image to process (required)
#   -o FILE     output base name (required); Tesseract appends ".pdf" to it
#   -l LANG     Tesseract language code; when omitted, no -l is passed
#
# External programs are started without a shell, so file names containing
# quotes, spaces, "$" or backticks are passed through verbatim.
# ---------------------------------------------------------------------------

# Write $message to the log, close the log and terminate with that message.
sub abort_run {
    my ($log, $message) = @_;
    print {$log} "$message\n";
    close($log);
    die "$message\n";
}

# Run @command (program followed by its arguments) without a shell and copy
# everything it writes to STDOUT and STDERR into the log handle $log.
# Returns the wait status ($?) of the command, or -1 when fork failed.
sub run_logged {
    my ($log, @command) = @_;

    # Forking pipe open: the parent reads what the child prints.
    my $pid = open(my $output, '-|');
    if (!defined $pid) {
        print {$log} "Cannot fork for $command[0]: $!\n";
        return -1;
    }

    if ($pid == 0) {
        # Child: merge STDERR into the pipe (the former "2>&1"), then
        # replace this process with the command. A failing exec reports
        # through the same pipe, so the reason ends up in the log.
        open(STDERR, '>&', \*STDOUT) or die "Cannot redirect STDERR: $!\n";
        exec { $command[0] } @command
            or die "Cannot run $command[0]: $!\n";
    }

    while (my $line = <$output>) {
        print {$log} $line;
    }
    close($output);    # waits for the child and sets $?
    return $?;
}

my $logfile = "/tmp/tesseract.log";
my $lang;
my $inputfile;
my $outputfile;

# The result of GetOptions is deliberately not checked: as before, unknown
# options only produce a warning and the run continues.
GetOptions ('log=s' => \$logfile,
	    'l=s'   => \$lang,
	    'i=s'   => \$inputfile,
	    'o=s'   => \$outputfile
    );

open(my $log, '>', $logfile)
    or die "Error Opening log file $logfile: $!\n";

# Unbuffered log: nothing is pending when a child process is forked, and our
# own lines stay in order with the copied program output.
{
    my $previous = select($log);
    $| = 1;
    select($previous);
}

print {$log} "Xsane to Tesseract by EquinoxeFR\n";
print {$log} "-i ", (defined $inputfile  ? $inputfile  : ''), "\n";
print {$log} "-o ", (defined $outputfile ? $outputfile : ''), "\n";
print {$log} "-l ", (defined $lang       ? $lang       : ''), "\n";

# Without an input and an output name there is nothing sensible to run.
for my $required (['-i', $inputfile], ['-o', $outputfile]) {
    my ($option, $value) = @{$required};
    next if defined $value && length $value;
    abort_run($log, "Missing required option $option");
}

# By default, Deskew outputs to out.png but Tesseract's PDF is lighter from out.tif.
# NOTE: the intermediate file is still created in the current directory.
my $deskewed = "out.tif";

# Remove a leftover from an earlier run so that a failing deskew can never
# cause an old page to be OCRed.
unlink($deskewed);

my $status = run_logged($log, 'deskew', '-o', $deskewed, '-b', 'FFFFFF', $inputfile);
print {$log} "deskew failed (wait status $status)\n" if $status != 0;
abort_run($log, "deskew produced no $deskewed; nothing to OCR")
    unless -e $deskewed;

my @tesseract = ('tesseract', $deskewed, $outputfile);
push @tesseract, '-l', $lang if defined $lang && length $lang;
push @tesseract, 'pdf';

{
    # Tesseract 4 is slow when multi-threaded; limit it to one thread for
    # this command only.
    local $ENV{OMP_THREAD_LIMIT} = 1;
    $status = run_logged($log, @tesseract);
}
print {$log} "tesseract failed (wait status $status)\n" if $status != 0;

# Tesseract wrote "${outputfile}.pdf"; the bare $outputfile is removed as
# before (presumably an empty placeholder created by Xsane -- TODO confirm).
unlink($outputfile);

close($log) or warn "Error closing log file $logfile: $!\n";
