#!/bin/sh
#
#   Copyright
#
#       Copyright (C) 2007-2010 Jean-Pierre Demailly <demailly@fourier.ujf-grenoble.fr>
#       Copyright (C) 2010 Jari Aalto <jari.aalto@cante.net>
#
#   License
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 3 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program. If not, see <http://www.gnu.org/licenses/>.

# Error MESSAGE...
#
#   Print "<script>: [ERROR] <message>" on standard error.
#   All arguments are joined into one message.  printf with a fixed
#   format is used instead of echo so that backslashes in the message
#   (for example in file names) are never interpreted as escapes,
#   whatever shell /bin/sh happens to be.
Error ()
{
    printf '%s: [ERROR] %s\n' "$0" "$*" >&2
}

Die ()
{
    # Report the joined arguments as one message through Error,
    # then terminate the script with a failure status.
    Error "$*"
    exit 1
}

# Which COMMAND
#
#   Return success when COMMAND can be run from the current shell,
#   failure otherwise.  Nothing is printed.  An empty or missing
#   argument is replaced by a name that is not expected to exist, so
#   the call fails cleanly.
#
#   Uses the POSIX builtin "command -v" rather than the external
#   "which" program, which is not guaranteed to be installed.
Which ()
{
    command -v "${1:-nothing-here-is-interesting}" > /dev/null 2>&1
}

# Main FILE [COMPRESS]
#
#   Run an OCR engine on the image FILE, append the recognized text to
#   $HOME/snap0000.txt and show that file in a viewer or editor.
#
#   Arguments:
#       $1  image file to process; it is DELETED once processing ends
#       $2  optional; when "1", gzip copies of both text files are
#           written in the background
#
#   Environment:
#       EDITOR  used to show the result when xless is not installed
#       LANG    the part before the first "_" selects the OCR language
#       HOME    directory receiving snap0000.txt and snap0001.txt
#
#   Calls Die (exit status 1) when no viewer/editor can be found, when
#   FILE is missing, or when ImageMagick's convert is absent or fails.
Main ()
{
    # Viewer selection: xless wins when installed, then the user's
    # EDITOR, then a list of common fallbacks.
    if Which xless ; then
        EDITOR=xless
    elif [ "$EDITOR" ]; then
        :    # Obey user's setting
    elif Which sensible-editor ; then
        EDITOR=sensible-editor
    elif Which emacs ; then
        EDITOR=emacs
    elif Which nano ; then
        EDITOR=nano
    elif Which vi ; then
        EDITOR=vi
    else
        Die "environment variable EDITOR not set"
    fi

    FILE="$1"
    OUT="$HOME/snap0001.txt"
    PPM=__snap__.ppm
    TIF=__snap__.tif

    # Language part of the locale, e.g. "fr" from "fr_FR.UTF-8"
    ENVLANG=${LANG%%_*}

    [ "$FILE"    ] || Die "Missing argument FILE"
    [ -f "$FILE" ] || Die "File not exists: $FILE"

    # You should have imagemagick installed
    # 200% magnification seems to improve OCR step
    # May have to be adapted to the context ...

    Which convert || Die "Program convert not in PATH"

    # Start from an empty result so that a failing OCR run cannot
    # re-append the text left over from a previous invocation.
    : > "$OUT"

    # Select OCR engine

    if Which tesseract ; then

        case "$ENVLANG" in
            fr) LANGUAGE="fra" ;;
            de) LANGUAGE="deu" ;;
            it) LANGUAGE="ita" ;;
            *)  LANGUAGE="eng" ;;
        esac

        # tesseract takes an output base name and appends ".txt"
        # itself, so the text ends up in $OUT.
        OUTPUT="${OUT%.txt}"
        convert -scale 200% "$FILE" "$TIF" || Die "convert failed: $FILE"
        tesseract "$TIF" "$OUTPUT" -l "$LANGUAGE"

    elif Which ocrad ; then

        convert -negate -scale 200% "$FILE" "$PPM" ||
            Die "convert failed: $FILE"
        echo "ocrad $FILE ..."
        ocrad "$PPM" > "$OUT"

    elif Which cuneiform ; then

        LANGUAGE=
        if [ "$ENVLANG" ] && [ "$ENVLANG" != "en" ]; then
            # Pick the first language code reported by cuneiform that
            # contains the locale's language part.
            LANGUAGE=$( cuneiform -l = 2>&1 |
                        grep languages |
                        cut -d":" -f 2- |
                        tr " " "\n" |
                        grep -F -e "$ENVLANG" |
                        head -n 1 )
        fi

        # No match (or English locale): fall back to English
        [ "$LANGUAGE" ] || LANGUAGE="eng"

        convert -negate -scale 200% "$FILE" "$PPM" ||
            Die "convert failed: $FILE"
        cuneiform -l "$LANGUAGE" "$PPM" -o "$OUT"

    else

        # Error writes to stderr, so capture that stream to get the
        # message into the result file (and thus into the viewer),
        # then repeat it on the real stderr.
        Error "OCR engine not found, Need tesseract, ocrad or cuneiform" \
            2> "$OUT"
        cat "$OUT" >&2
    fi

    # Concatenate
    cat "$OUT" >> "$HOME/snap0000.txt"

    if [ "$EDITOR" = "xless" ]; then

        # Start a viewer only when no xless/less process already has
        # the file on its command line.  -E is required: "?" is only
        # a repetition operator in extended regular expressions.
        if ! ps aux | grep -E "x?less" | grep -q "snap0000.txt" ; then
            xless -fn 10x20 "$HOME/snap0000.txt"
        fi

    else
        # $EDITOR is deliberately unquoted so that a value carrying
        # options (e.g. "emacs -nw") is split into command + arguments.
        $EDITOR "$HOME/snap0000.txt"
    fi

    # The snapshot and the intermediate images are no longer needed
    rm -f -- "$FILE" "$PPM" "$TIF"

    if [ "$2" = "1" ]; then
        gzip -f -c "$HOME/snap0000.txt" > "$HOME/snap0000.txt.gz" &
        gzip -f -c "$OUT" > "$OUT.gz" &
    fi
}

# Script entry point: pass every command-line argument to Main unchanged.
Main "$@"

# End of file
