#! /bin/bash

# Directory holding the tagger's models, scripts and bundled binaries.
# This is a relative path, so it is resolved against the current directory.
TAG=tag

# Command line used for tokenization. Use the hfst-tokenize found on PATH
# when there is one; otherwise fall back to the copy inside $TAG.
if command -v hfst-tokenize > /dev/null 2>&1; then
    TOKENIZE="hfst-tokenize -x $TAG/omorfi_tokenize.pmatch"
else
    TOKENIZE="$TAG/hfst-tokenize -x $TAG/omorfi_tokenize.pmatch"
fi

# Print the usage text to standard output and terminate the script with
# exit status 0.
print_help() {
    # Quoted delimiter: the text is emitted literally, no expansion.
    cat <<'HELP_TEXT'
Lemmatizes and morphologically labels running Finnish text on standard input.

Options:
  --version, -v        Print version information
  --no-tokenize        Instead of running text, expect a newline-separated
                       stream of tokens, with an empty line signifying sentence
                       boundaries

This package is based on the statistical (CRF-based) tagger FinnPos,
the Finnish morphology package OmorFi and the FinnTreeBank corpus of
labeled text.

Process entire files with redirection, eg.
  $ finnish-postag < mytext.txt > mytext_tagged.txt
or type into the terminal and terminate with EOF (usually ctrl-D on
your keyboard), or directly input a line of text with <<<. Example:

$ finnish-postag <<< "Voitteko ojentaa voita?"

HELP_TEXT
    # Sample output rows: token, lemma and label, separated by tabs.
    # printf reuses the format for each group of three arguments.
    printf '%s\t%s\t%s\n' \
        'Voitteko' 'voida' '[POS=VERB]|[VOICE=ACT]|[MOOD=INDV]|[TENSE=PRESENT]|[PERS=PL2]|[CLIT=KO]' \
        'ojentaa' 'ojentaa' '[POS=VERB]|[VOICE=ACT]|[MOOD=INDV]|[TENSE=PRESENT]|[PERS=SG3]' \
        'voita' 'voi' '[POS=NOUN]|[NUM=SG]|[CASE=PAR]' \
        '?' '?' '[POS=PUNCTUATION]'
    exit 0
}

# Print the version string and release date to standard output, one per
# line, and terminate the script with exit status 0.
print_version() {
    printf '%s\n' \
        "finnish-tagtools version 1.3.3" \
        "2019-02-13"
    exit 0
}

# Tokenizer replacement used for pre-tokenized input.
# Feeds standard input to hfst-lookup (invoked with -p and the transducer
# $TAG/omorfi.tagtools.optcap.hfst) and keeps only the first two
# tab-separated fields of each output line.
# Globals: TAG (read)
# Outputs: filtered lookup results on standard output
lookup_and_cut() {
    # The path is quoted so that a TAG containing whitespace or glob
    # characters still reaches hfst-lookup as a single argument.
    hfst-lookup -p "$TAG/omorfi.tagtools.optcap.hfst" | cut -f1,2
}

# Walk over every command-line argument in order:
#   - an empty argument is ignored,
#   - --version / -v prints the version and exits,
#   - --no-tokenize replaces the tokenizer command with lookup_and_cut,
#   - anything else prints the help text and exits.
for arg in "$@"; do
    case "$arg" in
        "")
            ;;
        --version | -v)
            print_version
            ;;
        --no-tokenize)
            TOKENIZE=lookup_and_cut
            ;;
        *)
            print_help
            ;;
    esac
done


# Main pipeline: standard input goes through the tokenizer command, then
# through the conversion, feature-extraction, labelling and lemma-restoring
# stages; the final cut keeps tab-separated fields 1, 3 and 4.
#
# $TOKENIZE is left unquoted on purpose: it holds a whole command line
# (program plus arguments, or a function name) and relies on word
# splitting. All $TAG paths are quoted so each stays a single argument.
# finnpos-label's stderr is discarded.
# NOTE(review): presumably this hides progress chatter, but it also hides
# real errors from that stage; confirm this is intended.
# shellcheck disable=SC2086
$TOKENIZE \
    | python3 "$TAG/omorfi2finnpos.py" ftb \
    | python3 "$TAG/finnpos-ratna-feats.py" "$TAG/freq_words" \
    | "$TAG/finnpos-label" "$TAG/ftb.omorfi.model" 2>/dev/null \
    | python3 "$TAG/finnpos-restore-lemma.py" \
    | cut -f1,3,4
