#!/bin/bash
set -e

pkg="ncbi-tools-bin"

# Work in $AUTOPKGTEST_TMP when the caller provides it; otherwise create a
# private scratch directory and remove it again when the script ends or is
# signalled.
if [ -z "${AUTOPKGTEST_TMP:-}" ] ; then
    AUTOPKGTEST_TMP=$(mktemp -d "/tmp/${pkg}-test.XXXXXX")
    # Single quotes: the path is expanded (and quoted) when the trap fires,
    # so a directory name with whitespace or glob characters is handled safely.
    trap 'rm -rf -- "$AUTOPKGTEST_TMP"' 0 INT QUIT ABRT PIPE TERM
fi
cd "$AUTOPKGTEST_TMP"

# Stage the test inputs in the working directory: the packaged test data,
# the two compressed files from the doc directory, and the autofix.prt and
# UniVec_Core.* files from /usr/share/ncbi/data.
cp -a "/usr/share/doc/${pkg}/test-data/"* .
cp "/usr/share/doc/${pkg}/"{asnpub.all.gz,medline.prt.gz} .
cp /usr/share/ncbi/data/autofix.prt .
cp /usr/share/ncbi/data/UniVec_Core.* .
# "./" keeps a file name starting with "-" from being read as an option.
gunzip ./*.gz

# check_GI HAYSTACK ID_LIST
#
# Verify that every identifier listed in ID_LIST occurs in HAYSTACK.
#   $1 - file to search
#   $2 - file holding one identifier per line; surrounding whitespace is
#        trimmed and blank lines are skipped
# Returns 0 when every identifier is found. On the first identifier that is
# not found, prints a message to stderr and returns 1, so the failure is
# reported even when the caller is not running under "set -e".
check_GI()
{
	local GI
	# "|| [ -n ... ]" also processes a final line that lacks a newline.
	while read -r GI || [ -n "$GI" ]; do
		# An empty pattern would match everything; skip blank lines.
		[ -n "$GI" ] || continue
		# -F: identifiers are literal strings, not regular expressions.
		if ! grep -qF -- "$GI" "$1"; then
			printf 'check_GI: "%s" not found in %s\n' "$GI" "$1" >&2
			return 1
		fi
	done < "$2"
	return 0
}

##################################################################
echo '---asn2asn test---'
##################################################################
# Round trip: convert nc0225.aso into nc0225.text, convert that back into
# nc0225_new.aso, and require the result to be identical to the original.
/usr/bin/asn2asn -i nc0225.aso -b -o nc0225.text
test -s nc0225.text
/usr/bin/asn2asn -i nc0225.text -s -o nc0225_new.aso
test -s nc0225_new.aso
diff nc0225.aso nc0225_new.aso
# Only nc0225.text is needed by the tests that follow.
rm nc0225.aso nc0225_new.aso

##################################################################
echo '---asn2all test---'
##################################################################
# Run asn2all on nc0225.text and check that the number of LOCUS records in
# each output file matches the number of corresponding "mol" entries in the
# input: dna + rna for nc0225.nuc, aa for nc0225.prt.
# $genes and $proteins are reused by later tests.
/usr/bin/asn2all -i nc0225.text -f g -o nc0225.nuc -v nc0225.prt
[ -s nc0225.nuc ]
# grep -c exits with status 1 when the count is 0. Accept that status (but
# not a real grep error) so that "set -e" does not abort before the
# comparison is made.
genes_dna="$(grep -c " mol dna ," nc0225.text)" || [ "$?" -eq 1 ]
genes_rna="$(grep -c " mol rna ," nc0225.text)" || [ "$?" -eq 1 ]
# Shell arithmetic instead of expr, which exits non-zero for a result of 0.
genes=$(( genes_dna + genes_rna ))
nuc="$(grep -c "^LOCUS " nc0225.nuc)" || [ "$?" -eq 1 ]
[ "$genes" -eq "$nuc" ]
[ -s nc0225.prt ]
proteins="$(grep -c " mol aa ," nc0225.text)" || [ "$?" -eq 1 ]
prt="$(grep -c "^LOCUS " nc0225.prt)" || [ "$?" -eq 1 ]
[ "$proteins" -eq "$prt" ]

##################################################################
echo '---asn2fsa test---'
##################################################################
# Run asn2fsa on nc0225.text and compare the number of ">" header lines in
# each output file with the counts taken in the asn2all test ($genes for
# nc0225.fna, $proteins for nc0225.faa).
/usr/bin/asn2fsa -i nc0225.text -a t -o nc0225.fna -v nc0225.faa
test -s nc0225.fna
fna=$(grep -c '^>' nc0225.fna)
test "$genes" -eq "$fna"
test -s nc0225.faa
faa=$(grep -c '^>' nc0225.faa)
test "$proteins" -eq "$faa"

##################################################################
echo '---asn2gb test---'
##################################################################
# Run asn2gb on nc0225.text; the output must contain one LOCUS line per
# entry counted in $genes by the asn2all test.
/usr/bin/asn2gb -i nc0225.text -a t -o nc0225.gbk
test -s nc0225.gbk
gbk=$(grep -c '^LOCUS ' nc0225.gbk)
test "$genes" -eq "$gbk"

##################################################################
echo '---asn2idx test---'
##################################################################
# Run asn2idx with nc0225.text on stdin and expect nc0225.idx and master.idx
# to be written to the current directory.
/usr/bin/asn2idx -p . -x .text < nc0225.text
test -s nc0225.idx
test -s master.idx
# Sample the first column of line 1 and of every 50th line of the index,
# drop the first ".1" from each value, and make sure each sampled
# identifier appears in the indexed file.
awk 'NR == 1 || NR % 50 == 0 {print $1}' nc0225.idx \
	| sed 's/\.1//' > indexed_GIs
check_GI nc0225.text indexed_GIs

##################################################################
echo '---asn2xml test---'
##################################################################
# Run asn2xml on nc0225.text and check that the number of Seq-inst_mol
# elements per molecule type in the XML matches the counts taken from the
# text input in the asn2all test ($genes = dna + rna, $proteins = aa).
/usr/bin/asn2xml -i nc0225.text -b F -o nc0225.xml
[ -s nc0225.xml ]
# grep -c exits with status 1 when the count is 0. Accept that status (but
# not a real grep error) so that "set -e" does not abort before the
# comparison is made.
dna_xml="$(grep -c '<Seq-inst_mol value="dna"/>' nc0225.xml)" || [ "$?" -eq 1 ]
rna_xml="$(grep -c '<Seq-inst_mol value="rna"/>' nc0225.xml)" || [ "$?" -eq 1 ]
proteins_xml="$(grep -c '<Seq-inst_mol value="aa"/>' nc0225.xml)" || [ "$?" -eq 1 ]
[ "$genes" -eq "$(( dna_xml + rna_xml ))" ]
[ "$proteins" -eq "$proteins_xml" ]

##################################################################
echo '---cleanasn test---'
##################################################################
# Run cleanasn on nc0225.text and check the number of title lines left in
# the output: all title lines of the input minus those that directly follow
# a " descr {" line.
/usr/bin/cleanasn -a t -D t -i nc0225.text -o nc0225_cleaned.text
[ -s nc0225_cleaned.text ]
# grep -c exits with status 1 when the count is 0 (for instance when no
# title is left after cleaning). Accept that status, but not a real grep
# error, so that "set -e" does not abort before the comparison is made.
titles="$(grep -c " title \"" nc0225.text)" || [ "$?" -eq 1 ]
titles_to_clean="$(grep -A 1 " descr {$" nc0225.text | grep -c " title \"")" || [ "$?" -eq 1 ]
titles_left="$(grep -c " title \"" nc0225_cleaned.text)" || [ "$?" -eq 1 ]
[ "$(( titles - titles_to_clean ))" -eq "$titles_left" ]

##################################################################
echo '---gene2xml test---'
##################################################################
# Run gene2xml on dsRNA_viruses.ags, then spot-check the XML output against
# dsRNA_viruses.gene_info for two identifiers: the first field of line 2
# and the first field of the last line.
/usr/bin/gene2xml -b -i dsRNA_viruses.ags -o dsRNA_viruses.xgs
test -s dsRNA_viruses.xgs
# The content in dsRNA_viruses.gene_info mirrors the content in dsRNA_viruses.ags
first_id=$(awk 'NR==2{print $1}' dsRNA_viruses.gene_info)
last_id=$(awk 'END{print $1}' dsRNA_viruses.gene_info)
# NOTE(review): "^id" is a prefix match, so lines whose first field merely
# starts with the identifier are counted as well.
# Beginning of the output: lines starting with the first identifier versus
# lines carrying it in an Object-id_id element.
start_in=$(grep -c "^${first_id}" dsRNA_viruses.gene_info)
start_out=$(grep -c "<Object-id_id>${first_id}</Object-id_id>" dsRNA_viruses.xgs)
test "$start_in" -eq "$start_out"
# End of the output: the same comparison for the last identifier.
end_in=$(grep -c "^${last_id}" dsRNA_viruses.gene_info)
end_out=$(grep -c "<Object-id_id>${last_id}</Object-id_id>" dsRNA_viruses.xgs)
test "$end_in" -eq "$end_out"

# Build GIs.txt from the first lines of nc0225.gbk that contain "GI:"
# (as many as head prints by default), keeping only the text after "GI:".
# The file is used by the tests below and by the gil2bin test.
grep 'GI:' nc0225.gbk | head | sed 's/.*GI://' > GIs.txt

# These tests rely on the ID1 service, which is slated to retire.
# The leading "false" keeps the whole group disabled; the nc probe is never
# reached.
if false && nc -z -w 1 www.ncbi.nlm.nih.gov 80; then
	##################################################################
	echo '---gbseqget test---'
	##################################################################
	/usr/bin/gbseqget -i GIs.txt -o gbseqget.xml
	test -s gbseqget.xml
	check_GI gbseqget.xml GIs.txt
	##################################################################
	echo '---insdseqget test---'
	##################################################################
	/usr/bin/insdseqget -i GIs.txt > insdset.xml
	test -s insdset.xml
	check_GI insdset.xml GIs.txt
	##################################################################
	echo '---idfetch test---'
	##################################################################
	/usr/bin/idfetch -G GIs.txt -o idfetch.text
	test -s idfetch.text
	check_GI idfetch.text GIs.txt
fi


##################################################################
echo '---vecscreen test---'
##################################################################
# Screen nc0225.fna (written by the asn2fsa test) against UniVec_Core and
# check that the last sequence reported in the output is the last sequence
# of the input.
/usr/bin/vecscreen -f 3 -d UniVec_Core < nc0225.fna > vecscreen.output
[ -s vecscreen.output ]
# Last line containing ">" in the input: drop the ">" and everything from
# the first space onwards.
last_in="$(grep ">" nc0225.fna | tail -1 | sed 's/>//' | sed 's/ .*//')"
# Last line containing ">" in the output: drop the ">Vector " prefix and
# everything from the first space onwards.
last_out="$(grep ">" vecscreen.output | tail -1 | sed 's/>Vector //' | sed 's/ .*//')"
# The operator must be a separate word: "[ a==b ]" is a one-argument test
# that only checks for a non-empty string, so it always succeeded. Quoting
# keeps empty values from turning this back into a one-argument test.
[ "$last_in" = "$last_out" ]


# Smoke tests: each tool is run on nc0225.text and only has to exit
# successfully and leave a non-empty output file.
echo '---asndisc test---'
/usr/bin/asndisc -i nc0225.text -a t -o nc0225.disc
test -s nc0225.disc

echo '---asnval test---'
/usr/bin/asnval -i nc0225.text -a t -o nc0225.val -Q 2
test -s nc0225.val

# asnmacro additionally reads the autofix.prt file copied during setup.
echo '---asnmacro test---'
/usr/bin/asnmacro -i nc0225.text -m autofix.prt -o asnmacro.output
test -s asnmacro.output

# Chain of smoke tests in which each step consumes a file produced by the
# previous one: asntool writes medline.val, indexpub and getpub read it,
# and getmesh reads getpub's output. Each step must leave a non-empty file.
echo '---asntool test---'
/usr/bin/asntool -m asnpub.all -v medline.prt -e medline.val
test -s medline.val

echo '---indexpub test---'
/usr/bin/indexpub -i medline.val
test -s medline.idx

echo '---getpub test---'
/usr/bin/getpub -i medline.val -o getpub.output
test -s getpub.output

echo '---getmesh test---'
/usr/bin/getmesh -i getpub.output -o getmesh.output
test -s getmesh.output

# Smoke tests: each tool has to exit successfully and leave a non-empty
# output file.
echo '---debruijn test---'
/usr/bin/debruijn -a ncbistdaa -n 4 > debruijn.output
test -s debruijn.output

# GIs.txt was generated earlier from nc0225.gbk.
echo '---gil2bin test---'
/usr/bin/gil2bin -i GIs.txt -o GIs.bin
test -s GIs.bin

# makeset takes a file that lists its input files, one name per line.
echo '---makeset test---'
printf '%s\n' 'idfetch_g1234.npset' > files
/usr/bin/makeset -i files -o makeset.output
test -s makeset.output

echo '---nps2gps test---'
/usr/bin/nps2gps -i idfetch_g1234.npset -o nps2gps.output
test -s nps2gps.output

# tbl2asn is expected to produce Sc_16.sqn, which the checksub and fa2htgs
# tests below use as input.
echo '---tbl2asn test---'
/usr/bin/tbl2asn -t Sc_16.sbt -i Sc_16.fsa
test -s Sc_16.sqn

# checksub's output must be identical to its input.
echo '---checksub test---'
/usr/bin/checksub -i Sc_16.sqn -o checksub.sqn
diff Sc_16.sqn checksub.sqn

echo '---fa2htgs test---'
/usr/bin/fa2htgs -i Sc_16.fsa -t Sc_16.sqn -n "Saccharomyces cerevisiae" \
	-g mycenter -s ABC_1234567 -o fa2htgs.output -e fa2htgs.log
test -s fa2htgs.output

echo '---subfuse test---'
/usr/bin/subfuse -p ./ -o subfuse.output
test -s subfuse.output

# trna2sap and trna2tbl are exercised the same way: feed the sample file on
# stdin and require a non-empty <tool>.output file.
for tool in trna2sap trna2tbl; do
	echo "---${tool} test---"
	"/usr/bin/${tool}" < trnascan-se_sample.output > "${tool}.output"
	test -s "${tool}.output"
done

# spidey is run on the sample genome and mRNA FASTA files and must write a
# non-empty summary.
echo '---spidey test---'
/usr/bin/spidey -i spidey_genome.fasta -m spidey_mRNA.fasta -p 1 -o spidey.summary
test -s spidey.summary

