#! /usr/bin/perl -w

# Do a piece of a profmark benchmark, for phmmer searches by FPS
# (family-pairwise-search; best E-value of all individual queries).
#
# This script is normally called by pmark_master.pl; its command line
# syntax is tied to pmark_master.pl.
#
# Usage:     x-phmmer-fps <top_builddir>                     <top_srcdir> <resultdir> <tblfile> <msafile> <fafile> <outfile>
# Example: ./x-phmmer-fps ~/releases/hmmer-3.0/build-icc-mpi ~/src/hmmer  testdir     test.tbl  pmark.msa test.fa  test.out
#        
# SRE, Sun Feb  7 08:54:28 2010 [Casa de Gatos]
# SVN $Id$

BEGIN {
    $top_builddir  = shift;
    $top_srcdir    = shift;
    $wrkdir        = shift;
    $tblfile       = shift;
    $msafile       = shift;
    $fafile        = shift;
    $outfile       = shift;
}

$phmmer      = "$top_builddir/src/phmmer";
$opts        = "";

if (! -d $top_builddir)                                 { die "didn't find build directory $top_builddir"; }
if (! -d $top_srcdir)                                   { die "didn't find source directory $top_srcdir"; }
if (! -x $phmmer)                                       { die "didn't find executable $phmmer"; }
if (! -e $wrkdir)                                       { die "$wrkdir doesn't exist"; }

open(OUTFILE,">$outfile") || die "failed to open $outfile";
open(TABLE, "$tblfile")   || die "failed to open $tblfile";
MSA:
while (<TABLE>)
{
    ($msaname) = split;

    %seen          = ();
    %best_pval     = ();
    %best_bitscore = ();

    `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`;
    if ($?) { print "FAILED: esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname\n"; next MSA; }

    # Extract a list of individual sequence names from the multiple alignment.
    $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | awk '{print \$2}'`;
    if ($?) { print "FAILED: esl-seqstat\n"; next MSA; }

    @qnames = split(/^/,$output);
    chop (@qnames);

    # Loop over each query; phmmer; accumulate best pval for each target
    foreach $qname (@qnames) 
    {
	$output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`;
	if ($?) { print "FAILED: esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname\n"; next MSA; }

	`$phmmer --cpu 1 --tblout $wrkdir/$msaname.tmp $opts $wrkdir/$msaname.query.fa $fafile > /dev/null`;
	if ($?) { print "FAILED: $phmmer --tblout $msaname.tmp $opts $wrkdir/$msaname.query.fa $fafile\n"; next MSA; }

	open(OUTPUT, "$wrkdir/$msaname.tmp") || die "FAILED: to open $wrkdir/$msaname.tmp tabular output file"; 
        while (<OUTPUT>)
	{
	    if (/^\#/) { next; }
	    @fields   = split(' ', $_, 7);
	    $target   = $fields[0];
	    $pval     = $fields[4];
	    $bitscore = $fields[5];
	    if (! $seen{$target} || $pval < $best_pval{$target})
	    { 		
		$seen{$target}          = 1;
		$best_pval{$target}     = $pval; 
		$best_bitscore{$target} = $bitscore; 
	    }
	}
	close OUTPUT;
    }

    # Append to the outfile.
    foreach $target (keys(%seen)) 
    {
	printf OUTFILE "%g %.1f %s %s\n", $best_pval{$target}, $best_bitscore{$target}, $target, $msaname;
    }

    unlink "$wrkdir/$msaname.tmp";
    unlink "$wrkdir/$msaname.query.fa";
    unlink "$wrkdir/$msaname.sto";
}
close TABLE;
close OUTFILE;
