#!/usr/local/bin/perl

######################################################################
#
#   Greedy MDL-minimization algorithm 
#
#
#   Copyright 1996 Jaak Vilo 
#  
######################################################################

$PRATT = "./pratt";
$REALOUTPUT = "MDL.result" ;
$TMP_DIR = "mdl_tmp" ;

$C0=10;
$C1=10;
$C2=3;
$C3=10;


system( "mv MDL.result MDL.result.backup.$$" );  # Old results...
mkdir ( $TMP_DIR, 755 ) || die  unless -d $TMP_DIR ;
system( "rm -rf $TMP_DIR/*"); #clean up old versions

open( REALOUTPUT , ">$REALOUTPUT" ) || die;
$| = 1 ; # Force the flush

######################################################################
# Read in the data file
#

$name=''; $count= 0;
while( <> )
{
	chop; 
	$_ =~ s/\s+$//g;   # Remove empty line endings...
	if ( /^\>/ ) { $name = substr($_,1) ; $NAMES[++$count-1]=$name; next; }
	$LINES{$name} .= "$_\n";
}

######################################################################

$iteration=0;
while( $count > 0 )    # While some sequences not covered yet...
{
    ++$iteration;
    open( DATA , ">$TMP_DIR/mdl_data.$count" );
    for( $i=0; $i < $count ; $i++ )
    {
	print DATA ">$NAMES[$i]\n$LINES{$NAMES[$i]}";
    }
    close(DATA);
	
    # Run pratt, greedy choice
    @RESULT = (); # Empty result...

    # Cycle for different K values
    for( $K = $count ; $K >= 4 ; $K = $K - ( int($K/5) ? int($K/5) : 1 )  )
    {
        print "$K => " , $K - ( int($K/5) ? int($K/5) : 1 ) , "\n" ;
	sleep( 2 );

	open( S , ">parameters" ); # What parameters should be used for Pratt

print S <<ENDOFPARAM
M 
$K
N
0
O
$TMP_DIR/mdl_data.$count.$K.pat
W 
10
Z
Z0
$C0
Z1
$C1
z2
$C2
Z3    
$C3
H 
100
I 
0.1
X
ENDOFPARAM
;
	close(S);

        print "Call Pratt on file with $count sequences, covering at least $K of them\n" ;
	sleep(2);
	system( "$PRATT fasta $TMP_DIR/mdl_data.$count <parameters " );

	open( PRATTOUTPUT , "mdl.special" ) || die ;  # Pratt produces mdl.special file
        while ( <PRATTOUTPUT> ) { push( @RESULT, $_ ) ;  }
	close( PRATTOUTPUT );
   }

    open( MDLSPEC, ">$TMP_DIR/mdl.special.$count.$iteration" );
    print MDLSPEC @RESULT ;
    close(MDLSPEC);

    print "SORT\n" if $#RESUL => 0; sleep( 3 );
    sub sortorder { 
	@all=split(/\s+/,$a);
	$vala = ( $all[0] eq '' ) ? $all[1] : $all[0] ;
	@all=split(/\s+/,$b);
	$valb = ( $all[0] eq '' ) ? $all[1] : $all[0] ;
	# print "$vala <=> $valb\n";
	$valb <=> $vala ;  # Reverse sort - pick the largest value!
    }
    @RESULT = sort sortorder  @RESULT ;
    print @RESULT ; 

    $greedychoice = $RESULT[0] ;
    chop( $greedychoice );

    @all=split(/\s+/,$greedychoice );
    splice( @all,0,1 ) if $all[0] eq '' ; 
    $mdlcost = $all[0];
    $prattcost = $all[1]; 
    $pattern = $all[2];
    @sequences = splice( @all, 3 ); 

    # Output the answer of greedy choice
    for $i ( @sequences ){ 
	$answer{$NAMES[$i]}=$iteration;  # Mark removed
	print REALOUTPUT "$NAMES[$i] ";	
    }
    print REALOUTPUT "\n$mdlcost $prattcost\n$pattern\n\n";

    for $i ( @NAMES ) {   # Remove covered sequences from @NAMES 
	if ( $answer{$i} == $iteration ) { 
		for $j (0..$#NAMES){ 
			if( $NAMES[$j] eq $i ) {
				splice( @NAMES,$j,1 ) ; 
				last ;
			}
		}
	}
    }
    $count = $#NAMES + 1;

    # Finish if the RESULT was left empty...
    if( ! ( $#RESULT >= 0 ) ) { 
	for $i (0..$count-1) { print REALOUTPUT "$NAMES[$i]\n"; }
	goto FINISH ;
    }
}



FINISH:
close( REALOUTPUT );
print<<END

MDL-Pratt has finished it's job. 
The result is written to a file $REALOUTPUT

The result is as follows:

END
;

open( RES, "<$REALOUTPUT" );
while(<RES>){ print $_ };



