#!/usr/bin/perl 
#
# ratchet.pl v2.0
# Tiffani L. Williams
# March 2003
# 
# modified from
#
# perlRat.pl v1.0
# Modified February 13, 2002 10:00
# Olaf R.P. Bininda-Emonds
#
# Generates:
#   A nexus-formatted paup block with randomly selected characters and
#   necessary instructions for performing a parsimony ratchet
#
# Usage: ratchet.pl -f <input file> [-paup <paup file>] [-l <logfile>] 
#                   [-n <replicates>] [-p <percent>] [-w <factor>]
#                   [-v] [-t <tree file>] [-h] [-cmd] [-cmd <cmd file>] 
#                   [-contree <strict consensus tree file>]
#   options: 
#                         # kliu - this is wrong - needs to be nexus formatted!
#            -f = name of Phylip-formatted data file WITH PATH
#            -l <log file> = log file
#            -n <replicates> = number of ratchet replicates
#                             (default = 1000)
#            -paup <paup file> = name of the Nexus batch file for PAUP \n";
#            -p <percent> = percent of characters to be upweighted in each step
#                          (default = 25)
#            -t <tree file> = name of tree file 
#            -w <factor> = factor by which to upweight selected characters
#                         (default = 2)
#            -v = verbose output
#            -h = print this message and quit
#            -limit = specify a time limit (in seconds per ratchet replicate)
# -cmd = command file WITH PATH
# -contree = strict consensus tree WITH PATH

# seems to be performing random sequence addition to get first tree
# tbr moves from current set of trees otherwise - bad to do multrees as they do

# kliu - also disable dstatus reporting

$NumChar = 0;
$filename = "";
# kliu must parameterize this
$logfile = "ratchet.log";
$NumRepl = 1000;
$PercIncl = 0.25;
$search = 0;
$weight = 2;
$verbose = 0;
# kliu must parameterize this
$paupfile = "ratchet.nex";
$treefile = "ratchet.tre";
# kliu must parameterize this
$contreefile = "conratchet.tre";
# kliu - file suffix is pointless - remove it
#$file_suffix = "";
# this defaults to not used - <= 0 -> don't use it
$time_limit = 0;

for ($i = 0; $i <= $#ARGV; $i++){
  if ($ARGV[$i] eq "-f") {
    $filename = $ARGV[++$i];
  }
  elsif ($ARGV[$i] eq "-n") {
    $NumRepl = $ARGV[++$i];
  }
  elsif ($ARGV[$i] eq "-l") {
    $logfile = $ARGV[++$i];
  }
#	elsif ($ARGV[$i] eq "-fs"){
#    $file_suffix = $ARGV[++$i];
#  }
	elsif ($ARGV[$i] eq "limit" || $ARGV[$i] eq "-limit"){
    $time_limit = $ARGV[++$i];
  }
	elsif ($ARGV[$i] eq "-paup") {
    $paupfile = $ARGV[++$i];
  }
  elsif ($ARGV[$i] eq "-p") {
    $PercIncl = $ARGV[++$i] / 100;
  }
  elsif ($ARGV[$i] eq "-t") {
    $treefile = $ARGV[++$i];
  }
  elsif ($ARGV[$i] eq "-c") {
		$contreefile = $ARGV[++$i];
  }
  elsif ($ARGV[$i] eq "-w") {
    $weight = $ARGV[++$i];
  }
  elsif ($ARGV[$i] eq "-v") {
    $verbose = 1;
  }
  elsif ($ARGV[$i] eq "-cmd") {
    $paupfile = $ARGV[++$i];
  }
  elsif ($ARGV[$i] eq "-contree") {
    $contreefile = $ARGV[++$i];
  }
  elsif ($ARGV[$i] eq "-h") {
    print "Usage: ratchet.pl -f <input file> [-paup <paup file>] [-l <logfile>] \n";
    print "                  [-n <replicates>] [-p <percent>] [-w <factor>] \n";
    print "                  [-v] [-t <tree file>] [-h] [-cmd <cmd file>] [-contree <strict consensus tree file>]\n";
    print "Options:\n"; 
    print "         -f <input file> = name of Phylip-formatted data file\n";
    print "         -l <log file> = log file \n";
    print "                         (default = ratchet.log)\n";
    print "         -n <replicates> = number of ratchet replicates\n";
    print "                           (default = 1000)\n";
    print "         -paup <paup file> = name of the Nexus batch file for PAUP \n";
    print "                             (default = ratchet.nex) \n";
    print "         -p <percent> = percent of characters to be upweighted in each step\n";
    print "                        (default = 25)\n";
    print "         -t <tree file> = name of tree file \n";
    print "         -w <factor> = factor by which to upweight selected characters\n";
    print "                       (default = 2)\n";
    print "         -v = verbose output\n";
    print " -cmd = command file WITH PATH\n";
    print " -contree = strict consensus tree WITH PATH\n";

    exit(0);
  }
  else {
    print "Invalid argument $ARGV[$i]\n";
    print "For usage, type ratchet.pl -h\n";
    exit (1);
  }
}

#if ($file_suffix eq "") {
#  print "No file suffix!\n";
#  exit (1);
#}
#else {
#  $logfile .= $file_suffix;
#	$treefile .= $file_suffix;
#	$contreefile .= $file_suffix;
#  $paupfile .= $file_suffix;
#}

if (not $NumChar and not $filename) {
  die "Must specify total number of characters or filename\n" 
}

# include alignment file in paup commands
if ($filename) {
  open (fCMD, ">$paupfile") or die "Cannot open batch file $paupfile \n";

  print fCMD "#NEXUS\n";
  print fCMD "\n";

  print fCMD "execute $filename ;\n";
}

# kliu - OMIT THIS - just do execute load it manually
# Get number of characters from file (if specified)
#if ($filename) {
#  open (fDATA, "<$filename") or die "Cannot open data file $filename\n";
#  open (fCMD, ">$paupfile") or die "Cannot open batch file $paupfile \n";
#  while (<fDATA>) {
#    if ($_ =~ /\s*(\d+)\s+(\d+)/) {  #specific for Phylip file
#      $NumChar = $2;
#      print fCMD "#NEXUS\n";
#      print fCMD "begin characters;\n";
#      print fCMD "\tdimensions newtaxa ntax=$1 nchar=$2;\n";
#      print fCMD "\tformat datatype=nucleotide gap=- missing=?;\n";
#      print fCMD "\tmatrix\n";
#    }
#    else {
#      print fCMD "$_\n";
#    }
#  }
#  print fCMD ";\n";
#  print fCMD "end;\n\n";
#}

#die "Error reading number of characters" if (not $NumChar);

# kliu - just a comment - omit this
#$NumIncl = int($NumChar * $PercIncl);

#print fCMD "[Ratchet parameters: NumChar = $NumChar\n";
#print fCMD "                     Inclusion Percentage = $PercIncl ($NumIncl characters)\n";
#print fCMD "                     Replicates = $NumRepl\n";
#print fCMD "                     Final search = ";
#if ($search) {
#  print fCMD "yes]\n\n";
#}
#else {
#  print fCMD "no]\n\n";
#}

print fCMD "begin paup;\n";
# kliu - turn off monitor output to STDOUT - use logfile instead - changed monitor=yes
print fCMD "\tset autoclose=yes warntree=no warnreset=no notifybeep=no monitor=no taxlabels=full;\n";
print fCMD "\tlog file=$logfile replace;\n";
print fCMD "\tset criterion=parsimony;\n";
print fCMD "\tpset collapse=no;\n";
$PAUPseed=int(10000*rand());
print fCMD "\n\t[!][!*** Replicate 0 (initial tree) ***]\n\n";

# kliu alter from original to include time limit
#print fCMD "\thsearch addseq=random nreps=1 rseed=$PAUPseed swap=TBR multrees=no dstatus=NONE;\n";
if ($time_limit > 0) {
    # kliu testing
    #print "kliu time limit is $time_limit \n";

    print fCMD "\thsearch addseq=random nreps=1 rseed=$PAUPseed swap=TBR multrees=no dstatus=NONE timelimit=$time_limit ;\n";
}
else {
    print fCMD "\thsearch addseq=random nreps=1 rseed=$PAUPseed swap=TBR multrees=no dstatus=NONE;\n";
}

print fCMD "\tsavetrees file=$treefile format=nexus replace;\n\n";

for (1..$NumChar) {		#Create array containing numbers of all characters
  push @MasterCharString, $_;
}

for (1..$NumRepl) {
  @InclList = (); 
  @CharString = @MasterCharString;
  
  for (1..$NumIncl)	{
    #Randomly select required proportion of characters from array without replacement
    push @InclList, splice(@CharString,rand @CharString,1);
  }
  
  @InclList = sort {$a <=> $b} @InclList;
  
  print fCMD "\t[!][!*** Replicate #$_ ***]\n\n";
  print fCMD "\tweights $weight: @InclList;\n";
# kliu alter from original to include time limit
#  print fCMD "\thsearch start=current swap=TBR multrees=no dstatus=NONE;\n";
  if ($time_limit > 0) {
      # kliu testing
      #print "kliu time limit is $time_limit \n";


      print fCMD "\thsearch start=current swap=TBR multrees=no dstatus=NONE timelimit=$time_limit ;\n";
  }
  else {
      print fCMD "\thsearch start=current swap=TBR multrees=no dstatus=NONE;\n";
  }
  print fCMD "\tweights 1:all;\n";
# kliu alter from original to include time limit
#  print fCMD "\thsearch start=current swap=TBR multrees=no dstatus=NONE;\n";
  if ($time_limit > 0) {
      # kliu testing
      #print "kliu time limit is $time_limit \n";

      print fCMD "\thsearch start=current swap=TBR multrees=no dstatus=NONE timelimit=$time_limit ;\n";
  }
  else {
      print fCMD "\thsearch start=current swap=TBR multrees=no dstatus=NONE;\n";
  }
  print fCMD "\tsavetrees file=$treefile format=altnex append;\n\n";
  print fCMD "\tsavetrees file=$treefile.nex format=nexus append;\n\n";
}
print fCMD "\t[!][!*** Determining strict consensus tree ***]\n\n";
print fCMD "\tgettrees file=$treefile allblocks=yes warntree=no;\n";
print fCMD "\t\tfilter best=yes;\n";
print fCMD "\t\tcontree all / strict=yes treefile=$contreefile replace;\n";
print fCMD "\n\tlog stop;\n";
print fCMD "end;\n\n";
print fCMD "quit warntsave=no;\n";
close fCMD;

