[Swift-commit] r2849 - usertools/cio/science/blast
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Thu Apr 9 14:25:10 CDT 2009
Author: aespinosa
Date: 2009-04-09 14:25:09 -0500 (Thu, 09 Apr 2009)
New Revision: 2849
Added:
usertools/cio/science/blast/tc.bgp
Removed:
usertools/cio/science/blast/tc.data
Modified:
usertools/cio/science/blast/blast.swift
usertools/cio/science/blast/mapper-in.sh
usertools/cio/science/blast/mapper-out.sh
usertools/cio/science/blast/measure-runblast.sh
usertools/cio/science/blast/mockblast.sh
usertools/cio/science/blast/readseq.rb
usertools/cio/science/blast/runblast.sh
Log:
Test framework for MTDM and vanilla+bgp_enhancement swift
Modified: usertools/cio/science/blast/blast.swift
===================================================================
--- usertools/cio/science/blast/blast.swift 2009-04-08 22:38:45 UTC (rev 2848)
+++ usertools/cio/science/blast/blast.swift 2009-04-09 19:25:09 UTC (rev 2849)
@@ -24,13 +24,23 @@
cat @filenames(out.out) stdout=@sum;
}
-BlastDatabase pir[] <filesys_mapper;location="common", pattern="UNIPROT_for_blast_14.0.seq*">;
-BlastResult out[] <ext;exec="mapper-out.sh">;
-BlastQuery input[] <ext;exec="mapper-in.sh">;
+string db=@arg("db");
+string db_prefix=@arg("db_pref");
+string start=@arg("start");
+string end=@arg("end");
+string limit=@arg("limit");
+string runid=@arg("run");
+BlastDatabase pir[] <filesys_mapper;location=db_prefix,
+ pattern=@strcat(db, "*")>;
+BlastResult out[] <ext;exec="mapper-out.sh", a=start, b=end,
+ d=runid, l=limit>;
+BlastQuery input[] <ext;exec="mapper-in.sh", a=start, b=end,
+ d=runid, l=limit>;
+
foreach data,i in input {
(out[i]) = blastall(data, pir);
}
-BlastSummary sum <"summary.out">;
+BlastSummary sum <single_file_mapper;file=@strcat(runid,"/summary.out")>;
sum = summarize(out);
Modified: usertools/cio/science/blast/mapper-in.sh
===================================================================
--- usertools/cio/science/blast/mapper-in.sh 2009-04-08 22:38:45 UTC (rev 2848)
+++ usertools/cio/science/blast/mapper-in.sh 2009-04-09 19:25:09 UTC (rev 2849)
@@ -1,18 +1,21 @@
#!/bin/bash
-#while getopts ":a:b:d:" options; do
-# case $options in
-# a) export a=$OPTARG ;;
-# b) export b=$OPTARG ;;
-# d) export d=$OPTARG ;;
-# *) exit 1;;
-# esac
-#done
+while getopts ":a:b:d:l:" options; do
+ case $options in
+ a) export a=$OPTARG ;;
+ b) export b=$OPTARG ;;
+ d) export d=$OPTARG ;;
+ l) export l=$OPTARG ;;
+ *) exit 1;;
+ esac
+done
-export a=1
-export b=2
-export d=0000
-
-ls /disks/ci-gpfs/swift/blast/pir/input/$d/*.qry | \
- perl -wnl -e '$. >= $ENV{"a"} and $. <= $ENV{"b"}
- and print "[", $.-$ENV{"a"}, "] $_";'
+for (( i = a; i <= b; i++ )); do
+ seqid=`printf %07d $i`
+ if [[ $l == "l" ]]; then
+ prefix=$d/`printf %04d $(( i / 1000 ))`
+ else
+ prefix=$d
+ fi
+ echo [$(($i-a))] $prefix/SEQ$seqid\_*.qry
+done
Modified: usertools/cio/science/blast/mapper-out.sh
===================================================================
--- usertools/cio/science/blast/mapper-out.sh 2009-04-08 22:38:45 UTC (rev 2848)
+++ usertools/cio/science/blast/mapper-out.sh 2009-04-09 19:25:09 UTC (rev 2849)
@@ -1,22 +1,26 @@
#!/bin/bash
-#while getopts ":a:b:d:" options; do
-# case $options in
-# a) export a=$OPTARG ;;
-# b) export b=$OPTARG ;;
-# d) export d=$OPTARG ;;
-# *) exit 1;;
-# esac
-#done
-export a=1
-export b=2
-export d=0000
+while getopts ":a:b:d:l:" options; do
+ case $options in
+ a) export a=$OPTARG ;;
+ b) export b=$OPTARG ;;
+ d) export d=$OPTARG ;;
+ l) export l=$OPTARG ;;
+ *) exit 1;;
+ esac
+done
-ls /disks/ci-gpfs/swift/blast/pir/input/$d/*.qry | \
- perl -wnl -e '$. >= $ENV{"a"} and $. <= $ENV{"b"}
- and s/qry/out/ and s/input/output/
- and print "[", $.-$ENV{"a"}, "].out $_";'
-ls /disks/ci-gpfs/swift/blast/pir/input/$d/*.qry | \
- perl -wnl -e '$. >= $ENV{"a"} and $. <= $ENV{"b"}
- and s/qry/err/ and s/input/error/
- and print "[", $.-$ENV{"a"}, "].err $_";'
+
+for (( i = a; i <= b; i++ )); do
+ seqid=`printf %07d $i`
+ if [[ $l == "l" ]]; then
+ prefix=$d/`printf %04d $(( i / 1000 ))`
+ else
+ prefix=$d
+ fi
+ if [ ! -d $prefix ]; then
+ mkdir -p $prefix
+ fi
+ echo [$(($i-a))].out $prefix/SEQ$seqid.out
+ echo [$(($i-a))].err $prefix/SEQ$seqid.err
+done
Modified: usertools/cio/science/blast/measure-runblast.sh
===================================================================
--- usertools/cio/science/blast/measure-runblast.sh 2009-04-08 22:38:45 UTC (rev 2848)
+++ usertools/cio/science/blast/measure-runblast.sh 2009-04-09 19:25:09 UTC (rev 2849)
@@ -1 +1,34 @@
-./swiftblast.sh - UNIPROT_for_blast_14.0.seq 100001 100010
+#!/bin/bash
+
+PATH=$PATH:$CIOROOT/bin
+
+set_vanilla()
+{
+ cat > ~/bin/swift << EOF
+#!/bin/bash
+/home/falkon/swift_vanilla/cog/modules/swift/dist/swift-svn/bin/swift $@
+EOF
+}
+
+set_mtdm()
+{
+ cat > ~/bin/swift << EOF
+#!/bin/bash
+/home/falkon/swift_working/cog/modules/swift/dist/swift-svn/bin/swift $@
+EOF
+}
+
+NSIZE="64 128 256 512 1024"
+for i in $NSIZE; do
+ $CIOROOT/bin/falkon-start.sh default $i 60 4 1 4
+ set_vanilla
+ cobalt_id=`tail -1 $HOME/.falkonjobs | awk '{print $1}'`
+ ./runblast.sh . common/UNIPROT_for_blast_14.0.seq \
+ 100001 104000 l out.run_`printf %03d $i`
+ set_mtdm
+ falkon_id=`tail -1 $HOME/.falkonjobs | awk '{print $2}'`
+ /home/zzhang/DHT/bin/treebroadcast.sh $falkon_id common/common.tar
+ ./runblast.sh . common/UNIPROT_for_blast_14.0.seq \
+ 100001 104000 l out.run_`printf %03d $i`
+ qdel $cobalt_id
+done
Modified: usertools/cio/science/blast/mockblast.sh
===================================================================
--- usertools/cio/science/blast/mockblast.sh 2009-04-08 22:38:45 UTC (rev 2848)
+++ usertools/cio/science/blast/mockblast.sh 2009-04-09 19:25:09 UTC (rev 2849)
@@ -21,21 +21,24 @@
BLOCKS=1310272
-dd if=$query of=/dev/null bs=$BLOCKS #2> /dev/null
+dd=/fuse/bin/dd
+ls=/fuse/bin/ls
+$dd if=$query of=/dev/null bs=$BLOCKS #2> /dev/null
+
for i in $db; do
if [ -h $i ]; then
truefile=`readlink $i`
else
truefile=$i
fi
- size=`ls --block-size=$BLOCKS -s $truefile | cut -f 1 -d ' '`
+ size=`$ls --block-size=$BLOCKS -s $truefile | cut -f 1 -d ' '`
count=$(( size / 4))
if [ $count -eq 0 ]; then
count=$size
fi
echo $count
- dd if=$i of=/dev/null bs=$BLOCKS count=$count #2> /dev/null
+ $dd if=$i of=/dev/null bs=$BLOCKS count=$count #2> /dev/null
done
sleep 41
-dd if=/dev/zero of=$output bs=15812 count=1 #2> /dev/null
+$dd if=/dev/zero of=$output bs=15812 count=1 #2> /dev/null
Modified: usertools/cio/science/blast/readseq.rb
===================================================================
--- usertools/cio/science/blast/readseq.rb 2009-04-08 22:38:45 UTC (rev 2848)
+++ usertools/cio/science/blast/readseq.rb 2009-04-09 19:25:09 UTC (rev 2849)
@@ -21,7 +21,7 @@
x =~ />(.*)\n/
seqname = $1
if seq_start <= fasta_db.lineno
- dir = pad == "l" ? prefix +
+ dir = pad == "l" ? prefix +
sprintf("/%04d", fasta_db.lineno / 1000) : prefix
fname = sprintf "SEQ%07d_%s.qry", fasta_db.lineno, seqname
FileUtils.mkdir_p dir
Modified: usertools/cio/science/blast/runblast.sh
===================================================================
--- usertools/cio/science/blast/runblast.sh 2009-04-08 22:38:45 UTC (rev 2848)
+++ usertools/cio/science/blast/runblast.sh 2009-04-09 19:25:09 UTC (rev 2849)
@@ -1,18 +1,25 @@
#!/bin/bash
-# Script: swiftblast.sh [blast_db] [start_seq] [end_seq] [outdir]
+# Script: runblast.sh [falkon_id] [blast_db] [start_seq] [end_seq] [limit] [outdir]
# Invokes the swift workflow of reciprocal blast
if [ $# -lt 3 ]; then
cat << EOF
ERROR : too few arguments
-Usage : $0 [blast_db] [start_seq] [end_seq] [outdir]
+Usage : $0 [falkon_id][blast_db] [start_seq] [end_seq] [outdir]
Example : Reciprocal blast on the 100th to 200th sequences
of UNIPRT.seq and dump results to run_100:
$0 UNIPROT.seq 100 200 run_100
EOF
fi
+if [ -z $CIOROOT ]; then
+ echo "ERROR: CIOROOT not defined"
+ exit 1
+fi
+
+PATH=$PATH:$CIOROOT/bin
+
FALKON_ID=$1
BLAST_DB=$2
START_SEQ=$3
@@ -20,18 +27,49 @@
# Limit to 1k sequences per dir "l"
LIMIT=$5
+RUNID=$6 # Default output directory of everything!
BLASTROOT=$CIOROOT/science/blast
+if [ ! -d $RUNID ]; then
+ mkdir -p $RUNID
+fi
+
# Extract sequences
-ruby readseq.rb $BLAST_DB $START_SEQ $END_SEQ seqs
-if [ $FALKON_ID != "-" ]; then # Real Falkon job
- sleep 0
- # Regenerate tc.data and sites.xml @BGP@
+ruby readseq.rb `dirname $BLAST_DB`/fasta/`basename $BLAST_DB` \
+ $START_SEQ $END_SEQ $RUNID $LIMIT
+
+if [ $FALKON_ID == "." ]; then
+ FALKON_ID=`tail -1 $HOME/.falkonjobs | awk '{print $2}'`
+fi
+
+if [ $FALKON_ID == "-" ]; then # Local run (no Falkon job)
+ tcfile=tc.local
+ cp $tcfile tc.data
else
- sleep 0
+ tcfile=tc.bgp
+ gentcdata 32 < $tcfile > tc.data
fi
-# Run swift:
-#swift -sites.file sites.xml -tc.file tc.data $BLASTROOT/blast.swift
+
+if [ $FALKON_ID == "-" ]; then
+ echo "local"
+ swift -sites.file ./sites.xml -tc.file tc.data blast.swift \
+ -db=`basename $BLAST_DB` -db_pref=`dirname $BLAST_DB`\
+ -limit=$LIMIT -start=$START_SEQ -end=$END_SEQ \
+ -run=$RUNID
+else # Real Falkon job
+ fjob=`echo $FALKON_ID|awk '{printf "%.4d",$1}'`
+ nodes=`cat $HOME/.falkonjobs | awk "\\$2==\"$fjob\" {print \\$4}"`
+ fdir=/home/falkon/users/$USER
+ if [ ! -d $fdir/$fjob ]; then
+ echo $0: Falkon job id $fjob does not exist in $fdir. Last 5 jobs there are:
+ (cd $fdir; ls -1 | tail -5)
+ exit 1
+ fi
+ /usr/bin/time -p swift_bgp.sh $fjob $nodes $BLASTROOT/blast.swift \
+ -db=`basename $BLAST_DB` -db_pref=`dirname $BLAST_DB`\
+ -limit=$LIMIT -start=$START_SEQ -end=$END_SEQ \
+ -run=$RUNID >&swift_out.$RUNID
+fi
Copied: usertools/cio/science/blast/tc.bgp (from rev 2835, usertools/cio/science/blast/tc.data)
===================================================================
--- usertools/cio/science/blast/tc.bgp (rev 0)
+++ usertools/cio/science/blast/tc.bgp 2009-04-09 19:25:09 UTC (rev 2849)
@@ -0,0 +1,24 @@
+#This is the transformation catalog.
+#
+#It comes pre-configured with a number of simple transformations with
+#paths that are likely to work on a linux box. However, on some systems,
+#the paths to these executables will be different (for example, sometimes
+#some of these programs are found in /usr/bin rather than in /bin)
+#
+#NOTE WELL: fields in this file must be separated by tabs, not spaces; and
+#there must be no trailing whitespace at the end of each line.
+#
+# sitename transformation path INSTALLED platform profiles
+localhost echo /bin/echo INSTALLED INTEL32::LINUX null
+localhost cat /bin/cat INSTALLED INTEL32::LINUX null
+localhost ls /bin/ls INSTALLED INTEL32::LINUX null
+localhost grep /bin/grep INSTALLED INTEL32::LINUX null
+localhost sort /bin/sort INSTALLED INTEL32::LINUX null
+localhost paste /bin/paste INSTALLED INTEL32::LINUX null
+
+# BLAST binaries
+communicado blastall /home/aespinosa/science/blast/ncbi.communicado/bin/blastall INSTALLED INTEL32::LINUX null
+communicado mockblast /home/aespinosa/cio/science/blast/mockblast.sh INSTALLED INTEL32::LINUX null
+ at BGP@ blastall /home/espinosa/blast/ncbi/bin/blastall INSTALLED INTEL32::LINUX null
+ at BGP@ mockblast /home/espinosa/cio/science/blast/mockblast.sh INSTALLED INTEL32::LINUX null
+ at BGP@ cat /bin/cat INSTALLED INTEL32::LINUX null
Deleted: usertools/cio/science/blast/tc.data
===================================================================
--- usertools/cio/science/blast/tc.data 2009-04-08 22:38:45 UTC (rev 2848)
+++ usertools/cio/science/blast/tc.data 2009-04-09 19:25:09 UTC (rev 2849)
@@ -1,23 +0,0 @@
-#This is the transformation catalog.
-#
-#It comes pre-configured with a number of simple transformations with
-#paths that are likely to work on a linux box. However, on some systems,
-#the paths to these executables will be different (for example, sometimes
-#some of these programs are found in /usr/bin rather than in /bin)
-#
-#NOTE WELL: fields in this file must be separated by tabs, not spaces; and
-#there must be no trailing whitespace at the end of each line.
-#
-# sitename transformation path INSTALLED platform profiles
-localhost echo /bin/echo INSTALLED INTEL32::LINUX null
-localhost cat /bin/cat INSTALLED INTEL32::LINUX null
-localhost ls /bin/ls INSTALLED INTEL32::LINUX null
-localhost grep /bin/grep INSTALLED INTEL32::LINUX null
-localhost sort /bin/sort INSTALLED INTEL32::LINUX null
-localhost paste /bin/paste INSTALLED INTEL32::LINUX null
-
-# BLAST binaries
-communicado blastall /home/aespinosa/science/blast/ncbi.communicado/bin/blastall INSTALLED INTEL32::LINUX null
-communicado mockblast /home/aespinosa/cio/science/blast/mockblast.sh INSTALLED INTEL32::LINUX null
-surveyor blastall /home/espinosa/science/blast/ncbi.communicado/bin/blastall INSTALLED INTEL32::LINUX null
-surveyor mockblast /home/espinosa/cio/science/blast/mockblast.sh INSTALLED INTEL32::LINUX null
More information about the Swift-commit
mailing list