[Swift-commit] r7238 - SwiftApps/Swift-MapRed/mapred_combiner_V4

ketan at ci.uchicago.edu ketan at ci.uchicago.edu
Mon Oct 28 13:57:59 CDT 2013


Author: ketan
Date: 2013-10-28 13:57:59 -0500 (Mon, 28 Oct 2013)
New Revision: 7238

Added:
   SwiftApps/Swift-MapRed/mapred_combiner_V4/blastreduce.swift
Log:
 

Added: SwiftApps/Swift-MapRed/mapred_combiner_V4/blastreduce.swift
===================================================================
--- SwiftApps/Swift-MapRed/mapred_combiner_V4/blastreduce.swift	                        (rev 0)
+++ SwiftApps/Swift-MapRed/mapred_combiner_V4/blastreduce.swift	2013-10-28 18:57:59 UTC (rev 7238)
@@ -0,0 +1,91 @@
+type file; // The types down to `output` are declared without fields; each labels one kind of file used below.
+type fileptr;
+type fastaseq;
+type query;
+
+type script;
+type exec;
+
+type headerfile;
+type indexfile;
+type seqfile;
+type output;
+type database { // Groups the three files produced by formatdb for one database.
+  headerfile phr;
+  indexfile pin;
+  seqfile psq;
+}
+
+/* Blast app related declarations (disabled: everything up to the closing comment marker is commented out)
+output blast_output_file <single_file_mapper;file = arg("o", "output.html")>;
+output out[] <ext; exec = "outputmapper.sh", n = num_partitions>;
+
+app (output o) blastapp(query i, fastaseq d, string p, string e, string f, database db){
+  blastall "-p" p "-i" filename(i) "-d" filename(d) "-o" filename(o) "-e" e "-T" "-F" f;
+}
+
+app (output o) blastmerge(output o_frags[]){
+  blastmerge filename(o) filenames(o_frags);
+}
+*/
+// Runs formatdb on FASTA file i; only "-i" is passed, so the names of out's files are not given on the command line.
+app (database out) formatdb (fastaseq i){
+ formatdb "-i" filename(i);
+}
+// Runs fastasplitn on file d with argument n and returns the pieces as out[] (n is presumably the piece count - confirm).
+app (fastaseq out[]) split_database (fastaseq d, string n){
+ fastasplitn filename(d) n;
+}
+// One map task: bash <wrapper> <map> p <query file> <fasta file> resfilename e f; stdout goes to out, stderr to err.
+app (fileptr out, file err) map_data (script wrapper, script map, query _queryfile, fastaseq _fastafile, string resfilename, string p, string e, string f, database db){
+  bash @wrapper @map p filename(_queryfile) filename(_fastafile) resfilename e f stdout=@out stderr=@err;
+} // NOTE(review): db never appears on the command line; presumably it is a parameter so the task waits for the formatted database - confirm.
+// Runs bash -c with script a followed by the map_results files; stdout is captured in out.
+app (fileptr out) aggregate (script a, fileptr map_results[]) {
+  bash "-c" @a @map_results stdout=@out;
+}
+// Runs mid_bash with the wrapper script, the reducer script, cmd and the all_map file; stdout goes to out, stderr to err.
+app (file out, file err) reduce_mid (script wrapper, script reducer, string cmd, fileptr all_map){
+  mid_bash @wrapper @reducer cmd @all_map stdout=@out stderr=@err;
+}
+// Runs loc_bash with the wrapper script, the reducer script, cmd and the site_results files; stdout goes to out, stderr to err.
+app (file out, file err) reduce_local (script wrapper, script reducer, string cmd, file site_results[]){
+  loc_bash @wrapper @reducer cmd @site_results stdout=@out stderr=@err;
+}
+// Partition count from command-line argument "n" (default "10"); also handed as n to both ext mapper scripts below.
+string num_partitions = arg("n", "10");
+fastaseq partition[] <ext;exec = "examples/splitmapper.sh", n = num_partitions>;
+database formatdbout[] <ext; exec = "examples/formatdbmapper.sh", n = num_partitions>;
+// Scripts mapped to fixed relative paths; red_wrapper, aggr and red_func are used only by the commented-out reduce calls.
+script map_wrapper    <"map_wrapper.sh">;
+script red_wrapper    <"reduce_wrapper.sh">;
+script aggr           <"aggregate.sh">;
+script map_func       <"examples/blast/map_blast.sh">;
+script red_func       <"examples/reduce_sort.sh">;
+// NOTE(review): exeblastall and exeblastmerge are not referenced anywhere else in this script.
+exec exeblastall <"examples/blastall">;
+exec exeblastmerge <"examples/blastmerge">;
+// Per-partition map results: stdout files (prefix "map", suffix ".out") and stderr files (prefix "map", suffix ".err").
+fileptr map_out[] <simple_mapper; prefix="map", suffix=".out">;
+file  map_err[] <simple_mapper; prefix="map", suffix=".err">;
+// Input files: the database FASTA (argument "d") and the query sequence (argument "i"), each with a default path.
+fastaseq dbin <single_file_mapper;file = arg("d", "/home/maheshwari/Swift-MapRed/blast/nr")>;
+query query_file <single_file_mapper;file = arg("i", "sequence.seq")>;
+// Split the input database file into the partition array.
+partition=split_database(dbin, num_partitions);
+// For each piece: build its database with formatdb, then run the map task with resfilename "tmpout", p "blastp", e "0.1", f "F".
+foreach item,i in partition {
+  formatdbout[i]=formatdb(item);
+  (map_out[i], map_err[i]) = map_data (map_wrapper, map_func, query_file, item, "tmpout", "blastp", "0.1", "F", formatdbout[i]);
+}
+// Reduce phase: only the declarations are active; the aggregate, reduce_mid and reduce_local calls are commented out with '#'.
+fileptr all_map <"all_map.out">;
+#all_map = aggregate(aggr, map_out);
+file redsite_out[] <simple_mapper; prefix="reduce", suffix=".out">;
+file redsite_err[] <simple_mapper; prefix="reduce", suffix=".err">;
+#(redsite_out[1], redsite_err[1]) = reduce_mid (red_wrapper, red_func, "local", all_map);
+// Final global reduce
+file final_out <"final_result">;
+file final_err <"final_errors">;
+#(final_out, final_err) = reduce_local (red_wrapper, red_func, "naive", redsite_out);
+




More information about the Swift-commit mailing list