From noreply at svn.ci.uchicago.edu Tue Jun 1 17:03:57 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Tue, 1 Jun 2010 17:03:57 -0500 (CDT)
Subject: [Swift-commit] r3339 - provenancedb/apps/oops
Message-ID: <20100601220357.2E2FF9CCAB@vm-125-59.ci.uchicago.edu>

Author: lgadelha
Date: 2010-06-01 17:03:56 -0500 (Tue, 01 Jun 2010)
New Revision: 3339

Modified:
   provenancedb/apps/oops/oops_extractor.sh
Log:


Modified: provenancedb/apps/oops/oops_extractor.sh
===================================================================
--- provenancedb/apps/oops/oops_extractor.sh    2010-05-31 10:57:08 UTC (rev 3338)
+++ provenancedb/apps/oops/oops_extractor.sh    2010-06-01 22:03:56 UTC (rev 3339)
@@ -1,19 +1,15 @@
 #!/bin/bash
 
 # Annotation extractor for the OOPS application
-# Author: Luiz Gadelha
-# Date: 2010-05-25
 #
 # The directory $PROTESTS/swift-logs contains symbolic links to
 # OOPS' Swift logs.
 
 PROVDB_HOME=~/provenancedb
 PROTESTS_HOME=~/protests
-workflow_id=$1
 
 source $PROVDB_HOME/etc/provenance.config
-
 # provdb_imported records runs already imported to the provenance database
 
 cd $PROTESTS_HOME
 if [ ! -a provdb_imported ]; then
@@ -23,6 +19,7 @@
 
 
 for i in `ls | grep run.loops`; do
+    cd $PROTESTS_HOME
     if ! grep $i provdb_imported; then
         if grep "Swift finished with no errors" $i/psim.loops-*.log; then
             cd swift-logs
@@ -37,21 +34,35 @@
             echo $i >> provdb_imported
 
             # annotate workflows with their oops runid
-            oops_run_id=`echo $i | awk -F . '{print $3}'`
-            log_filename=`ls $i | grep psim.loops- | grep "\."log$`
-            workflow_id=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$log_filename%'" | $SQLCMD -t | awk '{print $1}'`
-            echo "insert into annotations values ('$workflow_id','oops_run_id','$oops_run_id');" | $SQLCMD
+            OOPS_RUN_ID=`echo $i | awk -F . '{print $3}'`
+            LOG_FILENAME=`ls $i | grep psim.loops- | grep "\."log$`
+            WORKFLOW_ID=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'`
+            echo "insert into annotations values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD
 
             # annotate dataset with scientific parameters passed to doLoopRound
-            #echo "\pset border 0;" > query.sql
-            #echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='doLoopRound' and param_name='modelData' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%';" >> query.sql
-            #$SQLCMD -t -A -F " " -f query.sql -o result.txt
-            #dataset_id=`awk '{print $1}' result.txt`
-            #filename=`awk '{print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
-            #TODO extract name-value pairs
+
+            # TODO: check why it is not recording doLoopRound in processes_in_workflows
+            #echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql
+
+            # using this as a workaround for the problem above, it will return nSim identical tuples
+            echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql
+
+            $SQLCMD -t -A -F " " -f query.sql -o result.txt
+
+            #DATASET_ID=`awk '{print $1}' result.txt`
+            DATASET_ID=`awk '{if (NR==1) print $1}' result.txt`
+
+            #FILENAME=`awk '{print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
+            FILENAME=`awk '{if (NR==1) print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
+
+            cd $PROTESTS_HOME/run.loops.$OOPS_RUN_ID
+
+            while read line
+            do
+                NAME=`echo $line | awk 'BEGIN { FS = "=" }; {print $1}'`
+                VALUE=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}'`
+                echo "insert into annotations values ('$DATASET_ID', '$NAME', '$VALUE');" | $SQLCMD
+            done < $FILENAME
         fi
     fi
 done
-
-
-

From noreply at svn.ci.uchicago.edu Fri Jun 4 12:25:11 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Fri, 4 Jun 2010 12:25:11 -0500 (CDT)
Subject: [Swift-commit] r3340 - trunk/src/org/globus/swift
Message-ID: <20100604172511.3EECA9CC90@vm-125-59.ci.uchicago.edu>

Author: wozniak
Date: 2010-06-04 12:25:11 -0500 (Fri, 04 Jun 2010)
New Revision: 3340

Modified:
trunk/src/org/globus/swift/ Log: Set svn:ignore Property changes on: trunk/src/org/globus/swift ___________________________________________________________________ Name: svn:ignore + .ignore language From noreply at svn.ci.uchicago.edu Fri Jun 4 12:26:42 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 4 Jun 2010 12:26:42 -0500 (CDT) Subject: [Swift-commit] r3341 - trunk/src/org/globus/swift/parser Message-ID: <20100604172642.8D5189CC90@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-04 12:26:42 -0500 (Fri, 04 Jun 2010) New Revision: 3341 Modified: trunk/src/org/globus/swift/parser/ Log: Set svn:ignore Property changes on: trunk/src/org/globus/swift/parser ___________________________________________________________________ Name: svn:ignore + .ignore SwiftScriptLexer.java SwiftScriptParserTokenTypes.txt SwiftScriptParserTokenTypes.java SwiftScriptParser.java From noreply at svn.ci.uchicago.edu Fri Jun 4 12:28:21 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 4 Jun 2010 12:28:21 -0500 (CDT) Subject: [Swift-commit] r3342 - trunk/src/org/globus/swift/parser Message-ID: <20100604172821.5EE5B9CC90@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-04 12:28:21 -0500 (Fri, 04 Jun 2010) New Revision: 3342 Modified: trunk/src/org/globus/swift/parser/ Log: This is whitespace-sensitive Property changes on: trunk/src/org/globus/swift/parser ___________________________________________________________________ Name: svn:ignore - .ignore SwiftScriptLexer.java SwiftScriptParserTokenTypes.txt SwiftScriptParserTokenTypes.java SwiftScriptParser.java + .ignore SwiftScriptLexer.java SwiftScriptParserTokenTypes.txt SwiftScriptParserTokenTypes.java SwiftScriptParser.java From noreply at svn.ci.uchicago.edu Fri Jun 4 12:49:58 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 4 Jun 2010 12:49:58 -0500 (CDT) Subject: [Swift-commit] r3343 - trunk/libexec Message-ID: <20100604174958.1EE3E9CC90@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-04 12:49:57 -0500 (Fri, 04 Jun 2010) New Revision: 3343 Modified: trunk/libexec/ Log: Set svn:ignore Property changes on: trunk/libexec ___________________________________________________________________ Name: svn:ignore + .ignore buildid.txt version.txt From noreply at svn.ci.uchicago.edu Tue Jun 15 09:21:30 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 09:21:30 -0500 (CDT) Subject: [Swift-commit] r3344 - text Message-ID: <20100615142130.0D0209CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 09:21:29 -0500 (Tue, 15 Jun 2010) New Revision: 3344 Added: text/parco10submission/ Log: Make copy for parco Copied: text/parco10submission (from rev 3343, text/hpdc09submission) From noreply at svn.ci.uchicago.edu Tue Jun 15 09:23:30 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 09:23:30 -0500 (CDT) Subject: [Swift-commit] r3345 - text/parco10submission Message-ID: <20100615142330.A3E129CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 09:23:30 -0500 (Tue, 15 Jun 2010) New Revision: 3345 Added: text/parco10submission/lx.zsh Log: Add lx.zsh Added: text/parco10submission/lx.zsh =================================================================== --- text/parco10submission/lx.zsh (rev 0) +++ text/parco10submission/lx.zsh 2010-06-15 14:23:30 UTC (rev 3345) @@ -0,0 +1,149 @@ +#!/bin/zsh + +# Guide: +# Use +f to force a recompile. 
+# Use +p & +d to create PDF & PS files. +# Modify DOC to change the relevant tex file. +# Modify TMP & BIB to use different temporary storage. + +DEFAULTDOC="paper" +COMPILER="pdflatex" + +NEEDBIB="yes" +MAKE_PS="no" +MAKE_PDF="no" +FORCE="no" + +TMP=.latex.out +BIB=.bibtex.out + +for arg in ${*} + do + case ${arg} in + ("+f") FORCE="yes" ;; + ("+p") MAKE_PS="yes" ;; + ("+d") MAKE_PDF="yes" ;; + ("-b") NEEDBIB="no" ;; + ("+b") NEEDBIB="yes" ;; + (*) DOC="${arg}" ;; + esac +done + +[[ ${DOC} == "" ]] && DOC=${DEFAULTDOC} +[[ ${MAKE_PDF} == "yes" ]] && MAKE_PS="yes" + +clean() +{ + local t + t=( core* *.aux *.bbl *.blg *.dvi *.latex* *.log *.pdf *.ps ) + t=( ${t} *.toc *.lot *.lof .*.out ) + if [[ ${#t} > 0 ]] + then + rm -fv ${t} + else + print "Nothing to clean." + fi + return 0 +} + +scan() +{ + [[ $1 == "" ]] && return + typeset -g -a $1 + local i=1 + while read T + do + eval "${1}[${i}]='${T}'" + (( i++ )) + done +} + +biblio() +{ + if [[ -f ${DOC}.bbl && + ${DOC}.bbl -nt $( readlink Wozniak.bib ) ]] + then + rm ${DOC}.bbl + fi + if { bibtex ${DOC} > ${BIB} } + then + printf "." + ${COMPILER} ${DOC} > /dev/null + printf "." + ${COMPILER} ${DOC} > ${TMP} + printf "." + WARNS=( $( grep "Warning--" ${BIB} ) ) + if (( ${#WARNS} > 0 )) + then + printf "\n" + print "Bibtex:" + print ${WARNS} + fi + else + printf "\n" + cat ${BIB} + fi +} + +printable() +{ + if [[ ! -f ${DOC}.ps || + ${DOC}.dvi -nt ${DOC}.ps ]] + then + if [[ ${MAKE_PS} == "yes" ]] + then + dvips -q -o ${DOC}.ps -t Letter ${DOC}.dvi + printf "!" + fi + fi + + if [[ ! -f ${DOC}.pdf || + ${DOC}.ps -nt ${DOC}.pdf ]] + then + if [[ ${MAKE_PDF} == "yes" ]] + then + ps2pdf ${DOC}.ps ${DOC}.pdf + printf "!" + fi + fi +} + +[[ ${DOC} == "clean" ]] && clean && exit + +grep -h includegraphics *.tex | scan A +EPSS=() +for line in ${A} +do + EPS=( $( print ${${line/'{'/ }/'}'/ } ) ) + EPS=${EPS[2]}.eps + EPSS=( ${EPSS} ${EPS} ) +done +for EPS in ${EPSS} + do + [[ ${EPS} -nt ${DOC}.dvi ]] && FORCE="yes" +done + +if [[ ! -f ${DOC}.dvi || + -f error || + ${DOC}.tex -nt ${DOC}.dvi || + lx.zsh -nt ${DOC}.dvi || + dht.bib -nt ${DOC}.dvi || + $( readlink Wozniak.bib ) -nt ${DOC}.dvi || + ${FORCE} == "yes" ]] + then + if { ${COMPILER} --interaction nonstopmode ${DOC} > ${TMP} } + then + printf "OK" + rm -f error + [[ ${NEEDBIB} == "yes" ]] && biblio + else + printf "Error! 
\n" + egrep '^l.|^!|argument' ${TMP} + touch error + fi +fi +[[ ${MAKE_PS} == "yes" ]] && printable +printf "\n" +grep "LaTeX Warning:" ${TMP} + +return 0 Property changes on: text/parco10submission/lx.zsh ___________________________________________________________________ Name: svn:executable + * From noreply at svn.ci.uchicago.edu Tue Jun 15 09:24:07 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 09:24:07 -0500 (CDT) Subject: [Swift-commit] r3346 - text/parco10submission Message-ID: <20100615142407.4A0179CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 09:24:07 -0500 (Tue, 15 Jun 2010) New Revision: 3346 Removed: text/parco10submission/Makefile text/parco10submission/makepaper Log: Drop old builders Deleted: text/parco10submission/Makefile =================================================================== --- text/parco10submission/Makefile 2010-06-15 14:23:30 UTC (rev 3345) +++ text/parco10submission/Makefile 2010-06-15 14:24:07 UTC (rev 3346) @@ -1,4 +0,0 @@ -all: paper.pdf - -paper.pdf: - pdflatex paper.latex \ No newline at end of file Deleted: text/parco10submission/makepaper =================================================================== --- text/parco10submission/makepaper 2010-06-15 14:23:30 UTC (rev 3345) +++ text/parco10submission/makepaper 2010-06-15 14:24:07 UTC (rev 3346) @@ -1,4 +0,0 @@ -pdflatex paper.latex -bibtex paper -pdflatex paper.latex -pdflatex paper.latex From noreply at svn.ci.uchicago.edu Tue Jun 15 09:54:17 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 09:54:17 -0500 (CDT) Subject: [Swift-commit] r3348 - text/parco10submission Message-ID: <20100615145417.312B69CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 09:54:17 -0500 (Tue, 15 Jun 2010) New Revision: 3348 Modified: text/parco10submission/paper.bib text/parco10submission/paper.tex Log: Fixes to get it to compile Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2010-06-15 14:25:00 UTC (rev 3347) +++ text/parco10submission/paper.bib 2010-06-15 14:54:17 UTC (rev 3348) @@ -13,14 +13,12 @@ @incollection{GCRPNOVA, author = {Yong Zhao and Ioan Raicu and Ian Foster an Mihael Hategan and Veronika Nefedova and Mike Wilde}, - title = {{Scalable and Reliable Scientific Computations in Grid Environments}} + title = {{Scalable and Reliable Scientific Computations in Grid Environments}}, booktitle = {Grid Computing Research Progress}, isbn = {978-1-60456-404-4}, - pages = {TODO}, publisher = {Nova Publisher}, year = 2008, - editor = (TODO}, - url = {http://people.cs.uchicago.edu/~iraicu/publications/2008_NOVA08_book-chapter_Swift.pdf), + url = {http://people.cs.uchicago.edu/~iraicu/publications/2008_NOVA08_book-chapter_Swift.pdf}, } @inproceedings{SWIFTIWSW2007, @@ -183,7 +181,7 @@ @inproceedings{mds, title = {{Grid Information Services for Distributed Resource Sharing}}, - author = {Czajkowski K and Fitzgerald S and Foster I and Kesselman C}, + author = {Czajkowski K and Fitzgerald S and Foster I and Kesselman C}, booktitle = {Proceedings of the Tenth IEEE International Symposium on High-Performance Distributed Computing (HPDC-10), IEEE Press}, month = {August}, year = 2001 @@ -191,8 +189,8 @@ @inproceedings{rls, title = {{Giggle: A Framework for Constructing Sclable Replica Location Services}}, - author = {Chervenak A and Deelman E and Foster I and Guy L and Hoschek W and Iamnitchi A 
and Kesselman C and - Kunst P and Ripeanu M and Schwartzkopf B and Stockinger H and Stockinger K and Tierney B}, + author = {Chervenak A and Deelman E and Foster I and Guy L and Hoschek W and Iamnitchi A and Kesselman C and + Kunst P and Ripeanu M and Schwartzkopf B and Stockinger H and Stockinger K and Tierney B}, booktitle = {Proceedings of Supercomputing 2002 (SC2002)}, month = {November}, year = 2002 Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 14:25:00 UTC (rev 3347) +++ text/parco10submission/paper.tex 2010-06-15 14:54:17 UTC (rev 3348) @@ -8,7 +8,7 @@ \bibliographystyle{abbrv} % for ACM SIGS style \title{Swift - a language for distributed -parallel scripting} + parallel scripting} % draft - contact benc at ci.uchicago.edu @@ -227,7 +227,7 @@ \subsection{Language basics} A Swift script describes data, application components, invocations -of applications components, and the inter-relations (data flow) +of applications components, and the inter-relations (data flow) between those invocations. Data is represented in a script by strongly-typed single-assignment @@ -237,7 +237,7 @@ type can be either a \emph{primitive type} or a \emph{mapped type}. Swift provides a fixed set of primitive types, such as \emph{integer} or \emph{string}. A mapped type indicates that the actual data does not -reside in CPU addressable memory (as it would in conventional +reside in CPU addressable memory (as it would in conventional programming languages), but in POSIX-like files. Composite types are further subdivided into \emph{structures} and \emph{arrays}. Structures are similar in most respects to structure types in other languages. One @@ -246,7 +246,7 @@ mapped types as \emph{datasets}. Mapped type and composite type variable declarations can be annotated with a -\emph{mapping} descriptor indicating the file(s) that make up that \emph{dataset}. +\emph{mapping} descriptor indicating the file(s) that make up that \emph{dataset}. For example, the following line declares a variable named \verb|photo| with type \verb|image|. It additionally declares that the data for this variable is stored in a single file named \verb|shane.jpeg| @@ -256,8 +256,8 @@ \end{verbatim} Conceptually, a parallel can be drawn between Swift \emph{mapped} variables -and Java \emph{reference types}. In both cases there is no syntactic distinction -between \emph{primitive types} and \emph{mapped} types or +and Java \emph{reference types}. In both cases there is no syntactic distinction +between \emph{primitive types} and \emph{mapped} types or \emph{reference types} respectively. Additionally, the semantic distinction is also kept to a minimum. @@ -587,8 +587,8 @@ \label{LanguageEnvironment} A SwiftScript \verb|app| declaration describes how a component -program is invoked. In order to ensure the correctness of the -Swift model, the environment in which programs are executed needs +program is invoked. In order to ensure the correctness of the +Swift model, the environment in which programs are executed needs to be constrained. A program is invoked in its own working directory; in that working @@ -699,8 +699,8 @@ \begin{verbatim} - + /home/benc/swifttest @@ -828,7 +828,7 @@ stats - the reliability of coasters vs clusters on a range of sites (eg a bunch of osg engage and TG sites). also could do: diagram showing clustering/coasters vs some plain gram submission - CNARI app with 3s -jobs shows this in an extreme way. 
Either show such a graph here or in +jobs shows this in an extreme way. Either show such a graph here or in CNARI app section. TODO: comment on how this relates to Falkon @@ -890,7 +890,8 @@ (out,err) = blastall(i, pir); \end{verbatim} -The trick here is that blastall reads takes the prefix name of the database files that it will read (.phr, .seq and .pin files). So i made a dummy file called "UNIPROT_for_blast_14.0.seq" to satisfy the data dependency . So here is the final list of my files: +The trick here is that blastall reads takes the prefix name of the database files that it will read (.phr, .seq and .pin files). +So i made a dummy file called ``{\tt UNIPROT\_for\_blast\_14.0.seq}'' to satisfy the data dependency . So here is the final list of my files: \begin{verbatim} -rw-r--r-- 1 aespinosa ci-users 0 Nov 15 13:49 UNIPROT_for_blast_14.0.seq @@ -908,7 +909,7 @@ I looked at the dock6 documentation for OSG. It looks that it recommends to transfer the datafiles to OSG sites manually via globus-url-copy. By my understanding of how swift works, it should be able to transfer my local files to the selected sites. I have yet to try this and will look more on examples in the data management side of Swift. -Do you know other users who went in this approach? The documentation has only a few examples in managing data. I'll check the swift Wiki later and see what material we have and also post this email/ notes. +Do you know other users who went in this approach? The documentation has only a few examples in managing data. I'll check the swift Wiki later and see what material we have and also post this email/ notes. \subsection{fMRI Application Example} @@ -918,7 +919,7 @@ \end{figure} \begin{verbatim} -type Study { Group g[]; } +type Study { Group g[]; } type Run { Volume v[]; } type Volume { Image img; @@ -929,15 +930,15 @@ type Subject { Volume anat; Run run[]; -} +} (Run resliced) reslice_wf ( Run r) { Run yR = reorientRun( r , "y", "n" ); Run roR = reorientRun( yR , "x", "n" ); Volume std = roR.v[1]; - AirVector roAirVec = alignlinearRun(std, roR, + AirVector roAirVec = alignlinearRun(std, roR, 12, 1000, 1000, "81 3 3"); - resliced = resliceRun( roR, roAirVec, "-o", + resliced = resliceRun( roR, roAirVec, "-o", "-k"); } @@ -949,7 +950,7 @@ } -(Run or) reorientRun (Run ir, string direction, +(Run or) reorientRun (Run ir, string direction, string overwrite) { foreach Volume iv, i in ir.v { or.v[i] = reorient (iv, direction, overwrite); @@ -972,7 +973,7 @@ by a mapper. The procedure reslice\_wf defines a compound procedure, which comprises a series of procedure calls, using variables to establish -data dependencies. +data dependencies. In the example, reslice\_wf defines a four-step pipeline computation, using variables to establish @@ -990,7 +991,7 @@ volumes. In this example we show the details of the procedure reorientRun, -which is also a compound procedure. +which is also a compound procedure. 
The foreach statement defines an iteration over the input run ir and applies the procedure reorient (which rotates a brain image along a certain axis) to each volume in the run to produces a @@ -1067,36 +1068,36 @@ \subsection{Molecular Dynamics with DOCK} \begin{verbatim} -(file t,DockOut tarout) dockcompute (DockIn infile, string targetlist) { - app { - rundock @infile targetlist stdout=@filename(t) @tarout; - } -} - -type params { - string ligandsfile; - string targetlist; -} - -#params pset[] ; -doall(params pset[]) -{ - foreach params,i in pset { - DockIn infile < single_file_mapper; file=@strcat("/home/houzx/dock- -run/databases/KEGG_and_Drugs/",pset[i].ligandsfile)>; - file sout ; - DockOut tout ; -# DockOut tout <"result.tar.gz">; -# sout = dockcompute(infile,pset[i].targetlist); - (sout,tout) = dockcompute(infile,pset[i].targetlist); - - } -} - -params p[]; -p = readdata("paramslist.txt"); -doall(p); +(file t,DockOut tarout) dockcompute (DockIn infile, string targetlist) { + app { + rundock @infile targetlist stdout=@filename(t) @tarout; + } +} + +type params { + string ligandsfile; + string targetlist; +} + +#params pset[] ; +doall(params pset[]) +{ + foreach params,i in pset { + DockIn infile < single_file_mapper; file=@strcat("/home/houzx/dock- +run/databases/KEGG_and_Drugs/",pset[i].ligandsfile)>; + file sout ; + DockOut tout ; +# DockOut tout <"result.tar.gz">; +# sout = dockcompute(infile,pset[i].targetlist); + (sout,tout) = dockcompute(infile,pset[i].targetlist); + + } +} + +params p[]; +p = readdata("paramslist.txt"); +doall(p); \end{verbatim} \section{Usage Experience} From noreply at svn.ci.uchicago.edu Tue Jun 15 09:58:30 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 09:58:30 -0500 (CDT) Subject: [Swift-commit] r3349 - text/parco10submission Message-ID: <20100615145830.EEA129CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 09:58:30 -0500 (Tue, 15 Jun 2010) New Revision: 3349 Modified: text/parco10submission/paper.tex Log: Shorten abstract Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 14:54:17 UTC (rev 3348) +++ text/parco10submission/paper.tex 2010-06-15 14:58:30 UTC (rev 3349) @@ -43,30 +43,14 @@ parallel computing resources provide a powerful way to get more of this type of work done faster, but using such resources imposes additional complexities. - Swift reduces these complexities with a scripting language for composing ordinary application programs (serial or parallel) into more powerful parallel applications that can be executed on distributed -resources. Applications expressed in Swift are location-independent -and automatically parallelized. - -Swift can execute scripts that perform tens of thousands of program -invocations on highly parallel resources, and handle the unreliable -and dynamic aspects of wide-area distributed resources. - -The language provides a high level representation of collections of -data and a specification of how those collections are to be mapped to -that abstract representation and processed by component -programs. Underlying this is an implementation that executes the -component programs on grids and other parallel platforms, providing -automated site selection, data management, and reliability. - +resources. 
We present the language, details of the implementation, application examples, measurements, and ongoing research, focusing on its importance as a distributed computing paradigm. -% TODO: DECIDE: Drop SwiftScript, use Swift throughout to refer to the language? - \end{abstract} \section{Introduction} @@ -89,8 +73,23 @@ the language simple and elegant, and minimizing any overlap with the tasks that existing scripting langauges do well. Swift regularizes and abstracts both the notion of data and process for distributed parallel -execution of application programs. +execution of application programs. Applications expressed in Swift +are location-independent and automatically parallelized by exploiting +available concurrency in the given dataflow . +Swift can execute scripts that perform tens of thousands of program +invocations on highly parallel resources, and handle the unreliable +and dynamic aspects of wide-area distributed resources. The language +provides a high level representation of collections of data and a +specification of how those collections are to be mapped to that +abstract representation and processed by component +programs. Underlying this is an implementation that executes the +component programs on grids and other parallel platforms, providing +automated site selection, data management, and reliability. + +% TODO: DECIDE: Drop SwiftScript, use Swift throughout to refer to the language? + + This paper goes into greater depth than prior publications \cite{SWIFTSWF08,SWIFTNNN} in describing the Swift language, how its implementation handles large-scale and distributed execution From noreply at svn.ci.uchicago.edu Tue Jun 15 10:06:19 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 10:06:19 -0500 (CDT) Subject: [Swift-commit] r3350 - text/parco10submission Message-ID: <20100615150619.144139CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 10:06:18 -0500 (Tue, 15 Jun 2010) New Revision: 3350 Added: text/parco10submission/img/ Log: Figures location From noreply at svn.ci.uchicago.edu Tue Jun 15 10:06:52 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 10:06:52 -0500 (CDT) Subject: [Swift-commit] r3351 - text/parco10submission Message-ID: <20100615150652.964C29CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 10:06:52 -0500 (Tue, 15 Jun 2010) New Revision: 3351 Added: text/parco10submission/notes/ Log: Location for misc docs and notes From noreply at svn.ci.uchicago.edu Tue Jun 15 10:07:16 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 10:07:16 -0500 (CDT) Subject: [Swift-commit] r3352 - text/parco10submission/notes Message-ID: <20100615150716.13F799CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 10:07:15 -0500 (Tue, 15 Jun 2010) New Revision: 3352 Added: text/parco10submission/notes/IMG_9463.jpg Log: Move cell phone pic here Copied: text/parco10submission/notes/IMG_9463.jpg (from rev 3344, text/parco10submission/IMG_9463.jpg) =================================================================== (Binary files differ) From noreply at svn.ci.uchicago.edu Tue Jun 15 10:08:11 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 10:08:11 -0500 (CDT) Subject: [Swift-commit] r3353 - in text/parco10submission: . 
img Message-ID: <20100615150811.684549CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 10:08:11 -0500 (Tue, 15 Jun 2010) New Revision: 3353 Added: text/parco10submission/img/figures.odg Removed: text/parco10submission/IMG_9463.jpg Log: Figures from SI2 grant- will be useful Also: drop original cell phone pic Deleted: text/parco10submission/IMG_9463.jpg =================================================================== (Binary files differ) Added: text/parco10submission/img/figures.odg =================================================================== (Binary files differ) Property changes on: text/parco10submission/img/figures.odg ___________________________________________________________________ Name: svn:mime-type + application/octet-stream From noreply at svn.ci.uchicago.edu Tue Jun 15 10:09:51 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 10:09:51 -0500 (CDT) Subject: [Swift-commit] r3354 - in text/parco10submission: . img Message-ID: <20100615150951.D691B9CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 10:09:51 -0500 (Tue, 15 Jun 2010) New Revision: 3354 Added: text/parco10submission/img/IMG_fmridataset.png text/parco10submission/img/omxFigure.jpg Removed: text/parco10submission/IMG_fmridataset.png text/parco10submission/omxFigure.jpg Modified: text/parco10submission/paper.tex Log: Update img locations Deleted: text/parco10submission/IMG_fmridataset.png =================================================================== (Binary files differ) Copied: text/parco10submission/img/IMG_fmridataset.png (from rev 3344, text/parco10submission/IMG_fmridataset.png) =================================================================== (Binary files differ) Copied: text/parco10submission/img/omxFigure.jpg (from rev 3344, text/parco10submission/omxFigure.jpg) =================================================================== (Binary files differ) Deleted: text/parco10submission/omxFigure.jpg =================================================================== (Binary files differ) Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 15:08:11 UTC (rev 3353) +++ text/parco10submission/paper.tex 2010-06-15 15:09:51 UTC (rev 3354) @@ -667,7 +667,7 @@ shown in figure \ref{FigureSwiftModel} \begin{figure}[htbp] -\includegraphics{IMG_9463} +\includegraphics{notes/IMG_9463} \caption{Swift site model} \label{FigureSwiftModel} \end{figure} @@ -913,7 +913,7 @@ \subsection{fMRI Application Example} \begin{figure}[htbp] -\includegraphics[scale=0.5]{IMG_fmridataset} +\includegraphics[scale=0.5]{img/IMG_fmridataset} \caption{FMRI application} \end{figure} @@ -1031,7 +1031,7 @@ calling OpenMx to generate and process models in parallel. \begin{figure}[htbp] -\includegraphics{omxFigure} +\includegraphics{img/omxFigure} \caption{Schematic of a single OpenMx model containing 4 regions of interest (I through L) with 5 regression starting values (asymmetric From noreply at svn.ci.uchicago.edu Tue Jun 15 10:58:16 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 10:58:16 -0500 (CDT) Subject: [Swift-commit] r3355 - in text/parco10submission: . 
img Message-ID: <20100615155816.780289CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 10:58:16 -0500 (Tue, 15 Jun 2010) New Revision: 3355 Added: text/parco10submission/img/swift-model.pdf Modified: text/parco10submission/img/figures.odg text/parco10submission/paper.tex Log: New Swift site model img (swift-model.pdf) Modified: text/parco10submission/img/figures.odg =================================================================== (Binary files differ) Added: text/parco10submission/img/swift-model.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/img/swift-model.pdf ___________________________________________________________________ Name: svn:executable + * Name: svn:mime-type + application/octet-stream Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 15:09:51 UTC (rev 3354) +++ text/parco10submission/paper.tex 2010-06-15 15:58:16 UTC (rev 3355) @@ -666,13 +666,15 @@ files back out the submitting system. The site model used by Swift is shown in figure \ref{FigureSwiftModel} -\begin{figure}[htbp] -\includegraphics{notes/IMG_9463} -\caption{Swift site model} -\label{FigureSwiftModel} -\end{figure} +\begin{figure*}[htbp] + \begin{center} + \includegraphics{img/swift-model} + \caption{Swift site model} + \label{FigureSwiftModel} + \end{center} +\end{figure*} - A site in Swift consists of one or more worker nodes which will +A site in Swift consists of one or more worker nodes which will execute programs through some \emph{execution provider}, an \emph{accessible file system} which must be visible on the worker nodes as a POSIX-like file system and must be accessible through some From noreply at svn.ci.uchicago.edu Tue Jun 15 14:36:37 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 14:36:37 -0500 (CDT) Subject: [Swift-commit] r3356 - text/parco10submission Message-ID: <20100615193637.02B599CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 14:36:36 -0500 (Tue, 15 Jun 2010) New Revision: 3356 Modified: text/parco10submission/lx.zsh Log: Improvements Modified: text/parco10submission/lx.zsh =================================================================== --- text/parco10submission/lx.zsh 2010-06-15 15:58:16 UTC (rev 3355) +++ text/parco10submission/lx.zsh 2010-06-15 19:36:36 UTC (rev 3356) @@ -5,7 +5,10 @@ # Use +p & +d to create PDF & PS files. # Modify DOC to change the relevant tex file. # Modify TMP & BIB to use different temporary storage. +# Use "./lx.zsh clean" to clean up +# set -x + DEFAULTDOC="paper" COMPILER="pdflatex" @@ -32,6 +35,8 @@ [[ ${DOC} == "" ]] && DOC=${DEFAULTDOC} [[ ${MAKE_PDF} == "yes" ]] && MAKE_PS="yes" + + clean() { local t @@ -58,6 +63,24 @@ done } +shoot() +# print out an array loaded by scan() +{ + local i + local N + N=$( eval print '${#'$1'}' ) + # print N $N + for (( i=1 ; i <= N ; i++ )) + do + eval print -- "$"${1}"["${i}"]" + done +} + +check_bib_missing() +{ + awk '$0 ~ /Warn.*database entry/ { gsub(/\"/, "", $8); print "No entry for: " $8; }' +} + biblio() { if [[ -f ${DOC}.bbl && @@ -65,19 +88,19 @@ then rm ${DOC}.bbl fi - if { bibtex ${DOC} > ${BIB} } + if { bibtex ${DOC} >& ${BIB} } then printf "." - ${COMPILER} ${DOC} > /dev/null + ${COMPILER} ${DOC} >& /dev/null printf "." - ${COMPILER} ${DOC} > ${TMP} + ${COMPILER} ${DOC} >& ${TMP} printf "." 
- WARNS=( $( grep "Warning--" ${BIB} ) ) + check_bib_missing < ${BIB} | scan WARNS if (( ${#WARNS} > 0 )) then printf "\n" print "Bibtex:" - print ${WARNS} + shoot WARNS fi else printf "\n" @@ -127,11 +150,10 @@ -f error || ${DOC}.tex -nt ${DOC}.dvi || lx.zsh -nt ${DOC}.dvi || - dht.bib -nt ${DOC}.dvi || $( readlink Wozniak.bib ) -nt ${DOC}.dvi || ${FORCE} == "yes" ]] then - if { ${COMPILER} --interaction nonstopmode ${DOC} > ${TMP} } + if { ${COMPILER} --interaction nonstopmode ${DOC} >& ${TMP} } then printf "OK" rm -f error From noreply at svn.ci.uchicago.edu Tue Jun 15 14:57:50 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 14:57:50 -0500 (CDT) Subject: [Swift-commit] r3359 - text/parco10submission Message-ID: <20100615195750.BDF8B9CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 14:57:50 -0500 (Tue, 15 Jun 2010) New Revision: 3359 Modified: text/parco10submission/paper.tex Log: Reorg Ousterhout Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 19:55:38 UTC (rev 3358) +++ text/parco10submission/paper.tex 2010-06-15 19:57:50 UTC (rev 3359) @@ -50,7 +50,7 @@ Scientists, engineers and business analysts often work by performing a massive number of runs of domain-specific programs, typically coupled -loosely by large coollections of file-based data. Distributed and +loosely by large collections of file-based data. Distributed and parallel computing resources provide a powerful way to get more of this type of work done faster, but using such resources imposes additional complexities. Swift reduces these complexities with a @@ -105,14 +105,13 @@ % TODO: DECIDE: Drop SwiftScript, use Swift throughout to refer to the language? - This paper goes into greater depth than prior publications \cite{SWIFTSWF08,SWIFTNNN} in describing the Swift language, how its implementation handles large-scale and distributed execution environments, and its contribution to distributed parallel computing. -TODO: Provide a compelling example here, perhaps with -a code segment, of the power of Swift, in a single paragraph. +%TODO: Provide a compelling example here, perhaps with +% a code segment, of the power of Swift, in a single paragraph. \subsection{Swift language concepts} @@ -214,12 +213,6 @@ economical at the moment when they need to perform intensive computation - without continued reprogramming or adjustment of scripts. -% Ousterhout in (Ousterhout 1998) eloquently laid out the rational and -% motivation for scripting languages. As the creator of Tcl [ref], he -% described here the difference between programming and scripting, and -% the place of each in the scheme of applying computers to solving -% problems. - What's missing in current scripting languages is sufficient specification and encapsulation of inputs to, and outputs from, a given application, such that an execution environment could @@ -230,7 +223,7 @@ ``applications-as-procedures'', it provides a way to make the remote - and hence parallel - execution of applications fairly transparent. -TODO: Refine and condense this rationale. +% TODO: Refine and condense this rationale. 
In the remainder of this paper, we present the language, details of the implementation, application use-cases and ongoing @@ -1325,13 +1318,24 @@ \section{Comparison to Other Systems} -As a ``parallel scripting -language'', Swift is typically used to specify and execute scientific -``workflows'' - which we define here as the execution of a -series of steps to perform larger domain-specific tasks. We use the -term workflow as defined by (Taylor et. al. 2006). So we often call a -Swift script a workflow. TODO: Drop this paragraph/concept? Or crisp it up. Perhaps break down the systems that we compare Swift to into a few classes...? +%% As a ``parallel scripting language'', Swift is typically used to +%% specify and execute scientific ``workflows'' - which we define here as +%% the execution of a series of steps to perform larger domain-specific +%% tasks. We use the term workflow as defined by (Taylor +%% et. al. 2006). So we often call a Swift script a workflow. +% Ousterhout in (Ousterhout 1998) eloquently laid out the rational and +% motivation for scripting languages. As the creator of Tcl [ref], he +% described here the difference between programming and scripting, and +% the place of each in the scheme of applying computers to solving +% problems. + +% Ousterhout in (Ousterhout 1998) eloquently laid out the rational and +% motivation for scripting languages. As the creator of Tcl [ref], he +% described here the difference between programming and scripting, and +% the place of each in the scheme of applying computers to solving +% problems. + Coordination languages and systems such as Linda\cite{LINDA}, Strand\cite{STRAN} and PCN\cite{PCN} allow composition of distributed or parallel components, but usually require the components From noreply at svn.ci.uchicago.edu Tue Jun 15 14:59:55 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 14:59:55 -0500 (CDT) Subject: [Swift-commit] r3360 - text/parco10submission Message-ID: <20100615195955.749619CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 14:59:55 -0500 (Tue, 15 Jun 2010) New Revision: 3360 Modified: text/parco10submission/paper.tex Log: Reorg Ousterhout Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 19:57:50 UTC (rev 3359) +++ text/parco10submission/paper.tex 2010-06-15 19:59:55 UTC (rev 3360) @@ -1330,11 +1330,10 @@ % the place of each in the scheme of applying computers to solving % problems. -% Ousterhout in (Ousterhout 1998) eloquently laid out the rational and -% motivation for scripting languages. As the creator of Tcl [ref], he -% described here the difference between programming and scripting, and -% the place of each in the scheme of applying computers to solving -% problems. +The rational and motivation for scripting languages, the +difference between programming and scripting, and the place of each in +the scheme of applying computers to solving problems, has been +laid out previously~\cite{Ousterhout}. 
Coordination languages and systems such as Linda\cite{LINDA}, Strand\cite{STRAN} and PCN\cite{PCN} allow composition of From noreply at svn.ci.uchicago.edu Tue Jun 15 15:17:01 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 15:17:01 -0500 (CDT) Subject: [Swift-commit] r3362 - text/parco10submission Message-ID: <20100615201701.6ACF19CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 15:17:01 -0500 (Tue, 15 Jun 2010) New Revision: 3362 Modified: text/parco10submission/paper.tex Log: Reorg Intro Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 20:00:20 UTC (rev 3361) +++ text/parco10submission/paper.tex 2010-06-15 20:17:01 UTC (rev 3362) @@ -113,6 +113,55 @@ %TODO: Provide a compelling example here, perhaps with % a code segment, of the power of Swift, in a single paragraph. +\subsection{Rationale} + +The emergence of large-scale production computing infrastructure such +as clusters, grids and high-performance computing (HPC), and the +inherent complexity of programming on these systems, necessitated a +new approach. Swift was developed to create a higher-level language +that focuses not on the details of executing sequences or +``pipelines'' of programs, but rather on specific issues that arise +from the concurrent execution of disparate computational tasks at +large scale. + +While many application needs involve the execution of a single large +and perhaps message-passing parallel app, many others require the +coupling or orchestration of large numbers of application invocations: +either many invocations of the same app, or many invocations of +sequences and patterns of several apps. In this model, existing apps +become like functions in programming, and users typically need to +execute many of them. Scaling up requires the distribution of such +workloads among many computers (``resources''), and hence a ``grid'' +approach. Even if a single large parallel resource suffices, users +won't always have access to the same supercomputer cluster: hence the +need to utilize whatever resource happened to be available or +economical at the moment when they need to perform intensive +computation - without continued reprogramming or adjustment of scripts. + +We claim that the missing feature in current scripting languages is +sufficient specification and encapsulation of inputs to, and outputs +from, a given application, such that an execution environment could +automatically make remote execution transparent. Without this, +achieving location transparancy and automated parallel execution is +not feasible. Swift adds to scripting what RPC adds to programming: +by formalizing the inputs and outputs of +``applications-as-procedures'', it provides a way to make the remote - +and hence parallel - execution of applications fairly transparent. + +The remainder of this paper is organized as follows. In +Section~\ref{Language} we present the major concepts and language +structure of Swift. Section~\ref{Execution} provides details of the +implementation, including the distributed architecture that enables +Swift applications to run on distributed resources. +Section~\ref{Applications} demonstrates real-world applications using +Swift on scientific projects. Section~\ref{Related} describes Swift in +the context of other related systems. 
Section~\ref{Future} highlights +ongoing and future work in the Swift project, and we offer concluding +remarks in Section~\ref{Conclusion}. + +\section{The SwiftScript language} +\label{Language} + \subsection{Swift language concepts} The Swift programming model is data-oriented: it encapsulates the @@ -185,53 +234,7 @@ data files are associated with the logical representation of Swift's data model of variables and collections. -\subsection{Rationale for creating Swift} -Why do we need Swift? Why create yet another scripting language for -the execution of application programs when so many exist? Swift was -developed to create a higher-level language that focuses not on the -details of executing sequences or ``pipelines'' of programs, but -rather on specific issues that arise from scale. - -% These issues, -% however, once identified, seem to equally well apply to, and benefit -% the execution of, application pipelines that are not large-scale and -% not necessarily distributed. Our motivation for developing Swift is -% based on the following premises: - -While many application needs involve the execution of a single large -and perhaps message-passing parallel app, many others require the -coupling or orchestration of large numbers of application invocations: -either many invocations of the same app, or many invocations of -sequences and patterns of several apps. In this model, existing apps -become like functions in programming, and users typically need to -execute many of them. Scaling up requires the distribution of such -workloads among many computers (``resources''), and hence a ``grid'' -approach. Even if a single large parallel resource suffices, users -won't always have access to the same supercomputer cluster: hence the -need to utilize whatever resource happened to be available or -economical at the moment when they need to perform intensive -computation - without continued reprogramming or adjustment of scripts. - -What's missing in current scripting languages is sufficient -specification and encapsulation of inputs to, and outputs from, a -given application, such that an execution environment could -automatically make remote execution transparent. Without this, -achieving location transparancy and automated parallel execution is -not feasible. Swift adds to scripting what RPC adds to programming: -by formalizing the inputs and outputs of -``applications-as-procedures'', it provides a way to make the remote - -and hence parallel - execution of applications fairly transparent. - -% TODO: Refine and condense this rationale. - -In the remainder of this paper, we present the language, -details of the implementation, application use-cases and ongoing -research. TODO: refine this sentence. - -\section{The SwiftScript language} -\label{Language} - \subsection{Language basics} A Swift script describes data, application components, invocations @@ -1317,6 +1320,7 @@ active development group; releases roughly every 2 months. \section{Comparison to Other Systems} +\label{Related} %% As a ``parallel scripting language'', Swift is typically used to %% specify and execute scientific ``workflows'' - which we define here as @@ -1447,11 +1451,8 @@ TODO: Polish conclusion - was pasted here from intro and doesnt fit yet. 
-\section{Acknowledgements} +\section{Acknowledgments} -TODO: authors beyond number 3 go here according to ACM style guide, rather -than in header - TODO: NSF/DOE grant acknowledgements \section{TODO} From noreply at svn.ci.uchicago.edu Tue Jun 15 15:30:32 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 15:30:32 -0500 (CDT) Subject: [Swift-commit] r3363 - text/parco10submission Message-ID: <20100615203032.3E5459CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 15:30:32 -0500 (Tue, 15 Jun 2010) New Revision: 3363 Modified: text/parco10submission/paper.tex Log: Initial reorg of Section 2. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 20:17:01 UTC (rev 3362) +++ text/parco10submission/paper.tex 2010-06-15 20:30:32 UTC (rev 3363) @@ -162,10 +162,8 @@ \section{The SwiftScript language} \label{Language} -\subsection{Swift language concepts} - The Swift programming model is data-oriented: it encapsulates the -invocation of ``ordinary programs'' - technically, POSIX \emph{exec()} +invocation of ``ordinary programs'' - technically, POSIX {\tt exec()} operations - in a manner that explicitly specifies the files and other arguments that are the inputs and outputs of each program invocation. This formal but simple model (elaborated in section @@ -181,24 +179,23 @@ workstation. The same script can then be executed on a cluster, one or more grids of clusters, and on large scale parallel supercomputers such as the Sun Constellation (ref) or the IBM Blue Gene/P. (section -\ref{ExecutingSites}) +\ref{ExecutingSites}). Notable features include: \item Automatic parallelization of program invocations, invoking -programs that have no data dependencies in parallel (section -\ref{Language}) + programs that have no data dependencies in parallel; \item Automatic balancing work over available resources based on adaptive algorithms that account for both resource performance and reliability, and which throttle program invocations at a rate -appropriate for each execution location and mechanism (section -\ref{ExecutingSites}). +appropriate for each execution location and mechanism; -\item Reliability through retry and relocation of failed executions -and restart of interrupted scripts from the point of -failure. (section \ref{ExecutingReliably}) +\item Reliability through replication and automatic resubmission of + failed executions and restart of interrupted scripts from the point + of failure; -\item Recording the provenance of data objects produced by a Swift -script (section \ref{Provenance}). +\item Formalizing the creation and management of data objects in the + language and recording the provenance of data objects produced by a + Swift script. \end{itemize} @@ -209,42 +206,37 @@ language, which makes the benefits above possible, can be summarized as follows: +\subsection{Language basics} + + +A Swift script describes data, application components, invocations +of applications components, and the inter-relations (data flow) +between those invocations, using a C-like syntax. Swift scripts are written as a set of procedures, composed upwards, starting with \emph{atomic procedures} which specify the execution of -component programs, and then higher level procedures are composed as +external programs, and then higher level procedures are composed as pipelines (or more generally, graphs) of sub-procedures. 
Atomic procedures specify the inputs and outputs of application programs in terms of files and other parameters. Compound procedures are composed -of a graph of calls to atomic and other compound procedures +into a conceptual graph of calls to atomic and other compound +procedures. Swift variables hold either primitive values, files, or collections of -files. Atomic variables are \emph{single assignment}, which provides -the basis for Swift's model of procedure chaining. Procedures are +files. All variables are \emph{single assignment}, which provides the +basis for Swift's model of procedure chaining. Procedures are executed when their input parameters have all been set from existing data or prior procedure executions. Procedures are chained by specifying that an output variable of one procedure is passed as the -input variable to the second procedure. +input variable to the second procedure. This dataflow model means that +Swift procedures are not necessarily executed in source-code order but +rather when their input data becomes available. -% This dataflow model means that -% Swift procedures are not necessarily executed in source-code order but -% rather when their input data becomes available. - Variables are declared with a type, and when they contain files are associated with a \emph{mapper} which indicates how physical data files are associated with the logical representation of Swift's data model of variables and collections. - -\subsection{Language basics} - -A Swift script describes data, application components, invocations -of applications components, and the inter-relations (data flow) -between those invocations. - - Data is represented in a script by strongly-typed single-assignment -variables, using a C-like syntax. - - Types in Swift can be \emph{atomic} or \emph{composite}. An atomic +Types in Swift can be \emph{atomic} or \emph{composite}. An atomic type can be either a \emph{primitive type} or a \emph{mapped type}. Swift provides a fixed set of primitive types, such as \emph{integer} or \emph{string}. A mapped type indicates that the actual data does not @@ -278,21 +270,21 @@ describes a functional/dataflow style interface to imperative components. -For example, the following example lists a procedure which makes use of -the ImageMagick\cite{ImageMagick} convert command to rotate a supplied -image by a specified angle: +For example, the following example lists a procedure which makes use +of the ImageMagick\cite{ImageMagick} convert command to rotate a +supplied image by a specified angle: - \begin{verbatim} +\begin{verbatim} app (image output) rotate(image input) { convert "-rotate" angle @input @output; } - \end{verbatim} +\end{verbatim} -A procedure is invoked using the familiar syntax: +A procedure is invoked using a syntax similar to that of the C family: - \begin{verbatim} +\begin{verbatim} rotated = rotate(photo, 180); - \end{verbatim} +\end{verbatim} While this looks like an assignment, the actual unix level execution consists of invoking the command line specified in the \verb|app| @@ -304,9 +296,9 @@ definition of that type. We can declare it as a \emph{marker type} which has no structure exposed to SwiftScript: - \begin{verbatim} +\begin{verbatim} type image; - \end{verbatim} +\end{verbatim} This does not indicate that the data is unstructured; but it indicates that the structure of the data is not exposed to SwiftScript. 
Instead, @@ -329,7 +321,7 @@ rotated = rotate(photo, 180); \end{verbatim} -This script can be invoked from the command line: +This script can be invoked from the command line as: \begin{verbatim} $ ls *.jpeg @@ -353,27 +345,27 @@ \verb|filesys_mapper| maps all files matching a particular unix glob pattern into an array: - \begin{verbatim} +\begin{verbatim} file frames[] ; - \end{verbatim} +\end{verbatim} The \verb|foreach| construct can be used to apply the same procedure call(s) to each element of an array: - \begin{verbatim} +\begin{verbatim} foreach f,ix in frames { output[ix] = rotate(frames, 180); } - \end{verbatim} +\end{verbatim} Sequential iteration can be expressed using the \verb|iterate| construct: - \begin{verbatim} +\begin{verbatim} step[0] = initialCondition(); iterate ix { step[ix] = simulate(step[ix-1]); } - \end{verbatim} +\end{verbatim} This fragment will initialise the 0-th element of the \verb|step| array to some initial condition, and then repeatedly run the \verb|simulate| @@ -381,17 +373,16 @@ \subsection{Ordering of execution} -Non-array variables are \emph{single-assignment}, which means that they -must be assigned to exactly one value during execution. A procedure or -expression will be executed when all of its input parameters have been -assigned values. As a result of such execution, more variables may -become assigned, possibly allowing further parts of the script to -execute. +Non-array variables are \emph{single-assignment}, which means that +they must be assigned to exactly one value during execution. A +procedure or expression will be executed when all of its input +parameters have been assigned values. As a result of such execution, +more variables may become assigned, possibly allowing further parts of +the script to execute. In this way, scripts are implicitly +concurrent. Aside from serialisation implied by these dataflow +dependencies, execution of component programs can proceed without +synchronization in time. -In this way, scripts are implicitly parallel. Aside from serialisation -implied by these dataflow dependencies, execution of component programs -can proceed in parallel. - In this fragment, execution of procedures \verb|p| and \verb|q| can happen in parallel: @@ -413,14 +404,13 @@ content of an array increases during execution, but cannot otherwise change. Once a value for a particular element is known, then it cannot change. Eventually, all values for an array are known, and that array -is regarded as \emph{closed}. +is regarded as \emph{closed}. Statements which deal with the array as +a whole will wait for the array to be closed before executing (thus, a +closed array is the equivalent of a non-array type being +assigned). However, a \verb|foreach| statement will apply its body to +elements of an array as they become known. It will not wait until the +array is closed. -Statements which deal with the array as a whole will wait for the array -to be closed before executing (thus, a closed array is the equivalent -of a non-array type being assigned). However, a \verb|foreach| -statement will apply its body to elements of an array as they become -known. It will not wait until the array is closed. - Consider this script: \begin{verbatim} file a[]; @@ -442,16 +432,14 @@ \subsection{Compound procedures} -As with many other programming languages, procedures consisting of SwiftScript -code can be defined. 
These differ from the previously mentioned procedures -declared with the \verb|app| keyword, as they invoke other SwiftScript -procedures rather than a component program. +As with many other programming languages, procedures consisting of +SwiftScript code can be defined. These differ from the previously +mentioned procedures declared with the \verb|app| keyword, as they +invoke other SwiftScript procedures rather than a component +program. The basic structure of a composite procedure may be thought +of as a graph of calls to other procedures. -The basic structure of a composite procedure is a graph of calls to -other procedures. (TODO: does talking about call graphs make sense in -the context of programming language-style descriptions?) - - \begin{verbatim} +\begin{verbatim} (file output) process (file input) { file intermediate; intermediate = first(input); @@ -461,7 +449,7 @@ file x <"x.txt">; file y <"y.txt">; y = process(x); - \end{verbatim} +\end{verbatim} This will invoke two procedures, with an intermediate data file named anonymously connecting the \verb|first| and \verb|second| procedures. @@ -469,7 +457,7 @@ Ordering of execution is generally determined by execution of \verb|app| procedures, not by any containing procedures. In this code block: - \begin{verbatim} +\begin{verbatim} (file a, file b) A() { a = A1(); b = A2(); @@ -478,7 +466,7 @@ (x,y) = A(); s = S(x); t = S(y); - \end{verbatim} +\end{verbatim} then a valid execution order is: \verb|A1 S(x) A2 S(y)|. The compound procedure \verb|A| does not have to have fully completed @@ -544,8 +532,6 @@ data storage and access methods to be plugged in to scripts. \begin{verbatim} - # TODO I just made this up; need to check - # that it actually works type file; app (extern o) populateDatabase() { @@ -572,9 +558,6 @@ The single assignment and execution ordering rules will still apply though; populateDatabase will always be run before analyseDatabase. -TODO mappings may be to URLs, not only to local filesystem files; and more -explicit description of what mapping is. - \subsection{Swift mappers} Swift contains a number of built-in mappers. A representative sample of these is listed in table \ref{mappertable}. @@ -597,17 +580,16 @@ \subsection{The execution environment for component programs} \label{LanguageEnvironment} - A SwiftScript \verb|app| declaration describes how a component -program is invoked. In order to ensure the correctness of the -Swift model, the environment in which programs are executed needs -to be constrained. +A SwiftScript \verb|app| declaration describes how a component program +is invoked. In order to ensure the correctness of the Swift model, the +environment in which programs are executed needs to be constrained. - A program is invoked in its own working directory; in that working +A program is invoked in its own working directory; in that working directory or one of its subdirectories, the program can expect to find -all of the files that are passed as inputs to the application block; and -on exit, it should leave all files named by that application block in -the same working directory. Applications should also not assume that -they will be executed on a particular host (to facilitate site +all of the files that are passed as inputs to the application block; +and on exit, it should leave all files named by that application block +in the same working directory. 
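The simple mapper expression mentioned in the complex-type discussion
above can be written out explicitly. This is a sketch only: the
\verb|prefix| parameter name is an assumption to be verified against
the Swift user guide, but the intended effect is the one described in
the text, namely mapping \verb|m.h| and \verb|m.v| to the files
\verb|data.h| and \verb|data.v|.

\begin{verbatim}
  volume m <simple_mapper; prefix="data">;
  o = p(m.h);
\end{verbatim}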
Applications should also not assume +that they will be executed on a particular host (to facilitate site portability), run in in any particular order with respect to other application invocations in a script (except those implied by data dependency), or that their working directories will or will not be @@ -616,39 +598,38 @@ Consider the \verb|app| declaration for the \verb|rotate| procedure in section N. - \begin{verbatim} +\begin{verbatim} app (file output) rotate(file input, int angle) - \end{verbatim} +\end{verbatim} - The procedure signature declares the inputs and outputs for this +The procedure signature declares the inputs and outputs for this procedure. As in many other programming languages, this defines the type signatures and names of parameters; this also defines which files -will be placed into the application working directory before execution, -and which files will be expected there after execution. For the above -declaration, the file mapped to the \verb|input| parameter will be -placed in the working directory beforehand, and the file mapped to -\verb|output| will be expected there after execution; the input -parameter \verb|angle| is of primitive type\footnote{need to define -primitive type earlier on here...} and so no files are staged in for -this parameter. +will be placed into the application working directory before +execution, and which files will be expected there after execution. For +the above declaration, the file mapped to the \verb|input| parameter +will be placed in the working directory beforehand, and the file +mapped to \verb|output| will be expected there after execution; the +input parameter \verb|angle| is of primitive type and so no files are +staged in for this parameter. - \begin{verbatim} +\begin{verbatim} convert "-rotate" angle @input @output; - \end{verbatim} +\end{verbatim} - The body of the \verb|app| block defines the unix command-line that +The body of the \verb|app| block defines the unix command-line that will be executed when this procedure is invoked. The first token (in -this case \verb|convert|) defines a \emph{transformation name} which is -used to determine the unix executable name. Subsequent expressions, +this case \verb|convert|) defines a \emph{transformation name} which +is used to determine the unix executable name. Subsequent expressions, separated by spaces, define the command-line arguments for that executable: \verb|"-rotate"| is a string literal; angle specifies the value of the angle parameter; the syntax \verb|@variable| evaluates to the filename of the supplied variable, thus \verb|@input| and \verb|@output| evaluate to the filenames of the corresponding -parameters. It should be noted that it is possible to take the filename -of \verb|output| even though it is a return parameter; although the -value of that variable has not yet been computed, the filename where -that value will go is already known. +parameters. It should be noted that it is possible to take the +filename of \verb|output| even though it is a return parameter; +although the value of that variable has not yet been computed, the +filename where that value will go is already known. TODO comment (here?) about how this model appears somewhat constrained but provides a well defined atomicity that can be used for various @@ -662,21 +643,23 @@ \section{Execution} \label{Execution} + Swift is implemented by compiling to a Karajan program, which provides several benefits. A notable benefit visible to users is that of -providers. 
This enbles the Swift execution model to be extended by +providers. This enables the Swift execution model to be extended by adding new data providers and job execution providers. This is -explained in more detail in section \ref{ExecutingSites}: Executing on a remote site. +explained in more detail in section \ref{ExecutingSites}: Executing on +a remote site. \subsection{Executing on a remote site} \label{ExecutingSites} - With the above restrictions, execution of a unix program on a remote +With the above restrictions, execution of a unix program on a remote site is straightforward. The Swift runtime must prepare a remote working directory for each job with appropriate input files staged in; then it must execute the program; and then it must stage the output files back out the submitting system. The site model used by Swift is -shown in figure \ref{FigureSwiftModel} +shown in Figure~\ref{FigureSwiftModel}. \begin{figure*}[htbp] \begin{center} From noreply at svn.ci.uchicago.edu Tue Jun 15 15:38:21 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 15:38:21 -0500 (CDT) Subject: [Swift-commit] r3364 - text/parco10submission Message-ID: <20100615203821.E0A149CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 15:38:21 -0500 (Tue, 15 Jun 2010) New Revision: 3364 Modified: text/parco10submission/paper.tex Log: Drop "Usage Experience" section, move key points to "Execution" section Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 20:30:32 UTC (rev 3363) +++ text/parco10submission/paper.tex 2010-06-15 20:38:21 UTC (rev 3364) @@ -849,8 +849,44 @@ forwardref collective IO section if that gets written, or include that entire section here? -\section{Example applications} +\subsection{Features to support use on dynamic resources} +Using Swift to submit to a large number of sites poses a number of +practical challenges that are not encountered when running on a small +number of sites. These challenges are seen when comparing execution on +the TeraGrid\cite{TERAGRID} with execution on the Open Science +Grid\cite{OSG}. The set of sites which may be used is large and +changing. It is impractical to maintain a site catalog by hand in this +situation. In collaboration with the OSG Engagement group, Swift was +interfaced to ReSS\cite{ReSS} so that the site catalog is generated +from that information system. This provides a very straightforward way +to generate a large catalog of 'fairly likely to work' sites. + +Having discovered those sites, two significant problems remain: the +quality of those sites varies wildly; and user applications are not +installed on those sites. Individual OSG sites exhibit extremely +different behaviour, both with respect to other sites at the same +time, and with respect to themselves at other times. This is hard to +describe statically. Firstly, the load that a particular site will +bear varies over time. Secondly, some sites fail in unusual fashion. +Swift's site scoring mechanism deals well with this in the majority of +cases. However, continued discovery of unusual failure modes drives +the implementation of ever more fault tolerance mechanisms. + +When running jobs on dynamically discovered sites, it is likely that +component programs are not installed on those sites. OSG Engagement +has developed best practices to deal with this, which are implemented +straightforwardly in Swift. 
Applications may be compiled statically +and deployed as a small number of self contained files as part of the +input for a component program execution; in this case, the application +files are described as mapped input files in the same way as input +data files, and are passed as a parameter to the application +executable. Swift's existing input file management then handles +once-per-site-per run staging in of the application files, without +change. + +\section{Applications} + TODO: two or three applications in brief. discuss both the application behaviour in relation to Swift, but underlying grid behaviour in relation to Swift @@ -971,20 +1007,20 @@ a series of procedure calls, using variables to establish data dependencies. -In the example, reslice\_wf defines a four-step -pipeline computation, using variables to establish -data dependencies. It applies reorientRun to a run first in the x axis -and then in the y axis, and then aligns each image in the resulting -run with the first image. The program alignlinear determines how to -spatially adjust an image to match a reference image, and produces an -air parameter file. The actual alignment is done by the program -reslice. Note that variable yR, being the output of the first step and -the input of the second step, defines the data dependencies between -the two steps. The pipeline is illustrated in the center of figure \ref{FMRIFigure2}, while in figure \ref{FMRIgraph} we show the expanded graph for a 20-volume -run. Each volume comprises an image file and a header file, so there -are a total of 40 input files and 40 output files. We can also apply -the same procedure to a run containing hundreds or thousands of -volumes. +In the example, reslice\_wf defines a four-step pipeline computation, +using variables to establish data dependencies. It applies reorientRun +to a run first in the x axis and then in the y axis, and then aligns +each image in the resulting run with the first image. The program +alignlinear determines how to spatially adjust an image to match a +reference image, and produces an air parameter file. The actual +alignment is done by the program reslice. Note that variable yR, being +the output of the first step and the input of the second step, defines +the data dependencies between the two steps. The pipeline is +illustrated in the center of Figure~\ref{FMRIFigure2}, while in figure +\ref{FMRIgraph} we show the expanded graph for a 20-volume run. Each +volume comprises an image file and a header file, so there are a total +of 40 input files and 40 output files. We can also apply the same +procedure to a run containing hundreds or thousands of volumes. In this example we show the details of the procedure reorientRun, which is also a compound procedure. @@ -1096,73 +1132,6 @@ doall(p); \end{verbatim} -\section{Usage Experience} - -\subsection{Use on large numbers of sites in the Open Science Grid} - -TODO: get Mats to comment on this section...? - -Using Swift to submit to a large number of sites poses a number of -practical challenges that are not encountered when running on a small -number of sites. These challenges are seen when comparing execution on -the TeraGrid\cite{TERAGRID} with execution on the Open Science -Grid\cite{OSG}. - -The set of sites which may be used is large and changing. It is -impractical to maintain a site catalog by hand in this situation. -In collaboration with the OSG Engagement group, Swift was interfaced to -ReSS\cite{ReSS} so that the site catalog is generated from that information -system. 
This provides a very straightforward way to generate a large catalog -of 'fairly likely to work' sites. - -Having discovered those sites, two significant problems remain: the -quality of those sites varies wildly; and user applications are not -installed on those sites. - -Individual OSG sites exhibit extremely different behaviour, both with -respect to other sites at the same time, and with respect to themselves -at other times. This is hard to describe statically. Firstly, the -load that a particular site will bear varies over time. Secondly, some -sites fail in unusual fashion. - -Swift's site scoring mechanism deals well with this in the majority of -cases. However, continued discovery of unusual failure modes drives -the implementation of ever more fault tolerance mechanisms. - -\subsection{Automating Application Deployment} - -When running jobs on dynamically discovered sites, it is likely that -component programs are not installed on those sites. - -OSG Engagement has developed best practices to deal with this, which -are implemented straightforwardly in Swift. Applications may be compiled -statically and deployed as a small number of self contained files as part -of the input for a component program execution; in this case, the -application files are described as mapped input files in the same way -as input data files, and are passed as a parameter to the application -executable. Swift's existing input file management then handles -once-per-site-per run staging in of the application files, without change. - -\begin{verbatim} - // NEED TO MAKE THIS A BETTER CODE SAMPLE - // ... I JUST MADE IT UP FROM MEMORY OF - // BEING AT RENCI - - app (file o) myapp_inner (file i, file exe) { - sh "appexecutable" @i @o; - } - - (file o) myapp (file i) { - file appexe <"appexecutable">; - o = myapp_inner(i); - } -\end{verbatim} - -TODO: Zhengxiong Hou has also done stuff about application -stagein - this could be mentioned (see Zhengxiong's email and paper) - -TODO: what's the conclusion (if any) of this section? 
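The deployment pattern described above, shipping a statically linked
executable as a mapped input file alongside the data, can be sketched
as follows. This is a hypothetical illustration: the \verb|runbinary|
transformation (assumed to be a small wrapper catalogued on each
site), the file name \verb|myapp.exe| and the procedure names are all
invented for the sketch.

\begin{verbatim}
  app (file o) myapp_run (file i, file exe) {
    runbinary @exe @i @o;
  }

  (file o) myapp (file i) {
    file exe <"myapp.exe">;   # statically linked binary, staged like data
    o = myapp_run(i, exe);
  }
\end{verbatim}

Because \verb|exe| is an ordinary mapped input, Swift's existing input
file management stages it to whichever site runs \verb|myapp_run|, as
noted in the text retained above.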
- \section{Future work} \subsection{Automatic characterisation of site and application behaviour} From noreply at svn.ci.uchicago.edu Tue Jun 15 15:44:14 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 15:44:14 -0500 (CDT) Subject: [Swift-commit] r3365 - text/parco10submission Message-ID: <20100615204414.C3A089CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 15:44:14 -0500 (Tue, 15 Jun 2010) New Revision: 3365 Modified: text/parco10submission/elsarticle-num.bst text/parco10submission/elsarticle.cls Log: Drop x bit Property changes on: text/parco10submission/elsarticle-num.bst ___________________________________________________________________ Name: svn:executable - * Property changes on: text/parco10submission/elsarticle.cls ___________________________________________________________________ Name: svn:executable - * From noreply at svn.ci.uchicago.edu Tue Jun 15 16:27:52 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 16:27:52 -0500 (CDT) Subject: [Swift-commit] r3366 - text/parco10submission Message-ID: <20100615212752.C7AE39CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 16:27:52 -0500 (Tue, 15 Jun 2010) New Revision: 3366 Modified: text/parco10submission/paper.tex Log: New mappers table Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 20:44:14 UTC (rev 3365) +++ text/parco10submission/paper.tex 2010-06-15 21:27:52 UTC (rev 3366) @@ -103,16 +103,11 @@ component programs on grids and other parallel platforms, providing automated site selection, data management, and reliability. -% TODO: DECIDE: Drop SwiftScript, use Swift throughout to refer to the language? - This paper goes into greater depth than prior publications \cite{SWIFTSWF08,SWIFTNNN} in describing the Swift language, how its implementation handles large-scale and distributed execution environments, and its contribution to distributed parallel computing. -%TODO: Provide a compelling example here, perhaps with -% a code segment, of the power of Swift, in a single paragraph. - \subsection{Rationale} The emergence of large-scale production computing infrastructure such @@ -208,7 +203,6 @@ \subsection{Language basics} - A Swift script describes data, application components, invocations of applications components, and the inter-relations (data flow) between those invocations, using a C-like syntax. @@ -385,19 +379,16 @@ In this fragment, execution of procedures \verb|p| and \verb|q| can happen in parallel: - - \begin{verbatim} +\begin{verbatim} y=p(x); z=q(x); - \end{verbatim} - +\end{verbatim} whilst in this fragment, execution is serialised by the variable \verb|y|, with procedure \verb|p| executing before \verb|q|: - - \begin{verbatim} +\begin{verbatim} y=p(x); z=q(y); - \end{verbatim} +\end{verbatim} Arrays in SwiftScript are more generally \emph{monotonic}\cite{MONOTONICPHD}; that is, knowledge about the @@ -411,8 +402,8 @@ elements of an array as they become known. It will not wait until the array is closed. -Consider this script: - \begin{verbatim} +Consider the script below: +\begin{verbatim} file a[]; file b[]; foreach v,i in a { @@ -420,8 +411,7 @@ } a[0] = r(); a[1] = s(); - \end{verbatim} - +\end{verbatim} Initially, the \verb|foreach| statement will have nothing to execute, as the array \verb|a| has not been assigned any values. The procedures \verb|r| and \verb|s| will execute. 
As soon as either of them is @@ -472,8 +462,6 @@ compound procedure \verb|A| does not have to have fully completed for its return values to be used by subsequent statements. -TODO: talk about anonymous mapping somewhere - a mappers section... - \subsection{More about types} \label{LanguageTypes} @@ -559,24 +547,70 @@ populateDatabase will always be run before analyseDatabase. \subsection{Swift mappers} -Swift contains a number of built-in mappers. A representative sample -of these is listed in table \ref{mappertable}. -\begin{table}[htb] -\begin{tabular}{|r|p{2in}|} -\hline -\verb|single_file_mapper| & maps a single explicitly named file \\ -\hline -\verb|filesys_mapper| & maps files matching a pattern into an array \\ -\hline -\verb|simple_mapper| & maps files to arbitrarily nested data types based on - components of the file name \\ -\hline -\end{tabular} -\caption{SwiftScript built-in mappers} -\label{mappertable} +The Swift programmer manipulate in-memory variables which are +\emph{mapped} to files in the filesystem. This is coordinated by an +extensible set of components called \emph{mappers}. Swift contains a +number of built-in mappers. A representative sample of these is listed +in Table~\ref{mappertable}. + +\begin{table}[t] + \begin{center} + \begin{tabular}{|l|p{3.5cm}|p{5cm}|} + \hline + {\bf Mapper name } & + {\bf Description} & + {\bf Example} \\\hline + \verb|single_file_mapper| & + maps single named file \vspace{5mm}& + \begin{minipage}{5cm} + \vspace{2mm} + \begin{center} + {\tt file f<"data.txt">;} \\ + --- \\ + $f \rightarrow {\tt file.txt}$ + \vspace{2mm} + \end{center} + \end{minipage} + \\ + \hline + \verb|filesys_mapper| & + maps directory contents into an array \vspace{5mm} & + \begin{minipage}{5cm} + \vspace{2mm} + \begin{center} + {\tt file f;} \\ + --- \\ + $f_0 \rightarrow {\tt file2.txt}$ + \end{center} + \end{minipage} + \\ + \hline + \verb|simple_mapper| & + maps components of the variable name \vspace{5mm} & + \begin{minipage}{5cm} + \vspace{2mm} + \begin{center} + {\tt file f;} \\ + --- \\ + $f.\textrm{red} \rightarrow {\tt file.red.txt}$ + \end{center} + \end{minipage} + \\ + \hline + \end{tabular} + \end{center} + \caption{SwiftScript built-in mappers: conceptual syntax} + \label{mappertable} \end{table} +Mappers essentially define a function that translates a given variable +name into a filename. Thus, a single mapper object, connected to a +structured Swift variable, can represent a large, structured data +set. + \subsection{The execution environment for component programs} \label{LanguageEnvironment} From noreply at svn.ci.uchicago.edu Tue Jun 15 16:44:50 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 16:44:50 -0500 (CDT) Subject: [Swift-commit] r3367 - text/parco10submission Message-ID: <20100615214450.9B09E9CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 16:44:50 -0500 (Tue, 15 Jun 2010) New Revision: 3367 Modified: text/parco10submission/paper.tex Log: Quick pass through Section 3 Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 21:27:52 UTC (rev 3366) +++ text/parco10submission/paper.tex 2010-06-15 21:44:50 UTC (rev 3367) @@ -618,7 +618,8 @@ is invoked. In order to ensure the correctness of the Swift model, the environment in which programs are executed needs to be constrained. 
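The mapper declarations summarised in Table~\ref{mappertable} above
can be written out as follows. This is a sketch: the parameter names
(\verb|file|, \verb|pattern|, \verb|prefix|) are recalled from the
Swift user guide and should be checked, and the file names are
placeholders.

\begin{verbatim}
  # one logical file, one physical file
  file report <single_file_mapper; file="report.txt">;

  # every file matching the pattern becomes an array element
  file frames[] <filesys_mapper; pattern="*.jpeg">;

  # members of a structured variable map to derived file names,
  # e.g. brain.h and brain.v (using the volume type defined earlier)
  volume brain <simple_mapper; prefix="brain">;
\end{verbatim}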
-A program is invoked in its own working directory; in that working +The Swift execution model is based on the following assumptions: a +program is invoked in its own working directory; in that working directory or one of its subdirectories, the program can expect to find all of the files that are passed as inputs to the application block; and on exit, it should leave all files named by that application block @@ -630,7 +631,7 @@ cleaned up after execution. Consider the \verb|app| declaration for the \verb|rotate| procedure in -section N. +section N: \begin{verbatim} app (file output) rotate(file input, int angle) @@ -665,16 +666,6 @@ although the value of that variable has not yet been computed, the filename where that value will go is already known. -TODO comment (here?) about how this model appears somewhat constrained -but provides a well defined atomicity that can be used for various -reliability mechanisms, site portability, on-site efficiency tuning. -multi-site and reliabilty are already discussed; but the on-site -efficiency tuning (eg using GPFS and laying out files in a way that is -sympathetic to that, potentially using Collective IO fs, and using a -workernode local filesystem) - that discussion could go into the -'executing efficiently' section, or a different 'executing efficiently' -section (change titles...) - \section{Execution} \label{Execution} @@ -709,20 +700,18 @@ as a POSIX-like file system and must be accessible through some \emph{file access provider}. - Two common implementations of this model are execution on the local -system; and execution on one or more remote sites in a Globus\cite{GLOBUS}-based -grid. +Two common implementations of this model are execution on the local +system; and execution on one or more remote sites in a grid managed by +Globus~\cite{Globus_Metacomputing_1997} software. In the former case, +a local scratch file system (such as {\tt /var/tmp}) may be used as +the accessible file system; execution of programs is achieved by +direct POSIX fork; and access on both sides is provided by the POSIX +filesystem API. In the case of a grid site, commonly a shared file +system (NFS~\cite{NFS_1985} or GPFS~\cite{GPFS_2002}) will be provided +by the site with GridFTP~\cite{GridFTP_2005} access from the +submitting system to the remote system; and with GRAM~\cite{GRAM_1998} +and a local resource manager (LRM) providing an execution mechanism. -In the former case, a local scratch file system (such as /var/tmp) is -used as the accessible file system; execution of programs is achieved -by direct unix fork; and access on both sides is provided by the POSIX -filesystem API. - -In the case of a Globus-based grid site, commonly a shared file system -(NFS or GPFS) will be provided by the site with GridFTP\cite{GridFTP} access from -the submitting system to the remote system; and with GRAM\cite{GRAM} and a local -resource manager (LRM) providing an execution mechanism. - Sites are defined in the \emph{site catalog}, which contains descriptions of each site: @@ -739,18 +728,19 @@ \end{verbatim} This file may be constructed by hand or mechanically from some -pre-existing database (such as a grid's existing discovery system). - +pre-existing database (such as a grid's existing discovery +system). The site catalog is reusable and may be shared among multiple +users of the same resources- it is not connected to the application +script. This separates application code from system configuration. 
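As a second illustration of the \verb|app| body rules described above,
the sketch below wraps a hypothetical \verb|histo| program; the
transformation name, its flag and the parameter names are invented for
the example.

\begin{verbatim}
  app (file hist) histogram(file data, int bins) {
    histo "-bins" bins @data @hist;
  }

  file counts <"counts.dat">;
  file h <"hist.dat">;
  h = histogram(counts, 32);
\end{verbatim}

As with \verb|rotate|, the filename of the return parameter
\verb|hist| is known before its value is computed, so \verb|@hist| may
appear in the command line.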
The site catalog may contain definitions for multiple sites in which -case execution will be attemted on all sites. In the presence of +case execution will be attempted on all sites. In the presence of multiple sites, it is necessary to choose between the avalable sites. -The Swift \emph{site selector} achivees this by maintaining a score for +The Swift \emph{site selector} achieves this by maintaining a score for each site which determines the load that Swift will place on that site. As a site is successful in executing jobs, this score wil be increased and as the site is uncsuccessful, this score will be cdecreased. In addition to selecting between sites, this mechanism provides some -dynamic rate limiting (as long as it is assumed that s site indicates -overload by causing jobs to fail -- for example, like TCP\cite{TCP}) +dynamic rate limiting if sites fail due to overload~\cite{FTSH_2003}. This provides an empirically measured estimate of a site's ability to bear load, distinct from more static information elsewhere published. @@ -760,10 +750,10 @@ is not properly quantified by published information (for example, due to load caused by other users). -\subsection{Executing reliably} +\subsection{Reliable execution} \label{ExecutingReliably} - The functional/dataflow(?) nature of SwiftScript with a clearly defined +The functional nature of SwiftScript provides a clearly defined interface to imperative components, in addition to allowing Swift great flexibility in where and when it runs component programs, allows those imperative components to be treated as atmoic components which can be @@ -772,56 +762,49 @@ the runtime that need not be exposed at the language level: \emph{retries}, \emph{restarts} and \emph{replication}. - In the simplest form of error handling in Swift, if a component +In the simplest form of error handling in Swift, if a component program fails then Swift will make a second (or subsequent) attempt to -run the program. +run the program. In contrast to many other systems, retry here is at +the level of the SwiftScript procedure invocation, and includes +completely reattempting site selection, stage in, execution and stage +out. This provides a natural way to deal with many transient errors, +such as temporary network loss, and with many changes in site state. - In contrast to many other systems, retry here is at the level of the -SwiftScript procedure invocation, and includes completely reattempting -site selection, stage in, execution and stage out. +Some errors are more permanent in nature; for example, a component +program may have a bug that causes it to always fail given a +particular set of inputs. In that case, Swift's retry mechanism will +not help; each job will be tried a number of times, and each time it +will fail resulting ultimately in the entire script failing. - This provides a very easy way to deal with many transient errors, -such as temporary network loss, and with many negative changes in site -state (such as a site going offline). +In such a case, Swift provides a \emph{restart log} which encapsulates +which procedure invocations have been succesfully completed. After +appropriate manual intervention, a subsequent Swift run may be started +with this restart log; this will suppress re-execution of already +executed invocations but otherwise allow the script to continue. - Some errors are more permanent in nature; for example, a component -program may have a bug that causes it to always fail given a particular -set of inputs. 
In that case, Swift's retry mechanism will not help; -each job will be tried a number of times, and each time it will fail -resulting ultimately in the entire script failing. - - In such a case, Swift provides a \emph{restart log} which -encapsulates which procedure invocations have been succesfully -completed. After appropriate manual intervention, a subsequent Swift -run may be started with this restart log; this will suppress -re-execution of already executed invocations but otherwise allow the -script to continue. - - A different class of failure is when jobs are submitted to a site but +A different class of failure is when jobs are submitted to a site but are then enqueued for a very long time on that site. This is a failure -in site selection, rather than in execution. Sometimes it can be a soft -failure, in that the job will eventually run on the chosen site - the -site selector has improperly chosen a very heavily loaded site; +in site selection, rather than in execution. Sometimes it can be a +soft failure, in that the job will eventually run on the chosen site - +the site selector has improperly chosen a very heavily loaded site; sometimes it can be a hard failure, in that the job will never run on the site because it has ceased to process its job queue - the site selector has improperly chosen a site which is not executing jobs. - To address this situation, Swift provides for \emph{job replication}. -After a job has been enqueued on a site for too long, a second instance -of the job will be submitted (again undergoing site selection, stagein, -execution and stageout); this will continue up to a defined limit (by -default 3?). +To address this situation, Swift provides for \emph{job replication}. +After a job has been enqueued on a site for too long, a second +instance of the job will be submitted (again undergoing site +selection, stagein, execution and stageout); this will continue up to +a defined limit. When any of those jobs begins executing, all other +replicas will be cancelled. - When any of those jobs begins executing, all other replicas will be -cancelled. - \subsection{Avoiding job submission penalties} In many applications, the overhead of job submission through commonly available mechanisms, such as through GRAM into an LRM, can dominate the execution time. In these situations, it is helpful to combine a -number of Swift level component program executions into a single GRAM/LRM -submission. +number of Swift level component program executions into a single +GRAM/LRM submission. Swift offers two approaches: \emph{clustering} and \emph{coasters}. Clustering constructs job submissions containing a number of component program @@ -836,17 +819,17 @@ In practical usage, the automatic deployment and execution of these components is difficult on a number sites. -However, ahead-of-time clustering can be less efficient than using coasters. -Coasters can react much more dynamically to changing numbers of available -worker nodes. -When clustering, some estimation of how available remote node count -and of job duration must be made to decide on a sensible cluster size. -Incorrectly estimating this can (in one direction) result in an insufficient -number of worker nodes being used, with excessive serialisation; or (in -the other direction) result in an excessive number of GRAM job submissions. -Coaster workers can be queued and executed before all -of the work that they will eventually execute is known, so can get more -work done per GRAM job submission, and get it done earlier. 
+However, ahead-of-time clustering can be less efficient than using +coasters. Coasters can react much more dynamically to changing numbers +of available worker nodes. When clustering, some estimation of how +available remote node count and of job duration must be made to decide +on a sensible cluster size. Incorrectly estimating this can (in one +direction) result in an insufficient number of worker nodes being +used, with excessive serialisation; or (in the other direction) result +in an excessive number of GRAM job submissions. Coaster workers can be +queued and executed before all of the work that they will eventually +execute is known, so can get more work done per GRAM job submission, +and get it done earlier. Job status for coasters is reported as jobs start and end; for clustered jobs, job completion status is only known at the end of the entire cluster. This @@ -854,35 +837,24 @@ jobs) is delayed (in the worst case, activity dependant on the first job in a cluster must wait for all of the jobs to run). -TODO: graphs or citation or something here giving numbers? two sets of -stats - the reliability of coasters vs clusters on a range of sites -(eg a bunch of osg engage and TG sites). also could do: diagram showing -clustering/coasters vs some plain gram submission - CNARI app with 3s -jobs shows this in an extreme way. Either show such a graph here or in -CNARI app section. +%%% Move this to Future Work -TODO: comment on how this relates to Falkon +%% \subsection{Avoiding filesystem inefficiency} -TODO: vocabulary in this section - talks about 'GRAM' - is there a nicer -way to talk about the 'underlying submission system, that underlies -coasters and clustering' ? +%% When running a large number of jobs on a site at once, access to the +%% shared filesystem on that site can be a bottleneck. -\subsection{Avoiding filesystem inefficiency} +%% On large systems, the shared file system is commonly provided by +%% GPFS\cite{GPFS}. This can scale well but when used na\"ively can +%% exhibit pathological behaviour. Early versions of Swift triggered this +%% behaviour by targetting too much file system activity at a single +%% working directory, so that GPFS lock contention came to dominate execution +%% time. -When running a large number of jobs on a site at once, access to the -shared filesystem on that site can be a bottleneck. +%% TODO more... - work done on arranging things in fs; presumably can +%% forwardref collective IO section if that gets written, or include that +%% entire section here? -On large systems, the shared file system is commonly provided by -GPFS\cite{GPFS}. This can scale well but when used na\"ively can -exhibit pathological behaviour. Early versions of Swift triggered this -behaviour by targetting too much file system activity at a single -working directory, so that GPFS lock contention came to dominate execution -time. - -TODO more... - work done on arranging things in fs; presumably can -forwardref collective IO section if that gets written, or include that -entire section here? 
- \subsection{Features to support use on dynamic resources} Using Swift to submit to a large number of sites poses a number of @@ -1521,7 +1493,7 @@ %\end{thebibliography} \bibliographystyle{elsarticle-num} -\bibliography{paper} % for ACM SIGS style +\bibliography{paper,Wozniak} % for ACM SIGS style \verb|$Id$| From noreply at svn.ci.uchicago.edu Tue Jun 15 16:49:55 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 16:49:55 -0500 (CDT) Subject: [Swift-commit] r3368 - text/parco10submission Message-ID: <20100615214955.7D2CD9CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 16:49:55 -0500 (Tue, 15 Jun 2010) New Revision: 3368 Modified: text/parco10submission/paper.tex Log: Move FS notes Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 21:44:50 UTC (rev 3367) +++ text/parco10submission/paper.tex 2010-06-15 21:49:55 UTC (rev 3368) @@ -837,24 +837,6 @@ jobs) is delayed (in the worst case, activity dependant on the first job in a cluster must wait for all of the jobs to run). -%%% Move this to Future Work - -%% \subsection{Avoiding filesystem inefficiency} - -%% When running a large number of jobs on a site at once, access to the -%% shared filesystem on that site can be a bottleneck. - -%% On large systems, the shared file system is commonly provided by -%% GPFS\cite{GPFS}. This can scale well but when used na\"ively can -%% exhibit pathological behaviour. Early versions of Swift triggered this -%% behaviour by targetting too much file system activity at a single -%% working directory, so that GPFS lock contention came to dominate execution -%% time. - -%% TODO more... - work done on arranging things in fs; presumably can -%% forwardref collective IO section if that gets written, or include that -%% entire section here? - \subsection{Features to support use on dynamic resources} Using Swift to submit to a large number of sites poses a number of @@ -1225,11 +1207,14 @@ \subsection{Collective IO} - TODO: I don't actually grasp what is going on here (as in wtf is -collective IO?), let alone what is going on that is "interesting", and -would need a much better understanding of before I could write about it -(let alone write about it in relation to Swift) +%% On large systems, the shared file system is commonly provided by +%% GPFS\cite{GPFS}. This can scale well but when used na\"ively can +%% exhibit pathological behaviour. Early versions of Swift triggered this +%% behaviour by targetting too much file system activity at a single +%% working directory, so that GPFS lock contention came to dominate execution +%% time. 
+ \subsection{Language development} TODO: describe how it becomes more functional as time passes, as is From noreply at svn.ci.uchicago.edu Tue Jun 15 17:26:14 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 17:26:14 -0500 (CDT) Subject: [Swift-commit] r3369 - text/parco10submission Message-ID: <20100615222614.35D7E9CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 17:26:13 -0500 (Tue, 15 Jun 2010) New Revision: 3369 Modified: text/parco10submission/Wozniak.bib text/parco10submission/paper.tex Log: Fixed references: Compile script output is now clean Modified: text/parco10submission/Wozniak.bib =================================================================== --- text/parco10submission/Wozniak.bib 2010-06-15 21:49:55 UTC (rev 3368) +++ text/parco10submission/Wozniak.bib 2010-06-15 22:26:13 UTC (rev 3369) @@ -1,5 +1,3 @@ -% This file was created with JabRef 2.5. -% Encoding: Cp1252 @STRING{CCGRID = {Proc. CCGrid}} @@ -5916,7 +5914,7 @@ timestamp = {2010/04/02} } - at comment{jabref-meta: selector_publisher:1;} +{jabref-meta: selector_publisher:1;} @INBOOK{P2P-Grids_2003, author = {Geoffrey Fox and Dennis Gannon and Sung-Hoon Ko and @@ -6014,7 +6012,7 @@ number = 17, year={2004} } - at comment{publisher={Oxford Univ Press}} +{publisher={Oxford Univ Press}} @INPROCEEDINGS{GridBatch_2008, title = {{GridBatch}: Cloud Computing for Large-Scale @@ -6042,7 +6040,7 @@ number = 5, year = 2009 } - at comment{Pages: 541--551} +{Pages: 541--551} @ARTICLE{OSG_2007, author = {Ruth Pordes and Don Petravick and Bill Kramer and @@ -6066,7 +6064,7 @@ number = 1833, year = 2005, } - at comment{pages 1715-1728} +{pages 1715-1728} @MISC{RFC:4918_WebDAV_2007, author = {{IETF} {Network Working Group}}, @@ -6084,7 +6082,7 @@ volume = 2, year = 2002, } - at comment{pages={43}} +{pages={43}} @ARTICLE{ParaView_2001, title = {Large-Scale Data Visualization Using @@ -6096,7 +6094,7 @@ number = 4, year = {2001}, } - at comment{pages={34--41}, +{pages={34--41}, publisher={Published by the IEEE Computer Society}} @MISC{I2U2_WWW, @@ -6155,11 +6153,55 @@ year = 2010 } - at comment{jabref-meta: selector_publisher:} + at MISC{ImageMagick_WWW, + title = {{ImageMagick} Project Web Site}, + url = {http://www.imagemagick.org}, + year = 2010 +} - at comment{jabref-meta: selector_author:} + at INPROCEEDINGS{Strand_1989, + title = {{Strand}: {A} practical parallel programming language}, + author = {Foster, I. and Taylor, S.}, + booktitle = {Proc. North American Conference on Logic Programming}, + year = 1989 +} - at comment{jabref-meta: selector_journal:} + at ARTICLE{PCN_1993, + title = {Productive parallel programming: {T}he {PCN} approach}, + author = {Foster, I. and Olson, R. and Tuecke, S.}, + journal = {Scientific Programming}, + volume = 1, + number = 1, + year = 1992, +} +{pages = {51--66}, publisher={IOS Press}} - at comment{jabref-meta: selector_keywords:} + at article{Sawzall_2005, + title = {Interpreting the data: {P}arallel analysis with {Sawzall}}, + author = {Pike, R. and Dorward, S. and Griesemer, R. 
and Quinlan, S.}, + journal = {Scientific Programming}, + volume = {13}, + number = {4}, + year = 2005, +} +{pages = {277--298}, publisher={IOS Press}} + at BOOK{BPEL_2006, + author = {Juric, Matjaz B.}, + title = {Business Process Execution Language for Web Services}, + year = 2006, +} +isbn = 1904811817, publisher = {Packt Publishing} + + at INBOOK{Sedna_2007, + chapter = {{S}edna: {A} {BPEL}-Based Environment for + Visual Scientific Workflow Modeling}, + title = {Workflows for e-{S}cience}, + publisher = {Springer}, + year = 2007, + author = {Bruno Wassermann and Wolfgang Emmerich and Ben + Butchart and Nick Cameron and Liang Chen and Jignesh + Patel} +} +pages = {18}, + Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 21:49:55 UTC (rev 3368) +++ text/parco10submission/paper.tex 2010-06-15 22:26:13 UTC (rev 3369) @@ -103,10 +103,10 @@ component programs on grids and other parallel platforms, providing automated site selection, data management, and reliability. -This paper goes into greater depth than prior publications -\cite{SWIFTSWF08,SWIFTNNN} in describing the Swift language, how its -implementation handles large-scale and distributed execution -environments, and its contribution to distributed parallel computing. +This paper goes into greater depth than prior +publications~\cite{Swift_2007} in describing the Swift language, how +its implementation handles large-scale and distributed execution +environments, and its contribution to distributed and parallel computing. \subsection{Rationale} @@ -265,7 +265,7 @@ components. For example, the following example lists a procedure which makes use -of the ImageMagick\cite{ImageMagick} convert command to rotate a +of the ImageMagick~\cite{ImageMagick_WWW} convert command to rotate a supplied image by a specified angle: \begin{verbatim} @@ -874,6 +874,7 @@ change. \section{Applications} +\label{Applications} TODO: two or three applications in brief. discuss both the application behaviour in relation to Swift, but underlying grid behaviour in @@ -1004,8 +1005,10 @@ alignment is done by the program reslice. Note that variable yR, being the output of the first step and the input of the second step, defines the data dependencies between the two steps. The pipeline is -illustrated in the center of Figure~\ref{FMRIFigure2}, while in figure -\ref{FMRIgraph} we show the expanded graph for a 20-volume run. Each +illustrated in the center of % Figure~\ref{FMRIFigure2}, +while in figure +% \ref{FMRIgraph} +we show the expanded graph for a 20-volume run. Each volume comprises an image file and a header file, so there are a total of 40 input files and 40 output files. We can also apply the same procedure to a run containing hundreds or thousands of volumes. @@ -1121,6 +1124,7 @@ \end{verbatim} \section{Future work} +\label{Future} \subsection{Automatic characterisation of site and application behaviour} @@ -1141,9 +1145,9 @@ GRAM and LRM overhead. A resource provisioning system such as Falkon\cite{FALKON} or the -CoG\cite{COG} coaster mechanism developed for Swift can be used to -ameliorate this overhead, by incurring the allocation overhead once per -worker node. +CoG~\cite{CoG_2001} coaster mechanism developed for Swift can be used +to ameliorate this overhead, by incurring the allocation overhead once +per worker node. Both of these mechanisms can be plugged into Swift straightforwardly through the CoG provider API. 
@@ -1280,37 +1284,36 @@ The rational and motivation for scripting languages, the difference between programming and scripting, and the place of each in the scheme of applying computers to solving problems, has been -laid out previously~\cite{Ousterhout}. +laid out previously~\cite{Scripting_1998}. -Coordination languages and systems such as Linda\cite{LINDA}, -Strand\cite{STRAN} and PCN\cite{PCN} allow composition of +Coordination languages and systems such as Linda~\cite{LINDA}, +Strand~\cite{STRAND_1989} and PCN~\cite{PCN_1993} allow composition of distributed or parallel components, but usually require the components to be programmed in specific languages and linked with the systems; where we need to coordinate procedures that may already exist (e.g., legacy applications), were coded in various programming languages and run in different platforms and architectures. Linda defines a set of coordination primitives for concurrent agents to put and retrieve -tuples from a shared data space called a tuple space, which serves as the -medium for communication and coordination. Strand and PCN use -single-assignment variables\cite{singleassigment} as coordination mechanism. Like Linda, -Strand and PCN are data driven in the sense that the action of sending -and receiving data are decoupled, and processes execute only when data -are available. The Swift system uses similar mechanism called future -[16] for workflow evaluation and scheduling. +tuples from a shared data space called a tuple space, which serves as +the medium for communication and coordination. Strand and PCN use +single-assignment variables as coordination +mechanism. Like Linda, Strand and PCN are data driven in the sense +that the action of sending and receiving data are decoupled, and +processes execute only when data are available. -MapReduce\cite{MAPREDUCE} also provides a programming models and a runtime system -to support the processing of large scale datasets. The two key -functions \emph{map} and \emph{reduce} are borrowed from functional language: a -map function iterates over a set of items, performs a specific -operation on each of them and produces a new set of items, where a -reduce function performs aggregation on a set of items. The runtime -system automatically partitions input data and schedules the execution -of programs in a large cluster of commodity machines. The system is -made fault tolerant by checking worker nodes periodically and -reassigning failed jobs to other worker nodes. Sawzall\cite{sawzall} is an -interpreted language that builds on MapReduce and separates the -filtering and aggregation phases for more concise program -specification and better parallelization. +MapReduce~\cite{MapReduce_2004} also provides a programming models and +a runtime system to support the processing of large scale +datasets. The two key functions \emph{map} and \emph{reduce} are +borrowed from functional language: a map function iterates over a set +of items, performs a specific operation on each of them and produces a +new set of items, where a reduce function performs aggregation on a +set of items. The runtime system automatically partitions input data +and schedules the execution of programs in a large cluster of +commodity machines. The system is made fault tolerant by checking +worker nodes periodically and reassigning failed jobs to other worker +nodes. 
Sawzall\cite{Sawzall_2005} is an interpreted language that +builds on MapReduce and separates the filtering and aggregation phases +for more concise program specification and better parallelization. Swift and MapReduce/Sawzall share the same goals to providing a programming tool for the specification and execution of large parallel @@ -1342,35 +1345,35 @@ \end{itemize} -BPEL\cite{BPEL} is a Web Service-based standard that specifies how a set of -Web services interact to form a larger, composite Web Service. BPEL is -starting to be tested in scientific contexts\cite{BPELScience}. While BPEL can -transfer data as XML messages, for very large scale datasets, data -exchange must be handled via separate mechanisms. In BPEL 1.0 -specification, it does not have support for dataset -iterations. According to Emmerich et al, an application with -repetitive patterns on a collection of datasets could result in a BPEL -document of 200MB in size, and BPEL is cumbersome if not impossible to -write for computational scientists\cite{BPEL2}. Although BPEL can use XML -Schema to describe data types, it does not provide support for mapping -between a logical XML view and arbitrary physical representations. +BPEL~\cite{BPEL_2006} is a Web Service-based standard that specifies +how a set of Web services interact to form a larger, composite Web +Service. BPEL is starting to be tested in scientific contexts. While +BPEL can transfer data as XML messages, for very large scale datasets, +data exchange must be handled via separate mechanisms. In BPEL 1.0 +specification, it does not have support for dataset iterations. An +application with repetitive patterns on a collection of datasets could +result in large, repetitive BPEL documents~\cite{Sedna_2007}, and BPEL +is cumbersome if not impossible to write for computational +scientists. Although BPEL can use XML Schema to describe data types, +it does not provide support for mapping between a logical XML view and +arbitrary physical representations. -DAGMan\cite{DAGman} provides a workflow engine that manages Condor jobs -organized as directed acyclic graphs (DAGs) in which each edge -corresponds to an explicit task precedence. It has no knowledge of -data flow, and in distributed environment works best with a -higher-level, data-cognizant layer. It is based on static workflow +DAGMan~\cite{Condor_Experience_2004} provides a workflow engine that +manages Condor jobs organized as directed acyclic graphs (DAGs) in +which each edge corresponds to an explicit task precedence. It has no +knowledge of data flow, and in distributed environment works best with +a higher-level, data-cognizant layer. It is based on static workflow graphs and lacks dynamic features such as iteration or conditional execution, although these features are being researched. -Pegasus\cite{Pegasus} is primarily a set of DAG transformers. Pegasus planners -translate a workflow graph into a location specific DAGMan input file, -adding stages for data staging, inter-site transfer and data -registration. They can prune tasks for files that already exist, -select sites for jobs, and cluster jobs based on various -criteria. Pegasus performs graph transformation with the knowledge of -the whole workflow graph, while in Swift, the structure of a workflow -is constructed and expanded dynamically. +Pegasus~\cite{Pegasus_2005} is primarily a set of DAG +transformers. 
Pegasus planners translate a workflow graph into a +location specific DAGMan input file, adding stages for data staging, +inter-site transfer and data registration. They can prune tasks for +files that already exist, select sites for jobs, and cluster jobs +based on various criteria. Pegasus performs graph transformation with +the knowledge of the whole workflow graph, while in Swift, the +structure of a workflow is constructed and expanded dynamically. Swift integrates the CoG Karajan workflow engine. Karajan provides the libraries and primitives for job scheduling, data transfer, and Grid @@ -1380,6 +1383,7 @@ (via Falkon and CoG coasters) fast job execution. \section{Conclusion} +\label{Conclusion} Our experience reinforces the belief that Swift plays an important role in the family of programming languages. Ordinary scripting From noreply at svn.ci.uchicago.edu Tue Jun 15 19:43:13 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 19:43:13 -0500 (CDT) Subject: [Swift-commit] r3370 - text/parco10submission Message-ID: <20100616004313.C43B09CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 19:43:13 -0500 (Tue, 15 Jun 2010) New Revision: 3370 Modified: text/parco10submission/Wozniak.bib text/parco10submission/paper.tex Log: Additional citations, etc. Modified: text/parco10submission/Wozniak.bib =================================================================== --- text/parco10submission/Wozniak.bib 2010-06-15 22:26:13 UTC (rev 3369) +++ text/parco10submission/Wozniak.bib 2010-06-16 00:43:13 UTC (rev 3370) @@ -1521,8 +1521,8 @@ journal = {Lecture Notes in Computer Science}, year = {1998}, volume = {1459}, - url = {citeseer.ist.psu.edu/czajkowski97resource.html} } +url = {citeseer.ist.psu.edu/czajkowski97resource.html} @INPROCEEDINGS{Coallocation_1999, author = {Karl Czajkowski and Ian Foster and Carl Kesselman}, @@ -2909,9 +2909,10 @@ volume = {13}, number = {8-9}, comment = {vonLaszewski-final.bib}, - url = {http://www.mcs.anl.gov/~gregor/papers/vonLaszewski--cog-cpe-final.pdf} } +url = {http://www.mcs.anl.gov/~gregor/papers/vonLaszewski--cog-cpe-final.pdf} + @INPROCEEDINGS{las01pse, author = {Gregor von Laszewski and Ian Foster and Jarek Gawor and Peter Lane and Nell Rehn and Mike Russell}, @@ -6062,7 +6063,7 @@ journal = PTRS_A, volume = 363, number = 1833, - year = 2005, + year = 2005 } {pages 1715-1728} @@ -6205,3 +6206,19 @@ } pages = {18}, + at INPROCEEDINGS{SunConstellation_2008, + title = {Performance and Scalability Study of + Sun Constellation Cluster 'Ranger' + using Application-Based Benchmarks}, + author = {Byoung-Do Kim and John E. Cazes}, + booktitle = {Proc. TeraGrid}, + year = 2008 +} + + at INPROCEEDINGS{ReSS_2007, + author = {G. Garzoglio and T. Levshina and P. Mhashilkar and S. Timm}, + title = {{ReSS}: {A} Resource Selection Service for the + {O}pen {S}cience {G}rid}, + booktitle = {Proc. International Symposium of Grid Computing}, + year = 2007 +} Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-15 22:26:13 UTC (rev 3369) +++ text/parco10submission/paper.tex 2010-06-16 00:43:13 UTC (rev 3370) @@ -173,8 +173,8 @@ environments. Swift scripts can be tested on a single local workstation. The same script can then be executed on a cluster, one or more grids of clusters, and on large scale parallel supercomputers -such as the Sun Constellation (ref) or the IBM Blue Gene/P. (section -\ref{ExecutingSites}). 
Notable features include: +such as the Sun Constellation~\cite{SunConstellation_2008} +or the IBM Blue Gene/P~\cite{BGP_2008}. Notable features include: \item Automatic parallelization of program invocations, invoking programs that have no data dependencies in parallel; @@ -249,7 +249,7 @@ variable is stored in a single file named \verb|shane.jpeg| \begin{verbatim} - image photo <"shane.jpeg">; + image photo <"shane.jpeg">; \end{verbatim} Conceptually, a parallel can be drawn between Swift \emph{mapped} variables @@ -269,15 +269,15 @@ supplied image by a specified angle: \begin{verbatim} - app (image output) rotate(image input) { - convert "-rotate" angle @input @output; - } + app (image output) rotate(image input) { + convert "-rotate" angle @input @output; + } \end{verbatim} A procedure is invoked using a syntax similar to that of the C family: \begin{verbatim} - rotated = rotate(photo, 180); + rotated = rotate(photo, 180); \end{verbatim} While this looks like an assignment, the actual unix level execution @@ -291,7 +291,7 @@ which has no structure exposed to SwiftScript: \begin{verbatim} - type image; + type image; \end{verbatim} This does not indicate that the data is unstructured; but it indicates @@ -304,26 +304,26 @@ script: \begin{verbatim} - type image; - image photo <"shane.jpeg">; - image rotated <"rotated.jpeg">; + type image; + image photo <"shane.jpeg">; + image rotated <"rotated.jpeg">; - app (image output) rotate(image input, int angle) { - convert "-rotate" angle @input @output; - } + app (image output) rotate(image input, int angle) { + convert "-rotate" angle @input @output; + } - rotated = rotate(photo, 180); + rotated = rotate(photo, 180); \end{verbatim} This script can be invoked from the command line as: \begin{verbatim} - $ ls *.jpeg - shane.jpeg - $ swift example.swift - ... - $ ls *.jpeg - shane.jpeg rotated.jpeg + $ ls *.jpeg + shane.jpeg + $ swift example.swift + ... + $ ls *.jpeg + shane.jpeg rotated.jpeg \end{verbatim} This executes a single \verb|convert| command, hiding from the user features @@ -340,7 +340,7 @@ all files matching a particular unix glob pattern into an array: \begin{verbatim} - file frames[] ; + file frames[] ; \end{verbatim} The \verb|foreach| construct can be used to apply the same procedure @@ -380,14 +380,14 @@ In this fragment, execution of procedures \verb|p| and \verb|q| can happen in parallel: \begin{verbatim} - y=p(x); - z=q(x); + y=p(x); + z=q(x); \end{verbatim} whilst in this fragment, execution is serialised by the variable \verb|y|, with procedure \verb|p| executing before \verb|q|: \begin{verbatim} - y=p(x); - z=q(y); + y=p(x); + z=q(y); \end{verbatim} Arrays in SwiftScript are more generally @@ -404,13 +404,13 @@ Consider the script below: \begin{verbatim} - file a[]; - file b[]; - foreach v,i in a { - b[i] = p(v); - } - a[0] = r(); - a[1] = s(); + file a[]; + file b[]; + foreach v,i in a { + b[i] = p(v); + } + a[0] = r(); + a[1] = s(); \end{verbatim} Initially, the \verb|foreach| statement will have nothing to execute, as the array \verb|a| has not been assigned any values. The procedures @@ -430,15 +430,15 @@ of as a graph of calls to other procedures. 
\begin{verbatim} - (file output) process (file input) { - file intermediate; - intermediate = first(input); - output = second(intermediate); - } + (file output) process (file input) { + file intermediate; + intermediate = first(input); + output = second(intermediate); + } - file x <"x.txt">; - file y <"y.txt">; - y = process(x); + file x <"x.txt">; + file y <"y.txt">; + y = process(x); \end{verbatim} This will invoke two procedures, with an intermediate data file named @@ -448,14 +448,14 @@ procedures, not by any containing procedures. In this code block: \begin{verbatim} - (file a, file b) A() { - a = A1(); - b = A2(); - } - file x, y, s, t; - (x,y) = A(); - s = S(x); - t = S(y); + (file a, file b) A() { + a = A1(); + b = A2(); + } + file x, y, s, t; + (x,y) = A(); + s = S(x); + t = S(y); \end{verbatim} then a valid execution order is: \verb|A1 S(x) A2 S(y)|. The @@ -484,29 +484,29 @@ \emph{Complex types} may be defined using the \verb|type| keyword: \begin{verbatim} - type headerfile; - type voxelfile; - type volume { - headerfile h; - voxelfile v; - } + type headerfile; + type voxelfile; + type volume { + headerfile h; + voxelfile v; + } \end{verbatim} Members of a complex type can be accessed using the \verb|.| operator: - \begin{verbatim} - volume brain; - o = p(brain.h); - \end{verbatim} +\begin{verbatim} + volume brain; + o = p(brain.h); +\end{verbatim} Collections of files can be mapped to complex types using mappers, like for arrays. For example, the simple mapper used in this expression will map the files \verb|data.h| and \verb|data.v| to the variable members \verb|m.h| and \verb|m.v| respectively: - \begin{verbatim} - volume m ; - \end{verbatim} +\begin{verbatim} + volume m ; +\end{verbatim} Sometimes data may be stored in a form that does not fit with Swift's file-and-site model; for example, data might be stored in an RDBMS on some @@ -520,21 +520,21 @@ data storage and access methods to be plugged in to scripts. \begin{verbatim} - type file; + type file; - app (extern o) populateDatabase() { - populationProgram; - } + app (extern o) populateDatabase() { + populationProgram; + } - app (file o) analyseDatabase(extern i) { - analysisProgram @o; - } + app (file o) analyseDatabase(extern i) { + analysisProgram @o; + } - extern database; - file result <"results.txt">; + extern database; + file result <"results.txt">; - database = populateDatabase(); - result = analyseDatabase(database); + database = populateDatabase(); + result = analyseDatabase(database); \end{verbatim} Some external database is represented by the \verb|database| variable. The @@ -634,7 +634,7 @@ section N: \begin{verbatim} - app (file output) rotate(file input, int angle) + app (file output) rotate(file input, int angle) \end{verbatim} The procedure signature declares the inputs and outputs for this @@ -649,7 +649,7 @@ staged in for this parameter. 
\begin{verbatim} - convert "-rotate" angle @input @output; + convert "-rotate" angle @input @output; \end{verbatim} The body of the \verb|app| block defines the unix command-line that @@ -715,17 +715,17 @@ Sites are defined in the \emph{site catalog}, which contains descriptions of each site: - \begin{verbatim} - - - - - /home/benc/swifttest - +\begin{verbatim} + + + + + /home/benc/swifttest + - \end{verbatim} +\end{verbatim} This file may be constructed by hand or mechanically from some pre-existing database (such as a grid's existing discovery @@ -842,13 +842,14 @@ Using Swift to submit to a large number of sites poses a number of practical challenges that are not encountered when running on a small number of sites. These challenges are seen when comparing execution on -the TeraGrid\cite{TERAGRID} with execution on the Open Science -Grid\cite{OSG}. The set of sites which may be used is large and +the TeraGrid~\cite{TeraGrid_2005} with execution on the Open Science +Grid (OSG)~\cite{OSG_2007}. The set of sites which may be used is large and changing. It is impractical to maintain a site catalog by hand in this situation. In collaboration with the OSG Engagement group, Swift was -interfaced to ReSS\cite{ReSS} so that the site catalog is generated -from that information system. This provides a very straightforward way -to generate a large catalog of 'fairly likely to work' sites. +interfaced to ReSS\cite{ReSS_2007} so that the site catalog is +generated from that information system. This provides a very +straightforward way to generate a large catalog of sites that are +likely to work. Having discovered those sites, two significant problems remain: the quality of those sites varies wildly; and user applications are not @@ -1126,146 +1127,113 @@ \section{Future work} \label{Future} -\subsection{Automatic characterisation of site and application behaviour} +Swift is an actively developed project. Current directions in Swift +development focus on improvements for short-running tasks, massively +parallel resources, data access mechanisms, site management, and +provenance. -TODO The replication mechanism is the beginning of this - but there is scope -for a bunch more - eg. better statistics about jobs, sites, split by -job name; realisation that certain types of jobs fail on a particular site, -etc. Note that this can fit into the engine without needing language -changes. (ties into site selection section too?) - - \subsection{Provisioning for more granular applications} -TODO: maybe this is already covered in the 'executing efficiently' section? +In some applications (such as CNARI\cite{CNARI}) the execution time +for a program is very short. In such circumstances, execution time can +become dominated by GRAM and LRM overhead. A resource provisioning +system such as Falkon\cite{FALKON} or the CoG~\cite{CoG_2001} coaster +mechanism developed for Swift can be used to ameliorate this overhead, +by incurring the allocation overhead once per worker node. Both of +these mechanisms can be plugged into Swift straightforwardly through +the CoG provider API. -In some applications (such as CNARI\cite{CNARI}) the execution time for a program -is very short (compared to what is traditionally expected for a grid -job). In such circumstances, execution time can become dominated by -GRAM and LRM overhead. 
+\subsection{Swift on thousands of cores} -A resource provisioning system such as Falkon\cite{FALKON} or the -CoG~\cite{CoG_2001} coaster mechanism developed for Swift can be used -to ameliorate this overhead, by incurring the allocation overhead once -per worker node. +Systems such as the Sun Constellation~\cite{SunConstellation_2008} or +IBM BlueGene/P~\cite{BGP_2008} have hundreds of thousands of cores, +and systems with millions of cores are planned. Scheduling and +managing tasks running at this scale is a challenging problem in +itself and relies of the rapid submission of tasks as noted +above. Swift applications currently do run on these systems by +scheduling Coasters workers using the standard job submission +techniques and employing an internal IP network. -Both of these mechanisms can be plugged into Swift straightforwardly -through the CoG provider API. +\subsection{Filesystem access optimizations} Similarly, some applications deal with files that are uncomfortably small for GridFTP (on the order of tens of bytes). For this, a lightweight file access mechanism provided by CoG Coasters can be -substituted for GridFTP. +substituted for GridFTP. When running on HPC resources, the thousands +of small accesses to the filesystem may create a bottleneck. To +approach this problem, we have investigated application needs and +initiated a set of Collective Data Management (CDM)~\cite{CDM_2009} +primitives to mitigate these problems. \subsection{Provenance} \label{Provenance} Swift produces log information regarding the provenance of its output files. -In an existing development module, this information can be imported into -relational and XML databases for later querying. +In an existing development module, this information can be imported +into relational and XML databases for later querying. Providing an +efficient query mechanism for such provenance data is an area of +ongoing research; whilst many queries can be easily answered +efficiently by a suitably indexed relational or XML database, the lack +of support for efficient transitive queries can make some common +queries involving either transitivity over time (such as 'find all +data derived from input file X') or over dataset containment (such as +'find all procedures which took an input containing the file F') +expensive to evaluate and awkward to express. -Providing an efficient query mechanism for such provenance data is an area -of ongoing research; whilst many queries can be easily answered efficiently -by a suitably indexed relational or XML database, the lack of support for -efficient transitive queries can make some common queries involving -either transitivity over time (such as 'find all data derived from input -file X') or over dataset containment (such as 'find all procedures which -took an input containing the file F') expensive to evaluate and awkward -to express. +%% \subsection{GUI workflow design tools} -TODO reference the VDC from VDS\cite{VDS} +%% In contrast to a text-oriented programming language like SwiftScript, +%% some scientists prefer to design simple programs using GUI design tools. +%% An example of this is the LONI Pipeline tool\cite{LONIPIPELINE}. Preliminary +%% investigations suggest that scientific workflows designed with that tool +%% can be straightforwardly compiled into SwiftScript and thus benefit from +%% Swift's execution system. 
-\subsection{GUI workflow design tools} +%% \subsection{Site selection research} -In contrast to a text-oriented programming language like SwiftScript, -some scientists prefer to design simple programs using GUI design tools. -An example of this is the LONI Pipeline tool\cite{LONIPIPELINE}. Preliminary -investigations suggest that scientific workflows designed with that tool -can be straightforwardly compiled into SwiftScript and thus benefit from -Swift's execution system. +%% TODO: data affinity between sites, based on our knowledge of what is +%% already staged on each site -\subsection{The IBM BG/P} +%% TODO: Is anything else interesting happening here in our group? -TODO: hopefully Ioan will write some section that is interesting in this -area. +%% \subsection{Language development} - TODO: interesting from Swift perspective: +%% TODO: describe how it becomes more functional as time passes, as is +%% becoming more popular. can ref mapreduce here\cite{MAPREDUCE} eg map +%% operator extension - looks like foreach; and maybe some other +%% popular-ish functional language eg F\# - 1. getting things running at all: use of BG/P for loosely coupled -tasks, which is a somewhat untraditional use of such a machine; lack of -antive LRM that is anywhere near appropraite for that (pset granularity -only, and only running one executable) - falkon as solution to this; +%% TODO type-inference - implemented by Milena but not put into +%% production. -decomposition of large machine into multiple Swift sites, with 1 pset = -1 Swift site - how some of the problems related to running on multisite -grids are sort-of similar to problems within the BG/P - hierarchical -scheduling of of jobs and hierarchical management of data. +%% TODO libraries/code reuse - some traditional language stuff there but +%% orthogonal to that is how to express transformation catalog (which ties +%% together language declarations with site declarations, and hence makes +%% procedures vs sites not completely orthogonal) - 2. performance -\subsection{Site selection research} +%% TODO unification of procedures and functions (a historical artifact), +%% and possibly of mappers - TODO: data affinity between sites, based on our knowledge of what is -already staged on each site +%% \subsection{Debugging} - TODO: Is anything else interesting happening here in our group? +%% TODO: debugging of distributed system - can have a non-futures section +%% on what is available now - logprocessing module, as well as +%% mentioning CEDPS\cite{CEDPS} as somewhat promising(?) for the future. -\subsection{Collective IO} +%% \subsection{Swift as a library} +%% Could existing programs execute Swift calls through a library +%% approach? The answer to this is certainly ``yes''. (?) -%% On large systems, the shared file system is commonly provided by -%% GPFS\cite{GPFS}. This can scale well but when used na\"ively can -%% exhibit pathological behaviour. Early versions of Swift triggered this -%% behaviour by targetting too much file system activity at a single -%% working directory, so that GPFS lock contention came to dominate execution -%% time. +%% \subsection{Swift library / source code management} +%% (TODO benc: unclear what is meant by this paragraph. it was originally in the +%% introduction, but as it appears to talk about something which does not (yet?) +%% exist, then it is probably better being absorbed into the future section) -\subsection{Language development} +%% Swift does not yet have a notion of libraries. 
Swift programs execute as +%% if all procedures called in the script are present in a single logical +%% source file and are thus passed to the Swift virtual machine all at once. - TODO: describe how it becomes more functional as time passes, as is -becoming more popular. can ref mapreduce here\cite{MAPREDUCE} eg map -operator extension - looks like foreach; and maybe some other -popular-ish functional language eg F\# - - TODO type-inference - implemented by Milena but not put into -production. - - TODO libraries/code reuse - some traditional language stuff there but -orthogonal to that is how to express transformation catalog (which ties -together language declarations with site declarations, and hence makes -procedures vs sites not completely orthogonal) - - TODO unification of procedures and functions (a historical artifact), - and possibly of mappers - -\subsection{Debugging} - - TODO: debugging of distributed system - can have a non-futures section -on what is available now - logprocessing module, as well as -mentioning CEDPS\cite{CEDPS} as somewhat promising(?) for the future. - -\subsection{Swift as a library} -Could existing programs execute Swift calls through a library -approach? The answer to this is certainly ``yes''. (?) - -\subsection{Swift library / source code management} - - -(TODO benc: unclear what is meant by this paragraph. it was originally in the -introduction, but as it appears to talk about something which does not (yet?) -exist, then it is probably better being absorbed into the future section) - - Swift does not yet have a notion of libraries. Swift programs execute as -if all procedures called in the script are present in a single logical -source file and are thus passed to the Swift virtual machine all at once. - - - -\section{Implementation status} - - TODO: list how Swift can be downloaded here. describe development group? - -active development group; releases roughly every 2 months. - \section{Comparison to Other Systems} \label{Related} @@ -1323,25 +1291,27 @@ \begin{itemize} -\item Programming model: MapReduce only supports key-value pairs as input -or output datasets and two types of computation functions - map and -reduce; where Swift provides a type system and allows the definition -of complex data structures and arbitrary computation procedures. +\item Programming model: MapReduce only supports key-value pairs as + input or output datasets and two types of computation functions - + map and reduce; where Swift provides a type system and allows the + definition of complex data structures and arbitrary computation + procedures. -\item Data format: in MapReduce, input and output data can be of several -different formats, and it is also possible to define new data -sources. Swift provides a more flexible mapping mechanism to map -between logical data structures and various physical representations. +\item Data format: in MapReduce, input and output data can be of + several different formats, and it is also possible to define new + data sources. Swift provides a more flexible mapping mechanism to + map between logical data structures and various physical + representations. \item Dataset partition: Swift does not automatically partition input -datasets. Instead, datasets can be organized in structures, and -individual items in a dataset can be transferred accordingly along -with computations. + datasets. Instead, datasets can be organized in structures, and + individual items in a dataset can be transferred accordingly along + with computations. 
\item Execution environment: MapReduce schedules computations within a -cluster with shared Google File System, where Swift schedules across -distributed Grid sites that may span multiple administrative domains, -and deals with security and resource usage policy issues. + cluster with shared Goojgle File System, where Swift schedules across + distributed Grid sites that may span multiple administrative + domains, and deals with security and resource usage policy issues. \end{itemize} @@ -1375,12 +1345,12 @@ the knowledge of the whole workflow graph, while in Swift, the structure of a workflow is constructed and expanded dynamically. -Swift integrates the CoG Karajan workflow engine. Karajan provides the -libraries and primitives for job scheduling, data transfer, and Grid -job submission; Swift adds support for high-level abstract -specification of large parallel computations, data abstraction, and -workflow restart, reliable execution over multiple Grid sites, and -(via Falkon and CoG coasters) fast job execution. +Swift integrates with the CoG Karajan workflow engine. Karajan +provides the libraries and primitives for job scheduling, data +transfer, and Grid job submission; Swift adds support for high-level +abstract specification of large parallel computations, data +abstraction, and workflow restart, reliable execution over multiple +Grid sites, and (via Falkon and CoG coasters) fast job execution. \section{Conclusion} \label{Conclusion} @@ -1394,97 +1364,67 @@ code that manipulates data directly. They contain instead the "data flow recipes" and input/output specifications of each program invocation such that the location and environment transparency goals -can be implemented automatically by the Swift environment. +can be implemented automatically by the Swift environment. This simple +model has demonstrated many successes as a tool for scientific +computing. -TODO: Polish conclusion - was pasted here from intro and doesnt fit yet. +\section{Implementation status} +Swift is an open source project available at: \\ +{\tt http://www.ci.uchicago.edu/swift}. + \section{Acknowledgments} TODO: NSF/DOE grant acknowledgements -\section{TODO} +%% \section{TODO} - Reference Swift as a follow-on project to VDL in VDS; how does XDTM fit - into this? Is it of any interest other than as part of the - project history? And is history of this project interesting? maybe so... +%% Reference Swift as a follow-on project to VDL in VDS; how does XDTM fit +%% into this? Is it of any interest other than as part of the +%% project history? And is history of this project interesting? maybe so... - Acknowledgement of all developers names? +%% Acknowledgement of all developers names? - info logs and kickstart logs +%% info logs and kickstart logs - relation to: karajan, falkon, java cog, globus needs more clearly -defining; specifically for CoG, need to declare that it builds on top -of that; relation to old old VDL2 papers (eg that Yong was on...) +%% relation to: karajan, falkon, java cog, globus needs more clearly +%% defining; specifically for CoG, need to declare that it builds on top +%% of that; relation to old old VDL2 papers (eg that Yong was on...) - some dude (it was Xu Du) did some stuff about BOINC - that could have a one-liner -if it was actually written up somewhere; otherwise ignore. -Not likely that it was written up but I will ask. (mike) +%% some dude (it was Xu Du) did some stuff about BOINC - that could have a one-liner +%% if it was actually written up somewhere; otherwise ignore. 
+%% Not likely that it was written up but I will ask. (mike) - performance: application tuning graphs; provisioning and coaster -file access (give one-liner numbers for those); file system layout -tuning to accomodate GPFS - can make before/after one-liners for that -quite easily +%% performance: application tuning graphs; provisioning and coaster +%% file access (give one-liner numbers for those); file system layout +%% tuning to accomodate GPFS - can make before/after one-liners for that +%% quite easily -people who have thus far contributed directly to this written paper: -me, wilde +%% people who have thus far contributed directly to this written paper: +%% me, wilde -people who have thus far contributed to the Swift work described here: -Swift core: me, wilde, hategan, milena, yong, ian -CNARI: skenny -OSG: mats -Site selection: xi li, ragib -App installed: Zhengxiong Howe -Falkon: Ioan, zhao -Collective IO: allan, zhao, ioan +%% people who have thus far contributed to the Swift work described here: +%% Swift core: me, wilde, hategan, milena, yong, ian +%% CNARI: skenny +%% OSG: mats +%% Site selection: xi li, ragib +%% App installed: Zhengxiong Howe +%% Falkon: Ioan, zhao +%% Collective IO: allan, zhao, ioan -Users: Uri, Kubal, Hocky, UMD Student, ... +%% Users: Uri, Kubal, Hocky, UMD Student, ... -more explicit mapper description should include table of all/common mappers +%% more explicit mapper description should include table of all/common mappers -ramble about separation of parallel execution concerns and dataflow spec -in the same way that gph has a separation of same concerns... compare contrast +%% ramble about separation of parallel execution concerns and dataflow spec +%% in the same way that gph has a separation of same concerns... 
compare contrast -%\begin{thebibliography}{99} -%\bibitem{CEDPS} - cedps -% -%\bibitem{MONOTONICPHD} - the phd on distributed language that defines the term 'monotonic' - although maybe it comes from elsewhere -% -%\bibitem{GLOBUS} globus toolkit -% -%\bibitem{GRAM} gram -% -%\bibitem{GridFTP} - gridftp -% -%\bibitem{TCP} - tcp -% -%\bibitem{CNARI} - something about the cnari paper -% -%\bibitem{FALKON} - falkon -% -%\bibitem{COG} - cog -% -%\bibitem{VDS} - VDS -% -%\bibitem{LONIPIPELINE} - loni pipeline -% -%\bibitem{MAPREDUCE} - mapreduce -% -%\bibitem{TERAGRID} - teragrid -% -%\bibitem{OSG} - open science grid -% -%\bibitem{ReSS} - ress -% -%\bibitem{GPFS} - GPFS -% -%\end{thebibliography} - \bibliographystyle{elsarticle-num} \bibliography{paper,Wozniak} % for ACM SIGS style -\verb|$Id$| +% \verb|$Id$| \end{document} From noreply at svn.ci.uchicago.edu Tue Jun 15 20:27:21 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 20:27:21 -0500 (CDT) Subject: [Swift-commit] r3371 - text/parco10submission/img Message-ID: <20100616012721.055DF9CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 20:27:20 -0500 (Tue, 15 Jun 2010) New Revision: 3371 Modified: text/parco10submission/img/figures.odg text/parco10submission/img/swift-model.pdf Log: Use Karajan syntax Modified: text/parco10submission/img/figures.odg =================================================================== (Binary files differ) Modified: text/parco10submission/img/swift-model.pdf =================================================================== (Binary files differ) From noreply at svn.ci.uchicago.edu Tue Jun 15 20:27:30 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 20:27:30 -0500 (CDT) Subject: [Swift-commit] r3372 - text/parco10submission Message-ID: <20100616012730.00BD49CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 20:27:29 -0500 (Tue, 15 Jun 2010) New Revision: 3372 Modified: text/parco10submission/Wozniak.bib text/parco10submission/paper.tex Log: More fixes Modified: text/parco10submission/Wozniak.bib =================================================================== --- text/parco10submission/Wozniak.bib 2010-06-16 01:27:20 UTC (rev 3371) +++ text/parco10submission/Wozniak.bib 2010-06-16 01:27:29 UTC (rev 3372) @@ -6222,3 +6222,14 @@ booktitle = {Proc. International Symposium of Grid Computing}, year = 2007 } + + at ARTICLE{CNARI_2007, + title = {Accelerating Medical Research using the {S}wift + Workflow System}, + author = {Stef-Praun, T. and Clifford, B. and Foster, I. and + Hasson, U. and Hategan, M. and Small, S. L. and + Wilde, M. and Zhao, Y.}, + journal = {Studies in Health Technology and Informatics}, + volume = 126, + year = 2007 +} \ No newline at end of file Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-16 01:27:20 UTC (rev 3371) +++ text/parco10submission/paper.tex 2010-06-16 01:27:29 UTC (rev 3372) @@ -428,7 +428,9 @@ invoke other SwiftScript procedures rather than a component program. The basic structure of a composite procedure may be thought of as a graph of calls to other procedures. 
- +The following script will invoke two procedures, with an intermediate +data file named anonymously connecting the \verb|first| and +\verb|second| procedures: \begin{verbatim} (file output) process (file input) { file intermediate; @@ -441,12 +443,8 @@ y = process(x); \end{verbatim} -This will invoke two procedures, with an intermediate data file named -anonymously connecting the \verb|first| and \verb|second| procedures. - Ordering of execution is generally determined by execution of \verb|app| procedures, not by any containing procedures. In this code block: - \begin{verbatim} (file a, file b) A() { a = A1(); @@ -457,11 +455,10 @@ s = S(x); t = S(y); \end{verbatim} +a valid execution order is: \verb|A1 S(x) A2 S(y)|. The compound +procedure \verb|A| does not have to have fully completed for its +return values to be used by subsequent statements. -then a valid execution order is: \verb|A1 S(x) A2 S(y)|. The -compound procedure \verb|A| does not have to have fully completed -for its return values to be used by subsequent statements. - \subsection{More about types} \label{LanguageTypes} @@ -1134,14 +1131,14 @@ \subsection{Provisioning for more granular applications} -In some applications (such as CNARI\cite{CNARI}) the execution time -for a program is very short. In such circumstances, execution time can -become dominated by GRAM and LRM overhead. A resource provisioning -system such as Falkon\cite{FALKON} or the CoG~\cite{CoG_2001} coaster -mechanism developed for Swift can be used to ameliorate this overhead, -by incurring the allocation overhead once per worker node. Both of -these mechanisms can be plugged into Swift straightforwardly through -the CoG provider API. +In some applications the execution time for a program is very +short. In such circumstances, execution time can become dominated by +GRAM and LRM overhead. A resource provisioning system such as +Falkon~\cite{Falkon_2008} or the CoG~\cite{CoG_2001} coaster mechanism +developed for Swift can be used to ameliorate this overhead, by +incurring the allocation overhead once per worker node. Both of these +mechanisms can be plugged into Swift straightforwardly through the CoG +provider API. 
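To make the granularity issue concrete, the following SwiftScript sketch
(the procedure name, program name and mapper arguments are hypothetical
and purely illustrative) shows the shape of workload that benefits most
from such provisioning: a large \verb|foreach| in which each invocation
completes within seconds:

\begin{verbatim}
    app (file o) quickStat(file i) {
      stats @i @o;
    }

    file samples[] <filesys_mapper; pattern="*.dat">;
    file results[];

    foreach s,ix in samples {
      results[ix] = quickStat(s);
    }
\end{verbatim}

Submitted as one GRAM job per \verb|quickStat| call, such a script spends
most of its elapsed time waiting in queues; with coasters or Falkon, each
call is instead dispatched to an already-allocated worker.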
\subsection{Swift on thousands of cores} From noreply at svn.ci.uchicago.edu Tue Jun 15 21:37:05 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 21:37:05 -0500 (CDT) Subject: [Swift-commit] r3373 - text/parco10submission Message-ID: <20100616023705.99C029CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 21:37:05 -0500 (Tue, 15 Jun 2010) New Revision: 3373 Modified: text/parco10submission/paper.tex Log: Misc fixes after read-through Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-16 01:27:29 UTC (rev 3372) +++ text/parco10submission/paper.tex 2010-06-16 02:37:05 UTC (rev 3373) @@ -16,7 +16,7 @@ % draft - contact benc at ci.uchicago.edu -% ACM styleguide says max 3 ayuthors here, rest in acknowledgements +% ACM styleguide says max 3 authors here, rest in acknowledgements %% \numberofauthors{4} @@ -30,8 +30,6 @@ %% \alignauthor Mihael Hategan \\ %% \affaddr{University of Chicago Computation Institute}\\ %% \and -%% \alignauthor Sarah Kenny \\ -%% \affaddr{University of Chicago Computation Institute}\\ %% \alignauthor Michael Wilde \\ %% \affaddr{University of Chicago Computation Institute}\\ %% \affaddr{Argonne National Laboratory} \\ @@ -40,7 +38,6 @@ \author{Ben Clifford} \author{Ian Foster} \author{Mihael Hategan} -\author{Sarah Kenny} \author{Justin M. Wozniak} \author{Michael Wilde} @@ -50,16 +47,15 @@ Scientists, engineers and business analysts often work by performing a massive number of runs of domain-specific programs, typically coupled -loosely by large collections of file-based data. Distributed and +loosely by large collections of file-based data. Distributed and parallel computing resources provide a powerful way to get more of this type of work done faster, but using such resources imposes additional complexities. Swift reduces these complexities with a scripting language for composing ordinary application programs (serial or parallel) into more powerful parallel applications that can be executed on distributed resources. We present the language, details of -the implementation, application examples, measurements, and ongoing -research, focusing on its importance as a distributed computing -paradigm. +the implementation, application examples, and ongoing research, +focusing on its importance as a distributed computing paradigm. \end{abstract} @@ -75,23 +71,23 @@ application programs (serial or parallel) into distributed, parallelized applications for execution on grids and supercomputers with tens to hundreds of thousands of processors. It is intended to -serve as a higher level framework for composing parallel pipelines of -other programs and scripts, sitting above (and utilizing) existing -scripting languages and applications. Swift scripts express the -execution of programs to produce datasets using a dataflow-driven -specification. The application programs executed by a Swift script can -be binary executables or can be scripts written in any other scripting -language. +serve as a higher level framework for composing the interaction of +concurrently executing programs and scripts, sitting above (and +utilizing) existing scripting languages and applications. Swift +scripts express the execution of programs to produce datasets using a +dataflow-driven specification. The application programs executed by a +Swift script can be binary executables or can be scripts written in +any other scripting language. 
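As a minimal sketch of this model (the program and file names here are
hypothetical; the syntax is introduced in Section~\ref{Language}), a
script couples an ordinary executable to its input and output files and
leaves the decisions of where and when to run it to Swift:

\begin{verbatim}
    app (file result) analyze(file sample) {
      analysis @sample @result;
    }

    file sample <"sample.dat">;
    file result <"result.dat">;
    result = analyze(sample);
\end{verbatim}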

Swift's contribution and primary value is that it provides a simple,
minimal set of language constructs to specify how applications are
glued together at large scale in a simple compact form, while keeping
the language simple and elegant, and minimizing any overlap with the
tasks that existing scripting languages do well. Swift regularizes and
-abstracts both the notion of data and process for distributed parallel
-execution of application programs. Applications expressed in Swift
-are location-independent and automatically parallelized by exploiting
-available concurrency in the given dataflow .
+abstracts notions of external data and processes for distributed
+parallel execution of application programs. Applications expressed in
+Swift are location-independent and automatically parallelized by
+exploiting available concurrency in the given dataflow.

 Swift can execute scripts that perform tens of thousands of program
invocations on highly parallel resources, and handle the unreliable
@@ -108,7 +104,7 @@
 its implementation handles large-scale and distributed execution
environments, and its contribution to distributed and parallel
computing.

-\subsection{Rationale}
+\subsection*{Rationale}

 The emergence of large-scale production computing infrastructure such
as clusters, grids and high-performance computing (HPC), and the
@@ -120,28 +116,28 @@
 large scale. While many application needs involve the execution of a
single large
-and perhaps message-passing parallel app, many others require the
-coupling or orchestration of large numbers of application invocations:
-either many invocations of the same app, or many invocations of
-sequences and patterns of several apps. In this model, existing apps
-become like functions in programming, and users typically need to
-execute many of them. Scaling up requires the distribution of such
-workloads among many computers (``resources''), and hence a ``grid''
-approach. Even if a single large parallel resource suffices, users
-won't always have access to the same supercomputer cluster: hence the
-need to utilize whatever resource happened to be available or
-economical at the moment when they need to perform intensive
-computation - without continued reprogramming or adjustment of scripts.
+message-passing parallel program, many others require the coupling or
+orchestration of large numbers of application invocations: either many
+invocations of the same program, or many invocations of sequences and
+patterns of several programs. In this model, existing apps become like
+functions in programming, and users typically need to execute many of
+them. Scaling up requires the distribution of such workloads among
+many computers (``resources''), and hence a ``grid'' approach. Even if
+a single large parallel resource suffices, users will not always have
+access to the same supercomputer cluster: hence the need to utilize
+whatever resource happened to be available or economical at the moment
+when they need to perform intensive computation - without continued
+reprogramming or adjustment of scripts.

 We claim that the missing feature in current scripting languages is
sufficient specification and encapsulation of inputs to, and outputs
from, a given application, such that an execution environment could
automatically make remote execution transparent. Without this,
achieving location transparency and automated parallel execution is
-not feasible. 
Swift adds to scripting what RPC adds to programming: -by formalizing the inputs and outputs of -``applications-as-procedures'', it provides a way to make the remote - -and hence parallel - execution of applications fairly transparent. +not feasible. Swift adds to scripting what remote procedure calls +(RPC) add to programming: by formalizing the inputs and outputs of +applications-as-procedures, it provides a way to make the remote +execution of applications fairly transparent. The remainder of this paper is organized as follows. In Section~\ref{Language} we present the major concepts and language @@ -161,20 +157,21 @@ invocation of ``ordinary programs'' - technically, POSIX {\tt exec()} operations - in a manner that explicitly specifies the files and other arguments that are the inputs and outputs of each program -invocation. This formal but simple model (elaborated in section -\ref{LanguageEnvironment}) enables Swift to provide several critical -features not provided by - nor readily implemented in - existing -scripting languages like Perl, Python, or shells: +invocation. This formal but simple model enables Swift to provide +several critical characteristics not provided by - nor readily +implemented in - existing scripting languages like Perl, Python, or +shells. Notable features include: \begin{itemize} \item Location transparent execution: automatically selecting a -location for each program invocation and managing diverse execution -environments. Swift scripts can be tested on a single local -workstation. The same script can then be executed on a cluster, one or -more grids of clusters, and on large scale parallel supercomputers -such as the Sun Constellation~\cite{SunConstellation_2008} -or the IBM Blue Gene/P~\cite{BGP_2008}. Notable features include: + location for each program invocation and managing diverse execution + environments. Swift scripts can be tested on a single local + workstation. The same script can then be executed on a cluster, one + or more grids of clusters, and on large scale parallel + supercomputers such as the Sun + Constellation~\cite{SunConstellation_2008} or the IBM Blue + Gene/P~\cite{BGP_2008}. \item Automatic parallelization of program invocations, invoking programs that have no data dependencies in parallel; @@ -280,7 +277,7 @@ rotated = rotate(photo, 180); \end{verbatim} -While this looks like an assignment, the actual unix level execution +While this looks like an assignment, the actual level execution consists of invoking the command line specified in the \verb|app| declaration, with variables on the left of the assignment bound to the output parameters, and variables to the right of the procedure @@ -337,7 +334,7 @@ can be mapped to a collection of files, one element per file, by using a different form of mapping expression. 
For example, the \verb|filesys_mapper| maps -all files matching a particular unix glob pattern into an array: +all files matching a particular glob pattern into an array: \begin{verbatim} file frames[] ; @@ -345,7 +342,6 @@ The \verb|foreach| construct can be used to apply the same procedure call(s) to each element of an array: - \begin{verbatim} foreach f,ix in frames { output[ix] = rotate(frames, 180); @@ -353,14 +349,12 @@ \end{verbatim} Sequential iteration can be expressed using the \verb|iterate| construct: - \begin{verbatim} step[0] = initialCondition(); iterate ix { step[ix] = simulate(step[ix-1]); } \end{verbatim} - This fragment will initialise the 0-th element of the \verb|step| array to some initial condition, and then repeatedly run the \verb|simulate| procedure, using each execution's outputs as input to the next step. @@ -373,7 +367,7 @@ parameters have been assigned values. As a result of such execution, more variables may become assigned, possibly allowing further parts of the script to execute. In this way, scripts are implicitly -concurrent. Aside from serialisation implied by these dataflow +concurrent. Aside from serialization implied by these dataflow dependencies, execution of component programs can proceed without synchronization in time. @@ -390,17 +384,16 @@ z=q(y); \end{verbatim} -Arrays in SwiftScript are more generally -\emph{monotonic}\cite{MONOTONICPHD}; that is, knowledge about the -content of an array increases during execution, but cannot otherwise -change. Once a value for a particular element is known, then it cannot -change. Eventually, all values for an array are known, and that array -is regarded as \emph{closed}. Statements which deal with the array as -a whole will wait for the array to be closed before executing (thus, a -closed array is the equivalent of a non-array type being -assigned). However, a \verb|foreach| statement will apply its body to -elements of an array as they become known. It will not wait until the -array is closed. +Arrays in SwiftScript are more generally \emph{monotonic}, that is, +knowledge about the content of an array increases during execution, +but cannot otherwise change. Once a value for a particular element is +known, then it cannot change. Eventually, all values for an array are +known, and that array is regarded as \emph{closed}. Statements which +deal with the array as a whole will wait for the array to be closed +before executing (thus, a closed array is the equivalent of a +non-array type being assigned). However, a \verb|foreach| statement +will apply its body to elements of an array as they become known. It +will not wait until the array is closed. Consider the script below: \begin{verbatim} @@ -444,7 +437,7 @@ \end{verbatim} Ordering of execution is generally determined by execution of \verb|app| -procedures, not by any containing procedures. In this code block: +procedures, not by any containing procedures. In this script segment: \begin{verbatim} (file a, file b) A() { a = A1(); @@ -545,7 +538,7 @@ \subsection{Swift mappers} -The Swift programmer manipulate in-memory variables which are +The Swift programmer manipulates in-memory variables which are \emph{mapped} to files in the filesystem. This is coordinated by an extensible set of components called \emph{mappers}. Swift contains a number of built-in mappers. A representative sample of these is listed @@ -627,13 +620,11 @@ dependency), or that their working directories will or will not be cleaned up after execution. 
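For instance, a component program that reads a configuration file in
addition to its main input should declare both as parameters (the
procedure and program names below are hypothetical); a file that is not
named in the signature will not be staged to the execution site:

\begin{verbatim}
    app (file o) summarize(file data, file config) {
      summarize "-c" @config @data @o;
    }
\end{verbatim}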
-Consider the \verb|app| declaration for the \verb|rotate| procedure in -section N: - +Consider the following \verb|app| declaration for the \verb|rotate| +procedure: \begin{verbatim} app (file output) rotate(file input, int angle) \end{verbatim} - The procedure signature declares the inputs and outputs for this procedure. As in many other programming languages, this defines the type signatures and names of parameters; this also defines which files @@ -649,19 +640,19 @@ convert "-rotate" angle @input @output; \end{verbatim} -The body of the \verb|app| block defines the unix command-line that -will be executed when this procedure is invoked. The first token (in -this case \verb|convert|) defines a \emph{transformation name} which -is used to determine the unix executable name. Subsequent expressions, -separated by spaces, define the command-line arguments for that -executable: \verb|"-rotate"| is a string literal; angle specifies the -value of the angle parameter; the syntax \verb|@variable| evaluates to -the filename of the supplied variable, thus \verb|@input| and -\verb|@output| evaluate to the filenames of the corresponding -parameters. It should be noted that it is possible to take the -filename of \verb|output| even though it is a return parameter; -although the value of that variable has not yet been computed, the -filename where that value will go is already known. +The body of the \verb|app| block defines the command-line that will be +executed when this procedure is invoked. The first token (in this case +\verb|convert|) defines a \emph{transformation name} which is used to +determine the executable name. Subsequent expressions, separated by +spaces, define the command-line arguments for that executable: +\verb|"-rotate"| is a string literal; angle specifies the value of the +angle parameter; the syntax \verb|@variable| evaluates to the filename +of the supplied variable, thus \verb|@input| and \verb|@output| +evaluate to the filenames of the corresponding parameters. It should +be noted that it is possible to take the filename of \verb|output| +even though it is a return parameter; although the value of that +variable has not yet been computed, the filename where that value will +go is already available from the mapper. \section{Execution} \label{Execution} @@ -669,14 +660,12 @@ Swift is implemented by compiling to a Karajan program, which provides several benefits. A notable benefit visible to users is that of providers. This enables the Swift execution model to be extended by -adding new data providers and job execution providers. This is -explained in more detail in section \ref{ExecutingSites}: Executing on -a remote site. +adding new data providers and job execution providers. \subsection{Executing on a remote site} \label{ExecutingSites} -With the above restrictions, execution of a unix program on a remote +With the above restrictions, execution of a program on a remote site is straightforward. The Swift runtime must prepare a remote working directory for each job with appropriate input files staged in; then it must execute the program; and then it must stage the output @@ -698,7 +687,7 @@ \emph{file access provider}. Two common implementations of this model are execution on the local -system; and execution on one or more remote sites in a grid managed by +system, and execution on one or more remote sites in a grid managed by Globus~\cite{Globus_Metacomputing_1997} software. 
In the former case, a local scratch file system (such as {\tt /var/tmp}) may be used as the accessible file system; execution of programs is achieved by @@ -760,12 +749,12 @@ \emph{restarts} and \emph{replication}. In the simplest form of error handling in Swift, if a component -program fails then Swift will make a second (or subsequent) attempt to -run the program. In contrast to many other systems, retry here is at -the level of the SwiftScript procedure invocation, and includes -completely reattempting site selection, stage in, execution and stage -out. This provides a natural way to deal with many transient errors, -such as temporary network loss, and with many changes in site state. +program fails, Swift will make a subsequent attempt to run the +program. In contrast to many other systems, retry here is at the level +of the SwiftScript procedure invocation, and includes completely +reattempting site selection, stage in, execution and stage out. This +provides a natural way to deal with many transient errors, such as +temporary network loss, and with many changes in site state. Some errors are more permanent in nature; for example, a component program may have a bug that causes it to always fail given a @@ -810,15 +799,16 @@ component program executions are bound to worker jobs (and thus to worker nodes) as those nodes become available. -Clustering requires very little additional support on the remote site; -coasters require an active component on the head node (in Java) and -on the worker nodes (in PERL) as well as additional network connectivity. -In practical usage, the automatic deployment and execution of these -components is difficult on a number sites. +Clustering requires very little additional support on the remote site. +However, the coasters framework requires an active component on the +head node (in Java) and on the worker nodes (in PERL) as well as +additional network connectivity within a site. In practical usage, the +automatic deployment and execution of these components can be +difficult on a number sites. -However, ahead-of-time clustering can be less efficient than using -coasters. Coasters can react much more dynamically to changing numbers -of available worker nodes. When clustering, some estimation of how +However, clustering can be less efficient than using coasters. +Coasters can react much more dynamically to changing numbers of +available worker nodes. When clustering, some estimation of how available remote node count and of job duration must be made to decide on a sensible cluster size. Incorrectly estimating this can (in one direction) result in an insufficient number of worker nodes being @@ -828,11 +818,12 @@ execute is known, so can get more work done per GRAM job submission, and get it done earlier. -Job status for coasters is reported as jobs start and end; for clustered jobs, -job completion status is only known at the end of the entire cluster. This -means that subsequent activity (stageouts, and the beginning of dependant -jobs) is delayed (in the worst case, activity dependant on the first job -in a cluster must wait for all of the jobs to run). +Job status for coasters is reported as jobs start and end; for +clustered jobs, job completion status is only known at the end of the +entire cluster. This means that subsequent activity (stageouts, and +the beginning of dependant jobs) is delayed (in the worst case, +activity dependant on the first job in a cluster must wait for all of +the jobs to run). 
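A rough example with hypothetical numbers illustrates the trade-off:
with 30-second tasks and a typical LRM queue wait of 10 minutes, a
cluster of 20 tasks spends only about half of its elapsed time computing
(600 of 1200 seconds), while a cluster of 100 tasks raises that fraction
to roughly 83\% (3000 of 3600 seconds) at the cost of further delaying
stage-outs and dependent jobs until the entire cluster has finished.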
\subsection{Features to support use on dynamic resources} @@ -840,24 +831,24 @@ practical challenges that are not encountered when running on a small number of sites. These challenges are seen when comparing execution on the TeraGrid~\cite{TeraGrid_2005} with execution on the Open Science -Grid (OSG)~\cite{OSG_2007}. The set of sites which may be used is large and -changing. It is impractical to maintain a site catalog by hand in this -situation. In collaboration with the OSG Engagement group, Swift was -interfaced to ReSS\cite{ReSS_2007} so that the site catalog is -generated from that information system. This provides a very -straightforward way to generate a large catalog of sites that are +Grid (OSG)~\cite{OSG_2007}. The set of sites which may be used is +large and changing. It is impractical to maintain a site catalog by +hand in this situation. In collaboration with the OSG Engagement +group, Swift was interfaced to ReSS~\cite{ReSS_2007} so that the site +catalog is generated from that information system. This provides a +very straightforward way to generate a large catalog of sites that are likely to work. Having discovered those sites, two significant problems remain: the quality of those sites varies wildly; and user applications are not installed on those sites. Individual OSG sites exhibit extremely different behaviour, both with respect to other sites at the same -time, and with respect to themselves at other times. This is hard to -describe statically. Firstly, the load that a particular site will -bear varies over time. Secondly, some sites fail in unusual fashion. -Swift's site scoring mechanism deals well with this in the majority of -cases. However, continued discovery of unusual failure modes drives -the implementation of ever more fault tolerance mechanisms. +time, and with respect to themselves at other times. The load that a +particular site will bear varies over time and some sites fail in an +unusual manner. Swift's site scoring mechanism deals well with this in +the majority of cases. However, continued discovery of unusual failure +modes drives the implementation of ever additional fault tolerance +mechanisms. When running jobs on dynamically discovered sites, it is likely that component programs are not installed on those sites. OSG Engagement @@ -1121,116 +1112,6 @@ doall(p); \end{verbatim} -\section{Future work} -\label{Future} - -Swift is an actively developed project. Current directions in Swift -development focus on improvements for short-running tasks, massively -parallel resources, data access mechanisms, site management, and -provenance. - -\subsection{Provisioning for more granular applications} - -In some applications the execution time for a program is very -short. In such circumstances, execution time can become dominated by -GRAM and LRM overhead. A resource provisioning system such as -Falkon~\cite{Falkon_2008} or the CoG~\cite{CoG_2001} coaster mechanism -developed for Swift can be used to ameliorate this overhead, by -incurring the allocation overhead once per worker node. Both of these -mechanisms can be plugged into Swift straightforwardly through the CoG -provider API. - -\subsection{Swift on thousands of cores} - -Systems such as the Sun Constellation~\cite{SunConstellation_2008} or -IBM BlueGene/P~\cite{BGP_2008} have hundreds of thousands of cores, -and systems with millions of cores are planned. 
Scheduling and -managing tasks running at this scale is a challenging problem in -itself and relies of the rapid submission of tasks as noted -above. Swift applications currently do run on these systems by -scheduling Coasters workers using the standard job submission -techniques and employing an internal IP network. - -\subsection{Filesystem access optimizations} - -Similarly, some applications deal with files that are uncomfortably -small for GridFTP (on the order of tens of bytes). For this, a -lightweight file access mechanism provided by CoG Coasters can be -substituted for GridFTP. When running on HPC resources, the thousands -of small accesses to the filesystem may create a bottleneck. To -approach this problem, we have investigated application needs and -initiated a set of Collective Data Management (CDM)~\cite{CDM_2009} -primitives to mitigate these problems. - -\subsection{Provenance} -\label{Provenance} - -Swift produces log information regarding the provenance of its output files. -In an existing development module, this information can be imported -into relational and XML databases for later querying. Providing an -efficient query mechanism for such provenance data is an area of -ongoing research; whilst many queries can be easily answered -efficiently by a suitably indexed relational or XML database, the lack -of support for efficient transitive queries can make some common -queries involving either transitivity over time (such as 'find all -data derived from input file X') or over dataset containment (such as -'find all procedures which took an input containing the file F') -expensive to evaluate and awkward to express. - -%% \subsection{GUI workflow design tools} - -%% In contrast to a text-oriented programming language like SwiftScript, -%% some scientists prefer to design simple programs using GUI design tools. -%% An example of this is the LONI Pipeline tool\cite{LONIPIPELINE}. Preliminary -%% investigations suggest that scientific workflows designed with that tool -%% can be straightforwardly compiled into SwiftScript and thus benefit from -%% Swift's execution system. - -%% \subsection{Site selection research} - -%% TODO: data affinity between sites, based on our knowledge of what is -%% already staged on each site - -%% TODO: Is anything else interesting happening here in our group? - -%% \subsection{Language development} - -%% TODO: describe how it becomes more functional as time passes, as is -%% becoming more popular. can ref mapreduce here\cite{MAPREDUCE} eg map -%% operator extension - looks like foreach; and maybe some other -%% popular-ish functional language eg F\# - -%% TODO type-inference - implemented by Milena but not put into -%% production. - -%% TODO libraries/code reuse - some traditional language stuff there but -%% orthogonal to that is how to express transformation catalog (which ties -%% together language declarations with site declarations, and hence makes -%% procedures vs sites not completely orthogonal) - -%% TODO unification of procedures and functions (a historical artifact), -%% and possibly of mappers - -%% \subsection{Debugging} - -%% TODO: debugging of distributed system - can have a non-futures section -%% on what is available now - logprocessing module, as well as -%% mentioning CEDPS\cite{CEDPS} as somewhat promising(?) for the future. - -%% \subsection{Swift as a library} -%% Could existing programs execute Swift calls through a library -%% approach? The answer to this is certainly ``yes''. (?) 
- -%% \subsection{Swift library / source code management} - -%% (TODO benc: unclear what is meant by this paragraph. it was originally in the -%% introduction, but as it appears to talk about something which does not (yet?) -%% exist, then it is probably better being absorbed into the future section) - -%% Swift does not yet have a notion of libraries. Swift programs execute as -%% if all procedures called in the script are present in a single logical -%% source file and are thus passed to the Swift virtual machine all at once. - \section{Comparison to Other Systems} \label{Related} @@ -1255,13 +1136,13 @@ Strand~\cite{STRAND_1989} and PCN~\cite{PCN_1993} allow composition of distributed or parallel components, but usually require the components to be programmed in specific languages and linked with the systems; -where we need to coordinate procedures that may already exist (e.g., -legacy applications), were coded in various programming languages and -run in different platforms and architectures. Linda defines a set of -coordination primitives for concurrent agents to put and retrieve -tuples from a shared data space called a tuple space, which serves as -the medium for communication and coordination. Strand and PCN use -single-assignment variables as coordination +contrarily, we need to coordinate procedures that may already exist +(e.g., legacy applications), were coded in various programming +languages and run in different platforms and architectures. Linda +defines a set of coordination primitives for concurrent agents to put +and retrieve tuples from a shared data space called a tuple space, +which serves as the medium for communication and coordination. Strand +and PCN use single-assignment variables as coordination mechanism. Like Linda, Strand and PCN are data driven in the sense that the action of sending and receiving data are decoupled, and processes execute only when data are available. @@ -1276,7 +1157,7 @@ and schedules the execution of programs in a large cluster of commodity machines. The system is made fault tolerant by checking worker nodes periodically and reassigning failed jobs to other worker -nodes. Sawzall\cite{Sawzall_2005} is an interpreted language that +nodes. Sawzall~\cite{Sawzall_2005} is an interpreted language that builds on MapReduce and separates the filtering and aggregation phases for more concise program specification and better parallelization. @@ -1290,7 +1171,7 @@ \item Programming model: MapReduce only supports key-value pairs as input or output datasets and two types of computation functions - - map and reduce; where Swift provides a type system and allows the + map and reduce; Swift provides a type system and allows the definition of complex data structures and arbitrary computation procedures. @@ -1306,7 +1187,7 @@ with computations. \item Execution environment: MapReduce schedules computations within a - cluster with shared Goojgle File System, where Swift schedules across + cluster with shared Google File System, where Swift schedules across distributed Grid sites that may span multiple administrative domains, and deals with security and resource usage policy issues. @@ -1317,7 +1198,7 @@ Service. BPEL is starting to be tested in scientific contexts. While BPEL can transfer data as XML messages, for very large scale datasets, data exchange must be handled via separate mechanisms. In BPEL 1.0 -specification, it does not have support for dataset iterations. An +specification there is no support for dataset iterations. 
An application with repetitive patterns on a collection of datasets could result in large, repetitive BPEL documents~\cite{Sedna_2007}, and BPEL is cumbersome if not impossible to write for computational @@ -1349,6 +1230,115 @@ abstraction, and workflow restart, reliable execution over multiple Grid sites, and (via Falkon and CoG coasters) fast job execution. +\section{Future work} +\label{Future} + +Swift is an actively developed project. Current directions in Swift +development focus on improvements for short-running tasks, massively +parallel resources, data access mechanisms, site management, and +provenance. + +\subsection{Provisioning for tasks of finer granularity} + +In some applications the execution time for a program is very +short. In such circumstances, execution time can become dominated by +GRAM and LRM overhead. A resource provisioning system such as +Falkon~\cite{Falkon_2008} or the CoG coaster mechanism developed for +Swift can be used to ameliorate this overhead, by incurring the +allocation overhead once per worker node. Both of these mechanisms can +be plugged into Swift straightforwardly through the CoG provider API. + +\subsection{Scripting on thousands of cores} + +Systems such as the Sun Constellation~\cite{SunConstellation_2008} or +IBM BlueGene/P~\cite{BGP_2008} have hundreds of thousands of cores, +and systems with millions of cores are planned. Scheduling and +managing tasks running at this scale is a challenging problem in +itself and relies of the rapid submission of tasks as noted +above. Swift applications currently do run on these systems by +scheduling Coasters workers using the standard job submission +techniques and employing an internal IP network. + +\subsection{Filesystem access optimizations} + +Similarly, some applications deal with files that are uncomfortably +small for GridFTP (on the order of tens of bytes). For this, a +lightweight file access mechanism provided by CoG Coasters can be +substituted for GridFTP. When running on HPC resources, the thousands +of small accesses to the filesystem may create a bottleneck. To +approach this problem, we have investigated application needs and +initiated a set of Collective Data Management (CDM)~\cite{CDM_2009} +primitives to mitigate these problems. + +\subsection{Provenance} +\label{Provenance} + +Swift produces log information regarding the provenance of its output files. +In an existing development module, this information can be imported +into relational and XML databases for later querying. Providing an +efficient query mechanism for such provenance data is an area of +ongoing research; whilst many queries can be easily answered +efficiently by a suitably indexed relational or XML database, the lack +of support for efficient transitive queries can make some common +queries involving either transitivity over time (such as 'find all +data derived from input file X') or over dataset containment (such as +'find all procedures which took an input containing the file F') +expensive to evaluate and awkward to express. + +%% \subsection{GUI workflow design tools} + +%% In contrast to a text-oriented programming language like SwiftScript, +%% some scientists prefer to design simple programs using GUI design tools. +%% An example of this is the LONI Pipeline tool\cite{LONIPIPELINE}. Preliminary +%% investigations suggest that scientific workflows designed with that tool +%% can be straightforwardly compiled into SwiftScript and thus benefit from +%% Swift's execution system. 
+ +%% \subsection{Site selection research} + +%% TODO: data affinity between sites, based on our knowledge of what is +%% already staged on each site + +%% TODO: Is anything else interesting happening here in our group? + +%% \subsection{Language development} + +%% TODO: describe how it becomes more functional as time passes, as is +%% becoming more popular. can ref mapreduce here\cite{MAPREDUCE} eg map +%% operator extension - looks like foreach; and maybe some other +%% popular-ish functional language eg F\# + +%% TODO type-inference - implemented by Milena but not put into +%% production. + +%% TODO libraries/code reuse - some traditional language stuff there but +%% orthogonal to that is how to express transformation catalog (which ties +%% together language declarations with site declarations, and hence makes +%% procedures vs sites not completely orthogonal) + +%% TODO unification of procedures and functions (a historical artifact), +%% and possibly of mappers + +%% \subsection{Debugging} + +%% TODO: debugging of distributed system - can have a non-futures section +%% on what is available now - logprocessing module, as well as +%% mentioning CEDPS\cite{CEDPS} as somewhat promising(?) for the future. + +%% \subsection{Swift as a library} +%% Could existing programs execute Swift calls through a library +%% approach? The answer to this is certainly ``yes''. (?) + +%% \subsection{Swift library / source code management} + +%% (TODO benc: unclear what is meant by this paragraph. it was originally in the +%% introduction, but as it appears to talk about something which does not (yet?) +%% exist, then it is probably better being absorbed into the future section) + +%% Swift does not yet have a notion of libraries. Swift programs execute as +%% if all procedures called in the script are present in a single logical +%% source file and are thus passed to the Swift virtual machine all at once. + \section{Conclusion} \label{Conclusion} @@ -1358,8 +1348,8 @@ contain rich operators, primitives, and libraries for large classes of useful operations such as string, math, internet, and file operations. In contrast, Swift scripts typically contain very little -code that manipulates data directly. They contain instead the "data -flow recipes" and input/output specifications of each program +code that manipulates data directly. They contain instead the ``data +flow recipes'' and input/output specifications of each program invocation such that the location and environment transparency goals can be implemented automatically by the Swift environment. 
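As a concrete sketch of what such a ``data flow recipe'' looks like, using only constructs introduced earlier in the paper -- the program name {\tt align}, the type names, and the file names below are invented for illustration and are not one of the paper's examples:

\begin{verbatim}
type image;
type params;

# the interface of one program invocation: its inputs, outputs,
# and command line
app (image out) align (image in, params p) {
  align @filename(p) @filename(in) @filename(out);
}

image raw    <"subject.img">;
params rule  <"subject.params">;
image fixed  <"subject-aligned.img">;

# the "recipe": only what is produced from what is stated here;
# where and when align runs is decided by the Swift runtime
fixed = align(raw, rule);
\end{verbatim}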
This simple model has demonstrated many successes as a tool for scientific @@ -1424,4 +1414,3 @@ % \verb|$Id$| \end{document} - From noreply at svn.ci.uchicago.edu Tue Jun 15 21:39:54 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 21:39:54 -0500 (CDT) Subject: [Swift-commit] r3374 - text/parco10submission Message-ID: <20100616023954.2F5F49CD2E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-15 21:39:54 -0500 (Tue, 15 Jun 2010) New Revision: 3374 Modified: text/parco10submission/paper.tex Log: Grants and ANL note Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-16 02:37:05 UTC (rev 3373) +++ text/parco10submission/paper.tex 2010-06-16 02:39:54 UTC (rev 3374) @@ -1362,7 +1362,11 @@ \section{Acknowledgments} -TODO: NSF/DOE grant acknowledgements +This research is supported in part by NSF grants OCI-721939 and +OCI-0944332, and the U.S. Department of Energy under contract +DE-AC02-06CH11357. Computing resources were provided by the Argonne +Leadership Computing Facility, TeraGrid, the Open Science Grid, the +Petascale Active Data Store, and Amazon Web Services. %% \section{TODO} @@ -1413,4 +1417,15 @@ % \verb|$Id$| +\newpage + +The submitted manuscript has been created by UChicago Argonne, LLC, +Operator of Argonne National Laboratory ("Argonne"). Argonne, a +U.S. Department of Energy Office of Science laboratory, is operated +under Contract No. DE-AC02-06CH11357. The U.S. Government retains for +itself, and others acting on its behalf, a paid-up nonexclusive, +irrevocable worldwide license in said article to reproduce, prepare +derivative works, distribute copies to the public, and perform +publicly and display publicly, by or on behalf of the Government. + \end{document} From noreply at svn.ci.uchicago.edu Tue Jun 15 23:47:39 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 15 Jun 2010 23:47:39 -0500 (CDT) Subject: [Swift-commit] r3375 - text/parco10submission Message-ID: <20100616044739.C13C49CD34@vm-125-59.ci.uchicago.edu> Author: wilde Date: 2010-06-15 23:47:39 -0500 (Tue, 15 Jun 2010) New Revision: 3375 Modified: text/parco10submission/paper.tex Log: adjusted authors. reduce verbatim font. edits to Examples section. Added MODIS example. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-16 02:39:54 UTC (rev 3374) +++ text/parco10submission/paper.tex 2010-06-16 04:47:39 UTC (rev 3375) @@ -4,7 +4,14 @@ \usepackage{graphicx} \journal{Parallel Computing} +\makeatletter +\g at addto@macro\@verbatim\small +\makeatother +\makeatletter +\g at addto@macro\@verbatim\small +\makeatother + \begin{document} % \bibliographystyle{unsrt} % initial temp bib style for editing @@ -35,14 +42,18 @@ %% \affaddr{Argonne National Laboratory} \\ %% } -\author{Ben Clifford} \author{Ian Foster} \author{Mihael Hategan} \author{Justin M. 
Wozniak} \author{Michael Wilde} -\address{Computation Institute, University of Chicago} +\address{Mathematics and Computer Science Division, Argonne National + Laboratory, Computation Institute, University of Chicago} +\author{Ben Clifford} + +\address{Computation Institute, University of Chicago (at time of writing) } + \begin{abstract} Scientists, engineers and business analysts often work by performing a @@ -865,19 +876,22 @@ \section{Applications} \label{Applications} -TODO: two or three applications in brief. discuss both the application -behaviour in relation to Swift, but underlying grid behaviour in -relation to Swift +% TODO: two or three applications in brief. discuss both the application +% behaviour in relation to Swift, but underlying grid behaviour in +% relation to Swift -One app: CNARI + TeraGrid - small jobs (3s), many of them. +% One app: CNARI + TeraGrid - small jobs (3s), many of them. -Another app: Rosetta on OSG? OSG was designed with a focus on -heterogeneity between sites. Large number of sites; automatic site file -selection; and automatic app deployment there. +% Another app: Rosetta on OSG? OSG was designed with a focus on +% heterogeneity between sites. Large number of sites; automatic site file +% selection; and automatic app deployment there. +We describe in this section a few representative Swift applications +from various diverse disciplines. + \subsection{BLAST Application Example} -The following is notes from the Wiki by Allan: needs much refinement, adding here as a placeholder. +% The following is notes from the Wiki by Allan: needs much refinement, adding here as a placeholder. \begin{verbatim} type database; @@ -885,41 +899,44 @@ type output; type error; -(output out, error err) blastall(query i, database db) { - app { - blastall "-p" "blastp" "-F" "F" "-d" @filename(db) "-i" - at filename(i) "-v" "300" "-b" "300" "-m8" "-o" @filename(out) -stderr=@filename(err); - } +app (output out, error err) blastall(query i, database db) { + blastall "-p" "blastp" "-F" "F" + "-d" @filename(db) "-i" @filename(i) + "-v" "300" "-b" "300" "-m8" + "-o" @filename(out) stderr=@filename(err); } -database pir ; +database pir ; + +query i <"test.in">; output out <"test.out">; -query i <"test.in">; -error err <"test.err">; +error err <"test.err">; + (out,err) = blastall(i, pir); \end{verbatim} -The trick here is that blastall reads takes the prefix name of the database files that it will read (.phr, .seq and .pin files). -So i made a dummy file called ``{\tt UNIPROT\_for\_blast\_14.0.seq}'' to satisfy the data dependency . So here is the final list of my files: +The application {\tt \small blastall} expects the prefix of the database files that it will read (.phr, .seq and .pin files). +This example employs a dummy file called {\tt \small + UNIPROT.14.0.seq} to satisfy the data dependency. 
When executed, +the Swift script processes the following input directory {\tt\small /ci/pir}: \begin{verbatim} --rw-r--r-- 1 aespinosa ci-users 0 Nov 15 13:49 UNIPROT_for_blast_14.0.seq --rw-r--r-- 1 aespinosa ci-users 204106872 Oct 20 16:50 UNIPROT_for_blast_14.0.seq.00.phr --rw-r--r-- 1 aespinosa ci-users 23001752 Oct 20 16:50 UNIPROT_for_blast_14.0.seq.00.pin --rw-r--r-- 1 aespinosa ci-users 999999669 Oct 20 16:51 UNIPROT_for_blast_14.0.seq.00.psq --rw-r--r-- 1 aespinosa ci-users 233680738 Oct 20 16:51 UNIPROT_for_blast_14.0.seq.01.phr --rw-r--r-- 1 aespinosa ci-users 26330312 Oct 20 16:51 UNIPROT_for_blast_14.0.seq.01.pin --rw-r--r-- 1 aespinosa ci-users 999999864 Oct 20 16:52 UNIPROT_for_blast_14.0.seq.01.psq --rw-r--r-- 1 aespinosa ci-users 21034886 Oct 20 16:52 UNIPROT_for_blast_14.0.seq.02.phr --rw-r--r-- 1 aespinosa ci-users 2370216 Oct 20 16:52 UNIPROT_for_blast_14.0.seq.02.pin --rw-r--r-- 1 aespinosa ci-users 103755125 Oct 20 16:52 UNIPROT_for_blast_14.0.seq.02.psq --rw-r--r-- 1 aespinosa ci-users 208 Oct 20 16:52 UNIPROT_for_blast_14.0.seq.pal +-rw-r--r-- 1 ben ci 0 Nov 15 13:49 UNIPROT.14.0.seq +-rw-r--r-- 1 ben ci 204106872 Oct 20 16:50 UNIPROT.14.0.seq.00.phr +-rw-r--r-- 1 ben ci 23001752 Oct 20 16:50 UNIPROT.14.0.seq.00.pin +-rw-r--r-- 1 ben ci 999999669 Oct 20 16:51 UNIPROT.14.0.seq.00.psq +-rw-r--r-- 1 ben ci 233680738 Oct 20 16:51 UNIPROT.14.0.seq.01.phr +-rw-r--r-- 1 ben ci 26330312 Oct 20 16:51 UNIPROT.14.0.seq.01.pin +-rw-r--r-- 1 ben ci 999999864 Oct 20 16:52 UNIPROT.14.0.seq.01.psq +-rw-r--r-- 1 ben ci 21034886 Oct 20 16:52 UNIPROT.14.0.seq.02.phr +-rw-r--r-- 1 ben ci 2370216 Oct 20 16:52 UNIPROT.14.0.seq.02.pin +-rw-r--r-- 1 ben ci 103755125 Oct 20 16:52 UNIPROT.14.0.seq.02.psq +-rw-r--r-- 1 ben ci 208 Oct 20 16:52 UNIPROT.14.0.seq.pal \end{verbatim} -I looked at the dock6 documentation for OSG. It looks that it recommends to transfer the datafiles to OSG sites manually via globus-url-copy. By my understanding of how swift works, it should be able to transfer my local files to the selected sites. I have yet to try this and will look more on examples in the data management side of Swift. +% I looked at the dock6 documentation for OSG. It looks that it recommends to transfer the datafiles to OSG sites manually via globus-url-copy. By my understanding of how swift works, it should be able to transfer my local files to the selected sites. I have yet to try this and will look more on examples in the data management side of Swift. -Do you know other users who went in this approach? The documentation has only a few examples in managing data. I'll check the swift Wiki later and see what material we have and also post this email/ notes. +% Do you know other users who went in this approach? The documentation has only a few examples in managing data. I'll check the swift Wiki later and see what material we have and also post this email/ notes. \subsection{fMRI Application Example} @@ -1112,6 +1129,80 @@ doall(p); \end{verbatim} +\subsection{Satellite image data processing.} + +The last example (from a class project) processes data from a large dataset of files that categorize the Earth's surface, from the MODIS sensor instruments that orbit Earth on two NASA satellites of the Earth Observing System. +The Swift script analyzes the dataset to find the files with the ten +largest total urban area and then produce a new dataset with viewable +color images of these top-ten urban data "tiles". 
+ +The dataset consists of 317 "tile" files that categorize every +250-meter square of non-ocean surface of the earth into one of 17 +"land cover" categories, such as water, ice, forest, barren and +urban. Each pixel of these TIFF-format data files has a value 0-16 +describing one 250-meter square of the earth's surface for a specific +point in time. Each tile file has 5M pixels, covering a region of 2400 +x 2400 250-meter squares, based on a specific map projection. + +The input datasets are not ``viewable'' images because of its pixel +values, thus requiring the color rendering step above. + +\begin{verbatim} +type file; +type imagefile; +type landuse; + +app (landuse output) getLandUse (imagefile input, int sortfield) +{ + getlanduse @input sortfield stdout=@output ; +} + +app (file output, file tilelist) analyzeLandUse (landuse input[], int usetype, int maxnum) +{ + analyzelanduse @output @tilelist usetype maxnum @filenames(input); +} + +app (imagefile output) colormodis (imagefile input) +{ + colormodis @input @output; +} + +imagefile geos[]; +landuse land[]; + +# Find the land use of each modis tile + +foreach g,i in geos { + land[i] = getLandUse(g,1); +} + +# Find the top 10 most urban tiles (by area) + +int UsageTypeURBAN=13; +file bigurban<"topurban.txt">; +file urbantiles<"urbantiles.txt">; +(bigurban, urbantiles) = analyzeLandUse(land, UsageTypeURBAN, 10); + +# Map the files to an array + +string urbanfilenames[] = readData(urbantiles); + +imagefile urbanfiles[] ; + +# Create a set of recolored images for just the urban tiles + +foreach uf, i in urbanfiles { + imagefile recoloredImage ; + recoloredImage = colormodis(uf); +} + +imagefile geos[]; + +\end{verbatim} + \section{Comparison to Other Systems} \label{Related} From noreply at svn.ci.uchicago.edu Wed Jun 16 00:09:02 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 16 Jun 2010 00:09:02 -0500 (CDT) Subject: [Swift-commit] r3376 - text/parco10submission/img Message-ID: <20100616050902.5BC489CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-16 00:09:02 -0500 (Wed, 16 Jun 2010) New Revision: 3376 Modified: text/parco10submission/img/figures.odg text/parco10submission/img/swift-model.pdf Log: Correction to SwiftScript in figure Modified: text/parco10submission/img/figures.odg =================================================================== (Binary files differ) Modified: text/parco10submission/img/swift-model.pdf =================================================================== (Binary files differ) From noreply at svn.ci.uchicago.edu Wed Jun 16 00:23:16 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 16 Jun 2010 00:23:16 -0500 (CDT) Subject: [Swift-commit] r3377 - text/parco10submission Message-ID: <20100616052316.848BF9CD34@vm-125-59.ci.uchicago.edu> Author: wilde Date: 2010-06-16 00:23:16 -0500 (Wed, 16 Jun 2010) New Revision: 3377 Modified: text/parco10submission/paper.tex Log: added sem scripts; commented out blast for now (there was no paralelism); adjusted dock example. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-16 05:09:02 UTC (rev 3376) +++ text/parco10submission/paper.tex 2010-06-16 05:23:16 UTC (rev 3377) @@ -889,54 +889,54 @@ We describe in this section a few representative Swift applications from various diverse disciplines. 
-\subsection{BLAST Application Example} +% \subsection{BLAST Application Example} -% The following is notes from the Wiki by Allan: needs much refinement, adding here as a placeholder. +% % The following is notes from the Wiki by Allan: needs much refinement, adding here as a placeholder. -\begin{verbatim} -type database; -type query; -type output; -type error; +% \begin{verbatim} +% type database; +% type query; +% type output; +% type error; -app (output out, error err) blastall(query i, database db) { - blastall "-p" "blastp" "-F" "F" - "-d" @filename(db) "-i" @filename(i) - "-v" "300" "-b" "300" "-m8" - "-o" @filename(out) stderr=@filename(err); -} +% app (output out, error err) blastall(query i, database db) { +% blastall "-p" "blastp" "-F" "F" +% "-d" @filename(db) "-i" @filename(i) +% "-v" "300" "-b" "300" "-m8" +% "-o" @filename(out) stderr=@filename(err); +% } -database pir ; +% database pir ; -query i <"test.in">; -output out <"test.out">; -error err <"test.err">; +% query i <"test.in">; +% output out <"test.out">; +% error err <"test.err">; -(out,err) = blastall(i, pir); -\end{verbatim} +% (out,err) = blastall(i, pir); +% \end{verbatim} -The application {\tt \small blastall} expects the prefix of the database files that it will read (.phr, .seq and .pin files). -This example employs a dummy file called {\tt \small - UNIPROT.14.0.seq} to satisfy the data dependency. When executed, -the Swift script processes the following input directory {\tt\small /ci/pir}: +% The application {\tt \small blastall} expects the prefix of the database files that it will read (.phr, .seq and .pin files). +% This example employs a dummy file called {\tt \small +% UNIPROT.14.0.seq} to satisfy the data dependency. When executed, +% the Swift script processes the following input directory {\tt\small /ci/pir}: -\begin{verbatim} --rw-r--r-- 1 ben ci 0 Nov 15 13:49 UNIPROT.14.0.seq --rw-r--r-- 1 ben ci 204106872 Oct 20 16:50 UNIPROT.14.0.seq.00.phr --rw-r--r-- 1 ben ci 23001752 Oct 20 16:50 UNIPROT.14.0.seq.00.pin --rw-r--r-- 1 ben ci 999999669 Oct 20 16:51 UNIPROT.14.0.seq.00.psq --rw-r--r-- 1 ben ci 233680738 Oct 20 16:51 UNIPROT.14.0.seq.01.phr --rw-r--r-- 1 ben ci 26330312 Oct 20 16:51 UNIPROT.14.0.seq.01.pin --rw-r--r-- 1 ben ci 999999864 Oct 20 16:52 UNIPROT.14.0.seq.01.psq --rw-r--r-- 1 ben ci 21034886 Oct 20 16:52 UNIPROT.14.0.seq.02.phr --rw-r--r-- 1 ben ci 2370216 Oct 20 16:52 UNIPROT.14.0.seq.02.pin --rw-r--r-- 1 ben ci 103755125 Oct 20 16:52 UNIPROT.14.0.seq.02.psq --rw-r--r-- 1 ben ci 208 Oct 20 16:52 UNIPROT.14.0.seq.pal -\end{verbatim} +% \begin{verbatim} +% -rw-r--r-- 1 ben ci 0 Nov 15 13:49 UNIPROT.14.0.seq +% -rw-r--r-- 1 ben ci 204106872 Oct 20 16:50 UNIPROT.14.0.seq.00.phr +% -rw-r--r-- 1 ben ci 23001752 Oct 20 16:50 UNIPROT.14.0.seq.00.pin +% -rw-r--r-- 1 ben ci 999999669 Oct 20 16:51 UNIPROT.14.0.seq.00.psq +% -rw-r--r-- 1 ben ci 233680738 Oct 20 16:51 UNIPROT.14.0.seq.01.phr +% -rw-r--r-- 1 ben ci 26330312 Oct 20 16:51 UNIPROT.14.0.seq.01.pin +% -rw-r--r-- 1 ben ci 999999864 Oct 20 16:52 UNIPROT.14.0.seq.01.psq +% -rw-r--r-- 1 ben ci 21034886 Oct 20 16:52 UNIPROT.14.0.seq.02.phr +% -rw-r--r-- 1 ben ci 2370216 Oct 20 16:52 UNIPROT.14.0.seq.02.pin +% -rw-r--r-- 1 ben ci 103755125 Oct 20 16:52 UNIPROT.14.0.seq.02.psq +% -rw-r--r-- 1 ben ci 208 Oct 20 16:52 UNIPROT.14.0.seq.pal +% \end{verbatim} -% I looked at the dock6 documentation for OSG. It looks that it recommends to transfer the datafiles to OSG sites manually via globus-url-copy. 
By my understanding of how swift works, it should be able to transfer my local files to the selected sites. I have yet to try this and will look more on examples in the data management side of Swift. +% % I looked at the dock6 documentation for OSG. It looks that it recommends to transfer the datafiles to OSG sites manually via globus-url-copy. By my understanding of how swift works, it should be able to transfer my local files to the selected sites. I have yet to try this and will look more on examples in the data management side of Swift. -% Do you know other users who went in this approach? The documentation has only a few examples in managing data. I'll check the swift Wiki later and see what material we have and also post this email/ notes. +% % Do you know other users who went in this approach? The documentation has only a few examples in managing data. I'll check the swift Wiki later and see what material we have and also post this email/ notes. \subsection{fMRI Application Example} @@ -1040,6 +1040,8 @@ \subsection{Structural Equation Modeling using OpenMx} +% \cite{OpenMx} + OpenMx is an R library designed for structural equation modeling (SEM), a technique currently used in the neuroimaging field to examine connectivity between brain areas. @@ -1084,49 +1086,99 @@ connection weights (or strength of the relationships between anatomical regions) can be explored based on the fit of each model. -modgenproc.swift is used to submit each of the necessary computation +%modgenproc.swift +A Swift script is used to submit each of the necessary computation components to TeraGrid's Ranger cluster: a) the model object b) the covariance matrix derived from the database and c) the R script which -makes the call to OpenMx. Once the job is assigned to a node, OpenMx? +makes the call to OpenMx. Once the job is assigned to a node, OpenMx estimates weight parameters for each connection within the given model that results in a model covariance closest to the observed covariance of the data. Each of these compute jobs returns its solution model object as well as a file containing the minimum value achieved from that model. The processing of these models on Ranger was achieved in <45 minutes. +A model generator was developed for the OpenMx package and is designed +explicitly to enable parallel execution of exhaustive or partially +pruned sets of model objects. Given an n x n covariance matrix it can +generate the entire set of possible models with anywhere from 0 to n2 +connections; however, it can also take as input a single index from +that set and it will generate and run a single model. What this means +in the context of workflow design is that the generator can be +controlled (and parallelized) easily by a Swift script. For example, +using Swift as the interface to OpenMx we have these few lines of +code: + +Script 1: 4-region exhaustive SEM for a single experimental condition: + +\begin{verbatim} + +1. app (mxModel min) mxModelProcessor(file covMatrix, Rscript mxModProc, int modnum, float initweight, string cond){ +2. { +3. RInvoke @filename(mxModProc) @filename(covMatrix) modnum initweight cond; +4. } +5. file covMatrix; +6. Rscript mxScript; +7. int totalperms[] = [1:65536]; +8. float initweight = .5; +9. foreach perm in totalperms{ +10. mxModel modmin; +11. modmin = mxModelProcessor(covMatrix, mxScript, perm, initweight, ?speech?); +12. 
} +\end{verbatim} + +First, a covariance matrix containing activation data for 4 brain regions, over 8 time points, averaged over a group of subjects in the speech condition was drawn from the experiment database and its location (in this example, on the local file system, though the file could be located anywhere) is mapped in line 5. Line 6 maps the R processing script and lines 1 through 4 define the atomic procedure for invoking R. Each iteration of the foreach loop maps its optimized model output file and calls mxModelProcessor() with the necessary parameters to generate and run a model. Each of these invocations of mxModelProcessor() is independent and is submitted for processing in parallel. Swift passes 5 variables for each invocation: (1) the covariance matrix; (2) the R script containing the call to OpenMx; (3) the permutation number, i.e., the index of the model; (4) the initialization weight for the free parameters of the given model; and (5) the experimental condition. Clearly, in t his workflow all free parameters of the given model will have the same initialization weight as Swift is passing only one weight variable. When the job reaches a worker node on Ranger an R process is initialized, the generator creates the desired model by calculating where in the array that permutation of the model matrix falls. OpenMx then estimates the model parameters using a non-linear optimization algorithm called NPSOL (Gill, 1986) and the optimized model is returned and written out by Swift to the location specified in its mapping on line 10. + +The above script completed in approximately 40 minutes. The script can +then be altered to run over multiple experimental conditions by adding +another outer loop: + +Script 2: 4-region exhaustive SEM for 2 experimental conditions + +\begin{verbatim} +1. string conditions[] = ["emblem", "speech"]; +2. int totalperms[] = [1:65536]; +3. float initweight = .5; +4. foreach cond in conditions{ +5. foreach perm in totalperms{ +6. file covMatrix; +7. mxModel modmin; +8. modmin= mxModelProcessor(covMatrix, mxScript, perm, initweight, cond); +9. } +\end{verbatim} + +When the outer loop is added, the new workflow consists of 131,072 jobs since we are now running the entire set for two conditions. 
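As a check on these job counts (the assumption here, suggested by the permutation range {\tt [1:65536]}, is one independent present/absent choice per possible directed connection among the $n=4$ regions):
\[
2^{\,n^2} = 2^{16} = 65{,}536 \quad\textrm{models per condition}, \qquad 2 \times 65{,}536 = 131{,}072 \quad\textrm{jobs for two conditions}.
\]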
This workflow completed in approximately 2 hours + \subsection{Molecular Dynamics with DOCK} \begin{verbatim} -(file t,DockOut tarout) dockcompute (DockIn infile, string targetlist) { - app { - rundock @infile targetlist stdout=@filename(t) @tarout; - } + +app (file t,DockOut tarout) dock (DockIn infile, string targetlist) { + dock6 @infile targetlist stdout=@filename(t) @tarout; } type params { - string ligandsfile; - string targetlist; + string ligands; + string targets; } -#params pset[] ; -doall(params pset[]) +params pset[] ; + +runDocks(params pset[]) { foreach params,i in pset { - DockIn infile < single_file_mapper; file=@strcat("/home/houzx/dock- -run/databases/KEGG_and_Drugs/",pset[i].ligandsfile)>; - file sout ; - DockOut tout ; -# DockOut tout <"result.tar.gz">; -# sout = dockcompute(infile,pset[i].targetlist); - (sout,tout) = dockcompute(infile,pset[i].targetlist); - + DockIn infile < single_file_mapper; + file=@strcat("/ci/dock/db/KEGGDrugs/",pset[i].ligands)>; + file sout ; + DockOut docking ; + (sout,docking) = dock(infile,pset[i].targetlist); } } params p[]; p = readdata("paramslist.txt"); -doall(p); +runDocks(p); \end{verbatim} \subsection{Satellite image data processing.} From noreply at svn.ci.uchicago.edu Wed Jun 16 00:31:18 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 16 Jun 2010 00:31:18 -0500 (CDT) Subject: [Swift-commit] r3378 - text/parco10submission Message-ID: <20100616053118.5754E9CD34@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-16 00:31:18 -0500 (Wed, 16 Jun 2010) New Revision: 3378 Modified: text/parco10submission/paper.tex Log: aspell Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-16 05:23:16 UTC (rev 3377) +++ text/parco10submission/paper.tex 2010-06-16 05:31:18 UTC (rev 3378) @@ -91,10 +91,10 @@ any other scripting language. Swift's contribution and primary value is that it provides a simple, -minimal set of language constructs to specifiy how applications are +minimal set of language constructs to specify how applications are glued together at large scale in a simple compact form, while keeping the language simple and elegant, and minimizing any overlap with the -tasks that existing scripting langauges do well. Swift regularizes and +tasks that existing scripting languages do well. Swift regularizes and abstracts notions of external data and processes for distributed parallel execution of application programs. Applications expressed in Swift are location-independent and automatically parallelized by @@ -144,7 +144,7 @@ sufficient specification and encapsulation of inputs to, and outputs from, a given application, such that an execution environment could automatically make remote execution transparent. Without this, -achieving location transparancy and automated parallel execution is +achieving location transparency and automated parallel execution is not feasible. Swift adds to scripting what remote procedure calls (RPC) add to programming: by formalizing the inputs and outputs of applications-as-procedures, it provides a way to make the remote @@ -366,7 +366,7 @@ step[ix] = simulate(step[ix-1]); } \end{verbatim} -This fragment will initialise the 0-th element of the \verb|step| array +This fragment will initialize the 0-th element of the \verb|step| array to some initial condition, and then repeatedly run the \verb|simulate| procedure, using each execution's outputs as input to the next step. 
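For readers without the full listing at hand, a minimal sketch of the sequential-chaining pattern this hunk refers to is given below; the type {\tt state}, the file names, the loop bound, and the body of the {\tt app} block are assumptions rather than the paper's actual fragment:

\begin{verbatim}
type state;

app (state out) simulate (state in) {
  simulate @filename(in) @filename(out);
}

state initial <"step0.dat">;
state step[];
int rounds[] = [1:10];

# element 0 seeds the chain; every later element consumes the
# previous one, so the invocations are forced to run in sequence
step[0] = initial;
foreach ix in rounds {
  step[ix] = simulate(step[ix-1]);
}
\end{verbatim}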
@@ -388,7 +388,7 @@ y=p(x); z=q(x); \end{verbatim} -whilst in this fragment, execution is serialised by the variable +whilst in this fragment, execution is serialized by the variable \verb|y|, with procedure \verb|p| executing before \verb|q|: \begin{verbatim} y=p(x); @@ -731,11 +731,11 @@ script. This separates application code from system configuration. The site catalog may contain definitions for multiple sites in which case execution will be attempted on all sites. In the presence of -multiple sites, it is necessary to choose between the avalable sites. +multiple sites, it is necessary to choose between the available sites. The Swift \emph{site selector} achieves this by maintaining a score for each site which determines the load that Swift will place on that site. -As a site is successful in executing jobs, this score wil be increased -and as the site is uncsuccessful, this score will be cdecreased. In +As a site is successful in executing jobs, this score will be increased +and as the site is uncsuccessful, this score will be decreased. In addition to selecting between sites, this mechanism provides some dynamic rate limiting if sites fail due to overload~\cite{FTSH_2003}. @@ -753,7 +753,7 @@ The functional nature of SwiftScript provides a clearly defined interface to imperative components, in addition to allowing Swift great flexibility in where and when it runs component programs, allows those -imperative components to be treated as atmoic components which can be +imperative components to be treated as atomic components which can be executed multiple times for any given SwiftScript procedure invocation. This facilitates three different reliability mechanisms implemented by the runtime that need not be exposed at the language level: \emph{retries}, @@ -774,13 +774,13 @@ will fail resulting ultimately in the entire script failing. In such a case, Swift provides a \emph{restart log} which encapsulates -which procedure invocations have been succesfully completed. After +which procedure invocations have been successfully completed. After appropriate manual intervention, a subsequent Swift run may be started with this restart log; this will suppress re-execution of already executed invocations but otherwise allow the script to continue. A different class of failure is when jobs are submitted to a site but -are then enqueued for a very long time on that site. This is a failure +are then en queued for a very long time on that site. This is a failure in site selection, rather than in execution. Sometimes it can be a soft failure, in that the job will eventually run on the chosen site - the site selector has improperly chosen a very heavily loaded site; @@ -789,11 +789,11 @@ selector has improperly chosen a site which is not executing jobs. To address this situation, Swift provides for \emph{job replication}. -After a job has been enqueued on a site for too long, a second +After a job has been en queued on a site for too long, a second instance of the job will be submitted (again undergoing site selection, stagein, execution and stageout); this will continue up to a defined limit. When any of those jobs begins executing, all other -replicas will be cancelled. +replicas will be canceled. \subsection{Avoiding job submission penalties} @@ -823,7 +823,7 @@ available remote node count and of job duration must be made to decide on a sensible cluster size. 
Incorrectly estimating this can (in one direction) result in an insufficient number of worker nodes being -used, with excessive serialisation; or (in the other direction) result +used, with excessive serialization; or (in the other direction) result in an excessive number of GRAM job submissions. Coaster workers can be queued and executed before all of the work that they will eventually execute is known, so can get more work done per GRAM job submission, @@ -832,8 +832,8 @@ Job status for coasters is reported as jobs start and end; for clustered jobs, job completion status is only known at the end of the entire cluster. This means that subsequent activity (stageouts, and -the beginning of dependant jobs) is delayed (in the worst case, -activity dependant on the first job in a cluster must wait for all of +the beginning of dependent jobs) is delayed (in the worst case, +activity dependent on the first job in a cluster must wait for all of the jobs to run). \subsection{Features to support use on dynamic resources} @@ -853,7 +853,7 @@ Having discovered those sites, two significant problems remain: the quality of those sites varies wildly; and user applications are not installed on those sites. Individual OSG sites exhibit extremely -different behaviour, both with respect to other sites at the same +different behavior, both with respect to other sites at the same time, and with respect to themselves at other times. The load that a particular site will bear varies over time and some sites fail in an unusual manner. Swift's site scoring mechanism deals well with this in From noreply at svn.ci.uchicago.edu Wed Jun 16 15:25:28 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 16 Jun 2010 15:25:28 -0500 (CDT) Subject: [Swift-commit] r3379 - branches Message-ID: <20100616202528.EE3229CCAA@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-16 15:25:28 -0500 (Wed, 16 Jun 2010) New Revision: 3379 Added: branches/woz-01/ Log: Branch for experiments by wozniak Copied: branches/woz-01 (from rev 3378, trunk) From noreply at svn.ci.uchicago.edu Wed Jun 16 15:43:16 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 16 Jun 2010 15:43:16 -0500 (CDT) Subject: [Swift-commit] r3380 - branches/woz-01 Message-ID: <20100616204316.053A29CCA5@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-16 15:43:15 -0500 (Wed, 16 Jun 2010) New Revision: 3380 Modified: branches/woz-01/.classpath Log: Drop absent provider-wonky Modified: branches/woz-01/.classpath =================================================================== --- branches/woz-01/.classpath 2010-06-16 20:25:28 UTC (rev 3379) +++ branches/woz-01/.classpath 2010-06-16 20:43:15 UTC (rev 3380) @@ -18,7 +18,6 @@ - From noreply at svn.ci.uchicago.edu Wed Jun 16 16:50:53 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 16 Jun 2010 16:50:53 -0500 (CDT) Subject: [Swift-commit] r3381 - text/parco10submission Message-ID: <20100616215053.DC7229CCAA@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-16 16:50:53 -0500 (Wed, 16 Jun 2010) New Revision: 3381 Modified: text/parco10submission/paper.tex Log: Center figures Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2010-06-16 20:43:15 UTC (rev 3380) +++ text/parco10submission/paper.tex 2010-06-16 21:50:53 UTC (rev 3381) @@ -4,13 +4,13 @@ \usepackage{graphicx} \journal{Parallel Computing} -\makeatletter 
-\g at addto@macro\@verbatim\small -\makeatother +\makeatletter +\g at addto@macro\@verbatim\small +\makeatother -\makeatletter -\g at addto@macro\@verbatim\small -\makeatother +\makeatletter +\g at addto@macro\@verbatim\small +\makeatother \begin{document} % \bibliographystyle{unsrt} % initial temp bib style for editing @@ -908,7 +908,7 @@ % database pir ; -% query i <"test.in">; +% query i <"test.in">; % output out <"test.out">; % error err <"test.err">; @@ -941,8 +941,10 @@ \subsection{fMRI Application Example} \begin{figure}[htbp] -\includegraphics[scale=0.5]{img/IMG_fmridataset} -\caption{FMRI application} + \begin{center} + \includegraphics[scale=0.5]{img/IMG_fmridataset} + \caption{FMRI application} + \end{center} \end{figure} \begin{verbatim} @@ -1063,12 +1065,13 @@ calling OpenMx to generate and process models in parallel. \begin{figure}[htbp] -\includegraphics{img/omxFigure} - -\caption{Schematic of a single OpenMx model containing 4 regions of -interest (I through L) with 5 regression starting values (asymmetric -connections) of weight 0.75 and 4 residual variances (symmetric connections) -of weight 1.0} + \begin{center} + \includegraphics{img/omxFigure} + \caption{Schematic of a single OpenMx model containing 4 + regions of interest (I through L) with 5 regression starting + values (asymmetric connections) of weight 0.75 and 4 + residual variances (symmetric connections) of weight 1.0} + \end{center} \end{figure} Using OpenMx's model generator -- a set of functions which creates @@ -1119,18 +1122,18 @@ 5. file covMatrix; 6. Rscript mxScript; 7. int totalperms[] = [1:65536]; -8. float initweight = .5; -9. foreach perm in totalperms{ -10. mxModel modmin; -11. modmin = mxModelProcessor(covMatrix, mxScript, perm, initweight, ?speech?); -12. } +8. float initweight = .5; +9. foreach perm in totalperms{ +10. mxModel modmin; +11. modmin = mxModelProcessor(covMatrix, mxScript, perm, initweight, ?speech?); +12. } \end{verbatim} First, a covariance matrix containing activation data for 4 brain regions, over 8 time points, averaged over a group of subjects in the speech condition was drawn from the experiment database and its location (in this example, on the local file system, though the file could be located anywhere) is mapped in line 5. Line 6 maps the R processing script and lines 1 through 4 define the atomic procedure for invoking R. Each iteration of the foreach loop maps its optimized model output file and calls mxModelProcessor() with the necessary parameters to generate and run a model. Each of these invocations of mxModelProcessor() is independent and is submitted for processing in parallel. Swift passes 5 variables for each invocation: (1) the covariance matrix; (2) the R script containing the call to OpenMx; (3) the permutation number, i.e., the index of the model; (4) the initialization weight for the free parameters of the given model; and (5) the experimental condition. Clearly, in t his workflow all free parameters of the given model will have the same initialization weight as Swift is passing only one weight variable. When the job reaches a worker node on Ranger an R process is initialized, the generator creates the desired model by calculating where in the array that permutation of the model matrix falls. OpenMx then estimates the model parameters using a non-linear optimization algorithm called NPSOL (Gill, 1986) and the optimized model is returned and written out by Swift to the location specified in its mapping on line 10. 
The above script completed in approximately 40 minutes. The script can then be altered to run over multiple experimental conditions by adding -another outer loop: +another outer loop: Script 2: 4-region exhaustive SEM for 2 experimental conditions From noreply at svn.ci.uchicago.edu Wed Jun 16 16:54:29 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 16 Jun 2010 16:54:29 -0500 (CDT) Subject: [Swift-commit] r3382 - branches/woz-01/libexec/log-processing Message-ID: <20100616215429.DDE549CCAA@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-16 16:54:29 -0500 (Wed, 16 Jun 2010) New Revision: 3382 Modified: branches/woz-01/libexec/log-processing/coaster-block-timeline.plot branches/woz-01/libexec/log-processing/extract-coaster-timeline branches/woz-01/libexec/log-processing/makefile.coasters Log: Patches to plotting tools Modified: branches/woz-01/libexec/log-processing/coaster-block-timeline.plot =================================================================== --- branches/woz-01/libexec/log-processing/coaster-block-timeline.plot 2010-06-16 21:50:53 UTC (rev 3381) +++ branches/woz-01/libexec/log-processing/coaster-block-timeline.plot 2010-06-16 21:54:29 UTC (rev 3382) @@ -2,5 +2,8 @@ set output "coaster-block-timeline.png" set title "Queued/Active Coaster Workers" + +set xlabel "seconds" + set style data steps -plot "coaster-blocks.data" u 1:2 w steps title "Queued Workers", "coaster-blocks.data" u 1:3 w steps title "Running Workers" \ No newline at end of file +plot "coaster-blocks.data" u 1:2 w steps title "Queued Workers", "coaster-blocks.data" u 1:3 w steps title "Running Workers" Modified: branches/woz-01/libexec/log-processing/extract-coaster-timeline =================================================================== --- branches/woz-01/libexec/log-processing/extract-coaster-timeline 2010-06-16 21:50:53 UTC (rev 3381) +++ branches/woz-01/libexec/log-processing/extract-coaster-timeline 2010-06-16 21:54:29 UTC (rev 3382) @@ -1,30 +1,16 @@ #!/bin/bash +set -x + LOG=$1 -grep "RemoteLogHandler BLOCK_.*" $LOG | iso-to-secs | normalise-event-start-time >coasters.tmp -rm -f block-count-table.tmp -rm -f coaster-block.data +grep "RemoteLogHandler BLOCK_" < $LOG > blocks.log.tmp -IFS=$'\n' -REQUESTED=0 -RUNNING=0 -for LINE in $(cat coasters.tmp); do - TIME=`echo $LINE | sed 's/^\([^ ]*\) .*$/\1/' ` - ID=`echo $LINE | sed 's/^.*id=\([^ ]*\)\(,.*$\|$\)/\1/'` - ADD=`echo $LINE | sed -n 's/^.*BLOCK_REQUESTED.*w=\([^ ]*\),.*$/\1/p'` - if [ "$ADD" != "" ]; then - REQUESTED=$(($REQUESTED + $ADD)) - echo "$ID,$ADD" >> block-count-table.tmp - else - COUNT=`cat block-count-table.tmp | grep "$ID" | cut -d , -f 2` - if echo $LINE | grep "BLOCK_ACTIVE" >/dev/null; then - RUNNING=$(($RUNNING + $COUNT)) - fi - if echo $LINE | grep "BLOCK_SHUTDOWN" >/dev/null; then - RUNNING=$(($RUNNING - $COUNT)) - REQUESTED=$(($REQUESTED - $COUNT)) - fi - fi - echo $TIME $REQUESTED $RUNNING >> coaster-blocks.data -done +iso-to-secs < $LOG > secs.log.tmp + +head -1 secs.log.tmp | awk '{ print $1 }' > start-time.tmp + +normalise-event-start-time < secs.log.tmp > coasters.tmp + +# rm -f block-count-table.tmp +# rm -f coaster-block.data Modified: branches/woz-01/libexec/log-processing/makefile.coasters =================================================================== --- branches/woz-01/libexec/log-processing/makefile.coasters 2010-06-16 21:50:53 UTC (rev 3381) +++ branches/woz-01/libexec/log-processing/makefile.coasters 2010-06-16 21:54:29 UTC (rev 3382) @@ -1,12 +1,13 @@ 
coaster-block-timeline.png: coaster-block-timeline.data gnuplot $(SWIFT_PLOT_HOME)/coaster-block-timeline.plot -extract-timeline: +coaster-block-timeline.data: coasters.tmp + extract-coaster-blocks coasters.tmp + +coasters.tmp: extract-coaster-timeline $(LOG) -coaster-block-timeline.data: extract-timeline -coasters.tmp: extract-timeline -block-count-table.tmp: extract-timeline +# block-count-table.tmp: extract-timeline extract-coaster-qwait: coasters.tmp extract-coaster-qwait $(LOG) From noreply at svn.ci.uchicago.edu Thu Jun 17 07:37:46 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 07:37:46 -0500 (CDT) Subject: [Swift-commit] r3383 - text/internals/trunk Message-ID: <20100617123746.91A8D9CCAA@vm-125-59.ci.uchicago.edu> Author: wilde Date: 2010-06-17 07:37:46 -0500 (Thu, 17 Jun 2010) New Revision: 3383 Modified: text/internals/trunk/internals.tex Log: correct latex errors so that document builds. Modified: text/internals/trunk/internals.tex =================================================================== --- text/internals/trunk/internals.tex 2010-06-16 21:54:29 UTC (rev 3382) +++ text/internals/trunk/internals.tex 2010-06-17 12:37:46 UTC (rev 3383) @@ -274,7 +274,7 @@ dumps "cba" -\sectio{Swift compilation} +\section{Swift compilation} swift source -> parser -> vdlx -> dump parser / compiled /translated -> .kml @@ -294,7 +294,7 @@ -> two namespaces vdl and swiftscript "swiftscript" : actual mapping functions @name - @name = filename(foo) equivalent to C's '&' + @name = filename(foo) equivalent to C's '\&' originally @no data flow with mapping functions. limited processing on a string . supposed to assist in the mapping functions @@ -411,7 +411,7 @@ parallel(1,2) == (1,2) -> has some efficient barrier thing inside - takes more memory - storing ordering information -\subsetion{uparallel} +\subsection{uparallel} uParallel(1,2) -> (1,2) or (2,1) - saves on memory usage From noreply at svn.ci.uchicago.edu Thu Jun 17 11:38:56 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 11:38:56 -0500 (CDT) Subject: [Swift-commit] r3384 - branches/woz-01/libexec/log-processing Message-ID: <20100617163856.73BA09CCAA@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 11:38:56 -0500 (Thu, 17 Jun 2010) New Revision: 3384 Added: branches/woz-01/libexec/log-processing/README.txt Modified: branches/woz-01/libexec/log-processing/extract-coaster-timeline branches/woz-01/libexec/log-processing/makefile.coasters branches/woz-01/libexec/log-processing/normalise-event-start-time Log: Probable fixes to coasters plots Added: branches/woz-01/libexec/log-processing/README.txt =================================================================== --- branches/woz-01/libexec/log-processing/README.txt (rev 0) +++ branches/woz-01/libexec/log-processing/README.txt 2010-06-17 16:38:56 UTC (rev 3384) @@ -0,0 +1,17 @@ + +USE CASES: + +* Plot coasters utilization + +PLOT_HOME=/scratch/wozniak/cog/modules/swift/libexec/log-processing + +make -f ${PLOT_HOME}/makefile.coasters LOG=./map-20100606-2328-nv9xntzg.log SWIFT_PLOT_HOME=${PLOT_HOME} coaster-block-timeline.png + +TODO: + +* Rename all shell scripts to *.sh +* Rename *.makefile to *.mk +* Rename makefile to main.mk +* Stop using *.data - this clobbers tc.data +* Rename temporary files to *.log.ptmp (plot-temporary) + *.data.ptmp Modified: branches/woz-01/libexec/log-processing/extract-coaster-timeline =================================================================== 
--- branches/woz-01/libexec/log-processing/extract-coaster-timeline 2010-06-17 12:37:46 UTC (rev 3383) +++ branches/woz-01/libexec/log-processing/extract-coaster-timeline 2010-06-17 16:38:56 UTC (rev 3384) @@ -6,7 +6,7 @@ grep "RemoteLogHandler BLOCK_" < $LOG > blocks.log.tmp -iso-to-secs < $LOG > secs.log.tmp +iso-to-secs < blocks.log.tmp > secs.log.tmp head -1 secs.log.tmp | awk '{ print $1 }' > start-time.tmp Modified: branches/woz-01/libexec/log-processing/makefile.coasters =================================================================== --- branches/woz-01/libexec/log-processing/makefile.coasters 2010-06-17 12:37:46 UTC (rev 3383) +++ branches/woz-01/libexec/log-processing/makefile.coasters 2010-06-17 16:38:56 UTC (rev 3384) @@ -4,7 +4,7 @@ coaster-block-timeline.data: coasters.tmp extract-coaster-blocks coasters.tmp -coasters.tmp: +coasters.tmp: $(LOG) extract-coaster-timeline $(LOG) # block-count-table.tmp: extract-timeline Modified: branches/woz-01/libexec/log-processing/normalise-event-start-time =================================================================== --- branches/woz-01/libexec/log-processing/normalise-event-start-time 2010-06-17 12:37:46 UTC (rev 3383) +++ branches/woz-01/libexec/log-processing/normalise-event-start-time 2010-06-17 16:38:56 UTC (rev 3384) @@ -1,4 +1,3 @@ #!/bin/bash normalise-event-start-time-to-any $(cat start-time.tmp); - From noreply at svn.ci.uchicago.edu Thu Jun 17 11:47:40 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 11:47:40 -0500 (CDT) Subject: [Swift-commit] r3385 - branches/woz-01/libexec/log-processing Message-ID: <20100617164740.261289CCAA@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 11:47:39 -0500 (Thu, 17 Jun 2010) New Revision: 3385 Modified: branches/woz-01/libexec/log-processing/README.txt Log: README notes Modified: branches/woz-01/libexec/log-processing/README.txt =================================================================== --- branches/woz-01/libexec/log-processing/README.txt 2010-06-17 16:38:56 UTC (rev 3384) +++ branches/woz-01/libexec/log-processing/README.txt 2010-06-17 16:47:39 UTC (rev 3385) @@ -1,4 +1,16 @@ +STRUCTURE: + +* There is main makefile named "makefile" +* "makefile" reads extra rules from a bunch of other makefiles + "*.makefile" +* These makefile rules use various shell scripts + +NOTES: + +* Insert "set -x" into shell scripts that look interesting +* + USE CASES: * Plot coasters utilization @@ -7,6 +19,18 @@ make -f ${PLOT_HOME}/makefile.coasters LOG=./map-20100606-2328-nv9xntzg.log SWIFT_PLOT_HOME=${PLOT_HOME} coaster-block-timeline.png +IMPROVEMENT STRATEGY: + +(READABILITY) + +* Move dataflow operations into makefiles; refer to all scripts and + files in makefiles + +(EFFICIENCY) + +* Replace scripts that make multiple external calls per line + with AWK, or if necessary Perl + TODO: * Rename all shell scripts to *.sh From noreply at svn.ci.uchicago.edu Thu Jun 17 14:42:58 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 14:42:58 -0500 (CDT) Subject: [Swift-commit] r3386 - text/internals/trunk Message-ID: <20100617194258.9A4D09CC99@vm-125-59.ci.uchicago.edu> Author: aespinosa Date: 2010-06-17 14:42:58 -0500 (Thu, 17 Jun 2010) New Revision: 3386 Modified: text/internals/trunk/internals.tex Log: Mappers, scheduler and coasters Modified: text/internals/trunk/internals.tex =================================================================== --- text/internals/trunk/internals.tex 2010-06-17 
16:47:39 UTC (rev 3385) +++ text/internals/trunk/internals.tex 2010-06-17 19:42:58 UTC (rev 3386) @@ -415,12 +415,158 @@ uParallel(1,2) -> (1,2) or (2,1) - saves on memory usage -\section{Replication groups} +\section{Mappers} -\section{Restarting} +No distinction between input and output mappers. +remap(v, f) +isRemappable() + +map(): V -> F +Rmap(): F -> V + +existing: -> { x | x \in V} - invoked at the beginning of Swift. If it has no +dependencies, the mapper gets resolved when swift starts. Occasionally gets +called in dependencies. Normally invoked for input variables. Invoked before +starting apps generally. + +Exception that hold existing up: fixed array mapper as a result of some shell +command. Equivalent to use an ext mapper. + +exists(): V -> boolean i.e. exits("a[1]") + +Map() is invoked whenever its needed. + +Ext mapper: ``pregrown } + +potatoes''. You don't have to use every mapping. + +Isomorphism in the space of variables and the space of files. + +There is no atomic mapping operation. All mapping is done on demand. + +f(x) = sin(x) + +vs + +f(\pi) = 0 +f(2\pi) = 1 + +same but you can't get an infinite mapping that easily. + +existing() needs to gets invoked after the application is done. existing() +doesn't get called in a stageout. + +get the type system. + +Concurrent by definition is remappable. + +Remappable - aliasing. + +file f,g; +f = g; + +dest \ src & false & true +false & file copy & src.remap() +true & des.remap() & + +file s ; +file dest <"a.txt">; +s = app(); +dest = s; // dealt with before the application is run since swift constructs the + // graph first. + +Swift has nominal type equivalence + +type img; +type hdr; + +img f; +hdr g; + +cannot do an f = g. + +Leaves get mapped. + \section{Scheduler} +Scheduling in general is NPC. + +Swift schedules one job at a time from a list of ready jobs. Coasters and +clusters make a time window. Time window is all in the provider and clustering +site and completely separated. + +Score modifiers: + +1. sending a job decreases the score a little bit (more loaded). a leveling +mechanism. + * Load is included in the calculation of the score. +2. A job finishes - increase number of jobs sent to this site since it runs +jobs quicker. + +Similar from increasing a credit limit in credit cards. + +unscaled score and scaled score as a sigmoid function + +scaled \in [1/T, T] + +Fast ramp up of the score if a site finishes jobs and then converge to a +maximum to avoid blowing up the instability of the algorithm. + +A discrete time feedback system. Process control + +WeightedHostScheduler class - contains the implementation of the sigmoid +function. Almost contained in one place. + +Throttle of jobs and tranfers + * Jobs - jobThrottle and initialScore + +\section{Replication} + +Resending queued jobs somewhere else. Deleting other queued jobs when one of +the jobs in the replica jobs are in the active state. + +\section{Clustering} + +A lot of small jobs chunked into 1 request. Class (?) - swift specific +(VDSAdaptiveScheduler in the logs). Amortizing the queueing time. + +\section{Restarting} +The original Baker and Hewitt paper described implicit futures, which are naturally supported in the Actor model of computation and pure object-oriented programming languages like Smalltalk. The Friedman and Wise paper described only explicit futures, probably reflecting the difficulty of efficiently implementing implicit futures on stock hardware. 
The difficulty is that stock hardware does not deal with futures for primitive data types like integers. For example, an add instruction does not know how to deal with 3 + future factorial(100000). In pure object or Actor languages this problem can be solved by sending future factorial(100000) the message +[3], which asks the future to add 3 to itself and return the result. Note that the message passing approach works regardless of when factorial(100000) finishes computation and that no stinging/forcing is required. \section{Coasters} +Large workflow + +Minimal software stack. No need to install stuff on the remote stack. + +Client -> Server -> Worker + +Problem of GRAM2 callbacks. + +Choices: +1. keep the connection +2. callbacks or polling + +PERL's event-based system for the workers + +overAllocation factor - determine block's walltime + * lowOverAllocation + * highOverAllocation + + - creates a decreasing exponential curve + +nodeGranularity - width of block (multiple) + +workersPerNode - + +overAllocation on largest job + +Spread - + +Area of the box is equal to total cpu job hours + +\section{CDM} + +Disabling file transfer/ data staging stuff at runtime. + \end{document} From noreply at svn.ci.uchicago.edu Thu Jun 17 15:08:33 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 15:08:33 -0500 (CDT) Subject: [Swift-commit] r3387 - branches/woz-01/bin Message-ID: <20100617200833.051BB9CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 15:08:32 -0500 (Thu, 17 Jun 2010) New Revision: 3387 Modified: branches/woz-01/bin/swift-plot-log Log: Arg list error message Modified: branches/woz-01/bin/swift-plot-log =================================================================== --- branches/woz-01/bin/swift-plot-log 2010-06-17 19:42:58 UTC (rev 3386) +++ branches/woz-01/bin/swift-plot-log 2010-06-17 20:08:32 UTC (rev 3387) @@ -5,6 +5,11 @@ # $1 should be the pathname of the log file to plot +if [[ $1 == "" ]] + then + echo "No log file given!" 
+ exit 1 +fi LOG_CODE_HOME="`dirname $0`/../libexec/log-processing/" export SWIFT_PLOT_HOME=$LOG_CODE_HOME From noreply at svn.ci.uchicago.edu Thu Jun 17 15:14:33 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 15:14:33 -0500 (CDT) Subject: [Swift-commit] r3388 - branches/woz-01/bin Message-ID: <20100617201433.976109CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 15:14:33 -0500 (Thu, 17 Jun 2010) New Revision: 3388 Modified: branches/woz-01/bin/swift-plot-log Log: Drop extraneous variable Modified: branches/woz-01/bin/swift-plot-log =================================================================== --- branches/woz-01/bin/swift-plot-log 2010-06-17 20:08:32 UTC (rev 3387) +++ branches/woz-01/bin/swift-plot-log 2010-06-17 20:14:33 UTC (rev 3388) @@ -1,9 +1,5 @@ #!/bin/bash -ORIGDIR=$(pwd) -WORKINGDIR=$(mktemp -d /tmp/swift-plot-log-XXXXXXXXXXXXXXXX) - - # $1 should be the pathname of the log file to plot if [[ $1 == "" ]] then @@ -11,10 +7,11 @@ exit 1 fi -LOG_CODE_HOME="`dirname $0`/../libexec/log-processing/" -export SWIFT_PLOT_HOME=$LOG_CODE_HOME +ORIGDIR=$(pwd) +WORKINGDIR=$(mktemp -d /tmp/swift-plot-log-XXXXXXXXXXXXXXXX) -export PATH=${LOG_CODE_HOME}:${PATH} +export SWIFT_PLOT_HOME="`dirname $0`/../libexec/log-processing/" +export PATH=${SWIFT_PLOT_HOME}:${PATH} FC="${1:0:1}" @@ -35,7 +32,7 @@ cd $WORKINGDIR echo Now in directory $(pwd) -MAKEENV="-f ${LOG_CODE_HOME}/makefile -I ${LOG_CODE_HOME}" +MAKEENV="-f ${SWIFT_PLOT_HOME}/makefile -I ${SWIFT_PLOT_HOME}" MAKETARGETS="" INFODIR=${LOG_DIRECTORY}/${LOG_FILE_BASE}.d From noreply at svn.ci.uchicago.edu Thu Jun 17 16:13:55 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 16:13:55 -0500 (CDT) Subject: [Swift-commit] r3389 - branches/woz-01/libexec/log-processing Message-ID: <20100617211355.7AC369CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 16:13:55 -0500 (Thu, 17 Jun 2010) New Revision: 3389 Modified: branches/woz-01/libexec/log-processing/README.txt Log: Clarification Modified: branches/woz-01/libexec/log-processing/README.txt =================================================================== --- branches/woz-01/libexec/log-processing/README.txt 2010-06-17 20:14:33 UTC (rev 3388) +++ branches/woz-01/libexec/log-processing/README.txt 2010-06-17 21:13:55 UTC (rev 3389) @@ -28,8 +28,8 @@ (EFFICIENCY) -* Replace scripts that make multiple external calls per line - with AWK, or if necessary Perl +* Replace scripts that make multiple external calls per input data line + with AWK, or, if necessary, Perl TODO: From noreply at svn.ci.uchicago.edu Thu Jun 17 16:14:15 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 16:14:15 -0500 (CDT) Subject: [Swift-commit] r3390 - branches/woz-01/libexec/log-processing Message-ID: <20100617211415.66C429CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 16:14:15 -0500 (Thu, 17 Jun 2010) New Revision: 3390 Modified: branches/woz-01/libexec/log-processing/makefile Log: Minor adjustment Modified: branches/woz-01/libexec/log-processing/makefile =================================================================== --- branches/woz-01/libexec/log-processing/makefile 2010-06-17 21:13:55 UTC (rev 3389) +++ branches/woz-01/libexec/log-processing/makefile 2010-06-17 21:14:15 UTC (rev 3390) @@ -10,6 +10,10 @@ include makefile.errors include makefile.coasters +TMPS = start-times.data kickstart-times.data start-time.tmp 
end-time.tmp threads.list tasks.list *.shifted *.event *.coloured-event *.total *.tmp *.transitions *.last karatasks-type-counts.txt index.html *.lastsummary execstages.plot total.plot colour.plot jobs-sites.html jobs.retrycount.summary kickstart.stats execution-counts.txt site-duration.txt jobs.retrycount sp.plot karatasks.coloured-sorted-event *.cedps *.stats t.inf *.seenstates tmp-* clusterstats trname-summary sites-list.data.nm info-md5sums pse2d-tmp.eip karajan.html falkon.html execute2.html info.html execute.html kickstart.html scheduler.html assorted.html + +RM_CMD = rm -fv + distributable: rm -f *.tmp log *.transitions tmp-* @@ -18,8 +22,8 @@ cp *.event *.html *.png $(SDL) clean: - rm -f start-times.data kickstart-times.data start-time.tmp end-time.tmp threads.list tasks.list log *.data *.shifted *.png *.event *.coloured-event *.total *.tmp *.transitions *.last karatasks-type-counts.txt index.html *.lastsummary execstages.plot total.plot colour.plot jobs-sites.html jobs.retrycount.summary kickstart.stats execution-counts.txt site-duration.txt jobs.retrycount sp.plot karatasks.coloured-sorted-event *.cedps *.stats t.inf *.seenstates tmp-* clusterstats trname-summary sites-list.data.nm info-md5sums pse2d-tmp.eip karajan.html falkon.html execute2.html info.html execute.html kickstart.html scheduler.html assorted.html - + $(RM_CMD) $(TMPS) + t.inf: $(LOG) compute-t-inf > t.inf < $(LOG) From noreply at svn.ci.uchicago.edu Thu Jun 17 16:15:57 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 16:15:57 -0500 (CDT) Subject: [Swift-commit] r3391 - branches/woz-01/libexec/log-processing Message-ID: <20100617211557.A18BA9CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 16:15:57 -0500 (Thu, 17 Jun 2010) New Revision: 3391 Added: branches/woz-01/libexec/log-processing/extract-coaster-blocks Log: Missing file Added: branches/woz-01/libexec/log-processing/extract-coaster-blocks =================================================================== --- branches/woz-01/libexec/log-processing/extract-coaster-blocks (rev 0) +++ branches/woz-01/libexec/log-processing/extract-coaster-blocks 2010-06-17 21:15:57 UTC (rev 3391) @@ -0,0 +1,27 @@ + +set -x + +COASTERS_TMP=$1 + +IFS=$'\n' +REQUESTED=0 +RUNNING=0 +for LINE in $(cat ${COASTERS_TMP} ); do + TIME=`echo $LINE | sed 's/^\([^ ]*\) .*$/\1/' ` + ID=`echo $LINE | sed 's/^.*id=\([^ ]*\)\(,.*$\|$\)/\1/'` + ADD=`echo $LINE | sed -n 's/^.*BLOCK_REQUESTED.*w=\([^ ]*\),.*$/\1/p'` + if [ "$ADD" != "" ]; then + REQUESTED=$(($REQUESTED + $ADD)) + echo "$ID,$ADD" >> block-count-table.tmp + else + COUNT=`cat block-count-table.tmp | grep "$ID" | cut -d , -f 2` + if echo $LINE | grep "BLOCK_ACTIVE" >/dev/null; then + RUNNING=$(($RUNNING + $COUNT)) + fi + if echo $LINE | grep "BLOCK_SHUTDOWN" >/dev/null; then + RUNNING=$(($RUNNING - $COUNT)) + REQUESTED=$(($REQUESTED - $COUNT)) + fi + fi + echo $TIME $REQUESTED $RUNNING >> coaster-blocks.data +done Property changes on: branches/woz-01/libexec/log-processing/extract-coaster-blocks ___________________________________________________________________ Name: svn:executable + * From noreply at svn.ci.uchicago.edu Thu Jun 17 16:20:07 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 16:20:07 -0500 (CDT) Subject: [Swift-commit] r3393 - branches/woz-01/libexec/log Message-ID: <20100617212007.6C7B69CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 16:20:07 -0500 (Thu, 17 Jun 2010) New Revision: 3393 
Added: branches/woz-01/libexec/log/cluster.mk branches/woz-01/libexec/log/coasters.mk branches/woz-01/libexec/log/errors.mk branches/woz-01/libexec/log/falkon.mk branches/woz-01/libexec/log/implicit.mk branches/woz-01/libexec/log/karatasks.mk branches/woz-01/libexec/log/kickstart.mk branches/woz-01/libexec/log/webpage.mk Removed: branches/woz-01/libexec/log/makefile.cluster branches/woz-01/libexec/log/makefile.coasters branches/woz-01/libexec/log/makefile.errors branches/woz-01/libexec/log/makefile.falkon branches/woz-01/libexec/log/makefile.implicit branches/woz-01/libexec/log/makefile.karatasks branches/woz-01/libexec/log/makefile.kickstart branches/woz-01/libexec/log/makefile.webpage Log: Shorter names with extensions Copied: branches/woz-01/libexec/log/cluster.mk (from rev 3392, branches/woz-01/libexec/log/makefile.cluster) =================================================================== --- branches/woz-01/libexec/log/cluster.mk (rev 0) +++ branches/woz-01/libexec/log/cluster.mk 2010-06-17 21:20:07 UTC (rev 3393) @@ -0,0 +1,12 @@ +karatasks.clusters.transitions: karatasks.JOB_SUBMISSION.transitions + grep cluster- $< | grep --invert-match Submitted > $@ + +cluster.report: karatasks.clusters.lastsummary karatasks.clusters.last karatasks.clusters.event + cluster-report + +clusterstats: karatasks.clusters.lastsummary karatasks.clusters.last karatasks.clusters.event + cluster-stats > clusterstats + +cluster-jobcount-histogram.png: clusterstats + plot-cluster-jobcount-histogram + Copied: branches/woz-01/libexec/log/coasters.mk (from rev 3392, branches/woz-01/libexec/log/makefile.coasters) =================================================================== --- branches/woz-01/libexec/log/coasters.mk (rev 0) +++ branches/woz-01/libexec/log/coasters.mk 2010-06-17 21:20:07 UTC (rev 3393) @@ -0,0 +1,42 @@ +coaster-block-timeline.png: coaster-block-timeline.data + gnuplot $(SWIFT_PLOT_HOME)/coaster-block-timeline.plot + +coaster-block-timeline.data: coasters.tmp + extract-coaster-blocks coasters.tmp + +coasters.tmp: $(LOG) + extract-coaster-timeline $(LOG) + +# block-count-table.tmp: extract-timeline + +extract-coaster-qwait: coasters.tmp + extract-coaster-qwait $(LOG) + +coaster-qwait-count.data: extract-coaster-qwait +coaster-qwait-size.data: extract-coaster-qwait +coaster-qwait-wtime.data: extract-coaster-qwait +coaster-block-utilization-vs-count.data: extract-block-utilization-vs-count + +extract-block-utilization: coasters.tmp + extract-coaster-block-utilization $(LOG) + +extract-block-utilization-vs-count: coasters.tmp + extract-block-utilization-vs-count $(LOG) + +coaster-block-utilization.data: + extract-coaster-block-utilization + +coaster-qwait-count.png: coaster-qwait-count.data + gnuplot $(SWIFT_PLOT_HOME)/coaster-qwait-count.plot + +coaster-qwait-size.png: coaster-qwait-size.data + gnuplot $(SWIFT_PLOT_HOME)/coaster-qwait-size.plot + +coaster-qwait-wtime.png: coaster-qwait-wtime.data + gnuplot $(SWIFT_PLOT_HOME)/coaster-qwait-wtime.plot + +coaster-block-utilization.png: coaster-block-utilization.data + gnuplot $(SWIFT_PLOT_HOME)/coaster-block-utilization.plot + +coaster-block-utilization-vs-count.png: coaster-block-utilization-vs-count.data + gnuplot $(SWIFT_PLOT_HOME)/coaster-block-utilization-vs-count.plot Copied: branches/woz-01/libexec/log/errors.mk (from rev 3392, branches/woz-01/libexec/log/makefile.errors) =================================================================== --- branches/woz-01/libexec/log/errors.mk (rev 0) +++ branches/woz-01/libexec/log/errors.mk 
2010-06-17 21:20:07 UTC (rev 3393) @@ -0,0 +1,3 @@ + +error-summary.txt: $(LOG) + error-summary $< > $@ Copied: branches/woz-01/libexec/log/falkon.mk (from rev 3392, branches/woz-01/libexec/log/makefile.falkon) =================================================================== --- branches/woz-01/libexec/log/falkon.mk (rev 0) +++ branches/woz-01/libexec/log/falkon.mk 2010-06-17 21:20:07 UTC (rev 3393) @@ -0,0 +1,11 @@ +falkon.event: $(FDIR) + falkon-to-event $(FDIR)/falkon_task_perf.txt > falkon.event + +execute2-by-falkon.coloured-event: falkon.event execute2.event + colour-execute2-by-falkon > execute2-by-falkon.coloured-event + +execute2-by-falkon-coloured.png: execute2-by-falkon.coloured-event start-time.tmp workflow.event + coloured-event-plot execute2-by-falkon.coloured-event execute2-by-falkon-coloured.png execute2-by-falkon.channels + +falkon-task-status.transitions: $(FDIR)/falkon_task_status.txt + falkon/task-status-to-transitions < $(FDIR)/falkon_task_status.txt > $@ Copied: branches/woz-01/libexec/log/implicit.mk (from rev 3392, branches/woz-01/libexec/log/makefile.implicit) =================================================================== --- branches/woz-01/libexec/log/implicit.mk (rev 0) +++ branches/woz-01/libexec/log/implicit.mk 2010-06-17 21:20:07 UTC (rev 3393) @@ -0,0 +1,45 @@ +%.lastsummary: %.last + cat $< | sed 's/^\([^ ]*\) \([^ ]*\) \([^ ]*\)\(.*\)/\3/' | sort | uniq -c > $@ + +%.last: %.transitions + cat $< | swap-and-sort |last-transition-line > $@ + +%.event: %.transitions t.inf + cat $< | swap-and-sort | transitions-to-event > $@ + +%.sorted-start.event: %.event + sort -n < $< > $@ + +%.sorted-by-duration: %.event + cat $< | sort -n -k 2 > $@ + +%-total.png: %.event start-time.tmp + total-event-plot $< $@ + +%.png: %.event start-time.tmp workflow.event + simple-event-plot $< $@ + +%.stats: %.event %.eip + event-duration-stats $< > $@ + +%-duration-histogram.png: %.sorted-by-duration + plot-duration-histogram $< $@ + +%.cedps: %.transitions + transitions-to-cedps $< > $@ + +%.seenstates: %.transitions + cat $< | sed 's/[^ ]* *[^ ]* \([^ ]*\).*/\1/' | sort | uniq > $@ + +%-trails.png: %.seenstates %.transitions start-time.tmp workflow.event + trail $* + +%-trailsx.png: %.seenstates %.transitions + trail-freex $* + +%.global.event: %.event + add-runid-as-prefix $< $@ + +%.eip: %.event + events-in-progress < $< > $@ + Copied: branches/woz-01/libexec/log/karatasks.mk (from rev 3392, branches/woz-01/libexec/log/makefile.karatasks) =================================================================== --- branches/woz-01/libexec/log/karatasks.mk (rev 0) +++ branches/woz-01/libexec/log/karatasks.mk 2010-06-17 21:20:07 UTC (rev 3393) @@ -0,0 +1,41 @@ +export LOG + +karajan-times.data: log tasks.list start-time.tmp end-time.tmp + times-for-all-tasks > karajan-times.data + +tasks.list: log + list-known-tasks > tasks.list + +karatasks-type-counts.txt: karatasks.event + cat karatasks.event | cut -f 4 -d ' ' | sort | uniq -c > karatasks-type-counts.txt + +karatasks.coloured-event: karatasks.event + colour-karatasks < karatasks.event > karatasks.coloured-event + +karatasks.coloured-sorted-event: karatasks.sorted-start.event + colour-karatasks < karatasks.sorted-start.event > karatasks.coloured-sorted-event + +karatasks-coloured.png: karatasks.coloured-event start-time.tmp workflow.event + coloured-event-plot karatasks.coloured-event karatasks-coloured.png ${SWIFT_PLOT_HOME}/karatasks-coloured.channels + +karatasks-coloured-sorted.png: karatasks.coloured-sorted-event 
start-time.tmp workflow.event + coloured-event-plot karatasks.coloured-sorted-event karatasks-coloured-sorted.png ${SWIFT_PLOT_HOME}/karatasks-coloured.channels + +karatasks.transitions: $(LOG) + log-to-karatasks-transitions < $(LOG) > karatasks.transitions + +karatasks.FILE_OPERATION.transitions: karatasks.transitions + karatasks-only FILE_OPERATION > $@ + +karatasks.FILE_TRANSFER.transitions: karatasks.transitions + karatasks-only FILE_TRANSFER > $@ + +karatasks.JOB_SUBMISSION.transitions: karatasks.transitions + karatasks-only JOB_SUBMISSION > $@ + +karatasks.JOB_SUBMISSION.Active.transitions: karatasks.JOB_SUBMISSION.transitions + cat karatasks.JOB_SUBMISSION.transitions | swap-and-sort-and-swap | active-state-transitions > karatasks.JOB_SUBMISSION.Active.transitions + +karatasks.JOB_SUBMISSION.Queue.transitions: karatasks.JOB_SUBMISSION.transitions + cat karatasks.JOB_SUBMISSION.transitions | swap-and-sort-and-swap | queue-state-transitions > karatasks.JOB_SUBMISSION.Queue.transitions + Copied: branches/woz-01/libexec/log/kickstart.mk (from rev 3392, branches/woz-01/libexec/log/makefile.kickstart) =================================================================== --- branches/woz-01/libexec/log/kickstart.mk (rev 0) +++ branches/woz-01/libexec/log/kickstart.mk 2010-06-17 21:20:07 UTC (rev 3393) @@ -0,0 +1,15 @@ +kickstart-times.data: kickstarts-to-plot start-time.tmp + kickstarts-to-plot > kickstart-times.data + +start-last-times-and-kickstart.data: start-last-times.data + start-last-times-and-kickstart > start-last-times-and-kickstart.data + +start-last-times-and-kickstart.png: start-last-times-and-kickstart.data start-last-times-and-kickstart.plot + gnuplot $(SWIFT_PLOT_HOME)/start-last-times-and-kickstart.plot + +kickstart.event: execute2.event + kickstarts-to-event > kickstart.event + +all-kickstart.event: + all-kickstarts-to-event > $@ + Deleted: branches/woz-01/libexec/log/makefile.cluster =================================================================== --- branches/woz-01/libexec/log/makefile.cluster 2010-06-17 21:18:09 UTC (rev 3392) +++ branches/woz-01/libexec/log/makefile.cluster 2010-06-17 21:20:07 UTC (rev 3393) @@ -1,12 +0,0 @@ -karatasks.clusters.transitions: karatasks.JOB_SUBMISSION.transitions - grep cluster- $< | grep --invert-match Submitted > $@ - -cluster.report: karatasks.clusters.lastsummary karatasks.clusters.last karatasks.clusters.event - cluster-report - -clusterstats: karatasks.clusters.lastsummary karatasks.clusters.last karatasks.clusters.event - cluster-stats > clusterstats - -cluster-jobcount-histogram.png: clusterstats - plot-cluster-jobcount-histogram - Deleted: branches/woz-01/libexec/log/makefile.coasters =================================================================== --- branches/woz-01/libexec/log/makefile.coasters 2010-06-17 21:18:09 UTC (rev 3392) +++ branches/woz-01/libexec/log/makefile.coasters 2010-06-17 21:20:07 UTC (rev 3393) @@ -1,42 +0,0 @@ -coaster-block-timeline.png: coaster-block-timeline.data - gnuplot $(SWIFT_PLOT_HOME)/coaster-block-timeline.plot - -coaster-block-timeline.data: coasters.tmp - extract-coaster-blocks coasters.tmp - -coasters.tmp: $(LOG) - extract-coaster-timeline $(LOG) - -# block-count-table.tmp: extract-timeline - -extract-coaster-qwait: coasters.tmp - extract-coaster-qwait $(LOG) - -coaster-qwait-count.data: extract-coaster-qwait -coaster-qwait-size.data: extract-coaster-qwait -coaster-qwait-wtime.data: extract-coaster-qwait -coaster-block-utilization-vs-count.data: extract-block-utilization-vs-count - 
-extract-block-utilization: coasters.tmp - extract-coaster-block-utilization $(LOG) - -extract-block-utilization-vs-count: coasters.tmp - extract-block-utilization-vs-count $(LOG) - -coaster-block-utilization.data: - extract-coaster-block-utilization - -coaster-qwait-count.png: coaster-qwait-count.data - gnuplot $(SWIFT_PLOT_HOME)/coaster-qwait-count.plot - -coaster-qwait-size.png: coaster-qwait-size.data - gnuplot $(SWIFT_PLOT_HOME)/coaster-qwait-size.plot - -coaster-qwait-wtime.png: coaster-qwait-wtime.data - gnuplot $(SWIFT_PLOT_HOME)/coaster-qwait-wtime.plot - -coaster-block-utilization.png: coaster-block-utilization.data - gnuplot $(SWIFT_PLOT_HOME)/coaster-block-utilization.plot - -coaster-block-utilization-vs-count.png: coaster-block-utilization-vs-count.data - gnuplot $(SWIFT_PLOT_HOME)/coaster-block-utilization-vs-count.plot Deleted: branches/woz-01/libexec/log/makefile.errors =================================================================== --- branches/woz-01/libexec/log/makefile.errors 2010-06-17 21:18:09 UTC (rev 3392) +++ branches/woz-01/libexec/log/makefile.errors 2010-06-17 21:20:07 UTC (rev 3393) @@ -1,3 +0,0 @@ - -error-summary.txt: $(LOG) - error-summary $< > $@ Deleted: branches/woz-01/libexec/log/makefile.falkon =================================================================== --- branches/woz-01/libexec/log/makefile.falkon 2010-06-17 21:18:09 UTC (rev 3392) +++ branches/woz-01/libexec/log/makefile.falkon 2010-06-17 21:20:07 UTC (rev 3393) @@ -1,11 +0,0 @@ -falkon.event: $(FDIR) - falkon-to-event $(FDIR)/falkon_task_perf.txt > falkon.event - -execute2-by-falkon.coloured-event: falkon.event execute2.event - colour-execute2-by-falkon > execute2-by-falkon.coloured-event - -execute2-by-falkon-coloured.png: execute2-by-falkon.coloured-event start-time.tmp workflow.event - coloured-event-plot execute2-by-falkon.coloured-event execute2-by-falkon-coloured.png execute2-by-falkon.channels - -falkon-task-status.transitions: $(FDIR)/falkon_task_status.txt - falkon/task-status-to-transitions < $(FDIR)/falkon_task_status.txt > $@ Deleted: branches/woz-01/libexec/log/makefile.implicit =================================================================== --- branches/woz-01/libexec/log/makefile.implicit 2010-06-17 21:18:09 UTC (rev 3392) +++ branches/woz-01/libexec/log/makefile.implicit 2010-06-17 21:20:07 UTC (rev 3393) @@ -1,45 +0,0 @@ -%.lastsummary: %.last - cat $< | sed 's/^\([^ ]*\) \([^ ]*\) \([^ ]*\)\(.*\)/\3/' | sort | uniq -c > $@ - -%.last: %.transitions - cat $< | swap-and-sort |last-transition-line > $@ - -%.event: %.transitions t.inf - cat $< | swap-and-sort | transitions-to-event > $@ - -%.sorted-start.event: %.event - sort -n < $< > $@ - -%.sorted-by-duration: %.event - cat $< | sort -n -k 2 > $@ - -%-total.png: %.event start-time.tmp - total-event-plot $< $@ - -%.png: %.event start-time.tmp workflow.event - simple-event-plot $< $@ - -%.stats: %.event %.eip - event-duration-stats $< > $@ - -%-duration-histogram.png: %.sorted-by-duration - plot-duration-histogram $< $@ - -%.cedps: %.transitions - transitions-to-cedps $< > $@ - -%.seenstates: %.transitions - cat $< | sed 's/[^ ]* *[^ ]* \([^ ]*\).*/\1/' | sort | uniq > $@ - -%-trails.png: %.seenstates %.transitions start-time.tmp workflow.event - trail $* - -%-trailsx.png: %.seenstates %.transitions - trail-freex $* - -%.global.event: %.event - add-runid-as-prefix $< $@ - -%.eip: %.event - events-in-progress < $< > $@ - Deleted: branches/woz-01/libexec/log/makefile.karatasks 
=================================================================== --- branches/woz-01/libexec/log/makefile.karatasks 2010-06-17 21:18:09 UTC (rev 3392) +++ branches/woz-01/libexec/log/makefile.karatasks 2010-06-17 21:20:07 UTC (rev 3393) @@ -1,41 +0,0 @@ -export LOG - -karajan-times.data: log tasks.list start-time.tmp end-time.tmp - times-for-all-tasks > karajan-times.data - -tasks.list: log - list-known-tasks > tasks.list - -karatasks-type-counts.txt: karatasks.event - cat karatasks.event | cut -f 4 -d ' ' | sort | uniq -c > karatasks-type-counts.txt - -karatasks.coloured-event: karatasks.event - colour-karatasks < karatasks.event > karatasks.coloured-event - -karatasks.coloured-sorted-event: karatasks.sorted-start.event - colour-karatasks < karatasks.sorted-start.event > karatasks.coloured-sorted-event - -karatasks-coloured.png: karatasks.coloured-event start-time.tmp workflow.event - coloured-event-plot karatasks.coloured-event karatasks-coloured.png ${SWIFT_PLOT_HOME}/karatasks-coloured.channels - -karatasks-coloured-sorted.png: karatasks.coloured-sorted-event start-time.tmp workflow.event - coloured-event-plot karatasks.coloured-sorted-event karatasks-coloured-sorted.png ${SWIFT_PLOT_HOME}/karatasks-coloured.channels - -karatasks.transitions: $(LOG) - log-to-karatasks-transitions < $(LOG) > karatasks.transitions - -karatasks.FILE_OPERATION.transitions: karatasks.transitions - karatasks-only FILE_OPERATION > $@ - -karatasks.FILE_TRANSFER.transitions: karatasks.transitions - karatasks-only FILE_TRANSFER > $@ - -karatasks.JOB_SUBMISSION.transitions: karatasks.transitions - karatasks-only JOB_SUBMISSION > $@ - -karatasks.JOB_SUBMISSION.Active.transitions: karatasks.JOB_SUBMISSION.transitions - cat karatasks.JOB_SUBMISSION.transitions | swap-and-sort-and-swap | active-state-transitions > karatasks.JOB_SUBMISSION.Active.transitions - -karatasks.JOB_SUBMISSION.Queue.transitions: karatasks.JOB_SUBMISSION.transitions - cat karatasks.JOB_SUBMISSION.transitions | swap-and-sort-and-swap | queue-state-transitions > karatasks.JOB_SUBMISSION.Queue.transitions - Deleted: branches/woz-01/libexec/log/makefile.kickstart =================================================================== --- branches/woz-01/libexec/log/makefile.kickstart 2010-06-17 21:18:09 UTC (rev 3392) +++ branches/woz-01/libexec/log/makefile.kickstart 2010-06-17 21:20:07 UTC (rev 3393) @@ -1,15 +0,0 @@ -kickstart-times.data: kickstarts-to-plot start-time.tmp - kickstarts-to-plot > kickstart-times.data - -start-last-times-and-kickstart.data: start-last-times.data - start-last-times-and-kickstart > start-last-times-and-kickstart.data - -start-last-times-and-kickstart.png: start-last-times-and-kickstart.data start-last-times-and-kickstart.plot - gnuplot $(SWIFT_PLOT_HOME)/start-last-times-and-kickstart.plot - -kickstart.event: execute2.event - kickstarts-to-event > kickstart.event - -all-kickstart.event: - all-kickstarts-to-event > $@ - Deleted: branches/woz-01/libexec/log/makefile.webpage =================================================================== --- branches/woz-01/libexec/log/makefile.webpage 2010-06-17 21:18:09 UTC (rev 3392) +++ branches/woz-01/libexec/log/makefile.webpage 2010-06-17 21:20:07 UTC (rev 3393) @@ -1,61 +0,0 @@ -webpage: pngs htmls - -htmls: index.html execute2.html execute.html karajan.html info.html \ -assorted.html kickstart.html falkon.html scheduler.html coasters.html - -webpage.kara: karatasks.png karatasks-coloured.png karatasks-type-counts.txt \ -karatasks.lastsummary karatasks-coloured-sorted.png \ 
-karatasks.FILE_TRANSFER.sorted-start.png \ -karatasks.FILE_OPERATION.sorted-start.png \ -karatasks.JOB_SUBMISSION.sorted-start.png \ -karatasks.FILE_TRANSFER-total.png karatasks.JOB_SUBMISSION-total.png \ -karatasks.FILE_OPERATION-total.png karatasks.JOB_SUBMISSION-trails.png \ -karatasks.FILE_OPERATION-trails.png karatasks.FILE_TRANSFER-trails.png \ -karatasks.FILE_TRANSFER.stats karatasks.FILE_OPERATION.stats \ -karatasks.JOB_SUBMISSION.stats karatasks-total.png \ -karatasks.JOB_SUBMISSION.Active-total.png index.html.kara \ -karatasks.JOB_SUBMISSION.Active.sorted-start.png \ -karatasks.JOB_SUBMISSION.Active.stats \ -karatasks.JOB_SUBMISSION.Active-duration-histogram.png \ -karatasks.JOB_SUBMISSION.Queue-total.png \ -karatasks.JOB_SUBMISSION.Queue.sorted-start.png \ -karatasks.JOB_SUBMISSION.Queue.stats \ -karatasks.JOB_SUBMISSION.Queue-duration-histogram.png - -webpage.coasters: coaster-block-timeline.png coaster-qwait-count.png coaster-qwait-size.png \ - coaster-qwait-wtime.png coaster-block-utilization.png coaster-block-utilization-vs-count.png - -webpage.clusters: karatasks.clusters.png karatasks.clusters-total.png \ -karatasks.clusters.sorted-start.png karatasks.clusters-duration-histogram.png - -webpage.falkon: falkon.png falkon-total.png falkon-task-status-trails.png - -webpage.weights: weights.png tscores.png - -webpage.info: info.png info.sorted-start.png info-trails.png \ -info-duration-histogram.png info.zeroed-trailsx.png info.stats info-total.png - -webpage.kickstart: kickstart.png kickstart-total.png \ -kickstart-duration-histogram.png \ -index.html.kickstart kickstart.sorted-start.png - -pngs: workflow.png execute2.png execute.png execute2-total.png \ -execute-total.png execute2-coloured.png execute.sorted-start.png \ -initshareddir.png createdirset.png createdirset-total.png dostagein.png \ -dostagein-total.png dostageout.png dostageout-total.png execstages.png \ -dostageout.sorted-start.png dostagein-duration-histogram.png \ -dostageout-duration-histogram.png execute-trails.png execute2-trails.png \ -info-and-karajan-actives.png info-and-karajan-actives.2.png - -htmldeps: $(SWIFT_PLOT_HOME)/index.html.template execution-counts.txt jobs-sites.html site-duration.txt execute2.lastsummary execute.lastsummary jobs.retrycount.summary trname-summary - -%.html: htmldeps - m4 -I$(SWIFT_PLOT_HOME) $(SWIFT_PLOT_HOME)/$@.template > $@ - - -index.html.kara: karatasks.lastsummary karatasks.FILE_OPERATION.stats karatasks.FILE_TRANSFER.stats karatasks.JOB_SUBMISSION.stats karatasks-type-counts.txt - -index.html.kickstart: kickstart.stats - -info-and-karajan-actives.png info-and-karajan-actives.2.png: execute2.event karatasks.transitions - info-and-karajan-actives $(IDIR) Copied: branches/woz-01/libexec/log/webpage.mk (from rev 3392, branches/woz-01/libexec/log/makefile.webpage) =================================================================== --- branches/woz-01/libexec/log/webpage.mk (rev 0) +++ branches/woz-01/libexec/log/webpage.mk 2010-06-17 21:20:07 UTC (rev 3393) @@ -0,0 +1,61 @@ +webpage: pngs htmls + +htmls: index.html execute2.html execute.html karajan.html info.html \ +assorted.html kickstart.html falkon.html scheduler.html coasters.html + +webpage.kara: karatasks.png karatasks-coloured.png karatasks-type-counts.txt \ +karatasks.lastsummary karatasks-coloured-sorted.png \ +karatasks.FILE_TRANSFER.sorted-start.png \ +karatasks.FILE_OPERATION.sorted-start.png \ +karatasks.JOB_SUBMISSION.sorted-start.png \ +karatasks.FILE_TRANSFER-total.png karatasks.JOB_SUBMISSION-total.png \ 
+karatasks.FILE_OPERATION-total.png karatasks.JOB_SUBMISSION-trails.png \ +karatasks.FILE_OPERATION-trails.png karatasks.FILE_TRANSFER-trails.png \ +karatasks.FILE_TRANSFER.stats karatasks.FILE_OPERATION.stats \ +karatasks.JOB_SUBMISSION.stats karatasks-total.png \ +karatasks.JOB_SUBMISSION.Active-total.png index.html.kara \ +karatasks.JOB_SUBMISSION.Active.sorted-start.png \ +karatasks.JOB_SUBMISSION.Active.stats \ +karatasks.JOB_SUBMISSION.Active-duration-histogram.png \ +karatasks.JOB_SUBMISSION.Queue-total.png \ +karatasks.JOB_SUBMISSION.Queue.sorted-start.png \ +karatasks.JOB_SUBMISSION.Queue.stats \ +karatasks.JOB_SUBMISSION.Queue-duration-histogram.png + +webpage.coasters: coaster-block-timeline.png coaster-qwait-count.png coaster-qwait-size.png \ + coaster-qwait-wtime.png coaster-block-utilization.png coaster-block-utilization-vs-count.png + +webpage.clusters: karatasks.clusters.png karatasks.clusters-total.png \ +karatasks.clusters.sorted-start.png karatasks.clusters-duration-histogram.png + +webpage.falkon: falkon.png falkon-total.png falkon-task-status-trails.png + +webpage.weights: weights.png tscores.png + +webpage.info: info.png info.sorted-start.png info-trails.png \ +info-duration-histogram.png info.zeroed-trailsx.png info.stats info-total.png + +webpage.kickstart: kickstart.png kickstart-total.png \ +kickstart-duration-histogram.png \ +index.html.kickstart kickstart.sorted-start.png + +pngs: workflow.png execute2.png execute.png execute2-total.png \ +execute-total.png execute2-coloured.png execute.sorted-start.png \ +initshareddir.png createdirset.png createdirset-total.png dostagein.png \ +dostagein-total.png dostageout.png dostageout-total.png execstages.png \ +dostageout.sorted-start.png dostagein-duration-histogram.png \ +dostageout-duration-histogram.png execute-trails.png execute2-trails.png \ +info-and-karajan-actives.png info-and-karajan-actives.2.png + +htmldeps: $(SWIFT_PLOT_HOME)/index.html.template execution-counts.txt jobs-sites.html site-duration.txt execute2.lastsummary execute.lastsummary jobs.retrycount.summary trname-summary + +%.html: htmldeps + m4 -I$(SWIFT_PLOT_HOME) $(SWIFT_PLOT_HOME)/$@.template > $@ + + +index.html.kara: karatasks.lastsummary karatasks.FILE_OPERATION.stats karatasks.FILE_TRANSFER.stats karatasks.JOB_SUBMISSION.stats karatasks-type-counts.txt + +index.html.kickstart: kickstart.stats + +info-and-karajan-actives.png info-and-karajan-actives.2.png: execute2.event karatasks.transitions + info-and-karajan-actives $(IDIR) From noreply at svn.ci.uchicago.edu Thu Jun 17 16:24:40 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 17 Jun 2010 16:24:40 -0500 (CDT) Subject: [Swift-commit] r3394 - branches/woz-01/libexec/log Message-ID: <20100617212440.1BB519CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-17 16:24:39 -0500 (Thu, 17 Jun 2010) New Revision: 3394 Modified: branches/woz-01/libexec/log/README.txt Log: Update notes Modified: branches/woz-01/libexec/log/README.txt =================================================================== --- branches/woz-01/libexec/log/README.txt 2010-06-17 21:20:07 UTC (rev 3393) +++ branches/woz-01/libexec/log/README.txt 2010-06-17 21:24:39 UTC (rev 3394) @@ -1,9 +1,11 @@ STRUCTURE: +* This stuff was originally supposed to be controlled by + bin/swift-plot-log * There is main makefile named "makefile" * "makefile" reads extra rules from a bunch of other makefiles - "*.makefile" + "*.mk" * These makefile rules use various shell scripts NOTES: @@ -15,9 +17,9 @@ 
* Plot coasters utilization

-PLOT_HOME=/scratch/wozniak/cog/modules/swift/libexec/log-processing
+PLOT_HOME=/scratch/wozniak/cog/modules/swift/libexec/log

-make -f ${PLOT_HOME}/makefile.coasters LOG=./map-20100606-2328-nv9xntzg.log SWIFT_PLOT_HOME=${PLOT_HOME} coaster-block-timeline.png
+make -f ${PLOT_HOME}/coasters.mk LOG=./map-20100606-2328-nv9xntzg.log SWIFT_PLOT_HOME=${PLOT_HOME} coaster-block-timeline.png

 IMPROVEMENT STRATEGY:

@@ -34,8 +36,7 @@
 TODO:

 * Rename all shell scripts to *.sh
-* Rename *.makefile to *.mk
-* Rename makefile to main.mk
+* Rename *.makefile to *.mk (DONE)
 * Stop using *.data - this clobbers tc.data
 * Rename temporary files to *.log.ptmp (plot-temporary) *.data.ptmp

From noreply at svn.ci.uchicago.edu Thu Jun 17 17:22:44 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Thu, 17 Jun 2010 17:22:44 -0500 (CDT)
Subject: [Swift-commit] r3395 - branches/woz-01/libexec/log
Message-ID: <20100617222244.B1BCC9CC99@vm-125-59.ci.uchicago.edu>

Author: wozniak
Date: 2010-06-17 17:22:44 -0500 (Thu, 17 Jun 2010)
New Revision: 3395

Added: branches/woz-01/libexec/log/extract-coaster-blocks.sh branches/woz-01/libexec/log/mkplot.sh
Modified: branches/woz-01/libexec/log/coasters.mk branches/woz-01/libexec/log/extract-coaster-blocks branches/woz-01/libexec/log/makefile
Log: New single-shot plot tools (mkplot.sh) and related modifications

Modified: branches/woz-01/libexec/log/coasters.mk
===================================================================
--- branches/woz-01/libexec/log/coasters.mk 2010-06-17 21:24:39 UTC (rev 3394)
+++ branches/woz-01/libexec/log/coasters.mk 2010-06-17 22:22:44 UTC (rev 3395)
@@ -1,8 +1,8 @@
-coaster-block-timeline.png: coaster-block-timeline.data
+coaster-block-timeline.png: coaster-blocks.data
 gnuplot $(SWIFT_PLOT_HOME)/coaster-block-timeline.plot

-coaster-block-timeline.data: coasters.tmp
- extract-coaster-blocks coasters.tmp
+coaster-blocks.data: coasters.tmp
+ extract-coaster-blocks.sh < coasters.tmp > coaster-blocks.data

 coasters.tmp: $(LOG)
 extract-coaster-timeline $(LOG)

Modified: branches/woz-01/libexec/log/extract-coaster-blocks
===================================================================
--- branches/woz-01/libexec/log/extract-coaster-blocks 2010-06-17 21:24:39 UTC (rev 3394)
+++ branches/woz-01/libexec/log/extract-coaster-blocks 2010-06-17 22:22:44 UTC (rev 3395)
@@ -15,6 +15,7 @@
 echo "$ID,$ADD" >> block-count-table.tmp
 else
 COUNT=`cat block-count-table.tmp | grep "$ID" | cut -d , -f 2`
+ echo COUNT: _$COUNT_
 if echo $LINE | grep "BLOCK_ACTIVE" >/dev/null; then
 RUNNING=$(($RUNNING + $COUNT))
 fi
 if echo $LINE | grep "BLOCK_SHUTDOWN" >/dev/null; then
 RUNNING=$(($RUNNING - $COUNT))
 REQUESTED=$(($REQUESTED - $COUNT))
 fi
 fi
 echo $TIME $REQUESTED $RUNNING >> coaster-blocks.data
 done

Added: branches/woz-01/libexec/log/extract-coaster-blocks.sh
===================================================================
--- branches/woz-01/libexec/log/extract-coaster-blocks.sh (rev 0)
+++ branches/woz-01/libexec/log/extract-coaster-blocks.sh 2010-06-17 22:22:44 UTC (rev 3395)
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+set -x
+
+rm -fv block-count-table.tmp
+
+IFS=$'\n'
+REQUESTED=0
+RUNNING=0
+for LINE in $( cat ); do
+ TIME=`echo $LINE | sed 's/^\([^ ]*\) .*$/\1/' `
+ ID=`echo $LINE | sed 's/^.*id=\([^ ]*\)\(,.*$\|$\)/\1/'`
+ ADD=`echo $LINE | sed -n 's/^.*BLOCK_REQUESTED.*w=\([^ ]*\),.*$/\1/p'`
+ if [ "$ADD" != "" ]; then
+ REQUESTED=$(($REQUESTED + $ADD))
+ echo "$ID,$ADD" >> block-count-table.tmp
+ else
+ COUNT=`cat block-count-table.tmp | grep "$ID" | cut -d , -f 2`
+ if echo $LINE | grep "BLOCK_ACTIVE" >/dev/null; then
+ RUNNING=$(($RUNNING + $COUNT))
+ fi
+ if echo $LINE | grep "BLOCK_SHUTDOWN" >/dev/null; then
+ RUNNING=$(($RUNNING - $COUNT))
+ REQUESTED=$(($REQUESTED - $COUNT))
+ fi
+ fi
+ echo $TIME $REQUESTED $RUNNING
+done

Property changes on: branches/woz-01/libexec/log/extract-coaster-blocks.sh
___________________________________________________________________
Name: svn:executable
 + *
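The extract-coaster-blocks.sh script above spawns several sed and grep processes for every input line, which is exactly the pattern the README.txt efficiency note (r3389) suggests replacing with a single AWK pass. The following is an illustrative sketch only, not something committed here; it assumes the same line layout the sed expressions above rely on (a leading timestamp, an id=... token, and w=<workers>, on BLOCK_REQUESTED lines) and prints the same "<time> <requested> <running>" triples.

#!/bin/bash
# Sketch of a one-pass AWK equivalent of extract-coaster-blocks.sh.
# Reads coasters.tmp on stdin, writes one "time requested running" line per input line.
awk '
{
  time = $1
  id = ""; add = ""
  if (match($0, /id=[^ ,]+/)) id  = substr($0, RSTART + 3, RLENGTH - 3)
  if (match($0, /w=[^ ,]+,/)) add = substr($0, RSTART + 2, RLENGTH - 3)
  if ($0 ~ /BLOCK_REQUESTED/ && add != "") {
    requested += add          # new block requested: count its workers
    count[id]  = add          # remember the block size, keyed by block id
  } else if ($0 ~ /BLOCK_ACTIVE/) {
    running   += count[id]    # block became active: its workers are now running
  } else if ($0 ~ /BLOCK_SHUTDOWN/) {
    running   -= count[id]
    requested -= count[id]
  }
  print time, requested, running
}'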
Modified: branches/woz-01/libexec/log/makefile
===================================================================
--- branches/woz-01/libexec/log/makefile 2010-06-17 21:24:39 UTC (rev 3394)
+++ branches/woz-01/libexec/log/makefile 2010-06-17 22:22:44 UTC (rev 3395)
@@ -2,13 +2,13 @@

 all: clean webpage

-include makefile.implicit
-include makefile.falkon
-include makefile.karatasks
-include makefile.webpage
-include makefile.kickstart
-include makefile.errors
-include makefile.coasters
+include $(SWIFT_PLOT_HOME)/implicit.mk
+include $(SWIFT_PLOT_HOME)/falkon.mk
+include $(SWIFT_PLOT_HOME)/karatasks.mk
+include $(SWIFT_PLOT_HOME)/webpage.mk
+include $(SWIFT_PLOT_HOME)/kickstart.mk
+include $(SWIFT_PLOT_HOME)/errors.mk
+include $(SWIFT_PLOT_HOME)/coasters.mk

 TMPS = start-times.data kickstart-times.data start-time.tmp end-time.tmp threads.list tasks.list *.shifted *.event *.coloured-event *.total *.tmp *.transitions *.last karatasks-type-counts.txt index.html *.lastsummary execstages.plot total.plot colour.plot jobs-sites.html jobs.retrycount.summary kickstart.stats execution-counts.txt site-duration.txt jobs.retrycount sp.plot karatasks.coloured-sorted-event *.cedps *.stats t.inf *.seenstates tmp-* clusterstats trname-summary sites-list.data.nm info-md5sums pse2d-tmp.eip karajan.html falkon.html execute2.html info.html execute.html kickstart.html scheduler.html assorted.html

Added: branches/woz-01/libexec/log/mkplot.sh
===================================================================
--- branches/woz-01/libexec/log/mkplot.sh (rev 0)
+++ branches/woz-01/libexec/log/mkplot.sh 2010-06-17 22:22:44 UTC (rev 3395)
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# MKPLOT
+# New log plot entry point
+
+# Usage:
+# mkplot.sh -h
+# print help
+# mkplot.sh -l
+# list pngs we know work
+# mkplot.sh
+# generate a png from a log
+
+message_help()
+{
+ echo "See the top of this file for usage"
+ exit 0
+}
+
+message_list()
+{
+ echo "Known PNG output:"
+ for PNG in ${KNOWN_PNGS[@]}
+ do
+ printf "\t"
+ echo ${PNG}
+ done
+ exit 0
+}
+
+# List of known PNGs: what we know will work
+KNOWN_PNGS=()
+KNOWN_PNGS=( ${KNOWN_PNGS[@]} coaster-block-timeline.png )
+
+export SWIFT_PLOT_HOME=$( dirname $0)/../../libexec/log
+
+while getopts "hl" OPTION
+ do
+ case ${OPTION}
+ in
+ h) message_help ;;
+ l) message_list ;;
+ esac
+done
+
+# Input parameters
+# The log file to plot
+LOG=$1
+# The plot to generate
+PNG=$2
+
+if [[ ${LOG} == "" ]]
+ then
+ echo "No log file given!"
+ exit 1
+fi
+
+if [[ ${PNG} == "" ]]
+ then
+ echo "No png target given!"
+ exit 1
+fi
+
+MAKEFILE=${SWIFT_PLOT_HOME}/makefile
+make -f ${MAKEFILE} ${PNG}

Property changes on: branches/woz-01/libexec/log/mkplot.sh
___________________________________________________________________
Name: svn:executable
 + *
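Taken together, the pieces added in r3395 give a single-shot path from a raw Swift log to a coaster plot. Below is a hypothetical invocation, a sketch only: the log name is the example one from README.txt, the install path is a placeholder, and note that the committed mkplot.sh does not forward its first argument to make, so the makefile's $(LOG) variable is supplied through the environment here. It also assumes the log-processing scripts are on PATH, as swift-plot-log arranges.

#!/bin/bash
# Sketch: one-shot coaster plot from a single Swift log.
# mkplot.sh checks its arguments, sets SWIFT_PLOT_HOME relative to itself and
# runs "make -f $SWIFT_PLOT_HOME/makefile <png>"; the coasters.mk rules then
# run extract-coaster-timeline, extract-coaster-blocks.sh and gnuplot.
LOG=./map-20100606-2328-nv9xntzg.log \
  /path/to/branches/woz-01/libexec/log/mkplot.sh \
  ./map-20100606-2328-nv9xntzg.log coaster-block-timeline.png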
From noreply at svn.ci.uchicago.edu Fri Jun 18 16:21:54 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Fri, 18 Jun 2010 16:21:54 -0500 (CDT)
Subject: [Swift-commit] r3396 - text/internals/trunk
Message-ID: <20100618212154.188459CC99@vm-125-59.ci.uchicago.edu>

Author: aespinosa
Date: 2010-06-18 16:21:53 -0500 (Fri, 18 Jun 2010)
New Revision: 3396

Modified: text/internals/trunk/internals.tex
Log: Karajan 2

Modified: text/internals/trunk/internals.tex
===================================================================
--- text/internals/trunk/internals.tex 2010-06-17 22:22:44 UTC (rev 3395)
+++ text/internals/trunk/internals.tex 2010-06-18 21:21:53 UTC (rev 3396)
@@ -569,4 +569,178 @@

 Disabling file transfer/ data staging stuff at runtime.

+\section{Karajan 2}
+
+Current Karajan is slow.
+
+ident()
+
+if () {
+ // true
+}
+else {
+ // false
+}
+
+if(cond, true, false)
+
+Changes: parser. Short form: a lambda. An implicit combinator.
+
+f(a) { b } -> f (a, {b} )
+
+f(a) else {b} -> f (a, {b})
+
+f(a) {b} else {c} -> f(a, {b}, {c})
+
+foreach:
+for(i, range) { }
+
+Unlike C, it is more functional, with a well-defined role. A semantics instead
+of a syntax.
+
+Pascal syntax:
+
+x := 1.0
+
+x:Real := 1.0
+
+Type inference engine figures out the types
+
+f := function(n) { ... }
+
+Namespaces for overloading (as in v1). The interpreter looks in all namespaces
+if it is not found.
+
+f := function(Integer) {} | function(Real) {}
+
+Can also define operators
+
+a = '+'(1, 2)
+
+Primitive types: Real, Integer, String
+
+---
+Tuple: e.g. (Real, Integer)
+
+---
+Alt
+Real | Integer
+
+i.e.
+
+a := if(cond) { 1.0 }
+ else { 2 }
+
+a:Real|Integer
+
+---
+Product
+T*
+
+zero or more repetitions of the same type, not bounded and closely related to a
+channel, i.e. for()
+
+The type reductor does all the heavy lifting of figuring out the types of
+variables.
+
+----
+Parametric types
+
+Like generics in Java
+
+List(T). Types themselves are first-class values
+
+myInt:=Integer
+
+a:myInt
+
+Creates a structural type inference
+
+intList:=List(Integer)
+
+a:intList = [1, 2, 3]
+
+Karajan does not implement full dependent types (they are Turing-complete)
+
+example:
+
+a:Matrix(3,4) - the type system can check if they can be multiplied since they
+are dependent types.
+
+Potential for fixing current bugs today.
+
+The type system is a subsemantic of the language that gets figured out at
+compile time. Potential for catching errors early (at compile time).
+Anticipation of the values being used to create certain optimizations. Only the
+components found to be compute-intensive get translated to machine code.
+
+Variables are only assigned once and are lexical.
+
+a := 0;
+
+f := function() {
+ a
+}
+a := 1
+
+print f => dumps 1
+
+No need for a specific distinction for the type system.
+
+tree := type (value) {
+ value
+ left:tree|Null
+ right:tree|Null
+}
+
+left::value
+
+if(left != null) {
+ left::value
+}
+
+if(typeof(left) != NULL) { ... }
+
+f(a) else g(b) -> f(a, g(b))
+
+Do as much as possible at runtime.
+
+add := function(a) {
+ function(b) {
+ a+b
+ }
+}
+
+add5 := add(5)
+
+print (add5(10)) => 15
+
+*Lightweight threaded and compiled to Java.
+
+Parallel stuff is the same
+
+parallelFor -> can't use normal Java stacks.
+ +Channels are now more optimized because of the above stuff. + +print: function(...) {channel(?)} -> signature(stdout)}... + +Blocks -- + +properly typechecked. + +ifeven := function(x, t:Block, f:Block) { + if(x % 2 == 0) { + t() + } + else { f() } +ifeven(5) {print("even")} +else {print("odd")} + +\subsection{Syntax} + +\subsection{Semantics} + +\subsection{Concurrency} + \end{document} From noreply at svn.ci.uchicago.edu Mon Jun 21 13:23:45 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 21 Jun 2010 13:23:45 -0500 (CDT) Subject: [Swift-commit] r3397 - in provenancedb: . apps/oops Message-ID: <20100621182345.2F19A9CCAF@vm-125-59.ci.uchicago.edu> Author: lgadelha Date: 2010-06-21 13:23:44 -0500 (Mon, 21 Jun 2010) New Revision: 3397 Modified: provenancedb/apps/oops/oops_extractor.sh provenancedb/prov-init.sql Log: Modified: provenancedb/apps/oops/oops_extractor.sh =================================================================== --- provenancedb/apps/oops/oops_extractor.sh 2010-06-18 21:21:53 UTC (rev 3396) +++ provenancedb/apps/oops/oops_extractor.sh 2010-06-21 18:23:44 UTC (rev 3397) @@ -6,63 +6,78 @@ # OOPS' Swift logs. PROVDB_HOME=~/provenancedb -PROTESTS_HOME=~/protests +PROTESTS_HOME=/home/aashish/CASP +IMPORT_HOME=~/protests source $PROVDB_HOME/etc/provenance.config # provdb_imported records runs already imported to the provenance database -cd $PROTESTS_HOME +cd $IMPORT_HOME if [ ! -a provdb_imported ]; then touch provdb_imported fi - -for i in `ls | grep run.loops`; -do - cd $PROTESTS_HOME - if ! grep $i provdb_imported; then - if grep "Swift finished with no errors" $i/psim.loops-*.log; then - cd swift-logs - for j in `ls ../$i | grep psim.loops-`; do - ln -s ../$i/$j - done - cd import - # swift-prov-import-all-logs also controls what already has been - # imported, so it does not repeat work - $PROVDB_HOME/swift-prov-import-all-logs - cd $PROTESTS_HOME - echo $i >> provdb_imported - - # annotate workflows with their oops runid - OOPS_RUN_ID=`echo $i | awk -F . 
'{print $3}'` - LOG_FILENAME=`ls $i | grep psim.loops- | grep "\."log$` - WORKFLOW_ID=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'` - echo "insert into annotations values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD - - # annotate dataset with scientific parameters passed to doLoopRound - - # TODO: check why it is not recording doLoopRound in processes_in_workflows - #echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql - - # using this as a workaround for the problem above, it will return nSim identical tuples - echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql - - $SQLCMD -t -A -F " " -f query.sql -o result.txt - - #DATASET_ID=`awk '{print $1}' result.txt` - DATASET_ID=`awk '{if (NR==1) print $1}' result.txt` - - #FILENAME=`awk '{print $2}' result.txt | sed 's/file:\/\/localhost\///g'` - FILENAME=`awk '{if (NR==1) print $2}' result.txt | sed 's/file:\/\/localhost\///g'` - - cd $PROTESTS_HOME/run.loops.$OOPS_RUN_ID - - while read line - do - NAME=`echo $line | awk 'BEGIN { FS = "=" }; {print $1}'` - VALUE=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}'` - echo "insert into annotations values ('$DATASET_ID', '$NAME', '$VALUE');" | $SQLCMD - done < $FILENAME +cd $PROTESTS_HOME +for k in `ls -1`; +do + cd $PROTESTS_HOME/$k + for i in `ls | grep run.loops`; + do + cd $IMPORT_HOME + if ! grep --silent $i provdb_imported; then + if grep --silent "Swift finished with no errors" $PROTESTS_HOME/$k/$i/psim.loops-*.log; then + cd swift-logs + for j in `ls $PROTESTS_HOME/$k/$i | grep psim.loops-`; do + ln -s $PROTESTS_HOME/$k/$i/$j + done + cd import + # swift-prov-import-all-logs also controls what already has been + # imported, so it does not repeat work + $PROVDB_HOME/swift-prov-import-all-logs + cd $IMPORT_HOME + echo $i >> provdb_imported + cd swift-logs + # annotate workflows with their oops runid + OOPS_RUN_ID=`echo $i | awk -F . 
'{print $3}'` + cd $PROTESTS_HOME/$k/$i + LOG_FILENAME=`ls | grep psim.loops- | grep "\."log$` + WORKFLOW_ID=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'` + cd $IMPORT_HOME/swift-logs + echo "insert into workflow_annotations_varchar values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD + + # using this as a workaround for the problem above, it will return nSim identical tuples + echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql + + $SQLCMD -t -A -F " " -f query.sql -o result.txt + + #DATASET_ID=`awk '{print $1}' result.txt` + DATASET_ID=`awk '{if (NR==1) print $1}' result.txt` + + #FILENAME=`awk '{print $2}' result.txt | sed 's/file:\/\/localhost\///g'` + FILENAME=`awk '{if (NR==1) print $2}' result.txt | sed 's/file:\/\/localhost\///g'` + + cd $PROTESTS_HOME/$k/run.loops.$OOPS_RUN_ID + + while read line; do + NAME=`echo $line | awk 'BEGIN { FS = "=" }; {print $1}'` + if [ "$NAME" = "SAMPLE RANGE" ]; then + VALUE1=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "-" }; {print $1}'` + VALUE2=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "-" }; {print $2}'` + echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME BEGIN', $VALUE1);" | $SQLCMD + echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME END', $VALUE2);" | $SQLCMD + fi + if [ "$NAME" = "RESTRAIN DISTANCE" ]; then + VALUE1=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "," }; {print $1}'` + VALUE2=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "," }; {print $2}'` + echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME 1', $VALUE1);" | $SQLCMD + echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME 2', $VALUE2);" | $SQLCMD + fi + if [ "$NAME" = "MAXIMUM NUMBER OF STEPS" ]; then + VALUE=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}'` + echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME', $VALUE);" | $SQLCMD + fi + done < $FILENAME + fi fi - fi + done done Modified: provenancedb/prov-init.sql =================================================================== --- provenancedb/prov-init.sql 2010-06-18 21:21:53 UTC (rev 3396) +++ provenancedb/prov-init.sql 2010-06-21 18:23:44 UTC (rev 3397) @@ -17,23 +17,30 @@ DROP TABLE createarray; DROP TABLE createarray_member; DROP TABLE array_range; -DROP TABLE annotations; - - +DROP TABLE dataset_annotations_numeric; +DROP TABLE dataset_annotations_varchar; +DROP TABLE dataset_annotations_boolean; +DROP TABLE process_annotations_numeric; +DROP TABLE process_annotations_varchar; +DROP TABLE process_annotations_boolean; +DROP TABLE workflow_annotations_numeric; +DROP TABLE workflow_annotations_varchar; +DROP TABLE workflow_annotations_boolean; -- associates each process with its containing workflow -- TODO - perhaps a workflow is itself a big big 
process? -- in which case this looks very much like a compound/app -- containment? CREATE TABLE processes_in_workflows - (workflow_id char(256), - process_id char(256) + (workflow_id varchar(2048), + process_id varchar(2048), + primary key (workflow_id, process_id) ); -- processes gives information about each process (in the OPM sense) -- it is augmented by information in other tables CREATE TABLE processes - (id char(256) PRIMARY KEY, -- a uri - type char(16) -- specifies the type of process. for any type, it + (id varchar(2048) PRIMARY KEY, -- a uri + type varchar(16) -- specifies the type of process. for any type, it -- must be the case that the specific type table -- has an entry for this process. -- Having this type here seems poor normalisation, though? @@ -44,12 +51,12 @@ -- each execute is identified by a unique URI. other information from -- swift logs is also stored here. an execute is an OPM process. CREATE TABLE executes - (id char(256) PRIMARY KEY, -- actually foreign key to processes + (id varchar(2048) PRIMARY KEY, -- actually foreign key to processes starttime numeric, duration numeric, - finalstate char(256), - app char(256), - scratch char(256) + finalstate varchar(2048), + app varchar(2048), + scratch varchar(2048) ); -- this gives information about each execute2, which is an attempt to @@ -57,12 +64,12 @@ -- information such as wrapper logs CREATE TABLE execute2s - (id char(256) PRIMARY KEY, - execute_id char(256), -- secondary key to executes and processes tables + (id varchar(2048) PRIMARY KEY, + execute_id varchar(2048), -- secondary key to executes and processes tables starttime numeric, duration numeric, - finalstate char(256), - site char(256) + finalstate varchar(2048), + site varchar(2048) ); -- dataset_usage records usage relationships between processes and datasets; @@ -74,11 +81,11 @@ -- dataset_id for common queries? maybe add arbitrary ID for sake of it? CREATE TABLE dataset_usage - (process_id char(256), -- foreign key but not enforced because maybe process + (process_id varchar(2048), -- foreign key but not enforced because maybe process -- doesn't exist at time. same type as processes.id direction char(1), -- I or O for input or output - dataset_id char(256), -- this will perhaps key against dataset table - param_name char(256) -- the name of the parameter in this execute that + dataset_id varchar(2048), -- this will perhaps key against dataset table + param_name varchar(2048) -- the name of the parameter in this execute that -- this dataset was bound to. sometimes this must -- be contrived (for example, in positional varargs) ); @@ -89,11 +96,9 @@ -- TODO probably desirable that this is part of executes table -- but for now this is the easiest to pull data from logs. - --- TODO primary key should be execute_id CREATE TABLE invocation_procedure_names - (execute_id char(256), - procedure_name char(256) + (execute_id varchar(2048) PRIMARY KEY, + procedure_name varchar(2048) ); @@ -107,19 +112,17 @@ -- a containment hierarchy. The relationship (such as array index or -- structure member name) should also be stored in this table. CREATE TABLE dataset_containment - ( outer_dataset_id char(256), - inner_dataset_id char(256) + ( outer_dataset_id varchar(2048), + inner_dataset_id varchar(2048) ); -- dataset_filenames stores the filename mapped to each dataset. 
As some -- datasets do not have filenames, it should not be expected that -- every dataset will have a row in this table - --- TODO dataset_id should be primary key CREATE TABLE dataset_filenames - ( dataset_id char(256), - filename char(256) + ( dataset_id varchar(2048) PRIMARY KEY, + filename varchar(2048) ); -- dataset_values stores the value for each dataset which is known to have @@ -128,8 +131,8 @@ -- example) SQL numerical operations should not be expected to work, even -- though the user knows that a particular dataset stores a numeric value. CREATE TABLE dataset_values - ( dataset_id char(256), -- should be primary key - value char(256) + ( dataset_id varchar(2048) PRIMARY KEY, + value varchar(2048) ); -- The above dataset_* tables are the original containment representation @@ -139,21 +142,21 @@ -- It is unclear which is the better representation. CREATE TABLE createarray - ( array_id char(256) + ( array_id varchar(2048) ); CREATE TABLE createarray_member - ( array_id char(256), - ix char(256), - member_id char(256) + ( array_id varchar(2048), + ix varchar(2048), + member_id varchar(2048) ); -- TODO step CREATE TABLE array_range - ( array_id char(256), - from_id char(256), - to_id char(256), - step_id char(256) -- nullable, if step is unspecified + ( array_id varchar(2048), + from_id varchar(2048), + to_id varchar(2048), + step_id varchar(2048) -- nullable, if step is unspecified ); -- known_workflows stores some information about each workflow log that has @@ -161,36 +164,90 @@ -- status. CREATE TABLE known_workflows ( - workflow_id char(256), - workflow_log_filename char(256), - version char(256), - importstatus char(256) + workflow_id varchar(2048) PRIMARY KEY, + workflow_log_filename varchar(2048), + version varchar(2048), + importstatus varchar(2048) ); -- workflow_events stores the start time and duration for each workflow -- that has been successfully imported. 
CREATE TABLE workflow_events - ( workflow_id char(256), + ( workflow_id varchar(2048) PRIMARY KEY, starttime numeric, duration numeric ); -- extrainfo stores lines generated by the SWIFT_EXTRA_INFO feature CREATE TABLE extrainfo - ( execute2id char(256), - extrainfo char(1024) + ( execute2id varchar(2048), + extrainfo varchar(1024) ); -- annotations -CREATE TABLE annotations - ( id char(256), -- either dataset_id, process_id, or workflow_id - name char(256), - value char(256) +CREATE TABLE dataset_annotations_numeric + ( dataset_id varchar(2048), + name varchar(2048), + value numeric, + primary key(dataset_id, name) ); +CREATE TABLE dataset_annotations_varchar + ( dataset_id varchar(2048), + name varchar(2048), + value varchar(4096), + primary key(dataset_id, name) + ); + +CREATE TABLE dataset_annotations_boolean + ( dataset_id varchar(2048), + name varchar(2048), + value boolean, + primary key(dataset_id, name) + ); + +CREATE TABLE process_annotations_numeric + ( process_id varchar(2048), + name varchar(2048), + value numeric, + primary key(process_id, name) + ); + +CREATE TABLE process_annotations_varchar + ( process_id varchar(2048), + name varchar(2048), + value varchar(1024), + primary key(process_id, name) + ); + +CREATE TABLE process_annotations_boolean + ( process_id varchar(2048), + name varchar(2048), + value boolean, + primary key(process_id, name) + ); + +CREATE TABLE workflow_annotations_numeric + ( workflow_id varchar(2048), + name varchar(2048), + value numeric, + primary key(workflow_id, name) + ); + +CREATE TABLE workflow_annotations_varchar + ( workflow_id varchar(2048), + name varchar(2048), + value varchar(1024), + primary key(workflow_id, name) + ); + +CREATE TABLE workflow_annotations_boolean + ( workflow_id varchar(2048), + name varchar(2048), + value boolean, + primary key(workflow_id, name) + ); -- this GRANT does not work for sqlite; you'll get a syntax error but -- ignore it, as it is not needed in sqlite grant all on dataset_containment, dataset_filenames, dataset_usage, processes_in_workflows, invocation_procedure_names, known_workflows, workflow_events to public, operators; - - From noreply at svn.ci.uchicago.edu Tue Jun 22 12:08:17 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 12:08:17 -0500 (CDT) Subject: [Swift-commit] r3398 - in branches/woz-01/src/org/griphyn/vdl: karajan/lib mapping Message-ID: <20100622170817.B882B9CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 12:08:17 -0500 (Tue, 22 Jun 2010) New Revision: 3398 Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java branches/woz-01/src/org/griphyn/vdl/mapping/AbstractDataNode.java Log: Move some logging from INFO to DEBUG Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-06-21 18:23:44 UTC (rev 3397) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-06-22 17:08:17 UTC (rev 3398) @@ -10,6 +10,7 @@ import org.globus.cog.karajan.workflow.futures.FutureNotYetAvailable; import org.griphyn.vdl.karajan.Pair; import org.griphyn.vdl.karajan.PairIterator; +import org.griphyn.vdl.mapping.AbstractDataNode; import org.griphyn.vdl.mapping.DSHandle; import org.griphyn.vdl.mapping.InvalidPathException; import org.griphyn.vdl.mapping.Path; @@ -32,9 +33,9 @@ Path path = parsePath(OA_PATH.getValue(stack), stack); DSHandle leaf = 
var.getField(path); DSHandle value = (DSHandle) PA_VALUE.getValue(stack); - if (logger.isInfoEnabled()) { - logger.info("Setting " + leaf + " to " + value); - } + + log(leaf, value); + synchronized (var.getRoot()) { // TODO want to do a type check here, for runtime type checking // and pull out the appropriate internal value from value if it @@ -61,7 +62,20 @@ } } - /** make dest look like source - if its a simple value, copy that + private void log(DSHandle leaf, DSHandle value) { + if (logger.isDebugEnabled()) { + logger.debug("Setting " + leaf + " to " + value); + } + else { + if (leaf instanceof AbstractDataNode) { + AbstractDataNode data = (AbstractDataNode) leaf; + String name = data.getDisplayableName(); + logger.info("Set: " + name + "=" + value); + } + } + } + + /** make dest look like source - if its a simple value, copy that and if its an array then recursively copy */ void deepCopy(DSHandle dest, DSHandle source, VariableStack stack) throws ExecutionException { if (source.getType().isPrimitive()) { Modified: branches/woz-01/src/org/griphyn/vdl/mapping/AbstractDataNode.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-06-21 18:23:44 UTC (rev 3397) +++ branches/woz-01/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-06-22 17:08:17 UTC (rev 3398) @@ -151,7 +151,7 @@ return sb.toString(); } - protected String getDisplayableName() { + public String getDisplayableName() { String prefix = getRoot().getParam("dbgname"); if (prefix == null) { prefix = getRoot().getParam("prefix"); @@ -395,7 +395,7 @@ this.closed = true; notifyListeners(); if (logger.isInfoEnabled()) { - logger.info("closed " + this.getIdentifyingString()); + logger.debug("closed " + this.getIdentifyingString()); } // so because its closed, we can dump the contents From noreply at svn.ci.uchicago.edu Tue Jun 22 12:34:29 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 12:34:29 -0500 (CDT) Subject: [Swift-commit] r3399 - branches/woz-01/src/org/griphyn/vdl/karajan/lib Message-ID: <20100622173429.ED8399CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 12:34:29 -0500 (Tue, 22 Jun 2010) New Revision: 3399 Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/VDLFunction.java Log: Moving log messages from INFO to DEBUG Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/VDLFunction.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/VDLFunction.java 2010-06-22 17:08:17 UTC (rev 3398) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/VDLFunction.java 2010-06-22 17:34:29 UTC (rev 3399) @@ -580,12 +580,17 @@ return provenanceIDCount++; } - public static void logProvenanceResult(int id, DSHandle result, String name) throws ExecutionException { - logger.info("FUNCTION id="+id+" name="+name+" result="+result.getIdentifier()); + public static void logProvenanceResult(int id, DSHandle result, + String name) + throws ExecutionException { + if (logger.isDebugEnabled()) + logger.debug("FUNCTION id="+id+" name="+name+" result="+result.getIdentifier()); + else if (logger.isInfoEnabled()) + logger.info("FUNCTION: " + name + "()"); } public static void logProvenanceParameter(int id, DSHandle parameter, String paramName) throws ExecutionException { - logger.info("FUNCTIONPARAMETER id="+id+" input="+parameter.getIdentifier()+" name="+paramName); - + if (logger.isDebugEnabled()) + 
logger.debug("FUNCTIONPARAMETER id="+id+" input="+parameter.getIdentifier()+" name="+paramName); } } From noreply at svn.ci.uchicago.edu Tue Jun 22 12:39:26 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 12:39:26 -0500 (CDT) Subject: [Swift-commit] r3400 - in branches/woz-01/src/org/griphyn/vdl: karajan/lib mapping Message-ID: <20100622173926.A1F179CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 12:39:26 -0500 (Tue, 22 Jun 2010) New Revision: 3400 Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/CloseDataset.java branches/woz-01/src/org/griphyn/vdl/karajan/lib/New.java branches/woz-01/src/org/griphyn/vdl/mapping/AbstractDataNode.java Log: Move more log INFO to DEBUG Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/CloseDataset.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/CloseDataset.java 2010-06-22 17:34:29 UTC (rev 3399) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/CloseDataset.java 2010-06-22 17:39:26 UTC (rev 3400) @@ -23,8 +23,8 @@ Path path = parsePath(OA_PATH.getValue(stack), stack); DSHandle var = (DSHandle) PA_VAR.getValue(stack); try { - if (logger.isInfoEnabled()) { - logger.info("Closing " + var); + if (logger.isDebugEnabled()) { + logger.debug("Closing " + var); } var = var.getField(path); closeChildren(stack, var); Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/New.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/New.java 2010-06-22 17:34:29 UTC (rev 3399) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/New.java 2010-06-22 17:39:26 UTC (rev 3400) @@ -134,7 +134,7 @@ throw new ExecutionException("vdl:new requires a type specification for value " + value); } - logger.info("NEW id="+handle.getIdentifier()); + logger.debug("NEW id="+handle.getIdentifier()); return handle; } catch (Exception e) { Modified: branches/woz-01/src/org/griphyn/vdl/mapping/AbstractDataNode.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-06-22 17:34:29 UTC (rev 3399) +++ branches/woz-01/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-06-22 17:39:26 UTC (rev 3400) @@ -563,8 +563,8 @@ } public synchronized void addListener(DSHandleListener listener) { - if (logger.isInfoEnabled()) { - logger.info("Adding handle listener \"" + listener + "\" to \"" + if (logger.isDebugEnabled()) { + logger.debug("Adding handle listener \"" + listener + "\" to \"" + getIdentifyingString() + "\""); } if (listeners == null) { @@ -582,9 +582,9 @@ while (i.hasNext()) { DSHandleListener listener = (DSHandleListener) i.next(); i.remove(); - if (logger.isInfoEnabled()) { - logger.info("Notifying listener \"" + listener - + "\" about \"" + getIdentifyingString() + "\""); + if (logger.isDebugEnabled()) { + logger.debug("Notifying listener \"" + listener + + "\" about \"" + getIdentifyingString() + "\""); } listener.handleClosed(this); } From noreply at svn.ci.uchicago.edu Tue Jun 22 14:48:19 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 14:48:19 -0500 (CDT) Subject: [Swift-commit] r3401 - branches/woz-01 Message-ID: <20100622194819.06F589CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 14:48:18 -0500 (Tue, 22 Jun 2010) New Revision: 3401 Modified: branches/woz-01/build.xml Log: Skip parser generation if 
parser is ready Modified: branches/woz-01/build.xml =================================================================== --- branches/woz-01/build.xml 2010-06-22 17:39:26 UTC (rev 3400) +++ branches/woz-01/build.xml 2010-06-22 19:48:18 UTC (rev 3401) @@ -82,117 +82,117 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -257,7 +257,9 @@ - + @@ -267,8 +269,15 @@ + + + + + From noreply at svn.ci.uchicago.edu Tue Jun 22 14:50:00 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 14:50:00 -0500 (CDT) Subject: [Swift-commit] r3402 - in branches/woz-01: resources src/org/griphyn/vdl/karajan src/org/griphyn/vdl/karajan/lib Message-ID: <20100622195000.B604A9CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 14:50:00 -0500 (Tue, 22 Jun 2010) New Revision: 3402 Modified: branches/woz-01/resources/Karajan.stg branches/woz-01/src/org/griphyn/vdl/karajan/Loader.java branches/woz-01/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java Log: More INFO -> DEBUG changes Modified: branches/woz-01/resources/Karajan.stg =================================================================== --- branches/woz-01/resources/Karajan.stg 2010-06-22 19:48:18 UTC (rev 3401) +++ branches/woz-01/resources/Karajan.stg 2010-06-22 19:50:00 UTC (rev 3402) @@ -2,6 +2,7 @@ // TODO can move progress ticker start into vdl:mains so karajan files // are smaller + program(types,procedures,declarations,statements,constants,buildversion) ::= << @@ -101,7 +102,7 @@ >> vdl_execute(outputs,inputs,application,name,line) ::= << - + $application.exec$ $inputs:vdl_stagein();separator="\n"$ @@ -109,7 +110,7 @@ $vdl_arguments(arguments=application.arguments, stdin=application.stdin,stdout=application.stdout,stderr=application.stderr)$ $outputs:vdl_closedataset();separator="\n"$ - + >> vdl_log_input() ::= << @@ -213,14 +214,14 @@ $endif$ - -SCOPE thread={#thread} + +SCOPE thread={#thread} $declarations;separator="\n"$ $if(statements)$ $parallel(statements=statements)$ $endif$ - + >> @@ -230,7 +231,7 @@ callInternal(func, outputs, inputs) ::= << - + #thread <$func$> @@ -238,7 +239,7 @@ $inputs:callInternal_log_input();separator="\n"$ - + >> @@ -412,12 +413,12 @@ $condition$ -SCOPE thread={#thread} +SCOPE thread={#thread} $vthen$ $if(velse)$ -SCOPE thread={#thread} +SCOPE thread={#thread} $velse$ $endif$ @@ -544,7 +545,7 @@ - + SLICEARRAY thread={#thread} slice= member=$memberchild$ array= Modified: branches/woz-01/src/org/griphyn/vdl/karajan/Loader.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/Loader.java 2010-06-22 19:48:18 UTC (rev 3401) +++ branches/woz-01/src/org/griphyn/vdl/karajan/Loader.java 2010-06-22 19:50:00 UTC (rev 3402) @@ -477,6 +477,7 @@ ca.activateOptions(); } if (ap.isPresent(ARG_TUI)) { + System.out.println("TUI"); ma = new MonitorAppender(projectName); Logger.getRootLogger().addAppender(ma); Logger.getLogger(Log.class).setLevel(Level.DEBUG); Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java 
=================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java 2010-06-22 19:48:18 UTC (rev 3401) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java 2010-06-22 19:50:00 UTC (rev 3402) @@ -32,15 +32,14 @@ boolean hasUnseenToken = false; DSHandle var = (DSHandle) PA_VAR.getValue(stack); String statementID = (String) OA_STATEMENTID.getValue(stack); - if (logger.isInfoEnabled()) { - logger.info("Partially closing " + var + " for statement " + statementID); + if (logger.isDebugEnabled()) { + logger.debug("Partially closing " + var + + " hash: " + var.hashCode() + + " for statement " + statementID); } - logger.info("var is "+var); - logger.info("var hash is "+var.hashCode()); - if(var.isClosed()) { - logger.info("variable already closed - skipping partial close processing"); + logger.debug("variable already closed - skipping partial close processing"); return null; } @@ -53,10 +52,10 @@ } c.add(statementID); - logger.info("Adding token "+statementID+" with hash "+statementID.hashCode()); + logger.debug("Adding token "+statementID+" with hash "+statementID.hashCode()); String needToWaitFor = (String) var.getParam("waitfor"); - logger.info("need to wait for "+needToWaitFor); + logger.debug("need to wait for "+needToWaitFor); StringTokenizer stok = new StringTokenizer(needToWaitFor, " "); while(stok.hasMoreTokens()) { String s = stok.nextToken(); @@ -66,16 +65,16 @@ // then we have a required element that we have not // seen yet, so... hasUnseenToken = true; - logger.info("Container does not contain token "+s); + logger.debug("Container does not contain token "+s); } else { - logger.info("Container does contain token "+s); + logger.debug("Container does contain token "+s); } } } logger.info("hasUnseenToken = "+hasUnseenToken); if(!hasUnseenToken) { - if(logger.isInfoEnabled()) { - logger.info("All partial closes for " + var + " have happened. Closing fully."); + if(logger.isDebugEnabled()) { + logger.debug("All partial closes for " + var + " have happened. Closing fully."); } var.closeDeep(); pendingDatasets.remove(var); Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-06-22 19:48:18 UTC (rev 3401) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-06-22 19:50:00 UTC (rev 3402) @@ -69,7 +69,8 @@ else { if (leaf instanceof AbstractDataNode) { AbstractDataNode data = (AbstractDataNode) leaf; - String name = data.getDisplayableName(); + String name = data.getDisplayableName() + + data.getPathFromRoot(); logger.info("Set: " + name + "=" + value); } } From noreply at svn.ci.uchicago.edu Tue Jun 22 15:25:36 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 15:25:36 -0500 (CDT) Subject: [Swift-commit] r3403 - in branches/woz-01: . 
etc libexec/log src/org/griphyn/vdl/karajan/lib Message-ID: <20100622202536.62ED99CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 15:25:36 -0500 (Tue, 22 Jun 2010) New Revision: 3403 Modified: branches/woz-01/build.xml branches/woz-01/etc/log4j.properties branches/woz-01/libexec/log/README.txt branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java Log: Only Modified: branches/woz-01/build.xml =================================================================== --- branches/woz-01/build.xml 2010-06-22 19:50:00 UTC (rev 3402) +++ branches/woz-01/build.xml 2010-06-22 20:25:36 UTC (rev 3403) @@ -278,7 +278,9 @@ targetfile="${cog.dir}/modules/${module.name}/src/org/globus/swift/parser/marker.tmp" /> - + @@ -293,8 +295,18 @@ + + + + + + - Modified: branches/woz-01/etc/log4j.properties =================================================================== --- branches/woz-01/etc/log4j.properties 2010-06-22 19:50:00 UTC (rev 3402) +++ branches/woz-01/etc/log4j.properties 2010-06-22 20:25:36 UTC (rev 3403) @@ -9,8 +9,10 @@ log4j.appender.FILE=org.apache.log4j.FileAppender log4j.appender.FILE.File=swift.log log4j.appender.FILE.layout=org.apache.log4j.PatternLayout +# Original log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSSZZZZZ} %-5p %c{1} %m%n - +# Modified +# log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm} %-5p %c{1}.%M %m%n log4j.logger.swift=DEBUG log4j.logger.org.apache.axis.utils=ERROR @@ -23,10 +25,13 @@ log4j.logger.org.globus.cog.karajan.scheduler.WeightedHostScoreScheduler=INFO log4j.logger.org.griphyn.vdl.toolkit.VDLt2VDLx=DEBUG log4j.logger.org.griphyn.vdl.karajan.VDL2ExecutionContext=DEBUG -log4j.logger.org.globus.cog.abstraction.impl.common.task.TaskImpl=DEBUG -log4j.logger.org.griphyn.vdl.karajan.lib.GetFieldValue=DEBUG +log4j.logger.org.globus.cog.abstraction.impl.common.task.TaskImpl=INFO +# log4j.logger.org.griphyn.vdl.karajan.lib.GetFieldValue=DEBUG log4j.logger.org.griphyn.vdl.engine.Karajan=INFO log4j.logger.org.globus.cog.abstraction.coaster.rlog=INFO +# log4j.logger.org.globus.cog.abstraction.impl.scheduler.common.AbstractQueuePoller=DEBUG + # log4j.logger.org.globus.swift.data.Director=DEBUG log4j.logger.org.griphyn.vdl.karajan.lib=INFO +log4j.logger.org.globus.cog.karajan.workflow.service.commands=DEBUG Modified: branches/woz-01/libexec/log/README.txt =================================================================== --- branches/woz-01/libexec/log/README.txt 2010-06-22 19:50:00 UTC (rev 3402) +++ branches/woz-01/libexec/log/README.txt 2010-06-22 20:25:36 UTC (rev 3403) @@ -21,22 +21,3 @@ make -f ${PLOT_HOME}/coasters.mk LOG=./map-20100606-2328-nv9xntzg.log SWIFT_PLOT_HOME=${PLOT_HOME} coaster-block-timeline.png -IMPROVEMENT STRATEGY: - -(READABILITY) - -* Move dataflow operations into makefiles; refer to all scripts and - files in makefiles - -(EFFICIENCY) - -* Replace scripts that make multiple external calls per input data line - with AWK, or, if necessary, Perl - -TODO: - -* Rename all shell scripts to *.sh -* Rename *.makefile to *.mk (DONE) -* Stop using *.data - this clobbers tc.data -* Rename temporary files to *.log.ptmp (plot-temporary) - *.data.ptmp Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java 2010-06-22 19:50:00 UTC (rev 3402) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java 2010-06-22 20:25:36 UTC (rev 3403) @@ -70,6 +70,7 @@ cls = "unknown"; } 
Level lvl = getLevel(TypeUtil.toString(LEVEL.getValue(stack))); + System.out.println(lvl); Logger logger = getLogger(cls); if (logger.isEnabledFor(lvl)) { Object smsg = MESSAGE.getValue(stack); From noreply at svn.ci.uchicago.edu Tue Jun 22 16:15:04 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 16:15:04 -0500 (CDT) Subject: [Swift-commit] r3404 - branches/woz-01/etc Message-ID: <20100622211504.BE2019CC7E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 16:15:04 -0500 (Tue, 22 Jun 2010) New Revision: 3404 Modified: branches/woz-01/etc/log4j.properties Log: Undo unintentional commit to log4j.properties Modified: branches/woz-01/etc/log4j.properties =================================================================== --- branches/woz-01/etc/log4j.properties 2010-06-22 20:25:36 UTC (rev 3403) +++ branches/woz-01/etc/log4j.properties 2010-06-22 21:15:04 UTC (rev 3404) @@ -9,10 +9,8 @@ log4j.appender.FILE=org.apache.log4j.FileAppender log4j.appender.FILE.File=swift.log log4j.appender.FILE.layout=org.apache.log4j.PatternLayout -# Original log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSSZZZZZ} %-5p %c{1} %m%n -# Modified -# log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm} %-5p %c{1}.%M %m%n + log4j.logger.swift=DEBUG log4j.logger.org.apache.axis.utils=ERROR @@ -25,13 +23,10 @@ log4j.logger.org.globus.cog.karajan.scheduler.WeightedHostScoreScheduler=INFO log4j.logger.org.griphyn.vdl.toolkit.VDLt2VDLx=DEBUG log4j.logger.org.griphyn.vdl.karajan.VDL2ExecutionContext=DEBUG -log4j.logger.org.globus.cog.abstraction.impl.common.task.TaskImpl=INFO -# log4j.logger.org.griphyn.vdl.karajan.lib.GetFieldValue=DEBUG +log4j.logger.org.globus.cog.abstraction.impl.common.task.TaskImpl=DEBUG +log4j.logger.org.griphyn.vdl.karajan.lib.GetFieldValue=DEBUG log4j.logger.org.griphyn.vdl.engine.Karajan=INFO log4j.logger.org.globus.cog.abstraction.coaster.rlog=INFO -# log4j.logger.org.globus.cog.abstraction.impl.scheduler.common.AbstractQueuePoller=DEBUG - # log4j.logger.org.globus.swift.data.Director=DEBUG log4j.logger.org.griphyn.vdl.karajan.lib=INFO -log4j.logger.org.globus.cog.karajan.workflow.service.commands=DEBUG From noreply at svn.ci.uchicago.edu Tue Jun 22 16:22:42 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 16:22:42 -0500 (CDT) Subject: [Swift-commit] r3405 - branches/woz-01 Message-ID: <20100622212242.CCE049CC7E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 16:22:42 -0500 (Tue, 22 Jun 2010) New Revision: 3405 Modified: branches/woz-01/build.xml Log: Remove some deletes and overwrites Modified: branches/woz-01/build.xml =================================================================== --- branches/woz-01/build.xml 2010-06-22 21:15:04 UTC (rev 3404) +++ branches/woz-01/build.xml 2010-06-22 21:22:42 UTC (rev 3405) @@ -194,25 +194,23 @@ - - + + + + file="${cog.dir}/modules/${module.name}/etc/karajan.properties" /> + file="${cog.dir}/modules/${module.name}/etc/log4j.properties" /> COG_INSTALL_PATH SWIFT_HOME From noreply at svn.ci.uchicago.edu Tue Jun 22 17:24:48 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 17:24:48 -0500 (CDT) Subject: [Swift-commit] r3406 - branches/woz-01 Message-ID: <20100622222448.209969CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 17:24:47 -0500 (Tue, 22 Jun 2010) New Revision: 3406 Modified: branches/woz-01/build.xml Log: Actually need to overwrite 
properties due to top-level functionality Modified: branches/woz-01/build.xml =================================================================== --- branches/woz-01/build.xml 2010-06-22 21:22:42 UTC (rev 3405) +++ branches/woz-01/build.xml 2010-06-22 22:24:47 UTC (rev 3406) @@ -208,9 +208,11 @@ --> + file="${cog.dir}/modules/${module.name}/etc/karajan.properties" + overwrite="true" /> + file="${cog.dir}/modules/${module.name}/etc/log4j.properties" + overwrite="true" /> COG_INSTALL_PATH SWIFT_HOME From noreply at svn.ci.uchicago.edu Tue Jun 22 17:37:12 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 17:37:12 -0500 (CDT) Subject: [Swift-commit] r3407 - in branches/woz-01/src/org/griphyn/vdl/karajan: . lib Message-ID: <20100622223712.9386C9CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 17:37:12 -0500 (Tue, 22 Jun 2010) New Revision: 3407 Modified: branches/woz-01/src/org/griphyn/vdl/karajan/Loader.java branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java Log: Temporary debugging output Modified: branches/woz-01/src/org/griphyn/vdl/karajan/Loader.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/Loader.java 2010-06-22 22:24:47 UTC (rev 3406) +++ branches/woz-01/src/org/griphyn/vdl/karajan/Loader.java 2010-06-22 22:37:12 UTC (rev 3407) @@ -476,8 +476,8 @@ ca.setThreshold(level); ca.activateOptions(); } + Logger.getLogger(Log.class).setLevel(Level.INFO); if (ap.isPresent(ARG_TUI)) { - System.out.println("TUI"); ma = new MonitorAppender(projectName); Logger.getRootLogger().addAppender(ma); Logger.getLogger(Log.class).setLevel(Level.DEBUG); Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java 2010-06-22 22:24:47 UTC (rev 3406) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java 2010-06-22 22:37:12 UTC (rev 3407) @@ -70,10 +70,12 @@ cls = "unknown"; } Level lvl = getLevel(TypeUtil.toString(LEVEL.getValue(stack))); - System.out.println(lvl); + System.out.println("lvl: " + lvl); Logger logger = getLogger(cls); + System.out.println("cls: " + cls); if (logger.isEnabledFor(lvl)) { Object smsg = MESSAGE.getValue(stack); + System.out.println("message: " + smsg); if (smsg != null) { logger.log(lvl, smsg); } From noreply at svn.ci.uchicago.edu Tue Jun 22 17:55:15 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 17:55:15 -0500 (CDT) Subject: [Swift-commit] r3408 - branches/woz-01/src/org/griphyn/vdl/karajan/lib Message-ID: <20100622225515.D14B99CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 17:55:15 -0500 (Tue, 22 Jun 2010) New Revision: 3408 Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java Log: Undo temporary debugging output Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java 2010-06-22 22:37:12 UTC (rev 3407) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/Log.java 2010-06-22 22:55:15 UTC (rev 3408) @@ -70,12 +70,9 @@ cls = "unknown"; } Level lvl = getLevel(TypeUtil.toString(LEVEL.getValue(stack))); - System.out.println("lvl: " + lvl); Logger logger = getLogger(cls); - System.out.println("cls: " + cls); if (logger.isEnabledFor(lvl)) { Object smsg = MESSAGE.getValue(stack); - 
System.out.println("message: " + smsg); if (smsg != null) { logger.log(lvl, smsg); } From noreply at svn.ci.uchicago.edu Tue Jun 22 17:55:56 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 22 Jun 2010 17:55:56 -0500 (CDT) Subject: [Swift-commit] r3409 - in branches/woz-01: etc src/org/griphyn/vdl/karajan/lib Message-ID: <20100622225556.AB0469CC99@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-22 17:55:56 -0500 (Tue, 22 Jun 2010) New Revision: 3409 Modified: branches/woz-01/etc/log4j.properties branches/woz-01/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java Log: Remove more debugging output Modified: branches/woz-01/etc/log4j.properties =================================================================== --- branches/woz-01/etc/log4j.properties 2010-06-22 22:55:15 UTC (rev 3408) +++ branches/woz-01/etc/log4j.properties 2010-06-22 22:55:56 UTC (rev 3409) @@ -11,7 +11,8 @@ log4j.appender.FILE.layout=org.apache.log4j.PatternLayout log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSSZZZZZ} %-5p %c{1} %m%n -log4j.logger.swift=DEBUG +# Logger for generated Karajan code +log4j.logger.swift=INFO log4j.logger.org.apache.axis.utils=ERROR Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java 2010-06-22 22:55:15 UTC (rev 3408) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java 2010-06-22 22:55:56 UTC (rev 3409) @@ -71,7 +71,7 @@ } } } - logger.info("hasUnseenToken = "+hasUnseenToken); + logger.debug("hasUnseenToken = "+hasUnseenToken); if(!hasUnseenToken) { if(logger.isDebugEnabled()) { logger.debug("All partial closes for " + var + " have happened. 
Closing fully."); From noreply at svn.ci.uchicago.edu Wed Jun 23 11:30:13 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 23 Jun 2010 11:30:13 -0500 (CDT) Subject: [Swift-commit] r3410 - branches/woz-01/src/org/griphyn/vdl/karajan/lib Message-ID: <20100623163013.1BF599CCC9@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-23 11:30:12 -0500 (Wed, 23 Jun 2010) New Revision: 3410 Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/Execute.java Log: Simplify log message for INFO Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/Execute.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/Execute.java 2010-06-22 22:55:56 UTC (rev 3409) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/Execute.java 2010-06-23 16:30:12 UTC (rev 3410) @@ -49,7 +49,12 @@ logger.debug("Submitting task " + task); } String jobid = (String)A_JOBID.getValue(stack,null); - logger.info("jobid="+jobid+" task=" + task); + if (logger.isDebugEnabled()) { + logger.debug("jobid="+jobid+" task=" + task); + } + else if (logger.isInfoEnabled()) { + logger.info("Submit: " + task.getSpecification()); + } scheduler.addJobStatusListener(this, task); synchronized (tasks) { tasks.put(task, stack); From noreply at svn.ci.uchicago.edu Wed Jun 23 13:21:37 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 23 Jun 2010 13:21:37 -0500 (CDT) Subject: [Swift-commit] r3411 - branches/woz-01/src/org/griphyn/vdl/karajan/lib Message-ID: <20100623182137.BF4E49CC7E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-23 13:21:37 -0500 (Wed, 23 Jun 2010) New Revision: 3411 Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java Log: Clean variable display Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-06-23 16:30:12 UTC (rev 3410) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-06-23 18:21:37 UTC (rev 3411) @@ -69,8 +69,11 @@ else { if (leaf instanceof AbstractDataNode) { AbstractDataNode data = (AbstractDataNode) leaf; - String name = data.getDisplayableName() + - data.getPathFromRoot(); + Path path = data.getPathFromRoot(); + String p = path.toString(); + if (p.equals("$")) + p = ""; + String name = data.getDisplayableName() + p; logger.info("Set: " + name + "=" + value); } } From noreply at svn.ci.uchicago.edu Wed Jun 23 13:22:10 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 23 Jun 2010 13:22:10 -0500 (CDT) Subject: [Swift-commit] r3412 - branches/woz-01/src/org/griphyn/vdl/karajan/lib Message-ID: <20100623182210.8172D9CC7E@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-23 13:22:10 -0500 (Wed, 23 Jun 2010) New Revision: 3412 Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/Execute.java Log: Cleaner INFO line for Task submission Modified: branches/woz-01/src/org/griphyn/vdl/karajan/lib/Execute.java =================================================================== --- branches/woz-01/src/org/griphyn/vdl/karajan/lib/Execute.java 2010-06-23 18:21:37 UTC (rev 3411) +++ branches/woz-01/src/org/griphyn/vdl/karajan/lib/Execute.java 2010-06-23 18:22:10 UTC (rev 3412) @@ -5,6 +5,8 @@ import org.apache.log4j.Logger; import org.globus.cog.abstraction.impl.common.StatusEvent; +import 
org.globus.cog.abstraction.interfaces.Specification; +import org.globus.cog.abstraction.interfaces.JobSpecification; import org.globus.cog.abstraction.interfaces.Status; import org.globus.cog.abstraction.interfaces.Task; import org.globus.cog.karajan.arguments.Arg; @@ -44,17 +46,7 @@ Object constraints) throws ExecutionException { try { registerReplica(stack, task); - if (logger.isDebugEnabled()) { - logger.debug(task); - logger.debug("Submitting task " + task); - } - String jobid = (String)A_JOBID.getValue(stack,null); - if (logger.isDebugEnabled()) { - logger.debug("jobid="+jobid+" task=" + task); - } - else if (logger.isInfoEnabled()) { - logger.info("Submit: " + task.getSpecification()); - } + log(task, stack); scheduler.addJobStatusListener(this, task); synchronized (tasks) { tasks.put(task, stack); @@ -69,6 +61,32 @@ } } + void log(Task task, VariableStack stack) + throws ExecutionException + { + if (logger.isDebugEnabled()) { + logger.debug(task); + logger.debug("Submitting task " + task); + } + String jobid = (String)A_JOBID.getValue(stack,null); + if (logger.isDebugEnabled()) { + logger.debug("jobid="+jobid+" task=" + task); + } + else if (logger.isInfoEnabled()) { + Specification spec = task.getSpecification(); + if (spec instanceof JobSpecification) { + JobSpecification jobspec = (JobSpecification) spec; + logger.info("Submit: " + + "in: " + jobspec.getDirectory() + + " command: " + jobspec.getExecutable() + + " " + jobspec.getArguments()); + } + else { + logger.info("Submit: " + spec); + } + } + } + protected void registerReplica(VariableStack stack, Task task) throws CanceledReplicaException { setTaskIdentity(stack, task); try { From noreply at svn.ci.uchicago.edu Thu Jun 24 15:37:31 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 24 Jun 2010 15:37:31 -0500 (CDT) Subject: [Swift-commit] r3413 - branches/woz-01/libexec Message-ID: <20100624203731.7271E9CCC9@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-24 15:37:31 -0500 (Thu, 24 Jun 2010) New Revision: 3413 Modified: branches/woz-01/libexec/vdl-int.k Log: INFO -> DEBUG Modified: branches/woz-01/libexec/vdl-int.k =================================================================== --- branches/woz-01/libexec/vdl-int.k 2010-06-23 18:22:10 UTC (rev 3412) +++ branches/woz-01/libexec/vdl-int.k 2010-06-24 20:37:31 UTC (rev 3413) @@ -180,13 +180,13 @@ * Ideally this would be done by creating a tree of the directories * to be created and (eventually) exploiting the concurrency in that. 
*/ - log(LOG:INFO, "START jobid={jobid} host={host} - Initializing directory structure") + log(LOG:DEBUG, "START jobid={jobid} host={host} - Initializing directory structure") for(u, dirs cacheOn(list(u, destdir, host) createdirs(u, destdir, host) ) ) - log(LOG:INFO, "END jobid={jobid} - Done initializing directory structure") + log(LOG:DEBUG, "END jobid={jobid} - Done initializing directory structure") ) element(cleanup, [dir, host] From noreply at svn.ci.uchicago.edu Tue Jun 29 13:57:28 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 29 Jun 2010 13:57:28 -0500 (CDT) Subject: [Swift-commit] r3414 - trunk/src/org/griphyn/vdl/mapping/file Message-ID: <20100629185728.F2E199CCC8@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-29 13:57:28 -0500 (Tue, 29 Jun 2010) New Revision: 3414 Modified: trunk/src/org/griphyn/vdl/mapping/file/CSVMapper.java Log: Probable bug fix to CSVMapper Modified: trunk/src/org/griphyn/vdl/mapping/file/CSVMapper.java =================================================================== --- trunk/src/org/griphyn/vdl/mapping/file/CSVMapper.java 2010-06-24 20:37:31 UTC (rev 3413) +++ trunk/src/org/griphyn/vdl/mapping/file/CSVMapper.java 2010-06-29 18:57:28 UTC (rev 3414) @@ -14,7 +14,10 @@ import org.griphyn.vdl.mapping.AbsFile; import org.griphyn.vdl.mapping.AbstractMapper; +import org.griphyn.vdl.mapping.DSHandle; +import org.griphyn.vdl.mapping.GeneralizedFileFormat; import org.griphyn.vdl.mapping.InvalidMappingParameterException; +import org.griphyn.vdl.mapping.Mapper; import org.griphyn.vdl.mapping.MappingParam; import org.griphyn.vdl.mapping.Path; import org.griphyn.vdl.mapping.PhysicalFormat; @@ -61,13 +64,16 @@ if (read) { return; } - String file = PARAM_FILE.getStringValue(this); + + String file = getCSVFile(); + String delim = PARAM_DELIMITER.getStringValue(this); String hdelim = PARAM_HDELIMITER.getStringValue(this); boolean header = PARAM_HEADER.getBooleanValue(this); int skip = PARAM_SKIP.getIntValue(this); try { - BufferedReader br = new BufferedReader(new FileReader(file)); + BufferedReader br = + new BufferedReader(new FileReader(file)); String line; StringTokenizer st; @@ -78,10 +84,6 @@ int ix = 0; while (st.hasMoreTokens()) { String column = st.nextToken(); - // TODO PMD reports this for the - // following line: - // An operation on an Immutable object (String, BigDecimal or BigInteger) won't change the object itself - // This is likely a bug column.replaceAll("\\s", "_"); cols.add(column); colindex.put(column, new Integer(ix)); @@ -127,7 +129,19 @@ } } - public Collection existing() { + private String getCSVFile() { + String result = null; + Object object = PARAM_FILE.getRawValue(this); + DSHandle handle = (DSHandle) object; + Mapper mapper = handle.getMapper(); + PhysicalFormat format = mapper.map(Path.EMPTY_PATH); + GeneralizedFileFormat fileFormat = + (GeneralizedFileFormat) format; + result = fileFormat.getPath(); + return result; + } + + public Collection existing() { readFile(); List l = new ArrayList(); Iterator itl = content.iterator(); From noreply at svn.ci.uchicago.edu Tue Jun 29 14:05:17 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 29 Jun 2010 14:05:17 -0500 (CDT) Subject: [Swift-commit] r3415 - trunk/tests/language-behaviour Message-ID: <20100629190517.832F09CCC8@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-29 14:05:17 -0500 (Tue, 29 Jun 2010) New Revision: 3415 Added: trunk/tests/language-behaviour/760-csv-mapper.sh 
trunk/tests/language-behaviour/760-csv-mapper.swift Log: Add CSV mapper test (from recent bug fix) Not sure how to add this to automated testing framework Added: trunk/tests/language-behaviour/760-csv-mapper.sh =================================================================== --- trunk/tests/language-behaviour/760-csv-mapper.sh (rev 0) +++ trunk/tests/language-behaviour/760-csv-mapper.sh 2010-06-29 19:05:17 UTC (rev 3415) @@ -0,0 +1,10 @@ +#!/bin/sh + +NAME=$1 +CSV_OUT=$2 + +{ + echo "m" + touch $NAME + echo $NAME +} > $CSV_OUT Property changes on: trunk/tests/language-behaviour/760-csv-mapper.sh ___________________________________________________________________ Name: svn:executable + * Added: trunk/tests/language-behaviour/760-csv-mapper.swift =================================================================== --- trunk/tests/language-behaviour/760-csv-mapper.swift (rev 0) +++ trunk/tests/language-behaviour/760-csv-mapper.swift 2010-06-29 19:05:17 UTC (rev 3415) @@ -0,0 +1,28 @@ + +type file; + +type s +{ + file m; +} + +app (file out) create () +{ + echo_sh "file1" @filename(out); +} + +(file result) list (file inf) +{ + s diffs[] ; + foreach f, i in diffs + { + tracef("%s\n", @f.m); + } +} + +file o<"tmp.csv">; +file i; +file p; + +o = create(); +p = list(o); From noreply at svn.ci.uchicago.edu Tue Jun 29 16:16:24 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 29 Jun 2010 16:16:24 -0500 (CDT) Subject: [Swift-commit] r3416 - trunk/etc Message-ID: <20100629211624.637EF9CCC6@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-29 16:16:24 -0500 (Tue, 29 Jun 2010) New Revision: 3416 Modified: trunk/etc/log4j.properties Log: New recommended log4j settings Modified: trunk/etc/log4j.properties =================================================================== --- trunk/etc/log4j.properties 2010-06-29 19:05:17 UTC (rev 3415) +++ trunk/etc/log4j.properties 2010-06-29 21:16:24 UTC (rev 3416) @@ -11,7 +11,7 @@ log4j.appender.FILE.layout=org.apache.log4j.PatternLayout log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSSZZZZZ} %-5p %c{1} %m%n -log4j.logger.swift=DEBUG +log4j.logger.swift=INFO log4j.logger.org.apache.axis.utils=ERROR @@ -23,7 +23,7 @@ log4j.logger.org.globus.cog.karajan.scheduler.WeightedHostScoreScheduler=INFO log4j.logger.org.griphyn.vdl.toolkit.VDLt2VDLx=DEBUG log4j.logger.org.griphyn.vdl.karajan.VDL2ExecutionContext=DEBUG -log4j.logger.org.globus.cog.abstraction.impl.common.task.TaskImpl=DEBUG +log4j.logger.org.globus.cog.abstraction.impl.common.task.TaskImpl=INFO log4j.logger.org.griphyn.vdl.karajan.lib.GetFieldValue=DEBUG log4j.logger.org.griphyn.vdl.engine.Karajan=INFO log4j.logger.org.globus.cog.abstraction.coaster.rlog=INFO From noreply at svn.ci.uchicago.edu Tue Jun 29 16:16:55 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 29 Jun 2010 16:16:55 -0500 (CDT) Subject: [Swift-commit] r3417 - in trunk: resources src/org/griphyn/vdl/karajan src/org/griphyn/vdl/karajan/lib src/org/griphyn/vdl/mapping Message-ID: <20100629211655.D77419CCC6@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-29 16:16:55 -0500 (Tue, 29 Jun 2010) New Revision: 3417 Modified: trunk/resources/Karajan.stg trunk/src/org/griphyn/vdl/karajan/Loader.java trunk/src/org/griphyn/vdl/karajan/lib/CloseDataset.java trunk/src/org/griphyn/vdl/karajan/lib/Execute.java trunk/src/org/griphyn/vdl/karajan/lib/New.java trunk/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java 
trunk/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java Log: New Swift log formatting Modified: trunk/resources/Karajan.stg =================================================================== --- trunk/resources/Karajan.stg 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/resources/Karajan.stg 2010-06-29 21:16:55 UTC (rev 3417) @@ -2,6 +2,7 @@ // TODO can move progress ticker start into vdl:mains so karajan files // are smaller + program(types,procedures,declarations,statements,constants,buildversion) ::= << @@ -101,7 +102,7 @@ >> vdl_execute(outputs,inputs,application,name,line) ::= << - + $application.exec$ $inputs:vdl_stagein();separator="\n"$ @@ -109,7 +110,7 @@ $vdl_arguments(arguments=application.arguments, stdin=application.stdin,stdout=application.stdout,stderr=application.stderr)$ $outputs:vdl_closedataset();separator="\n"$ - + >> vdl_log_input() ::= << @@ -213,14 +214,14 @@ $endif$ - -SCOPE thread={#thread} + +SCOPE thread={#thread} $declarations;separator="\n"$ $if(statements)$ $parallel(statements=statements)$ $endif$ - + >> @@ -230,7 +231,7 @@ callInternal(func, outputs, inputs) ::= << - + #thread <$func$> @@ -238,7 +239,7 @@ $inputs:callInternal_log_input();separator="\n"$ - + >> @@ -412,12 +413,12 @@ $condition$ -SCOPE thread={#thread} +SCOPE thread={#thread} $vthen$ $if(velse)$ -SCOPE thread={#thread} +SCOPE thread={#thread} $velse$ $endif$ @@ -544,7 +545,7 @@ - + SLICEARRAY thread={#thread} slice= member=$memberchild$ array= Modified: trunk/src/org/griphyn/vdl/karajan/Loader.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/Loader.java 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/src/org/griphyn/vdl/karajan/Loader.java 2010-06-29 21:16:55 UTC (rev 3417) @@ -476,6 +476,7 @@ ca.setThreshold(level); ca.activateOptions(); } + Logger.getLogger(Log.class).setLevel(Level.INFO); if (ap.isPresent(ARG_TUI)) { ma = new MonitorAppender(projectName); Logger.getRootLogger().addAppender(ma); Modified: trunk/src/org/griphyn/vdl/karajan/lib/CloseDataset.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/CloseDataset.java 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/src/org/griphyn/vdl/karajan/lib/CloseDataset.java 2010-06-29 21:16:55 UTC (rev 3417) @@ -23,8 +23,8 @@ Path path = parsePath(OA_PATH.getValue(stack), stack); DSHandle var = (DSHandle) PA_VAR.getValue(stack); try { - if (logger.isInfoEnabled()) { - logger.info("Closing " + var); + if (logger.isDebugEnabled()) { + logger.debug("Closing " + var); } var = var.getField(path); closeChildren(stack, var); Modified: trunk/src/org/griphyn/vdl/karajan/lib/Execute.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/Execute.java 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/src/org/griphyn/vdl/karajan/lib/Execute.java 2010-06-29 21:16:55 UTC (rev 3417) @@ -5,6 +5,8 @@ import org.apache.log4j.Logger; import org.globus.cog.abstraction.impl.common.StatusEvent; +import org.globus.cog.abstraction.interfaces.Specification; +import org.globus.cog.abstraction.interfaces.JobSpecification; import org.globus.cog.abstraction.interfaces.Status; import org.globus.cog.abstraction.interfaces.Task; import org.globus.cog.karajan.arguments.Arg; @@ -44,12 +46,7 @@ Object constraints) throws ExecutionException { try { registerReplica(stack, task); - if 
(logger.isDebugEnabled()) { - logger.debug(task); - logger.debug("Submitting task " + task); - } - String jobid = (String)A_JOBID.getValue(stack,null); - logger.info("jobid="+jobid+" task=" + task); + log(task, stack); scheduler.addJobStatusListener(this, task); synchronized (tasks) { tasks.put(task, stack); @@ -64,6 +61,32 @@ } } + void log(Task task, VariableStack stack) + throws ExecutionException + { + if (logger.isDebugEnabled()) { + logger.debug(task); + logger.debug("Submitting task " + task); + } + String jobid = (String)A_JOBID.getValue(stack,null); + if (logger.isDebugEnabled()) { + logger.debug("jobid="+jobid+" task=" + task); + } + else if (logger.isInfoEnabled()) { + Specification spec = task.getSpecification(); + if (spec instanceof JobSpecification) { + JobSpecification jobspec = (JobSpecification) spec; + logger.info("Submit: " + + "in: " + jobspec.getDirectory() + + " command: " + jobspec.getExecutable() + + " " + jobspec.getArguments()); + } + else { + logger.info("Submit: " + spec); + } + } + } + protected void registerReplica(VariableStack stack, Task task) throws CanceledReplicaException { setTaskIdentity(stack, task); try { Modified: trunk/src/org/griphyn/vdl/karajan/lib/New.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/New.java 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/src/org/griphyn/vdl/karajan/lib/New.java 2010-06-29 21:16:55 UTC (rev 3417) @@ -134,7 +134,7 @@ throw new ExecutionException("vdl:new requires a type specification for value " + value); } - logger.info("NEW id="+handle.getIdentifier()); + logger.debug("NEW id="+handle.getIdentifier()); return handle; } catch (Exception e) { Modified: trunk/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/src/org/griphyn/vdl/karajan/lib/PartialCloseDataset.java 2010-06-29 21:16:55 UTC (rev 3417) @@ -32,15 +32,14 @@ boolean hasUnseenToken = false; DSHandle var = (DSHandle) PA_VAR.getValue(stack); String statementID = (String) OA_STATEMENTID.getValue(stack); - if (logger.isInfoEnabled()) { - logger.info("Partially closing " + var + " for statement " + statementID); + if (logger.isDebugEnabled()) { + logger.debug("Partially closing " + var + + " hash: " + var.hashCode() + + " for statement " + statementID); } - logger.info("var is "+var); - logger.info("var hash is "+var.hashCode()); - if(var.isClosed()) { - logger.info("variable already closed - skipping partial close processing"); + logger.debug("variable already closed - skipping partial close processing"); return null; } @@ -53,10 +52,10 @@ } c.add(statementID); - logger.info("Adding token "+statementID+" with hash "+statementID.hashCode()); + logger.debug("Adding token "+statementID+" with hash "+statementID.hashCode()); String needToWaitFor = (String) var.getParam("waitfor"); - logger.info("need to wait for "+needToWaitFor); + logger.debug("need to wait for "+needToWaitFor); StringTokenizer stok = new StringTokenizer(needToWaitFor, " "); while(stok.hasMoreTokens()) { String s = stok.nextToken(); @@ -66,16 +65,16 @@ // then we have a required element that we have not // seen yet, so... 
hasUnseenToken = true; - logger.info("Container does not contain token "+s); + logger.debug("Container does not contain token "+s); } else { - logger.info("Container does contain token "+s); + logger.debug("Container does contain token "+s); } } } - logger.info("hasUnseenToken = "+hasUnseenToken); + logger.debug("hasUnseenToken = "+hasUnseenToken); if(!hasUnseenToken) { - if(logger.isInfoEnabled()) { - logger.info("All partial closes for " + var + " have happened. Closing fully."); + if(logger.isDebugEnabled()) { + logger.debug("All partial closes for " + var + " have happened. Closing fully."); } var.closeDeep(); pendingDatasets.remove(var); Modified: trunk/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-06-29 21:16:55 UTC (rev 3417) @@ -10,6 +10,7 @@ import org.globus.cog.karajan.workflow.futures.FutureNotYetAvailable; import org.griphyn.vdl.karajan.Pair; import org.griphyn.vdl.karajan.PairIterator; +import org.griphyn.vdl.mapping.AbstractDataNode; import org.griphyn.vdl.mapping.DSHandle; import org.griphyn.vdl.mapping.InvalidPathException; import org.griphyn.vdl.mapping.Path; @@ -32,9 +33,9 @@ Path path = parsePath(OA_PATH.getValue(stack), stack); DSHandle leaf = var.getField(path); DSHandle value = (DSHandle) PA_VALUE.getValue(stack); - if (logger.isInfoEnabled()) { - logger.info("Setting " + leaf + " to " + value); - } + + log(leaf, value); + synchronized (var.getRoot()) { // TODO want to do a type check here, for runtime type checking // and pull out the appropriate internal value from value if it @@ -61,7 +62,24 @@ } } - /** make dest look like source - if its a simple value, copy that + private void log(DSHandle leaf, DSHandle value) { + if (logger.isDebugEnabled()) { + logger.debug("Setting " + leaf + " to " + value); + } + else if (logger.isInfoEnabled()) { + if (leaf instanceof AbstractDataNode) { + AbstractDataNode data = (AbstractDataNode) leaf; + Path path = data.getPathFromRoot(); + String p = path.toString(); + if (p.equals("$")) + p = ""; + String name = data.getDisplayableName() + p; + logger.info("Set: " + name + "=" + value); + } + } + } + + /** make dest look like source - if its a simple value, copy that and if its an array then recursively copy */ void deepCopy(DSHandle dest, DSHandle source, VariableStack stack) throws ExecutionException { if (source.getType().isPrimitive()) { Modified: trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java 2010-06-29 21:16:55 UTC (rev 3417) @@ -580,12 +580,17 @@ return provenanceIDCount++; } - public static void logProvenanceResult(int id, DSHandle result, String name) throws ExecutionException { - logger.info("FUNCTION id="+id+" name="+name+" result="+result.getIdentifier()); + public static void logProvenanceResult(int id, DSHandle result, + String name) + throws ExecutionException { + if (logger.isDebugEnabled()) + logger.debug("FUNCTION id="+id+" name="+name+" result="+result.getIdentifier()); + else if (logger.isInfoEnabled()) + logger.info("FUNCTION: " + name + "()"); } public static void logProvenanceParameter(int id, DSHandle parameter, String paramName) throws 
ExecutionException { - logger.info("FUNCTIONPARAMETER id="+id+" input="+parameter.getIdentifier()+" name="+paramName); - + if (logger.isDebugEnabled()) + logger.debug("FUNCTIONPARAMETER id="+id+" input="+parameter.getIdentifier()+" name="+paramName); } } Modified: trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java =================================================================== --- trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-06-29 21:16:24 UTC (rev 3416) +++ trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-06-29 21:16:55 UTC (rev 3417) @@ -151,7 +151,7 @@ return sb.toString(); } - protected String getDisplayableName() { + public String getDisplayableName() { String prefix = getRoot().getParam("dbgname"); if (prefix == null) { prefix = getRoot().getParam("prefix"); @@ -395,7 +395,7 @@ this.closed = true; notifyListeners(); if (logger.isInfoEnabled()) { - logger.info("closed " + this.getIdentifyingString()); + logger.debug("closed " + this.getIdentifyingString()); } // so because its closed, we can dump the contents @@ -563,8 +563,8 @@ } public synchronized void addListener(DSHandleListener listener) { - if (logger.isInfoEnabled()) { - logger.info("Adding handle listener \"" + listener + "\" to \"" + if (logger.isDebugEnabled()) { + logger.debug("Adding handle listener \"" + listener + "\" to \"" + getIdentifyingString() + "\""); } if (listeners == null) { @@ -582,9 +582,9 @@ while (i.hasNext()) { DSHandleListener listener = (DSHandleListener) i.next(); i.remove(); - if (logger.isInfoEnabled()) { - logger.info("Notifying listener \"" + listener - + "\" about \"" + getIdentifyingString() + "\""); + if (logger.isDebugEnabled()) { + logger.debug("Notifying listener \"" + listener + + "\" about \"" + getIdentifyingString() + "\""); } listener.handleClosed(this); } From noreply at svn.ci.uchicago.edu Wed Jun 30 17:31:37 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 30 Jun 2010 17:31:37 -0500 (CDT) Subject: [Swift-commit] r3419 - in trunk/src/org/griphyn/vdl: engine karajan/lib karajan/lib/swiftscript Message-ID: <20100630223137.47D059CC94@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-30 17:31:37 -0500 (Wed, 30 Jun 2010) New Revision: 3419 Modified: trunk/src/org/griphyn/vdl/engine/ProcedureSignature.java trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java trunk/src/org/griphyn/vdl/karajan/lib/swiftscript/Misc.java Log: New @dirname() Modified: trunk/src/org/griphyn/vdl/engine/ProcedureSignature.java =================================================================== --- trunk/src/org/griphyn/vdl/engine/ProcedureSignature.java 2010-06-30 05:37:34 UTC (rev 3418) +++ trunk/src/org/griphyn/vdl/engine/ProcedureSignature.java 2010-06-30 22:31:37 UTC (rev 3419) @@ -180,14 +180,21 @@ FormalArgumentSignature filenameOut1 = new FormalArgumentSignature("string"); filename.addOutputArg(filenameOut1); functionsMap.put(filename.getName(), filename); - + ProcedureSignature filenames = new ProcedureSignature("filenames"); FormalArgumentSignature filenamesIn1 = new FormalArgumentSignature(true); /* file can be specified as any type */ filenames.addInputArg(filenamesIn1); FormalArgumentSignature filenamesOut1 = new FormalArgumentSignature("string[]"); /* i think this is what it returns */ filenames.addOutputArg(filenamesOut1); functionsMap.put(filenames.getName(), filenames); - + + ProcedureSignature dirname = new ProcedureSignature("dirname"); + FormalArgumentSignature dirnameIn1 = new FormalArgumentSignature(true); /* dir 
can be specified as any type */ + dirname.addInputArg(dirnameIn1); + FormalArgumentSignature dirnameOut1 = new FormalArgumentSignature("string"); + dirname.addOutputArg(dirnameOut1); + functionsMap.put(dirname.getName(), dirname); + ProcedureSignature regexp = new ProcedureSignature("regexp"); FormalArgumentSignature regexpIn1 = new FormalArgumentSignature("string"); regexp.addInputArg(regexpIn1); Modified: trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java 2010-06-30 05:37:34 UTC (rev 3418) +++ trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java 2010-06-30 22:31:37 UTC (rev 3419) @@ -175,7 +175,7 @@ public static final String[] EMPTY_STRING_ARRAY = new String[0]; - public String[] filename(VariableStack stack) throws ExecutionException { + public static String[] filename(VariableStack stack) throws ExecutionException { DSHandle ovar = (DSHandle)PA_VAR.getValue(stack); synchronized(ovar.getRoot()) { try { @@ -475,7 +475,7 @@ } } - protected static Future addFutureListener(VariableStack stack, DSHandle handle) + public static Future addFutureListener(VariableStack stack, DSHandle handle) throws ExecutionException { assert Thread.holdsLock(handle.getRoot()); return getFutureWrapperMap(stack).addNodeListener(handle); Modified: trunk/src/org/griphyn/vdl/karajan/lib/swiftscript/Misc.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/swiftscript/Misc.java 2010-06-30 05:37:34 UTC (rev 3418) +++ trunk/src/org/griphyn/vdl/karajan/lib/swiftscript/Misc.java 2010-06-30 22:31:37 UTC (rev 3419) @@ -11,6 +11,7 @@ import org.globus.cog.karajan.util.TypeUtil; import org.globus.cog.karajan.workflow.ExecutionException; import org.globus.cog.karajan.workflow.nodes.functions.FunctionsCollection; +import org.griphyn.vdl.karajan.lib.PathUtils; import org.griphyn.vdl.karajan.lib.SwiftArg; import org.griphyn.vdl.karajan.lib.VDLFunction; import org.griphyn.vdl.mapping.DSHandle; @@ -24,6 +25,9 @@ import org.griphyn.vdl.type.Types; import org.griphyn.vdl.util.VDL2Config; +import org.griphyn.vdl.mapping.AbsFile; +import org.globus.cog.karajan.workflow.futures.FutureNotYetAvailable; + public class Misc extends FunctionsCollection { private static final Logger logger = Logger.getLogger(Misc.class); @@ -37,11 +41,12 @@ setArguments("swiftscript_tracef", new Arg[] { Arg.VARGS }); setArguments("swiftscript_strcat", new Arg[] { Arg.VARGS }); setArguments("swiftscript_strcut", new Arg[] { PA_INPUT, PA_PATTERN }); - setArguments("swiftscript_strstr", new Arg[] { PA_INPUT, PA_PATTERN }); + setArguments("swiftscript_strstr", new Arg[] { PA_INPUT, PA_PATTERN }); setArguments("swiftscript_strsplit", new Arg[] { PA_INPUT, PA_PATTERN }); setArguments("swiftscript_regexp", new Arg[] { PA_INPUT, PA_PATTERN, PA_TRANSFORM }); setArguments("swiftscript_toint", new Arg[] { PA_INPUT }); setArguments("swiftscript_tostring", new Arg[] { PA_INPUT }); + setArguments("swiftscript_dirname", new Arg[] { Arg.VARGS }); } private static final Logger traceLogger = Logger.getLogger("org.globus.swift.trace"); @@ -362,12 +367,42 @@ return handle; } - public DSHandle swiftscript_tostring(VariableStack stack) throws ExecutionException, NoSuchTypeException, - InvalidPathException { - Object input = PA_INPUT.getValue(stack); - DSHandle handle = new RootDataNode(Types.STRING); - handle.setValue(""+input); - handle.closeShallow(); - return handle; + public 
DSHandle swiftscript_tostring(VariableStack stack) + throws ExecutionException, NoSuchTypeException, + InvalidPathException { + Object input = PA_INPUT.getValue(stack); + DSHandle handle = new RootDataNode(Types.STRING); + handle.setValue(""+input); + handle.closeShallow(); + return handle; } + + public DSHandle swiftscript_dirname(VariableStack stack) + throws ExecutionException, NoSuchTypeException, InvalidPathException { + DSHandle handle; + try + { + DSHandle[] args = SwiftArg.VARGS.asDSHandleArray(stack); + DSHandle arg = args[0]; + String[] input = VDLFunction.filename(arg); + String name = input[0]; + String result = new AbsFile(name).getDir(); + handle = new RootDataNode(Types.STRING); + handle.setValue(result); + handle.closeShallow(); + } + catch (HandleOpenException e) { + throw new FutureNotYetAvailable + (VDLFunction.addFutureListener(stack, e.getSource())); + } + return handle; + } } + +/* + * Local Variables: + * c-basic-offset: 8 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ From noreply at svn.ci.uchicago.edu Wed Jun 30 17:31:48 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 30 Jun 2010 17:31:48 -0500 (CDT) Subject: [Swift-commit] r3420 - trunk/libexec Message-ID: <20100630223148.D2C109CC94@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-30 17:31:48 -0500 (Wed, 30 Jun 2010) New Revision: 3420 Modified: trunk/libexec/vdl-lib.xml Log: Linkage for @dirname() Modified: trunk/libexec/vdl-lib.xml =================================================================== --- trunk/libexec/vdl-lib.xml 2010-06-30 22:31:37 UTC (rev 3419) +++ trunk/libexec/vdl-lib.xml 2010-06-30 22:31:48 UTC (rev 3420) @@ -18,6 +18,7 @@ + From noreply at svn.ci.uchicago.edu Wed Jun 30 17:33:17 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 30 Jun 2010 17:33:17 -0500 (CDT) Subject: [Swift-commit] r3421 - trunk/tests/language-behaviour Message-ID: <20100630223317.9235F9CC94@vm-125-59.ci.uchicago.edu> Author: wozniak Date: 2010-06-30 17:33:17 -0500 (Wed, 30 Jun 2010) New Revision: 3421 Added: trunk/tests/language-behaviour/761-dirname.swift Log: Test for @dirname() Added: trunk/tests/language-behaviour/761-dirname.swift =================================================================== --- trunk/tests/language-behaviour/761-dirname.swift (rev 0) +++ trunk/tests/language-behaviour/761-dirname.swift 2010-06-30 22:33:17 UTC (rev 3421) @@ -0,0 +1,9 @@ + +type file{} + +file f<"/d/f.txt">; + +string s = @dirname(f); +string t = @tostring(s); + +tracef("dirname: %s\n", s);
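As a quick sanity check on the new built-in, here is a minimal sketch (not part of any commit above) of the call that swiftscript_dirname() ultimately delegates to. AbsFile and getDir() are taken from the Misc.java hunk; the class name, the main() wrapper and the expected output are assumptions added for illustration only.

import org.griphyn.vdl.mapping.AbsFile;

public class DirnameSketch {
    public static void main(String[] args) {
        // swiftscript_dirname() resolves the mapped filename of its argument
        // and then calls AbsFile.getDir(); for a mapping like <"/d/f.txt">,
        // as in the 761-dirname.swift test, this presumably prints "/d".
        System.out.println(new AbsFile("/d/f.txt").getDir());
    }
}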