From noreply at svn.ci.uchicago.edu Sun Jan 2 16:31:01 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sun, 2 Jan 2011 16:31:01 -0600 (CST) Subject: [Swift-commit] r3836 - in trunk/src/org/griphyn/vdl: karajan/lib mapping Message-ID: <20110102223101.7C9E9FC41@svn.ci.uchicago.edu> Author: hategan Date: 2011-01-02 16:31:00 -0600 (Sun, 02 Jan 2011) New Revision: 3836 Modified: trunk/src/org/griphyn/vdl/karajan/lib/CloseDataset.java trunk/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java trunk/src/org/griphyn/vdl/mapping/ArrayDataNode.java trunk/src/org/griphyn/vdl/mapping/Path.java Log: merged changes from 1.0 branch Modified: trunk/src/org/griphyn/vdl/karajan/lib/CloseDataset.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/CloseDataset.java 2010-12-31 23:39:12 UTC (rev 3835) +++ trunk/src/org/griphyn/vdl/karajan/lib/CloseDataset.java 2011-01-02 22:31:00 UTC (rev 3836) @@ -6,6 +6,7 @@ import org.apache.log4j.Logger; import org.globus.cog.karajan.arguments.Arg; import org.globus.cog.karajan.stack.VariableStack; +import org.globus.cog.karajan.util.TypeUtil; import org.globus.cog.karajan.workflow.ExecutionException; import org.griphyn.vdl.mapping.DSHandle; import org.griphyn.vdl.mapping.InvalidPathException; @@ -13,12 +14,13 @@ public class CloseDataset extends VDLFunction { public static final Logger logger = Logger.getLogger(CloseDataset.class); + + public static final Arg OA_CHILDREN_ONLY = new Arg.Optional("childrenOnly", Boolean.FALSE); static { - setArguments(CloseDataset.class, new Arg[] { PA_VAR, OA_PATH }); + setArguments(CloseDataset.class, new Arg[] { PA_VAR, OA_PATH, OA_CHILDREN_ONLY }); } - // TODO path is not used! 
public Object function(VariableStack stack) throws ExecutionException { Path path = parsePath(OA_PATH.getValue(stack), stack); DSHandle var = (DSHandle) PA_VAR.getValue(stack); @@ -27,7 +29,13 @@ logger.debug("Closing " + var); } var = var.getField(path); - closeChildren(stack, var); + + if (TypeUtil.toBoolean(OA_CHILDREN_ONLY.getValue(stack))) { + closeChildren(stack, var); + } + else { + closeDeep(stack, var); + } } catch (InvalidPathException e) { throw new ExecutionException(e); Modified: trunk/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2010-12-31 23:39:12 UTC (rev 3835) +++ trunk/src/org/griphyn/vdl/karajan/lib/SetFieldValue.java 2011-01-02 22:31:00 UTC (rev 3836) @@ -42,6 +42,9 @@ if (!value.isClosed()) { throw new FutureNotYetAvailable(addFutureListener(stack, value)); } + if (var.getParent() != null && var.getParent().getType().isArray()) { + markAsAvailable(stack, leaf.getParent(), leaf.getPathFromRoot().getLast()); + } } synchronized (var.getRoot()) { deepCopy(leaf, value, stack); Modified: trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java 2010-12-31 23:39:12 UTC (rev 3835) +++ trunk/src/org/griphyn/vdl/karajan/lib/VDLFunction.java 2011-01-02 22:31:00 UTC (rev 3836) @@ -297,9 +297,10 @@ return relativize(String.valueOf(var.getValue())); } else { - PhysicalFormat f; - Path pathFromRoot = var.getPathFromRoot(); - f = mapper.map(pathFromRoot); + if (var.getMapper() == null) { + throw new ExecutionException("Cannot invoke filename() on data without a mapper: " + var); + } + PhysicalFormat f = var.getMapper().map(var.getPathFromRoot()); if (f instanceof GeneralizedFileFormat) { String filename = ((GeneralizedFileFormat) f).getURIAsString(); if (filename == null) { @@ -451,8 +452,31 @@ 
markToRoot(stack, handle); } } + + protected void closeDeep(VariableStack stack, DSHandle handle) + throws ExecutionException, InvalidPathException { + synchronized(handle.getRoot()) { + closeDeep(stack, handle, getFutureWrapperMap(stack)); + } + } - private void markToRoot(VariableStack stack, DSHandle handle) throws ExecutionException { + private void closeDeep(VariableStack stack, DSHandle handle, + WrapperMap hash) throws InvalidPathException, ExecutionException { + handle.closeShallow(); + hash.close(handle); + try { + // Mark all leaves + Iterator it = handle.getFields(Path.CHILDREN).iterator(); + while (it.hasNext()) { + closeDeep(stack, (DSHandle) it.next(), hash); + } + } + catch (HandleOpenException e) { + throw new ExecutionException("Handle open in closeChildren",e); + } + } + + private void markToRoot(VariableStack stack, DSHandle handle) throws ExecutionException { // Also mark all arrays from root Path fullPath = handle.getPathFromRoot(); DSHandle root = handle.getRoot(); Modified: trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java =================================================================== --- trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-12-31 23:39:12 UTC (rev 3835) +++ trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2011-01-02 22:31:00 UTC (rev 3836) @@ -550,6 +550,26 @@ } return pathFromRoot; } + + /** + * Recursively closes arrays through a tree of arrays and complex types. 
+ */ + public void closeArraySizes() { + if (!this.closed && this.getType().isArray()) { + closeShallow(); + } + synchronized (handles) { + Iterator i = handles.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry e = (Map.Entry) i.next(); + AbstractDataNode child = (AbstractDataNode) e.getValue(); + if (child.getType().isArray() || + child.getType().getFields().size() > 0) { + child.closeArraySizes(); + } + } + } + } public Mapper getMapper() { return ((AbstractDataNode) getRoot()).getMapper(); Modified: trunk/src/org/griphyn/vdl/mapping/ArrayDataNode.java =================================================================== --- trunk/src/org/griphyn/vdl/mapping/ArrayDataNode.java 2010-12-31 23:39:12 UTC (rev 3835) +++ trunk/src/org/griphyn/vdl/mapping/ArrayDataNode.java 2011-01-02 22:31:00 UTC (rev 3836) @@ -42,6 +42,24 @@ } } } + + /** Recursively closes arrays through a tree of arrays and complex + types. */ + public void closeDeep() { + assert(this.getType().isArray()); + if (!this.isClosed()) { + closeShallow(); + } + Map handles = getHandles(); + synchronized (handles) { + Iterator i = handles.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry e = (Map.Entry) i.next(); + AbstractDataNode child = (AbstractDataNode) e.getValue(); + child.closeDeep(); + } + } + } public boolean isArray() { Modified: trunk/src/org/griphyn/vdl/mapping/Path.java =================================================================== --- trunk/src/org/griphyn/vdl/mapping/Path.java 2010-12-31 23:39:12 UTC (rev 3835) +++ trunk/src/org/griphyn/vdl/mapping/Path.java 2011-01-02 22:31:00 UTC (rev 3836) @@ -179,7 +179,7 @@ } public String getLast() { - return ((Entry) elements.get(elements.size() - 1)).name; + return ((Entry) elements.get(elements.size() - 1)).name; } public boolean isEmpty() { From noreply at svn.ci.uchicago.edu Mon Jan 3 23:18:08 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 3 Jan 2011 23:18:08 -0600 (CST) Subject: 
[Swift-commit] r3837 - trunk/src/org/griphyn/vdl/mapping Message-ID: <20110104051808.6D2B89CCC2@svn.ci.uchicago.edu> Author: hategan Date: 2011-01-03 23:18:08 -0600 (Mon, 03 Jan 2011) New Revision: 3837 Modified: trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java Log: removed duplicate method after merge Modified: trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java =================================================================== --- trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2011-01-02 22:31:00 UTC (rev 3836) +++ trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2011-01-04 05:18:08 UTC (rev 3837) @@ -550,26 +550,6 @@ } return pathFromRoot; } - - /** - * Recursively closes arrays through a tree of arrays and complex types. - */ - public void closeArraySizes() { - if (!this.closed && this.getType().isArray()) { - closeShallow(); - } - synchronized (handles) { - Iterator i = handles.entrySet().iterator(); - while (i.hasNext()) { - Map.Entry e = (Map.Entry) i.next(); - AbstractDataNode child = (AbstractDataNode) e.getValue(); - if (child.getType().isArray() || - child.getType().getFields().size() > 0) { - child.closeArraySizes(); - } - } - } - } public Mapper getMapper() { return ((AbstractDataNode) getRoot()).getMapper(); From noreply at svn.ci.uchicago.edu Mon Jan 3 23:28:18 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 3 Jan 2011 23:28:18 -0600 (CST) Subject: [Swift-commit] r3838 - branches Message-ID: <20110104052818.985F19CCC2@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-03 23:28:18 -0600 (Mon, 03 Jan 2011) New Revision: 3838 Added: branches/release-0.92/ Log: branching to stabilize for release Copied: branches/release-0.92 (from rev 3837, trunk) From noreply at svn.ci.uchicago.edu Tue Jan 4 14:06:02 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 4 Jan 2011 14:06:02 -0600 (CST) Subject: [Swift-commit] r3839 - trunk/tests/groups Message-ID: 
<20110104200602.4B31F9CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-04 14:06:02 -0600 (Tue, 04 Jan 2011) New Revision: 3839 Added: trunk/tests/groups/group-mpi.sh Log: Add group-mpi Added: trunk/tests/groups/group-mpi.sh =================================================================== --- trunk/tests/groups/group-mpi.sh (rev 0) +++ trunk/tests/groups/group-mpi.sh 2011-01-04 20:06:02 UTC (rev 3839) @@ -0,0 +1,6 @@ + +# GROUPLIST definition to run local MPI test + +GROUPLIST=( $TESTDIR/mpi ) + +checkvars WORK From noreply at svn.ci.uchicago.edu Tue Jan 4 15:41:40 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 4 Jan 2011 15:41:40 -0600 (CST) Subject: [Swift-commit] r3840 - in www: . downloads inc main Message-ID: <20110104214140.EAD059CC94@svn.ci.uchicago.edu> Author: hategan Date: 2011-01-04 15:41:40 -0600 (Tue, 04 Jan 2011) New Revision: 3840 Added: www/index.html www/main/ www/main/index.php Removed: www/index.php Modified: www/downloads/index.php www/inc/downloads_sidebar.php www/inc/footer.php www/inc/header.php www/inc/home_sidebar.php www/inc/nav.php www/inc/papers_sidebar.php Log: made internal links relative Modified: www/downloads/index.php =================================================================== --- www/downloads/index.php 2011-01-04 20:06:02 UTC (rev 3839) +++ www/downloads/index.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -226,7 +226,7 @@

Swift 0.4 - 2008/03/18

Swift v0.4 is a development release intended to release functionality and fixes that have gone in to trunk since v0.3. More details are contained -in the release notes. +in the release notes. [vdsk-0.4.tar.gz] [release-notes-0.4.txt]

Modified: www/inc/downloads_sidebar.php =================================================================== --- www/inc/downloads_sidebar.php 2011-01-04 20:06:02 UTC (rev 3839) +++ www/inc/downloads_sidebar.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -1,6 +1,6 @@

GETTING STARTED

-Swift Quickstart Guide [html]

+Swift Quickstart Guide [html]

Use the Quickstart Guide to help you install and configure swift and run a simple 'Hello World' example.

BUG REPORTS

Modified: www/inc/footer.php =================================================================== --- www/inc/footer.php 2011-01-04 20:06:02 UTC (rev 3839) +++ www/inc/footer.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -1,7 +1,7 @@ -HOME - : DOWNLOADS - : SUPPORT - : DOCUMENTATION - : PAPERS +HOME + : DOWNLOADS + : SUPPORT + : DOCUMENTATION + : PAPERS Modified: www/inc/header.php =================================================================== --- www/inc/header.php 2011-01-04 20:06:02 UTC (rev 3839) +++ www/inc/header.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -1,3 +1,3 @@ -Swift - +Swift + Modified: www/inc/home_sidebar.php =================================================================== --- www/inc/home_sidebar.php 2011-01-04 20:06:02 UTC (rev 3839) +++ www/inc/home_sidebar.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -1,10 +1,10 @@

What's New?

Paper published

-

Parallel Scripting for Applications at the Petascale and Beyond [pdf]


+

Parallel Scripting for Applications at the Petascale and Beyond [pdf]


SWIFT 0.9 RELEASE - 27 APR 2009

The latest release of Swift, v0.9, is available from the -downloads page. +downloads page.


Modified: www/inc/nav.php =================================================================== --- www/inc/nav.php 2011-01-04 20:06:02 UTC (rev 3839) +++ www/inc/nav.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -1,9 +1,9 @@ Modified: www/inc/papers_sidebar.php =================================================================== --- www/inc/papers_sidebar.php 2011-01-04 20:06:02 UTC (rev 3839) +++ www/inc/papers_sidebar.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -1,6 +1,6 @@

HIGHLIGHTS

Technology

-

Parallel Scripting for Applications at the Petascale and Beyond (2009) [pdf]


+

Parallel Scripting for Applications at the Petascale and Beyond (2009) [pdf]


A Notation and System for Expressing and Executing Cleanly Typed Workflows on Messy Scientific Data (2005) [pdf]


XDTM: XML Data Type and Mapping for Specifying Datasets (2005) [pdf]


The Virtual Data Grid: A New Model and Architecture for Data-Intensive Collaboration (2003) [pdf]


Added: www/index.html =================================================================== --- www/index.html (rev 0) +++ www/index.html 2011-01-04 21:41:40 UTC (rev 3840) @@ -0,0 +1,9 @@ + + + Redirecting to Swift Home Page + + + + Redirecting to Swift Home Page + + \ No newline at end of file Deleted: www/index.php =================================================================== --- www/index.php 2011-01-04 20:06:02 UTC (rev 3839) +++ www/index.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -1,107 +0,0 @@ - - - - - Swift - - - - - - -
- - - - - - - -
- -
- -

HOME

- -

- Swift is a system for the rapid and - reliable specification, execution, and management of - large-scale science and engineering - workflows. It supports applications that execute many tasks - coupled by disk-resident datasets - as is common, for - example, when analyzing large quantities of data or performing parameter - studies or ensemble simulations. -

- -
    - - The open source Swift software combines: - -
  • A simple scripting language to enable the concise, high-level - specifications of complex parallel computations, and mappers for accessing - diverse data formats in a convenient manner.
  • - -
  • An execution engine that can manage the dispatch of many (10,000) - tasks to many (100) processors, whether on parallel computers, campus - grids, or multi-site grids.
  • - -
- -

- Swift users span the physical sciences, biological sciences, social - sciences, humanities, computer science, and education. Swift users have - achieved multiple-order-of-magnitude savings in program development and - execution time. -

- -

- Swift builds on and includes technology previously distributed as the - GriPhyN Virtual Data System. -

- -
- workflow -
-
"Swift applied to computational neuroscience. On the left, a small workflow from functional MRI study in aphasia; on the right, a map of brain activation clusters caused by various stimuli." -
- -

-The Swift project is supported by the National Science Foundation with -additional support from NIH, Argonne National Laboratory and the University -of Chicago Computation Institute. -

-

-Swift is an effort undergoing incubation at Globus. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful Globus projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by Globus. -

- -
- - - - -
- - - - -
- - - - - - - Copied: www/main/index.php (from rev 3838, www/index.php) =================================================================== --- www/main/index.php (rev 0) +++ www/main/index.php 2011-01-04 21:41:40 UTC (rev 3840) @@ -0,0 +1,107 @@ + + + + + Swift + + + + + + +
+ + + + + + + +
+ +
+ +

HOME

+ +

+ Swift is a system for the rapid and + reliable specification, execution, and management of + large-scale science and engineering + workflows. It supports applications that execute many tasks + coupled by disk-resident datasets - as is common, for + example, when analyzing large quantities of data or performing parameter + studies or ensemble simulations. +

+ +
    + + The open source Swift software combines: + +
  • A simple scripting language to enable the concise, high-level + specifications of complex parallel computations, and mappers for accessing + diverse data formats in a convenient manner.
  • + +
  • An execution engine that can manage the dispatch of many (10,000) + tasks to many (100) processors, whether on parallel computers, campus + grids, or multi-site grids.
  • + +
+ +

+ Swift users span the physical sciences, biological sciences, social + sciences, humanities, computer science, and education. Swift users have + achieved multiple-order-of-magnitude savings in program development and + execution time. +

+ +

+ Swift builds on and includes technology previously distributed as the + GriPhyN Virtual Data System. +

+ +
+ workflow +
+
"Swift applied to computational neuroscience. On the left, a small workflow from functional MRI study in aphasia; on the right, a map of brain activation clusters caused by various stimuli." +
+ +

+The Swift project is supported by the National Science Foundation with +additional support from NIH, Argonne National Laboratory and the University +of Chicago Computation Institute. +

+

+Swift is an effort undergoing incubation at Globus. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful Globus projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by Globus. +

+ +
+ + + + +
+ + + + +
+ + + + + + + From noreply at svn.ci.uchicago.edu Tue Jan 4 19:15:43 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 4 Jan 2011 19:15:43 -0600 (CST) Subject: [Swift-commit] r3841 - in branches/release-0.92/tests: groups providers/local-pbs providers/local-pbs/pads Message-ID: <20110105011543.BBA8D9CC94@svn.ci.uchicago.edu> Author: davidk Date: 2011-01-04 19:15:43 -0600 (Tue, 04 Jan 2011) New Revision: 3841 Added: branches/release-0.92/tests/groups/group-pads.sh branches/release-0.92/tests/providers/local-pbs/pads/ branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.check.sh branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.swift branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.timeout branches/release-0.92/tests/providers/local-pbs/pads/catsn.0001.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0002.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0003.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0004.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0005.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0006.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0007.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0008.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0009.out.expected branches/release-0.92/tests/providers/local-pbs/pads/catsn.0010.out.expected branches/release-0.92/tests/providers/local-pbs/pads/data.txt branches/release-0.92/tests/providers/local-pbs/pads/sites.template.xml branches/release-0.92/tests/providers/local-pbs/pads/tc.template.data branches/release-0.92/tests/providers/local-pbs/pads/title.txt Removed: branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.check.sh branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.setup.sh 
branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.swift branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.timeout branches/release-0.92/tests/providers/local-pbs/catsn.0001.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0002.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0003.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0004.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0005.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0006.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0007.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0008.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0009.out.expected branches/release-0.92/tests/providers/local-pbs/catsn.0010.out.expected branches/release-0.92/tests/providers/local-pbs/data.txt branches/release-0.92/tests/providers/local-pbs/sites.template.xml branches/release-0.92/tests/providers/local-pbs/tc.template.data branches/release-0.92/tests/providers/local-pbs/title.txt Log: PADS tests Added: branches/release-0.92/tests/groups/group-pads.sh =================================================================== --- branches/release-0.92/tests/groups/group-pads.sh (rev 0) +++ branches/release-0.92/tests/groups/group-pads.sh 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1,7 @@ + +# GROUPLIST definition to run pbs tests + +GROUPLIST=( + $TESTDIR/providers/local \ + $TESTDIR/providers/local-pbs/pads \ +) Deleted: branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.check.sh =================================================================== --- branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.check.sh 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.check.sh 2011-01-05 01:15:43 UTC (rev 3841) @@ -1,12 +0,0 @@ -#!/bin/bash - -set -x - -for count in `seq 
--format "%04.f" 1 1 10` -do - [ -f catsn.$count.out ] || exit 1 - CONTENTS1=$( cat catsn.$count.out.expected ) - CONTENTS2=$( cat catsn.$count.out ) - [[ $CONTENTS1 == $CONTENTS2 ]] || exit 1 -done -exit 0 Deleted: branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.setup.sh =================================================================== --- branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.setup.sh 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.setup.sh 2011-01-05 01:15:43 UTC (rev 3841) @@ -1,4 +0,0 @@ -#!/bin/bash - -cp -v $GROUP/data.txt . || exit 1 -cp -v $GROUP/*expected . || exit 1 Deleted: branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.swift =================================================================== --- branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.swift 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.swift 2011-01-05 01:15:43 UTC (rev 3841) @@ -1,15 +0,0 @@ -type file; - -app (file o) cat (file i) -{ - cat @i stdout=@o; -} - -string t = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; -string char[] = @strsplit(t, ""); - -file out[]; -foreach j in [1:@toint(@arg("n","10"))] { - file data<"data.txt">; - out[j] = cat(data); -} Deleted: branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.timeout =================================================================== --- branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.timeout 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/001-catsn-localpbs.timeout 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -180 Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0001.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0001.out.expected 
2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0001.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0002.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0002.out.expected 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0002.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0003.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0003.out.expected 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0003.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0004.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0004.out.expected 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0004.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0005.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0005.out.expected 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0005.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0006.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0006.out.expected 
2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0006.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0007.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0007.out.expected 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0007.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0008.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0008.out.expected 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0008.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0009.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0009.out.expected 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0009.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/catsn.0010.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/catsn.0010.out.expected 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/catsn.0010.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Deleted: branches/release-0.92/tests/providers/local-pbs/data.txt =================================================================== --- branches/release-0.92/tests/providers/local-pbs/data.txt 2011-01-04 21:41:40 UTC (rev 3840) +++ 
branches/release-0.92/tests/providers/local-pbs/data.txt 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.check.sh =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.check.sh (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.check.sh 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1,4 @@ +#!/bin/bash + +cp -v $GROUP/data.txt . || exit 1 +cp -v $GROUP/*expected . || exit 1 Property changes on: branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.check.sh ___________________________________________________________________ Name: svn:executable + * Added: branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.swift =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.swift (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.swift 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1,15 @@ +type file; + +app (file o) cat (file i) +{ + cat @i stdout=@o; +} + +string t = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +string char[] = @strsplit(t, ""); + +file out[]; +foreach j in [1:@toint(@arg("n","10"))] { + file data<"data.txt">; + out[j] = cat(data); +} Added: branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.timeout =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.timeout (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/001-catsn-pads-pbs.timeout 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +180 Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0001.out.expected =================================================================== --- 
branches/release-0.92/tests/providers/local-pbs/pads/catsn.0001.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0001.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0002.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0002.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0002.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0003.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0003.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0003.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0004.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0004.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0004.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0005.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0005.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0005.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0006.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0006.out.expected 
(rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0006.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0007.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0007.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0007.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0008.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0008.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0008.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0009.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0009.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0009.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/catsn.0010.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/catsn.0010.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/catsn.0010.out.expected 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/data.txt =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/data.txt (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/data.txt 2011-01-05 01:15:43 UTC (rev 3841) 
@@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/pads/sites.template.xml =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/sites.template.xml (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/sites.template.xml 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1,8 @@ + + + + + 0 + _WORK_ + + Added: branches/release-0.92/tests/providers/local-pbs/pads/tc.template.data =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/tc.template.data (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/tc.template.data 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1,8 @@ +pads-pbs echo /bin/echo INSTALLED INTEL32::LINUX +pads-pbs cat /bin/cat INSTALLED INTEL32::LINUX +pads-pbs ls /bin/ls INSTALLED INTEL32::LINUX +pads-pbs grep /bin/grep INSTALLED INTEL32::LINUX +pads-pbs sort /bin/sort INSTALLED INTEL32::LINUX +pads-pbs paste /bin/paste INSTALLED INTEL32::LINUX +pads-pbs wc /usr/bin/wc INSTALLED INTEL32::LINUX + Added: branches/release-0.92/tests/providers/local-pbs/pads/title.txt =================================================================== --- branches/release-0.92/tests/providers/local-pbs/pads/title.txt (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/pads/title.txt 2011-01-05 01:15:43 UTC (rev 3841) @@ -0,0 +1 @@ +Pads PBS Configuration Test Deleted: branches/release-0.92/tests/providers/local-pbs/sites.template.xml =================================================================== --- branches/release-0.92/tests/providers/local-pbs/sites.template.xml 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/sites.template.xml 2011-01-05 01:15:43 UTC (rev 3841) @@ -1,8 +0,0 @@ - - - - - 0 - _WORK_ - - Deleted: branches/release-0.92/tests/providers/local-pbs/tc.template.data 
=================================================================== --- branches/release-0.92/tests/providers/local-pbs/tc.template.data 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/tc.template.data 2011-01-05 01:15:43 UTC (rev 3841) @@ -1,8 +0,0 @@ -local-pbs echo /bin/echo INSTALLED INTEL32::LINUX -local-pbs cat /bin/cat INSTALLED INTEL32::LINUX -local-pbs ls /bin/ls INSTALLED INTEL32::LINUX -local-pbs grep /bin/grep INSTALLED INTEL32::LINUX -local-pbs sort /bin/sort INSTALLED INTEL32::LINUX -local-pbs paste /bin/paste INSTALLED INTEL32::LINUX -local-pbs wc /usr/bin/wc INSTALLED INTEL32::LINUX - Deleted: branches/release-0.92/tests/providers/local-pbs/title.txt =================================================================== --- branches/release-0.92/tests/providers/local-pbs/title.txt 2011-01-04 21:41:40 UTC (rev 3840) +++ branches/release-0.92/tests/providers/local-pbs/title.txt 2011-01-05 01:15:43 UTC (rev 3841) @@ -1 +0,0 @@ -Local PBS Configuration Test From noreply at svn.ci.uchicago.edu Wed Jan 5 00:01:42 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 00:01:42 -0600 (CST) Subject: [Swift-commit] r3843 - text/parco10submission Message-ID: <20110105060142.F26329CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-05 00:01:42 -0600 (Wed, 05 Jan 2011) New Revision: 3843 Added: text/parco10submission/paper.pdf Log: Adding generated pdf to svn control for ease of reviewing. 
Added: text/parco10submission/paper.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/paper.pdf ___________________________________________________________________ Name: svn:mime-type + application/octet-stream From noreply at svn.ci.uchicago.edu Wed Jan 5 09:02:03 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 09:02:03 -0600 (CST) Subject: [Swift-commit] r3844 - text/parco10submission Message-ID: <20110105150203.77675FC41@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-05 09:02:03 -0600 (Wed, 05 Jan 2011) New Revision: 3844 Modified: text/parco10submission/paper.bib text/parco10submission/paper.tex Log: fixing the cite errors Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-05 06:01:42 UTC (rev 3843) +++ text/parco10submission/paper.bib 2011-01-05 15:02:03 UTC (rev 3844) @@ -194,6 +194,22 @@ pages = {237--247} } +@article {Karajan, + author = {von Laszewski, Gregor and Hategan, Mike}, + affiliation = {Argonne National Laboratory Mathematics and Computer Science Division, Argonne National Laboratory 9700 S. Cass Ave. 
Argonne IL 60440 USA}, + title = {Workflow Concepts of the Java CoG Kit}, + journal = {Journal of Grid Computing}, + publisher = {Springer Netherlands}, + issn = {1570-7873}, + keyword = {Computer Science}, + pages = {239-258}, + volume = {3}, + issue = {3}, + url = {http://dx.doi.org/10.1007/s10723-005-9013-5}, + note = {10.1007/s10723-005-9013-5}, + year = {2005} +} + % Items below are from an older paper - retain for the moment in case any are useful here @article{condor-g, Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 06:01:42 UTC (rev 3843) +++ text/parco10submission/paper.tex 2011-01-05 15:02:03 UTC (rev 3844) @@ -313,7 +313,7 @@ The \emph{if} and \emph{switch} statements are rather standard, but \emph{foreach} merits more discussion. Similar to \emph{Go} -(\ref{GOLANG}) and \emph{Python}, its control ``variables'' can be both +(\cite{GOLANG}) and \emph{Python}, its control ``variables'' can be both an index and a value. 
The syntax is as follows: \begin{verbatim} From noreply at svn.ci.uchicago.edu Wed Jan 5 09:24:20 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 09:24:20 -0600 (CST) Subject: [Swift-commit] r3845 - text/parco10submission Message-ID: <20110105152420.41464FC41@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-05 09:24:20 -0600 (Wed, 05 Jan 2011) New Revision: 3845 Modified: text/parco10submission/paper.bib text/parco10submission/paper.tex Log: one more change Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-05 15:02:03 UTC (rev 3844) +++ text/parco10submission/paper.bib 2011-01-05 15:24:20 UTC (rev 3845) @@ -194,22 +194,31 @@ pages = {237--247} } -@article {Karajan, - author = {von Laszewski, Gregor and Hategan, Mike}, - affiliation = {Argonne National Laboratory Mathematics and Computer Science Division, Argonne National Laboratory 9700 S. Cass Ave. Argonne IL 60440 USA}, - title = {Workflow Concepts of the Java CoG Kit}, - journal = {Journal of Grid Computing}, - publisher = {Springer Netherlands}, - issn = {1570-7873}, - keyword = {Computer Science}, - pages = {239-258}, - volume = {3}, - issue = {3}, - url = {http://dx.doi.org/10.1007/s10723-005-9013-5}, - note = {10.1007/s10723-005-9013-5}, - year = {2005} +@article {Karajan, + author = {von Laszewski, Gregor and Hategan, Mike}, + affiliation = {Argonne National Laboratory Mathematics and Computer Science Division, Argonne National Laboratory 9700 S. Cass Ave. 
Argonne IL 60440 USA}, + title = {Workflow Concepts of the {Java CoG} Kit}, + journal = {Journal of Grid Computing}, + publisher = {Springer Netherlands}, + issn = {1570-7873}, + keyword = {Computer Science}, + pages = {239-258}, + volume = {3}, + issue = {3}, + url = {http://dx.doi.org/10.1007/s10723-005-9013-5}, + note = {10.1007/s10723-005-9013-5}, + year = {2005} } +@techreport{NPSOL, +title={User's Guide for {NPSOL} (Version 4.0): A Fortran Package for Nonlinear Programming}, +institution={Stanford University Systems Optimization Lab}, +author={Gill, Philip E. and Murray, Walter and Saunders, Michael A. and Wright, Margaret H.}, +month={Jan}, +year={1986} +} + + % Items below are from an older paper - retain for the moment in case any are useful here @article{condor-g, Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 15:02:03 UTC (rev 3844) +++ text/parco10submission/paper.tex 2011-01-05 15:24:20 UTC (rev 3845) @@ -54,12 +54,15 @@ \author{Mihael Hategan} \author{Justin M. Wozniak} \author{Ian Foster} -\author{Daniel Katz} \author{Michael Wilde} \address{Mathematics and Computer Science Division, Argonne National Laboratory, and Computation Institute, University of Chicago and Argonne National Laboratory} +\author{Daniel S. Katz} + +\address{Computation Institute, University of Chicago and Argonne National Laboratory} + \author{Ben Clifford} \address{Argonne Leadership Computing Facility, Argonne National Laboratory } @@ -1466,7 +1469,7 @@ generator creates the desired model by calculating where in the array that permutation of the model matrix falls. OpenMx then estimates the model parameters using a non-linear optimization algorithm called -NPSOL (Gill, 1986) \katznote{change to cite} +NPSOL~\cite{NPSOL} and the optimized model is returned and written out by Swift to the location specified in its mapping on line 10. 
@@ -1939,7 +1942,7 @@ \newpage The submitted manuscript has been created by UChicago Argonne, LLC, -Operator of Argonne National Laboratory ("Argonne"). Argonne, a +Operator of Argonne National Laboratory (``Argonne''). Argonne, a U.S. Department of Energy Office of Science laboratory, is operated under Contract No. DE-AC02-06CH11357. The U.S. Government retains for itself, and others acting on its behalf, a paid-up nonexclusive, From noreply at svn.ci.uchicago.edu Wed Jan 5 09:30:08 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 09:30:08 -0600 (CST) Subject: [Swift-commit] r3846 - in branches/release-0.92/tests: groups providers providers/local-cobalt providers/local-cobalt/intrepid Message-ID: <20110105153008.A2784FC41@svn.ci.uchicago.edu> Author: davidk Date: 2011-01-05 09:30:08 -0600 (Wed, 05 Jan 2011) New Revision: 3846 Added: branches/release-0.92/tests/providers/local-cobalt/ branches/release-0.92/tests/providers/local-cobalt/intrepid/ branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.check.sh branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.clean.sh branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.setup.sh branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.swift branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.timeout branches/release-0.92/tests/providers/local-cobalt/intrepid/sites.template.xml branches/release-0.92/tests/providers/local-cobalt/intrepid/tc.template.data branches/release-0.92/tests/providers/local-cobalt/intrepid/title.txt Modified: branches/release-0.92/tests/groups/group-intrepid.sh Log: Re-adding Intrepid tests Modified: branches/release-0.92/tests/groups/group-intrepid.sh =================================================================== --- branches/release-0.92/tests/groups/group-intrepid.sh 2011-01-05 15:24:20 UTC (rev 3845) +++ branches/release-0.92/tests/groups/group-intrepid.sh 2011-01-05 
15:30:08 UTC (rev 3846) @@ -2,7 +2,7 @@ # GROUPLIST definition to run on Intrepid GROUPLIST=( $TESTDIR/local \ - $TESTDIR/site/intrepid + $TESTDIR/providers/local-cobalt/intrepid \ ) checkvars WORK QUEUE PROJECT Added: branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.check.sh =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.check.sh (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.check.sh 2011-01-05 15:30:08 UTC (rev 3846) @@ -0,0 +1,7 @@ +#!/bin/sh + +set -x + +grep $( uname -m ) 100-cp-output.txt || exit 1 + +exit 0 Property changes on: branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.check.sh ___________________________________________________________________ Name: svn:executable + * Added: branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.clean.sh =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.clean.sh (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.clean.sh 2011-01-05 15:30:08 UTC (rev 3846) @@ -0,0 +1,7 @@ +#!/bin/sh + +set -x + +rm -v 100-cp-input.txt 100-cp-output.txt || exit 1 + +exit 0 Property changes on: branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.clean.sh ___________________________________________________________________ Name: svn:executable + * Added: branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.setup.sh =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.setup.sh (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.setup.sh 2011-01-05 15:30:08 UTC (rev 3846) @@ -0,0 +1,7 @@ +#!/bin/sh + +set -x + +uname -a > 100-cp-input.txt || exit 1 + +exit 0 Property changes on: 
branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.setup.sh ___________________________________________________________________ Name: svn:executable + * Added: branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.swift =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.swift (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.swift 2011-01-05 15:30:08 UTC (rev 3846) @@ -0,0 +1,13 @@ + +type file; + +app (file o) cp(file i) +{ + cp @i @o; +} + +file input<"100-cp-input.txt">; +file output<"100-cp-output.txt">; + +output = cp(input); + Added: branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.timeout =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.timeout (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/intrepid/100-cp.timeout 2011-01-05 15:30:08 UTC (rev 3846) @@ -0,0 +1 @@ +3000 Added: branches/release-0.92/tests/providers/local-cobalt/intrepid/sites.template.xml =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/intrepid/sites.template.xml (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/intrepid/sites.template.xml 2011-01-05 15:30:08 UTC (rev 3846) @@ -0,0 +1,32 @@ + + + + + + /scratch/wozniak/work + + 0.04 + file + + + + + + + _HOST_ + _PROJECT_ + _QUEUE_ + zeptoos + true + 21 + 10000 + 1 + DEBUG + 1 + 900 + 64 + 64 + _WORK_ + + + Added: branches/release-0.92/tests/providers/local-cobalt/intrepid/tc.template.data =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/intrepid/tc.template.data (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/intrepid/tc.template.data 2011-01-05 15:30:08 UTC (rev 3846) @@ -0,0 +1 @@ +coasters_alcfbgp cp 
/bin/cp INSTALLED INTEL32::LINUX null Added: branches/release-0.92/tests/providers/local-cobalt/intrepid/title.txt =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/intrepid/title.txt (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/intrepid/title.txt 2011-01-05 15:30:08 UTC (rev 3846) @@ -0,0 +1 @@ +Site Test: BG/P: Intrepid From noreply at svn.ci.uchicago.edu Wed Jan 5 11:16:08 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 11:16:08 -0600 (CST) Subject: [Swift-commit] r3847 - in branches/release-0.92/tests: groups providers/local-pbs providers/local-pbs/queenbee Message-ID: <20110105171608.6B2E7FC41@svn.ci.uchicago.edu> Author: davidk Date: 2011-01-05 11:16:07 -0600 (Wed, 05 Jan 2011) New Revision: 3847 Added: branches/release-0.92/tests/groups/group-queenbee.sh branches/release-0.92/tests/providers/local-pbs/queenbee/ branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.check.sh branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.swift branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.timeout branches/release-0.92/tests/providers/local-pbs/queenbee/README branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0001.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0002.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0003.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0004.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0005.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0006.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0007.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0008.out.expected 
branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0009.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0010.out.expected branches/release-0.92/tests/providers/local-pbs/queenbee/data.txt branches/release-0.92/tests/providers/local-pbs/queenbee/sites.template.xml branches/release-0.92/tests/providers/local-pbs/queenbee/tc.template.data branches/release-0.92/tests/providers/local-pbs/queenbee/title.txt Log: Queenbee pbs tests Added: branches/release-0.92/tests/groups/group-queenbee.sh =================================================================== --- branches/release-0.92/tests/groups/group-queenbee.sh (rev 0) +++ branches/release-0.92/tests/groups/group-queenbee.sh 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1,6 @@ +# GROUPLIST definition to run queenbee tests + +GROUPLIST=( + $TESTDIR/providers/local \ + $TESTDIR/providers/local-pbs/queenbee \ +) Added: branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.check.sh =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.check.sh (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.check.sh 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1,4 @@ +#!/bin/bash + +cp -v $GROUP/data.txt . || exit 1 +cp -v $GROUP/*expected . 
|| exit 1 Property changes on: branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.check.sh ___________________________________________________________________ Name: svn:executable + * Added: branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.swift =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.swift (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.swift 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1,15 @@ +type file; + +app (file o) cat (file i) +{ + cat @i stdout=@o; +} + +string t = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +string char[] = @strsplit(t, ""); + +file out[]; +foreach j in [1:@toint(@arg("n","10"))] { + file data<"data.txt">; + out[j] = cat(data); +} Added: branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.timeout =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.timeout (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/001-catsn-queenbee-pbs.timeout 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +180 Added: branches/release-0.92/tests/providers/local-pbs/queenbee/README =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/README (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/README 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1,2 @@ +Queenbee uses an older version of bash which does not work with the nightly.sh by default +In order to run these tests, download and compile a newer version of bash from ftp.gnu.org/pub/bash Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0001.out.expected =================================================================== --- 
branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0001.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0001.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0002.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0002.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0002.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0003.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0003.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0003.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0004.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0004.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0004.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0005.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0005.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0005.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0006.out.expected =================================================================== --- 
branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0006.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0006.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0007.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0007.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0007.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0008.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0008.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0008.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0009.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0009.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0009.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0010.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0010.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/catsn.0010.out.expected 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/data.txt =================================================================== --- 
branches/release-0.92/tests/providers/local-pbs/queenbee/data.txt (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/data.txt 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-pbs/queenbee/sites.template.xml =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/sites.template.xml (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/sites.template.xml 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1,8 @@ + + + + + 0 + _WORK_ + + Added: branches/release-0.92/tests/providers/local-pbs/queenbee/tc.template.data =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/tc.template.data (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/tc.template.data 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1,7 @@ +queenbee-pbs echo /bin/echo INSTALLED INTEL32::LINUX +queenbee-pbs cat /bin/cat INSTALLED INTEL32::LINUX +queenbee-pbs ls /bin/ls INSTALLED INTEL32::LINUX +queenbee-pbs grep /bin/grep INSTALLED INTEL32::LINUX +queenbee-pbs sort /bin/sort INSTALLED INTEL32::LINUX +queenbee-pbs paste /bin/paste INSTALLED INTEL32::LINUX +queenbee-pbs wc /usr/bin/wc INSTALLED INTEL32::LINUX Added: branches/release-0.92/tests/providers/local-pbs/queenbee/title.txt =================================================================== --- branches/release-0.92/tests/providers/local-pbs/queenbee/title.txt (rev 0) +++ branches/release-0.92/tests/providers/local-pbs/queenbee/title.txt 2011-01-05 17:16:07 UTC (rev 3847) @@ -0,0 +1 @@ +QueenBee PBS Configuration Test From noreply at svn.ci.uchicago.edu Wed Jan 5 11:33:02 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 11:33:02 -0600 (CST) Subject: [Swift-commit] r3848 - trunk/tests/groups Message-ID: <20110105173302.04671FC41@svn.ci.uchicago.edu> 
Author: wozniak Date: 2011-01-05 11:33:01 -0600 (Wed, 05 Jan 2011) New Revision: 3848 Modified: trunk/tests/groups/group-all-local.sh Log: WORK must be set in the environment for sites.xml generation Modified: trunk/tests/groups/group-all-local.sh =================================================================== --- trunk/tests/groups/group-all-local.sh 2011-01-05 17:16:07 UTC (rev 3847) +++ trunk/tests/groups/group-all-local.sh 2011-01-05 17:33:01 UTC (rev 3848) @@ -11,3 +11,5 @@ $TESTDIR/cdm/ps/pinned # $TESTDIR/site/intrepid ) + +checkvars WORK From noreply at svn.ci.uchicago.edu Wed Jan 5 11:34:55 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 11:34:55 -0600 (CST) Subject: [Swift-commit] r3849 - trunk/tests Message-ID: <20110105173455.6CBAFFC41@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-05 11:34:55 -0600 (Wed, 05 Jan 2011) New Revision: 3849 Modified: trunk/tests/nightly.sh Log: Set a default WORK directory Modified: trunk/tests/nightly.sh =================================================================== --- trunk/tests/nightly.sh 2011-01-05 17:33:01 UTC (rev 3848) +++ trunk/tests/nightly.sh 2011-01-05 17:34:55 UTC (rev 3849) @@ -919,6 +919,11 @@ done } +if [[ $WORK == "" ]] +then + WORK=$TOPDIR/work +fi + checkvars GROUPLISTFILE echo "GROUPLISTFILE: $GROUPLISTFILE" source $GROUPLISTFILE || exit 1 From noreply at svn.ci.uchicago.edu Wed Jan 5 12:25:36 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 12:25:36 -0600 (CST) Subject: [Swift-commit] r3850 - trunk/libexec Message-ID: <20110105182536.6D5869CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-05 12:25:36 -0600 (Wed, 05 Jan 2011) New Revision: 3850 Modified: trunk/libexec/_swiftwrap.staging Log: New SWIFTWRAP_FAST variable to turn off logging and certain checks Usage: add to sites.xml: 1 Modified: trunk/libexec/_swiftwrap.staging 
=================================================================== --- trunk/libexec/_swiftwrap.staging 2011-01-05 17:34:55 UTC (rev 3849) +++ trunk/libexec/_swiftwrap.staging 2011-01-05 18:25:36 UTC (rev 3850) @@ -1,5 +1,20 @@ # this script must be invoked inside of bash, not plain sh +openinfo() { + exec 3<> $1 + INFO=3 +} + +closeinfo() { + exec 3>&- +} + +# Clobbered by SWIFTWRAP_FAST +log() { + echo "$@" >& "$INFO" +} + +# Clobbered by SWIFTWRAP_FAST infosection() { echo >& "$INFO" echo "_____________________________________________________________________________" >& "$INFO" @@ -38,14 +53,11 @@ fi } +# Clobbered by SWIFTWRAP_FAST logstate() { echo "Progress " `date +"%Y-%m-%d %H:%M:%S.%N%z"` " $@" >& "$INFO" } -log() { - echo "$@" >& "$INFO" -} - fail() { EC=$1 shift @@ -77,13 +89,32 @@ fi } +# Clobbered by SWIFTWRAP_FAST +checkMissing() { + MISSING= + for O in $OUTF ; do + if [ ! -f "$O" ]; then + if [ "$MISSING" == "" ]; then + MISSING=$O + else + MISSING="$MISSING, $O" + fi + fi + done + if [ "$MISSING" != "" ]; then + fail 254 "The following output files were not created by the application: $MISSING" + fi +} + checkparamfile() { log "checking for paramfile" if [ "$1" == "-p" ]; then JOBDIR=$2 PARAMFILE=${WFDIR}/parameters/${JOBDIR}/param-${ID} + log "paramfile is: $PARAMFILE" + else + log "no paramfile: using command line arguments" fi - log "paramfile is: $PARAMFILE" } getarg() { @@ -107,15 +138,6 @@ fi } -openinfo() { - exec 3<> $1 - INFO=3 -} - -closeinfo() { - exec 3>&- -} - COMMANDLINE=$@ # make the WFDIR absolute @@ -125,13 +147,24 @@ checkparamfile "$@" +# # SWIFTWRAP_FAST: Turn things off for speed +if [[ $SWIFTWRAP_FAST == "1" ]]; then + shopt -s expand_aliases + alias infosection=: + alias logstate=: + alias log=: + alias checkMissing=: +fi + if [ "X$INFODIR" == "X" ]; then INFODIR="." 
fi logstate "LOG_START" -infosection "Wrapper" +infosection "Wrapper (_swiftwrap.staging)" +log $COMMANDLINE + getarg "-e" "$@" EXEC=$VALUE shift $SHIFTCOUNT @@ -253,7 +286,6 @@ logstate "EXECUTE" -#ls >>$WRAPPERLOG if [ ! -f "$EXEC" ]; then fail 254 "The executable $EXEC does not exist" fi @@ -290,19 +322,7 @@ logstate "EXECUTE_DONE" log "Job ran successfully" -MISSING= -for O in $OUTF ; do - if [ ! -f "$O" ]; then - if [ "$MISSING" == "" ]; then - MISSING=$O - else - MISSING="$MISSING, $O" - fi - fi -done -if [ "$MISSING" != "" ]; then - fail 254 "The following output files were not created by the application: $MISSING" -fi +checkMissing logstate "END" From noreply at svn.ci.uchicago.edu Wed Jan 5 12:26:11 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 12:26:11 -0600 (CST) Subject: [Swift-commit] r3851 - trunk/libexec Message-ID: <20110105182611.A7BD19CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-05 12:26:11 -0600 (Wed, 05 Jan 2011) New Revision: 3851 Modified: trunk/libexec/_swiftwrap Log: Label the _swiftwrap output Modified: trunk/libexec/_swiftwrap =================================================================== --- trunk/libexec/_swiftwrap 2011-01-05 18:25:36 UTC (rev 3850) +++ trunk/libexec/_swiftwrap 2011-01-05 18:26:11 UTC (rev 3851) @@ -238,7 +238,7 @@ openinfo "$INFOFILE" logstate "LOG_START" -infosection "Wrapper" +infosection "Wrapper (_swiftwrap)" getarg "-e" "$@" EXEC=$VALUE From noreply at svn.ci.uchicago.edu Wed Jan 5 12:33:55 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 12:33:55 -0600 (CST) Subject: [Swift-commit] r3852 - trunk/tests/mpi Message-ID: <20110105183355.E04B59CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-05 12:33:54 -0600 (Wed, 05 Jan 2011) New Revision: 3852 Modified: trunk/tests/mpi/ Log: Ignore compiled binary Property changes on: trunk/tests/mpi 
___________________________________________________________________ Name: svn:ignore + mpi-cp From noreply at svn.ci.uchicago.edu Wed Jan 5 13:19:19 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 13:19:19 -0600 (CST) Subject: [Swift-commit] r3853 - text/parco10submission Message-ID: <20110105191919.663EBFC41@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-05 13:19:19 -0600 (Wed, 05 Jan 2011) New Revision: 3853 Modified: text/parco10submission/paper.pdf text/parco10submission/paper.tex Log: Edits to abstract and intro. Modified: text/parco10submission/paper.pdf =================================================================== (Binary files differ) Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 18:33:54 UTC (rev 3852) +++ text/parco10submission/paper.tex 2011-01-05 19:19:19 UTC (rev 3853) @@ -97,26 +97,22 @@ Swift is a scripting language designed for composing application programs into distributed, -parallelized applications for execution on grids and supercomputers +parallelized applications for execution on clusters, grids, clouds and supercomputers with tens to hundreds of thousands of processors. It is intended to serve as a higher level framework for composing the interaction of -concurrently executing programs and scripts, sitting above (and -utilizing) existing scripting languages and applications. Swift +concurrently executing programs (even parallel ones) and scripts written in other scripting languages. Swift scripts express the execution of programs to produce datasets using a C-like syntax consisting of function definitions and expressions, but with -dataflow-driven semantics and implicit parallelism. The application programs executed by a -Swift script can be binary executables (even parallel ones) or can be scripts written in -any other scripting language. 
+dataflow-driven semantics and implicit parallelism. The emergence of large-scale production computing infrastructure such -as clusters, grids and high-performance computing (HPC), and the +as clusters, grids and supercomputers, and the inherent complexity of programming on these systems, necessitates a new approach. - -Swift was developed to create a higher-level language +Swift is a higher-level language that focuses not on the details of executing sequences or -``pipelines'' of programs, but rather on specific issues that arise -from the concurrent execution of disparate computational tasks at +pipelines of programs, but rather on the issues that arise +from the concurrent execution, composition, and coordination of many independent computational tasks at large scale. While many application needs involve the execution of a single large @@ -128,21 +124,19 @@ them. Scaling up requires the distribution of such workloads among many computers or clusters and hence a ``grid'' approach. Even if a single large parallel cluster suffices, users will not always have -access to the same system (i.e., big machines may be congested, or temporarily unavailable). This leads to the need to be able to utilize +access to the same system (i.e., big machines may be congested, or temporarily unavailable to a user due to maintenance or allocation depletion). This leads to the need to be able to utilize whatever resource happens to be available or economical at the moment -when the user needs to perform intensive computation -- without continued -reprogramming or adjustment of scripts. +when the user needs to perform intensive computation -- without the need to continually +reprogram or adjust execution scripts. Swift's contribution and primary value is that it provides a simple, minimal set of language constructs to specify how applications are glued together and executed in parallel at large scale. 
+It regularizes and +abstracts notions of processes and external data for distributed +parallel execution of application programs. Swift scripts are location-independent and automatically parallelized by exploiting the maximal concurrency permitted by their data dependencies and by resource availability. -Swift regularizes and -abstracts notions of external data and processes for distributed -parallel execution of application programs. Swift scripts are location-independent and automatically parallelized by exploiting the maximal concurrency permitted by their data dependencies. - As a language, Swift is simpler than most scripting languages because it does not replicate the capabilities that existing scripting languages like Perl, Python, and shells do very well, but instead makes it easy to call such scripts as small applications. - Swift can execute scripts that perform tens of thousands of program invocations on highly parallel resources, and handle the unreliable and dynamic aspects of wide-area distributed resources. Such issues are handled by Swift's runtime system, and are not manifest in the user's scripts. @@ -156,9 +150,9 @@ automated site selection, data management, and reliability. Swift has been described previously~\cite{Swift_2007}; -this paper goes into greater depth in describing the Swift language, how +this paper goes into greater depth in describing the parallel aspects of the Swift language, how its implementation handles large-scale and distributed execution -environments, and its contribution to distributed and parallel computing. +environments, and its contribution to distributed and parallel programming models. The remainder of this paper is organized as follows. Section~\ref{Rationale} explains the motivation for the Swift programming model. 
From noreply at svn.ci.uchicago.edu Wed Jan 5 13:28:07 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 13:28:07 -0600 (CST) Subject: [Swift-commit] r3854 - trunk/src/org/griphyn/vdl/karajan Message-ID: <20110105192807.D305EFC41@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-05 13:28:07 -0600 (Wed, 05 Jan 2011) New Revision: 3854 Modified: trunk/src/org/griphyn/vdl/karajan/Loader.java Log: Set final output to INFO Modified: trunk/src/org/griphyn/vdl/karajan/Loader.java =================================================================== --- trunk/src/org/griphyn/vdl/karajan/Loader.java 2011-01-05 19:19:19 UTC (rev 3853) +++ trunk/src/org/griphyn/vdl/karajan/Loader.java 2011-01-05 19:28:07 UTC (rev 3854) @@ -205,10 +205,10 @@ } if (runerror) { - logger.debug("Swift finished with errors"); + logger.info("Swift finished with errors"); } else { - logger.debug("Swift finished with no errors"); + logger.info("Swift finished with no errors"); } if (ap.isPresent(ARG_TUI)) { ma.close(); From noreply at svn.ci.uchicago.edu Wed Jan 5 13:37:04 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 13:37:04 -0600 (CST) Subject: [Swift-commit] r3855 - text/parco10submission Message-ID: <20110105193704.4B7CEFC41@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-05 13:37:04 -0600 (Wed, 05 Jan 2011) New Revision: 3855 Modified: text/parco10submission/paper.tex Log: Edits to abstract and intro. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 19:28:07 UTC (rev 3854) +++ text/parco10submission/paper.tex 2011-01-05 19:37:04 UTC (rev 3855) @@ -137,6 +137,7 @@ parallel execution of application programs. Swift scripts are location-independent and automatically parallelized by exploiting the maximal concurrency permitted by their data dependencies and by resource availability. 
As a language, Swift is simpler than most scripting languages because it does not replicate the capabilities that existing scripting languages like Perl, Python, and shells do very well, but instead makes it easy to call such scripts as small applications. +% say: it has fewer statements, limited data types and a compact library of useful support primitives. It can be extended using built-in functions coded in Java, and by mappers coded as Java built-ins or as external scripts. These functions execute in parallel as part of expression evaluation in the same mapper as externally called application programs or scripts do. Swift can execute scripts that perform tens of thousands of program invocations on highly parallel resources, and handle the unreliable and dynamic aspects of wide-area distributed resources. Such issues are handled by Swift's runtime system, and are not manifest in the user's scripts. @@ -171,33 +172,42 @@ %%% \begin{msection} +% said already: The main goal of Swift is to allow the composition of coarse grained processes, and to parallelize and manage the execution of scripts on distributed collections of parallel resources. -Swift is implicitly parallel and distributed, in that the user does not explicitly code parallel behavior, nor is any knowledge of runtime execution locations encoded into a Swift script. The function model on which Swift is based ensures that execution of Swift scripts is deterministic, thus simplifying the scripting process. +%keep: +Swift is implicitly parallel and distributed, in that the user does not explicitly code either parallel behavior or synchronization (or mutual exclusion); does not code explicit data transfer of files to the execution sites of jobs and back. In fact no knowledge of runtime execution locations is directly specified in a Swift script. The function model on which Swift is based ensures that execution of Swift scripts is deterministic, thus simplifying the scripting process. 
-Having the results of a Swift script be independent of the way the processes -are parallelized implies that the processes must, for the same input, +%adjust: address degrees of determinism +Having the results of a Swift script be independent of the way that its function invocations +are parallelized implies that the functions must, for the same input, produce the same output, irrespective of the time, order or location in which they are ``executed''. This characteristic is reminiscent of referential transparency, and one may readily extend the concept to encompass arbitrary processes without difficulty. +%keep: discuss kthread/function duality; dont confuse with the parameter issue? Swift enables users to specify process composition by representing processes as functions, with input data files and process parameters become function parameters and output data files become function return values. - +%keep The exact number of processing units available on such shared resources varies with time. In order to take advantage of as many processing units as possible during the execution of a Swift program, it is necessary to be flexible in the way the execution of individual processes is parallelized. +% consider: where to define kthreads and jthreads; how to describe the function/process duality; where to discuss the implementation + +%keep: how best to state process/function duality? Each invocationu of a function is a process; all functions run in parallel; foreach loops are unfolded and run in parallel; essentinally the entire program is unfolded. (Note: itereate stops this behavior and is thus useful; address scalability issues of this and future graph partioning; how throttling keeps this manageable. This duality allows the formal specification of process behavior. 
In the following Swift statement, the semantics are defined in terms of the specification of the function -``rotate'' when supplied with specific parameter types.: +``rotate'' when supplied with specific parameter types: \begin{verbatim} rotatedImage = rotate(image, angle); \end{verbatim} +%Q: should we have any code examples in the intro? eg: 1 call, 1 foreach? + \hide{ % and whether the % implementation can be described as a ``library call'' or a ``program @@ -209,33 +219,41 @@ applications. They can equally consist of library calls or functions written in Swift itself, as long as they are side-effect free. -%A soft -%restriction arises from the desire to distribute the execution of +%A soft restriction arises from the desire to distribute the execution of %functions across a collection of heterogeneous resources, which, with %the advent of projects such as TeraGrid, suggests an implementation in %which functions are applications readily executable on them through the %careful employment of grid middleware. } +%keep: +Note that some Swift scripts are specified as library calls. + +%decide: is referential transparency relevant? Having established the constraint that Swift functions must in general be referentially transparent, and in order to preserve referential transparency at different levels of abstractions within the language, it follows that the appropriate form for the Swift language is functional. +%keep: discuss determinism, side effects, referential transparency, and interleaving??? +%I think the KEY aspect of "functional" is (a) in-out tracking for distribtability and side effect management and (b) the write-once-future model for all data. + We choose to make the Swift language purely functional (i.e., we disallow side effects in the language) in order to prevent the difficulties that arise from having to track side effects to ensure determinism in complex concurrency scenarios. +%discuss: is lazy vs eager relevant? What does it really mean to swift? 
Functional programming allows consistent implementations of evaluation strategies different from the widespread eager evaluation, as seen in lazily evaluated languages such as Haskell \cite{Haskell}. - + +%Keep: KEY: In order to achieve automatic parallelization in Swift is based on the synchronization construct of \emph{futures}\cite{Futures}, which results in eager parallelism. Every Swift variable (including every members of structures and arrays) is a write-once future. -% In this process, we trade the ability to efficiently deal with infinite structures for the ability to minimize computation time. +% consider: In this process, we trade the ability to efficiently deal with infinite structures for the ability to minimize computation time. I think this pertains to the "unroll everything" strategy. Using a futures-based evaluation strategy has an enormous benefit: automatic parallelization is achieved without the need for dependency analysis, which would significantly complicate the Swift implementation. 
From noreply at svn.ci.uchicago.edu Wed Jan 5 14:27:12 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 14:27:12 -0600 (CST) Subject: [Swift-commit] r3856 - text/parco10submission Message-ID: <20110105202712.6646FFC41@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-05 14:27:12 -0600 (Wed, 05 Jan 2011) New Revision: 3856 Modified: text/parco10submission/paper.pdf text/parco10submission/paper.tex Log: changes merging intro and rationale - in progress Modified: text/parco10submission/paper.pdf =================================================================== (Binary files differ) Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 19:37:04 UTC (rev 3855) +++ text/parco10submission/paper.tex 2011-01-05 20:27:12 UTC (rev 3856) @@ -136,12 +136,27 @@ abstracts notions of processes and external data for distributed parallel execution of application programs. Swift scripts are location-independent and automatically parallelized by exploiting the maximal concurrency permitted by their data dependencies and by resource availability. +\katznote{integrate with previous paragraph} +Swift is implicitly parallel and distributed, in that the user does not explicitly code either parallel behavior or synchronization (or mutual exclusion); does not code explicit data transfer of files to the execution sites of jobs and back. In fact no knowledge of runtime execution locations is directly specified in a Swift script. The function model on which Swift is based ensures that execution of Swift scripts is deterministic, thus simplifying the scripting process. 
+ +\katznote{fix this} +Having the results of a Swift script be independent of the way that its function invocations +are parallelized implies that the functions must, for the same input, +produce the same output, irrespective of the time, order or location in +which they are ``executed''. %This characteristic is reminiscent of +%referential transparency, and one may readily extend the concept to +%encompass arbitrary processes without difficulty. + As a language, Swift is simpler than most scripting languages because it does not replicate the capabilities that existing scripting languages like Perl, Python, and shells do very well, but instead makes it easy to call such scripts as small applications. % say: it has fewer statements, limited data types and a compact library of useful support primitives. It can be extended using built-in functions coded in Java, and by mappers coded as Java built-ins or as external scripts. These functions execute in parallel as part of expression evaluation in the same mapper as externally called application programs or scripts do. Swift can execute scripts that perform tens of thousands of program invocations on highly parallel resources, and handle the unreliable and dynamic aspects of wide-area distributed resources. Such issues are handled by Swift's runtime system, and are not manifest in the user's scripts. +%keep: discuss kthread/function duality; dont confuse with the parameter issue? +Swift enables users to specify process composition by representing processes as functions, where input data files and process parameters become function parameters and output data files become function return values. \katznote{these are really Karajan threads - forward link to that? - what do we call these in this paper?} A Swift script is a graph of function calls - each function call ... see section 3. 
+%keep: + The Swift language provides a high level representation of collections of data and a specification of how those collections are to be mapped to that @@ -150,13 +165,50 @@ external programs on clusters, grids and other parallel platforms, providing automated site selection, data management, and reliability. + +%keep +The exact number of processing units available on such shared resources +varies with time. In order to take advantage of as many processing units +as possible during the execution of a Swift program, it is necessary to +be flexible in the way the execution of individual processes is +parallelized. + +We choose to make the Swift language purely functional (i.e., all operations +have a well-defined set of inputs and outputs, all variables are write-once, +and side effects are disallowed in the language) in order to prevent the difficulties that +arise from having to track side effects to ensure determinism in complex +concurrency scenarios. +Functional programming allows consistent +implementations of evaluation strategies different from the widespread +eager evaluation, as seen in lazily evaluated languages +such as Haskell~\cite{Haskell}. + +In order to achieve automatic +parallelization, Swift is based on the synchronization construct of \emph{futures}~\cite{Futures}, which +can result in abundant parallelism. Every Swift variable (including every member of structures and arrays) is a future. +Using a futures-based evaluation strategy has an enormous benefit: +automatic parallelization is achieved without the need for +dependency analysis, which would significantly complicate the Swift implementation. + +We believe that the missing feature in current scripting languages is +sufficient specification and encapsulation of inputs to, and outputs +from, a given application, such that an execution environment could +automatically make remote execution transparent.
+ +Without this, +achieving location transparency %and automated parallel execution +is not feasible. Swift adds to scripting what the remote procedure call (RPC) paradigm +\cite{RPC} adds to programming: by formalizing the inputs and outputs of +applications that have been declared as app() functions, it provides a way to make the remote +execution of applications transparent. + Swift has been described previously~\cite{Swift_2007}; this paper goes into greater depth in describing the parallel aspects of the Swift language, how its implementation handles large-scale and distributed execution environments, and its contribution to distributed and parallel programming models. The remainder of this paper is organized as follows. -Section~\ref{Rationale} explains the motivation for the Swift programming model. +%Section~\ref{Rationale} explains the motivation for the Swift programming model. In Section~\ref{Language} we present the major concepts and language structure of Swift. Section~\ref{Execution} provides details of the implementation, including the distributed architecture that enables @@ -167,96 +219,58 @@ ongoing and future work in the Swift project, and we offer concluding remarks in Section~\ref{Conclusion}. -\section{Rationale for the Swift programming model} -\label{Rationale} +%\section{Rationale for the Swift programming model} +%\label{Rationale} %%% \begin{msection} % said already: -The main goal of Swift is to allow the composition of coarse grained -processes, and to parallelize and manage the execution of scripts -on distributed collections of parallel resources. +%The main goal of Swift is to allow the composition of coarse grained +%processes, and to parallelize and manage the execution of scripts +%on distributed collections of parallel resources. 
-%keep: -Swift is implicitly parallel and distributed, in that the user does not explicitly code either parallel behavior or synchronization (or mutual exclusion); does not code explicit data transfer of files to the execution sites of jobs and back. In fact no knowledge of runtime execution locations is directly specified in a Swift script. The function model on which Swift is based ensures that execution of Swift scripts is deterministic, thus simplifying the scripting process. -%adjust: address degrees of determinism -Having the results of a Swift script be independent of the way that its function invocations -are parallelized implies that the functions must, for the same input, -produce the same output, irrespective of the time, order or location in -which they are ``executed''. This characteristic is reminiscent of -referential transparency, and one may readily extend the concept to -encompass arbitrary processes without difficulty. +% consider: where to define kthreads and jthreads; how to describe the function/process duality; where to discuss the implementation -%keep: discuss kthread/function duality; dont confuse with the parameter issue? -Swift enables users to specify process composition by representing processes as functions, with input data files and process parameters become function parameters and output data files become function return values. -%keep -The exact number of processing units available on such shared resources -varies with time. In order to take advantage of as many processing units -as possible during the execution of a Swift program, it is necessary to -be flexible in the way the execution of individual processes is -parallelized. +%keep: how best to state process/function duality? Each invocation of a function is a process; all functions run in parallel; foreach loops are unfolded and run in parallel; essentially the entire program is unfolded. 
(Note: iterate stops this behavior and is thus useful; address scalability issues of this and future graph partitioning; how throttling keeps this manageable. -% consider: where to define kthreads and jthreads; how to describe the function/process duality; where to discuss the implementation +%This duality allows the formal specification of process behavior. In the following Swift statement, the semantics are defined in terms of the specification of the function +%``rotate'' when supplied with specific parameter types: +%\begin{verbatim} +% rotatedImage = rotate(image, angle); +%\end{verbatim} -%keep: how best to state process/function duality? Each invocationu of a function is a process; all functions run in parallel; foreach loops are unfolded and run in parallel; essentinally the entire program is unfolded. (Note: itereate stops this behavior and is thus useful; address scalability issues of this and future graph partioning; how throttling keeps this manageable. -This duality allows the formal specification of process behavior. In the following Swift statement, the semantics are defined in terms of the specification of the function -``rotate'' when supplied with specific parameter types: -\begin{verbatim} - rotatedImage = rotate(image, angle); -\end{verbatim} - %Q: should we have any code examples in the intro? eg: 1 call, 1 foreach? -\hide{ % and whether the % implementation can be described as a ``library call'' or a ``program % invocation'' changes nothing with respect to what the piece of program % fundamentally does: produce a rotated version of the original. -Indeed, there is no strict requirement in the specification of the Swift -language dictating that functions be implemented as command-line -applications. They can equally consist of library calls or functions -written in Swift itself, as long as they are side-effect free. 
+%Indeed, there is no strict requirement in the specification of the Swift +%language dictating that functions be implemented as command-line +%applications. They can equally consist of library calls or functions +%written in Swift itself, as long as they are side-effect free. %A soft restriction arises from the desire to distribute the execution of %functions across a collection of heterogeneous resources, which, with %the advent of projects such as TeraGrid, suggests an implementation in %which functions are applications readily executable on them through the %careful employment of grid middleware. -} -%keep: -Note that some Swift scripts are specified as library calls. - %decide: is referential transparency relevant? -Having established the constraint that Swift functions must in general -be referentially transparent, and in order to preserve referential -transparency at different levels of abstractions within the language, it -follows that the appropriate form for the Swift language is functional. +%Having established the constraint that Swift functions must in general +%be referentially transparent, and in order to preserve referential +%transparency at different levels of abstractions within the language, it +%follows that the appropriate form for the Swift language is functional. %keep: discuss determinism, side effects, referential transparency, and interleaving??? -%I think the KEY aspect of "functional" is (a) in-out tracking for distribtability and side effect management and (b) the write-once-future model for all data. -We choose to make the Swift language purely functional (i.e., we disallow -side effects in the language) in order to prevent the difficulties that -arise from having to track side effects to ensure determinism in complex -concurrency scenarios. +%I think the KEY aspect of "functional" is (a) in-out tracking for distributability and side effect management and (b) the write-once-future model for all data. 
%discuss: is lazy vs eager relevant? What does it really mean to swift? -Functional programming allows consistent -implementations of evaluation strategies different from the widespread -eager evaluation, as seen in lazily evaluated languages -such as Haskell \cite{Haskell}. - -%Keep: KEY: -In order to achieve automatic -parallelization in Swift is based on the synchronization construct of \emph{futures}\cite{Futures}, which -results in eager parallelism. Every Swift variable (including every members of structures and arrays) is a write-once future. + % consider: In this process, we trade the ability to efficiently deal with infinite structures for the ability to minimize computation time. I think this pertains to the "unroll everything" strategy. -Using a futures-based evaluation strategy has an enormous benefit: -automatic parallelization is achieved without the need for -dependency analysis, which would significantly complicate the Swift implementation. \hide{A number of issues may be noted at this point. First, there exist a certain class of processes that may break referential transparency, @@ -284,18 +298,7 @@ %%% vvvvv This is rationale and dovetails with the functional model parts above: -We believe that the missing feature in current scripting languages is -sufficient specification and encapsulation of inputs to, and outputs -from, a given application, such that an execution environment could -automatically make remote execution transparent. -Without this, -achieving location transparency and automated parallel execution is -not feasible. Swift adds to scripting what the remote procedure call (RPC) paradigm -\cite{RPC} adds to programming: by formalizing the inputs and outputs of -applications that have been declared as app() functions, it provides a way to make the parallel and remote -execution of applications transparent. 
- %%% ^^^^^ \section{The Swift language} @@ -310,7 +313,8 @@ \begin{description} \item[External functions] (also called ``atomic'') are functions whose implementations are not written in Swift. Currently external functions -are implemented as command-line applications. +are implemented as command-line applications\footnote{Note that some Swift scripts are specified as library calls.}. + \item[Internal functions] (also called ``compound'') are functions implemented in Swift. \end{description} From noreply at svn.ci.uchicago.edu Wed Jan 5 14:52:25 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 14:52:25 -0600 (CST) Subject: [Swift-commit] r3857 - text/parco10submission Message-ID: <20110105205225.1C86DFC41@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-05 14:52:24 -0600 (Wed, 05 Jan 2011) New Revision: 3857 Modified: text/parco10submission/paper.pdf text/parco10submission/paper.tex Log: finished cut at merging intro and rationale sections together. Modified: text/parco10submission/paper.pdf =================================================================== (Binary files differ) Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 20:27:12 UTC (rev 3856) +++ text/parco10submission/paper.tex 2011-01-05 20:52:24 UTC (rev 3857) @@ -97,23 +97,25 @@ Swift is a scripting language designed for composing application programs into distributed, -parallelized applications for execution on clusters, grids, clouds and supercomputers -with tens to hundreds of thousands of processors. It is intended to -serve as a higher level framework for composing the interaction of -concurrently executing programs (even parallel ones) and scripts written in other scripting languages. 
Swift -scripts express the execution of programs to produce datasets using a C-like syntax +parallelized applications for execution on clusters, grids, clouds, and supercomputers +with tens to hundreds of thousands of processors. +%It is intended to serve as a higher level framework for composing the interaction of +%concurrently executing programs (even parallel ones) and scripts +%written in other scripting languages. +Swift %is a higher-level language that +focuses not on the details of executing sequences or +pipelines of scripts and programs (even parallel ones), but rather on the issues that arise +from the concurrent execution, composition, and coordination of many independent computational tasks at +large scale. +% +Swift scripts express the execution of programs to produce datasets using a C-like syntax consisting of function definitions and expressions, but with dataflow-driven semantics and implicit parallelism. -The emergence of large-scale production computing infrastructure such -as clusters, grids and supercomputers, and the -inherent complexity of programming on these systems, necessitates a -new approach. -Swift is a higher-level language -that focuses not on the details of executing sequences or -pipelines of programs, but rather on the issues that arise -from the concurrent execution, composition, and coordination of many independent computational tasks at -large scale. +%The emergence of large-scale production computing infrastructure such +%as clusters, grids and supercomputers, and the +%inherent complexity of programming on these systems, necessitates a +%new approach. While many application needs involve the execution of a single large message-passing parallel program, many others require the coupling or @@ -134,16 +136,15 @@ glued together and executed in parallel at large scale. It regularizes and abstracts notions of processes and external data for distributed -parallel execution of application programs. 
Swift scripts are location-independent and automatically parallelized by exploiting the maximal concurrency permitted by their data dependencies and by resource availability. +parallel execution of application programs. -\katznote{integrate with previous paragraph} -Swift is implicitly parallel and distributed, in that the user does not explicitly code either parallel behavior or synchronization (or mutual exclusion); does not code explicit data transfer of files to the execution sites of jobs and back. In fact no knowledge of runtime execution locations is directly specified in a Swift script. The function model on which Swift is based ensures that execution of Swift scripts is deterministic, thus simplifying the scripting process. - -\katznote{fix this} +Swift is implicitly parallel and distributed (or location-independent), in that the user does not explicitly code either parallel behavior or synchronization (or mutual exclusion) and does not code explicit transfer of files to and from execution sites. In fact, no knowledge of runtime execution locations is directly specified in a Swift script. The function model on which Swift is based ensures that execution of Swift scripts is deterministic (if the called functions are themselves deterministic), thus simplifying the scripting process. Having the results of a Swift script be independent of the way that its function invocations are parallelized implies that the functions must, for the same input, produce the same output, irrespective of the time, order or location in -which they are ``executed''. %This characteristic is reminiscent of +which they are ``executed''. + +%This characteristic is reminiscent of %referential transparency, and one may readily extend the concept to %encompass arbitrary processes without difficulty. 
@@ -152,27 +153,24 @@ Swift can execute scripts that perform tens of thousands of program invocations on highly parallel resources, and handle the unreliable and dynamic aspects of wide-area distributed resources. Such issues are handled by Swift's runtime system, and are not manifest in the user's scripts. - -%keep: discuss kthread/function duality; dont confuse with the parameter issue? -Swift enables users to specify process composition by representing processes as functions, where input data files and process parameters become function parameters and output data files become function return values. \katznote{these are really Karajan threads - forward link to that? - what do we call these in this paper?} A Swift script is a graph of function calls - each function call ... see section 3. -%keep: - -The Swift language -provides a high level representation of collections of data and a -specification of how those collections are to be mapped to that -abstract representation and processed by external -programs. Underlying this is an implementation that executes the -external programs on clusters, grids and other parallel platforms, providing -automated site selection, data management, and reliability. - - -%keep The exact number of processing units available on such shared resources varies with time. In order to take advantage of as many processing units as possible during the execution of a Swift program, it is necessary to be flexible in the way the execution of individual processes is parallelized. +Swift exploits the maximal concurrency permitted by data dependencies and by resource availability. +%keep: discuss kthread/function duality; dont confuse with the parameter issue? +Swift enables users to specify process composition by representing processes as functions, where input data files and process parameters become function parameters and output data files become function return values. %\katznote{these are really Karajan threads - forward link to that? 
- what do we call these in this paper?} A Swift script is a graph of function calls - each function call ... see section~\ref{Execution}. +% +Swift also +provides a high level representation of collections of data (used as +function inputs and outputs) and a +specification (``mappers'') that allows those collections to be processed by external +programs. %Underlying this is an implementation that executes the +%external programs on clusters, grids and other parallel platforms, providing +%automated site selection, data management, and reliability. + We choose to make the Swift language purely functional (i.e., all operations have a well-defined set of inputs and outputs, all variables are write-once, and side effects are disallowed in the language) in order to prevent the difficulties that @@ -194,11 +192,11 @@ sufficient specification and encapsulation of inputs to, and outputs from, a given application, such that an execution environment could automatically make remote execution transparent. - +% Without this, achieving location transparency %and automated parallel execution -is not feasible. Swift adds to scripting what the remote procedure call (RPC) paradigm -\cite{RPC} adds to programming: by formalizing the inputs and outputs of +is not feasible. Swift adds to scripting what the remote procedure call (RPC) +paradigm~\cite{RPC} adds to programming: by formalizing the inputs and outputs of applications that have been declared as app() functions, it provides a way to make the remote execution of applications transparent. @@ -331,8 +329,8 @@ %%% The \emph{if} and \emph{switch} statements are rather standard, but -\emph{foreach} merits more discussion. Similar to \emph{Go} -(\cite{GOLANG}) and \emph{Python}, its control ``variables'' can be both +\emph{foreach} merits more discussion. Similar to \emph{Go}~\cite{GOLANG} +and \emph{Python}, its control ``variables'' can be both an index and a value. 
The syntax is as follows: \begin{verbatim} @@ -342,7 +340,7 @@ \end{verbatim} This is necessary because Swift does not allow the use of mutable state -(i.e., variables are single-assignment), therefore one would not be able +(i.e., variables are single-assignment). Therefore, one is not able to write statements such as \verb|i = i + 1|. \subsection{Data model} @@ -970,7 +968,7 @@ \section{Execution} \label{Execution} -Swift is implemented by compiling to a Karajan program\cite{Karajan}, which provides +Swift is implemented by compiling to a Karajan program~\cite{Karajan}, which provides several benefits: a lightweight threading model, futures, remote job execution, @@ -1825,7 +1823,7 @@ %% In contrast to a text-oriented programming language like SwiftScript, %% some scientists prefer to design simple programs using GUI design tools. -%% An example of this is the LONI Pipeline tool\cite{LONIPIPELINE}. Preliminary +%% An example of this is the LONI Pipeline tool~\cite{LONIPIPELINE}. Preliminary %% investigations suggest that scientific workflows designed with that tool %% can be straightforwardly compiled into SwiftScript and thus benefit from %% Swift's execution system. @@ -1840,7 +1838,7 @@ %% \subsection{Language development} %% TODO: describe how it becomes more functional as time passes, as is -%% becoming more popular. can ref mapreduce here\cite{MAPREDUCE} eg map +%% becoming more popular. can ref mapreduce here~\cite{MAPREDUCE} eg map %% operator extension - looks like foreach; and maybe some other %% popular-ish functional language eg F\# @@ -1859,7 +1857,7 @@ %% TODO: debugging of distributed system - can have a non-futures section %% on what is available now - logprocessing module, as well as -%% mentioning CEDPS\cite{CEDPS} as somewhat promising(?) for the future. +%% mentioning CEDPS~\cite{CEDPS} as somewhat promising(?) for the future. 
%% \subsection{Swift as a library} %% Could existing programs execute Swift calls through a library From noreply at svn.ci.uchicago.edu Wed Jan 5 15:39:39 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 15:39:39 -0600 (CST) Subject: [Swift-commit] r3858 - text/parco10submission Message-ID: <20110105213939.7C78AFC41@svn.ci.uchicago.edu> Author: hategan Date: 2011-01-05 15:39:39 -0600 (Wed, 05 Jan 2011) New Revision: 3858 Modified: text/parco10submission/paper.tex Log: added some comments Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 20:52:24 UTC (rev 3857) +++ text/parco10submission/paper.tex 2011-01-05 21:39:39 UTC (rev 3858) @@ -150,6 +150,10 @@ As a language, Swift is simpler than most scripting languages because it does not replicate the capabilities that existing scripting languages like Perl, Python, and shells do very well, but instead makes it easy to call such scripts as small applications. % say: it has fewer statements, limited data types and a compact library of useful support primitives. It can be extended using built-in functions coded in Java, and by mappers coded as Java built-ins or as external scripts. These functions execute in parallel as part of expression evaluation in the same mapper as externally called application programs or scripts do. +% Mihael thinks that we should not claim that Swift is "simpler". The language is as complete as a language can +% be and the momentary lack of libraries is independent of the language. Most functional languages are simpler +% than Swift. + Swift can execute scripts that perform tens of thousands of program invocations on highly parallel resources, and handle the unreliable and dynamic aspects of wide-area distributed resources. Such issues are handled by Swift's runtime system, and are not manifest in the user's scripts.
@@ -163,6 +167,14 @@ %keep: discuss kthread/function duality; dont confuse with the parameter issue? Swift enables users to specify process composition by representing processes as functions, where input data files and process parameters become function parameters and output data files become function return values. %\katznote{these are really Karajan threads - forward link to that? - what do we call these in this paper?} A Swift script is a graph of function calls - each function call ... see section~\ref{Execution}. % +% Mihael thinks that the graph of function calls is not quite appropriate since this may +% project an overly simplified view. Swift allows +% this graph to be built dynamically. For that matter, any parallel program will end up +% having a trace that can be represented as a graph. And any purely functional program +% will have a graph representing dependencies (and again, that graph may be a non-trivial +% thing built at run-time). It so happens that, using futures, we make the dependency +% graph also be the trace of the execution graph. +% Swift also provides a high level representation of collections of data (used as function inputs and outputs) and a @@ -188,6 +200,12 @@ automatic parallelization is achieved without the need for dependency analysis, which would significantly complicate the Swift implementation. +% Mihael thinks it's more powerful to say that dependency analysis +% is a complex issue for any non-functional language. It is not that +% some swift constraints make dependency analysis complex, but +% the very idea of dependency analysis when allowing side-effects +% is complex. + We believe that the missing feature in current scripting languages is sufficient specification and encapsulation of inputs to, and outputs from, a given application, such that an execution environment could @@ -232,6 +250,9 @@ %keep: how best to state process/function duality? 
Each invocation of a function is a process; all functions run in parallel; foreach loops are unfolded and run in parallel; essentially the entire program is unfolded. (Note: iterate stops this behavior and is thus useful; address scalability issues of this and future graph partitioning; how throttling keeps this manageable. +% re iterate. foreach can also stop this behavior if there are inter-iteration dependencies +% + %This duality allows the formal specification of process behavior. In the following Swift statement, the semantics are defined in terms of the specification of the function %``rotate'' when supplied with specific parameter types: %\begin{verbatim} @@ -257,6 +278,9 @@ %careful employment of grid middleware. %decide: is referential transparency relevant? + +% Referential transparency is THE relevant thing! The other things follow from it. + %Having established the constraint that Swift functions must in general %be referentially transparent, and in order to preserve referential %transparency at different levels of abstractions within the language, it @@ -268,6 +292,14 @@ %discuss: is lazy vs eager relevant? What does it really mean to swift? +% in procedural programming you can only do eager evaluation. The minute +% you eliminate side-effects, generalized lazy, future and lazy-future +% strategies become immediately possible. From a theoretical standpoint, +% in order to validate the swift idea, we only need to point out that +% we can see app invocations as pure function invocations and then +% ride on the functional train to get to automatic parallelization +% via futures. + % consider: In this process, we trade the ability to efficiently deal with infinite structures for the ability to minimize computation time. I think this pertains to the "unroll everything" strategy. \hide{A number of issues may be noted at this point. 
First, there exist a From noreply at svn.ci.uchicago.edu Wed Jan 5 15:42:56 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 15:42:56 -0600 (CST) Subject: [Swift-commit] r3859 - text/parco10submission Message-ID: <20110105214256.9B193FC41@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-05 15:42:56 -0600 (Wed, 05 Jan 2011) New Revision: 3859 Modified: text/parco10submission/paper.bib text/parco10submission/paper.tex Log: changes in related work section (7) Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-05 21:39:39 UTC (rev 3858) +++ text/parco10submission/paper.bib 2011-01-05 21:42:56 UTC (rev 3859) @@ -218,7 +218,87 @@ year={1986} } + at inproceedings{Dryad, +title={Dryad: Distributed Data-Parallel Programs from Sequential Building Blocks}, +author={Michael Isard and Mihai Budiu and Yuan Yu and Andrew Birrell and Dennis Fetterly}, +booktitle={Proceedings of European Conference on Computer Systems (EuroSys)}, +month={Mar}, +year={2007} +} + at inproceedings{DryadLINQ, +title={{DryadLINQ}: A System for General-Purpose Distributed Data-Parallel Computing Using a High-Level Language}, +author={Yuan Yu and Michael Isard and Dennis Fetterly and Mihai Budiu and Ulfar Erlingsson and Pradeep Kumar Gunda and Jon Currey}, +booktitle={Proceedings of Symposium on Operating System Design and Implementation (OSDI)}, +month={Dec}, +year={2008} +} + + at article{GEL, + author = {Ching Lian, Chua and Tang, Francis and Issac, Praveen and Krishnan, Arun}, + title = {GEL: Grid execution language}, + journal = {J. Parallel Distrib. 
Comput.}, + volume = {65}, + issue = {7}, + month = {July}, + year = {2005}, + issn = {0743-7315}, + pages = {857--869}, + numpages = {13}, + url = {http://dx.doi.org/10.1016/j.jpdc.2005.03.002}, + doi = {http://dx.doi.org/10.1016/j.jpdc.2005.03.002}, + acmid = {1088525}, + publisher = {Academic Press, Inc.}, + address = {Orlando, FL, USA}, + keywords = {Grid application development, Grid computing, Grid programming, Workflows}, +} + + at inproceedings{DataFlowShell, + author = {Walker, Edward and Xu, Weijia and Chandar, Vinoth}, + title = {Composing and executing parallel data-flow graphs with shell pipes}, + booktitle = {Proceedings of the 4th Workshop on Workflows in Support of Large-Scale Science}, + series = {WORKS '09}, + year = {2009}, + isbn = {978-1-60558-717-2}, + location = {Portland, Oregon}, + pages = {11:1--11:10}, + articleno = {11}, + numpages = {10}, + url = {http://doi.acm.org/10.1145/1645164.1645175}, + doi = {http://doi.acm.org/10.1145/1645164.1645175}, + acmid = {1645175}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {coordination languages, data-flow processing, parallel processing}, +} + + at article{GXPmake, +author = {Kenjiro Taura and Takuya Matsuzaki and Makoto Miwa and Yoshikazu Kamoshida and Daisaku Yokoyama and Nan Dun and Takeshi Shibata and Choi Sung Jun and Jun'ichi Tsujii}, +title = {Design and Implementation of GXP Make -- A Workflow System Based on Make}, +journal ={IEEE International Conference on eScience}, +isbn = {978-0-7695-4290-4}, +year = {2010}, +pages = {214--221}, +doi = {http://doi.ieeecomputersociety.org/10.1109/eScience.2010.43}, +publisher = {IEEE Computer Society}, +address = {Los Alamitos, CA, USA}, +} + + at article {makeflow, + author = {Yu, Li and Moretti, Christopher and Thrasher, Andrew and Emrich, Scott and Judd, Kenneth and Thain, Douglas}, + affiliation = {University of Notre Dame Department of Computer Science and Engineering South Bend USA}, + title = {Harnessing parallelism in 
multicore clusters with the All-Pairs, Wavefront, and Makeflow abstractions}, + journal = {Cluster Computing}, + publisher = {Springer Netherlands}, + issn = {1386-7857}, + keyword = {Computer Science}, + pages = {243-256}, + volume = {13}, + issue = {3}, + url = {http://dx.doi.org/10.1007/s10586-010-0134-7}, + note = {10.1007/s10586-010-0134-7}, + year = {2010} +} % Items below are from an older paper - retain for the moment in case any are useful here @article{condor-g, Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 21:39:39 UTC (rev 3858) +++ text/parco10submission/paper.tex 2011-01-05 21:42:56 UTC (rev 3859) @@ -1662,10 +1662,9 @@ \end{verbatim} -\section{Comparison to Other Systems} +\section{Related Work} \label{Related} -\katznote{I would change the section name to ``related work''} %% As a ``parallel scripting language'', Swift is typically used to %% specify and execute scientific ``workflows'' - which we define here as %% the execution of a series of steps to perform larger domain-specific @@ -1774,7 +1773,30 @@ the knowledge of the whole workflow graph, while in Swift, the structure of a workflow is constructed and expanded dynamically. -Swift integrates with the CoG Karajan workflow engine. Karajan +Dryad~\cite{Dryad} is an infrastructure for running data-parallel programs on a parallel or distributed system. In addition to allowing files to be used for passing data between +tasks (like Swift), it also allows TCP pipes and shared memory FIFOs to be used. +Dryad tasks are written in C++, while Swift tasks can be written in any language. +Dryad graphs are explicitly developed by the programmer; Swift graphs are implicit and the programmer doesn't worry about them. A tool called Nebula was originally developed +above Dryad, but it doesn't seem to be supported currently.
It appears to have been +used for clusters and well-connected groups of clusters in a single administrative domain, +unlike Swift, which supports a wider variety of platforms. Also related is DryadLINQ~\cite{DryadLINQ}, +which generates Dryad computations from the LINQ extensions to C\#. + +GEL~\cite{GEL} is somewhat similar to Swift. It defines programs to be run, then +uses a script to express the order in which they should be run, handling the needed +data movement and job execution for the user. The user explicitly +states what is parallel and what is not, unlike Swift, which determines this +based on data dependencies. + +Walker et al.~\cite{DataFlowShell} have recently been developing extensions to +BASH that allow a user to define a dataflow graph, including the concepts +of fork, join, cycles, and key-value aggregation, but just on a single parallel system. + +A few groups have been working on parallel and distributed versions of make~\cite{GXPmake, makeflow}. These tools use the concept of virtual data, where the user defines the processing by which data is created, then calls for the final data product. The make-like tools determine what processing is needed to get from the existing files to the final product, which includes +running processing tasks. If this is run on a distributed system, data movement also must +be handled by the tools. \katznote{Need to say something about Swift in comparison} + +Swift integrates with the CoG Karajan workflow engine~\cite{Karajan}. Karajan provides the libraries and primitives for job scheduling, data transfer, and grid job submission; Swift adds support for high-level abstract specification of large parallel computations, data @@ -1782,14 +1804,6 @@ grid sites, and (via Falkon~\cite{Falkon_2008} and CoG coasters) \katznote{need to talk about what CoG coasters is vs coasters as previously introduced, or clear up the fact that the previous ``coasters'' didn't talk about CoG.} fast job execution.
-\katznote{Re: Dryad -Dryad allows TCP pipes and shared memory FIFOs for passing data between tasks, unlike Swift. -(Dryad also allows files-just pointing out a difference). -Dryad tasks are written in C++ (but not required?) It also looks more like a component model in some ways. -Dryad graphs are explicitly developed by the programmer; Swift grafts are implicit and the programmer doesn't worry about them -Nebula on top of Dryad looks much more similar to Swift (I don't really know anything about Nebula - is it still supported? And how is it related to LINQ) -Is there something about the systems that Dryad is meant for vs those that Swift is meant for? (i.e. Dryad is optimized for clusters and well-connected groups of clusters in a single administrative domain) -} \section{Future work} From noreply at svn.ci.uchicago.edu Wed Jan 5 16:00:52 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 16:00:52 -0600 (CST) Subject: [Swift-commit] r3860 - text/parco10submission Message-ID: <20110105220052.06F9BFC41@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-05 16:00:51 -0600 (Wed, 05 Jan 2011) New Revision: 3860 Modified: text/parco10submission/paper.tex Log: small addition in Section 7 Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 21:42:56 UTC (rev 3859) +++ text/parco10submission/paper.tex 2011-01-05 22:00:51 UTC (rev 3860) @@ -1794,7 +1794,10 @@ A few groups have been working on parallel and distributed versions of make~\cite{GXPmake, makeflow}. These tools use the concept of virtual data, where the user defines the processing by which data is created, then calls for the final data product. The make-like tools determine what processing is needed to get from the existing files to the final product, which includes running processing tasks. 
If this is run on a distributed system, data movement also must -be handled by the tools. \katznote{Need to say something about Swift in comparison} +be handled by the tools. In comparison, Swift is a language, which may be slightly +less compact for describing applications that can be represented as static DAGs, but +also allows easy programming of applications that have cycles and runtime decisions, +such as in optimization problems. Swift integrates with the CoG Karajan workflow engine~\cite{Karajan}. Karajan provides the libraries and primitives for job scheduling, data From noreply at svn.ci.uchicago.edu Wed Jan 5 17:15:41 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 17:15:41 -0600 (CST) Subject: [Swift-commit] r3861 - text/parco10submission Message-ID: <20110105231541.89D96FC41@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-05 17:15:41 -0600 (Wed, 05 Jan 2011) New Revision: 3861 Added: text/parco10submission/plots/ Log: Adding From noreply at svn.ci.uchicago.edu Wed Jan 5 17:22:00 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 17:22:00 -0600 (CST) Subject: [Swift-commit] r3862 - text/parco10submission Message-ID: <20110105232200.78240FC41@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-05 17:22:00 -0600 (Wed, 05 Jan 2011) New Revision: 3862 Modified: text/parco10submission/paper.tex Log: Stake out Performance section Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 23:15:41 UTC (rev 3861) +++ text/parco10submission/paper.tex 2011-01-05 23:22:00 UTC (rev 3862) @@ -69,7 +69,7 @@ \begin{abstract} -The work of scientists, engineers and statisticians often requires executing domain-specific application programs a +The work of scientists, engineers and statisticians often requires executing domain-specific application programs a massive number of 
times on large collections of file-based data. This process requires complex data management to pass data to, from, and between application invocations. Distributed and parallel computing resources can greatly speed up such processing, but their use increases the complexity of the programming effort and presents new barriers. The Swift parallel scripting language reduces these complexities with a @@ -102,14 +102,14 @@ %It is intended to serve as a higher level framework for composing the interaction of %concurrently executing programs (even parallel ones) and scripts %written in other scripting languages. -Swift %is a higher-level language that +Swift %is a higher-level language that focuses not on the details of executing sequences or pipelines of scripts and programs (even parallel ones), but rather on the issues that arise from the concurrent execution, composition, and coordination of many independent computational tasks at large scale. % Swift scripts express the execution of programs to produce datasets using a C-like syntax -consisting of function definitions and expressions, but with +consisting of function definitions and expressions, but with dataflow-driven semantics and implicit parallelism. %The emergence of large-scale production computing infrastructure such @@ -161,7 +161,7 @@ varies with time. In order to take advantage of as many processing units as possible during the execution of a Swift program, it is necessary to be flexible in the way the execution of individual processes is -parallelized. +parallelized. Swift exploits the maximal concurrency permitted by data dependencies and by resource availability. %keep: discuss kthread/function duality; dont confuse with the parameter issue? @@ -169,7 +169,7 @@ % % Mihael thinks that the graph of function calls is not quite appropriate since this may % project an overly simplified view. Swift allows -% this graph to be built dynamically. 
For that matter, any parallel program will end up +% this graph to be built dynamically. For that matter, any parallel program will end up % having a trace that can be represented as a graph. And any purely functional program % will have a graph representing dependencies (and again, that graph may be a non-trivial % thing built at run-time). It so happens that, using futures, we make the dependency @@ -192,7 +192,7 @@ implementations of evaluation strategies different from the widespread eager evaluation, as seen in lazily evaluated languages such as Haskell~\cite{Haskell}. - + In order to achieve automatic parallelization, Swift is based on the synchronization construct of \emph{futures}~\cite{Futures}, which can result in abundant parallelism. Every Swift variable (including every members of structures and arrays) is a future. @@ -202,7 +202,7 @@ % Mihael thinks it's more powerful to say that dependency analysis % is a complex issue for any non-functional language. It is not that -% some swift constraints make dependency analysis complex, but +% some swift constraints make dependency analysis complex, but % the very idea of dependency analysis when allowing side-effects % is complex. @@ -251,7 +251,7 @@ %keep: how best to state process/function duality? Each invocation of a function is a process; all functions run in parallel; foreach loops are unfolded and run in parallel; essentially the entire program is unfolded. (Note: iterate stops this behavior and is thus useful; address scalability issues of this and future graph partitioning; how throttling keeps this manageable. % re iterate. foreach can also stop this behavior if there are inter-iteration dependencies -% +% %This duality allows the formal specification of process behavior. 
In the following Swift statement, the semantics are defined in terms of the specification of the function %``rotate'' when supplied with specific parameter types: @@ -264,12 +264,12 @@ % and whether the % implementation can be described as a ``library call'' or a ``program % invocation'' changes nothing with respect to what the piece of program -% fundamentally does: produce a rotated version of the original. +% fundamentally does: produce a rotated version of the original. %Indeed, there is no strict requirement in the specification of the Swift %language dictating that functions be implemented as command-line %applications. They can equally consist of library calls or functions -%written in Swift itself, as long as they are side-effect free. +%written in Swift itself, as long as they are side-effect free. %A soft restriction arises from the desire to distribute the execution of %functions across a collection of heterogeneous resources, which, with @@ -308,7 +308,7 @@ processes. Under the assumption that eager evaluation of compositions of Monte Carlo processes also produces valid results, an eager Swift implementation (which is the case with the current implementation) -readily accommodates Monte Carlo processes. +readily accommodates Monte Carlo processes. However, further discussion is necessary if optimizations (such as memoization) are employed. @@ -338,14 +338,14 @@ \subsection{Language facilities} -At the core of the Swift language are function definitions, of which +At the core of the Swift language are function definitions, of which two types exist: \begin{description} \item[External functions] (also called ``atomic'') are functions whose implementations are not written in Swift. Currently external functions are implemented as command-line applications\footnote{Note that some Swift scripts are specified as library calls.}. 
-\item[Internal functions] (also called ``compound'') are functions +\item[Internal functions] (also called ``compound'') are functions implemented in Swift. \end{description} @@ -360,7 +360,7 @@ } %%% -The \emph{if} and \emph{switch} statements are rather standard, but +The \emph{if} and \emph{switch} statements are rather standard, but \emph{foreach} merits more discussion. Similar to \emph{Go}~\cite{GOLANG} and \emph{Python}, its control ``variables'' can be both an index and a value. The syntax is as follows: @@ -371,7 +371,7 @@ } \end{verbatim} -This is necessary because Swift does not allow the use of mutable state +This is necessary because Swift does not allow the use of mutable state (i.e., variables are single-assignment). Therefore, one is not able to write statements such as \verb|i = i + 1|. @@ -383,7 +383,7 @@ provided by the Swift runtime. Standard operators are defined for primitive types, such as addition, multiplication, concatenation, etc. -\item[Mapped types] are types of data for which some external +\item[Mapped types] are types of data for which some external implementation exists. Swift provides a mechanism to describe isomorphisms between instances of Swift data structures and subsets in the external implementation. This mechanism is called ``mapping'' and @@ -846,7 +846,7 @@ component program atomicity on data output. \katznote{this previous sentence -has a lot of stuff that hasn't been defined, and the next one is equally confusing at this point in the paper.} +has a lot of stuff that hasn't been defined, and the next one is equally confusing at this point in the paper.} This can add substantial responsibility to component programs, in exchange for allowing arbitrary @@ -1115,10 +1115,10 @@ will fail, ultimately resulting in the entire script failing. In such a case, Swift provides a \emph{restart log} that encapsulates -which function invocations have been successfully completed. 
+which function invocations have been successfully completed. %%%%%% What manual interv. and why??? After -appropriate manual intervention, +appropriate manual intervention, a subsequent Swift run may be started with this restart log; this will avoid re-execution of already executed invocations. @@ -1190,7 +1190,7 @@ Using Swift to submit to a large number of sites poses a number of practical challenges that are not encountered when running on a small number of sites. These challenges are seen when comparing execution on -the relatively static TeraGrid~\cite{TeraGrid_2005} with execution on the +the relatively static TeraGrid~\cite{TeraGrid_2005} with execution on the more dynamic Open Science Grid (OSG)~\cite{OSG_2007}, where the set of sites that may be used is large and changing. It is impractical to maintain a site catalog by @@ -1662,6 +1662,11 @@ \end{verbatim} +\section{Performance Characteristics} +\label{Performance} + + + \section{Related Work} \label{Related} @@ -1720,7 +1725,7 @@ \begin{itemize} \item Programming model: MapReduce only supports key-value pairs as - input or output datasets and two types of computation functions, + input or output datasets and two types of computation functions, map and reduce; Swift provides a type system and allows the definition of complex data structures and arbitrary computational procedures. @@ -1780,7 +1785,7 @@ above Dryad, but it doesn't seem to be supported currently. It appears to have been used for clusters and well-connected groups of clusters in a single administrative domain, unlike Swift supports a wider variety of platforms. Also related is DryadLINQ~\cite{DryadLINQ}, -which generates Dryad computations from the LINQ extensions to C\#. +which generates Dryad computations from the LINQ extensions to C\#. GEL~\cite{GEL} is somewhat similar to Swift. 
It defines programs to be run, then uses a script to express the order in which they should be run, handling the needed @@ -1795,7 +1800,7 @@ A few groups have been working on parallel and distributed versions of make~\cite{GXPmake, makeflow}. These tools use the concept of virtual data, where the user defines the processing by which data is created, then calls for the final data product. The make-like tools determine what processing is needed to get from the existing files to the final product, which includes running processing tasks. If this is run on a distributed system, data movement also must be handled by the tools. In comparison, Swift is a language, which may be slightly -less compact for describing applications that can be represented as static DAGs, but +less compact for describing applications that can be represented as static DAGs, but also allows easy programming of applications that have cycles and runtime decisions, such as in optimization problems. @@ -1839,7 +1844,7 @@ scheduling Coasters workers using the standard job submission techniques and employing an internal IP network. -\mikenote{In order to achieve automatic parallelization in Swift, instead of using thunks (i.e., suspended computations), which yield lazy +\mikenote{In order to achieve automatic parallelization in Swift, instead of using thunks (i.e., suspended computations), which yield lazy evaluation, we employ futures, which result in eager parallelism. In this process, we trade the ability to efficiently deal with infinite structures for the ability to minimize computation time. 
It must, however, be noted that a middle ground exists: lazy futures (futures whose computation is delayed until a value is first needed).} \subsection{Filesystem access optimizations} From noreply at svn.ci.uchicago.edu Wed Jan 5 17:53:30 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 17:53:30 -0600 (CST) Subject: [Swift-commit] r3864 - text/parco10submission Message-ID: <20110105235330.1C5BA9CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-05 17:53:29 -0600 (Wed, 05 Jan 2011) New Revision: 3864 Modified: text/parco10submission/paper.pdf text/parco10submission/paper.tex Log: First plot: sleep 10 jobs Modified: text/parco10submission/paper.pdf =================================================================== (Binary files differ) Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 23:53:12 UTC (rev 3863) +++ text/parco10submission/paper.tex 2011-01-05 23:53:29 UTC (rev 3864) @@ -1665,6 +1665,13 @@ \section{Performance Characteristics} \label{Performance} +\begin{figure*}[htbp] + \begin{center} + \includegraphics[scale=0.50]{plots/sleep} + \caption{Node utilization for {\tt sleep} tasks} + \label{PlotSleep} + \end{center} +\end{figure*} \section{Related Work} From noreply at svn.ci.uchicago.edu Wed Jan 5 17:59:34 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 17:59:34 -0600 (CST) Subject: [Swift-commit] r3865 - text/parco10submission Message-ID: <20110105235934.448CE9CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-05 17:59:34 -0600 (Wed, 05 Jan 2011) New Revision: 3865 Removed: text/parco10submission/paper.pdf Log: Removed - putting in the pdf was a bad idea, per Dan. 
Deleted: text/parco10submission/paper.pdf =================================================================== (Binary files differ) From noreply at svn.ci.uchicago.edu Wed Jan 5 18:12:04 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 18:12:04 -0600 (CST) Subject: [Swift-commit] r3866 - text/parco10submission Message-ID: <20110106001204.BBBEA9CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-05 18:12:04 -0600 (Wed, 05 Jan 2011) New Revision: 3866 Modified: text/parco10submission/paper.tex Log: Revised Language section intro and Data Model subsection. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-05 23:59:34 UTC (rev 3865) +++ text/parco10submission/paper.tex 2011-01-06 00:12:04 UTC (rev 3866) @@ -334,18 +334,69 @@ \section{The Swift language} \label{Language} -%%% \begin{msection} +Swift is by design a sparse, minimal scripting +language which executes external programs remotely and in parallel. +As such, Swift has only a very limited set of data +types, operators, and built-in functions. +Its simple, uniform data model is composed of a few atomic types (which can be simple scalar values or references to external files) and two collection types (arrays and structures). -\subsection{Language facilities} +Swift expresses the +invocation of ``ordinary programs''---technically, POSIX {\tt exec()} +operations---in a manner that explicitly specifies the files and command-line +arguments that are the inputs of each program +invocation. It similarly expresses all output files that results from the programs. +This enables Swift to provide distributed, location-independent execution of external application programs. -At the core of the Swift language are function definitions, of which +The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. 
First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections. Second, every expression in a Swift program is conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts are discussed in more detail below. + +% can be thought of as a massively-parallel lazy (ie, on-demand, or just in time) evaluation - say later on? + +\subsection{Data model} + +Every data object in Swift is built up from atomic data elements which contain three fields: a value, a state, and a queue of function invocations that are waiting for the value to be set. + +Variables are used in Swift to name the local variables, arguments, and returns of a function. Every Swift variable is assigned a concrete data type, based on a very simple type model (with no concepts of inheritance, abstraction, etc). The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment can be declared ``global'' to make them accessible to every other function in the script. + +Swift provides three basic classes of data types: + +\emph{Primitive types} are provided for integer, float, string, and boolean values by the Swift runtime. Common operators are defined for +primitive types, such as arithmetic, concatenation, explicit conversion, etc. +An additional primitive type ``external'' is provided for manual synchronization. + +\emph{Mapped types} are data elements that refer, through a process called``mapping'' to files external to the Swift script. These are the files that will be read and written by the external application programs called by Swift. 
+The mapping process can map single variables to single files, and structures and arrays to collections of files. + +Primitive and mapped types are called \emph{atomic types}. + +\hide{Swift mapped types can be +seen as generalizations of reference types in traditional languages in +that reference types are language representations of data stored in +internal memory, in contrast with primitive (value) types for which no +explicit storage is generally specified.} + +\hide{There is no syntactic distinction between primitive types and fileRef +types, and the semantic differences between the two classes of +types are minimal.} +% clarify minimal + +\hide{Atomic mapped types do not specify any information about the structure of +the data. It is up to the user to assign a ``proper'' type to external +data. Consequently Swift must and does implement nominal type equivalence.} + +\emph{Collection types} are provided in Swift by \emph{arrays} and \emph{structures}. +Structure fields can be of any type, while arrays contain only uniform values of a single type. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively contain each other in addition to atomic values. + +Due to the dynamic nature of the execution model, Swift arrays have no notion of size. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. + +\subsection{Functions - Mihael} + +At the core of the Swift language are function definitions, of which two types exist: \begin{description} \item[External functions] (also called ``atomic'') are functions whose implementations are not written in Swift. 
Currently external functions -are implemented as command-line applications\footnote{Note that some Swift scripts are specified as library calls.}. - -\item[Internal functions] (also called ``compound'') are functions +are implemented as command-line applications. +\item[Internal functions] (also called ``compound'') are functions implemented in Swift. \end{description} @@ -360,9 +411,9 @@ } %%% -The \emph{if} and \emph{switch} statements are rather standard, but -\emph{foreach} merits more discussion. Similar to \emph{Go}~\cite{GOLANG} -and \emph{Python}, its control ``variables'' can be both +The \emph{if} and \emph{switch} statements are rather standard, but +\emph{foreach} merits more discussion. Similar to \emph{Go} +(\cite{GOLANG}) and \emph{Python}, its control ``variables'' can be both an index and a value. The syntax is as follows: \begin{verbatim} @@ -371,103 +422,13 @@ } \end{verbatim} -This is necessary because Swift does not allow the use of mutable state -(i.e., variables are single-assignment). Therefore, one is not able +This is necessary because Swift does not allow the use of mutable state +(i.e., variables are single-assignment), therefore one would not be able to write statements such as \verb|i = i + 1|. -\subsection{Data model} -Swift provides two basic classes of data types: -\begin{description} -\item[Primitive types] (\emph{integer}, \emph{string}) are types -provided by the Swift runtime. Standard operators are defined for -primitive types, such as addition, multiplication, concatenation, etc. - -\item[Mapped types] are types of data for which some external -implementation exists. Swift provides a mechanism to describe -isomorphisms between instances of Swift data structures and subsets in -the external implementation. This mechanism is called ``mapping'' and -specific instances of isomorphisms are called ``mappers''. Currently the -only external implementation is a POSIX-like filesystem. 
- -However the -``external'' data type can be used to accommodate any external data that -Swift cannot and should not directly handle. - -Swift mapped types can be -seen as generalizations of reference types in traditional languages in -that reference types are language representations of data stored in -internal memory, in contrast with primitive (value) types for which no -explicit storage is generally specified. -\end{description} - -There is no syntactic distinction between primitive types and fileRef -types, and the semantic differences between the two classes of -types are minimal. -% clarify minimal - -Data can be aggregated using two ``composite types'': \emph{arrays} and \emph{structures}. -This can be done recursively in that arrays of arrays, structures -containing structures, arrays of structures and structures -containing arrays can be created. Types that have no internal -structure (i.e. scalar and hence non-composite types) are called ``atomic types''. - -Atomic mapped types do not specify any information about the structure of -the data. It is up to the user to assign a ``proper'' type to external -data. Consequently Swift must and does implement nominal type equivalence. - -%%% \end{msection} - -The Swift programming model is data-oriented: it encapsulates the -invocation of ``ordinary programs''---technically, POSIX {\tt exec()} -operations---in a manner that explicitly specifies the files and other -arguments that are the inputs and outputs of each program -invocation. This formal but simple model enables Swift to provide -several critical characteristics not provided by, nor readily -implemented in, existing scripting languages such as Perl, Python, or -shells. -% mention that python comes close with futures and decorators. -Notable features include: - -\begin{itemize} - -\item Location transparent execution: automatically selection of a - location for each program invocation and management of diverse execution - environments. 
A Swift script can be tested on a single local - workstation, and then the same script can be executed on a cluster, one - or more grids of clusters, and/or on large scale parallel - supercomputers such as the Sun - Constellation~\cite{SunConstellation_2008} or the IBM Blue - Gene/P~\cite{BGP_2008}. - -\item Automatic parallelization of program invocations: parallel invocation - of programs that have no data dependencies. - -\item Automatic balancing of work over available resources, based -on adaptive algorithms that account for both resource performance -and reliability, and that throttle program invocations at a rate -appropriate for each execution location and mechanism. - -\item Reliability, through replication and automatic resubmission of - failed executions and restarting of interrupted scripts from the point - of failure. - -\item Formalizing the creation and management of data objects in the - language and recording the provenance of data objects produced by a - Swift script. - -\end{itemize} - -Swift is intentionally designed to be a sparse, minimal scripting -language. Its sole purpose is to sequence and schedule the execution -of other programs. As such, Swift has only a very limited set of data -types, operators, and built-in functions. The essence of the Swift -language, which makes the benefits above possible, can be summarized -as follows: - \subsection{Language basics} - A Swift script describes data, application components, invocations of applications components, and the inter-relations (data flow) between those invocations, using a C-like syntax. @@ -846,7 +807,7 @@ component program atomicity on data output. 
\katznote{this previous sentence -has a lot of stuff that hasn't been defined, and the next one is equally confusing at this point in the paper.} +has a lot of stuff that hasn't been defined, and the next one is equally confusing at this point in the paper.} This can add substantial responsibility to component programs, in exchange for allowing arbitrary @@ -944,9 +905,43 @@ structured Swift variable, can represent a large, structured data set. -\subsection{The execution environment for component programs} +\subsection{Swift runtime environment} + \label{LanguageEnvironment} +Notable runtime features include: + +\begin{itemize} + +\item Location transparent execution: automatically selection of a + location for each program invocation and management of diverse execution + environments. A Swift script can be tested on a single local + workstation, and then the same script can be executed on a cluster, one + or more grids of clusters, and/or on large scale parallel + supercomputers such as the Sun + Constellation~\cite{SunConstellation_2008} or the IBM Blue + Gene/P~\cite{BGP_2008}. + +\item Automatic parallelization of program invocations: parallel invocation + of programs that have no data dependencies. + +\item Automatic balancing of work over available resources, based +on adaptive algorithms that account for both resource performance +and reliability, and that throttle program invocations at a rate +appropriate for each execution location and mechanism. + +\item Reliability, through replication and automatic resubmission of + failed executions and restarting of interrupted scripts from the point + of failure. + +\item Formalizing the creation and management of data objects in the + language and recording the provenance of data objects produced by a + Swift script. + +\end{itemize} + + + A Swift \verb|app| declaration describes how a component program is invoked. 
In order to ensure the correctness of the Swift model, the environment in which programs are executed needs to be constrained. @@ -1000,7 +995,7 @@ \section{Execution} \label{Execution} -Swift is implemented by compiling to a Karajan program~\cite{Karajan}, which provides +Swift is implemented by compiling to a Karajan program\cite{Karajan}, which provides several benefits: a lightweight threading model, futures, remote job execution, @@ -1115,10 +1110,10 @@ will fail, ultimately resulting in the entire script failing. In such a case, Swift provides a \emph{restart log} that encapsulates -which function invocations have been successfully completed. +which function invocations have been successfully completed. %%%%%% What manual interv. and why??? After -appropriate manual intervention, +appropriate manual intervention, a subsequent Swift run may be started with this restart log; this will avoid re-execution of already executed invocations. @@ -1190,7 +1185,7 @@ Using Swift to submit to a large number of sites poses a number of practical challenges that are not encountered when running on a small number of sites. These challenges are seen when comparing execution on -the relatively static TeraGrid~\cite{TeraGrid_2005} with execution on the +the relatively static TeraGrid~\cite{TeraGrid_2005} with execution on the more dynamic Open Science Grid (OSG)~\cite{OSG_2007}, where the set of sites that may be used is large and changing. It is impractical to maintain a site catalog by @@ -1791,7 +1786,7 @@ Dryad graphs are explicitly developed by the programmer; Swift graphs are implicit and the programmer doesn't worry about them. A tool called Nebula was originally developed above Dryad, but it doesn't seem to be supported currently. It appears to have been used for clusters and well-connected groups of clusters in a single administrative domain, -unlike Swift supports a wider variety of platforms. 
Also related is DryadLINQ~\cite{DryadLINQ}, +while Swift supports a wider variety of platforms. Also related is DryadLINQ~\cite{DryadLINQ}, which generates Dryad computations from the LINQ extensions to C\#. GEL~\cite{GEL} is somewhat similar to Swift. It defines programs to be run, then From noreply at svn.ci.uchicago.edu Wed Jan 5 19:10:54 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 19:10:54 -0600 (CST) Subject: [Swift-commit] r3867 - text/parco10submission Message-ID: <20110106011054.997B39CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-05 19:10:54 -0600 (Wed, 05 Jan 2011) New Revision: 3867 Modified: text/parco10submission/paper.tex Log: edits to initial subsections of Language section. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 00:12:04 UTC (rev 3866) +++ text/parco10submission/paper.tex 2011-01-06 01:10:54 UTC (rev 3867) @@ -340,11 +340,19 @@ types, operators, and built-in functions. Its simple, uniform data model is composed of a few atomic types (which can be simple scalar values or references to external files) and two collection types (arrays and structures). -Swift expresses the -invocation of ``ordinary programs''---technically, POSIX {\tt exec()} -operations---in a manner that explicitly specifies the files and command-line +A Swift script describes data, application components, invocations +of applications components, and the inter-relations (data flow) +between those invocations, using a C-like syntax. +Swift scripts are written as a set of functions, composed upwards, +starting with \emph{atomic functions} that specify the execution of +external programs, and then higher level functions are composed as +pipelines (or more generally, graphs) of sub-functions. 
+ +Unlike most other scripting languages, Swift expresses +invocations of ``ordinary programs''---technically, POSIX {\tt exec()} +operations---in a manner that explicitly declares the files and command-line arguments that are the inputs of each program -invocation. It similarly expresses all output files that results from the programs. +invocation. Swift scripts similarly declare all output files that results from program invocations. This enables Swift to provide distributed, location-independent execution of external application programs. The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections. Second, every expression in a Swift program is conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts are discussed in more detail below. @@ -384,32 +392,36 @@ data. Consequently Swift must and does implement nominal type equivalence.} \emph{Collection types} are provided in Swift by \emph{arrays} and \emph{structures}. -Structure fields can be of any type, while arrays contain only uniform values of a single type. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively contain each other in addition to atomic values. +Structure fields can be of any type, while arrays contain only uniform values of a single type. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. 
Arrays contain a varying number of elements. Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. -Due to the dynamic nature of the execution model, Swift arrays have no notion of size. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. +Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. Also, since all data elements have single-assignment semantics, no garbage collection issues arise. -\subsection{Functions - Mihael} +\subsection{Execution model} -At the core of the Swift language are function definitions, of which -two types exist: -\begin{description} -\item[External functions] (also called ``atomic'') are functions whose +Swift has three types of functions: + +\emph{Built-in functions} are defined in the Java code of the Swift runtime system, and perform various utility functions. + +\emph{Atomic functions}are functions whose implementations are not written in Swift. Currently external functions -are implemented as command-line applications. -\item[Internal functions] (also called ``compound'') are functions -implemented in Swift. -\end{description} +are implemented as command-line applications or built-in functions defined in Java. 
-In addition to functions, the Swift language provides conditional +Application wrapper functions (declared using the app keyword) +specify the interface (input files and parameters, and output files) of application programs in +terms of files and other parameters. + +\emph{Compound functions} are functions +that call atomic and other compound +functions. + +In addition to function invocation, the Swift language provides conditional execution through the \emph{if} and \emph{switch} statements as well as a \emph{foreach} construct used for iterating over arrays of data. -%%% -\hide{{\color{red} Note: I'm skipping \emph{iterate} on purpose. We should +\hide{Mihael: Note: I'm skipping \emph{iterate} on purpose. We should deprecate it since it's hard to understand and everything that can be -done with it can also be done with \emph{foreach}} +done with it can also be done with \emph{foreach} } -%%% The \emph{if} and \emph{switch} statements are rather standard, but \emph{foreach} merits more discussion. Similar to \emph{Go} @@ -426,21 +438,6 @@ (i.e., variables are single-assignment), therefore one would not be able to write statements such as \verb|i = i + 1|. - -\subsection{Language basics} - -A Swift script describes data, application components, invocations -of applications components, and the inter-relations (data flow) -between those invocations, using a C-like syntax. -Swift scripts are written as a set of functions, composed upwards, -starting with \emph{atomic functions} that specify the execution of -external programs, and then higher level functions are composed as -pipelines (or more generally, graphs) of sub-functions. Atomic -functions specify the inputs and outputs of application programs in -terms of files and other parameters. Compound functions are composed -into a conceptual graph of calls to atomic and other compound -functions. - Swift variables hold either primitive values, files, or collections of files. 
All variables are \emph{single-assignment} (meaning that they must be assigned to exactly one value during execution), @@ -478,13 +475,17 @@ are similar in most respects to structure types in other languages. One array type is provided for every atomic type (integer, string, boolean, and fileRef). +%%% ^^^ fileref, type issues. Arrays use numeric indices, but are sparse. Arrays can be nested to provide multi-dimensional indexing. We often refer to instances of composites of mapped types as \emph{datasets}. + %\katznote{maybe a little figure here?} +\subsection{Mapping files to data elements} + Mapped type and composite type variable declarations can be annotated with a \emph{mapping} descriptor that indicates the file(s) that make(s) up that \emph{dataset}. For example, the following line declares a variable named \verb|photo| of From noreply at svn.ci.uchicago.edu Wed Jan 5 23:31:35 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 5 Jan 2011 23:31:35 -0600 (CST) Subject: [Swift-commit] r3868 - text/parco10submission Message-ID: <20110106053135.636A59CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-05 23:31:34 -0600 (Wed, 05 Jan 2011) New Revision: 3868 Modified: text/parco10submission/paper.tex Log: Many edits to the Language section. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 01:10:54 UTC (rev 3867) +++ text/parco10submission/paper.tex 2011-01-06 05:31:34 UTC (rev 3868) @@ -365,6 +365,14 @@ Variables are used in Swift to name the local variables, arguments, and returns of a function. Every Swift variable is assigned a concrete data type, based on a very simple type model (with no concepts of inheritance, abstraction, etc). 
The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment can be declared ``global'' to make them accessible to every other function in the script. +Swift data elements (atomic variables and array elements) are \emph{single-assignment}: +they behave as futures and can be assigned at most one value during execution. +This semantic provides the +basis for Swift's model of parallel function evaluation and chaining. +While Swift arrays and structures are not +single-assignment, each of their elements is. + +Each variable in a Swift script is declared to be of a specific (single) type. Swift provides three basic classes of data types: \emph{Primitive types} are provided for integer, float, string, and boolean values by the Swift runtime. Common operators are defined for @@ -373,78 +381,85 @@ \emph{Mapped types} are data elements that refer, through a process called``mapping'' to files external to the Swift script. These are the files that will be read and written by the external application programs called by Swift.
Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. Arrays can be nested to provide multi-dimensional indexing. -\hide{There is no syntactic distinction between primitive types and fileRef -types, and the semantic differences between the two classes of -types are minimal.} -% clarify minimal +Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. Also, since all data elements have single-assignment semantics, no garbage collection issues arise. -\hide{Atomic mapped types do not specify any information about the structure of -the data. It is up to the user to assign a ``proper'' type to external -data. Consequently Swift must and does implement nominal type equivalence.} +Variables that are declared to be file references +are associated with a \emph{mapper} which defines (often through a dynamic lookup process) the +data files that are to be mapped to the variable. Array and structure elements that are declared to be file references are similarly mapped. -\emph{Collection types} are provided in Swift by \emph{arrays} and \emph{structures}. -Structure fields can be of any type, while arrays contain only uniform values of a single type. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. 
+Mapped type and composite type variable declarations can be annotated with a +\emph{mapping} descriptor that specifies the file(s) that are to be mapped to the Swift data element(s). -Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. Also, since all data elements have single-assignment semantics, no garbage collection issues arise. +For example, the following line declares a variable named \verb|photo| of +type \verb|image|. Since image is a fileRef type, it additionally declares that the +variable refers to a single file named \verb|shane.jpeg| -\subsection{Execution model} +\begin{verbatim} + image photo <"shane.jpeg">; +\end{verbatim} -Swift has three types of functions: +We can declare {\tt image} to be an \emph{external file type}: -\emph{Built-in functions} are defined in the Java code of the Swift runtime system, and perform various utility functions. +\begin{verbatim} + type image {}; +\end{verbatim} -\emph{Atomic functions}are functions whose -implementations are not written in Swift. Currently external functions -are implemented as command-line applications or built-in functions defined in Java. +The notation \verb|{}| indicates +that the type represents a reference to a single \emph{opaque} +file --- i.e., a reference to an external object whose structure is opaque to the Swift script. For convenience such type declarations typically use the equivalent shorthand \verb|type image;| (which new users find confusing but which has become a Swift idiom).
-Application wrapper functions (declared using the app keyword) -specify the interface (input files and parameters, and output files) of application programs in -terms of files and other parameters. +Mapped type variable declarations can be specified with a +\emph{mapping} descriptor enclosed in \verb|<>| that indicates the file to be mapped to the variable. +For example, the following line declares a variable named \verb|photo| of +type \verb|image|. Since image is a mapped file type, it additionally declares that the +variable refers to a single file named \verb|puppy.jpeg|: -\emph{Compound functions} are functions -that call atomic and other compound -functions. +\begin{verbatim} + image photo <"puppy.jpeg">; +\end{verbatim} -In addition to function invocation, the Swift language provides conditional -execution through the \emph{if} and \emph{switch} statements as well as -a \emph{foreach} construct used for iterating over arrays of data. +\emph{Structure types} are defined in this manner: -\hide{Mihael: Note: I'm skipping \emph{iterate} on purpose. We should -deprecate it since it's hard to understand and everything that can be -done with it can also be done with \emph{foreach} -} +\begin{verbatim} + type image; + type metadata; + type snapshot { + metadata m; + image i; + } +\end{verbatim} -The \emph{if} and \emph{switch} statements are rather standard, but -\emph{foreach} merits more discussion. Similar to \emph{Go} -(\cite{GOLANG}) and \emph{Python}, its control ``variables'' can be both -an index and a value. The syntax is as follows: +Members of a structure can be accessed using the \verb|.| operator: \begin{verbatim} -foreach v[, k] in array { - ... -} + snapshot s; + image i; + i = s.i; \end{verbatim} -This is necessary because Swift does not allow the use of mutable state -(i.e., variables are single-assignment), therefore one would not be able -to write statements such as \verb|i = i + 1|. 
+\subsection{Execution model} -Swift variables hold either primitive values, files, or collections of -files. All variables are \emph{single-assignment} (meaning -that they must be assigned to exactly one value during execution), -which provides the -basis for Swift's model of function chaining. (Note that while Swift arrays and structures are not -strictly single-assignment, each of their elements of are, as discussed in -Section~\ref{ordering}.) +Swift has three types of functions: + +\emph{Built-in functions} are defined in the Java code of the Swift runtime system, and perform various utility functions (numeric conversion, string manipulation, etc.) Operators (+ *, etc.) defined by the language behave similarly. + +\emph{Application interface functions} (declared using the app keyword) +specify the interface (input files and parameters, and output files) of application programs in +terms of files and other parameters. They serve as an adapter between the Swift programming model and the mechanisms used to invoke application programs at run time. + +\emph{Compound functions} are functions +that call atomic and other compound +functions. + Through the use of futures, functions are executed when their input parameters have all been set from existing data or prior function executions. Function calls are chained by @@ -456,61 +471,13 @@ rather when their input data becomes available. % mention that every expression in the body of a function or sub-expression is conceptually executed in parallel, and physically executed when all of their arguments have been assigned a value. -Each variables in a Swift script is declared to be of a specific (single) type. -Variables that are declared to be (or contain, in the case of aggregates) file references, -are associated with a \emph{mapper} which defines (often through a dynamic lookup process) the -data files that are to be mapped to the variable. 
- -%^^^\katznote{bad grammar here - not clear what this is saying} - -Types in Swift can be \emph{atomic} or \emph{composite}. An atomic (i.e. scalar) -type can be either a \emph{primitive type} or a \emph{mapped type}. -Swift provides a fixed set of primitive types, for example, \emph{integer} and -\emph{string}. A mapped type indicates that the actual data does not -reside in CPU addressable memory (as it would in conventional -programming languages), but in POSIX-like files. - -Two composite types are provided: \emph{structures} and \emph{arrays}. -Structures -are similar in most respects to structure types in other languages. -One -array type is provided for every atomic type (integer, string, boolean, and fileRef). -%%% ^^^ fileref, type issues. -Arrays use numeric -indices, but are sparse. Arrays can be nested to provide multi-dimensional indexing. -We often refer to instances of composites of -mapped types as \emph{datasets}. - - -%\katznote{maybe a little figure here?} - -\subsection{Mapping files to data elements} - -Mapped type and composite type variable declarations can be annotated with a -\emph{mapping} descriptor that indicates the file(s) that make(s) up that \emph{dataset}. -For example, the following line declares a variable named \verb|photo| of -type \verb|image|. Since image is a fileRef type, it additionally declares that the -variable refers to a single file named \verb|shane.jpeg| - -\begin{verbatim} - image photo <"shane.jpeg">; -\end{verbatim} - -%Conceptually, a parallel can be drawn between Swift \emph{mapped} variables -%and Java \emph{reference types}. In both cases, there is no syntactic distinction -%between \emph{primitive types} and \emph{mapped} types or -%\emph{reference types}, respectively. Additionally, the semantic distinction -%is kept to a minimum. 
- -Component programs of scripts are declared in an \emph{app -declaration} that contains the description of the command line syntax for that -program and a list of input and output data. An \verb|app| block -describes a functional/dataflow style interface to imperative -components. - +%vvvv +Atomic application interface functions are defined within an \emph{app +declaration} that describes the command line syntax for that +program and its input and output files. For example, the following example lists a function that makes use -of the ImageMagick~\cite{ImageMagick_WWW} \verb|convert| command to rotate a -supplied image by a specified angle: +of the common utility {\tt convert}\cite{ImageMagick_WWW} to rotate an +image by a specified angle: \begin{verbatim} app (image output) rotate(image input, int angle) { @@ -519,41 +486,25 @@ \end{verbatim} %\katznote{do you need to say anything about where/how convert is defined/located?} -(The convert application itself is located through a catalog of applications specified to the runtime environment, or through a PATH lookup). +(The {\tt convert} executable is located at run time through a catalog of applications or through a PATH environment variable). -A function is invoked using a syntax similar to that of C: +The rotate function is then invoked as follows: \begin{verbatim} rotated = rotate(photo, 180); \end{verbatim} While this statement looks like an ordinary function invocation and assignment, its execution in fact -consists of invoking the command line specified in the \verb|app| +consists of invoking the program specified in the \verb|app| declaration, with variables on the left of the assignment bound to the output parameters, and variables to the right of the function invocation passed as inputs. +We can build a complete (albeit simple) Swift script: -The examples above have used the type \verb|image| without a -definition of that type.
We can declare it as an \emph{external file type}, -which has no structure exposed to Swift: - \begin{verbatim} type image; -\end{verbatim} - -This does not indicate that the data is unstructured; it indicates -that the structure of the data is not exposed to Swift. -Swift will treat variables of this type as individual opaque -files. - -With mechanisms to declare types, map variables to data files, and -declare and invoke functions, we can build a complete (albeit simple) -script: - -\begin{verbatim} - type image; - image photo <"shane.jpeg">; + image photo <"puppy.jpeg">; image rotated <"rotated.jpeg">; app (image output) rotate(image input, int angle) { @@ -574,47 +525,52 @@ shane.jpeg rotated.jpeg \end{verbatim} -This executes a single \verb|convert| command, while hiding from the user features +This executes a single \verb|convert| command, while automatically performing for the user features such as remote multisite execution and fault tolerance, which will be discussed in a later section. +In addition to function invocation, the Swift language provides conditional +execution through the \emph{if} and \emph{switch} statements as well as +a \emph{foreach} construct used for iterating over arrays of data. + \subsection{Arrays and Parallel Execution} \label{ArraysAndForeach} -Arrays of values can be declared using the \verb|[]| suffix. An array -can be mapped to a collection of files (one element per file) using -a different form of mapping expression. For example, the -\verb|filesys_mapper| maps -all files matching a particular glob pattern into an array: +Arrays are declared using the \verb|[]| suffix: \begin{verbatim} file frames[] ; \end{verbatim} -The \verb|foreach| construct can be used to apply the same function -call(s) to each element of an array: +Here we used a built-in mapper called \verb|filesys_mapper| to +all files matching the name pattern \verb|*.jpeg| to an array. 
+ +The \verb|foreach| construct can be used to apply a function to each element of an array: \begin{verbatim} foreach f,ix in frames { output[ix] = rotate(f, 180); } \end{verbatim} +\hide{ Sequential iteration can be expressed using the \verb|iterate| construct: \begin{verbatim} step[0] = initialCondition(); iterate ix { step[ix] = simulate(step[ix-1]); - } + } until (terminationCondition() ); \end{verbatim} This fragment will initialize the 0-th element of the \verb|step| array to some initial condition, and then repeatedly run the \verb|simulate| -function, using each execution's output as the input to the next step. +function, using each execution's output as the input to the next step. The iteration ends when the termination condition in the {\tt until()} clause returns {\tt true}. +} +\hide{ \subsection{Expressing functional idioms} Several common idioms seen in functional languages can readily expressed using Swift's -\emph{foreach}. The \ +\emph{foreach}: \begin{description} @@ -649,23 +605,21 @@ } \end{verbatim} \end{description} +} - \subsection{Ordering of execution and implicit parallelism} \label{ordering} -As previously stated, atomic variables are single-assignment, -which means that they must be assigned to exactly one value during execution. A -function or expression will be executed when all of its input +Since all variables and collection elements are single-assignment, +they can be assigned a value at most once during the execution of a script. +A function or expression will be executed when all of its input parameters have been assigned values. As a result of such execution, more variables may become assigned, possibly allowing further parts of the script to execute. In this way, scripts are implicitly -concurrent. Aside from serialization implied by these dataflow -dependencies, execution of component programs can proceed without -synchronization in time. +concurrent. 
-In this fragment, execution of functions \verb|p| and \verb|q| can -happen in parallel: +In this script fragment, execution of functions \verb|p| and \verb|q| can +occur in parallel: \begin{verbatim} y=p(x); z=q(x); @@ -677,18 +631,18 @@ z=q(y); \end{verbatim} -%\katznote{is this common use of monotonic? Are the arrays monotonic? Or is the assignment of elements in the array monotonic?} Arrays in Swift are treated as collections of simple variables, in the sense that all array elements are single-assignment futures. Once the value of an array element is set, then it cannot change. When all the values for the array which can be set (as determined by limited flow analysis) are set, then the array is regarded as \emph{closed}. Statements which deal with the array as a whole will wait for the array to be closed -before executing (thus, a closed array is the equivalent of a -non-array type being assigned). An example of such an action is the expansion of the array values into an app command line). -However, a \verb|foreach| statement -will apply its body to elements of an array as they become known. It -will not wait until the array is closed. +before executing. An example of such an action is the expansion of the array values into an app command line. +Thus, the closing of an array is the equivalent to setting an atomic variable, with respect to any statement that was waiting for the array itself to get a value. However, a \verb|foreach| statement +will apply its body of statements to elements of an array, as they are set to a value. It +will not wait until the array is closed. In practice this type of ``pipelining'' gives Swift scripts a high degree of parallelism at run time. +Because of simplicity and regularity of the Swift data model, a high degree of implicit parallelism is achieved. 
For example, a foreach() statement that processes an array returned by a function may begin processing members of the returned array that have been already set, even before the function completes and returns. This yields programs that are very heavily pipelined with significant overlapping parallel activities. + Consider the script below: \begin{verbatim} file a[]; @@ -699,17 +653,18 @@ a[0] = r(); a[1] = s(); \end{verbatim} -Initially, the \verb|foreach| statement will have nothing to execute, -as the array \verb|a| has not been assigned any values. The functions +Initially, the \verb|foreach| statement will block, with nothing to execute, +as the array \verb|a| has not been assigned any values. At some point, in parallel, the functions \verb|r| and \verb|s| will execute. As soon as either of them is finished, the corresponding invocation of function \verb|p| will occur. After both \verb|r| and \verb|s| have completed, the array \verb|a| will be regarded as closed since no other statements in the script make an assignment to \verb|a|. -Because of simplicity and regularity of the Swift data model, a high degree of implicit parallelism is achieved. For example, a foreach() statement that processes an array returned by a function may begin processing members of the returned array that have been already set, even before the function completes and returns. This yields programs that are very heavily pipelined with significant overlapping parallel activities. % show a (tested) example and if possible illustrate with a figure. +\hide{ + \subsection{Compound functions} As with many other programming languages, functions consisting of @@ -748,10 +703,10 @@ a valid execution order is: \verb|A1 S(x) A2 S(y)|. The compound function \verb|A| does not have to have fully completed for its return values to be used by subsequent statements. +} -\subsection{More about types} -\label{LanguageTypes} +\hide{ Each variable and function parameter in Swift is strongly typed. 
Types are used to structure data, to aid in debugging and program correctness and to influence how Swift interacts with data. @@ -767,34 +722,8 @@ There are a number of primitive types: \verb|int|, \verb|string|, \verb|float|, \verb|boolean|, which represent integers, strings, floating point numbers and true/false values, respectively. +} -\emph{Complex types} may be defined using the \verb|type| keyword: - -\begin{verbatim} - type headerfile; - type voxelfile; - type volume { - headerfile h; - voxelfile v; - } -\end{verbatim} - -Members of a complex type can be accessed using the \verb|.| operator: - -\begin{verbatim} - volume brain; - o = p(brain.h); -\end{verbatim} - -Collections of files can be mapped to complex types (arrays and structures) -using special operators called \emph{mappers}, syntactically designate with angle brackets (< >). For example, the simple mapper used in this expression will -map the files \verb|data.h| and \verb|data.v| to the variable members -\verb|m.h| and \verb|m.v| respectively: - -\begin{verbatim} - volume m ; -\end{verbatim} - \hide{ % hide description of externals till this text is refined %fixed \katznote{Swift's ``file-and-site model'' hasn't been introduced before. I'm not even sure what it is.} @@ -844,9 +773,8 @@ \subsection{Swift mappers} -Swift supports in-memory variables that are -\emph{mapped} to files in the filesystem. This is coordinated by an -extensible set of built-in primitives called \emph{mappers}. A representative sample of these is listed +Swift provides an +extensible set of built-in mapping primitives. A representative sample of these is listed in Table~\ref{mappertable}. \begin{table}[t] @@ -906,11 +834,20 @@ structured Swift variable, can represent a large, structured data set. +Collections of files can be mapped to complex types (arrays and structures) +using a variety of built-in mappers. 
For example, the \verb|simple mapper| used in this expression will +map the files \verb|data.p| and \verb|data.m| to the variable members +\verb|m.h| and \verb|m.v| respectively: + +\begin{verbatim} + snapshot m ; +\end{verbatim} + \subsection{Swift runtime environment} \label{LanguageEnvironment} -Notable runtime features include: +Notable features of the Swift runtime environment include: \begin{itemize} @@ -941,12 +878,9 @@ \end{itemize} - - -A Swift \verb|app| declaration describes how a component program -is invoked. In order to ensure the correctness of the Swift model, the -environment in which programs are executed needs to be constrained. - +A Swift \verb|app| declaration describes how an application program +is invoked. In order to provide a consistent execution environment that works for virtually all application programs, the +environment in which programs are executed needs to be constrained with a set of uniform conventions. The Swift execution model is based on the following assumptions: a program is invoked in its own working directory; in that working directory or one of its subdirectories, the program can expect to find @@ -982,8 +916,7 @@ The body of the \verb|app| block defines the command-line that will be executed when the function is invoked. The first token (in this case \verb|convert|) defines a \emph{transformation name} which is used to -determine the executable name. Subsequent expressions, separated by -spaces, define the command-line arguments for that executable: +determine the executable name. Subsequent expressions define the command-line arguments for that executable: \verb|"-rotate"| is a string literal; angle specifies the value of the angle parameter; the syntax \verb|@variable| evaluates to the filename of the supplied variable, thus \verb|@input| and \verb|@output| @@ -993,7 +926,7 @@ variable has not yet been computed, the filename where that value will go is already available from the mapper. 
-\section{Execution} +\section{Execution engine} \label{Execution} Swift is implemented by compiling to a Karajan program\cite{Karajan}, which provides @@ -1112,7 +1045,7 @@ In such a case, Swift provides a \emph{restart log} that encapsulates which function invocations have been successfully completed. -%%%%%% What manual interv. and why??? +\mikenote{What manual interv. and why???} After appropriate manual intervention, a subsequent Swift run may be started From noreply at svn.ci.uchicago.edu Thu Jan 6 08:38:21 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 08:38:21 -0600 (CST) Subject: [Swift-commit] r3869 - text/parco10submission Message-ID: <20110106143821.D6B5C9CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 08:38:21 -0600 (Thu, 06 Jan 2011) New Revision: 3869 Modified: text/parco10submission/paper.tex Log: minor changes in 2 and 3 Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 05:31:34 UTC (rev 3868) +++ text/parco10submission/paper.tex 2011-01-06 14:38:21 UTC (rev 3869) @@ -415,7 +415,7 @@ The notation \verb|{}| indicates that the type represents a reference to a single \emph{opaque} -file --- i.e., a reference to an external object whose structure is opaque to the Swift script. For convenience such type declarations typically use the equivalent shorthand \verb|type image;| (which new users find confusing but which has become a Swift idiom). +file, i.e., a reference to an external object whose structure is opaque to the Swift script. For convenience such type declarations typically use the equivalent shorthand \verb|type image;| (which new users find confusing but which has become a Swift idiom). Mapped type variable declarations can be specified with a \emph{mapping} descriptor enclosed in \verb|<>| that indicates the file to be mapped to the variable. 
@@ -450,7 +450,7 @@ Swift has three types of functions: -\emph{Built-in functions} are defined in the Java code of the Swift runtime system, and perform various utility functions (numeric conversion, string manipulation, etc.) Operators (+ *, etc.) defined by the language behave similarly. +\emph{Built-in functions} are defined in the Java code of the Swift runtime system, and perform various utility functions (numeric conversion, string manipulation, etc.). Operators (+, *, etc.) defined by the language behave similarly. \emph{Application interface functions} (declared using the app keyword) specify the interface (input files and parameters, and output files) of application programs in @@ -476,7 +476,7 @@ declaration} that describes the command line syntax for that program and its input and output files. For example, the following example lists a function that makes use -of the common utility {\tt convert}\cite{ImageMagick_WWW} to rotate an +of the common utility {\tt convert}~\cite{ImageMagick_WWW} to rotate an image by a specified angle: \begin{verbatim} @@ -486,7 +486,7 @@ \end{verbatim} %\katznote{do you need to say anything about where/how convert is defined/located?} -(The {\tt convert} executable is located at run time through a catalog of applications or through a PATH environment variable). +(The {\tt convert} executable is located at run time through a catalog of applications or through a PATH environment variable.) The rotate function is then invoked as follows: @@ -542,7 +542,7 @@ file frames[] ; \end{verbatim} -Here we used a built-in mapper called \verb|filesys_mapper| to +This uses a built-in mapper called \verb|filesys_mapper| to map all files matching the name pattern \verb|*.jpeg| to an array. The \verb|foreach| construct can be used to apply a function to each element of an array: @@ -917,7 +917,7 @@ executed when the function is invoked.
The first token (in this case \verb|convert|) defines a \emph{transformation name} which is used to determine the executable name. Subsequent expressions define the command-line arguments for that executable: -\verb|"-rotate"| is a string literal; angle specifies the value of the +``\verb|-rotate|'' is a string literal; angle specifies the value of the angle parameter; the syntax \verb|@variable| evaluates to the filename of the supplied variable, thus \verb|@input| and \verb|@output| evaluate to the filenames of the corresponding parameters. It should @@ -929,14 +929,14 @@ \section{Execution engine} \label{Execution} -Swift is implemented by compiling to a Karajan program\cite{Karajan}, which provides +Swift is implemented by compiling to a Karajan program~\cite{Karajan}, which provides several benefits: a lightweight threading model, futures, remote job execution, and remote file transfer and data management. Both remote execution and data transfer and management functions are provided through generalized -abstracted interfaces called "providers". +abstracted interfaces called ``providers''. Data providers enable data transfer and management to be performed through a wide variety of protocols including direct local copying, GridFTP, HTTP, WebDAV, SCP, and FTP. Execution providers enable job execution to take place using direct POSIX process fork, Globus GRAM, Condor (and Condor-G), PBS, SGE, SSH. The Swift execution model can thus be extended by @@ -1046,6 +1046,9 @@ In such a case, Swift provides a \emph{restart log} that encapsulates which function invocations have been successfully completed. \mikenote{What manual interv. 
and why???} +\katznote{Maybe ignore this, and just say: A subsequent Swift run may be started +with this restart log; this will avoid re-execution of already +executed invocations.} After appropriate manual intervention, a subsequent Swift run may be started From noreply at svn.ci.uchicago.edu Thu Jan 6 09:54:40 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 09:54:40 -0600 (CST) Subject: [Swift-commit] r3870 - in text/parco10submission: . plots Message-ID: <20110106155440.58BEB9CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-06 09:54:40 -0600 (Thu, 06 Jan 2011) New Revision: 3870 Modified: text/parco10submission/paper.tex text/parco10submission/plots/sleep.pdf Log: Minor plot improvements Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 14:38:21 UTC (rev 3869) +++ text/parco10submission/paper.tex 2011-01-06 15:54:40 UTC (rev 3870) @@ -346,7 +346,7 @@ Swift scripts are written as a set of functions, composed upwards, starting with \emph{atomic functions} that specify the execution of external programs, and then higher level functions are composed as -pipelines (or more generally, graphs) of sub-functions. +pipelines (or more generally, graphs) of sub-functions. Unlike most other scripting languages, Swift expresses invocations of ``ordinary programs''---technically, POSIX {\tt exec()} @@ -387,7 +387,7 @@ Structure fields can be of any type, while arrays contain only uniform values of a single type. One array type is provided for every atomic type (integer, string, boolean, and file reference). Arrays use numeric -indices, but are sparse. +indices, but are sparse. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. 
Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. Arrays can be nested to provide multi-dimensional indexing. Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. Also, since all data elements have single-assignment semantics, no garbage collection issues arise. @@ -452,11 +452,11 @@ \emph{Built-in functions} are defined in the Java code of the Swift runtime system, and perform various utility functions (numeric conversion, string manipulation, etc.) Operators (+, *, etc.) defined by the language behave similarly. -\emph{Application interface functions} (declared using the app keyword) +\emph{Application interface functions} (declared using the app keyword) specify the interface (input files and parameters, and output files) of application programs in terms of files and other parameters. They serve as an adapter between the Swift programming model and the mechanisms used to invoke application programs at run time. -\emph{Compound functions} are functions +\emph{Compound functions} are functions that call atomic and other compound functions. @@ -616,7 +616,7 @@ parameters have been assigned values. As a result of such execution, more variables may become assigned, possibly allowing further parts of the script to execute. In this way, scripts are implicitly -concurrent. +concurrent. In this script fragment, execution of functions \verb|p| and \verb|q| can occur in parallel: @@ -737,7 +737,7 @@ component program atomicity on data output. 
\katznote{this previous sentence -has a lot of stuff that hasn't been defined, and the next one is equally confusing at this point in the paper.} +has a lot of stuff that hasn't been defined, and the next one is equally confusing at this point in the paper.} This can add substantial responsibility to component programs, in exchange for allowing arbitrary @@ -1044,13 +1044,13 @@ will fail, ultimately resulting in the entire script failing. In such a case, Swift provides a \emph{restart log} that encapsulates -which function invocations have been successfully completed. +which function invocations have been successfully completed. \mikenote{What manual interv. and why???} \katznote{Maybe ignore this, and just say: A subsequent Swift run may be started with this restart log; this will avoid re-execution of already executed invocations.} After -appropriate manual intervention, +appropriate manual intervention, a subsequent Swift run may be started with this restart log; this will avoid re-execution of already executed invocations. @@ -1122,7 +1122,7 @@ Using Swift to submit to a large number of sites poses a number of practical challenges that are not encountered when running on a small number of sites. These challenges are seen when comparing execution on -the relatively static TeraGrid~\cite{TeraGrid_2005} with execution on the +the relatively static TeraGrid~\cite{TeraGrid_2005} with execution on the more dynamic Open Science Grid (OSG)~\cite{OSG_2007}, where the set of sites that may be used is large and changing. 
It is impractical to maintain a site catalog by @@ -1599,7 +1599,7 @@ \begin{figure*}[htbp] \begin{center} - \includegraphics[scale=0.50]{plots/sleep} + \includegraphics[scale=0.70]{plots/sleep} \caption{Node utilization for {\tt sleep} tasks} \label{PlotSleep} \end{center} Modified: text/parco10submission/plots/sleep.pdf =================================================================== (Binary files differ) From noreply at svn.ci.uchicago.edu Thu Jan 6 13:42:31 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 13:42:31 -0600 (CST) Subject: [Swift-commit] r3871 - text/parco10submission Message-ID: <20110106194231.0A0759CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 13:42:30 -0600 (Thu, 06 Jan 2011) New Revision: 3871 Modified: text/parco10submission/paper.tex Log: updates in section 2 Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 15:54:40 UTC (rev 3870) +++ text/parco10submission/paper.tex 2011-01-06 19:42:30 UTC (rev 3871) @@ -334,18 +334,18 @@ \section{The Swift language} \label{Language} -Swift is by design a sparse, minimal scripting -language which executes external programs remotely and in parallel. +Swift is, by design, a sparse, minimal scripting +language that executes external programs remotely and in parallel. As such, Swift has only a very limited set of data types, operators, and built-in functions. -Its simple, uniform data model is composed of a few atomic types (which can be simple scalar values or references to external files) and two collection types (arrays and structures). +Its simple, uniform data model is composed of a few atomic types (that can be scalar values or references to external files) and two collection types (arrays and structures). 
A Swift script describes data, application components, invocations -of applications components, and the inter-relations (data flow) +of application components, and the interrelations (data flow) between those invocations, using a C-like syntax. Swift scripts are written as a set of functions, composed upwards, starting with \emph{atomic functions} that specify the execution of -external programs, and then higher level functions are composed as +external programs. Higher-level functions are then composed as pipelines (or more generally, graphs) of sub-functions. Unlike most other scripting languages, Swift expresses @@ -355,59 +355,60 @@ invocation. Swift scripts similarly declare all output files that result from program invocations. This enables Swift to provide distributed, location-independent execution of external application programs. -The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections. Second, every expression in a Swift program is conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts are discussed in more detail below. +The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables exposed to Swift within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections.
Second, all expressions in a Swift program are conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts are discussed in more detail below. % can be thought of as a massively-parallel lazy (ie, on-demand, or just in time) evaluation - say later on? \subsection{Data model} -Every data object in Swift is built up from atomic data elements which contain three fields: a value, a state, and a queue of function invocations that are waiting for the value to be set. +Every data object in Swift is built up from atomic data elements that contain three fields: a value, a state, and a queue of function invocations that are waiting for the value to be set. Variables are used in Swift to name the local variables, arguments, and returns of a function. Every Swift variable is assigned a concrete data type, based on a very simple type model (with no concepts of inheritance, abstraction, etc). The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment can be declared ``global'' to make them accessible to every other function in the script. -Swift data elements (atomic variables and array elements) are \emph{single-assignment}: -they behave as futures and can be assigned at most one value during execution. +Swift data elements (atomic variables and array elements) are \emph{single-assignment}--- +they can be assigned at most one value during execution---and behave as futures. This semantic provides the basis for Swift's model of parallel function evaluation and chaining. -While Swift arrays and structures are not -single-assignment, each of their elements are. +While Swift collection types (arrays and structures) are not +single-assignment, each of their elements is single-assignment. -Each variables in a Swift script is declared to be of a specific (single) type. 
+Each variable in a Swift script is declared to be of a specific (single) type. Swift provides three basic classes of data types: \emph{Primitive types} are provided for integer, float, string, and boolean values by the Swift runtime. Common operators are defined for primitive types, such as arithmetic, concatenation, explicit conversion, etc. An additional primitive type ``external'' is provided for manual synchronization. -\emph{Mapped types} are data elements that refer, through a process called``mapping'' to files external to the Swift script. These are the files that will be read and written by the external application programs called by Swift. +\emph{Mapped types} are data elements that refer (through a process called``mapping'') to files external to the Swift script. These are the files that will be read and written by the external application programs called by Swift. The mapping process can map single variables to single files, and structures and arrays to collections of files. Primitive and mapped types are called \emph{atomic types}. \emph{Collection types} are provided in Swift by \emph{arrays} and \emph{structures}. -Structure fields can be of any type, while arrays contain only uniform values of a single type. One +Structure fields can be of any type, while arrays contain values of only a single type. One array type is provided for every atomic type (integer, string, boolean, and file reference). Arrays use numeric indices, but are sparse. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. Arrays can be nested to provide multi-dimensional indexing. -Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. 
The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. Also, since all data elements have single-assignment semantics, no garbage collection issues arise. +Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? garbage collection removed variables that are no longer needed - I don't see how single assignment helps here.} Variables that are declared to be file references -are associated with a \emph{mapper} which defines (often through a dynamic lookup process) the +are associated with a \emph{mapper}, which defines (often through a dynamic lookup process) the data files that are to be mapped to the variable. Array and structure elements that are declared to be file references are similarly mapped. -Mapped type and composite type variable declarations can be annotated with a +Mapped type and composite \katznote{I don't know what composite means here} +type variable declarations can be annotated with a \emph{mapping} descriptor that specify the file(s) that are to be mapped to the Swift data element(s). For example, the following line declares a variable named \verb|photo| of -type \verb|image|. Since image is a fileRef type, it additionally declares that the +type \verb|image|. Since image is a fileRef type \katznote{how do I know this? 
And, should ``fileRef'' have been defined 2 paragraphs ago?}, it additionally declares that the variable refers to a single file named \verb|shane.jpeg| \begin{verbatim} image photo <"shane.jpeg">; \end{verbatim} -We can declare {\tt image} to be an \emph{external file type}: +We can declare {\tt image} to be an \emph{external file type}: \katznote{is this different from a fileRef type?} \begin{verbatim} type image {}; @@ -441,18 +442,20 @@ Members of a structure can be accessed using the \verb|.| operator: \begin{verbatim} - snapshot s; - image i; - i = s.i; + snapshot sn; + image im; + im = sn.i; \end{verbatim} +\katznote{please check the above - I changed a couple of variables so ``i'' wasn't used twice for different things in the same example.} + \subsection{Execution model} Swift has three types of functions: \emph{Built-in functions} are defined in the Java code of the Swift runtime system, and perform various utility functions (numeric conversion, string manipulation, etc.) Operators (+, *, etc.) defined by the language behave similarly. -\emph{Application interface functions} (declared using the app keyword) +\emph{Application interface functions} (declared using the \verb|app| keyword) specify the interface (input files and parameters, and output files) of application programs in terms of files and other parameters. They serve as an adapter between the Swift programming model and the mechanisms used to invoke application programs at run time. @@ -460,15 +463,14 @@ that call atomic and other compound functions. -Through the use of futures, functions are +Through the use of futures, functions can be executed when their input parameters have all been set from existing data or prior function executions. Function calls are chained by specifying that an output variable of one function is passed as the input variable to the second function. 
-%\katznote{mention futures here?} This dataflow model means that Swift functions are not necessarily executed in source-code order but -rather when their input data becomes available. +rather, when their input data become available. % mention that every expression in the body of a function or sub-expression is conceptually executed in parallel, and physically executed when all of their arguments have been assigned a value. %vvvv @@ -476,7 +478,7 @@ declaration} that describes the command line syntax for that program and its input and output files. For example, the following example lists a function that makes use -of the common utility {\tt convert}~\cite{ImageMagick_WWW} to rotate an +of the common utility, {\tt convert}~\cite{ImageMagick_WWW}, to rotate an image by a specified angle: \begin{verbatim} @@ -486,7 +488,7 @@ \end{verbatim} %\katznote{do you need to say anything about where/how convert is defined/located?} -(The {\tt convert} executable is located at run time through a catalog of applications or through a PATH environment variable.) +(The {\tt convert} executable is found at run time in a catalog of applications or through a PATH environment variable.) The rotate function is then invoked as follows: @@ -526,8 +528,8 @@ \end{verbatim} This executes a single \verb|convert| command, while automatically performing for the user features -such as remote multisite execution and fault tolerance, which will be -discussed in a later section. +such as remote multisite execution and fault tolerance, which are +discussed later. In addition to function invocation, the Swift language provides conditional execution through the \emph{if} and \emph{switch} statements as well as @@ -611,8 +613,8 @@ \label{ordering} Since all variables and collection elements are single-assignment, -they can be assigned a value at most once during the execution of a script. 
-A function or expression will be executed when all of its input +%they can be assigned a value at most once during the execution of a script. +a function or expression can be executed when all of its input parameters have been assigned values. As a result of such execution, more variables may become assigned, possibly allowing further parts of the script to execute. In this way, scripts are implicitly @@ -631,17 +633,18 @@ z=q(y); \end{verbatim} -Arrays in Swift are treated as collections of simple variables, in the sense that all array elements are single-assignment futures. +Arrays in Swift are treated as collections of simple variables, in the sense that all array elements are single-assignment. Once the value of an array element is set, then it cannot change. When all the values for the array which can be set (as determined by limited flow analysis) are -set, then the array is regarded as \emph{closed}. Statements which +set, then the array is regarded as \emph{closed}. +\katznote{the few lines before this in this paragraph have been repeated from earlier in the section.} Statements that deal with the array as a whole will wait for the array to be closed before executing. An example of such an action is the expansion of the array values into an app command line. -Thus, the closing of an array is the equivalent to setting an atomic variable, with respect to any statement that was waiting for the array itself to get a value. However, a \verb|foreach| statement +Thus, the closing of an array is the equivalent to setting a future variable, with respect to any statement that was waiting for the array itself to be assigned a value. However, a \verb|foreach| statement will apply its body of statements to elements of an array, as they are set to a value. It will not wait until the array is closed. In practice this type of ``pipelining'' gives Swift scripts a high degree of parallelism at run time. 
-Because of simplicity and regularity of the Swift data model, a high degree of implicit parallelism is achieved. For example, a foreach() statement that processes an array returned by a function may begin processing members of the returned array that have been already set, even before the function completes and returns. This yields programs that are very heavily pipelined with significant overlapping parallel activities. +Because of the simplicity and regularity of the Swift data model, a high degree of implicit parallelism is achieved. For example, a foreach() statement that processes an array returned by a function may begin processing members of the returned array that have been already set, even before the entire function completes and returns. This yields programs that are very heavily pipelined with significant overlapping parallel activities. Consider the script below: \begin{verbatim} @@ -835,7 +838,7 @@ set. Collections of files can be mapped to complex types (arrays and structures) -using a variety of built-in mappers. For example, the \verb|simple mapper| used in this expression will +using a variety of built-in mappers. For example, the \verb|simple mapper| used in the next expression will map the files \verb|data.p| and \verb|data.m| to the variable members \verb|m.h| and \verb|m.v| respectively: @@ -891,7 +894,7 @@ portability), run in in any particular order with respect to other application invocations in a script (except those implied by data dependency), or that their working directories will or will not be -cleaned up after execution. +cleaned up after execution. \katznote{say something about apps should not cause side-effects?} Consider the following \verb|app| declaration for the \verb|rotate| function: @@ -917,8 +920,8 @@ executed when the function is invoked. The first token (in this case \verb|convert|) defines a \emph{transformation name} which is used to determine the executable name. 
Subsequent expressions define the command-line arguments for that executable: -``\verb|-rotate|'' is a string literal; angle specifies the value of the -angle parameter; the syntax \verb|@variable| evaluates to the filename +``\verb|-rotate|'' is a string literal; \verb|angle| specifies the value of the +angle parameter; and the syntax \verb|@variable| evaluates to the filename of the supplied variable, thus \verb|@input| and \verb|@output| evaluate to the filenames of the corresponding parameters. It should be noted that it is possible to take the filename of \verb|output| @@ -936,7 +939,7 @@ and remote file transfer and data management. Both remote execution and data transfer and management functions are provided through generalized -abstracted interfaces called ``providers''. +abstracted interfaces called \emph{providers}. Data providers enable data transfer and management to be performed through a wide variety of protocols including direct local copying, GridFTP, HTTP, WebDAV, SCP, and FTP. Execution providers enable job execution to take place using direct POSIX process fork, Globus GRAM, Condor (and Condor-G), PBS, SGE, SSH. 
The Swift execution model can thus be extended by From noreply at svn.ci.uchicago.edu Thu Jan 6 15:26:49 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 15:26:49 -0600 (CST) Subject: [Swift-commit] r3872 - text/parco10submission Message-ID: <20110106212649.034D09CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 15:26:48 -0600 (Thu, 06 Jan 2011) New Revision: 3872 Modified: text/parco10submission/paper.tex Log: updates in related work Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 19:42:30 UTC (rev 3871) +++ text/parco10submission/paper.tex 2011-01-06 21:26:48 UTC (rev 3872) @@ -1662,34 +1662,26 @@ programming tool for the specification and execution of large parallel computations on large quantities of data, and facilitating the utilization of large distributed resources. However, the two also -differ in many aspects: +differ in many aspects. The +MapReduce programming model supports key-value pairs as +input or output datasets and two types of computation functions, +map and reduce; Swift provides a type system and allows the +definition of complex data structures and arbitrary computational +procedures. +In MapReduce, input and output data can be of +several different formats, and it is also possible to define new +data sources; Swift provides a more flexible mapping mechanism to +map between logical data structures and various physical +representations. +Swift does not automatically partition input +datasets as MapReduce does; Swift datasets can be organized in structures, and +individual items in a dataset can be transferred accordingly along +with computations. +MapReduce schedules computations within a +cluster with shared Google File System; Swift schedules across +distributed Grid sites that may span multiple administrative +domains, and deals with security and resource usage policy issues. 
-\begin{itemize} - -\item Programming model: MapReduce only supports key-value pairs as - input or output datasets and two types of computation functions, - map and reduce; Swift provides a type system and allows the - definition of complex data structures and arbitrary computational - procedures. - -\item Data format: in MapReduce, input and output data can be of - several different formats, and it is also possible to define new - data sources. Swift provides a more flexible mapping mechanism to - map between logical data structures and various physical - representations. - -\item Dataset partition: Swift does not automatically partition input - datasets. Instead, datasets can be organized in structures, and - individual items in a dataset can be transferred accordingly along - with computations. - -\item Execution environment: MapReduce schedules computations within a - cluster with shared Google File System, where Swift schedules across - distributed Grid sites that may span multiple administrative - domains, and deals with security and resource usage policy issues. - -\end{itemize} - BPEL~\cite{BPEL_2006} is a Web Service-based standard that specifies how a set of Web services interact to form a larger, composite Web Service. BPEL is starting to be tested in scientific contexts. While @@ -1699,7 +1691,7 @@ application with repetitive patterns on a collection of datasets could result in large, repetitive BPEL documents~\cite{Sedna_2007}, and BPEL is cumbersome if not impossible to write for computational -scientists. Although BPEL can use \katznote{an? the?} XML Schema to describe data types, +scientists. Although BPEL can use an XML Schema to describe data types, it does not provide support for mapping between a logical XML view and arbitrary physical representations. 
From noreply at svn.ci.uchicago.edu Thu Jan 6 15:29:35 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 15:29:35 -0600 (CST) Subject: [Swift-commit] r3873 - text/parco10submission Message-ID: <20110106212935.79B199CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 15:29:35 -0600 (Thu, 06 Jan 2011) New Revision: 3873 Modified: text/parco10submission/paper.tex Log: removing the section number for Acks, and folding the open source sentence into the last section, rather than it being a section of its own Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 21:26:48 UTC (rev 3872) +++ text/parco10submission/paper.tex 2011-01-06 21:29:35 UTC (rev 3873) @@ -1877,12 +1877,12 @@ model has demonstrated many successes as a tool for scientific computing. -\section{Implementation status} +%\section{Implementation status} Swift is an open source project available at: \\ {\tt http://www.ci.uchicago.edu/swift}. -\section{Acknowledgments} +\section*{Acknowledgments} This research is supported in part by NSF grants OCI-721939 and OCI-0944332, and the U.S. 
Department of Energy under contract From noreply at svn.ci.uchicago.edu Thu Jan 6 15:32:47 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 15:32:47 -0600 (CST) Subject: [Swift-commit] r3874 - text/parco10submission Message-ID: <20110106213247.6EB029CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 15:32:47 -0600 (Thu, 06 Jan 2011) New Revision: 3874 Modified: text/parco10submission/paper.tex Log: adding a comment Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 21:29:35 UTC (rev 3873) +++ text/parco10submission/paper.tex 2011-01-06 21:32:47 UTC (rev 3874) @@ -1779,7 +1779,7 @@ techniques and employing an internal IP network. \mikenote{In order to achieve automatic parallelization in Swift, instead of using thunks (i.e., suspended computations), which yield lazy -evaluation, we employ futures, which result in eager parallelism. In this process, we trade the ability to efficiently deal with infinite structures for the ability to minimize computation time. It must, however, be noted that a middle ground exists: lazy futures (futures whose computation is delayed until a value is first needed).} +evaluation, we employ futures, which result in eager parallelism. In this process, we trade the ability to efficiently deal with infinite structures for the ability to minimize computation time. 
It must, however, be noted that a middle ground exists: lazy futures (futures whose computation is delayed until a value is first needed).} \katznote{this is very confusing to me - it's mixing too many concepts and overloading lazy and eager.} \subsection{Filesystem access optimizations} From noreply at svn.ci.uchicago.edu Thu Jan 6 16:01:22 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 16:01:22 -0600 (CST) Subject: [Swift-commit] r3875 - text/parco10submission Message-ID: <20110106220122.898A49CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 16:01:22 -0600 (Thu, 06 Jan 2011) New Revision: 3875 Modified: text/parco10submission/paper.bib text/parco10submission/paper.tex Log: updating CoG stuff Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-06 21:32:47 UTC (rev 3874) +++ text/parco10submission/paper.bib 2011-01-06 22:01:22 UTC (rev 3875) @@ -136,15 +136,6 @@ pages = {237--247} } -@article{COG, - title = {{COG}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - @misc{LONIPIPELINE, title="LONI Pipeline http://pipeline.loni.ucla.edu/" } @@ -284,20 +275,20 @@ address = {Los Alamitos, CA, USA}, } -@article {makeflow, - author = {Yu, Li and Moretti, Christopher and Thrasher, Andrew and Emrich, Scott and Judd, Kenneth and Thain, Douglas}, - affiliation = {University of Notre Dame Department of Computer Science and Engineering South Bend USA}, - title = {Harnessing parallelism in multicore clusters with the All-Pairs, Wavefront, and Makeflow abstractions}, - journal = {Cluster Computing}, - publisher = {Springer Netherlands}, - issn = {1386-7857}, - keyword = {Computer Science}, - pages = {243-256}, - volume = {13}, - issue = {3}, - url = {http://dx.doi.org/10.1007/s10586-010-0134-7}, - note = {10.1007/s10586-010-0134-7}, - 
year = {2010} +@article {makeflow, + author = {Yu, Li and Moretti, Christopher and Thrasher, Andrew and Emrich, Scott and Judd, Kenneth and Thain, Douglas}, + affiliation = {University of Notre Dame Department of Computer Science and Engineering South Bend USA}, + title = {Harnessing parallelism in multicore clusters with the All-Pairs, Wavefront, and Makeflow abstractions}, + journal = {Cluster Computing}, + publisher = {Springer Netherlands}, + issn = {1386-7857}, + keyword = {Computer Science}, + pages = {243-256}, + volume = {13}, + issue = {3}, + url = {http://dx.doi.org/10.1007/s10586-010-0134-7}, + note = {10.1007/s10586-010-0134-7}, + year = {2010} } % Items below are from an older paper - retain for the moment in case any are useful here Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 21:32:47 UTC (rev 3874) +++ text/parco10submission/paper.tex 2011-01-06 22:01:22 UTC (rev 3875) @@ -939,7 +939,7 @@ and remote file transfer and data management. Both remote execution and data transfer and management functions are provided through generalized -abstracted interfaces called \emph{providers}. +abstracted interfaces called \emph{providers}~\cite{Karajan}. Data providers enable data transfer and management to be performed through a wide variety of protocols including direct local copying, GridFTP, HTTP, WebDAV, SCP, and FTP. Execution providers enable job execution to take place using direct POSIX process fork, Globus GRAM, Condor (and Condor-G), PBS, SGE, SSH. The Swift execution model can thus be extended by @@ -1738,13 +1738,13 @@ also allows easy programming of applications that have cycles and runtime decisions, such as in optimization problems. -Swift integrates with the CoG Karajan workflow engine~\cite{Karajan}. Karajan +Swift integrates with the Karajan workflow engine~\cite{Karajan}. 
Karajan provides the libraries and primitives for job scheduling, data transfer, and grid job submission; Swift adds support for high-level abstract specification of large parallel computations, data abstraction, and workflow restart, reliable execution over multiple -grid sites, and (via Falkon~\cite{Falkon_2008} and CoG coasters) -\katznote{need to talk about what CoG coasters is vs coasters as previously introduced, or clear up the fact that the previous ``coasters'' didn't talk about CoG.} fast job execution. +grid sites, and (via Falkon~\cite{Falkon_2008} and coasters) +fast job execution. @@ -1762,10 +1762,10 @@ short. In such circumstances, execution time can become dominated by %GRAM and LRM program submission overhead. A resource provisioning system such as -Falkon or the CoG coaster mechanism developed for +Falkon or the coaster mechanism developed for Swift can be used to ameliorate this overhead, by incurring the allocation overhead once per worker node. Both of these mechanisms can -be plugged into Swift straightforwardly through the CoG provider API. +be plugged into Swift straightforwardly through the CoG provider API~\cite{karajan}. \subsection{Scripting on thousands to millions of cores} @@ -1785,7 +1785,7 @@ Similarly, some applications deal with files that are uncomfortably small for GridFTP (on the order of tens of bytes). In this situation, a -lightweight file access mechanism provided by CoG Coasters can be +lightweight file access mechanism provided by Coasters can be substituted for GridFTP. When running on HPC resources, the thousands of small accesses to the filesystem may create a bottleneck. 
To approach this problem, we have investigated application needs and From noreply at svn.ci.uchicago.edu Thu Jan 6 16:02:16 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 16:02:16 -0600 (CST) Subject: [Swift-commit] r3876 - text/parco10submission Message-ID: <20110106220216.DB1609CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 16:02:16 -0600 (Thu, 06 Jan 2011) New Revision: 3876 Modified: text/parco10submission/paper.tex Log: another small change Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 22:01:22 UTC (rev 3875) +++ text/parco10submission/paper.tex 2011-01-06 22:02:16 UTC (rev 3876) @@ -1765,7 +1765,7 @@ Falkon or the coaster mechanism developed for Swift can be used to ameliorate this overhead, by incurring the allocation overhead once per worker node. Both of these mechanisms can -be plugged into Swift straightforwardly through the CoG provider API~\cite{karajan}. +be plugged into Swift straightforwardly through the CoG provider API~\cite{Karajan}. 
\subsection{Scripting on thousands to millions of cores} From noreply at svn.ci.uchicago.edu Thu Jan 6 16:10:10 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 16:10:10 -0600 (CST) Subject: [Swift-commit] r3877 - text/parco10submission Message-ID: <20110106221010.8AE859CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 16:10:08 -0600 (Thu, 06 Jan 2011) New Revision: 3877 Modified: text/parco10submission/paper.bib text/parco10submission/paper.tex Log: adding coasters wiki as a ref Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-06 22:02:16 UTC (rev 3876) +++ text/parco10submission/paper.bib 2011-01-06 22:10:08 UTC (rev 3877) @@ -2,6 +2,11 @@ % $Id$ % +@misc{coasters, +author={Mihael Hategan}, +note={\url{http://wiki.cogkit.org/wiki/Coasters}} +} + @inproceedings{VDS, author = {Ian Foster and Jens Voeckler and Michael Wilde and Yong Zhao}, title = {{Chimera: A Virtual Data System for Representing, Querying, Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 22:02:16 UTC (rev 3876) +++ text/parco10submission/paper.tex 2011-01-06 22:10:08 UTC (rev 3877) @@ -1087,7 +1087,7 @@ Swift offers two approaches: \emph{clustering} and \emph{coasters}. Clustering constructs job submissions that contain a number of component program executions, rather than just submitting jobs one-at-a-time. -Coasters is a form of multi-level scheduling similar to pilot jobs~\cite{Condor-G_2002}. +Coasters~\cite{coasters} is a form of multi-level scheduling similar to pilot jobs~\cite{Condor-G_2002}. It submits generic coaster jobs to a site, and binds component program executions to the coaster jobs (and thus to worker nodes) as these coaster jobs begin remote execution. 
From noreply at svn.ci.uchicago.edu Thu Jan 6 16:31:49 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 16:31:49 -0600 (CST) Subject: [Swift-commit] r3878 - text/parco10submission Message-ID: <20110106223149.D3E849CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-06 16:31:49 -0600 (Thu, 06 Jan 2011) New Revision: 3878 Modified: text/parco10submission/paper.bib Log: Changed Karajan ref to be 2007 paper from Workflows for e-Science book. Needs further adjustment - doesnt show Editors correctly in reference list. Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-06 22:10:08 UTC (rev 3877) +++ text/parco10submission/paper.bib 2011-01-06 22:31:49 UTC (rev 3878) @@ -190,7 +190,7 @@ pages = {237--247} } -@article {Karajan, +@article {OLDKarajan, author = {von Laszewski, Gregor and Hategan, Mike}, affiliation = {Argonne National Laboratory Mathematics and Computer Science Division, Argonne National Laboratory 9700 S. Cass Ave. 
Argonne IL 60440 USA}, title = {Workflow Concepts of the {Java CoG} Kit}, @@ -206,6 +206,25 @@ year = {2005} } +@INCOLLECTION{Karajan, + chapter = {Java CoG Kit Workflow}, + pages = {341-356}, + author = {von Laszewski, Gregor and Hategan, Mike and Kodeboyina, Deepti}, + title = {Workflows for e-Science}, + publisher = {Springer}, + year = {2007}, + editor = {Ian Taylor and Ewa Deelman and Dennis Gannon and Matthew Shields} +} + +@BOOK{Workflows_2007, + title = {Workflows for {e-Science}}, + publisher = {Springer}, + year = {2007}, + editor = {Ian Taylor and Ewa Deelman and Dennis Gannon and Matthew Shields}, + owner = {wozniak}, + timestamp = {2009.09.02} +} + @techreport{NPSOL, title={User's Guide for {NPSOL} (Version 4.0): A Fortran Package for Nonlinear Programming}, institution={Stanford University Systems Optimization Lab}, From noreply at svn.ci.uchicago.edu Thu Jan 6 16:33:54 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 16:33:54 -0600 (CST) Subject: [Swift-commit] r3879 - text/parco10submission Message-ID: <20110106223354.96D2E9CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-06 16:33:54 -0600 (Thu, 06 Jan 2011) New Revision: 3879 Modified: text/parco10submission/paper.tex Log: minor edits. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 22:31:49 UTC (rev 3878) +++ text/parco10submission/paper.tex 2011-01-06 22:33:54 UTC (rev 3879) @@ -612,6 +612,8 @@ \subsection{Ordering of execution and implicit parallelism} \label{ordering} +\mikenote{Rename this as Parallelism model?; stress and show how highly parallel the model is - the idea that the workflow is fully expanded but throttled.} + Since all variables and collection elements are single-assignment, %they can be assigned a value at most once during the execution of a script. 
a function or expression can be executed when all of its input From noreply at svn.ci.uchicago.edu Thu Jan 6 16:40:54 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 16:40:54 -0600 (CST) Subject: [Swift-commit] r3880 - text/parco10submission Message-ID: <20110106224054.DB95D9CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-06 16:40:54 -0600 (Thu, 06 Jan 2011) New Revision: 3880 Modified: text/parco10submission/paper.bib Log: fixing the workflow book chapter ref Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-06 22:33:54 UTC (rev 3879) +++ text/parco10submission/paper.bib 2011-01-06 22:40:54 UTC (rev 3880) @@ -206,11 +206,12 @@ year = {2005} } - at INCOLLECTION{Karajan, - chapter = {Java CoG Kit Workflow}, - pages = {341-356}, + at incollection{Karajan, + chapter={21}, + title = {Java {CoG} Kit Workflow}, + pages = {341--356}, author = {von Laszewski, Gregor and Hategan, Mike and Kodeboyina, Deepti}, - title = {Workflows for e-Science}, + booktitle = {Workflows for e-Science}, publisher = {Springer}, year = {2007}, editor = {Ian Taylor and Ewa Deelman and Dennis Gannon and Matthew Shields} From noreply at svn.ci.uchicago.edu Thu Jan 6 17:19:27 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 17:19:27 -0600 (CST) Subject: [Swift-commit] r3881 - in branches/release-0.92/src/org/griphyn/vdl/karajan/lib: . 
replication Message-ID: <20110106231927.B16449CC9B@svn.ci.uchicago.edu> Author: hategan Date: 2011-01-06 17:19:27 -0600 (Thu, 06 Jan 2011) New Revision: 3881 Modified: branches/release-0.92/src/org/griphyn/vdl/karajan/lib/Execute.java branches/release-0.92/src/org/griphyn/vdl/karajan/lib/replication/ReplicationManager.java Log: removed job info message: it is available by logging abstraction classes; use setStack(); remove done jobs from running map in ReplicationManager; also remove canceled jobs from running map in ReplicationManager to prevent repeated cancelling in case provide does not properly implement cancel() Modified: branches/release-0.92/src/org/griphyn/vdl/karajan/lib/Execute.java =================================================================== --- branches/release-0.92/src/org/griphyn/vdl/karajan/lib/Execute.java 2011-01-06 22:40:54 UTC (rev 3880) +++ branches/release-0.92/src/org/griphyn/vdl/karajan/lib/Execute.java 2011-01-06 23:19:27 UTC (rev 3881) @@ -15,6 +15,7 @@ import org.globus.cog.karajan.util.TypeUtil; import org.globus.cog.karajan.workflow.ExecutionException; import org.globus.cog.karajan.workflow.KarajanRuntimeException; +import org.globus.cog.karajan.workflow.events.EventBus; import org.globus.cog.karajan.workflow.futures.FutureVariableArguments; import org.globus.cog.karajan.workflow.nodes.grid.GridExec; import org.griphyn.vdl.karajan.lib.replication.CanceledReplicaException; @@ -48,9 +49,7 @@ registerReplica(stack, task); log(task, stack); scheduler.addJobStatusListener(this, task); - synchronized (tasks) { - tasks.put(task, stack); - } + setStack(task, stack); scheduler.enqueue(task, constraints); } catch (CanceledReplicaException e) { @@ -72,19 +71,6 @@ if (logger.isDebugEnabled()) { logger.debug("jobid="+jobid+" task=" + task); } - else if (logger.isInfoEnabled()) { - Specification spec = task.getSpecification(); - if (spec instanceof JobSpecification) { - JobSpecification jobspec = (JobSpecification) spec; - logger.info("Submit: 
" + - "in: " + jobspec.getDirectory() + - " command: " + jobspec.getExecutable() + - " " + jobspec.getArguments()); - } - else { - logger.info("Submit: " + spec); - } - } } protected void registerReplica(VariableStack stack, Task task) throws CanceledReplicaException { @@ -115,6 +101,9 @@ getReplicationManager(stack).active(task, e.getStatus().getTime()); ((FutureVariableArguments) A_REPLICATION_CHANNEL.getValue(stack)).close(); } + else if (e.getStatus().isTerminal()) { + getReplicationManager(stack).terminated(task); + } else if (c == ReplicationManager.STATUS_NEEDS_REPLICATION) { RuntimeStats.setProgress(stack, "Replicating"); ((FutureVariableArguments) A_REPLICATION_CHANNEL.getValue(stack)).append(Boolean.TRUE); Modified: branches/release-0.92/src/org/griphyn/vdl/karajan/lib/replication/ReplicationManager.java =================================================================== --- branches/release-0.92/src/org/griphyn/vdl/karajan/lib/replication/ReplicationManager.java 2011-01-06 22:40:54 UTC (rev 3880) +++ branches/release-0.92/src/org/griphyn/vdl/karajan/lib/replication/ReplicationManager.java 2011-01-06 23:19:27 UTC (rev 3881) @@ -5,7 +5,6 @@ import java.util.Date; import java.util.HashMap; -import java.util.Iterator; import java.util.Map; import org.apache.log4j.Logger; @@ -30,7 +29,7 @@ private int n; private long s; private long s2; - private Map queued, running; + private Map queued, running; private int minQueueTime, limit; private boolean enabled; private ReplicationGroups replicationGroups; @@ -39,8 +38,8 @@ public ReplicationManager(Scheduler scheduler) { this.replicationGroups = new ReplicationGroups(scheduler); this.scheduler = scheduler; - queued = new HashMap(); - running = new HashMap(); + queued = new HashMap(); + running = new HashMap(); try { minQueueTime = Integer.parseInt(VDL2Config.getConfig().getProperty( "replication.min.queue.time")); @@ -80,7 +79,7 @@ if (enabled) { Date submitted; synchronized (this) { - submitted = (Date) 
queued.remove(task); + submitted = queued.remove(task); registerRunning(task, time); if (submitted != null) { long delta = (time.getTime() - submitted.getTime()) / 1000; @@ -104,7 +103,9 @@ seconds = WallTime.timeToSeconds(walltime.toString()); } Date deadline = new Date(time.getTime() + WALLTIME_DEADLINE_MULTIPLIER * seconds * 1000); - running.put(task, deadline); + synchronized (this) { + running.put(task, deadline); + } } public synchronized int getN() { @@ -130,26 +131,21 @@ } public void checkTasks() { - Map m, r; + Map m, r; synchronized (this) { - m = new HashMap(queued); - r = new HashMap(running); + m = new HashMap(queued); + r = new HashMap(running); } - Iterator i; - i = m.entrySet().iterator(); - while (i.hasNext()) { - Map.Entry e = (Map.Entry) i.next(); - Task t = (Task) e.getKey(); - Date d = (Date) e.getValue(); + for (Map.Entry e : m.entrySet()) { + Task t = e.getKey(); + Date d = e.getValue(); if (shouldBeReplicated(t, d)) { replicationGroups.requestReplica(t); } } - i = r.entrySet().iterator(); - while (i.hasNext()) { - Map.Entry e = (Map.Entry) i.next(); - Task t = (Task) e.getKey(); - Date d = (Date) e.getValue(); + for (Map.Entry e : r.entrySet()) { + Task t = e.getKey(); + Date d = e.getValue(); if (isOverDeadline(t, d)) { logger.info(t + ": deadline passed. 
Cancelling job."); cancelTask(t); @@ -183,5 +179,15 @@ private void cancelTask(Task t) { scheduler.cancelTask(t, "Walltime exceeded"); + // prevent repeated cancelling in case the provider doesn't support cancel() + synchronized (this) { + running.remove(t); + } } + + public void terminated(Task task) { + synchronized (this) { + running.remove(task); + } + } } \ No newline at end of file From noreply at svn.ci.uchicago.edu Thu Jan 6 17:34:54 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 17:34:54 -0600 (CST) Subject: [Swift-commit] r3882 - text/parco10submission Message-ID: <20110106233454.A455C9CC9B@svn.ci.uchicago.edu> Author: hategan Date: 2011-01-06 17:34:54 -0600 (Thu, 06 Jan 2011) New Revision: 3882 Modified: text/parco10submission/paper.tex Log: made a comment on garbage collection and changed one instance of "composite type" to "collection type" Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-06 23:19:27 UTC (rev 3881) +++ text/parco10submission/paper.tex 2011-01-06 23:34:54 UTC (rev 3882) @@ -7,6 +7,7 @@ \usepackage{framed} \newcommand{\katznote}[1]{ {\textcolor{cyan} { ***Dan: #1 }}} \newcommand{\mikenote}[1]{ {\textcolor{red} { ***Mike: #1 }}} +\newcommand{\mihaelnote}[1]{ {\textcolor{green} { ***Mihael: #1 }}} \newcommand{\hide}[1]{ {{}}} \definecolor{shadecolor}{RGB}{240,255,200} @@ -391,12 +392,14 @@ Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. Arrays can be nested to provide multi-dimensional indexing. Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. 
The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? garbage collection removes variables that are no longer needed - I don't see how single assignment helps here.} +\mihaelnote{I think we should not mention the garbage collection issue. In fact, we don't and we should implement +garbage collection at the "dual" level (i.e., clean temp files) as well as remove unused futures from memory} Variables that are declared to be file references are associated with a \emph{mapper}, which defines (often through a dynamic lookup process) the data files that are to be mapped to the variable. Array and structure elements that are declared to be file references are similarly mapped. -Mapped type and composite \katznote{I don't know what composite means here} +Mapped type and collection \katznote{I don't know what composite means here}\mihaelnote{changed "composite type" to "collection type" as introduced earlier} type variable declarations can be annotated with a \emph{mapping} descriptor that specifies the file(s) that are to be mapped to the Swift data element(s). 
From noreply at svn.ci.uchicago.edu Thu Jan 6 21:11:06 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 21:11:06 -0600 (CST) Subject: [Swift-commit] r3883 - in branches/release-0.92/tests: groups providers/local-cobalt providers/local-cobalt/surveyor Message-ID: <20110107031106.BE1C39CC9B@svn.ci.uchicago.edu> Author: davidk Date: 2011-01-06 21:11:06 -0600 (Thu, 06 Jan 2011) New Revision: 3883 Added: branches/release-0.92/tests/groups/group-surveyor.sh branches/release-0.92/tests/providers/local-cobalt/surveyor/ branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.swift branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.timeout branches/release-0.92/tests/providers/local-cobalt/surveyor/README branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0001.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0002.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0003.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0004.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0005.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0006.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0007.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0008.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0009.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0010.out.expected branches/release-0.92/tests/providers/local-cobalt/surveyor/data.txt branches/release-0.92/tests/providers/local-cobalt/surveyor/sites.template.xml 
branches/release-0.92/tests/providers/local-cobalt/surveyor/tc.template.data branches/release-0.92/tests/providers/local-cobalt/surveyor/title.txt Log: Site configuration test for surveyor Added: branches/release-0.92/tests/groups/group-surveyor.sh =================================================================== --- branches/release-0.92/tests/groups/group-surveyor.sh (rev 0) +++ branches/release-0.92/tests/groups/group-surveyor.sh 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1,8 @@ + +# GROUPLIST definition to run on Intrepid + +GROUPLIST=( $TESTDIR/providers/local \ + $TESTDIR/providers/local-cobalt/surveyor \ + ) + +checkvars WORK QUEUE PROJECT Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1,12 @@ +#!/bin/bash + +set -x + +for count in `seq --format "%04.f" 1 1 10` +do + [ -f catsn.$count.out ] || exit 1 + CONTENTS1=$( cat catsn.$count.out.expected ) + CONTENTS2=$( cat catsn.$count.out ) + [[ $CONTENTS1 == $CONTENTS2 ]] || exit 1 +done +exit 0 Property changes on: branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh ___________________________________________________________________ Name: svn:executable + * Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1,4 @@ +#!/bin/bash + +cp -v $GROUP/data.txt . || exit 1 +cp -v $GROUP/*expected . 
|| exit 1 Property changes on: branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh ___________________________________________________________________ Name: svn:executable + * Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.swift =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.swift (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.swift 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1,15 @@ +type file; + +app (file o) cat (file i) +{ + cat @i stdout=@o; +} + +string t = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +string char[] = @strsplit(t, ""); + +file out[]; +foreach j in [1:@toint(@arg("n","10"))] { + file data<"data.txt">; + out[j] = cat(data); +} Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.timeout =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.timeout (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.timeout 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +1000 Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/README =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/README (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/README 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1,3 @@ +Be sure to set PROJECT and QUEUE. 
These settings worked for me, but unsure if they are universal +export PROJECT=HTCScienceApps +export QUEUE=default Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0001.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0001.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0001.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0002.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0002.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0002.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0003.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0003.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0003.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0004.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0004.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0004.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0005.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0005.out.expected (rev 0) +++ 
branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0005.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0006.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0006.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0006.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0007.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0007.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0007.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0008.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0008.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0008.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0009.out.expected =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0009.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0009.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0010.out.expected =================================================================== --- 
branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0010.out.expected (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/catsn.0010.out.expected 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/data.txt =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/data.txt (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/data.txt 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Hello world Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/sites.template.xml =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/sites.template.xml (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/sites.template.xml 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1,22 @@ + + + + + + _HOST_ + _PROJECT_ + _QUEUE_ + zeptoos + true + 21 + 10000 + 1 + DEBUG + 1 + 900 + 64 + 64 + _WORK_ + + + Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/tc.template.data =================================================================== --- branches/release-0.92/tests/providers/local-cobalt/surveyor/tc.template.data (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/tc.template.data 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1,7 @@ +surveyor echo /bin/echo INSTALLED INTEL32::LINUX +surveyor cat /bin/cat INSTALLED INTEL32::LINUX +surveyor ls /bin/ls INSTALLED INTEL32::LINUX +surveyor grep /bin/grep INSTALLED INTEL32::LINUX +surveyor sort /bin/sort INSTALLED INTEL32::LINUX +surveyor paste /bin/paste INSTALLED INTEL32::LINUX +surveyor wc /usr/bin/wc INSTALLED INTEL32::LINUX Added: branches/release-0.92/tests/providers/local-cobalt/surveyor/title.txt =================================================================== --- 
branches/release-0.92/tests/providers/local-cobalt/surveyor/title.txt (rev 0) +++ branches/release-0.92/tests/providers/local-cobalt/surveyor/title.txt 2011-01-07 03:11:06 UTC (rev 3883) @@ -0,0 +1 @@ +Surveyor Site Configuration Test From noreply at svn.ci.uchicago.edu Thu Jan 6 23:05:50 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 23:05:50 -0600 (CST) Subject: [Swift-commit] r3884 - text/parco10submission Message-ID: <20110107050550.BC0219CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-06 23:05:50 -0600 (Thu, 06 Jan 2011) New Revision: 3884 Modified: text/parco10submission/paper.tex Log: Extended acknowlegements. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 03:11:06 UTC (rev 3883) +++ text/parco10submission/paper.tex 2011-01-07 05:05:50 UTC (rev 3884) @@ -2,9 +2,17 @@ \documentclass[preprint,12pt]{elsarticle} \usepackage{graphicx} +\usepackage{fancyvrb} \usepackage{xcolor} \usepackage{framed} + +\usepackage{float} + +\floatstyle{ruled} +\newfloat{script}{thp}{lop} +\floatname{script}{Example script} + \newcommand{\katznote}[1]{ {\textcolor{cyan} { ***Dan: #1 }}} \newcommand{\mikenote}[1]{ {\textcolor{red} { ***Mike: #1 }}} \newcommand{\mihaelnote}[1]{ {\textcolor{green} { ***Mihael: #1 }}} @@ -1162,367 +1170,13 @@ executable. Swift's existing input file management then stages-in the application files once per site per run. +\pagebreak \section{Applications} \label{Applications} -% TODO: two or three applications in brief. discuss both the application -% behaviour in relation to Swift, but underlying grid behaviour in -% relation to Swift - -% One app: CNARI + TeraGrid - small jobs (3s), many of them. - -% Another app: Rosetta on OSG? OSG was designed with a focus on -% heterogeneity between sites. Large number of sites; automatic site file -% selection; and automatic app deployment there. 
- This section describes a few representative Swift applications from various diverse disciplines. -% \subsection{BLAST Application Example} - -% % The following is notes from the Wiki by Allan: needs much refinement, adding here as a placeholder. - -% \begin{verbatim} -% type database; -% type query; -% type output; -% type error; - -% app (output out, error err) blastall(query i, database db) { -% blastall "-p" "blastp" "-F" "F" -% "-d" @filename(db) "-i" @filename(i) -% "-v" "300" "-b" "300" "-m8" -% "-o" @filename(out) stderr=@filename(err); -% } - -% database pir ; - -% query i <"test.in">; -% output out <"test.out">; -% error err <"test.err">; - -% (out,err) = blastall(i, pir); -% \end{verbatim} - -% The application {\tt \small blastall} expects the prefix of the database files that it will read (.phr, .seq and .pin files). -% This example employs a dummy file called {\tt \small -% UNIPROT.14.0.seq} to satisfy the data dependency. When executed, -% the Swift script processes the following input directory {\tt\small /ci/pir}: - -% \begin{verbatim} -% -rw-r--r-- 1 ben ci 0 Nov 15 13:49 UNIPROT.14.0.seq -% -rw-r--r-- 1 ben ci 204106872 Oct 20 16:50 UNIPROT.14.0.seq.00.phr -% -rw-r--r-- 1 ben ci 23001752 Oct 20 16:50 UNIPROT.14.0.seq.00.pin -% -rw-r--r-- 1 ben ci 999999669 Oct 20 16:51 UNIPROT.14.0.seq.00.psq -% -rw-r--r-- 1 ben ci 233680738 Oct 20 16:51 UNIPROT.14.0.seq.01.phr -% -rw-r--r-- 1 ben ci 26330312 Oct 20 16:51 UNIPROT.14.0.seq.01.pin -% -rw-r--r-- 1 ben ci 999999864 Oct 20 16:52 UNIPROT.14.0.seq.01.psq -% -rw-r--r-- 1 ben ci 21034886 Oct 20 16:52 UNIPROT.14.0.seq.02.phr -% -rw-r--r-- 1 ben ci 2370216 Oct 20 16:52 UNIPROT.14.0.seq.02.pin -% -rw-r--r-- 1 ben ci 103755125 Oct 20 16:52 UNIPROT.14.0.seq.02.psq -% -rw-r--r-- 1 ben ci 208 Oct 20 16:52 UNIPROT.14.0.seq.pal -% \end{verbatim} - -% % I looked at the dock6 documentation for OSG. It looks that it recommends to transfer the datafiles to OSG sites manually via globus-url-copy. 
By my understanding of how swift works, it should be able to transfer my local files to the selected sites. I have yet to try this and will look more on examples in the data management side of Swift. - -% % Do you know other users who went in this approach? The documentation has only a few examples in managing data. I'll check the swift Wiki later and see what material we have and also post this email/ notes. - -\subsection{fMRI Application Example} - -\begin{figure}[htbp] - \begin{center} - \includegraphics[scale=1]{img/IMG_fmridataset} - \caption{fMRI application data structure\label{FMRI_app_image}} - \end{center} -\end{figure} - -\begin{figure}[htbp] -\begin{verbatim} -type Study { Group g[]; } -type Run { Volume v[]; } -type Volume { - Image img; - Header hdr; -} -type Group { Subject s[]; } -type AirVector { Air a[]; } -type Subject { - Volume anat; - Run run[]; -} - -(Run resliced) reslice_wf ( Run r) { - Run yR = reorientRun( r , "y", "n" ); - Run roR = reorientRun( yR , "x", "n" ); - Volume std = roR.v[1]; - AirVector roAirVec = alignlinearRun(std, roR, - 12, 1000, 1000, "81 3 3"); - resliced = resliceRun( roR, roAirVec, "-o", - "-k"); -} - -app (Volume ov) reorient (Volume iv, - string direction, string overwrite) { - - reorient @filename(iv.hdr) @filename(ov.hdr) - direction overwrite; - -} - -(Run or) reorientRun (Run ir, string direction, - string overwrite) { - foreach Volume iv, i in ir.v { - or.v[i] = reorient (iv, direction, overwrite); - } -} - -\end{verbatim} - \caption{fMRI application in Swift\label{FMRI_app_script}} -\end{figure} - -In this example, the logical structure of the fMRI dataset shown in -Figure~\ref{FMRI_app_image} can be represented by the Swift type declarations in -lines 1-6 in Figure~\ref{FMRI_app_script}. Here, Study is declared as containing an array -of Group, which in turn contains an array of Subject, etc. 
Similarly, -an fMRI Run is a series of brain scans called volumes, with a Volume -containing a 3D image of a volumetric slice of a brain image, -represented by an Image (voxels) and a Header (scanner metadata). An -Air is a parameter file for spatial adjustment, and an AirVector is a -set of such parameter files. Datasets are operated on by functions, -which take typed data described by a mapper, perform computations -on those data, and produce data to be stored in locations specified -by a mapper. The -function {\tt reslice\_wf} defines a compound function, which comprises -a series of function calls, using variables to establish -data dependencies. - -In the example, {\tt reslice\_wf} defines a four-step pipeline computation, -using variables to establish data dependencies. It applies reorientRun -to a run first in the y axis and then in the x axis, and then aligns -each image in the resulting run with the first image. The program -{\tt alignlinear} \katznote{should this be alignlinearRun?} determines how to spatially adjust an image to match a -reference image, and produces an air parameter file. The actual -alignment is done by the program {\tt reslice}. -\katznote{or resliceRun?} Note that variable {\tt yR} is -the output of the first step and the input of the second step, and as such, defines -the data dependencies between the two steps. %The pipeline is -%illustrated in the center of % Figure~\ref{FMRIFigure2}, -%while in figure % \ref{FMRIgraph} -%we show the expanded graph for a 20-volume run. Each -%volume comprises an image file and a header file, so there are a total -%of 40 input files and 40 output files. We can also apply the same -%function to a run containing hundreds or thousands of volumes. - -%In this example we show the details of -The function reorientRun -is also a compound function. 
-The foreach statement defines an iteration over the input run -{\tt ir} and applies the function {\tt reorient} (which rotates a brain image -along a certain axis) to each volume in the run to produce a -reoriented run {\tt or}. Because the multiple calls to reorient operate on -independent data elements, they can proceed in parallel. - -The function reorient in this example is an atomic function. -This function has typed input parameters ({\tt iv}, {\tt direction}, and {\tt overwrite}) and -one output ({\tt ov}). The body specifies that it -invokes a program (also called reorient) that will be -dynamically mapped to a binary executable, which will -execute at an execution site chosen by the Swift runtime system. The -body also specifies how input parameters map to command line -arguments. The notation {\tt @filename} is a built-in mapping function that -maps a logical data structure to a physical file name. In this case, -it extracts the file names of the input header and output header, which are -then put in the command line to invoke the reorient program. - -\subsection{Structural Equation Modeling using OpenMx} - -% \cite{OpenMx} - -OpenMx is an R library designed for structural equation modeling (SEM), -a technique currently used in the neuroimaging field to examine -connectivity between brain areas. - -Structural Equation Modeling is greatly enhanced when coupled with -grid resources and a workflow management system. Traditionally, -structural equation models have been derived from anatomical models -based on primate brains out of necessity. It was infeasible to test -models outside of the hypothetical anatomical model space due to -restrictions in resources. In contrast, Swift can be used to implement a -scriptable, high-level means for not only testing but generating -exploratory models in parallel on large clusters, making the testing of -models outside the (anatomical) hypothesis space a more reasonable goal. 
-In light of this, we have developed a ``model generator'' to allow a -researcher to test all models within a space of potential connections -without a predefined anatomical model. In the absence of a large-scale -infrastructure, this would not be doable. For example, within the CNARI -submit cluster at Chicago, \katznote{is this where jobs are run? Or just submitted from? If the latter, where are they run?} we have a relatively simple Swift script for -calling OpenMx to generate and process models in parallel. - -\begin{figure}[htbp] - \begin{center} - \includegraphics{img/omxFigure} - \caption{Schematic of a single OpenMx model containing 4 - regions of interest (I through L) with 5 regression starting - values (asymmetric connections) of weight 0.75 and 4 - residual variances (symmetric connections) of weight 1.0 - \label{omxFigure}} - \end{center} -\end{figure} - -Using OpenMx's model generator---a set of functions that create -self-contained, structural equation models---we generated 65,535 R -objects representing all models with 1 to 16 connections -of varying weights between 4 pre-selected regions of interest. -A 4~x~4 matrix represents connections between the four regions, -with 16 possible connections (connections between the same two -regions but in different directions are tested separately). -\katznote{should refer to figure~\ref{omxFigure} here? If so, the residual variances -part doesn't seem to match this text} -We queried -our experiment database for activation values based on the selected -regions of interest, and the covariance of those regions over 8 time -points during the emblem \katznote{huh?} experiment was calculated. The covariance of -each generated model was then compared to the covariance matrix of the -observed data to determine the best-fitting model. In other words, the -connection weights (or strength of the relationships between anatomical -regions) can be explored based on the fit of each model. 
- -%modgenproc.swift -A Swift script is used to submit each of the necessary computation -components to TeraGrid's Ranger cluster: a) the model object b) the -covariance matrix derived from the database and c) the R script which -makes the call to OpenMx. Once the job is assigned to a node, OpenMx -estimates weight parameters for each connection within the given model -that results in a model covariance closest to the observed covariance of -the data. Each of these compute jobs returns its solution model object -as well as a file containing the minimum value achieved from that model. -The processing of these models on Ranger was achieved in less than -45 minutes. - -A model generator was developed for the OpenMx package and is designed -explicitly to enable parallel execution of exhaustive or partially -pruned sets of model objects. Given an $n$~x~$n$ covariance matrix it can -generate the entire set of possible models with anywhere from 0 to $n^2$ -connections; however, it can also take as input a single index from -that set and it will generate and run a single model. What this means -in the context of workflow design is that the generator can be -controlled (and parallelized) easily by a Swift script, using the few lines of -code in Figure~\ref{omxScript1}. - -\begin{figure} -\begin{verbatim} - -1. app (mxModel min) mxModelProcessor(file covMatrix, - Rscript mxModProc, int modnum, float initweight, - string cond){ -2. { -3. RInvoke @filename(mxModProc) @filename(covMatrix) modnum - initweight cond; -4. } -5. file covMatrix; -6. Rscript mxScript; -7. int totalperms[] = [1:65536]; -8. float initweight = .5; -9. foreach perm in totalperms{ -10. mxModel modmin; -11. modmin = mxModelProcessor(covMatrix, mxScript, perm, - initweight, speech); -12. 
} -\end{verbatim} -\caption{Swift script for 4-region exhaustive SEM for a single experimental condition\label{omxScript1}} -\end{figure} - -First, a covariance matrix containing activation data for 4 brain regions, -over 8 time points, averaged over a group of subjects in the speech -condition was drawn from the experiment database and its location -(in this example, on the local file system, though the file could be located -anywhere) is mapped in line 5. Line 6 maps the R processing script and -lines 1 through 4 define the atomic function for invoking R. The script -includes a foreach loop, each iteration -of which maps its optimized model output file and calls -mxModelProcessor() with the necessary parameters to generate and -run a model. Each of these invocations of mxModelProcessor() is -independent and is submitted for processing in parallel. Swift passes -5 variables for each invocation: (1) the covariance matrix; (2) the R -script containing the call to OpenMx; (3) the permutation number, -i.e., the index of the model; (4) the initialization weight for the free -parameters of the given model; and (5) the experimental condition. -Clearly, in this workflow all free parameters of the given model will -have the same initialization weight as Swift is passing only one weight -variable. When the job reaches a worker node (for example, on the -TeraGrid Ranger system at TACC), an R process is initialized, the -generator creates the desired model by calculating where in the array -that permutation of the model matrix falls. OpenMx then estimates the -model parameters using a non-linear optimization algorithm called -NPSOL~\cite{NPSOL} -and the optimized model is returned and written out by Swift to the -location specified in its mapping on line 10. - -This script completed in approximately 40 minutes. The script can -then be altered to run over multiple experimental conditions by adding -another outer loop, as shown in Figure~\ref{omxScript2}. 
-With the outer loop, the new workflow consists of 131,072 -jobs, since we are now running the entire set for two conditions. -This workflow completed in approximately 2 hours - -\begin{figure} -\begin{verbatim} -1. string conditions[] = ["emblem", "speech"]; -2. int totalperms[] = [1:65536]; -3. float initweight = .5; -4. foreach cond in conditions{ -5. foreach perm in totalperms{ -6. file covMatrix; -7. mxModel modmin; -8. modmin= mxModelProcessor(covMatrix, mxScript, perm, - initweight, cond); -9. } -\end{verbatim} -\caption{Swift script for 4-region exhaustive SEM for 2 experimental conditions\label{omxScript2}} -\end{figure} - - -\subsection{Molecular Dynamics with DOCK} - -\begin{figure} -\begin{verbatim} - -app (file t,DockOut tarout) dock (DockIn infile, string targetlist) { - dock6 @infile targetlist stdout=@filename(t) @tarout; -} - -type params { - string ligands; - string targets; -} - -params pset[] ; - -runDocks(params pset[]) -{ - foreach params,i in pset { - DockIn infile < single_file_mapper; - file=@strcat("/ci/dock/db/KEGGDrugs/",pset[i].ligands)>; - file sout ; - DockOut docking ; - (sout,docking) = dock(infile,pset[i].targetlist); - } -} - -params p[]; -p = readdata("paramslist.txt"); -runDocks(p); -\end{verbatim} -\caption{Swift script for running Dock\label{DockScript}} -\end{figure} - -\katznote{no text in this subsection -- no reference to Figure~\ref{DockScript}} - \subsection{Satellite image data processing.} The last example (which come from a class project) processes @@ -1546,62 +1200,214 @@ \katznote{``above'' isn't above, it's in the script below, which isn't at all described.} -\begin{verbatim} -type file; -type imagefile; -type landuse; +\pagebreak +Swift example 1: MODIS satellite image processing script +\begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] + 1 type file; + 2 type imagefile; + 3 type landuse; + 4 + 5 # Define application program interfaces + 6 + 7 app (landuse output) getLandUse 
(imagefile input, int sortfield) + 8 { + 9 getlanduse @input sortfield stdout=@output ; + 10 } + 11 + 12 app (file output, file tilelist) analyzeLandUse + 13 (landuse input[], string usetype, int maxnum) + 14 { + 15 analyzelanduse @output @tilelist usetype maxnum @filenames(input); + 16 } + 17 + 18 app (imagefile output) colorMODIS (imagefile input) + 19 { + 20 colormodis @input @output; + 21 } + 22 + 23 app (imagefile output) assemble + 24 (file selected, imagefile image[], string webdir) + 25 { + 26 assemble @output @selected @filename(image[0]) webdir; + 27 } + 28 + 29 app (imagefile grid) markMap (file tilelist) + 30 { + 31 markmap @tilelist @grid; + 32 } + 33 + 34 # Constants and command line arguments + 35 + 36 int nFiles = @toint(@arg("nfiles","1000")); + 37 int nSelect = @toint(@arg("nselect","12")); + 38 string landType = @arg("landtype","urban"); + 39 string runID = @arg("runid","modis-run"); + 40 string MODISdir= @arg("modisdir","/home/wilde/bigdata/data/modis/2002"); + 41 string webDir = @arg("webdir","/home/wilde/public_html/geo/"); + 42 + 43 string suffix=".tif"; + 44 + 45 # Input Dataset + 46 + 47 imagefile geos[] ; # site=site + 49 + 50 # Compute the land use summary of each MODIS tile + 51 + 52 landuse land[] ; + 54 + 55 foreach g,i in geos { + 56 land[i] = getLandUse(g,1); + 57 } + 58 + 59 # Find the top N tiles (by total area of selected landuse types) + 60 + 61 file topSelected<"topselected.txt">; + 62 file selectedTiles<"selectedtiles.txt">; + 63 (topSelected, selectedTiles) = analyzeLandUse(land, landType, nSelect); + 64 + 65 # Mark the top N tiles on a sinusoidal gridded map + 66 + 67 imagefile gridMap<"markedGrid.gif">; + 68 gridMap = markMap(topSelected); + 69 + 70 # Create multi-color images for all tiles + 71 + 72 imagefile colorImage[] ; + 75 + 76 foreach g, i in geos { + 77 colorImage[i] = colorMODIS(g); + 78 } + 79 + 80 # Assemble a montage of the top selected areas + 81 + 82 imagefile montage ; # @arg + 83 montage = 
assemble(selectedTiles,colorImage,webDir); -app (landuse output) getLandUse (imagefile input, int sortfield) -{ - getlanduse @input sortfield stdout=@output ; -} +\end{Verbatim} +%\end{verbatim} -app (file output, file tilelist) analyzeLandUse (landuse input[], int usetype, int maxnum) -{ - analyzelanduse @output @tilelist usetype maxnum @filenames(input); -} +\pagebreak +\subsection{Simulation of glassy dynamics and thermodynamics.} -app (imagefile output) colormodis (imagefile input) -{ - colormodis @input @output; -} +Recent study of the glass transition in model systems has focused on calculating from theory or simulation what is known as the "Mosaic length". Glen Hocky of the Reichman Lab at Columbia applied a new cavity method for measuring this length scale, where particles are simulated by molecular dynamics or Monte Carlo methods within cavities having amorphous boundary conditions. Various correlation functions are calculated at the interior of cavities of varying sizes and averaged over many independent simulations to determine a thermodynamic length. Hocky's simulations used this method to investigate the differences between three different systems which all have the same "structure" but differ in other subtle ways to see if it is in fact this thermodynamic length that is the difference between the models. -imagefile geos[]; -landuse land[]; +Rather than run ~500K-1.5M steps per job (which a priori I didn't know how many I would run anyway) I ran 100K at a time. Hence the repetitions of runs. But I would say the campaign started more like in October. If all the jobs are on PADS then it'll be more obvious. -# Find the land use of each modis tile +As this simulation was a lengthy campaign (from about October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what work remained during various restarts. His mappers assumed an application run was complete if all the returned ".final" files existed.
In the case of script restarts, results that already existed were not computed. The Swift restart mechanism was also tested and worked fine, but required tracking which workflow was being restarted. Occasionally missing files caused the restart to fail; Hocky's ad-hoc restart via mappers worked exceedingly well (and perhaps suggests a new approach for the integrated restart mechanism). -foreach g,i in geos { - land[i] = getLandUse(g,1); -} +A high-level description of the glass simulation campaign is as follows: -# Find the top 10 most urban tiles (by area) +loops are: 7 radii x 27 centers x 10 models x 1 job = 1690 jobs per run -int UsageTypeURBAN=13; -file bigurban<"topurban.txt">; -file urbantiles<"urbantiles.txt">; -(bigurban, urbantiles) = analyzeLandUse(land, UsageTypeURBAN, 10); +3 methods: kalj (16) kawka (37) pedersenipl (37) for a total of 90 runs -# Map the files to an array +roughly 152,000 jobs defined by all the run*.sh scripts -string urbanfilenames[] = readData(urbantiles); +about 1-2 hours per job -imagefile urbanfiles[] ; +Approximate OSG usage: over 100K CPU hours, with about 100K tasks of 1-2 hours completed. The application has been successfully run on about 18 OSG sites (with the majority of runs completed on about 6 primary sites). -# Create a set of recolored images for just the urban tiles +This project would be completely unwieldy and much harder to organize without using Swift. -foreach uf, i in urbanfiles { - imagefile recoloredImage ; - recoloredImage = colormodis(uf); -} +Some runs were done on other resources including UChicago TeraGrid, and the only change/addition necessary to run on OSG was configuring the OSG sites to run the science application. -imagefile geos[]; +Hocky is currently investigating whether slightly more advanced techniques will be necessary, in which case he may need to run approximately the same number of simulations again.
-\end{verbatim} +\pagebreak +Swift example 2: Monte-Carlo simulation of quantum glass structures + +%\begin{verbatim} +\begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] + 1 type Arc; + 2 type Restart; + 3 type Log; + 4 + 5 type GlassIn{ + 6 Restart startfile; + 7 } + 8 + 9 type GlassOut{ + 10 Arc arcfile; + 11 Restart restartfile; + 12 Restart startfile; + 13 Restart final; + 14 Log logfile; + 15 } + 16 + 17 app (GlassOut o) glassCavityRun( + 18 GlassIn i, string rad, string temp, string steps, + 19 string volume, string fraca, string energyfunction, + 20 string centerstring, string arctimestring) + 21 { + 22 glassRun "-a" @filename(o.final) "--lf" @filename(i.startfile) + 23 "--temp" temp "--stepsperparticle" steps "--volume" volume + 24 "--fraca" fraca "--energy_function" energyfunction + 25 "--cradius" rad "--ccoord" centerstring arctimestring + 26 stdout=@filename(o.logfile); + 27 } + 28 + 29 CreateGlassSystem() + 30 { + 31 string temp=@arg("temp","2.0"); + 32 string steps=@arg("steps","10"); + 33 string esteps=@arg("esteps","100"); + 34 string ceqsteps=@arg("ceqsteps","100"); + 35 string natoms=@arg("natoms","200"); + 36 string volume=@arg("volume","200"); + 37 string rlist=@arg("rlist","rlist"); + 38 string clist=@arg("clist","clist"); + 39 string fraca=@arg("fraca","0.5"); + 40 string radii[] = readData(rlist); + 41 string centers[] = readData(clist); + 42 int nmodels=@toint( @arg("n","1") ); + 43 int nsub=@toint( @arg("nsub","1") ); + 44 string savearc=@arg("savearc","FALSE"); + 45 string arctimestring; + 46 if(savearc=="FALSE") { + 47 arctimestring="--arc_time=10000000"; + 48 } + 49 else{ + 50 arctimestring=""; + 51 } + 52 string energyfunction=@arg("energyfunction","softsphereratiosmooth"); + 53 + 54 GlassIn modelIn[][][] ; + 58 GlassOut modelOut[][][][] ; + 62 + 63 foreach rad,rindex in radii { + 64 foreach centerstring,cindex in centers { + 65 foreach model in [0:nmodels-1] { + 66 foreach job in [0:nsub-1] { + 67 
string fname = + 68 @filename(modelOut[rindex][cindex][model][job].final) + 69 if (fname != "NULL") { + 70 modelOut[rindex][cindex][model][job] = + 71 glassCavityRun( modelIn[rindex][cindex][model], + 72 rad, temp, steps, volume, fraca, energyfunction, + 73 centerstring, arctimestring); + 74 } + 75 } + 76 } + 77 } + 78 } + 79 } + 80 + 81 + 82 CreateGlassSystem(); +\end{Verbatim} +%\end{verbatim} + \section{Performance Characteristics} \label{Performance} @@ -1892,9 +1698,11 @@ This research is supported in part by NSF grants OCI-721939 and OCI-0944332, and the U.S. Department of Energy under contract DE-AC02-06CH11357. Computing resources were provided by the Argonne -Leadership Computing Facility, TeraGrid, the Open Science Grid, the -Petascale Active Data Store, and Amazon Web Services. +Leadership Computing Facility, TeraGrid, the Open Science Grid, the UChicago Computation Institute +Petascale Active Data Store, and the Amazon Web Services Education program. +The quantum glass example in the article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. We thank Glen for many contributions and extremely valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members and collaborators Sarah Kenny, Allan Espinosa, Zhao Zhang, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Mats Rynge, Michael Kubal, and Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan. + %% \section{TODO} %% Reference Swift as a follow-on project to VDL in VDS; how does XDTM fit @@ -1939,8 +1747,6 @@ %% ramble about separation of parallel execution concerns and dataflow spec %% in the same way that gph has a separation of same concerns... 
compare contrast -\mikenote{Ack or add: Sarah, David, Jon, Milena, Yong, Glen, Allan, Zhao, others???} - \bibliographystyle{elsarticle-num} \bibliography{paper,Wozniak} % for ACM SIGS style From noreply at svn.ci.uchicago.edu Thu Jan 6 23:48:18 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 6 Jan 2011 23:48:18 -0600 (CST) Subject: [Swift-commit] r3885 - text/parco10submission Message-ID: <20110107054818.9DCDA9CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-06 23:48:18 -0600 (Thu, 06 Jan 2011) New Revision: 3885 Modified: text/parco10submission/paper.tex Log: Resolved most katznotes and hatnotes. Added a mikenote to emphasize the uniqueness of the parallel model. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 05:05:50 UTC (rev 3884) +++ text/parco10submission/paper.tex 2011-01-07 05:48:18 UTC (rev 3885) @@ -364,8 +364,8 @@ invocation. Swift scripts similarly declare all output files that results from program invocations. This enables Swift to provide distributed, location-independent execution of external application programs. -The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables exposed to Swift within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections. Second, all expressions in a Swift program are conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts are discussed in more detail below. 
- +The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections. Second, all expressions in a Swift program are conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts are discussed in more detail below. +\mikenote{This concept is a major highlight of the swift programming model - I meant to highlight it under "Execution model" but did not. We should do so.} % can be thought of as a massively-parallel lazy (ie, on-demand, or just in time) evaluation - say later on? \subsection{Data model} @@ -399,27 +399,31 @@ indices, but are sparse. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. Arrays can be nested to provide multi-dimensional indexing. -Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? 
garbage collection removed variables that are no longer needed - I don't see how single assignment helps here.} -\mihaelnote{I think we should not mention the garbage collection issue. In fact, we don't and we should implement -garbage collection at the "dual" level (i.e., clean temp files) as well as remove unused futures from memory} +Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. +%Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? garbage collection removed variables that are no longer needed - I don't see how single assignment helps here.} +%\mihaelnote{I think we should not mention the garbage collection issue. In fact, we don't and we should implement +%garbage collection at the "dual" level (i.e., clean temp files) as well as remove unused futures from memory} +% Mike: I mentioned GC as it pertains to structures and arrays: since swift is single-assignment, structures and arrays can never get de-referenced and thus dont need to be GC'ed - *I think*. But I can see that internal objects like futures should be, and given that they dont, its best to steer clear of this issue for now. + Variables that are declared to be file references are associated with a \emph{mapper}, which defines (often through a dynamic lookup process) the data files that are to be mapped to the variable. Array and structure elements that are declared to be file references are similarly mapped. 
-Mapped type and collection \katznote{I don't know what composite means here}\mihaelnote{changed "composite type" to "collection type" as introduced earlier} +Mapped type and collection type variable declarations can be annotated with a \emph{mapping} descriptor that specifies the file(s) that are to be mapped to the Swift data element(s). For example, the following line declares a variable named \verb|photo| of -type \verb|image|. Since image is a fileRef type \katznote{how do I know this? And, should ``fileRef'' have been defined 2 paragraphs ago?}, it additionally declares that the +type \verb|image|. Since image is a mapped file type, it additionally declares that the variable refers to a single file named \verb|shane.jpeg|: +%\katznote{how do I know this? And, should ``fileRef'' have been defined 2 paragraphs ago?} \begin{verbatim} image photo <"shane.jpeg">; \end{verbatim} -We can declare {\tt image} to be an \emph{external file type}: \katznote{is this different from a fileRef type?} +We can declare {\tt image} to be a \emph{mapped file type}: \begin{verbatim} type image {}; @@ -458,7 +462,7 @@ im = sn.i; \end{verbatim} -\katznote{please check the above - I changed a couple of variables so ``i'' wasn't used twice for different things in the same example.} +%\katznote{please check the above - I changed a couple of variables so ``i'' wasn't used twice for different things in the same example.} \subsection{Execution model} @@ -620,10 +624,10 @@ \end{description} } -\subsection{Ordering of execution and implicit parallelism} +\subsection{Implicit parallelism} \label{ordering} -\mikenote{Rename this as Parallelism model?; stress and show how highly parallel the model is - the idea that the workflow is fully expanded but throttled.} +%\mikenote{Rename this as Parallelism model?; stress and show how highly parallel the model is - the idea that the workflow is fully expanded but throttled.} Since all variables and collection elements are single-assignment, %they can
be assigned a value at most once during the execution of a script. @@ -907,7 +911,7 @@ portability), run in any particular order with respect to other application invocations in a script (except those implied by data dependency), or that their working directories will or will not be -cleaned up after execution. \katznote{say something about apps should not cause side-effects?} +cleaned up after execution. In addition, applications should strive to avoid side-effects, which could limit both their location-independence and the determinism (either actual or de-facto) of the overall results of the Swift scripts that call them. Consider the following \verb|app| declaration for the \verb|rotate| function: @@ -1061,14 +1065,8 @@ In such a case, Swift provides a \emph{restart log} that encapsulates which function invocations have been successfully completed. -\mikenote{What manual interv. and why???} -\katznote{Maybe ignore this, and just say: A subsequent Swift run may be started +A subsequent Swift run may be started with this restart log; this will avoid re-execution of already -executed invocations.} -After -appropriate manual intervention, -a subsequent Swift run may be started -with this restart log; this will avoid re-execution of already executed invocations. A different class of failure is when jobs are submitted to a site but @@ -1589,8 +1587,7 @@ scheduling Coasters workers using the standard job submission techniques and employing an internal IP network. -\mikenote{In order to achieve automatic parallelization in Swift, instead of using thunks (i.e., suspended computations), which yield lazy -evaluation, we employ futures, which result in eager parallelism. In this process, we trade the ability to efficiently deal with infinite structures for the ability to minimize computation time.
It must, however, be noted that a middle ground exists: lazy futures (futures whose computation is delayed until a value is first needed).} \katznote{this is very confusing to me - it's mixing too many concepts and overloading lazy and eager.} +In order to achieve automatic parallelization in Swift, we ubiquitously employ futures and lightweight threads, which result in eager and massive parallelism but which has a large cost in terms of space and internal object management. We are exploring several alternatives to optimize this tradeoff and increase Swift scalability to ever larger task graphs. The solution space here includes ``lazy futures (whose computation is delayed until a value is first needed)'' and distributed task graphs with multiple, distributed evaluation engines running on separate compute nodes. \subsection{Filesystem access optimizations} From noreply at svn.ci.uchicago.edu Fri Jan 7 00:11:55 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 00:11:55 -0600 (CST) Subject: [Swift-commit] r3886 - text/parco10submission Message-ID: <20110107061155.D83089CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-07 00:11:55 -0600 (Fri, 07 Jan 2011) New Revision: 3886 Modified: text/parco10submission/paper.tex Log: resolved remoaining notes. Misc additional edits. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 05:48:18 UTC (rev 3885) +++ text/parco10submission/paper.tex 2011-01-07 06:11:55 UTC (rev 3886) @@ -358,15 +358,13 @@ pipelines (or more generally, graphs) of sub-functions. 
Unlike most other scripting languages, Swift expresses -invocations of ``ordinary programs''---technically, POSIX {\tt exec()} -operations---in a manner that explicitly declares the files and command-line +invocations of ``ordinary programs''--technically, POSIX {\tt exec()} +operations--in a manner that explicitly declares the files and command-line arguments that are the inputs of each program invocation. Swift scripts similarly declare all output files that results from program invocations. This enables Swift to provide distributed, location-independent execution of external application programs. -The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections. Second, all expressions in a Swift program are conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts are discussed in more detail below. -\mikenote{This concept is a major highlight of the swift programming model - I meant to highlight it under "Execution model" but did not. We should do so.} -% can be thought of as a massively-parallel lazy (ie, on-demand, or just in time) evaluation - say later on? +The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections. 
Second, all expressions in a Swift program are conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts of pervasive implicit parallelism and transparent location independence, along with the natural manner in which Swift expresses the processing of files by applications as if they were ``in-memory'' objects, are the powerful aspects of Swift which make it unique among scripting tools. These aspects are elaborated in this section. \subsection{Data model} @@ -650,11 +648,12 @@ z=q(y); \end{verbatim} -Arrays in Swift are treated as collections of simple variables, in the sense that all array elements are single-assignment. +\hide{Arrays in Swift are treated as collections of simple variables, in the sense that all array elements are single-assignment. Once the value of an array element is set, then it cannot change. When all the values for the array which can be set (as determined by limited flow analysis) are -set, then the array is regarded as \emph{closed}. -\katznote{the few lines before this in this paragraph have been repeated from earlier in the section.} Statements that +set, then the array is regarded as \emph{closed}.} + +Statements that deal with the array as a whole will wait for the array to be closed before executing. An example of such an action is the expansion of the array values into an app command line. Thus, the closing of an array is equivalent to setting a future variable, with respect to any statement that was waiting for the array itself to be assigned a value.
However, a \verb|foreach| statement @@ -949,7 +948,7 @@ \section{Execution engine} \label{Execution} -Swift is implemented by compiling to a Karajan program~\cite{Karajan}, which provides +Swift is implemented by generating and executing a Karajan program~\cite{Karajan}, which provides several benefits: a lightweight threading model, futures, remote job execution, @@ -957,10 +956,10 @@ remote file transfer and data management. Both remote execution and data transfer and management functions are provided through generalized abstracted interfaces called \emph{providers}~\cite{Karajan}. -Data providers enable data transfer and management to be performed through a wide variety of protocols including direct local copying, GridFTP, HTTP, WebDAV, SCP, and FTP. -Execution providers enable job execution to take place using direct POSIX process fork, Globus GRAM, Condor (and Condor-G), PBS, SGE, SSH. -The Swift execution model can thus be extended by -adding new data providers and job execution providers. +\emph{Data providers} enable data transfer and management to be performed through a wide variety of protocols including direct local copying, GridFTP, HTTP, WebDAV, SCP, and FTP. +\emph{Execution providers} enable job execution to take place using direct POSIX process fork, Globus GRAM, Condor (and Condor-G), PBS, SGE, and SSH services. +The Swift execution model can be flexibly extended for novel and evolving computing environments by +implementing new data providers and/or job execution providers. 
\subsection{Executing on a remote site} \label{ExecutingSites} From noreply at svn.ci.uchicago.edu Fri Jan 7 10:17:35 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 10:17:35 -0600 (CST) Subject: [Swift-commit] r3887 - text/parco10submission Message-ID: <20110107161735.046C49CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-07 10:17:34 -0600 (Fri, 07 Jan 2011) New Revision: 3887 Modified: text/parco10submission/ResponseToReviews.txt text/parco10submission/paper.tex Log: changes, some responses to reviewers being handled Modified: text/parco10submission/ResponseToReviews.txt =================================================================== --- text/parco10submission/ResponseToReviews.txt 2011-01-07 06:11:55 UTC (rev 3886) +++ text/parco10submission/ResponseToReviews.txt 2011-01-07 16:17:34 UTC (rev 3887) @@ -49,17 +49,23 @@ On the other hand, I don't see much scientific merit in the paper. The paper reads more like a Swift user manual than a scientific paper. For the language design, the only thing that might be novel is the notion of mapped type, but I consider it to be quite minor. I also don't see any new ideas in the data-flow dependency based execution model. ->>> Novelty and scientific merit: +>>> Response: +We believe that our discussion of related work shows that there is no other language that does what Swift does. We also believe that the decisions we have made in creating Swift, as a simple minimal language with a function model for evaluating a large set of individual applications on both parallel and distributed systems of extreme scale using the concept of single-assignment futures to highly effectively exploit implicit parallelism provides the scientific merit of the paper. + <<< For the distributed execution, one important missing piece is performance evaluation. Data locality is very important for data-intensive applications. As I understand it, data have to be moved in and out the clusters. 
So, understanding the cost of scheduling and data transfer is very important to validate the Swift design. Perhaps, it was -published somewhere else, but it would be nice to discuss it in this paper. Here are some more detailed comments: +published somewhere else, but it would be nice to discuss it in this paper. ->>> Performance evaluation: +>>> Response: +We have added a new section, "5. Performance Characteristics" in response to this point. + <<< +Here are some more detailed comments: + 1. Swift uses restart log to reuse the results of successfully completed components. The paper mentioned "appropriate manual intervention". This seems to be something you can almost completely automate. Based on my experiences with large-scale and long running applications, this can be very useful. >>> Automation of restart @@ -86,8 +92,12 @@ For those who might not be familiar with the Karajan language, it would be useful to add a reference to the related work. ->>> Reference to Karajan: <<< +>>> Response: +We have added such a reference + +<<< + It would be helpful to include some discussion on the "auto-parallelization" capability (achieved via data flow analysis?). >>> auto-parallelization @@ -114,9 +124,10 @@ Should "frames" be "f" in this case? ->>> typo +>>> Response: The typo has been corrected. + <<< Reviewer #3: This is an interesting paper aimed at the practical problem of @@ -125,9 +136,10 @@ fine. There are a number of small errors which should have been caught by proofreading the manuscript. ->>> typos +>>> Response: typos and grammar have been corrected by a fresh complete proofreading. + <<< The most substantive comment I have concerns examples 4.3 and 4.4. I @@ -148,18 +160,18 @@ language, so perhaps "SwiftScript" should be replaced by "Swift" everywhere. ->>> +>>> Response: -this has been done. +This has been done. SwiftScript no longer appears. <<< 2. It's a bit awkward that "single assignment" is used in section 2.1 but not defined until section 2.3. 
->>> +>>> Response: -fixed. +This has been fixed. <<< @@ -167,18 +179,18 @@ procedure should have an angle input in addition to the image input (this is corrected on p.6). ->>> +>>> Response: -fixed. +This has been fixed. <<< 4. In section 2.2, should rotate be invoked as "rotate(f, 180)" instead of "rotate(frames, 180)"? ->>> +>>> Response: -fixed. +This has been fixed. <<< @@ -189,23 +201,23 @@ probably should be defined. Other acronyms: GRAM, fMRI (and FMRI, cf. Fig. 2, which should probably be fMRI). ->>> +>>> Response: -RDBMS removed. -FMRI fixed. +RDBMS has been removed. +FMRI has been removed. GRAM doesn't seem to need explanation, it's cited where first used. -still need to think about acronyms in Figure 1. +The acronyms in Figure 1 are defined in the caption. <<< 6. All these appear: "stage in", "stage-in", "stagein". Please be consistent (similarly for stage out). ->>> +>>> Response: -fixed. +This has been fixed. - <<< +<<< 7. The mysterious numbers '12, 1000, 1000, "81 3 3"' in example 4.1 might merit an explanation. @@ -252,15 +264,19 @@ 12. "Karajan" is mentioned several times, there really should be short definition of it and a reference to it in the bibliography. ->>> <<< +>>> Response: +This has been fixed. + +<<< + 13. Many of the references look incomplete; journal references really should have page numbers, some references are missing a year. Reference 8 severely mangles "Bresnahan". ->>> +>>> Response: -all Refs fixed. +This has been fixed. <<< @@ -276,8 +292,13 @@ would one want to _compile_ a scripting language? It seems more natural (to this naive reader) to have an interpreter or a translator. ->>> <<< +>>> Response: +This has been fixed. We now more accurately say: +Swift is implemented by generating and executing a Karajan program. + +<<< + 16. The coaster idea looks quite interesting, could this be expanded, or could an example with coasters be constructed? @@ -286,16 +307,20 @@ 17. 
Table 1, 1st row, 3rd column: should it be f->data.txt instead of f->file.txt? ->>> <<< +>>> Response: +This has been fixed. + +<<< + 18. There are many (too many to list) typos, missing words, mistakes such as "en queued" instead of "enqueued", subject/verb mismatches of number and/or tense. A careful proofreading is sorely needed. ->>> +>>> Response: -fixed. +This has been fixed. <<< Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 06:11:55 UTC (rev 3886) +++ text/parco10submission/paper.tex 2011-01-07 16:17:34 UTC (rev 3887) @@ -798,7 +798,8 @@ \begin{table}[t] \begin{center} - \begin{tabular}{|l|p{3.5cm}|p{5cm}|} + \begin{footnotesize} + \begin{tabular}{|l|p{4cm}|p{5cm}|} \hline {\bf Mapper name } & {\bf Description} & @@ -808,9 +809,9 @@ \begin{minipage}{5cm} \vspace{2mm} \begin{center} - {\tt file f<"data.txt">;} \\ + {\tt file f <"data.txt">;} \\ --- \\ - $f \rightarrow {\tt file.txt}$ + $f \rightarrow {\tt data.txt}$ \vspace{2mm} \end{center} \end{minipage} @@ -821,10 +822,11 @@ \begin{minipage}{5cm} \vspace{2mm} \begin{center} - {\tt file f;} \\ + {\tt file f ;} \\ --- \\ - $f_0 \rightarrow {\tt file2.txt}$ + $f_0 \rightarrow {\tt data2.txt}$ \end{center} \end{minipage} \\ @@ -834,15 +836,17 @@ \begin{minipage}{5cm} \vspace{2mm} \begin{center} - {\tt file f;} \\ + {\tt file f ;} \\ --- \\ - $f.\textrm{red} \rightarrow {\tt file.red.txt}$ + $f.\textrm{red} \rightarrow {\tt data.red.txt}$ \end{center} \end{minipage} \\ \hline \end{tabular} + \end{footnotesize} \end{center} \caption{Swift built-in mappers: conceptual syntax} \label{mappertable} @@ -974,7 +978,7 @@ \begin{figure*}[htbp] \begin{center} \includegraphics{img/swift-model} - \caption{Swift site model} + \caption{Swift site model. 
(CoG = Commodity Grid~\cite{Karajan}, OSG = Open Science Grid, AWS = Amazon Web Services, HPC = High Performance Computing, BG/P = BlueGene/P.)} \label{FigureSwiftModel} \end{center} \end{figure*} From noreply at svn.ci.uchicago.edu Fri Jan 7 10:22:19 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 10:22:19 -0600 (CST) Subject: [Swift-commit] r3888 - in text/parco10submission: . img Message-ID: <20110107162219.6D60D9CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-07 10:22:19 -0600 (Fri, 07 Jan 2011) New Revision: 3888 Modified: text/parco10submission/img/figures.odg text/parco10submission/img/swift-model.pdf text/parco10submission/paper.tex Log: changing SwiftScript in Figure 1 to Swift script, fixing a line break in the caption Modified: text/parco10submission/img/figures.odg =================================================================== (Binary files differ) Modified: text/parco10submission/img/swift-model.pdf =================================================================== (Binary files differ) Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 16:17:34 UTC (rev 3887) +++ text/parco10submission/paper.tex 2011-01-07 16:22:19 UTC (rev 3888) @@ -21,6 +21,8 @@ \definecolor{shadecolor}{RGB}{240,255,200} \newenvironment{msection}{\begin{shaded}}{\end{shaded}} +\hyphenation{BlueGene} + \journal{Parallel Computing} \makeatletter \g@addto@macro\@verbatim\small @@ -978,7 +980,7 @@ \begin{figure*}[htbp] \begin{center} \includegraphics{img/swift-model} - \caption{Swift site model. (CoG = Commodity Grid~\cite{Karajan}, OSG = Open Science Grid, AWS = Amazon Web Services, HPC = High Performance Computing, BG/P = BlueGene/P.)} + \caption{Swift site model.
(CoG = Commodity Grid~\cite{Karajan}, OSG = Open Science Grid, AWS~=~Amazon Web Services, HPC = High Performance Computing, BG/P = BlueGene/P.)} \label{FigureSwiftModel} \end{center} \end{figure*} From noreply at svn.ci.uchicago.edu Fri Jan 7 10:37:33 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 10:37:33 -0600 (CST) Subject: [Swift-commit] r3889 - text/parco10submission Message-ID: <20110107163733.22D1F9CC94@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-07 10:37:32 -0600 (Fri, 07 Jan 2011) New Revision: 3889 Added: text/parco10submission/original_submission.pdf Modified: text/parco10submission/ResponseToReviews.txt text/parco10submission/paper.bib text/parco10submission/paper.tex Log: adding originally submitted paper adding comparison to FlumeJava and bib entry, and marking this as done in the response to the reviewers Modified: text/parco10submission/ResponseToReviews.txt =================================================================== --- text/parco10submission/ResponseToReviews.txt 2011-01-07 16:22:19 UTC (rev 3888) +++ text/parco10submission/ResponseToReviews.txt 2011-01-07 16:37:32 UTC (rev 3889) @@ -80,8 +80,10 @@ 3. In terms of programming models, modern systems such as Microsoft's DryadLINQ and Google's FlumeJava successfully integrate data-flow constructs into state of the art programming languages (C# and Java). This integration approach is quite nice and powerful. It would be nice if the authors can compare Swift with these two systems. ->>> Comparison to Dryad and FlumeJava +>>> Response: +We have added comparisons to Dryad and FlumeJava in the related work section. + <<< Reviewer #2: The paper presents a powerful high-level scripting language, SwiftScript, for performing a massive number of tasks/jobs coupled with collections of file-based data. 
It describes details of the language syntax and semantics, which is based on data flow with support of arrays and procedures, and discusses the implementation and several use cases with the main focus on a grid. Although a similar work was published before, this paper gives elaborated summary of the technical details, which is useful for general audience. Added: text/parco10submission/original_submission.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/original_submission.pdf ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-07 16:22:19 UTC (rev 3888) +++ text/parco10submission/paper.bib 2011-01-07 16:37:32 UTC (rev 3889) @@ -250,6 +250,24 @@ year={2008} } +@inproceedings{FlumeJava, + author = {Chambers, Craig and Raniwala, Ashish and Perry, Frances and Adams, Stephen and Henry, Robert R.
and Bradshaw, Robert and Weizenbaum, Nathan}, + title = {{FlumeJava}: easy, efficient data-parallel pipelines}, + booktitle = {Proceedings of the 2010 {ACM SIGPLAN} conference on Programming language design and implementation}, + series = {PLDI '10}, + year = {2010}, + isbn = {978-1-4503-0019-3}, + location = {Toronto, Ontario, Canada}, + pages = {363--375}, + numpages = {13}, + url = {http://doi.acm.org/10.1145/1806596.1806638}, + doi = {http://doi.acm.org/10.1145/1806596.1806638}, + acmid = {1806638}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {data-parallel programming, java, mapreduce}, +} + @article{GEL, author = {Ching Lian, Chua and Tang, Francis and Issac, Praveen and Krishnan, Arun}, title = {GEL: Grid execution language}, Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 16:22:19 UTC (rev 3888) +++ text/parco10submission/paper.tex 2011-01-07 16:37:32 UTC (rev 3889) @@ -1496,6 +1496,10 @@ distributed Grid sites that may span multiple administrative domains, and deals with security and resource usage policy issues. +FlumeJava~\cite{FlumeJava} is similar to Swift in concept, as it is intended to run +data-processing pipelines over collections (of files). It is different in that it builds on top +of MapReduce primitives, rather than more abstract graphs as in Swift. + BPEL~\cite{BPEL_2006} is a Web Service-based standard that specifies how a set of Web services interact to form a larger, composite Web Service. BPEL is starting to be tested in scientific contexts. 
While From noreply at svn.ci.uchicago.edu Fri Jan 7 10:41:11 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 10:41:11 -0600 (CST) Subject: [Swift-commit] r3890 - text/parco10submission Message-ID: <20110107164111.A1C5E9CC94@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-07 10:41:11 -0600 (Fri, 07 Jan 2011) New Revision: 3890 Modified: text/parco10submission/ResponseToReviews.txt Log: one more reviewer issue has been addressed Modified: text/parco10submission/ResponseToReviews.txt =================================================================== --- text/parco10submission/ResponseToReviews.txt 2011-01-07 16:37:32 UTC (rev 3889) +++ text/parco10submission/ResponseToReviews.txt 2011-01-07 16:41:11 UTC (rev 3890) @@ -288,8 +288,14 @@ simplification, I know, but why not be honest and refer to nodes instead of cores? ->>> <<< +>>> Response: +In Swift, individual tasks generally run on cores, so cores is the correct +term. + +<<< + + 15. "Swift is implemented by compiling to a Karajan program". Why would one want to _compile_ a scripting language? It seems more natural (to this naive reader) to have an interpreter or a translator. 
From noreply at svn.ci.uchicago.edu Fri Jan 7 10:48:49 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 10:48:49 -0600 (CST) Subject: [Swift-commit] r3891 - text/parco10submission Message-ID: <20110107164849.489CC9CC94@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-07 10:48:49 -0600 (Fri, 07 Jan 2011) New Revision: 3891 Modified: text/parco10submission/paper.tex Log: merging in Ian's changes Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 16:41:11 UTC (rev 3890) +++ text/parco10submission/paper.tex 2011-01-07 16:48:49 UTC (rev 3891) @@ -80,13 +80,20 @@ \begin{abstract} -The work of scientists, engineers and statisticians often requires executing domain-specific application programs a -massive number of times on large collections of file-based data. This process requires complex data management to pass data to, from, and between application invocations. Distributed and -parallel computing resources can greatly speed up -such processing, but their use increases the complexity of the programming effort and presents new barriers. The Swift parallel scripting language reduces these complexities with a -scripting language for composing ordinary application programs into powerful parallel scripts that can efficiently utilize parallel and distributed resources. -In this paper we present Swift's implicitly parallel and deterministic programming model, which applies external applications to file collections using a functional, ``in-memory'' style of scripting that abstracts and simplifies distributed parallel execution. +Scientists, engineers and statisticians must often execute domain-specific application programs +many times on large collections of file-based data. This activity requires complex orchestration and data management +as data is passed to, from, and among application invocations. 
Distributed and +parallel computing resources can accelerate +such processing, but their use increases programming complexity yet further. +The Swift parallel scripting language reduces these complexities by (a) making file system structures accessible via language constructs, +and (b) +allowing +ordinary application programs to be composed into powerful parallel scripts that can efficiently utilize parallel and distributed resources. +We present Swift's implicitly parallel and deterministic programming model, +which applies external applications to file collections using a functional style of scripting that abstracts and simplifies distributed parallel execution. +%IAN: Re above--it seems important to me to make point (a). + \end{abstract} \begin{keyword} @@ -107,8 +114,8 @@ % mw: not sure why idempotency matters. This gets into issues of single-assignment; restartability; and whether or not we treat application invocations as both idempotent and/or referentially transparent. Since we treat execution of apps as "isolated", and "side-effect-free", neither parallel interleavings nor re-runs should have much effect on them. Swift is a scripting language designed for composing -application programs into distributed, -parallelized applications for execution on clusters, grids, clouds, and supercomputers +application programs into parallel +applications that can be executed on multicore processors, clusters, grids, clouds, and supercomputers with tens to hundreds of thousands of processors. %It is intended to serve as a higher level framework for composing the interaction of %concurrently executing programs (even parallel ones) and scripts @@ -128,14 +135,19 @@ %inherent complexity of programming on these systems, necessitates a %new approach. 
-While many application needs involve the execution of a single large -message-passing parallel program, many others require the coupling or +Many parallel applications involve a single +message-passing parallel program: a model supported well by the Message Passing Interface (MPI). +However, many others require the coupling or orchestration of large numbers of application invocations: either many invocations of the same program, or many invocations of sequences and -patterns of several programs. In this model, existing applications are similar to -functions in programming, and users typically need to execute many of -them. Scaling up requires the distribution of such workloads among -many computers or clusters and hence a ``grid'' approach. Even if +patterns of several programs. The execution of these +%In this model, existing applications are similar to +%functions in programming, and users typically need to execute many of +%them. +Scaling up requires the distribution of such workloads among +many computers or clusters. +%and hence a ``grid'' approach. +Even if a single large parallel cluster suffices, users will not always have access to the same system (i.e., big machines may be congested, or temporarily unavailable to a user due to maintenance or allocation depletion). This leads to the need to be able to utilize whatever resource happens to be available or economical at the moment From noreply at svn.ci.uchicago.edu Fri Jan 7 11:05:59 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 11:05:59 -0600 (CST) Subject: [Swift-commit] r3892 - text/parco10submission Message-ID: <20110107170559.6BDA69CC94@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-07 11:05:59 -0600 (Fri, 07 Jan 2011) New Revision: 3892 Modified: text/parco10submission/paper.tex Log: Minor edits in intro. Also changed PERL to Perl per Glen. 
Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 16:48:49 UTC (rev 3891) +++ text/parco10submission/paper.tex 2011-01-07 17:05:59 UTC (rev 3892) @@ -120,9 +120,9 @@ %It is intended to serve as a higher level framework for composing the interaction of %concurrently executing programs (even parallel ones) and scripts %written in other scripting languages. -Swift %is a higher-level language that +Unlike most other scripting languages, Swift %is a higher-level language that focuses not on the details of executing sequences or -pipelines of scripts and programs (even parallel ones), but rather on the issues that arise +pipelines of scripts and programs, but rather on the issues that arise from the concurrent execution, composition, and coordination of many independent computational tasks at large scale. % @@ -177,7 +177,7 @@ % be and the monentary lack of libraries is independent of the language. Most functional languages are simpler % than Swift. -Swift can execute scripts that perform tens of thousands of program +Swift can execute scripts that perform hundreds of thousands of program invocations on highly parallel resources, and handle the unreliable and dynamic aspects of wide-area distributed resources. Such issues are handled by Swift's runtime system, and are not manifest in the user's scripts. The exact number of processing units available on such shared resources @@ -218,7 +218,7 @@ In order to achieve automatic parallelization, Swift is based on the synchronization construct of \emph{futures}~\cite{Futures}, which -can result in abundant parallelism. Every Swift variable (including every members of structures and arrays) is a future. +can enable large-scale parallelism. Every Swift variable (including all members of structures and arrays) is a future. 
Using a futures-based evaluation strategy has an enormous benefit: automatic parallelization is achieved without the need for dependency analysis, which would significantly complicate the Swift implementation. @@ -1122,7 +1122,7 @@ Clustering requires very little additional support on the remote site, while the coasters framework requires an active component on the -head node (in Java) and on the worker nodes (in PERL) as well as +head node (in Java) and on the worker nodes (in Perl) as well as additional network connectivity within a site. In practice, the automatic deployment and execution of these components can be difficult on a number sites. From noreply at svn.ci.uchicago.edu Fri Jan 7 12:05:22 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 12:05:22 -0600 (CST) Subject: [Swift-commit] r3893 - text/parco10submission Message-ID: <20110107180522.6E5469CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-07 12:05:22 -0600 (Fri, 07 Jan 2011) New Revision: 3893 Modified: text/parco10submission/paper.tex Log: changing the fileRef and reference to mapped files Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 17:05:59 UTC (rev 3892) +++ text/parco10submission/paper.tex 2011-01-07 18:05:22 UTC (rev 3893) @@ -406,7 +406,7 @@ \emph{Collection types} are provided in Swift by \emph{arrays} and \emph{structures}. Structure fields can be of any type, while arrays contain values of only a single type. One -array type is provided for every atomic type (integer, string, boolean, and file reference). +array type is provided for every atomic type (integer, string, boolean, and mapped file). Arrays use numeric indices, but are sparse. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. 
Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. Arrays can be nested to provide multi-dimensional indexing. @@ -418,9 +418,9 @@ %garbage collection at the "dual" level (i.e., clean temp files) as well as remove unused futures from memory} % Mike: I mentioned GC as it pertains to structures and arrays: since swift is single-assignment, structures and arrays can never get de-referenced and thus dont need to be GC'ed - *I think*. But I can see that internal objects like futures should be, and given that they dont, its best to steer clear of this issue for now. -Variables that are declared to be file references +Variables that are declared to be mapped files are associated with a \emph{mapper}, which defines (often through a dynamic lookup process) the -data files that are to be mapped to the variable. Array and structure elements that are declared to be file references are similarly mapped. +data files that are to be mapped to the variable. Array and structure elements that are declared to be mapped files are similarly mapped. Mapped type and collection type variable declarations can be annotated with a @@ -744,13 +744,13 @@ Types are used to structure data, to aid in debugging and program correctness and to influence how Swift interacts with data. -The \verb|image| type declared in previous examples is a \emph{file reference -type} which we refer to as a \emph{fileRef}. fileRef types indicate that variable refers to a +The \verb|image| type declared in previous examples is a \emph{mapped file +type} which we refer to as a \emph{mapped file}. The mapped file type indicates that variable refers to a single file, and that no further structural information about the file is exposed at the Swift level. Arrays have been mentioned above, in the arrays section. A code block may be applied to each element of an array using \verb|foreach|; or -individual elements may be references using \verb|[]| notation. 
+individual elements may be referenced using \verb|[]| notation. There are a number of primitive types: \verb|int|, \verb|string|, \verb|float|, \verb|boolean|, which represent integers, strings, From noreply at svn.ci.uchicago.edu Fri Jan 7 12:17:36 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 12:17:36 -0600 (CST) Subject: [Swift-commit] r3894 - text/parco10submission Message-ID: <20110107181736.453FB9CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-07 12:17:36 -0600 (Fri, 07 Jan 2011) New Revision: 3894 Modified: text/parco10submission/paper.tex Log: minor edits to mapper table. start of edits to App section. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 18:05:22 UTC (rev 3893) +++ text/parco10submission/paper.tex 2011-01-07 18:17:36 UTC (rev 3894) @@ -825,7 +825,7 @@ \begin{center} {\tt file f <"data.txt">;} \\ --- \\ - $f \rightarrow {\tt data.txt}$ + ${\tt f} \rightarrow {\tt data.txt}$ \vspace{2mm} \end{center} \end{minipage} @@ -836,11 +836,11 @@ \begin{minipage}{5cm} \vspace{2mm} \begin{center} - {\tt file f ;} \\ --- \\ - $f_0 \rightarrow {\tt data2.txt}$ + ${\tt f[0]} \rightarrow {\tt data2.txt}$ \end{center} \end{minipage} \\ @@ -850,11 +850,11 @@ \begin{minipage}{5cm} \vspace{2mm} \begin{center} - {\tt file f ;} \\ --- \\ - $f.\textrm{red} \rightarrow {\tt data.red.txt}$ + ${\tt f.\textrm{red}} \rightarrow {\tt data.red.txt}$ \end{center} \end{minipage} \\ @@ -1189,12 +1189,15 @@ \section{Applications} \label{Applications} -This section describes a few representative Swift applications -from various diverse disciplines. +Swift has been used by applications in +\mikenote{List here from CDI, IEEE, etc} +This section describes two complete Swift scripts (representative of two diverse disciplines) in more detail.
+The first script is a tutorial example (used in a class on data intensive computing at the University of Chicago) which performs a simple analysis of satellite land-use imagery. The second script is taken (with minor changes to fit better on the page) directly from work done using Swift for an investigation into the molecular structure of glassy materials in the field of theoretical chemistry. In both examples, the intent is to show a complete and realistic Swift script, annotated to better understand the nature of the Swift programming model and to provide a glimpse of real Swift usage. + \subsection{Satellite image data processing.} -The last example (which come from a class project) processes +The first example -- Script 1 below -- processes data from a large dataset of files that categorize the Earth's surface, from the MODIS sensor instruments that orbit Earth on two NASA satellites of the Earth Observing System. From noreply at svn.ci.uchicago.edu Fri Jan 7 13:37:06 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 13:37:06 -0600 (CST) Subject: [Swift-commit] r3896 - text/parco10submission/plots Message-ID: <20110107193706.5992B9CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-07 13:37:06 -0600 (Fri, 07 Jan 2011) New Revision: 3896 Added: text/parco10submission/plots/multicore.pdf Log: Adding Added: text/parco10submission/plots/multicore.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/plots/multicore.pdf ___________________________________________________________________ Name: svn:mime-type + application/octet-stream From noreply at svn.ci.uchicago.edu Fri Jan 7 13:37:40 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 13:37:40 -0600 (CST) Subject: [Swift-commit] r3897 - text/parco10submission Message-ID: <20110107193740.88DAF9CC7F@svn.ci.uchicago.edu> Author: wozniak 
Date: 2011-01-07 13:37:40 -0600 (Fri, 07 Jan 2011) New Revision: 3897 Modified: text/parco10submission/paper.tex Log: Include multicore plot Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 19:37:06 UTC (rev 3896) +++ text/parco10submission/paper.tex 2011-01-07 19:37:40 UTC (rev 3897) @@ -80,16 +80,16 @@ \begin{abstract} -Scientists, engineers and statisticians must often execute domain-specific application programs -many times on large collections of file-based data. This activity requires complex orchestration and data management +Scientists, engineers and statisticians must often execute domain-specific application programs +many times on large collections of file-based data. This activity requires complex orchestration and data management as data is passed to, from, and among application invocations. Distributed and parallel computing resources can accelerate -such processing, but their use increases programming complexity yet further. +such processing, but their use increases programming complexity yet further. The Swift parallel scripting language reduces these complexities by (a) making file system structures accessible via language constructs, and (b) allowing ordinary application programs to be composed into powerful parallel scripts that can efficiently utilize parallel and distributed resources. -We present Swift's implicitly parallel and deterministic programming model, +We present Swift's implicitly parallel and deterministic programming model, which applies external applications to file collections using a functional style of scripting that abstracts and simplifies distributed parallel execution. %IAN: Re above--it seems important to me to make point (a). @@ -136,14 +136,14 @@ %new approach. Many parallel applications involve a single -message-passing parallel program: a model supported well by the Message Passing Interface (MPI). 
+message-passing parallel program: a model supported well by the Message Passing Interface (MPI). However, many others require the coupling or orchestration of large numbers of application invocations: either many invocations of the same program, or many invocations of sequences and -patterns of several programs. The execution of these +patterns of several programs. The execution of these %In this model, existing applications are similar to %functions in programming, and users typically need to execute many of -%them. +%them. Scaling up requires the distribution of such workloads among many computers or clusters. %and hence a ``grid'' approach. @@ -387,7 +387,7 @@ Variables are used in Swift to name the local variables, arguments, and returns of a function. Every Swift variable is assigned a concrete data type, based on a very simple type model (with no concepts of inheritance, abstraction, etc). The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment can be declared ``global'' to make them accessible to every other function in the script. Swift data elements (atomic variables and array elements) are \emph{single-assignment}--- -they can be assigned at most one value during execution---and behave as futures. +they can be assigned at most one value during execution---and behave as futures. This semantic provides the basis for Swift's model of parallel function evaluation and chaining. While Swift collection types (arrays and structures) are not @@ -411,7 +411,7 @@ indices, but are sparse. Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. Arrays can be nested to provide multi-dimensional indexing. 
-Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. +Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. %Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? garbage collection removed variables that are no longer needed - I don't see how single assignment helps here.} %\mihaelnote{I think we should not mention the garbage collection issue. In fact, we don't and we should implement @@ -422,7 +422,7 @@ are associated with a \emph{mapper}, which defines (often through a dynamic lookup process) the data files that are to be mapped to the variable. Array and structure elements that are declared to be mapped files are similarly mapped. -Mapped type and collection +Mapped type and collection type variable declarations can be annotated with a \emph{mapping} descriptor that specify the file(s) that are to be mapped to the Swift data element(s). @@ -1189,7 +1189,7 @@ \section{Applications} \label{Applications} -Swift has been used by applications in +Swift has been used by applications in \mikenote{List here from CDI, IEEE, etc} This section describes two complete Swift scripts (representative of two diverse disciplines) in more detail. 
@@ -1224,84 +1224,84 @@ 1 type file; 2 type imagefile; 3 type landuse; - 4 + 4 5 # Define application program interfaces - 6 + 6 7 app (landuse output) getLandUse (imagefile input, int sortfield) 8 { 9 getlanduse @input sortfield stdout=@output ; 10 } - 11 + 11 12 app (file output, file tilelist) analyzeLandUse 13 (landuse input[], string usetype, int maxnum) 14 { 15 analyzelanduse @output @tilelist usetype maxnum @filenames(input); 16 } - 17 + 17 18 app (imagefile output) colorMODIS (imagefile input) 19 { 20 colormodis @input @output; 21 } - 22 + 22 23 app (imagefile output) assemble 24 (file selected, imagefile image[], string webdir) 25 { 26 assemble @output @selected @filename(image[0]) webdir; 27 } - 28 - 29 app (imagefile grid) markMap (file tilelist) + 28 + 29 app (imagefile grid) markMap (file tilelist) 30 { 31 markmap @tilelist @grid; 32 } - 33 + 33 34 # Constants and command line arguments - 35 + 35 36 int nFiles = @toint(@arg("nfiles","1000")); 37 int nSelect = @toint(@arg("nselect","12")); 38 string landType = @arg("landtype","urban"); 39 string runID = @arg("runid","modis-run"); 40 string MODISdir= @arg("modisdir","/home/wilde/bigdata/data/modis/2002"); 41 string webDir = @arg("webdir","/home/wilde/public_html/geo/"); - 42 + 42 43 string suffix=".tif"; - 44 + 44 45 # Input Dataset - 46 + 46 47 imagefile geos[] ; # site=site - 49 + 49 50 # Compute the land use summary of each MODIS tile - 51 + 51 52 landuse land[] ; - 54 + 54 55 foreach g,i in geos { 56 land[i] = getLandUse(g,1); 57 } - 58 + 58 59 # Find the top N tiles (by total area of selected landuse types) - 60 + 60 61 file topSelected<"topselected.txt">; 62 file selectedTiles<"selectedtiles.txt">; 63 (topSelected, selectedTiles) = analyzeLandUse(land, landType, nSelect); - 64 + 64 65 # Mark the top N tiles on a sinusoidal gridded map - 66 + 66 67 imagefile gridMap<"markedGrid.gif">; 68 gridMap = markMap(topSelected); - 69 + 69 70 # Create multi-color images for all tiles - 71 + 71 72 imagefile 
colorImage[] ; - 75 + 75 76 foreach g, i in geos { 77 colorImage[i] = colorMODIS(g); 78 } - 79 + 79 80 # Assemble a montage of the top selected areas - 81 + 81 82 imagefile montage ; # @arg 83 montage = assemble(selectedTiles,colorImage,webDir); @@ -1315,7 +1315,7 @@ Rather than run ~500K-1.5M steps per jobs (which a priori i didn't know how many i would run anyway) i ran 100K at a time. hence the repetitions of runs. But i would say the campaign started more like in october. if all the jobs are on pads then it'll be more obvious. -As this simulation was a lengthy campaign (from about October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what work remained during various restarts. His mappers assumed an application run was complete if all the returned ".final" files existed. In the case of script restarts, results that already existed were not computed. The swift restart mechanism was also tested and worked fine, but required tracking which workflow was being restarted. Occasionally missing files caused the restart to fail; Hocky's ad-hoc restart via mappers worked exceedingly well (and perhaps suggests a new approach for the integrated restart mechanism). +As this simulation was a lengthy campaign (from about October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what work remained during various restarts. His mappers assumed an application run was complete if all the returned ".final" files existed. In the case of script restarts, results that already existed were not computed. The swift restart mechanism was also tested and worked fine, but required tracking which workflow was being restarted. Occasionally missing files caused the restart to fail; Hocky's ad-hoc restart via mappers worked exceedingly well (and perhaps suggests a new approach for the integrated restart mechanism). 
A high-level description of the glass simulation campaign is as follows: @@ -1327,7 +1327,7 @@ about 1-2 hours per job -Approximate OSG usage over 100K cpus hours with about 100K tasks of 1-2 hours completed. App has been successfully run on about 18 OG (with the majority of runs have been completed on about 6 primary sites). +Approximate OSG usage over 100K cpus hours with about 100K tasks of 1-2 hours completed. App has been successfully run on about 18 OG (with the majority of runs have been completed on about 6 primary sites). This project would be completely unwieldy and much harder to organize without using Swift. @@ -1344,11 +1344,11 @@ 1 type Arc; 2 type Restart; 3 type Log; - 4 + 4 5 type GlassIn{ 6 Restart startfile; 7 } - 8 + 8 9 type GlassOut{ 10 Arc arcfile; 11 Restart restartfile; @@ -1356,7 +1356,7 @@ 13 Restart final; 14 Log logfile; 15 } - 16 + 16 17 app (GlassOut o) glassCavityRun( 18 GlassIn i, string rad, string temp, string steps, 19 string volume, string fraca, string energyfunction, @@ -1368,7 +1368,7 @@ 25 "--cradius" rad "--ccoord" centerstring arctimestring 26 stdout=@filename(o.logfile); 27 } - 28 + 28 29 CreateGlassSystem() 30 { 31 string temp=@arg("temp","2.0"); @@ -1393,16 +1393,16 @@ 50 arctimestring=""; 51 } 52 string energyfunction=@arg("energyfunction","softsphereratiosmooth"); - 53 + 53 54 GlassIn modelIn[][][] ; + 57 e=energyfunction, natoms=natoms, i="true">; 58 GlassOut modelOut[][][][] ; - 62 + 62 63 foreach rad,rindex in radii { 64 foreach centerstring,cindex in centers { 65 foreach model in [0:nmodels-1] { @@ -1420,8 +1420,8 @@ 77 } 78 } 79 } - 80 - 81 + 80 + 81 82 CreateGlassSystem(); \end{Verbatim} %\end{verbatim} @@ -1437,7 +1437,16 @@ \end{center} \end{figure*} +\begin{figure*}[htbp] + \begin{center} + \includegraphics[scale=0.70]{plots/multicore} + \caption{System utilization for variable length tasks + at varying concurrency} + \label{PlotMulticore} + \end{center} +\end{figure*} + \section{Related Work} \label{Related} 
@@ -1491,7 +1500,7 @@ programming tool for the specification and execution of large parallel computations on large quantities of data, and facilitating the utilization of large distributed resources. However, the two also -differ in many aspects. The +differ in many aspects. The MapReduce programming model supports key-value pairs as input or output datasets and two types of computation functions, map and reduce; Swift provides a type system and allows the From noreply at svn.ci.uchicago.edu Fri Jan 7 13:42:19 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 13:42:19 -0600 (CST) Subject: [Swift-commit] r3898 - text/parco10submission Message-ID: <20110107194219.724DA9CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-07 13:42:17 -0600 (Fri, 07 Jan 2011) New Revision: 3898 Modified: text/parco10submission/paper.tex Log: Improve caption Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 19:37:40 UTC (rev 3897) +++ text/parco10submission/paper.tex 2011-01-07 19:42:17 UTC (rev 3898) @@ -1432,7 +1432,8 @@ \begin{figure*}[htbp] \begin{center} \includegraphics[scale=0.70]{plots/sleep} - \caption{Node utilization for {\tt sleep} tasks} + \caption{System utilization for variable length tasks + at varying system size} \label{PlotSleep} \end{center} \end{figure*} From noreply at svn.ci.uchicago.edu Fri Jan 7 13:44:16 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 13:44:16 -0600 (CST) Subject: [Swift-commit] r3900 - text/parco10submission Message-ID: <20110107194416.76A5E9CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-07 13:44:16 -0600 (Fri, 07 Jan 2011) New Revision: 3900 Modified: text/parco10submission/paper.tex Log: adding spaces in table 1 Modified: text/parco10submission/paper.tex =================================================================== --- 
text/parco10submission/paper.tex 2011-01-07 19:42:30 UTC (rev 3899) +++ text/parco10submission/paper.tex 2011-01-07 19:44:16 UTC (rev 3900) @@ -836,7 +836,7 @@ \begin{minipage}{5cm} \vspace{2mm} \begin{center} - {\tt file f[];} \\ --- \\ @@ -850,7 +850,7 @@ \begin{minipage}{5cm} \vspace{2mm} \begin{center} - {\tt file f;} \\ --- \\ From noreply at svn.ci.uchicago.edu Fri Jan 7 18:12:42 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 7 Jan 2011 18:12:42 -0600 (CST) Subject: [Swift-commit] r3902 - text/parco10submission Message-ID: <20110108001242.45A4F9CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-07 18:12:42 -0600 (Fri, 07 Jan 2011) New Revision: 3902 Modified: text/parco10submission/paper.tex Log: Added 75% of the text for MODIS application. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-07 20:10:09 UTC (rev 3901) +++ text/parco10submission/paper.tex 2011-01-08 00:12:42 UTC (rev 3902) @@ -1201,28 +1201,59 @@ data from a large dataset of files that categorize the Earth's surface, from the MODIS sensor instruments that orbit Earth on two NASA satellites of the Earth Observing System. -The Swift script analyzes the dataset to find the files with the ten -largest total urban area and then produces a new dataset with viewable -color images of those top-ten urban data ``tiles''. -The dataset consists of 317 ``tile'' files that categorize every +The dataset (we tested with one named {\tt mcd12q1}, for year 2002) +consists of 317 ``tile'' files that categorize every 250-meter square of non-ocean surface of the earth into one of 17 -``land cover'' categories, for example, water, ice, forest, barren, or -urban. Each pixel of these data files has a value of 0 to 16, +``land cover'' categories, for example, water, ice, forest, barren, +urban, etc. 
Each pixel of these data files has a value of 0 to 16, describing one 250-meter square of the earth's surface at a specific -point in time. Each tile file has 5 million pixels, covering 2400 +point in time. Each tile file has ~ 5 million 1-byte pixels (5.7 megabytes), covering 2400 x 2400 250-meter squares, based on a specific map projection. -The input datasets are not ``viewable'' images because of the pixel -values, thus requiring the color rendering step above. -\katznote{``above'' isn't above, it's in the script below, which isn't at all -described.} +The Swift script analyzes the dataset to find the files with the N +largest total area of any requested sets of land-cover types, and then produces a new dataset with viewable +color images of those closest-matching data tiles. +(The input datasets are not viewable images, as their pixel +values are land-use codes. Thus a color rendering step is required). A typical invocation of this script would be ``\emph{find the top 12 urban tiles}'' or ``\emph{find the 16 tiles with the most forest and grassland}''. +\\ +\\ +The script is structured as follows: +\\ +\\ +Lines 1-3 define 3 mapped file types -- {\tt MODISfile} for the input images, {\tt landuse} for the output of the landuse histogram calculation; and {\tt file} for any other generic file that we don't care to assign a unique type to. +Lines 7-32 define the Swift interface functions for the application programs {\tt getLandUse}, {\tt analyzeLandUse}, {\tt colorMODIS}, {\tt assemble}, and {\tt markMap}. + +Lines 36-41 extract a set of science parameters from the {\tt swift} command line with which the user invokes the script. +These indicate the number of files of the input set to select (to enable processing the first M of N files), the set of land cover types to select, the number of ``top'' tiles to select, and parameters used to locate input and output directories. 
+ +Lines 47-48 invoke an ``external'' mapper script {\tt modis.mapper} to map the first {\tt nFiles} MODIS data files in the directory contained in the script argument {\tt MODISdir} to the array {\tt geos}. An external mapper script is written by the Swift programmer (in any language desired, but quite often mappers are simple shell scripts). External mappers are usually co-located with the Swift script, and are invoked when Swift instantiates the associated variable. They return a two-field list of the form \emph{SwiftExpression, filename}, where \emph{SwiftExpression} is relative to the variable name being mapped. For example, if this mapper invocation were called from the Swift script at lines 47-48: +\begin{Verbatim}[fontsize=\scriptsize,framesep=2mm] +$ ./modis.mapper -location /home/wilde/modis/2002/ -suffix .tif -n 5 +[0] /home/wilde/modis/2002/h00v08.tif +[1] /home/wilde/modis/2002/h00v09.tif +[2] /home/wilde/modis/2002/h00v10.tif +[3] /home/wilde/modis/2002/h01v07.tif +[4] /home/wilde/modis/2002/h01v08.tif +\end{Verbatim} +it would cause the first five elements of the array {\tt geos} to be mapped to the first five files of the modis dataset in the specified directory. + +At lines 52-53, the script declares the array {\tt land} which will contain the output of the {\tt getlanduse} application. This declaration uses the built-in ``structured regular expression mapper'', which will determine the names of the \emph{output} files that the array will refer to once they are computed. Swift knows from context that this is an output mapping. The mapper will use regular expressions to base the names of the output files on the filenames of the corresponding elements of the input array {\tt geos} given by the {\tt source=} argument to the mapper. + + At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}.
As 317 files were mapped, the loop will invoke 317 instances of the application in parallel. The result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation to be based on the file names mapped to the array {\tt geos[]}. Thus the landuse histogram for file {\tt /home/wilde/modis/2002/h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. + +Once all the land usage histograms have been computed, the script can then execute {\tt analyzeLandUse} at line 63 to find the requested number of highest tiles (files) with a specific land cover combination. The Swift runtime system uses futures to ensure that this analysis function is not invoked until all of its input files have been computed and transported to the computation site chosen to run the analysis program. All of these steps take place automatically, using the relatively simple and location-independent Swift expressions shown. The output files to be used to hold the result are specified in the declarations at lines 61-62. + +To visualize the results, the application function {\tt markMap} invoked at line 68 will generate an image of a world map using the MODIS projection system and indicate the selected tiles matching the analysis criteria. Since this statement depends on the output of the analysis, it will wait for the statement at line 63 to complete before commencing. + +For additional visualization, the script assembles a full map of all the input tiles, placed in their proper grid location on the MODIS world map projection, and again marking the selected tiles. Since this operation needs true-color images of every input tile, these are computed -- again in parallel -- with 317 jobs invoked by the foreach statement at lines 76-78.
The power of Swift's implicit parallelization is very vividly shown here: since the colorMODIS call at line 77 depends only on the input array geos, these 317 application invocations. + \pagebreak Swift example 1: MODIS satellite image processing script \begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] 1 type file; - 2 type imagefile; + 2 type MODIS; type image; 3 type landuse; 4 5 # Define application program interfaces @@ -1233,23 +1264,28 @@ 10 } 11 12 app (file output, file tilelist) analyzeLandUse - 13 (landuse input[], string usetype, int maxnum) + 13 (MODIS input[], string usetype, int maxnum) 14 { 15 analyzelanduse @output @tilelist usetype maxnum @filenames(input); 16 } 17 - 18 app (imagefile output) colorMODIS (imagefile input) + 18 app (image output) colorMODIS (MODIS input) 19 { 20 colormodis @input @output; 21 } 22 - 23 app (imagefile output) assemble - 24 (file selected, imagefile image[], string webdir) + 23 app (image output) assemble + 24 (file selected, image img[], string webdir) 25 { - 26 assemble @output @selected @filename(image[0]) webdir; + 26 assemble @output @selected @filename(img[0]) webdir; 27 } +<<<<<<< .mine + 28 + 29 app (image grid) markMap (file tilelist) +======= 28 29 app (imagefile grid) markMap (file tilelist) +>>>>>>> .r3901 30 { 31 markmap @tilelist @grid; 32 } @@ -1263,12 +1299,12 @@ 40 string MODISdir= @arg("modisdir","/home/wilde/bigdata/data/modis/2002"); 41 string webDir = @arg("webdir","/home/wilde/public_html/geo/"); 42 - 43 string suffix=".tif"; + 43 44 45 # Input Dataset 46 - 47 imagefile geos[] ; # site=site + 47 image geos[] ; 49 50 # Compute the land use summary of each MODIS tile 51 @@ -1287,12 +1323,12 @@ 64 65 # Mark the top N tiles on a sinusoidal gridded map 66 - 67 imagefile gridMap<"markedGrid.gif">; + 67 image gridMap<"markedGrid.gif">; 68 gridMap = markMap(topSelected); 69 70 # Create multi-color images for all tiles 71 - 72 imagefile colorImage[] ; 75 @@ -1302,7 
+1338,7 @@ 79 80 # Assemble a montage of the top selected areas 81 - 82 imagefile montage ; # @arg + 82 image montage ; # @arg 83 montage = assemble(selectedTiles,colorImage,webDir); \end{Verbatim} From noreply at svn.ci.uchicago.edu Sat Jan 8 08:44:34 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 08:44:34 -0600 (CST) Subject: [Swift-commit] r3903 - text/parco10submission Message-ID: <20110108144434.ADC9E9CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-08 08:44:33 -0600 (Sat, 08 Jan 2011) New Revision: 3903 Modified: text/parco10submission/paper.bib text/parco10submission/paper.tex Log: Completed desription of MODIS example and started polishing the Glass example. Added app intro summary and included refererencs. Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-08 00:12:42 UTC (rev 3902) +++ text/parco10submission/paper.bib 2011-01-08 14:44:33 UTC (rev 3903) @@ -26,6 +26,124 @@ url = {http://people.cs.uchicago.edu/~iraicu/publications/2008_NOVA08_book-chapter_Swift.pdf}, } + at article{PTMap_2010, + title = {{The first global screening of protein substrates bearing protein-bound 3,4-Dihydroxyphenylalanine in Escherichia coli and human mitochondria.}}, + author = {S Lee and Y Chen and H Luo and A A Wu and M Wilde and P T Schumacker and Y Zhao}, + journal = {{Journal of Proteome Research}}, + volume = {9(11)}, + year = 2010, + pages = {5705-14} +} + + at article{PTMap_DUP_2009, + title = {{PTMap--a sequence alignment software for unrestricted, accurate, and full-spectrum identification of post-translational modification sites. 
}}, + author = {Y Chen and W Chen and M H Cobb and Y Zhao}, + journal = {{Proc Natl Acad Sci USA}}, + volume = {106(3)}, + year = 2009, + pages = {761-6} +} + + at article{Boker_2010, + author = {S Boker and M Neale and H Maes and M Wilde and M Spiegel and T Brick and J Spies and R Estabrook and S Kenny and T Bates and P Mehta and J Fox}, + title = {OpenMx: An Open Source Extended Structural Equation Modeling Framework}, + journal = {Psychometrika}, + volume = {In press}, + year = {2010} +} + + at techreport{Fedorov_2009, + author = {A Fedorov and B Clifford and S K Warfield and R Kikinis and N Chrisochoides}, + title = {Non-Rigid Registration for Image-Guided Neurosurgery on the TeraGrid: A Case Study}, + institution = {College of William and Mary}, + number = {WM-CS-2009-05}, + year = {2009} +} + + at techreport{ProteinFolding_2009, + Author = {G Hocky AND M Wilde AND J DeBartolo AND M Hategan AND I Foster AND T R Sosnick and K F Freed}, + Date-Added = {2010-04-01 14:52:23 -0500}, + Date-Modified = {2010-04-01 14:56:11 -0500}, + Institution = {Argonne National Laboratory}, + Month = {April}, + Number = {ANL/MCS-P1612-0409}, + Read = {0}, + Title = {Towards petascale ab initio protein folding through parallel scripting}, + Year = {2009}, +} + + at article{SPEED_2010, + author={Joe DeBartolo and Glen Hocky and Michael Wilde and Jinbo Xu and Karl F. Freed and Tobin R. Sosnick}, + title={Protein structure prediction enhanced with evolutionary diversity: SPEED}, + journal={Protein Science}, + volume={19}, + number={3}, + pages={520--534}, + year={2010}, +} + + at inproceedings{MoralHazard_2007, + author={Stef-Praun, T. and Madeira, G. and Foster, I.
and Townsend, R.}, + title={Accelerating solution of a moral hazard problem with Swift}, + booktitle={e-Social Science 2007}, + year={2007}, + address={Indianapolis, IN.} +} + + at article{CNARI_2009, + AUTHOR={S Kenny and M Andric and S Boker M and M Neale and M Wilde and S L Small}, + TITLE={Parallel workflows for data-driven structural equation modeling in functional neuroimaging}, + JOURNAL={Frontiers in Neuroinformatics}, + VOLUME={3}, + YEAR={2009}, + URL={www.frontiersin.org/neuroscience/neuroinformatics/paper/10.3389/neuro.11/034.2009/html/}, + DOI={10.3389/neuro.11/034.2009}, + ISSN={ISSN 1662-5196} +} + + + at article{CNARI_2008, + title = {Improving the analysis, storage and sharing of neuroimaging data using relational databases and distributed computing}, + journal = "NeuroImage", + volume = "39", + number = "2", + pages = "693 - 706", + year = "2008", + note = "", + issn = "1053-8119", + doi = "DOI: 10.1016/j.neuroimage.2007.09.021", + url = "http://www.sciencedirect.com/science/article/B6WNP-4PPW72Y-1/2/ac536a08f82f82ad9ce940ac235d8a55", + author = "Uri Hasson and Jeremy I. Skipper and Michael J. Wilde and Howard C. Nusbaum and Steven L.
Small" +} + + at article{CNARI_DUP_2007, + author = {T Stef-Praun and B Clifford and I Foster and U Hasson and M Hategan and S L Small and M Wilde Michael and Y Zhao}, + issn = {0926-9630}, + journal = {Studies in health technology and informatics}, + keywords = {niak, pipeline, psom}, + pages = {207--216}, + posted-at = {2009-12-10 16:20:18}, + priority = {4}, + title = {Accelerating medical research using the swift workflow system}, + url = {http://view.ncbi.nlm.nih.gov/pubmed/17476063}, + volume = {126}, + year = {2007} +} + + at article{PetascaleScripting_2009, + author = {M Wilde and I Foster and K Iskra and P Beckman and Z Zhang and A Espinosa and M Hategan and B Clifford and I Raicu}, + title = {Parallel Scripting for Applications at the Petascale and Beyond}, + journal = {Computer}, + volume = {42}, + number = {11}, + year = {2009}, + issn = {0018-9162}, + pages = {50--60}, + doi = {http://dx.doi.org/10.1109/MC.2009.365}, + publisher = {IEEE Computer Society Press}, + address = {Los Alamitos, CA, USA}, + } + @inproceedings{SWIFTIWSW2007, author = {Yong Zhao and Mihael Hategan and B Clifford and I Foster and G von Laszewski and I Raicu and T Stef-Praun and M Wilde}, title = {{Swift: Fast, Reliable, Loosely Coupled Parallel Computation}}, Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-08 00:12:42 UTC (rev 3902) +++ text/parco10submission/paper.tex 2011-01-08 14:44:33 UTC (rev 3903) @@ -1185,14 +1185,28 @@ executable. Swift's existing input file management then stages-in the application files once per site per run. 
-\pagebreak \section{Applications} \label{Applications} -Swift has been used by applications in -\mikenote{List here from CDI, IEEE, etc} +By providing a minimal language that allows the rapid +composition of existing executable programs and scripts into +a logical unit, Swift has become a beneficial resource for +small to moderate-sized scientific projects. -This section describes two complete Swift scripts (representative of two diverse disciplines) in more detail. +Swift has been used to perform +computational +biochemical investigations, such as protein structure prediction \cite{PetascaleScripting_2009, ProteinFolding_2009, SPEED_2010} and +molecular dynamics of protein-ligand docking~\cite{Falkon_2008}, protein-RNA docking, and searching mass-spectrometry data for post-translational protein +modifications \cite{PTMap_2009, PTMap_2010, PetascaleScripting_2009}; +modeling the interactions of climate, energy, +and economics \cite{MoralHazard_2007, PetascaleScripting_2009}; post-processing and analysis of climate model results; +exploring the language functions of the human brain \cite{CNARI_2007, CNARI_2008, CNARI_2009}; +creating general statistical frameworks for structural equation +modeling \cite{Boker_2010}; +and performing image processing for research in +image-guided planning for neurosurgery \cite{Fedorov_2009}. + +This section describes two representative Swift scripts (from two diverse disciplines) in more detail. The first script is a tutorial example (used in a class on data intensive computing at the University of Chicago) which performs a simple analysis of satellite land-use imagery. The second script is taken (with minor changes to fit better on the page) directly from work done using Swift for an investigation into the molecular structure of glassy materials in the field of theoretical chemistry. 
In both examples, the intent is to show a complete and realistic Swift script, annotated to better understand the nature of the Swift programming model and to provide a glimpse of real Swift usage. \subsection{Satellite image data processing.} @@ -1215,7 +1229,7 @@ largest total area of any requested sets of land-cover types, and then produces a new dataset with viewable color images of those closest-matching data tiles. (The input datasets are not viewable images, as their pixel -values are land-use codes. Thus a color rendering step is required). A typical invocation of this script would be ``\emph{find the top 12 urban tiles}'' or ``\emph{find the 16 tiles with the most forest and grassland}''. +values are land-use codes. Thus a color rendering step is required). A typical invocation of this script would be ``\emph{find the top 12 urban tiles}'' or ``\emph{find the 16 tiles with the most forest and grassland}''. As this script is used for tutorial purposes, the application programs it calls are simple shell scripts that use fast, generic image processing applications to process the MODIS data. Thus the example executes quickly while serving as a realistic tutorial script for much more compute-intensive satellite data processing applications. \\ \\ The script is structured as follows: @@ -1247,7 +1261,7 @@ To visualize the results, the application function {\tt markMap} invoked at line 68 will generate an image of a world map using the MODIS projection system and indicate the selected tiles matching the analysis criteria. Since this statememt depends on the output of the analysis, it will wait for statement at line 63 to complete before commencing. -For additional visualization, the script assembles a full map of all the input tiles, placed in their proper grid location on the MODIS world map projection, and again marking the selected tiles. 
Since this operation needs true-color images of every input tiles these are computed -- again in parallel -- with 317 jobs invoked by the foreach statement at line 76-78. The power of Swift's implicit parallelization is very vividly shown here: since the colorMODIS call at line 77 depends only on the input array geos, these 317 application invocations. +For additional visualization, the script assembles a full map of all the input tiles, placed in their proper grid location on the MODIS world map projection, and again marking the selected tiles. Since this operation needs true-color images of every input tiles these are computed -- again in parallel -- with 317 jobs invoked by the foreach statement at line 76-78. The power of Swift's implicit parallelization is very vividly shown here: since the {\tt colorMODIS} call at line 77 depends only on the input array {\tt geos}, these 317 application invocations are executed in parallel with the initial 317 parallel executions of the {\tt getLandUse} application at line 56. The script concludes at line 83 by assembling a montage of all the colored tiles and writing this image file to a web-accessible directory for viewing. \pagebreak Swift example 1: MODIS satellite image processing script @@ -1279,13 +1293,8 @@ 25 { 26 assemble @output @selected @filename(img[0]) webdir; 27 } -<<<<<<< .mine 28 29 app (image grid) markMap (file tilelist) -======= - 28 - 29 app (imagefile grid) markMap (file tilelist) ->>>>>>> .r3901 30 { 31 markmap @tilelist @grid; 32 } @@ -1344,34 +1353,25 @@ \end{Verbatim} %\end{verbatim} -\pagebreak \subsection{Simulation of glassy dynamics and thermodynamics.} -Recent study of the glass transition in model systems has focused on calculating from theory or simulation what is known as the "Mosaic length". 
Glen Hocky of the Reichman Lab at Columbia applied a new cavity method for measuring this length scale, where particles are simulated by molecular dynamics or Monte Carlo methods within cavities having amorphous boundary conditions. Various correlation functions are calculated at the interior of cavities of varying sizes and averaged over many independent simulations to determine a thermodynamic length. Hocky's simulations this method to investigate the differences between three different systems which all have the same "structure" but differ in other subtle ways to see if it is in fact this thermodynamic length that is there difference between the models. +A recent study of the glass transition in model systems has focused on calculating from theory or simulation what is known as the "Mosaic length". -Rather than run ~500K-1.5M steps per jobs (which a priori i didn't know how many i would run anyway) i ran 100K at a time. hence the repetitions of runs. But i would say the campaign started more like in october. if all the jobs are on pads then it'll be more obvious. +Glen Hocky of the Reichman Group at Columbia applied a new cavity method for measuring this length scale, where particles are simulated by molecular dynamics or Monte Carlo methods within cavities having amorphous boundary conditions. Various correlation functions are calculated at the interior of cavities of varying sizes and averaged over many independent simulations to determine a thermodynamic length. Hocky is using simulations of this method to investigate the differences between three different glass systems which all have the same structure but which differ in other subtle ways to determine if this thermodynamic length causes the variations between the three systems. -As this simulation was a lengthy campaign (from about October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what work remained during various restarts. 
His mappers assumed an application run was complete if all the returned ".final" files existed. In the case of script restarts, results that already existed were not computed. The swift restart mechanism was also tested and worked fine, but required tracking which workflow was being restarted. Occasionally missing files caused the restart to fail; Hocky's ad-hoc restart via mappers worked exceedingly well (and perhaps suggests a new approach for the integrated restart mechanism). +Hocky's application code performs 100,000 Monte-Carlo steps in about 1-2 hours. Ten jobs are used to generate the 1M simulation steps needed for each configuration. The input data to each simulation is a file of about 150KB representing initial glass structures. Each simulation returns three new structures of 150KB each, a 50 KB log file, and a 4K file describing which particles are in the cavity. -A high-level description of the glass simulation campaign is as follows: +Each simulation covers a space of 7 radii by 27 centers by 10 models, requiring 1690 jobs per run. Three methods are simulated (``kalj'', ``kawka'', and ``pedersenipl'') for total of 90 runs. Swift mappers enable metadata describing these aspects to be encoded in the data files of the campaigns to assist in managing the large volume of file data. -loops are: 7 radii x 27 centers x 10 models x 1 job = 1690 jobs per run +As the simulation campaigns are quite lengthy (the first ran from October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what simulations need to be performed at any point in the campaign. His input mappers assume an application run was complete if all the returned ".final" files exist. In the case of script restarts, results that already existed were not computed. -3 methods: kalj (16) kawka(37) pedersenipl (37) for total of 90 runs +Roughly 152,000 jobs defined by all the run*.sh scripts. 
Some runs were done on other resources including UChicago PADS cluster and TeraGrid resources. The only change necessary to run on OSG was configuring the OSG sites to run the science application. -roughly 152,000 jobs defined by all the run*.sh scripts - -about 1-2 hours per job - Approximate OSG usage over 100K cpus hours with about 100K tasks of 1-2 hours completed. App has been successfully run on about 18 OG (with the majority of runs have been completed on about 6 primary sites). +Investigations of more advanced techniques are underway, and the fact that the entire campaign can be driven by location-independent Swift scripts will enable Hocky to reliably re-execute the entire campaign with relative ease. This project would be completely unwieldy and much harder to organize without using Swift. -Some runs were done on other resources including UChicago TeraGrid and the only change/addition necessary to run on OSG was configuring the OSG sites to run the science application. - -Is currently investigating whether slightly more advanced techniques will be necessary, in which case I may need to run approximately the same amount of simulations again. 
- - \pagebreak Swift example 2: Monte-Carlo simulation of quantum glass structures From noreply at svn.ci.uchicago.edu Sat Jan 8 09:14:53 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 09:14:53 -0600 (CST) Subject: [Swift-commit] r3904 - text/parco10submission Message-ID: <20110108151453.A13B59CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-08 09:14:53 -0600 (Sat, 08 Jan 2011) New Revision: 3904 Modified: text/parco10submission/paper.tex Log: some light editing in first script in 4, and some comments/questions Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-08 14:44:33 UTC (rev 3903) +++ text/parco10submission/paper.tex 2011-01-08 15:14:53 UTC (rev 3904) @@ -1206,30 +1206,31 @@ and performing image processing for research in image-guided planning for neurosurgery \cite{Fedorov_2009}. -This section describes two representative Swift scripts (from two diverse disciplines) in more detail. -The first script is a tutorial example (used in a class on data intensive computing at the University of Chicago) which performs a simple analysis of satellite land-use imagery. The second script is taken (with minor changes to fit better on the page) directly from work done using Swift for an investigation into the molecular structure of glassy materials in the field of theoretical chemistry. In both examples, the intent is to show a complete and realistic Swift script, annotated to better understand the nature of the Swift programming model and to provide a glimpse of real Swift usage. +This section describes two representative Swift scripts (from two diverse disciplines) in detail. +The first is a tutorial example (used in a class on data intensive computing at the University of Chicago) that performs a simple analysis of satellite land-use imagery. 
The second script is taken (with minor changes to fit better on the page) directly from work done using Swift for an investigation into the molecular structure of glassy materials in the field of theoretical chemistry. In both examples, the intent is to show complete and realistic Swift scripts, annotated to better understand the nature of the Swift programming model and to provide a glimpse of real Swift usage. \subsection{Satellite image data processing.} The first example -- Script 1 below -- processes data from a large dataset of files that categorize the Earth's surface, -from the MODIS sensor instruments that orbit Earth on two NASA +derived from data from the MODIS sensor instruments that orbit the Earth on two NASA satellites of the Earth Observing System. -The dataset (we tested with one named {\tt mcd12q1}, for year 2002) +The dataset we use (for 2002, named {\tt mcd12q1}) consists of 317 ``tile'' files that categorize every 250-meter square of non-ocean surface of the earth into one of 17 ``land cover'' categories, for example, water, ice, forest, barren, urban, etc. Each pixel of these data files has a value of 0 to 16, -describing one 250-meter square of the earth's surface at a specific -point in time. Each tile file has ~ 5 million 1-byte pixels (5.7 megabytes), covering 2400 +describing one square of the earth's surface at a specific +point in time. Each tile file has approximately +5 million 1-byte pixels (5.7 MB), covering 2400 x 2400 250-meter squares, based on a specific map projection. The Swift script analyzes the dataset to find the files with the N largest total area of any requested sets of land-cover types, and then produces a new dataset with viewable color images of those closest-matching data tiles. -(The input datasets are not viewable images, as their pixel -values are land-use codes. Thus a color rendering step is required). 
A typical invocation of this script would be ``\emph{find the top 12 urban tiles}'' or ``\emph{find the 16 tiles with the most forest and grassland}''. As this script is used for tutorial purposes, the application programs it calls are simple shell scripts that use fast, generic image processing applications to process the MODIS data. Thus the example executes quickly while serving as a realistic tutorial script for much more compute-intensive satellite data processing applications. +(A color rendering step is required to do this, as the input datasets are not viewable images; their pixel +values are land-use codes.) A typical invocation of this script would be ``\emph{find the top 12 urban tiles}'' or ``\emph{find the 16 tiles with the most forest and grassland}''. As this script is used for tutorial purposes, the application programs it calls are simple shell scripts that use fast, generic image processing applications to process the MODIS data. Thus the example executes quickly while serving as a realistic tutorial script for much more compute-intensive satellite data processing applications. \\ \\ The script is structured as follows: @@ -1241,6 +1242,7 @@ Lines 36-41 extract a set of science parameters from the {\tt swift} command line with which the user invokes the script. These indicate the number of files of the input set to select (to enable processing the first M of N files), the set of land cover types to select, the number of ``top'' tiles to select, and parameters used to locate input and output directories. +\katznote{not sure it these syntaxes were explained in section 2 clearly - if not, they probably should be added to section 2} Lines 47-48 invoke a ``external'' mapper script {\tt modis.mapper} to map the first {\tt nFiles} MODIS data files in the directory contained in the script argument {\tt MODISdir} to the array {\tt geos}. 
An external mapper script is written by the Swift programmer (in any language desired, but quite often mappers are simple shell scripts). External mappers are usually co-located with the Swift script, and are invoked when Swift instantiates the associated variable. They return a two-field list of the the form \emph{SwiftExpression, filename}, where \emph{SwiftExpression} is relative to the variable name being mapped. For example, if this mapper invocation were called from the Swift script at line 47-48: \begin{Verbatim}[fontsize=\scriptsize,framesep=2mm] @@ -1255,13 +1257,13 @@ At lines 52-53, the script declares the array {\tt land} which will contain the output of the {\tt getlanduse} application. This declaration uses the built-in ``structured regular expression mapper'', which will determine the names of the \emph{output} files that the array will refer to once they are computed. Swift knows from context that this is an output mapping. The mapper will use regular expressions to base that names of the output files on the filenames of the corresponding elements of the input array {\tt geos} given by the {\tt source=} argument to the mapper. - At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. As 317 files were mapped, the loop will invoke 317 instances of the application in parallel. The result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation to be based on the file names mapped to the array {\tt geos[]}. Thus the landuse histogram for file {\tt /home/wilde/modis/2002/h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. + At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. 
As 317 files were mapped (in lines 47-48), the loop will invoke 317 instances of the application in parallel. \katznote{is this strictly true? Do you want to say that it will enable 317 instances to be runnable in parallel, but the number that are actually run in parallel depends on the hardware available to Swift, or something like that?} The result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation to be based on the file names mapped to the array {\tt geos[]} (in lines \katznote{is this 52-53?}). Thus the landuse histogram for file {\tt /home/wilde/modis/2002/h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. -Once all the land usage histograms have have been computed, the script can then execute {\tt analyzeLandUse} at line 63 to find the requested number of highest tiles (files) with a specific land cover combination. The Swift runtime system uses futures to ensure that this analysis function is not invoked until all of its input files have computed and transported to the computation site chosen to run the analysis program. All of these steps take place automatically, using the relatively simple and location-independent Swift expressions shown. The output files to be use to hold the result are specified in the declarations at lines 61-62. +Once all the land usage histograms have have been computed, the script can then execute {\tt analyzeLandUse} at line 63 to find the requested number of highest tiles (files) with a specific land cover combination. The Swift runtime system uses futures to ensure that this analysis function is not invoked until all of its input files have computed and transported to the computation site chosen to run the analysis program. All of these steps take place automatically, using the relatively simple and location-independent Swift expressions shown. 
The output files to be use to hold the result are specified in the declarations at lines 61-62. \katznote{should these lines have a space inserted before the ``<'' to match the previous lines? Same question for 67-68... } -To visualize the results, the application function {\tt markMap} invoked at line 68 will generate an image of a world map using the MODIS projection system and indicate the selected tiles matching the analysis criteria. Since this statememt depends on the output of the analysis, it will wait for statement at line 63 to complete before commencing. +To visualize the results, the application function {\tt markMap} invoked at line 68 will generate an image of a world map using the MODIS projection system and indicate the selected tiles matching the analysis criteria. Since this statememt depends on the output of the analysis ({\tt topSelected}), it will wait for statement at line 63 to complete before commencing. -For additional visualization, the script assembles a full map of all the input tiles, placed in their proper grid location on the MODIS world map projection, and again marking the selected tiles. Since this operation needs true-color images of every input tiles these are computed -- again in parallel -- with 317 jobs invoked by the foreach statement at line 76-78. The power of Swift's implicit parallelization is very vividly shown here: since the {\tt colorMODIS} call at line 77 depends only on the input array {\tt geos}, these 317 application invocations are executed in parallel with the initial 317 parallel executions of the {\tt getLandUse} application at line 56. The script concludes at line 83 by assembling a montage of all the colored tiles and writing this image file to a web-accessible directory for viewing. +For additional visualization, the script assembles a full map of all the input tiles, placed in their proper grid location on the MODIS world map projection, and again marking the selected tiles. 
Since this operation needs true-color images of every input tiles these are computed---again in \katznote{potentially? as before} parallel---with 317 jobs invoked by the foreach statement at line 76-78. The power of Swift's implicit parallelization is very vividly shown here: since the {\tt colorMODIS} call at line 77 depends only on the input array {\tt geos}, these 317 application invocations are executed in parallel with the initial 317 parallel executions of the {\tt getLandUse} application at line 56. The script concludes at line 83 by assembling a montage of all the colored tiles and writing this image file to a web-accessible directory for viewing. \pagebreak Swift example 1: MODIS satellite image processing script From noreply at svn.ci.uchicago.edu Sat Jan 8 14:48:46 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 14:48:46 -0600 (CST) Subject: [Swift-commit] r3905 - text/parco10submission Message-ID: <20110108204846.0D5E59CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-08 14:48:45 -0600 (Sat, 08 Jan 2011) New Revision: 3905 Modified: text/parco10submission/paper.tex Log: Completed sec 4.2 to describe Glass sim application. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-08 15:14:53 UTC (rev 3904) +++ text/parco10submission/paper.tex 2011-01-08 20:48:45 UTC (rev 3905) @@ -1234,8 +1234,6 @@ \\ \\ The script is structured as follows: -\\ -\\ Lines 1-3 define 3 mapped file types -- {\tt MODISfile} for the input images, {\tt landuse} for the output of the landuse histogram calculation; and {\tt file} for any other generic file that we don't care to assign a unique type to. Lines 7-32 define the Swift interface functions for the application programs {\tt getLandUse}, {\tt analyzeLandUse}, {\tt colorMODIS}, {\tt assemble}, and {\tt markMap}. 
@@ -1266,7 +1264,7 @@ For additional visualization, the script assembles a full map of all the input tiles, placed in their proper grid location on the MODIS world map projection, and again marking the selected tiles. Since this operation needs true-color images of every input tiles these are computed---again in \katznote{potentially? as before} parallel---with 317 jobs invoked by the foreach statement at line 76-78. The power of Swift's implicit parallelization is very vividly shown here: since the {\tt colorMODIS} call at line 77 depends only on the input array {\tt geos}, these 317 application invocations are executed in parallel with the initial 317 parallel executions of the {\tt getLandUse} application at line 56. The script concludes at line 83 by assembling a montage of all the colored tiles and writing this image file to a web-accessible directory for viewing. \pagebreak -Swift example 1: MODIS satellite image processing script +{\bf \small Swift example 1: MODIS satellite image processing script} \begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] 1 type file; 2 type MODIS; type image; @@ -1355,7 +1353,7 @@ \end{Verbatim} %\end{verbatim} -\subsection{Simulation of glassy dynamics and thermodynamics.} +\subsection{Simulation of glass cavity dynamics and thermodynamics.} A recent study of the glass transition in model systems has focused on calculating from theory or simulation what is known as the "Mosaic length". @@ -1363,20 +1361,33 @@ Hocky's application code performs 100,000 Monte-Carlo steps in about 1-2 hours. Ten jobs are used to generate the 1M simulation steps needed for each configuration. The input data to each simulation is a file of about 150KB representing initial glass structures. Each simulation returns three new structures of 150KB each, a 50 KB log file, and a 4K file describing which particles are in the cavity. 
-Each simulation covers a space of 7 radii by 27 centers by 10 models, requiring 1690 jobs per run. Three methods are simulated (``kalj'', ``kawka'', and ``pedersenipl'') for total of 90 runs. Swift mappers enable metadata describing these aspects to be encoded in the data files of the campaigns to assist in managing the large volume of file data. +Each script run covers a simulation space of 7 radii by 27 centers by 10 models, requiring 1690 jobs per run. Three methods are simulated (``kalj'', ``kawka'', and ``pedersenipl'') for a total of 90 runs. Swift mappers enable metadata describing these aspects to be encoded in the data files of the campaigns to assist in managing the large volume of file data. As the simulation campaigns are quite lengthy (the first ran from October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what simulations need to be performed at any point in the campaign. His input mappers assume an application run was complete if all the returned ".final" files exist. In the case of script restarts, results that already existed were not computed. -Roughly 152,000 jobs defined by all the run*.sh scripts. Some runs were done on other resources including UChicago PADS cluster and TeraGrid resources. The only change necessary to run on OSG was configuring the OSG sites to run the science application. +Roughly 152,000 jobs are executed in a simulation campaign, defined by a set of parameter files defining molecular radii and centroids, and a set of "run" scripts that perform the execution of the {\tt swift} command with appropriately varying science parameters. Most runs were performed using the "User Engagement" virtual organization of the Open Science Grid (OSG) \cite{OSG, OSGEngage}. Some runs were done on other resources including the University of Chicago ``PADS'' cluster and TeraGrid resources. The only change necessary to run on OSG was configuring the OSG sites to run the science application.
-Approximate OSG usage over 100K cpus hours with about 100K tasks of 1-2 hours completed. App has been successfully run on about 18 OG (with the majority of runs have been completed on about 6 primary sites). +The approximate OSG usage was over 100K CPU hours with about 100K tasks of 1-2 hours completed. The simulation campaign has been successfully run on about 18 OSG sites, with the majority of runs having been completed on about 6 primary sites that tend to provide the most compute-hour opportunities for members of the Engagement VO. -Investigations of more advanced techniques are underway, and the fact that the entire campaign can be driven by location-independent Swift scripts will enable Hocky to reliably re-execute the entire campaign with relative ease. -This project would be completely unwieldy and much harder to organize without using Swift. +Example 2 shows a slightly reformatted version of the glass simulation script that was in use in Dec. 2010. Its key aspects are as follows. +Lines 1-3 define the mapped file types; these files are used to compose input and output structures at lines 5-15. (At the moment, the input structure is a degenerate single-file structure, but the user has experimented with various multi-file input structures in prior versions of this script). The output structure reflects the fact that the simulation is restartable in 1-2 hour increments, and works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. -\pagebreak -Swift example 2: Monte-Carlo simulation of quantum glass structures +The single application called by this script is the {\tt glassRun} program wrapped in the app function at lines 17-27. Note that rather than defining main program logic in ``open'' (top-level) code, the script places all the program logic in the function {\tt CreateGlassSystem}, with a single statement in open code at line 82 to invoke it.
This enables the simulation script to be defined in a library which can be imported into other Swift scripts to perform entire campaigns or campaign subsets. +The {\tt CreateGlassSystem} function starts by extracting a large set of science parameters from the Swift command line at lines 31-44 and 52 using the {\tt @arg()} function. It uses the built-in function {\tt readData} at lines 40-41 to read prepared lists of molecular radii and centroids from parameter files to define the primary physical dimensions of the simulation space. +A selectable energy function to be used by the simulation application is specified as a parameter at line 52. + +At lines 54 and 58, the script leverages Swift's flexible dynamic arrays to create a 3D array for input and a 4D array of structures for outputs. These data structures, whose leaves consist entirely of mapped files, are set using the external mappers specified for the input array at lines 54-57 and for the output array of structures at 58-61. Note that many of the science parameters are passed to the mappers, which in turn are used by the input mapper to locate files within the large multi-level directory structure of the campaign, and by the output mapper to create new directory and file naming conventions for the campaign outputs. The mappers use the common and useful practice of using scientific metadata to determine directory and file names. + +The entire body of the {\tt CreateGlassSystem} is a four-level nesting of foreach statements at lines 63-79. These perform a parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space.
Note that the if statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determined by a "NULL" file name returned by the mapper for the output of a given job in the simulation space. + +The advantages of managing a simulation campaign in this manner are well borne out by Hocky's experience: the expression of the campaign is a well-structured high-level script, devoid of details about file naming, synchronization of parallel tasks, location and state of remote computing resources, or explicit data transfer. Hocky was able to leverage local cluster resources on many occasions, but at any time could count on his script acquiring on the order of 1,000 compute cores from 6 to 18 sites of the Open Science Grid. When executing on the OSG, he leveraged the Swift capability to replicate jobs that are waiting in queues at more congested sites, and automatically send them to sites where jobs were moving through the system. All of these capabilities would be a huge distraction from his primary scientific simulation campaign were he to use lower-level abstractions where parallelism and remote distribution were the visible responsibility of the programmer. + +Investigations of more advanced glass simulation techniques are underway, and the fact that the entire campaign can be driven by location-independent Swift scripts will enable Hocky to reliably re-execute the entire campaign with relative ease. +He believes that Swift has made the project much easier to organize and execute. The project would be completely unwieldy without using Swift, and the distraction and scripting/programming effort level of leveraging multiple computing resources would be prohibitive.
+\\ +\\ +{\bf \small Swift example 2: Monte-Carlo simulation of glass cavity dynamics.} %\begin{verbatim} \begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] 1 type Arc; From noreply at svn.ci.uchicago.edu Sat Jan 8 16:42:43 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 16:42:43 -0600 (CST) Subject: [Swift-commit] r3907 - in text/parco10submission: . code Message-ID: <20110108224243.318039CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-08 16:42:43 -0600 (Sat, 08 Jan 2011) New Revision: 3907 Added: text/parco10submission/code/ text/parco10submission/code/glass.swift text/parco10submission/code/glassRunCavities.swift.ORIG text/parco10submission/code/modis.swift Modified: text/parco10submission/paper.tex Log: Moved example scripts to a code/ dir and include them from \InputVerbatim directives. Added: text/parco10submission/code/glass.swift =================================================================== --- text/parco10submission/code/glass.swift (rev 0) +++ text/parco10submission/code/glass.swift 2011-01-08 22:42:43 UTC (rev 3907) @@ -0,0 +1,82 @@ +type Arc; +type Restart; +type Log; + +type GlassIn{ + Restart startfile; +} + +type GlassOut{ + Arc arcfile; + Restart restartfile; + Restart startfile; + Restart final; + Log logfile; +} + +app (GlassOut o) glassCavityRun( + GlassIn i, string rad, string temp, string steps, + string volume, string fraca, string energyfunction, + string centerstring, string arctimestring) +{ + glassRun "-a" @filename(o.final) "--lf" @filename(i.startfile) + "--temp" temp "--stepsperparticle" steps "--volume" volume + "--fraca" fraca "--energy_function" energyfunction + "--cradius" rad "--ccoord" centerstring arctimestring + stdout=@filename(o.logfile); +} + +CreateGlassSystem() +{ + string temp=@arg("temp","2.0"); + string steps=@arg("steps","10"); + string esteps=@arg("esteps","100"); + string ceqsteps=@arg("ceqsteps","100"); + string 
natoms=@arg("natoms","200"); + string volume=@arg("volume","200"); + string rlist=@arg("rlist","rlist"); + string clist=@arg("clist","clist"); + string fraca=@arg("fraca","0.5"); + string radii[] = readData(rlist); + string centers[] = readData(clist); + int nmodels=@toint( @arg("n","1") ); + int nsub=@toint( @arg("nsub","1") ); + string savearc=@arg("savearc","FALSE"); + string arctimestring; + if(savearc=="FALSE") { + arctimestring="--arc_time=10000000"; + } + else{ + arctimestring=""; + } + string energyfunction=@arg("energyfunction","softsphereratiosmooth"); + + GlassIn modelIn[][][] ; + GlassOut modelOut[][][][] ; + + foreach rad,rindex in radii { + foreach centerstring,cindex in centers { + foreach model in [0:nmodels-1] { + foreach job in [0:nsub-1] { + string fname = + @filename(modelOut[rindex][cindex][model][job].final) + if (fname != "NULL") { + modelOut[rindex][cindex][model][job] = + glassCavityRun( modelIn[rindex][cindex][model], + rad, temp, steps, volume, fraca, energyfunction, + centerstring, arctimestring); + } + } + } + } + } +} + + +CreateGlassSystem(); Added: text/parco10submission/code/glassRunCavities.swift.ORIG =================================================================== --- text/parco10submission/code/glassRunCavities.swift.ORIG (rev 0) +++ text/parco10submission/code/glassRunCavities.swift.ORIG 2011-01-08 22:42:43 UTC (rev 3907) @@ -0,0 +1,120 @@ +type file; +type Text; + +type Arc; +type Restart; +type Log; + +type GlassOut{ + Arc arcfile; + Restart restartfile; + Restart startfile; + Restart final; + Log logfile; +} + +type GlassIn{ + Restart startfile; +} + +// Lib functions (to be moved to imported file + +app (file o) echoi (int i) { echo i stdout=@o;} +app (file o) echof (float f) { echo f stdout=@o;} +app (file o) echob (boolean b) { echo b stdout=@o;} +app (file o) echos (string s) { echo s stdout=@o;} + +(string s) itostr (int i) +{ + file f; + f = echoi(i); + s = readData(f); +} + +(string s) ftostr (float n) +{ + file f; 
+ f = echof(n); + s = readData(f); +} + +(int n) strtoi (string s) +{ + file f; + f = echos(s); + n = readData(f); +} + +(float n) strtof (string s) +{ + file f; + f = echos(s); + n = readData(f); +} + +app (file o) sprintfsApp (string fmt, string e[]) +{ + sprintfs fmt e stdout=@o; +} + +(string s) sprintfs (string fmt, string e[]) +{ + file f; + f = sprintfsApp(fmt,e); + s = readData(f); +} + +//glassSim app declarations +//app (int result) multiply(int a,int b) +//{ +// file f; +// multiply a b stdout=@filename(f); +// result=readData(f); +//} + +app (GlassOut o) glassCavityRun(GlassIn i,string rad,string temp,string steps,string volume,string fraca,string energyfunction,string centerstring,string arctimestring) +{ + glassRun "-a" @filename(o.final) "--lf" @filename(i.startfile) "--temp" temp "--stepsperparticle" steps "--volume" volume "--fraca" fraca "--energy_function" energyfunction "--cradius" rad "--ccoord" centerstring arctimestring stdout=@filename(o.logfile); +} + +CreateGlassSystem() +{ + string temp=@arg("temp","2.0"); + string steps=@arg("steps","10"); + string esteps=@arg("esteps","100"); + string ceqsteps=@arg("ceqsteps","100"); + string natoms=@arg("natoms","200"); + string volume=@arg("volume","200"); + string rlist=@arg("rlist","rlist"); + string clist=@arg("clist","clist"); + string fraca=@arg("fraca","0.5"); + string radii[] = readData(rlist); + string centers[] = readData(clist); + int nmodels=strtoi( @arg("n","1") ); + int nsub=strtoi( @arg("nsub","1") ); +// int njobs=nsub*nmodels; + string savearc=@arg("savearc","FALSE"); + string arctimestring; + if(savearc=="FALSE") { + arctimestring="--arc_time=10000000"; + } + else{ + arctimestring=""; + } + string energyfunction=@arg("energyfunction","softsphereratiosmooth"); + GlassIn modelIn[][][] ; + GlassOut modelOut[][][][] ; + foreach rad,rindex in radii { + foreach centerstring,cindex in centers { + foreach model in [0:nmodels-1] { + foreach job in [0:nsub-1] { + if( 
!(@filename(modelOut[rindex][cindex][model][job].final)=="NULL") ){ + modelOut[rindex][cindex][model][job]=glassCavityRun(modelIn[rindex][cindex][model],rad,temp,steps,volume,fraca,energyfunction,centerstring,arctimestring); + } + } + } + } + } +} + +CreateGlassSystem(); Added: text/parco10submission/code/modis.swift =================================================================== --- text/parco10submission/code/modis.swift (rev 0) +++ text/parco10submission/code/modis.swift 2011-01-08 22:42:43 UTC (rev 3907) @@ -0,0 +1,84 @@ +type file; +type MODIS; type image; +type landuse; + +# Define application program interfaces + +app (landuse output) getLandUse (imagefile input, int sortfield) +{ + getlanduse @input sortfield stdout=@output ; +} + +app (file output, file tilelist) analyzeLandUse + (MODIS input[], string usetype, int maxnum) +{ + analyzelanduse @output @tilelist usetype maxnum @filenames(input); +} + +app (image output) colorMODIS (MODIS input) +{ + colormodis @input @output; +} + +app (image output) assemble + (file selected, image img[], string webdir) +{ + assemble @output @selected @filename(img[0]) webdir; +} + +app (image grid) markMap (file tilelist) +{ + markmap @tilelist @grid; +} + +# Constants and command line arguments + +int nFiles = @toint(@arg("nfiles","1000")); +int nSelect = @toint(@arg("nselect","12")); +string landType = @arg("landtype","urban"); +string runID = @arg("runid","modis-run"); +string MODISdir= @arg("modisdir","/home/wilde/bigdata/data/modis/2002"); +string webDir = @arg("webdir","/home/wilde/public_html/geo/"); + + + +# Input Dataset + +image geos[] ; + +# Compute the land use summary of each MODIS tile + +landuse land[] ; + +foreach g,i in geos { + land[i] = getLandUse(g,1); +} + +# Find the top N tiles (by total area of selected landuse types) + +file topSelected<"topselected.txt">; +file selectedTiles<"selectedtiles.txt">; +(topSelected, selectedTiles) = analyzeLandUse(land, landType, nSelect); + +# Mark the top N tiles 
on a sinusoidal gridded map + +image gridMap<"markedGrid.gif">; +gridMap = markMap(topSelected); + +# Create multi-color images for all tiles + +image colorImage[] ; + +foreach g, i in geos { + colorImage[i] = colorMODIS(g); +} + +# Assemble a montage of the top selected areas + +image montage ; # @arg +montage = assemble(selectedTiles,colorImage,webDir); + Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-08 22:10:00 UTC (rev 3906) +++ text/parco10submission/paper.tex 2011-01-08 22:42:43 UTC (rev 3907) @@ -1286,92 +1286,9 @@ \pagebreak {\bf \small Swift example 1: MODIS satellite image processing script} -\begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] - 1 type file; - 2 type MODIS; type image; - 3 type landuse; - 4 - 5 # Define application program interfaces - 6 - 7 app (landuse output) getLandUse (imagefile input, int sortfield) - 8 { - 9 getlanduse @input sortfield stdout=@output ; - 10 } - 11 - 12 app (file output, file tilelist) analyzeLandUse - 13 (MODIS input[], string usetype, int maxnum) - 14 { - 15 analyzelanduse @output @tilelist usetype maxnum @filenames(input); - 16 } - 17 - 18 app (image output) colorMODIS (MODIS input) - 19 { - 20 colormodis @input @output; - 21 } - 22 - 23 app (image output) assemble - 24 (file selected, image img[], string webdir) - 25 { - 26 assemble @output @selected @filename(img[0]) webdir; - 27 } - 28 - 29 app (image grid) markMap (file tilelist) - 30 { - 31 markmap @tilelist @grid; - 32 } - 33 - 34 # Constants and command line arguments - 35 - 36 int nFiles = @toint(@arg("nfiles","1000")); - 37 int nSelect = @toint(@arg("nselect","12")); - 38 string landType = @arg("landtype","urban"); - 39 string runID = @arg("runid","modis-run"); - 40 string MODISdir= @arg("modisdir","/home/wilde/bigdata/data/modis/2002"); - 41 string webDir = @arg("webdir","/home/wilde/public_html/geo/"); - 42 - 
43 - 44 - 45 # Input Dataset - 46 - 47 image geos[] ; - 49 - 50 # Compute the land use summary of each MODIS tile - 51 - 52 landuse land[] ; - 54 - 55 foreach g,i in geos { - 56 land[i] = getLandUse(g,1); - 57 } - 58 - 59 # Find the top N tiles (by total area of selected landuse types) - 60 - 61 file topSelected<"topselected.txt">; - 62 file selectedTiles<"selectedtiles.txt">; - 63 (topSelected, selectedTiles) = analyzeLandUse(land, landType, nSelect); - 64 - 65 # Mark the top N tiles on a sinusoidal gridded map - 66 - 67 image gridMap<"markedGrid.gif">; - 68 gridMap = markMap(topSelected); - 69 - 70 # Create multi-color images for all tiles - 71 - 72 image colorImage[] ; - 75 - 76 foreach g, i in geos { - 77 colorImage[i] = colorMODIS(g); - 78 } - 79 - 80 # Assemble a montage of the top selected areas - 81 - 82 image montage ; # @arg - 83 montage = assemble(selectedTiles,colorImage,webDir); - -\end{Verbatim} +\VerbatimInput[fontsize=\scriptsize,frame=single,framesep=2mm, numbers=left] {code/modis.swift} +%\VerbatimInput [fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left]{modis.swift} +%\end{Verbatim} %\end{verbatim} \subsection{Simulation of glass cavity dynamics and thermodynamics.} @@ -1410,90 +1327,9 @@ \\ {\bf \small Swift example 2: Monte-Carlo simulation of glass cavity dynamics.} %\begin{verbatim} -\begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] - 1 type Arc; - 2 type Restart; - 3 type Log; - 4 - 5 type GlassIn{ - 6 Restart startfile; - 7 } - 8 - 9 type GlassOut{ - 10 Arc arcfile; - 11 Restart restartfile; - 12 Restart startfile; - 13 Restart final; - 14 Log logfile; - 15 } - 16 - 17 app (GlassOut o) glassCavityRun( - 18 GlassIn i, string rad, string temp, string steps, - 19 string volume, string fraca, string energyfunction, - 20 string centerstring, string arctimestring) - 21 { - 22 glassRun "-a" @filename(o.final) "--lf" @filename(i.startfile) - 23 "--temp" temp "--stepsperparticle" steps 
"--volume" volume - 24 "--fraca" fraca "--energy_function" energyfunction - 25 "--cradius" rad "--ccoord" centerstring arctimestring - 26 stdout=@filename(o.logfile); - 27 } - 28 - 29 CreateGlassSystem() - 30 { - 31 string temp=@arg("temp","2.0"); - 32 string steps=@arg("steps","10"); - 33 string esteps=@arg("esteps","100"); - 34 string ceqsteps=@arg("ceqsteps","100"); - 35 string natoms=@arg("natoms","200"); - 36 string volume=@arg("volume","200"); - 37 string rlist=@arg("rlist","rlist"); - 38 string clist=@arg("clist","clist"); - 39 string fraca=@arg("fraca","0.5"); - 40 string radii[] = readData(rlist); - 41 string centers[] = readData(clist); - 42 int nmodels=@toint( @arg("n","1") ); - 43 int nsub=@toint( @arg("nsub","1") ); - 44 string savearc=@arg("savearc","FALSE"); - 45 string arctimestring; - 46 if(savearc=="FALSE") { - 47 arctimestring="--arc_time=10000000"; - 48 } - 49 else{ - 50 arctimestring=""; - 51 } - 52 string energyfunction=@arg("energyfunction","softsphereratiosmooth"); - 53 - 54 GlassIn modelIn[][][] ; - 58 GlassOut modelOut[][][][] ; - 62 - 63 foreach rad,rindex in radii { - 64 foreach centerstring,cindex in centers { - 65 foreach model in [0:nmodels-1] { - 66 foreach job in [0:nsub-1] { - 67 string fname = - 68 @filename(modelOut[rindex][cindex][model][job].final) - 69 if (fname != "NULL") { - 70 modelOut[rindex][cindex][model][job] = - 71 glassCavityRun( modelIn[rindex][cindex][model], - 72 rad, temp, steps, volume, fraca, energyfunction, - 73 centerstring, arctimestring); - 74 } - 75 } - 76 } - 77 } - 78 } - 79 } - 80 - 81 - 82 CreateGlassSystem(); -\end{Verbatim} +%\begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] +\VerbatimInput[fontsize=\scriptsize,frame=single,framesep=2mm, numbers=left] {code/glass.swift} +%\end{Verbatim} %\end{verbatim} \section{Performance Characteristics} From noreply at svn.ci.uchicago.edu Sat Jan 8 16:45:15 2011 From: noreply at svn.ci.uchicago.edu (noreply at 
svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 16:45:15 -0600 (CST) Subject: [Swift-commit] r3908 - text/parco10submission Message-ID: <20110108224515.146239CC7F@svn.ci.uchicago.edu> Author: foster Date: 2011-01-08 16:45:14 -0600 (Sat, 08 Jan 2011) New Revision: 3908 Modified: text/parco10submission/paper.tex Log: Various minor edits Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-08 22:42:43 UTC (rev 3907) +++ text/parco10submission/paper.tex 2011-01-08 22:45:14 UTC (rev 3908) @@ -247,10 +247,10 @@ The remainder of this paper is organized as follows. %Section~\ref{Rationale} explains the motivation for the Swift programming model. -In Section~\ref{Language}, we present the major concepts and language -structure of Swift. Section~\ref{Execution} provides details of the +In Section~\ref{Language}, we present major concepts and language +structures. Section~\ref{Execution} details the Swift implementation, including the distributed architecture that enables -Swift applications to run on distributed resources. +applications to run on distributed resources. Section~\ref{Applications} describes real-world applications using Swift on scientific projects. Section~\ref{Related} relates Swift to other systems. Section~\ref{Future} highlights @@ -373,7 +373,7 @@ pipelines (or more generally, graphs) of sub-functions. Unlike most other scripting languages, Swift expresses -invocations of ``ordinary programs''--technically, POSIX {\tt exec()} +invocations of ordinary programs--technically, POSIX {\tt exec()} operations--in a manner that explicitly declares the files and command-line arguments that are the inputs of each program invocation. Swift scripts similarly declare all output files that results from program invocations. @@ -385,45 +385,34 @@ The Swift parallel execution model is based on two concepts that are applied uniformly throughout the language. 
First, every Swift data element behaves like a \emph{future}. By ``data element'', we mean both the named variables within a function's environment, such as its local variables, parameters, and returns, and the individual elements of array and structure collections. Second, all expressions in a Swift program are conceptually executed in parallel. Expressions (including function evaluations) wait for input values when they are required, and then set their result values as their computation proceeds. These fundamental concepts of pervasive implicit parallelism and transparent location independence, along with natural manner in which Swift expresses the processing of files by applications as if they were ``in-memory'' objects, are the powerful aspects of Swift which make it unique among scripting tools. These aspects are elaborated in this section. -\subsection{Data model} +\subsection{Data model and types} -%IAN: Hey, wait: surely we are mixing together different things when we talk about a queue of function invocations?? We are describing the implementation, it seems. +Variables are used in Swift to name the local variables, arguments, and returns of a function. +The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment +can be declared ``global'' to make them accessible to every other function in the script. -%IAN: I have a feeling that this text could be simplified. +Each variable in a Swift script is declared to be of a specific (single) type. The Swift type model is simple, with no concepts of inheritance, abstraction, etc. +There are three basic classes of data types: primitive, mapped, and collection. -%IAN XX - -Every data object in Swift has a type that defines - -Every data object in Swift is built up from atomic data elements that contain three fields: a value, a state, and a queue of function invocations that are waiting for the value to be set. 
- -Variables are used in Swift to name the local variables, arguments, and returns of a function. Every Swift variable is assigned a concrete data type, based on a simple type model (with no concepts of inheritance, abstraction, etc). The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment can be declared ``global'' to make them accessible to every other function in the script. - -Swift data elements (atomic variables and array elements) are \emph{single-assignment}--- -they can be assigned at most one value during execution---and behave as futures. -This semantic provides the -basis for Swift's model of parallel function evaluation and chaining. -While Swift collection types (arrays and structures) are not -single-assignment, each of their elements is single-assignment. - -Each variable in a Swift script is declared to be of a specific (single) type. -Swift provides three basic classes of data types: - -\emph{Primitive types} are provided for integer, float, string, and boolean values by the Swift runtime. Common operators are defined for +\emph{Primitive types} are provided for integer, float, string, and boolean values. Common operators are defined for primitive types, such as arithmetic, concatenation, and explicit conversion. An additional primitive type ``external'' is provided for manual synchronization. -\emph{Mapped types} are data elements that refer (through a process called``mapping'') to files external to the Swift script. These are the files that will be read and written by the external application programs called by Swift. +\emph{Mapped types} are data elements that refer (through a process called``mapping'') to files external to the Swift script. +These files can then be read and written by application programs called by Swift. The mapping process can map single variables to single files, and structures and arrays to collections of files. 
-Primitive and mapped types are called \emph{atomic types}. \emph{Collection types} are \emph{arrays} and \emph{structures}. Arrays contain values of only a single type; structure fields can be of any type. One -array type is provided for every atomic type (integer, string, boolean, and mapped file). +array type is provided for every scalar and mapped type. Arrays use numeric indices, but are sparse. -Both types of collections can contain members of atomic or collection types. Structures contain a finite number of elements. Arrays contain a varying number of elements. Structures and arrays can both recursively reference other structures and arrays in addition to atomic values. Arrays can be nested to provide multi-dimensional indexing. +Both types of collections can contain members of primitive, mapped, or collection types; +in particular, arrays can be nested to provide multi-dimensional indexing. +Structures contain a declared number of elements. The number of elements in an array can be determined at run time. -Due to the dynamic, highly parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. + + +Due to the dynamic, parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. %IAN: This last paragraph raises the issue of whether this state can always be determiend. 
%Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? garbage collection removed variables that are no longer needed - I don't see how single assignment helps here.} @@ -641,6 +630,16 @@ \subsection{Execution model: Implicit parallelism} \label{ordering} +Every data object in Swift is built up from atomic data elements that contain three fields: a value, a state, and a queue of function invocations that are waiting for the value to be set. + +Swift data elements (atomic variables and array elements) are \emph{single-assignment}--- +they can be assigned at most one value during execution---and behave as futures. +This semantic provides the +basis for Swift's model of parallel function evaluation and chaining. +While Swift collection types (arrays and structures) are not +single-assignment, each of their elements is single-assignment. + + %\mikenote{Rename this as Parallelism model?; stress and show how highly parallel the model is - the idea that the workflow is fully expanded but throttled.} We have described almost all of the Swift language. 
(Swift also provides conditional From noreply at svn.ci.uchicago.edu Sat Jan 8 17:07:11 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 17:07:11 -0600 (CST) Subject: [Swift-commit] r3909 - text/parco10submission Message-ID: <20110108230711.97ABA9CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-08 17:07:11 -0600 (Sat, 08 Jan 2011) New Revision: 3909 Modified: text/parco10submission/paper.bib Log: fixing the new references Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-08 22:45:14 UTC (rev 3908) +++ text/parco10submission/paper.bib 2011-01-08 23:07:11 UTC (rev 3909) @@ -187,127 +187,39 @@ pages = {55--59} } - at article{CEDPS, - title = {{CEDPS}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - - at article{MONOTONICPHD, - title = {{MONOTONICPHD}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - - at article{GLOBUS, - title = {{GLOBUS}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - - at article{GRAM, - title = {{GRAM}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - - at article{GridFTP, - title = {{GridFTP}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - - at article{TCP, - title = {{TCP}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - - at article{CNARI, - title = {{CNARI}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = 
{237--247} -} - - at article{FALKON, - title = {{FALKON}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - @misc{LONIPIPELINE, title="LONI Pipeline http://pipeline.loni.ucla.edu/" } + - at article{MAPREDUCE, - title = {{MAPREDUCE}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - - at article{TERAGRID, - title = {{TERAGRID}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - + @article{OSG, - title = {{OSG}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} + author={Ruth Pordes and Don Petravick and Bill Kramer and Doug Olson and Miron Livny and Alain Roy and Paul Avery and Kent +Blackburn and Torre Wenaus and Frank W\"{u}rthwein and Ian Foster and Rob Gardner and Mike Wilde and Alan Blatecky and John +McGee and Rob Quick}, + title={The open science grid}, + journal={Journal of Physics: Conference Series}, + volume={78}, + number={1}, + pages={012057}, + url={http://stacks.iop.org/1742-6596/78/i=1/a=012057}, + year={2007} } - - at article{ReSS, - title = {{ReSS}}, - author = {John Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} + + + @article{OSGEngage, + author={R Pordes and M Altunay and P Avery and A Bejan and K Blackburn and A Blatecky and R Gardner and B Kramer and M Livny and J McGee and M Potekhin and R Quick and D Olson and A Roy and C Sehgal and T Wenaus and M Wilde and F W\"{u}rthwein}, + title={New science on the Open Science Grid}, + journal={Journal of Physics: Conference Series}, + volume={125}, + number={1}, + pages={012070}, + url={http://stacks.iop.org/1742-6596/125/i=1/a=012070}, + year={2008} } + + - at article{GPFS, - title = {{GPFS}}, - author = {John 
Smith and Jane Doe}, - journal = {{Cluster Computing}}, - volume = {5(3)}, - year = 2002, - pages = {237--247} -} - @article {OLDKarajan, author = {von Laszewski, Gregor and Hategan, Mike}, affiliation = {Argonne National Laboratory Mathematics and Computer Science Division, Argonne National Laboratory 9700 S. Cass Ave. Argonne IL 60440 USA}, From noreply at svn.ci.uchicago.edu Sat Jan 8 17:13:33 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 17:13:33 -0600 (CST) Subject: [Swift-commit] r3910 - text/parco10submission Message-ID: <20110108231333.D7A8F9CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-08 17:13:33 -0600 (Sat, 08 Jan 2011) New Revision: 3910 Modified: text/parco10submission/paper.tex Log: A few more edits on the type model. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-08 23:07:11 UTC (rev 3909) +++ text/parco10submission/paper.tex 2011-01-08 23:13:33 UTC (rev 3910) @@ -392,26 +392,26 @@ can be declared ``global'' to make them accessible to every other function in the script. Each variable in a Swift script is declared to be of a specific (single) type. The Swift type model is simple, with no concepts of inheritance, abstraction, etc. -There are three basic classes of data types: primitive, mapped, and collection. +There are three basic classes of data types: primitive, mapped, and collection. \emph{Primitive types} are provided for integer, float, string, and boolean values. Common operators are defined for primitive types, such as arithmetic, concatenation, and explicit conversion. An additional primitive type ``external'' is provided for manual synchronization. -\emph{Mapped types} are data elements that refer (through a process called``mapping'') to files external to the Swift script. 
+\emph{Mapped types} are provided to declare data elements that refer (through a process called ``mapping'') to files external to the Swift script. These files can then be read and written by application programs called by Swift. The mapping process can map single variables to single files, and structures and arrays to collections of files. +At the moment, there are no built-in mapped types in the language. Instead, users simply declare type names with no other structure to denote any mapped type names desired. For example: {\tt type file; type log;} \emph{Collection types} are \emph{arrays} and \emph{structures}. -Arrays contain values of only a single type; structure fields can be of any type. One -array type is provided for every scalar and mapped type. +Arrays contain values of only a single type; structure fields can be of any type. Arrays can be created and defined to be of any primitive or declared type. Arrays use numeric indices, but are sparse. Both types of collections can contain members of primitive, mapped, or collection types; in particular, arrays can be nested to provide multi-dimensional indexing. Structures contain a declared number of elements. The number of elements in an array can be determined at run time. +New types can be declared to define a new mapped type or to name a structure type. The type model is by design very simple and limited to keep type semantics easy to understand, implement and use. - Due to the dynamic, parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. %IAN: This last paragraph raises the issue of whether this state can always be determined.
@@ -447,7 +447,7 @@ The notation \verb|{}| indicates that the type represents a reference to a single \emph{opaque} file, i.e., a reference to an external object whose structure is opaque to the Swift script. For convenience such type declarations typically use the equivalent shorthand \verb|type image;| -(which new users find confusing but which has become a Swift idiom). +(this compact notation is confusing at first but has become a useful Swift idiom). Mapped type variable declarations can be specified with a \emph{mapping} descriptor enclosed in \verb|<>| that indicates the file to be mapped to the variable. From noreply at svn.ci.uchicago.edu Sat Jan 8 17:16:20 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 17:16:20 -0600 (CST) Subject: [Swift-commit] r3911 - text/parco10submission Message-ID: <20110108231620.702549CC7F@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-08 17:16:20 -0600 (Sat, 08 Jan 2011) New Revision: 3911 Modified: text/parco10submission/paper.tex Log: small tweaks in the text about the new example in 4 Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-08 23:13:33 UTC (rev 3910) +++ text/parco10submission/paper.tex 2011-01-08 23:16:20 UTC (rev 3911) @@ -1292,7 +1292,7 @@ \subsection{Simulation of glass cavity dynamics and thermodynamics.} -A recent study of the glass transition in model systems has focused on calculating from theory or simulation what is known as the "Mosaic length". +A recent study of the glass transition in model systems has focused on calculating from theory or simulation what is known as the ``Mosaic length''. Glen Hocky of the Reichman Group at Columbia applied a new cavity method for measuring this length scale, where particles are simulated by molecular dynamics or Monte Carlo methods within cavities having amorphous boundary conditions. 
Various correlation functions are calculated at the interior of cavities of varying sizes and averaged over many independent simulations to determine a thermodynamic length. Hocky is using simulations of this method to investigate the differences between three different glass systems which all have the same structure but which differ in other subtle ways to determine if this thermodynamic length causes the variations between the three systems. @@ -1300,25 +1300,25 @@ Each script run covers a simulation space of 7 radii by 27 centers by 10 models, requiring 1690 jobs per run. Three methods are simulated (``kalj'', ``kawka'', and ``pedersenipl'') for total of 90 runs. Swift mappers enable metadata describing these aspects to be encoded in the data files of the campaigns to assist in managing the large volume of file data. -As the simulation campaigns are quite lengthy (the first ran from October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what simulations need to be performed at any point in the campaign. His input mappers assume an application run was complete if all the returned ".final" files exist. In the case of script restarts, results that already existed were not computed. +As the simulation campaigns are quite lengthy (the first ran from October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what simulations need to be performed at any point in the campaign. His input mappers assume an application run was complete if all the returned ``\verb|.final|'' files exist. In the case of script restarts, results that already existed were not (re)computed. -Roughly 152,000 jobs are executed in a simulation campaign, defined by a set of parameter files defining molecular radii and centroids, and set set of "run" scripts that perform the execution of the {\tt swift} command with appropriately varying science parameters. 
Most runs were performed using the "USer Engagement" virtual organization of the Open Science Grid (OSG) \cite{OSG, OSGEngage}. Some runs were done on other resources including University of Chicago ``PADS'' cluster and TeraGrid resources. The only change necessary to run on OSG was configuring the OSG sites to run the science application. +Roughly 152,000 jobs are executed in a simulation campaign, defined by a set of parameter files defining molecular radii and centroids, and a set of ``run'' scripts that perform the execution of the {\tt swift} command with appropriately varying science parameters. Most runs were performed using the ``User Engagement'' virtual organization (VO) of the Open Science Grid (OSG) \cite{OSG, OSGEngage}. Some runs were done on other resources including the University of Chicago ``PADS'' cluster and TeraGrid resources. The only change necessary to run on OSG was configuring the OSG sites to run the science application. -The approximate OSG usage was over 100K cpus hours with about 100K tasks of 1-2 hours completed. The simulation campaign has been successfully run on about 18 OSG sites, with the majority of runs have been completed on about 6 primary sites that tend to provide the most compute-hour opportunities for members of the Engagement VO. +The approximate OSG usage was over 100,000 CPU hours with about 100,000 tasks of 1-2 hours completed. The simulation campaign has been successfully run on about 18 OSG sites, with the majority of runs completed on about 6 primary sites that tend to provide the most compute-hour opportunities for members of the Engagement VO. Example 2 shows a slightly reformatted version of the glass simulation script that was in use in Dec. 2010. Its key aspects are as follows. -Lines 1-3 define the mapped file types; these files are used to compose input and output structures at lines 5-15.
(At the moment, the input structure is a degenerate single-file structure, but the user has experimented with various multi-file input structures in prior versions of this script). The output structure reflects the fact that the simulation is restartable in 1-2 hour increments, and works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. +Lines 1-3 define the mapped file types; these files are used to compose input and output structures at lines 5-15. (At the moment, the input structure is a degenerate single-file structure, but Hocky has experimented with various multi-file input structures in prior versions of this script). The output structure reflects the fact that the simulation is restartable in 1-2 hour increments, and works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. -The single application called by this script is the {\tt glassRun} program wrapped in the app function at lines 17-27. Note that rather than defining main program logic in ``open" (top-level) code, the script places all the program login in the function {\tt CreateGlassSystem}, with a single statement in open code at line 82 to invoke it. This enables the simulation script to be defined in a library which can be imported into other Swift scripts to perform entire campaigns or campaign subsets. +The single application called by this script is the {\tt glassRun} program wrapped in the app function at lines 17-27. Note that rather than defining main program logic in ``open'' (top-level) code, the script places all the program logic in the function {\tt CreateGlassSystem}, invoked by the single statement at line 82. This enables the simulation script to be defined in a library that can be imported into other Swift scripts to perform entire campaigns or campaign subsets.
The {\tt CreateGlassSystem} function starts by extracting a large set of science parameters from the Swift command line at lines 31-44 and 52 using the {\tt @arg()} function. It uses the built-in function {\tt readData} at lines 40-41 to read prepared lists of molecular radii and centroids from parameter files to define the primary physical dimensions of the simulation space. A selectable energy function to be used by the simulation application is specified as a parameter at line 52. -At lines 54 and 58, the script leverages Swift flexible dynamic arrays to create a 3D array for input and an 4D array of structures for outputs. These data structures, whose leaves consist entirely of mapped files, are set using the external mappers specified for the input array at lines 54-57 and for the output array of structures at 58-61. Note that many of the science parameters are passed to the mappers, which in turn are used by the input mapper to locate files within the large multi-level directory structure of the campaign, and by the output mapper to create new directory and file naming conventions for the campaign outputs. The mappers use the common, and useful practice of using scientific metadata to determine directory and file names. +At lines 54 and 58, the script leverages Swift's flexible dynamic arrays to create a 3D array for input and a 4D array of structures for outputs. These data structures, whose leaf elements consist entirely of mapped files, are set using the external mappers specified for the input array at lines 54-57 and for the output array of structures at 58-61. Note that many of the science parameters are passed to the mappers, which in turn are used by the input mapper to locate files within the large multi-level directory structure of the campaign, and by the output mapper to create new directory and file naming conventions for the campaign outputs.
The mappers use the common and useful practice of using scientific metadata to determine directory and file names. -The entire body of the {\tt CreateGlassSystem} is a four-level nesting of foreach statements at lines 63-79. These perform a parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space. Note that the if statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determine by a "NULL" file name returned by the mapper for the output of a given job in the simulation space. +The entire body of the {\tt CreateGlassSystem} is a four-level nesting of \verb|foreach| statements at lines 63-79. These perform a parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space. Note that the if statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determined by a ``\verb|NULL|'' file name returned by the mapper for the output of a given job in the simulation space. -The advantages of managing a simulation campaign in this manner are well borne out by Hocky's experience: the expression of the campaign is a well-structured high-level script, devoid of details about file naming, synchronization of parallel tasks, location and state of remote computing resources, or explicit explicit data transfer.
Hock was able to leverage local cluster resources on many occasions, but at any time could count on his script acquiring on the order of 1,000 compute cores from 6 to 18 sites of the Open Science Grid. When executing on the OSG, he leveraged the Swift capability to replicate jobs that are waiting in queues at more congested sites, and automatically send them to sites where jobs were moving through the system. All of these capabilities would be a huge distraction from his primary scientific simulation campaign were he to use lower-level abstractions where parallelism and remote distribution were the visible responsibility of the programmer. +The advantages of managing a simulation campaign in this manner are well borne out by Hocky's experience: the expression of the campaign is a well-structured high-level script, devoid of details about file naming, synchronization of parallel tasks, location and state of remote computing resources, or explicit data transfer. Hocky was able to leverage local cluster resources on many occasions, but at any time could count on his script acquiring on the order of 1,000 compute cores from 6 to 18 sites of the Open Science Grid. When executing on the OSG, he leveraged Swift's capability to replicate jobs that are waiting in queues at more congested sites, and automatically send them to sites where jobs were moving through the system. All of these capabilities would be a huge distraction from his primary scientific simulation campaign were he to use lower-level abstractions where parallelism and remote distribution were the visible responsibility of the programmer. Investigations of more advanced glass simulation techniques are underway, and the fact that the entire campaign can be driven by location-independent Swift scripts will enable Hocky to reliably re-execute the entire campaign with relative ease. He believes that Swift has made the project much easier to organize and execute.
The project would be completely unwieldy without using Swift, and the distraction and scripting/programming effort level of leveraging multiple computing resources would be prohibitive. From noreply at svn.ci.uchicago.edu Sat Jan 8 18:29:03 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 18:29:03 -0600 (CST) Subject: [Swift-commit] r3912 - text/parco10submission Message-ID: <20110109002903.523C49CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-08 18:29:03 -0600 (Sat, 08 Jan 2011) New Revision: 3912 Modified: text/parco10submission/paper.tex Log: revised author list order and format; small changes to intro. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-08 23:16:20 UTC (rev 3911) +++ text/parco10submission/paper.tex 2011-01-09 00:29:03 UTC (rev 3912) @@ -40,43 +40,20 @@ \title{Swift: A language for distributed parallel scripting} -% draft - contact benc at ci.uchicago.edu +\author[ci,mcs]{Michael Wilde\corref{cor1}} \ead{wilde at mcs.anl.gov} +\author[ci]{Mihael Hategan} +\author[mcs]{Justin M. Wozniak} +\author[alcf]{Ben Clifford} +\author[ci]{Daniel S. 
Katz} +\author[ci,mcs,cs]{Ian Foster} + +\cortext[cor1]{Corresponding author} -% ACM styleguide says max 3 authors here, rest in acknowledgements +\address[ci]{Computation Institute, University of Chicago and Argonne National Laboratory} +\address[mcs]{Mathematics and Computer Science Division, Argonne National Laboratory} +\address[alcf]{Argonne Leadership Computing Facility, Argonne National Laboratory } +\address[cs]{Department of Computer Science, University of Chicago } -%% \numberofauthors{4} - -%% \author{ -%% \alignauthor Ben Clifford \\ -%% \affaddr{University of Chicago Computation Institute}\\ -%% \email{benc at ci.uchicago.edu} -%% \alignauthor Ian Foster \\ -%% \affaddr{University of Chicago Computation Institute}\\ -%% \affaddr{Argonne National Laboratory} -%% \alignauthor Mihael Hategan \\ -%% \affaddr{University of Chicago Computation Institute}\\ -%% \and -%% \alignauthor Michael Wilde \\ -%% \affaddr{University of Chicago Computation Institute}\\ -%% \affaddr{Argonne National Laboratory} \\ -%% } - -\author{Mihael Hategan} -\author{Justin M. Wozniak} -\author{Ian Foster} -\author{Michael Wilde} - -\address{Mathematics and Computer Science Division, Argonne National - Laboratory, and Computation Institute, University of Chicago and Argonne National Laboratory} - -\author{Daniel S. Katz} - -\address{Computation Institute, University of Chicago and Argonne National Laboratory} - -\author{Ben Clifford} - -\address{Argonne Leadership Computing Facility, Argonne National Laboratory } - \begin{abstract} Scientists, engineers and statisticians must often execute domain-specific application programs @@ -119,12 +96,14 @@ %written in other scripting languages. 
Unlike most other scripting languages, Swift %is a higher-level language that %IAN: I don't see what it means that Swift doesn't focus on details of executing scripts -focuses not on the details of executing sequences or -pipelines of scripts and programs, but rather on the issues that arise -from the concurrent execution, composition, and coordination of many independent computational tasks. +focuses % not on the details of executing sequences or +% pipelines of scripts and programs, but rather +on the issues that arise +from the concurrent execution, composition, and coordination of many independent (and typically, distributed) computational tasks. %at large scale. % -Swift scripts express the execution of programs that consume and produce datasets using a C-like syntax +Swift scripts express the execution of programs that consume and produce file-resident datasets. +Swift employs a C-like syntax consisting of function definitions and expressions, with dataflow-driven semantics and implicit parallelism. From noreply at svn.ci.uchicago.edu Sat Jan 8 18:37:20 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 18:37:20 -0600 (CST) Subject: [Swift-commit] r3914 - text/parco10submission/plots Message-ID: <20110109003720.3143D9CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-08 18:37:20 -0600 (Sat, 08 Jan 2011) New Revision: 3914 Added: text/parco10submission/plots/dds.pdf Log: Adding Added: text/parco10submission/plots/dds.pdf =================================================================== --- text/parco10submission/plots/dds.pdf (rev 0) +++ text/parco10submission/plots/dds.pdf 2011-01-09 00:37:20 UTC (rev 3914) @@ -0,0 +1,267 @@ +%PDF-1.4 +%???? +5 0 obj +<> +stream +x?????-=?.??S?}?R??|???iC? 
?4??Am?2???????:?d +?G??XL^?d?????W???????????o?????G???G?>????g/?W???????7???G?????_>????????l??????????q?????????C?Wk??v??o??g;}????; ???7????)U?????????o??????????????W??u????U??????Y???????]?????u???n??W???h_??(??~?_????R?'?)wA???V?lq???m~?q??W????_?f ?)?k]??v]m|6Lp??????????????3????uFU????T?/ ???c?N?UN???|f?????\?:C????Y?"?@?????B?O?N??c?>??u??5 ?LX???????y_{x?u????)?g?k???????*???D??W??g?&?{s?tR:???u?V??t??UC7lDg???iRt@c???????v??s0M?L1H???m?(??V? ?.???3?8kT???Q???Br=?:?&?\???{!Q?-???????l?[+?EP?u&?L??????A??W???P???????}? +?/?????G???xA?3?????+??Q????W?J??wfp???rf?Tp?z?(n?Z<???v?#?5??? ??g?M????/?}6@?=/??? ?p?[????i??1 &??r?U<<?~50?z??l??U?????????????2???8`?S??o?q???.????`???????+O57]?S???Q/???C|?LS?c?????y??+3?e?????-??]?=?[????]7????s???? ?????S?? +!???o?0?L??xJ?B?8L&!??~????_1???@???f???j???-???? ?>?|$~?S???x?Otb? ??t???~???*????S?P6/??\I???? ?}??M?:?_?3?|????lVO%Z!?5??A?-dNd}o?O_k?l0?E?#D2?????.???C??aT ?!????1Y??B?P?????`???$??L???0???xX?8?^???q????-l u?C?????s?#t?gmn{ +???????:??p ?9?;?>[??1?o????0g?IG??$??+?U??q?q?N?X~L?e??8??#?Z??f?0Z%???8??????????7=:?????28???l??K?[7/??l?kr? ? ??'0u?u35????hc?????????N?? ?&u??p?????u@??)?q?b-sp ??{?Nn??#b??? YQ???M??S??????r????`t`)g???l2?1?J?? ?m??M???py?? ???3Wy[????l???;`?]?`?8???Y????G-??b;9???l?T?;??[?zP?< ip?C?}\??.????_>?J?yX????????R??@???#\?1E????v??"r_????d?p????L5????V?!?aC?x?e8_???C|?T??f/?Y??tE+ +?3 o?V?3?pnd??#????#.n???? 0?rv??A??\???1>??DwO???????JP?f?Nt +F7a$??????> +P?8E? ?)??7?JQg ??M'?s:?b???g??H??R??????Ww?3???Jt???5Tg]0?c????u!|q????5?????i?8??T??:?`9Q??9???=?_R??M?9??i%;Vp????Q???;??iC???\[T? s?????R?hjT?r?N?????E???Y9? ;????/?,??????cl?O?P?? C28=?f??????r???B(?L-pM6??S?t?9????Yx?vbz????A???n|p?q??q??!????S?Z.\??s???>?}???k0?P}?? ??c|?,U?,???a???~.Y?B???_?r?????[I?0???]XN????3??~?9?^??jx??P t?g.?l?????Bz%????hC???u???W????0k'?k?8? +.j????3L???N??2f*?y"3'??4?? ?`???? ??r?P'?? ?1?p`?? ????\ +L0?p?????????,???v?Pksp^P???J9?????????SONd??@?u??Z?6??? +P?i??? 
9X??8???1O.????????6??}?;zX??????Q?H7LT??S???"Y????P??>???3??gI%????=9?????k%i????????a??FL>??1????6?%~ +?O?y?M?Tr?7???}??c7?8B?>???????B?P??T??R???s?????|?6~r?3??r???%6?[??!???_?????????Q?}H??????? v/7?p??j +???x#??Y??&?????=!F???F???|d??l" +??C?l?u????Sh???xh?/s?i???? ?h>1??;?p?? ?s???!?4??2p????+? +? +??8??C??*?y??>????<?>:??? ??????p?z8? +?%l\?? +??K at ul????y% +?rn???? +?C?H???????z??x????8S?T??b?????&MwH?1+?!?!?QB.+?8?t7w8??Js(???b.????E?aw???F??U?5??q?]?G?3??????????????]???6???^^?7,$31??????}?3???????N?2?|?]??I?1?$??????Hf?????-{??????N??@ .?@ ??N??.$?Y???H'3q8?8???u?p(0???Q]?????"?0?K +Nu?&?E????C???????pq???A?0????G??Z~R??!??P[???8=????6???V?Y??? @????????jC??_1??jz?Q????? +?hI?G??A??#P`6??=?(??8???*J???t?3'??O???_w??uI?t_???Z???L?????/p`????=}*v?T?????J???c? ?????c?e????????tQ?U????????t ????\??fh??u?^????ky??????o?a??_k?I???#bci5?`?C?Q?T|{???o???K?.?I? ?]?U???.??j+L?k???Y?=e ?%'???V????_?????^??%?m?:?ff?=?\?~????\???v*-?d]ev8 +x?X2????a??^^??7?? ??a%?4#??iM?F\? u?uX?%???WN?;l???s6 ?+?H%Nz????Je?NzG7 +c??vZ??F/t?h?t?)??*p??+??J??%&c??;?|i?gK?8[??D{ +'?S???????m$??^8???W?????%J?_,?b???-G???e??p??!??XA8 ?????Z*?T5??gpZT????k?>???#;?Q?P???8??S-? +??r??NET???,???Q +?Sf??? )dD???^1?????a????H?????bJ?~?J/ ??????K??"*d??-?B tow??n ?1r?y???Qq)+?ET??Zw?????^?-??p?p??ET???EH?wr?88?WD??????????~??V????^???w??`*\??^zR?_pwq???I???"6???S?G?"*W#?DT?E?;E8M??C??V????q?S?s?y2?)????^?v?t ?A? q? %?-9ET?k??'N?x??SD???u??tCa*N?W?N?ks?x??)???cp??x??{?:H??5m???+??%$???????c???7x;????C??y??10???F?C?E????w?45:???p???G?m?cm?c?Aa?%$`Q?M??????. ???}???9??#bX???????{y?>N?-?!???"?>-d?P?_=d?`?7?G??wOm +????!?????o!?^BC? 2aH??R???S??H??_??z?0l7X?~???????G??? +"??}?qz?~?q:I/???@6????or?????d??(J#f? +p?#????}(?????}?.?@k??a???1?N???HTc???????6A +??K??}!?M ???}???o???d?????oP?@?'H:DKP>?;@?d? ???o?p???}h??K??&?W???#?L??70????F?o ?{???osA?>??????~???A???? }??P??7?faF?`'?A??????????f\42D4}?c? +???,J?7???_=17?s?~???????????G?~???????????? ?f?? 
??????j=?3??????}?A???h?~Tb??^B#?MJh?~ ?7)???&$ a?o"BC?CBX??I??7???! 4*9?!\@8B??????????4v???????p?K ?7 a????-z?$???{ ??W ???. ,z_?????D?aBF?????????%d?&%4x????????}???????B5x?Kh????c???F.$4x?d?????MFX?~? ?7???&# ta??o2B??CFX??? ?7&?????h????}??I???!#,z?0????Cw?&_y?a?88?????s{??1G +??i??^@?????O??[+?.??0???????v?m?!?&Gl??u#????#??l??X^a?M4M????,6?-?&:\%?=???x????u?Z???h??? +o???4v?????? b??& +#z??.??oC?(?????o??b??WZ?????|??????fq??????}??-k?~)C?????4???h????z{5?????????#`????E?\????a????}?\w????pcK? +??{?F???{???r?~? ???????YO???CT?{bc???~?z????????;?G??9KC?A??4b?7?$A????C?L<{???jp"????H~[-?????H~/???LPPY$???g???%?-J?VVj0?X??c??:????V???f??.???9,?,kgq?G???8??W&?????H??s&???J?[????4t?,??c$??`>????"??????0p[TzS?\???xD??Z??a"??'???Vi???C????'?b?n? x?Tc?????'wA???9???&Z 4U?!@?\?F??b??`?UN??>?? +??@??~*?g???O????$???gd?%?>G??????^??`?? +4?5??W????S?K?pFZ?|0???s? ?X*v,o???~v?? +p?u?4-?????vx?6 4;???q??9?"???4???Q???"?P9???8??8`z?a~????C??| 0?:Fi?:??/]ha^:!??n???????V?S???k???`??n????a??#%.???@ ?`?b~??bF?????? ?-???BR?????E? ???7Z"??gv^K?a??{Uo???g.??1au-?? ????N?s;??^?Wg??0`?I%??VW?????z0??????nx???????:F???at??2?????b????x?;??g ??K??? ?`fW??8?:????*?? &I???K??????????+;W?\?=^/?9??k*Q8?j{u????1?++?~?? +??????n???(????:???? ??i~?l)F?Lh??(????? $=???1?\?x????? +??cT??????n?m?_W????n??Z>P?????\gD???W??z=0?_??0p"???P3@??R??hS?srF??:??????u J?}?pa???8???1R?3?u?????d?6???c????"?T???? +?u??~??8?????[ +?zT$f?\??6;#HGKv???ag???????;??R??? ??$N2D?^?W??/9???:?1y?^??1?p?N???l?n?=p?r???????w*??-?????????s???@-? ?=???Z????5?b?Z?@???,? ???,??????E??A>p ?%u\QI?[hE[???`:???h9????????????????c\/n?=???????t??~?%?[?-o?j? .??S?En?=??/*??X???r?Fx???Q`'??????????y??J(5#i??RL?f?????(?Q?O?;??????5??q??? J?=?BW??^U??o????Wa?u?Yl?@???[l +?+??'!?"????:??knE?e?!1??r?mE??????[??N7?:hjC???"1? ?-?_?x[?L????????0?:F??q???? ?????"t????"??^a?Z?x??V???b??TCc?M????a???q]n???s????t??? ???x??o +c?[V??x[%?J2?V???0? +=????U?F???????9?<a????o?x??vo+?g?0??F6?2????"?????n? +?o+?4???"?O?j???x? 
+??6~??=?a??sR T?$3;?????3?!?,???Yc??F?6??iT????Z??D?-??I1??v?`?Gd??? ?h?N`??????? ]?Q?Q??#0??7?tO???Gm-q-AF.??? r?E,?7c?#???Ac?+8 +K1???V? +? ???&pS?????'L?? 7]????Q$??L?H?????qf?>L#VDEI?8~???wu:NjB??_??NK?b????!U??j?0??n???}?d????;!??@?'"?V'???? +???1y`_N?0????G??zc???? ??[1Iz????.?'}.\x#+(??!??o?!????Z!? +!g??R??nm!D???$???%?? In???f???IS??s{(?n???h +???????1&????\pkIpk'\pK]n?? +??????q??t?Hp?;?/j????x?????_???K? ?At:n??j ???d??lF?&i???#;?v?\????."???U???0????k@B?&0K??????oo?+?v???\?o]???j?????\R1h?\ H????-??o82n?,M?m????)x>?Dh(??l?{????? E?e?}|?B????4?30pl??i??$ +??? +?V?dw??A???Sj?3?;i?9~M?]pT?c^???H?f6?r??%?q":??U?s???P+(<R |???? ????a':?(?y +Jt?D??????+6?cle??4w*???0?_XAs;?9Fi?5 at Ah4g?D?$-??X???^`?j*?(_c??c?@ ?#?dswL???2?Ib??i?\K??u?t?=????)H% +??!??????fIJrd?9^b!F}??f??159?mZ?n?=?\??W???T? +%??^.M@??JP4?I?bJ1K?????6c????????]S????.?????????|??????~a??? s????jf5?l}???R ???F?h?>n???%V???@? ??5??-???{?? + y?59-U?j?(>?A?).z?5?t /2??????!? r?\3???J? ] +b ??I??????=?:?tgp??a???#2cm???'??!]{????m?Q +????? B?!???7? +?U? ?(L???IR??#? P??????O?S??IoC8@]? H It???????8??6?????@}@}99???R-?8??q?3?p?dg???J???~7??pV?Nha??u??FGb?D???<22R l?tr??`JM@?U?x?#? ??????om ?o???otq???c%|uz? |7z;??M???? +nc??!N?>y?k??L??}?? ??XP???c?N?C?v?#??i>????m?b9????Ng(?J? L?T??+Hg?0^?:_Igt1?G??k??? ZH??4?i?Y?Cg???c:??4??+i?+??Lt?>?UO)??"?V???YlTy?(?/??h??Kp?N?X ? ?S8??b?????}*??U??? +c??5$>????qzI?"????50??0?  ?R_G!#?qe??^?M ??Z???b?L ?|#L ??*W8???#=a^?1?G@?&??%?????b???;???9??l????? +]x??????? 5?C????l?{???if#[?? +?m?????????????B?A?a"?8o? ^???K??|??7?L???68?Q1["/?Y????7c)A:p???1'????\???J~`?c?????G?E??,:=??j?k?x/?=??[?D"??#?pm??? ?%Uy!Sy??[xUI????E??????(???^?C??Q}?>?????? g_L?ak???e}?hh?u`u??K? d? +????@;?E??t??N??s??MS-???\?-?M???0???h2??x]D? ??(??????%??????I?w?\m???e,?WAn??I????yQn?Y?[x??,?U???2&???m?`5?$?,Sc +?vZ??|T?j???gD?/??x????X?!>7?6????????L?d??%d??U?N9T +?"-ye\?",??tI/?AH?=DKI~3? 
)????P???diYe*?%p4??i9o?????#????I???I??-?g???f?????l??+??YF?c??????y=h?D??????4bs?????i~.????l????????M? ??>??O??~???g`?9????9?-?8O?K\???S? ?\????s?????>?h????=(G?L??%Y??N ????L?????v?D0V??o?]? ???h??v;??'a??)o???a?~???????????al_?????ig????)?cd?8?p??D?c?tu??*?(L?0?gI;?H???,???coy ?k?A)????a???#??{??{?????N?q?60U+?.N&?1??l????w?????]???#? s???]2???? ???>??????#]S??+?b??? ?(6?????|`?m +????2q?y????:S?I02U???S?[8?.}F???3?????3???9?rKKu;?????oZ??????"?Y? +???#??2?????8?LMr?????[???>????#>????????$-o?????m?d%kLW??o5???-?]??m?"Z???o????Y0=??`??p??V?6?V`;?[?=?}?=?j:c??f?-??,%?,???k`.??{0????eu?? ??e?? ???f8?}?fa|U?????-F?C +D??6??i?*??&_`?3?/??\?;??????]??w??]????t0C??? ?8Ar?*b??wz ?78???a???uCJ4? C +Ptuy?h????4?P?4?V??#{???`?:(???)??e?"? ?NU????2?\????9???,?\?qO??7?wDW< +???z???P?oi>??????FFp3??Yz?????Z'???_?N=oWD??0_0?y +?;????6?s?????Ws?+?/w???? +U?~ap??V?v??`{????&????>?_C???1??U???M`?D?f??f????S?\???Z????oW??v???? ???+ +1#?kcB?4w????????}9?@qh??????vE/G??U1??U?????v???O}??e?Q?p\???8?1?_??n????Y +?/??9U? ?E)?Z??_[?J_r?? +???/aH??nn?d.??NN?? ???(3?"?2?N????xQiF?v???tD3K?y?;,i?J???S??V?&?F?7m???Ybd????B?&=??R????????????????c???4?B?%???x???XlB??`???dJn|/???hg??+r?&B,?r?2 ??=?@?E[p??f?????B?|?yn???;?t???L??V?zB??%?R?I????x??n? ?+E4??c???P!???S:????$?y?A?]?8@?&I???#??????1S J?p??r6/(C?wc?5y??$ ????r?????????????7?.E???0?l!l9?|?{???Y??J???%?????`??0K? +??`W??q|n V"??!G???D?*?O?z?d?p:Y[ D3??QVx?f ?k/?"-???a5;?!??\H +??=x?? |???F?_???F?K?;?.?????]?|d?w?-?d?????@?j:???sA?4??? ?i?????q??t??8???TU???{??8?6u???`t>??'e?.??]???J?P?Ll{.??.???t?G?+[??Y???`??q?DQo??l???f??/G??t4P??~,@ ?L?2????I?L?:?B~.P.?b__q???TD_a*??=??r&??L?9?+?#?@?3?a?aM???M`2 ?>C?'A'E?UX??8?V?[n??3?1????I +??~??j?n???oS7??0*?? ???????Z? +??1?????L'?4?]??? +?? Q?L ?7?<\B??]???H?B????b* ??G??-B???$ +j???T?o?Hn?S?")_?????+"?d?"?[qa?So'H?K???-?a?7;?I|if.>+?????IN8IY,??Ib +FS???"7? (IU?w???(?LM?\?&E:?){??21]>h???m]r? q3'?K?sb?q?lb5X2}@SBL??A???? ???GemF1UnNC#?\? +??Te?0#.:g?l?!???~?? 
?y?+ ???W??))????7~??z?1u66 ????0?E??bwL?a???X??O??|???(?x??L??H1??#?????e???G?%^7|???yP??h?~/*?hB.?J:f????Zi}|? ?7rsv?Sa????z??1t?7??Fo{/ +c?f????????s?g?BP?????????E?s??V?3b??3j%8???#?5(?yFp??m?I??e?????????+?????Fo?Prs>???f??|?N????????js>n?{??????(???s????????v???????????EqS?H????:3t?)????z?iU????K??c\.?J D?2_?pkD???????z?A?Z4?tLJ??8-??Nu#-x? ???)?.??????7^ `e? +??et+F3?o???G?0?&|2??" +??XA??C?EmA???!????tdL??y=???0?&??0 !??7T????@?*$A.QvD0? 38)H????w??Tvt?t ???c`?'Q???$po????????q??{?<?L?LciP??\`???E-S?0?@??I\? +.?sbw{?P|7???*??o +&p :z?y?1?U??;??0??2?2??-?????s??a?+[????)?y?AaC????-?TK???LK&?D?%L^?q?#?? +?NpW?0L?k? ?+?L??f???1?8??,?c??X?w??b^???;%ZLn??Eq????I??`J?i??$X ?/QW?^B:?M??YB???D ?$.??#?N?J??@?(???"???2??????&96r.NeIBy?M?qv?J??? +f?%??`( c?TW^I?????!?.s7????????k?????K ??P??Z?z3??3?yA?N#>????????[??3?g????K?^z +?????? ?> c?%"R???????)^R??U??f???r3+??????^??>????D????z?*???I???|?Ze:??A?h????t_?CIK??,??b?????^???#???_???B?}CU?4w_??S?K?? +?? ?'a&?1e?~?^Q??;??#?3? +?w)f0?$Y-?bW?G??L?1W?g6{?U??B`??$j????vF@??????F??J??G???????N?^????t??%??!Q +c??5?A?e????I??^0R? ? =_?; +?z?-f???\&Ok1???S??????c|??????????(??T?~s>{?M??RD?`?" +?"?y_??RD?[s??????/|3m???.v?9?Y?(J?|??????????3?~?>????L???Ht? 
?J??2{:????????????N??U??????????+?????????]?KS???X??U +???/?VI?????G?}????Q?~???I??e??u6z?M?'a +?|Ex^?n????0???/??????K??????5??????B{a%??Z?cL9?_vl???;???Y?????h??`??????X??d????O=9^??MNW??xeX????J?!endstream +endobj +6 0 obj +21679 +endobj +4 0 obj +<> +/Contents 5 0 R +>> +endobj +3 0 obj +<< /Type /Pages /Kids [ +4 0 R +] /Count 1 +>> +endobj +1 0 obj +<> +endobj +7 0 obj +<>endobj +8 0 obj +<> +endobj +9 0 obj +<>stream + + + + + +\(Apache XML Graphics Commons: EPS Generator for Java2D\) + +Untitled + + + + + +endstream +endobj +2 0 obj +<>endobj +xref +0 10 +0000000000 65535 f +0000021975 00000 n +0000023511 00000 n +0000021916 00000 n +0000021785 00000 n +0000000015 00000 n +0000021764 00000 n +0000022039 00000 n +0000022080 00000 n +0000022109 00000 n +trailer +<< /Size 10 /Root 1 0 R /Info 2 0 R +/ID [] +>> +startxref +23702 +%%EOF From noreply at svn.ci.uchicago.edu Sat Jan 8 18:38:02 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 18:38:02 -0600 (CST) Subject: [Swift-commit] r3915 - text/parco10submission Message-ID: <20110109003802.5C2549CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-08 18:38:02 -0600 (Sat, 08 Jan 2011) New Revision: 3915 Modified: text/parco10submission/paper.tex Log: Include dds plot Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 00:37:20 UTC (rev 3914) +++ text/parco10submission/paper.tex 2011-01-09 00:38:02 UTC (rev 3915) @@ -366,11 +366,11 @@ \subsection{Data model and types} -Variables are used in Swift to name the local variables, arguments, and returns of a function. -The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment +Variables are used in Swift to name the local variables, arguments, and returns of a function. 
+The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment can be declared ``global'' to make them accessible to every other function in the script. -Each variable in a Swift script is declared to be of a specific (single) type. The Swift type model is simple, with no concepts of inheritance, abstraction, etc. +Each variable in a Swift script is declared to be of a specific (single) type. The Swift type model is simple, with no concepts of inheritance, abstraction, etc. There are three basic classes of data types: primitive, mapped, and collection. \emph{Primitive types} are provided for integer, float, string, and boolean values. Common operators are defined for @@ -387,7 +387,7 @@ Arrays use numeric indices, but are sparse. Both types of collections can contain members of primitive, mapped, or collection types; in particular, arrays can be nested to provide multi-dimensional indexing. -Structures contain a declared number of elements. The number of elements in an array can be determined at run time. +Structures contain a declared number of elements. The number of elements in an array can be determined at run time. New types can be declared to define a new mapped type or to name a structure type. The type model is by design very simple and limited to keep type semantics easy to understand, implement and use. @@ -424,8 +424,8 @@ \end{verbatim} The notation \verb|{}| indicates that the type represents a reference to a single \emph{opaque} -file, i.e., a reference to an external object whose structure is opaque to the Swift script. -For convenience such type declarations typically use the equivalent shorthand \verb|type image;| +file, i.e., a reference to an external object whose structure is opaque to the Swift script. +For convenience such type declarations typically use the equivalent shorthand \verb|type image;| (this compact notation is confusing at first but has become a useful Swift idiom). 
Mapped type variable declarations can be specified with a @@ -464,8 +464,8 @@ Swift's \emph{built-in functions} are implemented by the Swift runtime system, and perform various utility functions (numeric conversion, string manipulation, etc.). Built-in operators (+, *, etc.) behave similarly. An \emph{application interface function} (declared using the \verb|app| keyword) -specify both the interface (input files and parameters; output files) of an application program, -and the command line syntax used to invoke the program. It thus provides the information that the Swift runtime requires +specify both the interface (input files and parameters; output files) of an application program, +and the command line syntax used to invoke the program. It thus provides the information that the Swift runtime requires to invoke that program in a location independent manner. For example, the following application interface function declaration defines a Swift function {\tt rotate} that uses @@ -480,8 +480,8 @@ } \end{verbatim} -Having defined this function, we can now build a complete Swift script that rotates a file -{\tt puppy.jpeg} by 180 degrees to generate the file {\tt rotated.jpeg}: +Having defined this function, we can now build a complete Swift script that rotates a file +{\tt puppy.jpeg} by 180 degrees to generate the file {\tt rotated.jpeg}: \begin{verbatim} type image; @@ -501,7 +501,7 @@ output parameters, and variables to the right of the function invocation passed as inputs. -This script can be invoked from the command line, as in the following example, +This script can be invoked from the command line, as in the following example, in which Swift executes a single \verb|convert| command, while automatically performing for the user features such as remote multisite execution and fault tolerance, as discussed later. @@ -537,7 +537,7 @@ \subsection{Arrays and parallel execution} \label{ArraysAndForeach} -Arrays are declared using the \verb|[]| suffix. 
+Arrays are declared using the \verb|[]| suffix. For example, we declare here an array containing three strings and then use the \verb|foreach| construct to apply a function {\tt analyze} to each element of that array. (The arguments {\tt fruit} and {\tt index} resolve to an array element, and that element's index, respectively.) @@ -623,7 +623,7 @@ We have described almost all of the Swift language. (Swift also provides conditional execution through the \emph{if} and \emph{switch} statements.) -The Swift execution model is simple. +The Swift execution model is simple. Through the use of futures, functions become executable when their input parameters have all been set, either from existing data or prior function executions. Function calls may be chained by @@ -920,7 +920,7 @@ directory or one of its subdirectories, the program can expect to find all files passed as inputs to the application block; and on exit, it should leave all files named by that application block -in the same working directory. +in the same working directory. Applications should not assume that they will be executed on a particular host (to facilitate site @@ -1253,7 +1253,7 @@ it would cause the first five elements of the array {\tt geos} to be mapped to the first five files of the modis dataset in the specified directory. At lines 52-53, the script declares the array {\tt land} which will contain the output of the {\tt getlanduse} application. This declaration uses the built-in ``structured regular expression mapper'', which will determine the names of the \emph{output} files that the array will refer to once they are computed. Swift knows from context that this is an output mapping. The mapper will use regular expressions to base that names of the output files on the filenames of the corresponding elements of the input array {\tt geos} given by the {\tt source=} argument to the mapper. 
- + At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. As 317 files were mapped (in lines 47-48), the loop will invoke 317 instances of the application in parallel. \katznote{is this strictly true? Do you want to say that it will enable 317 instances to be runnable in parallel, but the number that are actually run in parallel depends on the hardware available to Swift, or something like that?} The result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation to be based on the file names mapped to the array {\tt geos[]} (in lines \katznote{is this 52-53?}). Thus the landuse histogram for file {\tt /home/wilde/modis/2002/h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. Once all the land usage histograms have have been computed, the script can then execute {\tt analyzeLandUse} at line 63 to find the requested number of highest tiles (files) with a specific land cover combination. The Swift runtime system uses futures to ensure that this analysis function is not invoked until all of its input files have computed and transported to the computation site chosen to run the analysis program. All of these steps take place automatically, using the relatively simple and location-independent Swift expressions shown. The output files to be use to hold the result are specified in the declarations at lines 61-62. \katznote{should these lines have a space inserted before the ``<'' to match the previous lines? Same question for 67-68... 
} @@ -1331,6 +1331,14 @@ \end{center} \end{figure*} +\begin{figure*}[htbp] + \begin{center} + \includegraphics[scale=0.70]{plots/dds} + \caption{System utilization for variable length tasks + at varying concurrency} + \label{PlotDDS} + \end{center} +\end{figure*} \section{Related Work} \label{Related} @@ -1678,4 +1686,4 @@ derivative works, distribute copies to the public, and perform publicly and display publicly, by or on behalf of the Government. -\end{document} \ No newline at end of file +\end{document} From noreply at svn.ci.uchicago.edu Sat Jan 8 18:40:04 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 18:40:04 -0600 (CST) Subject: [Swift-commit] r3916 - text/parco10submission Message-ID: <20110109004004.E77E59CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-08 18:40:04 -0600 (Sat, 08 Jan 2011) New Revision: 3916 Modified: text/parco10submission/paper.tex Log: Scale plots Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 00:38:02 UTC (rev 3915) +++ text/parco10submission/paper.tex 2011-01-09 00:40:04 UTC (rev 3916) @@ -1313,9 +1313,11 @@ \section{Performance Characteristics} \label{Performance} +\newcommand{\plotscale}{0.60} + \begin{figure*}[htbp] \begin{center} - \includegraphics[scale=0.70]{plots/sleep} + \includegraphics[scale=\plotscale]{plots/sleep} \caption{System utilization for variable length tasks at varying system size} \label{PlotSleep} @@ -1324,7 +1326,7 @@ \begin{figure*}[htbp] \begin{center} - \includegraphics[scale=0.70]{plots/multicore} + \includegraphics[scale=\plotscale]{plots/multicore} \caption{System utilization for variable length tasks at varying concurrency} \label{PlotMulticore} @@ -1333,7 +1335,7 @@ \begin{figure*}[htbp] \begin{center} - \includegraphics[scale=0.70]{plots/dds} + \includegraphics[scale=\plotscale]{plots/dds} \caption{System utilization for variable 
length tasks at varying concurrency} \label{PlotDDS} From noreply at svn.ci.uchicago.edu Sat Jan 8 18:45:29 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 18:45:29 -0600 (CST) Subject: [Swift-commit] r3917 - text/parco10submission Message-ID: <20110109004529.6BD289CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-08 18:45:29 -0600 (Sat, 08 Jan 2011) New Revision: 3917 Modified: text/parco10submission/paper.tex Log: A few ack. edits. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 00:40:04 UTC (rev 3916) +++ text/parco10submission/paper.tex 2011-01-09 00:45:29 UTC (rev 3917) @@ -41,12 +41,12 @@ \title{Swift: A language for distributed parallel scripting} \author[ci,mcs]{Michael Wilde\corref{cor1}} \ead{wilde at mcs.anl.gov} -\author[ci]{Mihael Hategan} -\author[mcs]{Justin M. Wozniak} +\author[ci]{Mihael Hategan} +\author[mcs]{Justin M. Wozniak} \author[alcf]{Ben Clifford} \author[ci]{Daniel S. Katz} \author[ci,mcs,cs]{Ian Foster} - + \cortext[cor1]{Corresponding author} \address[ci]{Computation Institute, University of Chicago and Argonne National Laboratory} @@ -1623,10 +1623,10 @@ This research is supported in part by NSF grants OCI-721939 and OCI-0944332, and the U.S. Department of Energy under contract DE-AC02-06CH11357. Computing resources were provided by the Argonne -Leadership Computing Facility, TeraGrid, the Open Science Grid, the UChicago Computation Institute -Petascale Active Data Store, and the Amazon Web Services Education program. +Leadership Computing Facility, TeraGrid, the Open Science Grid, the UChicago / Argonne Computation Institute +Petascale Active Data Store, and the Amazon Web Services Education allocation program. -The quantum glass example in the article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. 
We thank Glen for many contributions and extremely valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members and collaborators Sarah Kenny, Allan Espinosa, Zhao Zhang, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Mats Rynge, Michael Kubal, and Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan. +The quantum glass example in the article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. We thank Glen for many contributions and extremely valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members and collaborators Sarah Kenny, Allan Espinosa, Zhao Zhang, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Michael Andric, Steven Small, John Dennis, Mats Rynge, Michael Kubal, Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan. %% \section{TODO} From noreply at svn.ci.uchicago.edu Sat Jan 8 18:55:35 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 18:55:35 -0600 (CST) Subject: [Swift-commit] r3918 - text/parco10submission Message-ID: <20110109005535.4A87D9CC9B@svn.ci.uchicago.edu> Author: foster Date: 2011-01-08 18:55:35 -0600 (Sat, 08 Jan 2011) New Revision: 3918 Modified: text/parco10submission/paper.tex Log: Various minor edits Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 00:45:29 UTC (rev 3917) +++ text/parco10submission/paper.tex 2011-01-09 00:55:35 UTC (rev 3918) @@ -373,25 +373,36 @@ Each variable in a Swift script is declared to be of a specific (single) type. 
The Swift type model is simple, with no concepts of inheritance, abstraction, etc. There are three basic classes of data types: primitive, mapped, and collection. -\emph{Primitive types} are provided for integer, float, string, and boolean values. Common operators are defined for +The four \emph{primitive types} are integer, float, string, and boolean values. Common operators are defined for primitive types, such as arithmetic, concatenation, and explicit conversion. -An additional primitive type ``external'' is provided for manual synchronization. +(An additional primitive type ``external'' is provided for manual synchronization; we do not discuss this feature here.) -\emph{Mapped types} Mapped types are provided to declare data elements that refer (through a process called ``mapping'') to files external to the Swift script. +\emph{Mapped types} are used to declare data elements that refer (through a process called ``mapping,'' described +in Section~\ref{xx}) to files external to the Swift script. These files can then be read and written by application programs called by Swift. The mapping process can map single variables to single files, and structures and arrays to collections of files. -At the moment, there are no built-in mapped types in the language. Instead, users simply declare type names with no other structure to denote any mapped type names desired. For example: {\tt type file; type log;} +There are no built-in mapped types in the language. Instead, users declare type names with no other structure to denote +any mapped type names desired. For example: {\tt type file; type log;} -\emph{Collection types} are \emph{arrays} and \emph{structures}. -Arrays contain values of only a single type; structure fields can be of any type. Arrays can be created and defined to be of any primitive or declared type. -Arrays use numeric indices, but are sparse. 
+The two \emph{collection types} are \emph{arrays}, with elements accessed via an index, and \emph{structures}, with elements accessed via a "." operator. +Arrays contain values of only a single type; structure fields can be of any type. Both types of collections can contain members of primitive, mapped, or collection types; in particular, arrays can be nested to provide multi-dimensional indexing. Structures contain a declared number of elements. The number of elements in an array can be determined at run time. -New types can be declared to define a new mapped type or to name a structure type. The type model is by design very simple and limited to keep type semantics easy to understand, implement and use. +The size of a Swift array is not declared in the program but is determined at run time, as items are added to the array. +This feature proves useful for expressing some common classes of parallel computations. For example, we may +create an array containing just those experimental configurations that satisfy a certain criteria. +An array is considered ``closed'' when no further statements that set an element of the array can be executed. +This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. +%IAN: This last paragraph raises the issue of whether this state can always be determiend. +The set of elements that is thus defined need not be contiguous; i.e., the index set may be sparse. +As we will see below, the {\tt foreach} +statement makes it easy to access all elements of an array. -Due to the dynamic, parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. 
+%New types can be declared to define a new mapped type or to name a structure type. The type model is by design very simple and limited to keep type semantics easy to understand, implement and use. + +%Due to the dynamic, parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. %IAN: This last paragraph raises the issue of whether this state can always be determiend. %Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? garbage collection removed variables that are no longer needed - I don't see how single assignment helps here.} @@ -399,10 +410,10 @@ %garbage collection at the "dual" level (i.e., clean temp files) as well as remove unused futures from memory} % Mike: I mentioned GC as it pertains to structures and arrays: since swift is single-assignment, structures and arrays can never get de-referenced and thus dont need to be GC'ed - *I think*. But I can see that internal objects like futures should be, and given that they dont, its best to steer clear of this issue for now. -Variables that are declared to be mapped files +A variable that is declared to be a mapped file %IAN: Explain what we mean by dynamic lookup? Do we mean e.g accessing a directory to see what files it contains? -are associated with a \emph{mapper}, which defines (often through a dynamic lookup process) the -data files that are to be mapped to the variable. Array and structure elements that are declared to be mapped files are similarly mapped. +is associated with a \emph{mapper}, which defines (often through a dynamic lookup process) the +file that is mapped to the variable. 
Array and structure elements that are declared to be mapped files are similarly mapped. Mapped type and collection type variable declarations can be annotated with a From noreply at svn.ci.uchicago.edu Sat Jan 8 19:00:37 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 19:00:37 -0600 (CST) Subject: [Swift-commit] r3919 - text/parco10submission Message-ID: <20110109010037.2741D9CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-08 19:00:37 -0600 (Sat, 08 Jan 2011) New Revision: 3919 Modified: text/parco10submission/paper.tex Log: Tabulate plots Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 00:55:35 UTC (rev 3918) +++ text/parco10submission/paper.tex 2011-01-09 01:00:37 UTC (rev 3919) @@ -1326,32 +1326,50 @@ \newcommand{\plotscale}{0.60} -\begin{figure*}[htbp] +\begin{figure} \begin{center} - \includegraphics[scale=\plotscale]{plots/sleep} - \caption{System utilization for variable length tasks - at varying system size} - \label{PlotSleep} + \begin{tabular}{p{7cm}p{7cm}} + \includegraphics[scale=\plotscale]{plots/sleep} & + \includegraphics[scale=\plotscale]{plots/sleep} \\ + System utilization for variable length tasks + at varying system size & + System utilization for variable length tasks + at varying concurrency \\ + \includegraphics[scale=\plotscale]{plots/sleep} + & \\ + System utilization for variable length tasks + at varying concurrency + & \\ + \end{tabular} \end{center} -\end{figure*} +\end{figure} -\begin{figure*}[htbp] - \begin{center} - \includegraphics[scale=\plotscale]{plots/multicore} - \caption{System utilization for variable length tasks - at varying concurrency} - \label{PlotMulticore} - \end{center} -\end{figure*} +%% \begin{figure*}[htbp] +%% \begin{center} +%% \includegraphics[scale=\plotscale]{plots/sleep} +%% \caption{System utilization for variable length tasks +%% at varying 
system size} +%% \label{PlotSleep} +%% \end{center} +%% \end{figure*} +%% & +%% \begin{figure*}[htbp] +%% \begin{center} +%% \includegraphics[scale=\plotscale]{plots/multicore} +%% \caption{System utilization for variable length tasks +%% at varying concurrency} +%% \label{PlotMulticore} +%% \end{center} +%% \end{figure*} \\ -\begin{figure*}[htbp] - \begin{center} - \includegraphics[scale=\plotscale]{plots/dds} - \caption{System utilization for variable length tasks - at varying concurrency} - \label{PlotDDS} - \end{center} -\end{figure*} +%% \begin{figure*}[htbp] +%% \begin{center} +%% \includegraphics[scale=\plotscale]{plots/dds} +%% \caption{System utilization for variable length tasks +%% at varying concurrency} +%% \label{PlotDDS} +%% \end{center} +%% \end{figure*} \section{Related Work} \label{Related} From noreply at svn.ci.uchicago.edu Sat Jan 8 19:32:33 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 19:32:33 -0600 (CST) Subject: [Swift-commit] r3920 - text/parco10submission Message-ID: <20110109013233.142589CC9B@svn.ci.uchicago.edu> Author: foster Date: 2011-01-08 19:32:32 -0600 (Sat, 08 Jan 2011) New Revision: 3920 Modified: text/parco10submission/paper.tex Log: Various minor edits Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 01:00:37 UTC (rev 3919) +++ text/parco10submission/paper.tex 2011-01-09 01:32:32 UTC (rev 3920) @@ -370,10 +370,10 @@ The outermost function in a Swift (akin to ``main'' in C) is only unique in that the variables in its environment can be declared ``global'' to make them accessible to every other function in the script. -Each variable in a Swift script is declared to be of a specific (single) type. The Swift type model is simple, with no concepts of inheritance, abstraction, etc. +Each variable in a Swift script is declared to be of a specific type. 
The Swift type model is simple, with no concepts of inheritance or abstraction. There are three basic classes of data types: primitive, mapped, and collection. -The four \emph{primitive types} are integer, float, string, and boolean values. Common operators are defined for +The four primary \emph{primitive types} are integer, float, string, and boolean values. Common operators are defined for primitive types, such as arithmetic, concatenation, and explicit conversion. (An additional primitive type ``external'' is provided for manual synchronization; we do not discuss this feature here.) @@ -384,36 +384,10 @@ There are no built-in mapped types in the language. Instead, users declare type names with no other structure to denote any mapped type names desired. For example: {\tt type file; type log;} -The two \emph{collection types} are \emph{arrays}, with elements accessed via an index, and \emph{structures}, with elements accessed via a "." operator. -Arrays contain values of only a single type; structure fields can be of any type. -Both types of collections can contain members of primitive, mapped, or collection types; -in particular, arrays can be nested to provide multi-dimensional indexing. -Structures contain a declared number of elements. The number of elements in an array can be determined at run time. - -The size of a Swift array is not declared in the program but is determined at run time, as items are added to the array. -This feature proves useful for expressing some common classes of parallel computations. For example, we may -create an array containing just those experimental configurations that satisfy a certain criteria. -An array is considered ``closed'' when no further statements that set an element of the array can be executed. -This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. -%IAN: This last paragraph raises the issue of whether this state can always be determiend. 
-The set of elements that is thus defined need not be contiguous; i.e., the index set may be sparse. -As we will see below, the {\tt foreach} -statement makes it easy to access all elements of an array. - -%New types can be declared to define a new mapped type or to name a structure type. The type model is by design very simple and limited to keep type semantics easy to understand, implement and use. - -%Due to the dynamic, parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. -%IAN: This last paragraph raises the issue of whether this state can always be determiend. - -%Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? garbage collection removed variables that are no longer needed - I don't see how single assignment helps here.} -%\mihaelnote{I think we should not mention the garbage collection issue. In fact, we don't and we should implement -%garbage collection at the "dual" level (i.e., clean temp files) as well as remove unused futures from memory} -% Mike: I mentioned GC as it pertains to structures and arrays: since swift is single-assignment, structures and arrays can never get de-referenced and thus dont need to be GC'ed - *I think*. But I can see that internal objects like futures should be, and given that they dont, its best to steer clear of this issue for now. - A variable that is declared to be a mapped file %IAN: Explain what we mean by dynamic lookup? Do we mean e.g accessing a directory to see what files it contains? 
is associated with a \emph{mapper}, which defines (often through a dynamic lookup process) the -file that is mapped to the variable. Array and structure elements that are declared to be mapped files are similarly mapped. +file that is mapped to the variable. Mapped type and collection type variable declarations can be annotated with a @@ -436,22 +410,13 @@ The notation \verb|{}| indicates that the type represents a reference to a single \emph{opaque} file, i.e., a reference to an external object whose structure is opaque to the Swift script. -For convenience such type declarations typically use the equivalent shorthand \verb|type image;| -(this compact notation is confusing at first but has become a useful Swift idiom). +For convenience such type declarations typically use the equivalent shorthand \verb|type image;|. +(This compact notation can be confusing at first but has become a useful Swift idiom.) -Mapped type variable declarations can be specified with a -\emph{mapping} descriptor enclosed in \verb|<>| that indicates the file to be mapped to the variable. -For example, the following line declares a variable named \verb|photo| of -type \verb|image|. Since image is a mapped file type, it additionally declares that the -variable refers to a single file named \verb|puppy.jpeg|: +The two \emph{collection types} are structures and arrays. +A \emph{structure type} lists the set of elements contained in the structure, as for example in the following definition of the structure type \verb|snapshot|: \begin{verbatim} - image photo <"puppy.jpeg">; -\end{verbatim} - -\emph{Structure types} are defined in this manner: - -\begin{verbatim} type image; type metadata; type snapshot { @@ -460,6 +425,11 @@ } \end{verbatim} +Structure fields can be of any type. +Both types of collections can contain members of primitive, mapped, or collection types; +in particular, arrays can be nested to provide multi-dimensional indexing. 
+ + Members of a structure can be accessed using the \verb|.| operator: \begin{verbatim} @@ -468,8 +438,33 @@ im = sn.i; \end{verbatim} -%\katznote{please check the above - I changed a couple of variables so ``i'' wasn't used twice for different things in the same example.} +An array type +Arrays contain values of only a single type; structure fields can be of any type. +Both types of collections can contain members of primitive, mapped, or collection types; +in particular, arrays can be nested to provide multi-dimensional indexing. + + +The size of a Swift array is not declared in the program but is determined at run time, as items are added to the array. +This feature proves useful for expressing some common classes of parallel computations. For example, we may +create an array containing just those experimental configurations that satisfy a certain criteria. +An array is considered ``closed'' when no further statements that set an element of the array can be executed. +This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. +%IAN: This last paragraph raises the issue of whether this state can always be determiend. +The set of elements that is thus defined need not be contiguous; i.e., the index set may be sparse. +As we will see below, the {\tt foreach} +statement makes it easy to access all elements of an array. + +%New types can be declared to define a new mapped type or to name a structure type. The type model is by design very simple and limited to keep type semantics easy to understand, implement and use. + +%Due to the dynamic, parallel nature of Swift, its arrays have no notion of size. Array elements can be set as a script's execution progresses. The number of elements set increases monotonically. An array is considered ``closed'' when no further statements that set an element of the array can be executed. 
This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. +%IAN: This last paragraph raises the issue of whether this state can always be determiend. + +%Also, since all data elements have single-assignment semantics, no garbage collection issues arise. \katznote{does this follow? garbage collection removed variables that are no longer needed - I don't see how single assignment helps here.} +%\mihaelnote{I think we should not mention the garbage collection issue. In fact, we don't and we should implement +%garbage collection at the "dual" level (i.e., clean temp files) as well as remove unused futures from memory} +% Mike: I mentioned GC as it pertains to structures and arrays: since swift is single-assignment, structures and arrays can never get de-referenced and thus dont need to be GC'ed - *I think*. But I can see that internal objects like futures should be, and given that they dont, its best to steer clear of this issue for now. + \subsection{Built-in, application interface, and compound functions} Swift's \emph{built-in functions} are implemented by the Swift runtime system, and perform various utility functions (numeric conversion, string manipulation, etc.). Built-in operators (+, *, etc.) behave similarly. 
From noreply at svn.ci.uchicago.edu Sat Jan 8 19:38:14 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sat, 8 Jan 2011 19:38:14 -0600 (CST) Subject: [Swift-commit] r3921 - text/parco10submission Message-ID: <20110109013814.5986A9CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-08 19:38:14 -0600 (Sat, 08 Jan 2011) New Revision: 3921 Modified: text/parco10submission/paper.tex Log: a small change Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 01:32:32 UTC (rev 3920) +++ text/parco10submission/paper.tex 2011-01-09 01:38:14 UTC (rev 3921) @@ -1539,7 +1539,7 @@ scheduling Coasters workers using the standard job submission techniques and employing an internal IP network. -In order to achieve automatic parallelization in Swift, we ubiquitously employ futures and lightweight threads, which result in eager and massive parallelism but which has a large cost in terms of space and internal object management. We are exploring several alternatives to optimize this tradeoff and increase Swift scalability to ever larger task graphs. The solution space here includes ``lazy futures (whose computation is delayed until a value is first needed)'' and distributed task graphs with multiple, distributed evaluation engines running on separate compute nodes. +In order to achieve automatic parallelization in Swift, we ubiquitously employ futures and lightweight threads, which results in eager and massive parallelism but which has a large cost in terms of space and internal object management. We are exploring several alternatives to optimize this tradeoff and increase Swift scalability to ever larger task graphs. The solution space here includes ``lazy futures (whose computation is delayed until a value is first needed)'' and distributed task graphs with multiple, distributed evaluation engines running on separate compute nodes. 
\subsection{Filesystem access optimizations} From noreply at svn.ci.uchicago.edu Sun Jan 9 15:10:57 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sun, 9 Jan 2011 15:10:57 -0600 (CST) Subject: [Swift-commit] r3922 - text/parco10submission Message-ID: <20110109211058.048F59CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-09 15:10:57 -0600 (Sun, 09 Jan 2011) New Revision: 3922 Modified: text/parco10submission/paper.bib text/parco10submission/paper.tex Log: Revised glass science text with revised text from Glen. Added one ref on the glass app. Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-09 01:38:14 UTC (rev 3921) +++ text/parco10submission/paper.bib 2011-01-09 21:10:57 UTC (rev 3922) @@ -26,6 +26,15 @@ url = {http://people.cs.uchicago.edu/~iraicu/publications/2008_NOVA08_book-chapter_Swift.pdf}, } + at article{GlassMethods_2008, + title = {{Thermodynamic signature of growing amorphous order in glass-forming liquids}}, + author = {G Biroli and J P Bouchaud and A Cavagna and T S Grigera and P Verrocchio}, + journal = {{Nature Physics}}, + volume = {4}, + year = 2008, + pages = {771-775} +} + @article{PTMap_2010, title = {{The first global screening of protein substrates bearing protein-bound 3,4-Dihydroxyphenylalanine in Escherichia coli and human mitochondria.}}, author = {S Lee and Y Chen and H Luo and A A Wu and M Wilde and P T Schumacker and Y Zhao}, Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 01:38:14 UTC (rev 3921) +++ text/parco10submission/paper.tex 2011-01-09 21:10:57 UTC (rev 3922) @@ -1277,20 +1277,36 @@ \subsection{Simulation of glass cavity dynamics and thermodynamics.} -A recent study of the glass transition in model systems has focused on calculating from theory or simulation what is known as 
the ``Mosaic length''. +Many recent studies of the glass transition in model systems have focused +on calculating from theory or simulation what is known as the ``Mosaic +length''. Glen Hocky of the Reichman Group at Columbia is evaluating a new +cavity method \cite{GlassMethods_2008} for measuring this length scale, where particles are +simulated by molecular dynamics or Monte Carlo methods within cavities +having amorphous boundary conditions. -Glen Hocky of the Reichman Group at Columbia applied a new cavity method for measuring this length scale, where particles are simulated by molecular dynamics or Monte Carlo methods within cavities having amorphous boundary conditions. Various correlation functions are calculated at the interior of cavities of varying sizes and averaged over many independent simulations to determine a thermodynamic length. Hocky is using simulations of this method to investigate the differences between three different glass systems which all have the same structure but which differ in other subtle ways to determine if this thermodynamic length causes the variations between the three systems. +In this method, various correlation functions are +calculated at the interior of cavities of varying sizes and averaged over +many independent simulations to determine a thermodynamic length. Hocky is +using simulations of this method to investigate the differences between +three different glass systems which all have the same structure but which +differ in other subtle ways to determine if this thermodynamic length causes +the variations between the three systems. -Hocky's application code performs 100,000 Monte-Carlo steps in about 1-2 hours. Ten jobs are used to generate the 1M simulation steps needed for each configuration. The input data to each simulation is a file of about 150KB representing initial glass structures. 
Each simulation returns three new structures of 150KB each, a 50 KB log file, and a 4K file describing which particles are in the cavity. +The glass cavity simulation code performs +100,000 Monte-Carlo steps in 1-2 hours. Jobs of this length are run in +succession and strung together to make longer simulations tractable across a +variety of systems. The input data to each simulation is a file of +about 150KB representing initial glass structures and a 4K file describing +which particles are in the cavity. Each simulation returns three new +structures of 150KB each, a 50 KB log file, and the same 4K file +describing which particles are in the cavity. -Each script run covers a simulation space of 7 radii by 27 centers by 10 models, requiring 1690 jobs per run. Three methods are simulated (``kalj'', ``kawka'', and ``pedersenipl'') for total of 90 runs. Swift mappers enable metadata describing these aspects to be encoded in the data files of the campaigns to assist in managing the large volume of file data. +Each script run covers a simulation space of 7 radii by 27 centers by +10 models, requiring 1690 jobs per run. Three different model systems are +investigated for total of 90 runs. Swift mappers enable metadata describing +these aspects to be encoded in the data files of the campaigns to assist in +managing the large volume of file data. -As the simulation campaigns are quite lengthy (the first ran from October through December 2010) Hocky chose to leverage Swift ``external'' mappers to determine what simulations need to be performed at any point in the campaign. His input mappers assume an application run was complete if all the returned ``\verb|.final|'' files exist. In the case of script restarts, results that already existed were not (re)computed. 
- -Roughly 152,000 jobs are executed in a simulation campaign, defined by a set of parameter files defining molecular radii and centroids, and set set of ``run'' scripts that perform the execution of the {\tt swift} command with appropriately varying science parameters. Most runs were performed using the "User Engagement" virtual organization (VO) of the Open Science Grid (OSG) \cite{OSG, OSGEngage}. Some runs were done on other resources including University of Chicago ``PADS'' cluster and TeraGrid resources. The only change necessary to run on OSG was configuring the OSG sites to run the science application. - -The approximate OSG usage was over 100,000 cpus hours with about 100,000 tasks of 1-2 hours completed. The simulation campaign has been successfully run on about 18 OSG sites, with the majority of runs have been completed on about 6 primary sites that tend to provide the most compute-hour opportunities for members of the Engagement VO. - Example 2 shows a slightly reformatted version of the glass simulation script that was in use in Dec. 2010. Its key aspects are as follows. Lines 1-3 define the mapped file types; these files are used to compose input and output structures at lines 5-15. (At the moment, the input structure is a degenerate single-file structure, but Hocky has experimented with various multi-file input structures in prior versions of this script). The output structure reflects the fact that the simulation is restartable in 1-2 hour increments, and works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. @@ -1650,7 +1666,7 @@ Leadership Computing Facility, TeraGrid, the Open Science Grid, the UChicago / Argonne Computation Institute Petascale Active Data Store, and the Amazon Web Services Education allocation program. 
-The quantum glass example in the article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. We thank Glen for many contributions and extremely valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members and collaborators Sarah Kenny, Allan Espinosa, Zhao Zhang, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Michael Andric, Steven Small, John Dennis, Mats Rynge, Michael Kubal, Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan. +The quantum glass example in the article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. We thank Glen for many contributions to the text and code of Sec. 4 and valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members and collaborators Sarah Kenny, Allan Espinosa, Zhao Zhang, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Michael Andric, Steven Small, John Dennis, Mats Rynge, Michael Kubal, Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan; Karajan was designed and implemented by Hategan. Tim Armstrong provided helpful comments on the text. %% \section{TODO} From noreply at svn.ci.uchicago.edu Sun Jan 9 16:02:02 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sun, 9 Jan 2011 16:02:02 -0600 (CST) Subject: [Swift-commit] r3923 - text/parco10submission Message-ID: <20110109220202.C51129CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-09 16:02:02 -0600 (Sun, 09 Jan 2011) New Revision: 3923 Modified: text/parco10submission/paper.tex Log: Minor corrections. 
Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 21:10:57 UTC (rev 3922) +++ text/parco10submission/paper.tex 2011-01-09 22:02:02 UTC (rev 3923) @@ -117,7 +117,7 @@ However, others require the coupling or orchestration of large numbers of application invocations: either many invocations of the same program, or many invocations of sequences and -patterns of several programs. The execution of these %IAN +patterns of several programs. %In this model, existing applications are similar to %functions in programming, and users typically need to execute many of %them. @@ -148,7 +148,7 @@ %referential transparency, and one may readily extend the concept to %encompass arbitrary processes without difficulty. -As a language, Swift is simpler than most scripting languages because it does not replicate the capabilities that existing scripting languages like PERL, Python, and shells do well, but instead makes it easy to call such scripts as small applications. +As a language, Swift is simpler than most scripting languages because it does not replicate the capabilities that existing scripting languages like Perl, Python, and shells do well, but instead makes it easy to call such scripts as small applications. % say: it has fewer statements, limited data types and a compact library of useful support primitives. It can be extended using built-in functions coded in Java, and by mappers coded as Java built-ins or as external scripts. These functions execute in parallel as part of expression evaluation in the same mapper as externally called application programs or scripts do. % Mihael thinks that we should not claim that Swift is "simpler". The language is as complete as a language can % be and the monentary lack of libraries is independent of the language. 
Most functional languages are simpler From noreply at svn.ci.uchicago.edu Sun Jan 9 16:48:58 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sun, 9 Jan 2011 16:48:58 -0600 (CST) Subject: [Swift-commit] r3924 - in text/parco10submission: . code Message-ID: <20110109224858.8631D9CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-09 16:48:58 -0600 (Sun, 09 Jan 2011) New Revision: 3924 Modified: text/parco10submission/code/glass.swift text/parco10submission/code/glassRunCavities.swift.ORIG text/parco10submission/paper.tex Log: Updated Glass app code with better code and adjusted the annotated tour to reflect the new lines numbers. This now uses a 2-file inpt structure. Modified: text/parco10submission/code/glass.swift =================================================================== --- text/parco10submission/code/glass.swift 2011-01-09 22:02:02 UTC (rev 3923) +++ text/parco10submission/code/glass.swift 2011-01-09 22:48:58 UTC (rev 3924) @@ -1,29 +1,31 @@ +type Text; type Arc; type Restart; type Log; +type Active; type GlassIn{ - Restart startfile; + Restart startfile; + Active activefile; } type GlassOut{ - Arc arcfile; - Restart restartfile; - Restart startfile; - Restart final; - Log logfile; + Arc arcfile; + Active activefile; + Restart restartfile; + Restart startfile; + Restart final; + Log logfile; } -app (GlassOut o) glassCavityRun( - GlassIn i, string rad, string temp, string steps, - string volume, string fraca, string energyfunction, - string centerstring, string arctimestring) -{ - glassRun "-a" @filename(o.final) "--lf" @filename(i.startfile) - "--temp" temp "--stepsperparticle" steps "--volume" volume - "--fraca" fraca "--energy_function" energyfunction - "--cradius" rad "--ccoord" centerstring arctimestring - stdout=@filename(o.logfile); +app (GlassOut o) glassCavityRun + (GlassIn i, string rad, string temp, string steps, string volume, string fraca, + string energyfunction, string centerstring, string arctimestring) 
+{ glassRun + "-a" @filename(o.final) "--lf" @filename(i.startfile) stdout=@filename(o.logfile) + "--temp" temp "--stepsperparticle" steps "--energy_function" energyfunction + "--volume" volume "--fraca" fraca + "--cradius" rad "--ccoord" centerstring arctimestring; } CreateGlassSystem() @@ -39,38 +41,35 @@ string fraca=@arg("fraca","0.5"); string radii[] = readData(rlist); string centers[] = readData(clist); - int nmodels=@toint( @arg("n","1") ); - int nsub=@toint( @arg("nsub","1") ); + int nmodels=strtoi( @arg("n","1") ); + int nsub=strtoi( @arg("nsub","1") ); string savearc=@arg("savearc","FALSE"); string arctimestring; + string energyfunction=@arg("energyfunction","softsphereratiosmooth"); + if(savearc=="FALSE") { arctimestring="--arc_time=10000000"; } else{ arctimestring=""; } - string energyfunction=@arg("energyfunction","softsphereratiosmooth"); - GlassIn modelIn[][][] ; + GlassIn modelIn[][][] ; + GlassOut modelOut[][][][] ; + n=nmodels, nsub=nsub, rlist=rlist, clist=clist, ceqsteps=ceqsteps, esteps=esteps, + steps=steps, temp=temp, volume=volume, e=energyfunction, natoms=natoms>; foreach rad,rindex in radii { foreach centerstring,cindex in centers { foreach model in [0:nmodels-1] { foreach job in [0:nsub-1] { - string fname = - @filename(modelOut[rindex][cindex][model][job].final) - if (fname != "NULL") { - modelOut[rindex][cindex][model][job] = - glassCavityRun( modelIn[rindex][cindex][model], - rad, temp, steps, volume, fraca, energyfunction, - centerstring, arctimestring); + if( !(@filename(modelOut[rindex][cindex][model][job].final)=="NULL") ) { + modelOut[rindex][cindex][model][job] = glassCavityRun( + modelIn[rindex][cindex][model], rad, temp, steps, volume, fraca, + energyfunction, centerstring, arctimestring); } } } @@ -78,5 +77,4 @@ } } - -CreateGlassSystem(); +CreateGlassSystem(); \ No newline at end of file Modified: text/parco10submission/code/glassRunCavities.swift.ORIG =================================================================== --- 
text/parco10submission/code/glassRunCavities.swift.ORIG 2011-01-09 22:02:02 UTC (rev 3923) +++ text/parco10submission/code/glassRunCavities.swift.ORIG 2011-01-09 22:48:58 UTC (rev 3924) @@ -4,9 +4,11 @@ type Arc; type Restart; type Log; +type Active; type GlassOut{ Arc arcfile; + Active activefile; Restart restartfile; Restart startfile; Restart final; @@ -15,6 +17,7 @@ type GlassIn{ Restart startfile; + Active activefile; } // Lib functions (to be moved to imported file Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 22:02:02 UTC (rev 3923) +++ text/parco10submission/paper.tex 2011-01-09 22:48:58 UTC (rev 3924) @@ -1308,23 +1308,24 @@ managing the large volume of file data. Example 2 shows a slightly reformatted version of the glass simulation script that was in use in Dec. 2010. Its key aspects are as follows. -Lines 1-3 define the mapped file types; these files are used to compose input and output structures at lines 5-15. (At the moment, the input structure is a degenerate single-file structure, but Hocky has experimented with various multi-file input structures in prior versions of this script). The output structure reflects the fact that the simulation is restartable in 1-2 hour increments, and works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. +Lines 1-5 define the mapped file types; these files are used to compose input and output structures at lines 7-19. +These structure reflect the fact that the simulation is restartable in 1-2 hour increments, and that it works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. 
-The single application called by this script is the {\tt glassRun} program wrapped in the app function at lines 17-27. Note that rather than defining main program logic in ``open" (top-level) code, the script places all the program login in the function {\tt CreateGlassSystem}, invoked by the single statement at line 82. This enables the simulation script to be defined in a library that can be imported into other Swift scripts to perform entire campaigns or campaign subsets. +The single application called by this script is the {\tt glassRun} program wrapped in the app function at lines 21-29. Note that rather than defining main program logic in ``open'' (top-level) code, the script places all the program logic in the function {\tt CreateGlassSystem}, invoked by the single statement at line 80. This enables the simulation script to be defined in a library that can be imported into other Swift scripts to perform entire campaigns or campaign subsets. -The {\tt CreateGlassSystem} function starts by extracting a large set of science parameters from the Swift command line at lines 31-44 and 52 using the {\tt @arg()} function. It uses the built-in function {\tt readData} at lines 40-41 to read prepared lists of molecular radii and centroids from parameter files to define the primary physical dimensions of the simulation space. -A selectable energy function to used by the simulation application is specified as a parameter at line 52. +The {\tt CreateGlassSystem} function starts by extracting a large set of science parameters from the Swift command line at lines 33-48 using the {\tt @arg()} function. It uses the built-in function {\tt readData} at lines 42-43 to read prepared lists of molecular radii and centroids from parameter files to define the primary physical dimensions of the simulation space. +A selectable energy function to be used by the simulation application is specified as a parameter at line 48.
-At lines 54 and 58, the script leverages Swift flexible dynamic arrays to create a 3D array for input and an 4D array of structures for outputs. These data structures, whose leaf elements consist entirely of mapped files, are set using the external mappers specified for the input array at lines 54-57 and for the output array of structures at 58-61. Note that many of the science parameters are passed to the mappers, which in turn are used by the input mapper to locate files within the large multi-level directory structure of the campaign, and by the output mapper to create new directory and file naming conventions for the campaign outputs. The mappers use the common, and useful practice of using scientific metadata to determine directory and file names. +At lines 57 and 61, the script leverages Swift flexible dynamic arrays to create a 3D array for input and an 4D array of structures for outputs. These data structures, whose leaf elements consist entirely of mapped files, are set using the external mappers specified for the input array at lines 57-59 and for the output array of structures at 61-63. Note that many of the science parameters are passed to the mappers, which in turn are used by the input mapper to locate files within the large multi-level directory structure of the campaign, and by the output mapper to create new directory and file naming conventions for the campaign outputs. The mappers apply the common, useful practice of using scientific metadata to determine directory and file names. -The entire body of the {\tt CreateGlassSystem} is a four-level nesting of \verb|foreach| statements at lines 63-79. These perform a parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space. 
Note that the if statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determine by a ``\verb|NULL|'' file name returned by the mapper for the output of a given job in the simulation space. +The entire body of the {\tt CreateGlassSystem} is a four-level nesting of \verb|foreach| statements at lines 65-77. These loops perform a parallel parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space. Note that the if statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determine by a ``\verb|NULL|'' file name returned by the mapper for the output of a given job in the simulation space. The advantages of managing a simulation campaign in this manner are well borne out by Hocky's experience: the expression of the campaign is a well-structured high-level script, devoid of details about file naming, synchronization of parallel tasks, location and state of remote computing resources, or explicit explicit data transfer. Hock was able to leverage local cluster resources on many occasions, but at any time could count on his script acquiring on the order of 1,000 compute cores from 6 to 18 sites of the Open Science Grid. When executing on the OSG, he leveraged Swift's capability to replicate jobs that are waiting in queues at more congested sites, and automatically send them to sites where jobs were moving through the system. All of these capabilities would be a huge distraction from his primary scientific simulation campaign were he to use lower-level abstractions where parallelism and remote distribution were the visible responsibility of the programmer. 
Investigations of more advanced glass simulation techniques are underway, and the fact that the entire campaign can be driven by location-independent Swift scripts will enable Hocky to reliably re-execute the entire campaign with relative ease. -He believes that Swift has made the project much easier to organize and execute. The project would be completely unwieldy without using Swift, and the distraction and scripting/programming effort level of leveraging multiple computing resources would be prohibitive. -\\ -\\ +He reports that Swift has made the project much easier to organize and execute. The project would be completely unwieldy without using Swift, and the distraction and scripting/programming effort level of leveraging multiple computing resources would be prohibitive. + +\pagebreak {\bf \small Swift example 2: Monte-Carlo simulation of glass cavity dynamics.} %\begin{verbatim} %\begin{Verbatim}[fontsize=\scriptsize,frame=single,framesep=2mm,gobble=7, numbers=left] From noreply at svn.ci.uchicago.edu Sun Jan 9 18:08:11 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sun, 9 Jan 2011 18:08:11 -0600 (CST) Subject: [Swift-commit] r3925 - in text/parco10submission: . code Message-ID: <20110110000811.669CE9CC94@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-09 18:08:11 -0600 (Sun, 09 Jan 2011) New Revision: 3925 Modified: text/parco10submission/code/modis.swift text/parco10submission/paper.tex Log: Small updates based on comments form Tim. 
Modified: text/parco10submission/code/modis.swift =================================================================== --- text/parco10submission/code/modis.swift 2011-01-09 22:48:58 UTC (rev 3924) +++ text/parco10submission/code/modis.swift 2011-01-10 00:08:11 UTC (rev 3925) @@ -6,7 +6,7 @@ app (landuse output) getLandUse (imagefile input, int sortfield) { - getlanduse @input sortfield stdout=@output ; + getlanduse @input sortfield stdout=@output; } app (file output, file tilelist) analyzeLandUse Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-09 22:48:58 UTC (rev 3924) +++ text/parco10submission/paper.tex 2011-01-10 00:08:11 UTC (rev 3925) @@ -1118,8 +1118,7 @@ GRAM/LRM submission. Swift offers two approaches: \emph{clustering} and \emph{coasters}. -Clustering constructs job submissions that contain a number of component program -executions, rather than just submitting jobs one-at-a-time. +Clustering aggregates multiple program executions into a single job, thereby reducing the total number of jobs to be submitted Coasters~\cite{coasters} is a form of multi-level scheduling similar to pilot jobs~\cite{Condor-G_2002}. It submits generic coaster jobs to a site, and binds component program executions to the coaster jobs (and thus to worker @@ -1128,9 +1127,9 @@ Clustering requires little additional support on the remote site, while the coasters framework requires an active component on the head node (in Java) and on the worker nodes (in Perl) as well as -additional network connectivity within a site. In practice, the -automatic deployment and execution of these components can be -difficult on a number sites. +additional network connectivity within a site. Occasionally, the +automatic deployment and execution of the coaster components can be +problematic or even impractical on a site, and require alternative manual configuration. 
However, clustering can be less efficient than using coasters. Coasters can react much more dynamically to changing numbers of @@ -1141,7 +1140,7 @@ with excessive serialization, or, in the other direction, it can result in an excessive number of job submissions. Coaster workers can be queued and executed before all of the work that they will eventually -execute is known; this can enable more work to be done per job submission, +execute is known; this can enable the Swift scheduler to perform more application invocations per coaster worker job, and faster overall execution of the entire application. Using coasters, the status for the actual application jobs is reported as @@ -1258,9 +1257,9 @@ \end{Verbatim} it would cause the first five elements of the array {\tt geos} to be mapped to the first five files of the modis dataset in the specified directory. -At lines 52-53, the script declares the array {\tt land} which will contain the output of the {\tt getlanduse} application. This declaration uses the built-in ``structured regular expression mapper'', which will determine the names of the \emph{output} files that the array will refer to once they are computed. Swift knows from context that this is an output mapping. The mapper will use regular expressions to base that names of the output files on the filenames of the corresponding elements of the input array {\tt geos} given by the {\tt source=} argument to the mapper. +At lines 52-53, the script declares the array {\tt land} which will contain the output of the {\tt getlanduse} application. This declaration uses the built-in ``structured regular expression mapper'', which will determine the names of the \emph{output} files that the array will refer to once they are computed. Swift knows from context that this is an output mapping. 
The mapper will use regular expressions to base the names of the output files on the filenames of the corresponding elements of the input array {\tt geos} given by the {\tt source=} argument to the mapper. The declaration for {\tt land[]} maps, for example, a file {\tt h07v08.landuse.byfreq} to an element of the {\tt land[]} array for a file {\tt h07v08.tif} in the {\tt geos[]} array. - At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. As 317 files were mapped (in lines 47-48), the loop will invoke 317 instances of the application in parallel. \katznote{is this strictly true? Do you want to say that it will enable 317 instances to be runnable in parallel, but the number that are actually run in parallel depends on the hardware available to Swift, or something like that?} The result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation to be based on the file names mapped to the array {\tt geos[]} (in lines \katznote{is this 52-53?}). Thus the landuse histogram for file {\tt /home/wilde/modis/2002/h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. +At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. As 317 files were mapped (in lines 47-48), the loop will invoke 317 instances of the application in parallel. \katznote{is this strictly true? 
Do you want to say that it will enable 317 instances to be runnable in parallel, but the number that are actually run in parallel depends on the hardware available to Swift, or something like that?} The result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation to be based on the file names mapped to the array {\tt geos[]} (in lines \katznote{is this 52-53?}). Thus the landuse histogram for file {\tt /home/wilde/modis/2002/h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. Once all the land usage histograms have been computed, the script can then execute {\tt analyzeLandUse} at line 63 to find the requested number of highest tiles (files) with a specific land cover combination. The Swift runtime system uses futures to ensure that this analysis function is not invoked until all of its input files have been computed and transported to the computation site chosen to run the analysis program. All of these steps take place automatically, using the relatively simple and location-independent Swift expressions shown. The output files to be used to hold the result are specified in the declarations at lines 61-62. \katznote{should these lines have a space inserted before the ``<'' to match the previous lines? Same question for 67-68... } From noreply at svn.ci.uchicago.edu Sun Jan 9 19:11:44 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sun, 9 Jan 2011 19:11:44 -0600 (CST) Subject: [Swift-commit] r3926 - in text/parco10submission: . code Message-ID: <20110110011144.386DA9CC94@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-09 19:11:44 -0600 (Sun, 09 Jan 2011) New Revision: 3926 Modified: text/parco10submission/code/glass.swift text/parco10submission/paper.bib text/parco10submission/paper.tex Log: Cosmetic edits. Fixed url's in Ref list that were running over the page.
Modified: text/parco10submission/code/glass.swift =================================================================== --- text/parco10submission/code/glass.swift 2011-01-10 00:08:11 UTC (rev 3925) +++ text/parco10submission/code/glass.swift 2011-01-10 01:11:44 UTC (rev 3926) @@ -41,8 +41,8 @@ string fraca=@arg("fraca","0.5"); string radii[] = readData(rlist); string centers[] = readData(clist); - int nmodels=strtoi( @arg("n","1") ); - int nsub=strtoi( @arg("nsub","1") ); + int nmodels=@toint( @arg("n","1") ); + int nsub=@toint( @arg("nsub","1") ); string savearc=@arg("savearc","FALSE"); string arctimestring; string energyfunction=@arg("energyfunction","softsphereratiosmooth"); Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-10 00:08:11 UTC (rev 3925) +++ text/parco10submission/paper.bib 2011-01-10 01:11:44 UTC (rev 3926) @@ -105,10 +105,10 @@ JOURNAL={Frontiers in Neuroinformatics}, VOLUM={3}, YEAR={2009}, - URL={www.frontiersin.org/neuroscience/neuroinformatics/paper/10.3389/neuro.11/034.2009/html/}, DOI={10.3389/neuro.11/034.2009}, ISSN={ISSN 1662-5196} } +% URL={www.frontiersin.org/neuroscience/neuroinformatics/paper/10.3389/neuro.11/034.2009/html/}, @article{CNARI_2008, @@ -121,9 +121,9 @@ note = "", issn = "1053-8119", doi = "DOI: 10.1016/j.neuroimage.2007.09.021", - url = "http://www.sciencedirect.com/science/article/B6WNP-4PPW72Y-1/2/ac536a08f82f82ad9ce940ac235d8a55", author = "Uri Hasson and Jeremy I. Skipper and Michael J. Wilde and Howard C. Nusbaum and Steven L. 
Small" } + % url = "http://www.sciencedirect.com/science/article/B6WNP-4PPW72Y-1/2/ac536a08f82f82ad9ce940ac235d8a55", @article{CNARI_DUP_2007, author = {T Stef-Praun and B Clifford and I Foster and U Hasson and M Hategan and S L Small and M Wilde Michael and Y Zhao}, Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 00:08:11 UTC (rev 3925) +++ text/parco10submission/paper.tex 2011-01-10 01:11:44 UTC (rev 3926) @@ -1306,6 +1306,10 @@ these aspects to be encoded in the data files of the campaigns to assist in managing the large volume of file data. +Hocky used four Swift scripts in his simulation campaign. The first, {\tt glassCreate} takes no input structure and generates an equilibrated configuration at some temperature; +{\tt glassAnneal} takes those structures and lowers the temperature to some specified temperature; +{\tt glassEquil} freezes particles outside of a spherical cavity and runs short simulations for particles inside; and the script {\tt glassRun}, described below, is the same but starts from equilibrated cavities. + Example 2 shows a slightly reformatted version of the glass simulation script that was in use in Dec. 2010. Its key aspects are as follows. Lines 1-5 define the mapped file types; these files are used to compose input and output structures at lines 7-19. These structure reflect the fact that the simulation is restartable in 1-2 hour increments, and that it works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. @@ -1317,9 +1321,9 @@ At lines 57 and 61, the script leverages Swift flexible dynamic arrays to create a 3D array for input and an 4D array of structures for outputs. 
These data structures, whose leaf elements consist entirely of mapped files, are set using the external mappers specified for the input array at lines 57-59 and for the output array of structures at 61-63. Note that many of the science parameters are passed to the mappers, which in turn are used by the input mapper to locate files within the large multi-level directory structure of the campaign, and by the output mapper to create new directory and file naming conventions for the campaign outputs. The mappers apply the common, useful practice of using scientific metadata to determine directory and file names. -The entire body of the {\tt CreateGlassSystem} is a four-level nesting of \verb|foreach| statements at lines 65-77. These loops perform a parallel parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space. Note that the if statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determine by a ``\verb|NULL|'' file name returned by the mapper for the output of a given job in the simulation space. +The entire body of the {\tt CreateGlassSystem} is a four-level nesting of \verb|foreach| statements at lines 65-77. These loops perform a parallel parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space. 
Note that the if statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determined by a ``\verb|NULL|'' file name returned by the mapper for the output of a given job in the simulation space. In the current campaign the fourth dimension ({\tt nsub}) of the simulation space is fixed at one. This value could be increased to define sub-configurations that would perform better Monte Carlo averaging, with a multiplicative increase in the number of jobs. This is currently set to one because there are ample starting configurations, but if this was not the case (as in earlier campaigns) the script could run repeated simulations with different random seeds. -The advantages of managing a simulation campaign in this manner are well borne out by Hocky's experience: the expression of the campaign is a well-structured high-level script, devoid of details about file naming, synchronization of parallel tasks, location and state of remote computing resources, or explicit explicit data transfer. Hock was able to leverage local cluster resources on many occasions, but at any time could count on his script acquiring on the order of 1,000 compute cores from 6 to 18 sites of the Open Science Grid. When executing on the OSG, he leveraged Swift's capability to replicate jobs that are waiting in queues at more congested sites, and automatically send them to sites where jobs were moving through the system. All of these capabilities would be a huge distraction from his primary scientific simulation campaign were he to use lower-level abstractions where parallelism and remote distribution were the visible responsibility of the programmer.
+The advantages of managing a simulation campaign in this manner are borne out well by Hocky's experience: the expression of the campaign is a well-structured high-level script, devoid of details about file naming, synchronization of parallel tasks, location and state of remote computing resources, or explicit data transfer. Hocky was able to leverage local cluster resources on many occasions, but at any time could count on his script acquiring on the order of 1,000 compute cores from 6 to 18 sites of the Open Science Grid. When executing on the OSG, he leveraged Swift's capability to replicate jobs that are waiting in queues at more congested sites, and automatically send them to sites where resources were available and jobs were being processed at better rates. All of these capabilities would be a huge distraction from his primary scientific simulation campaign were he to use or script lower-level abstractions where parallelism and remote distribution were the manual responsibility of the programmer. Investigations of more advanced glass simulation techniques are underway, and the fact that the entire campaign can be driven by location-independent Swift scripts will enable Hocky to reliably re-execute the entire campaign with relative ease. He reports that Swift has made the project much easier to organize and execute. The project would be completely unwieldy without using Swift, and the distraction and scripting/programming effort level of leveraging multiple computing resources would be prohibitive. @@ -1666,7 +1670,7 @@ Leadership Computing Facility, TeraGrid, the Open Science Grid, the UChicago / Argonne Computation Institute Petascale Active Data Store, and the Amazon Web Services Education allocation program. -The quantum glass example in the article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. We thank Glen for many contributions to the text and code of Sec.
4 and valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members and collaborators Sarah Kenny, Allan Espinosa, Zhao Zhang, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Michael Andric, Steven Small, John Dennis, Mats Rynge, Michael Kubal, Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan; Karajan was designed and implemented by Hategan. Tim Armstrong provided helpful comments on the text. +The quantum glass example in the article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. We thank Glen for his contributions to the text and code of Sec. 4 and valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members and collaborators Sarah Kenny, Allan Espinosa, Zhao Zhang, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Michael Andric, Steven Small, John Dennis, Mats Rynge, Michael Kubal, Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan; Karajan was designed and implemented by Hategan. We thank Tim Armstrong for helpful comments on the text. %% \section{TODO} @@ -1712,6 +1716,8 @@ %% ramble about separation of parallel execution concerns and dataflow spec %% in the same way that gph has a separation of same concerns... compare contrast +\newpage + \bibliographystyle{elsarticle-num} \bibliography{paper,Wozniak} % for ACM SIGS style From noreply at svn.ci.uchicago.edu Sun Jan 9 19:34:47 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sun, 9 Jan 2011 19:34:47 -0600 (CST) Subject: [Swift-commit] r3927 - in text/parco10submission: . 
code Message-ID: <20110110013447.179829CC94@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-09 19:34:46 -0600 (Sun, 09 Jan 2011) New Revision: 3927 Modified: text/parco10submission/code/modis.swift text/parco10submission/paper.tex Log: Addressed Dan's comments on the MODIS code, and corrected other typos in that section. Modified: text/parco10submission/code/modis.swift =================================================================== --- text/parco10submission/code/modis.swift 2011-01-10 01:11:44 UTC (rev 3926) +++ text/parco10submission/code/modis.swift 2011-01-10 01:34:46 UTC (rev 3927) @@ -58,13 +58,13 @@ # Find the top N tiles (by total area of selected landuse types) -file topSelected<"topselected.txt">; -file selectedTiles<"selectedtiles.txt">; +file topSelected <"topselected.txt">; +file selectedTiles <"selectedtiles.txt">; (topSelected, selectedTiles) = analyzeLandUse(land, landType, nSelect); # Mark the top N tiles on a sinusoidal gridded map -image gridMap<"markedGrid.gif">; +image gridMap <"markedGrid.gif">; gridMap = markMap(topSelected); # Create multi-color images for all tiles @@ -80,5 +80,4 @@ # Assemble a montage of the top selected areas image montage ; # @arg -montage = assemble(selectedTiles,colorImage,webDir); - +montage = assemble(selectedTiles,colorImage,webDir); \ No newline at end of file Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 01:11:44 UTC (rev 3926) +++ text/parco10submission/paper.tex 2011-01-10 01:34:46 UTC (rev 3927) @@ -1235,16 +1235,15 @@ color images of those closest-matching data tiles. (A color rendering step is required to do this, as the input datasets are not viewable images; their pixel values are land-use codes.) A typical invocation of this script would be ``\emph{find the top 12 urban tiles}'' or ``\emph{find the 16 tiles with the most forest and grassland}''. 
As this script is used for tutorial purposes, the application programs it calls are simple shell scripts that use fast, generic image processing applications to process the MODIS data. Thus the example executes quickly while serving as a realistic tutorial script for much more compute-intensive satellite data processing applications. -\\ -\\ + The script is structured as follows: -Lines 1-3 define 3 mapped file types -- {\tt MODISfile} for the input images, {\tt landuse} for the output of the landuse histogram calculation; and {\tt file} for any other generic file that we don't care to assign a unique type to. +Lines 1-3 define 3 mapped file types -- {\tt MODISfile} for the input images, {\tt landuse} for the output of the landuse histogram calculation; and {\tt file} for any other generic file that we don't wish to assign a unique type to. Lines 7-32 define the Swift interface functions for the application programs {\tt getLandUse}, {\tt analyzeLandUse}, {\tt colorMODIS}, {\tt assemble}, and {\tt markMap}. -Lines 36-41 extract a set of science parameters from the {\tt swift} command line with which the user invokes the script. -These indicate the number of files of the input set to select (to enable processing the first M of N files), the set of land cover types to select, the number of ``top'' tiles to select, and parameters used to locate input and output directories. -\katznote{not sure it these syntaxes were explained in section 2 clearly - if not, they probably should be added to section 2} +Lines 36-41 uses the built-in function {\tt @arg()} to extract a set of science parameters from the {\tt swift} command line arguments with which the user invokes the script. (This is a keyword-based analog of C's {\tt argv[]} convention). 
+These parameters indicate the number of files of the input set to select (to enable processing the first M of N files), the set of land cover types to select, the number of ``top'' tiles to select, and parameters used to locate input and output directories. +%\katznote{DONE: not sure it these syntaxes were explained in section 2 clearly - if not, they probably should be added to section 2} Lines 47-48 invoke a ``external'' mapper script {\tt modis.mapper} to map the first {\tt nFiles} MODIS data files in the directory contained in the script argument {\tt MODISdir} to the array {\tt geos}. An external mapper script is written by the Swift programmer (in any language desired, but quite often mappers are simple shell scripts). External mappers are usually co-located with the Swift script, and are invoked when Swift instantiates the associated variable. They return a two-field list of the the form \emph{SwiftExpression, filename}, where \emph{SwiftExpression} is relative to the variable name being mapped. For example, if this mapper invocation were called from the Swift script at line 47-48: \begin{Verbatim}[fontsize=\scriptsize,framesep=2mm] @@ -1259,13 +1258,17 @@ At lines 52-53, the script declares the array {\tt land} which will contain the output of the {\tt getlanduse} application. This declaration uses the built-in ``structured regular expression mapper'', which will determine the names of the \emph{output} files that the array will refer to once they are computed. Swift knows from context that this is an output mapping. The mapper will use regular expressions to base the names of the output files on the filenames of the corresponding elements of the input array {\tt geos} given by the {\tt source=} argument to the mapper. The declaration for {\tt land[]} maps, for example, a file {\tt h07v08.landuse.byfreq} to an element of the {\tt land[]} array for a file {\tt h07v08.tif} in the {\tt geos[]} array. 
-At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. As 317 files were mapped (in lines 47-48), the loop will invoke 317 instances of the application in parallel. \katznote{is this strictly true? Do you want to say that it will enable 317 instances to be runnable in parallel, but the number that are actually run in parallel depends on the hardware available to Swift, or something like that?} The result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation to be based on the file names mapped to the array {\tt geos[]} (in lines \katznote{is this 52-53?}). Thus the landuse histogram for file {\tt /home/wilde/modis/2002/h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. +At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. As 317 files were mapped (in lines 47-48), the loop will submit 317 instances of the application in parallel to the underlying execution provider. These will execute with a degree of parallelism subject to available resources. +%\katznote{DONE: is this strictly true? Do you want to say that it will enable 317 instances to be runnable in parallel, but the number that are actually run in parallel depends on the hardware available to Swift, or something like that?} +At lines 52-53 the result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation to be based on the file names mapped to the array {\tt geos[]} . +Thus the landuse histogram for file {\tt /home/wilde/modis/2002/h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. 
-Once all the land usage histograms have have been computed, the script can then execute {\tt analyzeLandUse} at line 63 to find the requested number of highest tiles (files) with a specific land cover combination. The Swift runtime system uses futures to ensure that this analysis function is not invoked until all of its input files have computed and transported to the computation site chosen to run the analysis program. All of these steps take place automatically, using the relatively simple and location-independent Swift expressions shown. The output files to be use to hold the result are specified in the declarations at lines 61-62. \katznote{should these lines have a space inserted before the ``<'' to match the previous lines? Same question for 67-68... } +Once all the land usage histograms have been computed, the script can then execute {\tt analyzeLandUse} at line 63 to find the requested number of highest tiles (files) with a specific land cover combination. The Swift runtime system uses futures to ensure that this analysis function is not invoked until all of its input files have been computed and transported to the computation site chosen to run the analysis program. All of these steps take place automatically, using the relatively simple and location-independent Swift expressions shown. The output files to be used for the result are specified in the declarations at lines 61-62. +% \katznote{DONE: should these lines have a space inserted before the ``<'' to match the previous lines? Same question for 67-68... } -To visualize the results, the application function {\tt markMap} invoked at line 68 will generate an image of a world map using the MODIS projection system and indicate the selected tiles matching the analysis criteria. Since this statememt depends on the output of the analysis ({\tt topSelected}), it will wait for statement at line 63 to complete before commencing.
+To visualize the results, the application function {\tt markMap} invoked at line 68 will generate an image of a world map using the MODIS projection system and indicate the selected tiles matching the analysis criteria. Since this statement depends on the output of the analysis ({\tt topSelected}), it will wait for statement at line 63 to complete before commencing. -For additional visualization, the script assembles a full map of all the input tiles, placed in their proper grid location on the MODIS world map projection, and again marking the selected tiles. Since this operation needs true-color images of every input tiles these are computed---again in \katznote{potentially? as before} parallel---with 317 jobs invoked by the foreach statement at line 76-78. The power of Swift's implicit parallelization is shown vividly here: since the {\tt colorMODIS} call at line 77 depends only on the input array {\tt geos}, these 317 application invocations are executed in parallel with the initial 317 parallel executions of the {\tt getLandUse} application at line 56. The script concludes at line 83 by assembling a montage of all the colored tiles and writing this image file to a web-accessible directory for viewing. +For additional visualization, the script assembles a full map of all the input tiles, placed in their proper grid location on the MODIS world map projection, and again marking the selected tiles. Since this operation needs true-color images of every input tiles these are computed--again in parallel--with 317 jobs generated by the foreach statement at line 76-78. The power of Swift's implicit parallelization is shown vividly here: since the {\tt colorMODIS} call at line 77 depends only on the input array {\tt geos}, these 317 application invocations are submitted in parallel with the initial 317 parallel executions of the {\tt getLandUse} application at line 56. 
The script concludes at line 83 by assembling a montage of all the colored tiles and writing this image file to a web-accessible directory for viewing. \pagebreak {\bf \small Swift example 1: MODIS satellite image processing script} From noreply at svn.ci.uchicago.edu Mon Jan 10 09:12:15 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 09:12:15 -0600 (CST) Subject: [Swift-commit] r3929 - text/parco10submission Message-ID: <20110110151215.029E59CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-10 09:12:14 -0600 (Mon, 10 Jan 2011) New Revision: 3929 Modified: text/parco10submission/paper.tex Log: Started writing performance figure captions and text. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 06:25:54 UTC (rev 3928) +++ text/parco10submission/paper.tex 2011-01-10 15:12:14 UTC (rev 3929) @@ -1342,23 +1342,26 @@ \section{Performance Characteristics} \label{Performance} +We present here a few additional measurements to supplement those previously published. + \newcommand{\plotscale}{0.60} \begin{figure} \begin{center} + {\footnotesize \begin{tabular}{p{7cm}p{7cm}} \includegraphics[scale=\plotscale]{plots/sleep} & - \includegraphics[scale=\plotscale]{plots/sleep} \\ - System utilization for variable length tasks - at varying system size & - System utilization for variable length tasks - at varying concurrency \\ - \includegraphics[scale=\plotscale]{plots/sleep} + \includegraphics[scale=\plotscale]{plots/multicore} \\ + A. Application CPU utilization for 3 task durations (in seconds) at up to 2,048 nodes of the Blue Gene/P. + at varying system size. \mikenote{What is efficiency here???}& + B. Application CPU utilization for 3 shorter task durations (in seconds) with up to 200 concurrent processes on an 8-core local host. \mikenote{What is efficiency here??? 
what host and how many cores?}\\ + \includegraphics[scale=\plotscale]{plots/dds} & \\ - System utilization for variable length tasks - at varying concurrency + C. Efficiency for a fixed number of tasks with varying data sizes. Input and out data was one file in each direction of the size indicated. Data transfer was done with direct transfer using Swift's collective data management interface \cite{SwiftCDM_2011}. & \\ \end{tabular} + } + \caption{Swift performance metrics. Measures A and C were performed on the Blue Gene/P at various } \end{center} \end{figure} From noreply at svn.ci.uchicago.edu Mon Jan 10 09:53:38 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 09:53:38 -0600 (CST) Subject: [Swift-commit] r3930 - text/parco10submission Message-ID: <20110110155338.55A679CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-10 09:53:38 -0600 (Mon, 10 Jan 2011) New Revision: 3930 Modified: text/parco10submission/paper.tex Log: First draft of performance test descriptions Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 15:12:14 UTC (rev 3929) +++ text/parco10submission/paper.tex 2011-01-10 15:53:38 UTC (rev 3930) @@ -117,7 +117,7 @@ However, others require the coupling or orchestration of large numbers of application invocations: either many invocations of the same program, or many invocations of sequences and -patterns of several programs. +patterns of several programs. %In this model, existing applications are similar to %functions in programming, and users typically need to execute many of %them. @@ -383,7 +383,7 @@ in Section~\ref{mapping}) to files external to the Swift script. These files can then be read and written by application programs called by Swift. The mapping process can map single variables to single files, and structures and arrays to collections of files. 
-There are no built-in mapped types in the language. Instead, users declare type names with no other structure to denote +There are no built-in mapped types in the language. Instead, users declare type names with no other structure to denote any mapped type names desired. For example: {\tt type file; type log;} A variable that is declared to be a mapped file @@ -422,7 +422,7 @@ } \end{verbatim} -Structure fields can be of any type. +Structure fields can be of any type. Both types of collections can contain members of primitive, mapped, or collection types; in particular, arrays can be nested to provide multi-dimensional indexing. @@ -435,18 +435,18 @@ im = sn.i; \end{verbatim} -Arrays contain values of only a single type; structure fields can be of any type. +Arrays contain values of only a single type; structure fields can be of any type. Both types of collections can contain members of primitive, mapped, or collection types. In particular, arrays can be nested to provide multi-dimensional indexing. The size of a Swift array is not declared in the program but is determined at run time, as items are added to the array. This feature proves useful for expressing some common classes of parallel computations. For example, we may create an array containing just those experimental configurations that satisfy a certain criteria. -An array is considered ``closed'' when no further statements that set an element of the array can be executed. +An array is considered ``closed'' when no further statements that set an element of the array can be executed. This state is recognized at run time by information obtained from compile-time analysis of the script's call graph. %IAN: This last paragraph raises the issue of whether this state can always be determiend. -The set of elements that is thus defined need not be contiguous; i.e., the index set may be sparse. 
-As we will see below, the {\tt foreach} +The set of elements that is thus defined need not be contiguous; i.e., the index set may be sparse. +As we will see below, the {\tt foreach} statement makes it easy to access all elements of an array. %New types can be declared to define a new mapped type or to name a structure type. The type model is by design very simple and limited to keep type semantics easy to understand, implement and use. @@ -820,7 +820,7 @@ all files matching the name pattern \verb|*.jpeg| to an array--and then applies a function to each element of that array. Swift mappers can operate on files stored on the local machine in the directory where the {\tt swift} command is executing, or they can map any files accessible to the local machine, using absolute pathnames. Custom mappers (and some of the built-in mappers) can also map variables to files specified by URIs for access from remote servers via protocols such as GridFTP or HTTP, as described in section \ref{Execution}. Mappers can interact with structure fields and array elements in a simple and useful manner. - + New mappers can be added to Swift either as Java classes or as simple, external executable scripts or programs coded in any language. Mappers can operate as both input mappers (which map files to be processed as application inputs) and as output mappers (which specify the names of files to be produced by applications). It is important to understood that mapping a variable is a different operation than setting the value of a variable. Variables of mapped-file type are mapped (conceptually) when the variable becomes``in scope'', but they are set when a statement assigns them a value. The invocation of mappers (including external executable mappers) are completely synchronized with the Swift parallel execution model. @@ -1256,7 +1256,7 @@ At lines 52-53, the script declares the array {\tt land} which will contain the output of the {\tt getlanduse} application. 
This declaration uses the built-in ``structured regular expression mapper'', which will determine the names of the \emph{output} files that the array will refer to once they are computed. Swift knows from context that this is an output mapping. The mapper will use regular expressions to base the names of the output files on the filenames of the corresponding elements of the input array {\tt geos} given by the {\tt source=} argument to the mapper. The declaration for {\tt land[]} maps, for example, a file {\tt h07v08.landuse.byfreq} to an element of the {\tt land[]} array for a file {\tt h07v08.tif} in the {\tt geos[]} array. -At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. As 317 files were mapped (in lines 47-48), the loop will submit 317 instances of the application in parallel to the execution provider. These will execute with a degree of parallelism subject to available resources. +At lines 55-57 the script performs its first computation using a {\tt foreach} loop to invoke {\tt getLandUse} in parallel on each file mapped to the elements of {\tt geos[]}. As 317 files were mapped (in lines 47-48), the loop will submit 317 instances of the application in parallel to the execution provider. These will execute with a degree of parallelism subject to available resources. %\katznote{DONE: is this strictly true? Do you want to say that it will enable 317 instances to be runnable in parallel, but the number that are actually run in parallel depends on the hardware available to Swift, or something like that?} At lines 52-53 the result of each computation is placed in a file mapped to the array {\tt land} and named by the regular expression translation based on the file names mapped to {\tt geos[]} . 
Thus the landuse histogram for file {\tt h00v08.tif} would be written into file {\tt h00v08.landuse.freq} and would be considered by Swift to be of type {\tt landuse}. @@ -1309,12 +1309,12 @@ these aspects to be encoded in the data files of the campaigns to assist in managing the large volume of file data. -Hocky used four Swift scripts in his simulation campaign. The first, {\tt glassCreate} takes no input structure and generates an equilibrated configuration at some temperature; -{\tt glassAnneal} takes those structures and lowers the temperature to some specified temperature; +Hocky used four Swift scripts in his simulation campaign. The first, {\tt glassCreate} takes no input structure and generates an equilibrated configuration at some temperature; +{\tt glassAnneal} takes those structures and lowers the temperature to some specified temperature; {\tt glassEquil} freezes particles outside of a spherical cavity and runs short simulations for particles inside; and the script {\tt glassRun}, described below, is the same but starts from equilibrated cavities. Example 2 shows a slightly reformatted version of the glass simulation script that was in use in Dec. 2010. Its key aspects are as follows. -Lines 1-5 define the mapped file types; these files are used to compose input and output structures at lines 7-19. +Lines 1-5 define the mapped file types; these files are used to compose input and output structures at lines 7-19. These structure reflect the fact that the simulation is restartable in 1-2 hour increments, and that it works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. The single application called by this script is the {\tt glassRun} program wrapped in the app function at lines 21-29. 
Note that rather than defining main program logic in ``open" (top-level) code, the script places all the program logic in the function {\tt CreateGlassSystem}, invoked by the single statement at line 80. This enables the simulation script to be defined in a library that can be imported into other Swift scripts to perform entire campaigns or campaign subsets. @@ -1342,26 +1342,76 @@ \section{Performance Characteristics} \label{Performance} -We present here a few additional measurements to supplement those previously published. +We present here a few additional measurements to supplement +those previously published. +First, we measure the ability of Swift to support many user tasks on a +single system image. In Test A, we used Swift to submit up to 2,000 +tasks to Thwomp, a 16-core x86-based Linux compute server at Argonne +National Laboratory. Each job in the batch was an identical, simple +single-processor job that executed for the given duration and +performed application input and output at 1 byte each. The total +execution time was measured. This was compared to the total amount of +core-time consumed to report a utilization ratio, which is plotted. + +Second, we measure the ability of Swift to support many tasks on a +large, distributed memory system without considering the effect on the +underlying file services. In Test B, we used Swift/Coasters to submit +up to 20,480 tasks to Intrepid, the 40,000-node IBM BlueGene/P system +at Argonne National Laboratory. Each job in the batch was an +identical, simple single-processor job that executed for the given +duration and performed no I/O. Each node was limited to one +concurrent job, thus, the user task had 4 cores at its disposal. The +total execution time was measured. This was compared to the total +amount of node-time consumed to report a utilization ratio, which is +plotted. 
+ +Third, we measure the ability of Swift to support many tasks on a +large, distributed memory system including application use of the +underlying GPFS filesystem. In Test C, we used Swift/Coasters to +submit up to 10,240 tasks to Intrepid. Each job in the batch was an +identical, simple single-processor job that executed for 30 seconds +and performed the given amount of input and output. Coasters provider +staging was used to distribute application data to workers, except in +the case marked ``direct'', in which case the I/O was performed +directly to GPFS. Each node was limited to one concurrent job, thus, +the user task had 4 cores at its disposal. The total execution time +was measured. This was compared to the total amount of time consumed +by an equivalent shell script-based application to report an +efficiency ratio, which is plotted. + +The Test C shell script was provided with all job specifications in +advance and did not require communication between the worker +nodes and the Swift/Coasters runtime. Thus, this test measures the +overhead involved in the dynamic job creation and scheduling +functionality offered by Swift. + \newcommand{\plotscale}{0.60} \begin{figure} \begin{center} {\footnotesize \begin{tabular}{p{7cm}p{7cm}} - \includegraphics[scale=\plotscale]{plots/sleep} & - \includegraphics[scale=\plotscale]{plots/multicore} \\ - A. Application CPU utilization for 3 task durations (in seconds) at up to 2,048 nodes of the Blue Gene/P. - at varying system size. \mikenote{What is efficiency here???}& - B. Application CPU utilization for 3 shorter task durations (in seconds) with up to 200 concurrent processes on an 8-core local host. \mikenote{What is efficiency here??? what host and how many cores?}\\ + \includegraphics[scale=\plotscale]{plots/multicore} & + \includegraphics[scale=\plotscale]{plots/sleep} \\ + Test A. + Application CPU utilization for 3 task durations + (in seconds) with up to 200 concurrent processes on an 8-core + local host.
& + Test B. + Application CPU utilization for 3 task durations + (in seconds) at up to 2,048 nodes of the Blue Gene/P. + at varying system size. \\ \includegraphics[scale=\plotscale]{plots/dds} & \\ - C. Efficiency for a fixed number of tasks with varying data sizes. Input and out data was one file in each direction of the size indicated. Data transfer was done with direct transfer using Swift's collective data management interface \cite{SwiftCDM_2011}. + Test C. + Efficiency for a fixed number of tasks with varying data sizes. + Input and out data was one file in each direction of the size + indicated. & \\ \end{tabular} - } - \caption{Swift performance metrics. Measures A and C were performed on the Blue Gene/P at various } + } + \caption{Swift performance figures.} \end{center} \end{figure} From noreply at svn.ci.uchicago.edu Mon Jan 10 10:00:25 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 10:00:25 -0600 (CST) Subject: [Swift-commit] r3931 - text/parco10submission Message-ID: <20110110160025.1828C9CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-10 10:00:24 -0600 (Mon, 10 Jan 2011) New Revision: 3931 Modified: text/parco10submission/Wozniak.bib text/parco10submission/paper.bib Log: bunch of little changes in refs - making DOIs consistent, fixing a ref style Modified: text/parco10submission/Wozniak.bib =================================================================== --- text/parco10submission/Wozniak.bib 2011-01-10 15:53:38 UTC (rev 3930) +++ text/parco10submission/Wozniak.bib 2011-01-10 16:00:24 UTC (rev 3931) @@ -296,7 +296,7 @@ isbn = {1-59593-061-2}, pages = {54--}, url = {http://dx.doi.org/10.1109/SC.2005.72}, - doi = {http://dx.doi.org/10.1109/SC.2005.72}, + doi = {10.1109/SC.2005.72}, acmid = {1105819}, publisher = {IEEE Computer Society}, address = {Washington, DC, USA}, @@ -1601,7 +1601,7 @@ @article{MapReduce_2004, author = {Dean, Jeffrey and Ghemawat, Sanjay}, - title = {MapReduce: simplified 
data processing on large clusters}, + title = {{MapReduce}: simplified data processing on large clusters}, journal = {Commun. ACM}, volume = {51}, issue = {1}, @@ -1611,7 +1611,7 @@ pages = {107--113}, numpages = {7}, url = {http://doi.acm.org/10.1145/1327452.1327492}, - doi = {http://doi.acm.org/10.1145/1327452.1327492}, + doi = {10.1145/1327452.1327492}, acmid = {1327492}, publisher = {ACM}, address = {New York, NY, USA}, @@ -2736,7 +2736,7 @@ pages = {654--663}, address = {New York, NY, USA}, publisher = {ACM}, - doi = {http://doi.acm.org/10.1145/258533.258660}, + doi = {10.1145/258533.258660}, isbn = {0-89791-888-6}, location = {El Paso, Texas, United States} } @@ -3101,7 +3101,7 @@ {O}perating {S}ystems}, year = {1996}, comment = {pp. 84--92 New York, NY, USA ACM}, - doi = {http://doi.acm.org/10.1145/237090.237157}, + doi = {10.1145/237090.237157}, isbn = {0-89791-767-7}, location = {Cambridge, Massachusetts, United States} } @@ -3191,7 +3191,7 @@ pages = {144--154}, number = {2}, address = {Thousand Oaks, CA, USA}, - doi = {http://dx.doi.org/10.1177/1094342007077857}, + doi = {10.1177/1094342007077857}, issn = {1094-3420}, publisher = {Sage Publications, Inc.} } @@ -4333,7 +4333,7 @@ year = {1992}, volume = {26}, number = {3}, - doi = {http://doi.acm.org/10.1145/142413.996911}, + doi = {10.1145/142413.996911}, timestamp = {2007-10-08} } @@ -5523,7 +5523,7 @@ storage clusters}, booktitle = PDSW, year = {2007}, - doi = {http://doi.acm.org/10.1145/1374596.1374606}, + doi = {10.1145/1374596.1374606}, isbn = {978-1-59593-899-2}, timestamp = {2009.11.16} } @@ -5978,7 +5978,7 @@ pages = {16--20}, numpages = {5}, url = {http://doi.acm.org/10.1145/1713072.1713078}, - doi = {http://doi.acm.org/10.1145/1713072.1713078}, + doi = {10.1145/1713072.1713078}, acmid = {1713078}, publisher = {ACM}, address = {New York, NY, USA}, @@ -6281,19 +6281,19 @@ } isbn = 1904811817, publisher = {Packt Publishing} - at incollection {Sedna_2007, - author = {Wassermann, Bruno and Emmerich, 
Wolfgang and Butchart, Ben and Cameron, Nick and Chen, Liang and Patel, Jignesh}, - affiliation = {University College London Software Systems Engineering Group, Department of Computer Science Gower Street London WC1E 6BT UK}, - title = {Sedna: A BPEL-Based Environment for Visual Scientific Workflow Modeling}, - booktitle = {Workflows for e-Science}, - editor = {Taylor, Ian J. and Deelman, Ewa and Gannon, Dennis B. and Shields, Matthew}, - publisher = {Springer London}, - isbn = {978-1-84628-757-2}, - keyword = {Computer Science}, - pages = {428--449}, - url = {http://dx.doi.org/10.1007/978-1-84628-757-2\_26}, - note = {10.1007/978-1-84628-757-2\_26}, - year = {2007} + at incollection {Sedna_2007, + author = {Wassermann, Bruno and Emmerich, Wolfgang and Butchart, Ben and Cameron, Nick and Chen, Liang and Patel, Jignesh}, + affiliation = {University College London Software Systems Engineering Group, Department of Computer Science Gower Street London WC1E 6BT UK}, + title = {Sedna: A BPEL-Based Environment for Visual Scientific Workflow Modeling}, + booktitle = {Workflows for e-Science}, + editor = {Taylor, Ian J. and Deelman, Ewa and Gannon, Dennis B. and Shields, Matthew}, + publisher = {Springer London}, + isbn = {978-1-84628-757-2}, + keyword = {Computer Science}, + pages = {428--449}, + url = {http://dx.doi.org/10.1007/978-1-84628-757-2\_26}, + note = {10.1007/978-1-84628-757-2\_26}, + year = {2007} } @INPROCEEDINGS{SunConstellation_2008, Modified: text/parco10submission/paper.bib =================================================================== --- text/parco10submission/paper.bib 2011-01-10 15:53:38 UTC (rev 3930) +++ text/parco10submission/paper.bib 2011-01-10 16:00:24 UTC (rev 3931) @@ -128,7 +128,7 @@ year = "2008", note = "", issn = "1053-8119", - doi = "DOI: 10.1016/j.neuroimage.2007.09.021", + doi = "10.1016/j.neuroimage.2007.09.021", author = "Uri Hasson and Jeremy I. Skipper and Michael J. Wilde and Howard C. Nusbaum and Steven L. 
Small" } % url = "http://www.sciencedirect.com/science/article/B6WNP-4PPW72Y-1/2/ac536a08f82f82ad9ce940ac235d8a55", @@ -156,7 +156,7 @@ year = {2009}, issn = {0018-9162}, pages = {50--60}, - doi = {http://dx.doi.org/10.1109/MC.2009.365}, + doi = {10.1109/MC.2009.365}, publisher = {IEEE Computer Society Press}, address = {Los Alamitos, CA, USA}, } @@ -308,7 +308,7 @@ pages = {363--375}, numpages = {13}, url = {http://doi.acm.org/10.1145/1806596.1806638}, - doi = {http://doi.acm.org/10.1145/1806596.1806638}, + doi = {10.1145/1806596.1806638}, acmid = {1806638}, publisher = {ACM}, address = {New York, NY, USA}, @@ -327,7 +327,7 @@ pages = {857--869}, numpages = {13}, url = {http://dx.doi.org/10.1016/j.jpdc.2005.03.002}, - doi = {http://dx.doi.org/10.1016/j.jpdc.2005.03.002}, + doi = {10.1016/j.jpdc.2005.03.002}, acmid = {1088525}, publisher = {Academic Press, Inc.}, address = {Orlando, FL, USA}, @@ -346,21 +346,21 @@ articleno = {11}, numpages = {10}, url = {http://doi.acm.org/10.1145/1645164.1645175}, - doi = {http://doi.acm.org/10.1145/1645164.1645175}, + doi = {10.1145/1645164.1645175}, acmid = {1645175}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {coordination languages, data-flow processing, parallel processing}, } -@article{GXPmake, +@inproceedings{GXPmake, author = {Kenjiro Taura and Takuya Matsuzaki and Makoto Miwa and Yoshikazu Kamoshida and Daisaku Yokoyama and Nan Dun and Takeshi Shibata and Choi Sung Jun and Jun'ichi Tsujii}, -title = {Design and Implementation of GXP Make -- A Workflow System Based on Make}, -journal ={IEEE International Conference on eScience}, +title = {Design and Implementation of {GXP} Make -- A Workflow System Based on Make}, +booktitle ={Proceedings of IEEE International Conference on eScience}, isbn = {978-0-7695-4290-4}, year = {2010}, pages = {214--221}, -doi = {http://doi.ieeecomputersociety.org/10.1109/eScience.2010.43}, +doi = {10.1109/eScience.2010.43}, publisher = {IEEE Computer Society}, address =
{Los Alamitos, CA, USA}, } From noreply at svn.ci.uchicago.edu Mon Jan 10 12:07:47 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 12:07:47 -0600 (CST) Subject: [Swift-commit] r3933 - in text/parco10submission: . code Message-ID: <20110110180747.733FC9CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-10 12:07:47 -0600 (Mon, 10 Jan 2011) New Revision: 3933 Modified: text/parco10submission/code/glass.swift text/parco10submission/paper.tex Log: Added prior performance data. Minor corrections elsewhere. Modified: text/parco10submission/code/glass.swift =================================================================== --- text/parco10submission/code/glass.swift 2011-01-10 16:40:03 UTC (rev 3932) +++ text/parco10submission/code/glass.swift 2011-01-10 18:07:47 UTC (rev 3933) @@ -28,7 +28,7 @@ "--cradius" rad "--ccoord" centerstring arctimestring; } -CreateGlassSystem() +GlassRun() { string temp=@arg("temp","2.0"); string steps=@arg("steps","10"); @@ -77,4 +77,4 @@ } } -CreateGlassSystem(); \ No newline at end of file +GlassRun(); \ No newline at end of file Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 16:40:03 UTC (rev 3932) +++ text/parco10submission/paper.tex 2011-01-10 18:07:47 UTC (rev 3933) @@ -1317,14 +1317,14 @@ Lines 1-5 define the mapped file types; these files are used to compose input and output structures at lines 7-19. These structure reflect the fact that the simulation is restartable in 1-2 hour increments, and that it works together with the Swift script to create a simple but powerful mechanism for managing checkpoint/restart across a long-running large-scale simulation campaign. -The single application called by this script is the {\tt glassRun} program wrapped in the app function at lines 21-29.
Note that rather than defining main program logic in ``open" (top-level) code, the script places all the program logic in the function {\tt CreateGlassSystem}, invoked by the single statement at line 80. This enables the simulation script to be defined in a library that can be imported into other Swift scripts to perform entire campaigns or campaign subsets. +The single application called by this script is the {\tt glassRun} program wrapped in the app function at lines 21-29. Note that rather than defining main program logic in ``open" (top-level) code, the script places all the program logic in the function {\tt GlassRun}, invoked by the single statement at line 80. This enables the simulation script to be defined in a library that can be imported into other Swift scripts to perform entire campaigns or campaign subsets. -The {\tt CreateGlassSystem} function starts by extracting a large set of science parameters from the Swift command line at lines 33-48 using the {\tt @arg()} function. It uses the built-in function {\tt readData} at lines 42-43 to read prepared lists of molecular radii and centroids from parameter files to define the primary physical dimensions of the simulation space. +The {\tt GlassRun} function starts by extracting a large set of science parameters from the Swift command line at lines 33-48 using the {\tt @arg()} function. It uses the built-in function {\tt readData} at lines 42-43 to read prepared lists of molecular radii and centroids from parameter files to define the primary physical dimensions of the simulation space. A selectable energy function to used by the simulation application is specified as a parameter at line 48. At lines 57 and 61, the script leverages Swift flexible dynamic arrays to create a 3D array for input and an 4D array of structures for outputs. 
These data structures, whose leaf elements consist entirely of mapped files, are set using the external mappers specified for the input array at lines 57-59 and for the output array of structures at 61-63. Note that many of the science parameters are passed to the mappers, which in turn are used by the input mapper to locate files within the large multi-level directory structure of the campaign, and by the output mapper to create new directory and file naming conventions for the campaign outputs. The mappers apply the common, useful practice of using scientific metadata to determine directory and file names. -The entire body of the {\tt CreateGlassSystem} is a four-level nesting of \verb|foreach| statements at lines 65-77. These loops perform a parallel parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space. Note that the {\tt if} statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determine by a ``\verb|NULL|'' file name returned by the mapper for the output of a given job in the simulation space. In the current campaign the fourth dimension ({\tt nsub}) of the simulation space is fixed at one. This value could be increased to define sub-configurations that would perform better Monte Carlo averaging, with a multiplicative increase in the number of jobs. This is currently set to one because there are ample starting configurations, but if this was not the case (as in earlier campaigns) the script could run repeated simulations with different random seeds. +The entire body of the {\tt GlassRun} is a four-level nesting of \verb|foreach| statements at lines 65-77.
These loops perform a parallel parameter sweep over all combinations of radius, centroid, model, and job number within the simulation space. A single run of the script immediately expands to an independent parallel invocation of the simulation application for each point in the space - 1,670 jobs for the minimum case of a 7 x 27 x 10 x 1 space. Note that the {\tt if} statement at line 69 causes the simulation execution to be skipped if it has already been performed, as determine by a ``\verb|NULL|'' file name returned by the mapper for the output of a given job in the simulation space. In the current campaign the fourth dimension ({\tt nsub}) of the simulation space is fixed at one. This value could be increased to define sub-configurations that would perform better Monte Carlo averaging, with a multiplicative increase in the number of jobs. This is currently set to one because there are ample starting configurations, but if this was not the case (as in earlier campaigns) the script could run repeated simulations with different random seeds. The advantages of managing a simulation campaign in this manner are borne out well by Hocky's experience: the expression of the campaign is a well-structured high-level script, devoid of details about file naming, synchronization of parallel tasks, location and state of remote computing resources, or explicit explicit data transfer. Hocky was able to leverage local cluster resources on many occasions, but at any time could count on his script acquiring on the order of 1,000 compute cores from 6 to 18 sites of the Open Science Grid. When executing on the OSG, he leveraged Swift's capability to replicate jobs that were waiting in queues at more congested sites, and automatically send them to sites where resources were available and jobs were being processed at better rates. 
All of these capabilities would be a huge distraction from his primary scientific simulation campaign were he to use or script lower-level abstractions where parallelism and remote distribution were the manual responsibility of the programmer. @@ -1415,6 +1415,53 @@ \end{center} \end{figure} + +\subsection{Prior performance measures} +\mikenote{Remove above caption} + +Published measurements of Swift performance provide evidence that its parallel distributed programming model can be implemented with sufficient scalability and efficiency to make it a practical tool for large-scale parallel application scripting. + +The performance of Swift, submitting jobs over the wide area network from UChicago to the TeraGrid Ranger cluster at TACC, as published in \cite{CNARI_2009}are shown in figure \ref{SEMplots}, which shows an SEM workload of 131,072 jobs for 4 brain regions and two experimental conditions. This workflow completed in approximately 3 hours. The logs from the {\tt swift\_plot\_log} utility show the high degree of concurrent overlap between job execution and input and output file staging to remote computing resources. +The workflows were developed on and submitted (to Ranger) from a single-core Linux workstation at UChicago running an Intel® Xeon® 3.20 GHz CPU. Data staging was performed using the Globus GridFTP protocol and job execution was performed over the Globus GRAM 2 protocol. +During the third hour of the workflow, Swift achieved very high utilization of the 2,048 allocated processor cores and a steady rate of input and output transfers. The first two hours of the run were more bursty, due to fluctuating grid conditions and data server loads. + + +\begin{figure} + \begin{center} + {\footnotesize + \begin{tabular}{p{14 cm}} + \includegraphics[scale=.4]{plots/SEM_left} + \end{tabular} + } + \caption{128K-job SEM fMRI application execution on the Ranger Constellation.
Red=active compute jobs, blue=data stage in, green=stage out.} + \label{SEMplots} + \end{center} +\end{figure} + +Prior work also showed Swift's ability to achieve ample task rates for local and remote submission to high performance clusters\cite{PetascaleScripting_2009}. These prior results are shown in figure \ref{TaskPlots}. + +The left figure shows the PTMap application running the stage 1 processing of the E.coli K12 genome (4,127 sequences) on 2,048 Intrepid cores. The lower plot shows processor utilization as time progresses; Overall, the average per task execution time was 64 seconds, with a standard deviation of 14 seconds. These 4,127 tasks consumed a total of 73 CPU hours, in a span of 161 seconds on 2,048 processor cores, achieving 80 percent utilization. + +The right figure below shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data\cite{CNARI_2009}. The left figure shows the active jobs for a larger version of the problem type shown in figure \ref{SEMplots}. This shows an SEM script executing ~ 418,000 jobs. The red line represents job execution on Ranger; + +\begin{figure} + \begin{center} + {\footnotesize + \begin{tabular}{p{7cm}p{7cm}} + \includegraphics[scale=.3]{plots/PTMap_top} & + \includegraphics[scale=.3]{plots/SEM_top} \\ + \includegraphics[scale=.3]{plots/PTMap_bottom} & + \includegraphics[scale=.3]{plots/SEM_bottom} \\ + A. PTMap application on 2,048 nodes of the Blue Gene/P & + B. 
SEM application on varying-size processing allocations on Ranger\\ + \end{tabular} + } + \caption{Swift task rates for PTMap and SEM applications on the Blue Gene/P and Ranger} + \label{TaskPlots} + \end{center} +\end{figure} + + %% \begin{figure*}[htbp] %% \begin{center} %% \includegraphics[scale=\plotscale]{plots/sleep} @@ -1716,7 +1763,7 @@ Leadership Computing Facility, TeraGrid, the Open Science Grid, the UChicago / Argonne Computation Institute Petascale Active Data Store, and the Amazon Web Services Education allocation program. -The quantum glass example in the article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. We thank Glen for his contributions to the text and code of Sec. 4 and valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members, collaborators, and users: Sarah Kenny, Allan Espinosa, Zhao Zhang, Luiz Gadelha, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Michael Andric, Steven Small, John Dennis, Mats Rynge, Michael Kubal, Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan; Karajan was designed and implemented by Hategan. We thank Tim Armstrong for helpful comments on the text. +The glass cavity simulation example in this article is the work of Glen Hocky of the Reichman Lab of the Columbia University Department of Chemistry. We thank Glen for his contributions to the text and code of Sec. 4 and valuable feedback to the Swift project. We gratefully acknowledge the contributions of current and former Swift team members, collaborators, and users: Sarah Kenny, Allan Espinosa, Zhao Zhang, Luiz Gadelha, David Kelly, Milena Nokolic, Jon Monette, Aashish Adhikari, Marc Parisien, Michael Andric, Steven Small, John Dennis, Mats Rynge, Michael Kubal, Tibi Stef-Praun, Xu Du, Zhengxiong Hou, and Xi Li. 
The initial implementation of Swift was the work of Yong Zhao and Mihael Hategan; Karajan was designed and implemented by Hategan. We thank Tim Armstrong for helpful comments on the text. %% \section{TODO} From noreply at svn.ci.uchicago.edu Mon Jan 10 12:24:39 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 12:24:39 -0600 (CST) Subject: [Swift-commit] r3934 - text/parco10submission Message-ID: <20110110182439.4C2359CC7F@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-10 12:24:39 -0600 (Mon, 10 Jan 2011) New Revision: 3934 Modified: text/parco10submission/ResponseToReviews.txt Log: Made final comments in the ResponseToReviews file. Modified: text/parco10submission/ResponseToReviews.txt =================================================================== --- text/parco10submission/ResponseToReviews.txt 2011-01-10 18:07:47 UTC (rev 3933) +++ text/parco10submission/ResponseToReviews.txt 2011-01-10 18:24:39 UTC (rev 3934) @@ -60,25 +60,51 @@ >>> Response: -We have added a new section, "5. Performance Characteristics" in response to this point. +We have added a new section, "5. Performance Characteristics" in +response to this point. Additional tests are being developed and run, +so these results may be further refined before publication. Some +results from prior publications have been cited and included here, +which show the overlap of data transfer and processing to address the +issue above. <<< Here are some more detailed comments: -1. Swift uses restart log to reuse the results of successfully completed components. The paper mentioned "appropriate manual intervention". This seems to be something you can almost completely automate. Based on my experiences with large-scale and long running applications, this can be very useful. +1. Swift uses restart log to reuse the results of successfully +completed components. The paper mentioned "appropriate manual +intervention". 
This seems to be something you can almost completely +automate. Based on my experiences with large-scale and long running +applications, this can be very useful. >>> Automation of restart +The "manual intervention" referred to the correction of whatever +caused the script to fail. For example, a missing data file. Since the +Swift restart mechanism *is* fully automated, this phrase was removed. + <<< -2. Swift reduces job submission cost using clustering. It is not clear to me if a subgraph can be batched together by your clustering technique. This obviously requires a little bit of analysis of the data-flow graph to do it properly. But it could be quite useful to achieve better data locality. +2. Swift reduces job submission cost using clustering. It is not +clear to me if a subgraph can be batched together by your clustering +technique. This obviously requires a little bit of analysis of the +data-flow graph to do it properly. But it could be quite useful to +achieve better data locality. >>> Clustering +This section has been clarified. Swift will group tasks together based +on their expected time duration and their readiness to run. So a +cluster batch could include tasks from multiple sub-graphs. But its +based on + <<< -3. In terms of programming models, modern systems such as Microsoft's DryadLINQ and Google's FlumeJava successfully integrate data-flow constructs into state of the art programming languages (C# and Java). This integration approach is quite nice and powerful. It would be nice if the authors can compare Swift with these two systems. +3. In terms of programming models, modern systems such as Microsoft's +DryadLINQ and Google's FlumeJava successfully integrate data-flow +constructs into state of the art programming languages (C# and Java). +This integration approach is quite nice and powerful. It would be nice +if the authors can compare Swift with these two systems. 
>>> Response: @@ -104,12 +130,18 @@ >>> auto-parallelization +This is now discussed in much more detail, in section 2. + <<< Out of the four application examples presented, two of the cases (4.3 and 4.4) do not contain enough details to support the discussion; deleting the two examples should not affect the clarity of the paper. >>> Examples: clarify or delete +We have completely revised the application example section (4). It +now shows only two app examples, but does so in much more precise +detail, to provide a better understanding of what using Swift entails. + <<< It would be helpful to elaborate more in example 4.2 on how each task/job gets scheduled onto Ranger nodes or how Swift interacts with the local batch job scheduler, which would in turn help audience understand better how SwiftScript could be used for a certain class of applications on a more tightly coupled massive computing environment (such as @@ -117,6 +149,9 @@ >>> 4.2 - task scheduling on Ranger +As Sec 4 is revised considerably, this information has been included +in Secs. 2 & 3. + <<< There are a few minor typos in the manuscript. @@ -152,6 +187,8 @@ >>> examples 4.3 and 4.4: annotate or delete +Addressed and revised completely, as mentioned above. + <<< Further comments: @@ -252,14 +289,18 @@ 10. Section 4.2, what is an "emblem experiment"? ->>> <<< +>>> +This has been removed in the revision of Sec 4. + +<<< + 11. The margins in sections 4.2 and 4.4 need fixing as some lines run completely off the page. >>> -fixed in 4.2 - not sure about the state of 4.4 yet +This has been addressed in the revision of Sec 4. <<< @@ -331,44 +372,3 @@ This has been fixed. <<< - -==== Other improvement notes - -mention futures in parco paper, show them visually to show fine grain - -mention habanero (c and java) and other fresh stack languages (x10) -compare to GEL - from SIngapore - -mention csp bsp sim and diff to mpi (IPO) - -Why a new model? 
- -examine determinism - -examine language vs library - -examine how it builds on karajan - - ---- - -Innov: fine grained parallelism; no need for flow analysis; -sep of concerns: how throttling and site mgmt are isolated - -How we can manage data locality - -How restart is more transparent than it sounds here - -Fine: how work takes off before a proc returns - -Add table of critical benchmarks on multi sys types - -How complex flows are easily composed - -How types and mappers encapsulate complexity - -2.3 order of exec: show more complex patterns here or later - -Second 2 is the part that reads like a LRM; make it more interesting - -2.5 don't say marker types From noreply at svn.ci.uchicago.edu Mon Jan 10 12:44:30 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 12:44:30 -0600 (CST) Subject: [Swift-commit] r3935 - text/parco10submission Message-ID: <20110110184430.818D79CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-10 12:44:30 -0600 (Mon, 10 Jan 2011) New Revision: 3935 Modified: text/parco10submission/paper.tex Log: adding some comments and making some changes in Perf. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 18:24:39 UTC (rev 3934) +++ text/parco10submission/paper.tex 2011-01-10 18:44:30 UTC (rev 3935) @@ -1343,32 +1343,35 @@ \label{Performance} We present here a few additional measurements to supplement -those previously published. +those previously published. \katznote{need to site something here, maybe \cite{Swift_2007}?} -First, we measure the ability of Swift to support many user tasks on a -single system image. In Test A, we used Swift to submit up to 2,000 +First, we measured the ability of Swift to support many user tasks on a +single system image. We used Swift to submit up to 2,000 tasks to Thwomp, a 16-core x86-based Linux compute server at Argonne National Laboratory. 
Each job in the batch was an identical, simple single-processor job that executed for the given duration and performed application input and output at 1 byte each. The total execution time was measured. This was compared to the total amount of -core-time consumed to report a utilization ratio, which is plotted. +core-time consumed to report a utilization ratio, which is plotted in Figure~\ref{fig:swift-performance}, case A. +\katznote{what knowledge should I gain from the figure? is the data good or bad? why?} Second, we measure the ability of Swift to support many tasks on a large, distributed memory system without considering the effect on the -underlying file services. In Test B, we used Swift/Coasters to submit +underlying file services. We used Swift/Coasters to submit up to 20,480 tasks to Intrepid, the 40,000-node IBM BlueGene/P system -at Argonne National Laboratory. Each job in the batch was an +at Argonne. Each job in the batch was an identical, simple single-processor job that executed for the given duration and performed no I/O. Each node was limited to one concurrent job, thus, the user task had 4 cores at its disposal. The total execution time was measured. This was compared to the total amount of node-time consumed to report a utilization ratio, which is -plotted. +plotted in Figure~\ref{fig:swift-performance}, case B. +\katznote{what knowledge should I gain from the figure? is the data good or bad? why?} + Third, we measure the ability of Swift to support many tasks on a large, distributed memory system including application use of the -underlying GPFS filesystem. In Test C, we used Swift/Coasters to +underlying GPFS filesystem. We used Swift/Coasters to submit up to 10,240 tasks to Intrepid. Each job in the batch was an identical, simple single-processor job that executed for 30 seconds and performed the given amount of input and output. Coasters provider @@ -1378,7 +1381,8 @@ the user task had 4 cores at its disposal. 
The total execution time was measured. This was compared to the total amount of time consumed by an equivalent shell script-based application to report an -efficiency ratio, which is plotted. +efficiency ratio, which is plotted in Figure~\ref{fig:swift-performance}, case C. +\katznote{what knowledge should I gain from the figure? is the data good or bad? why?} The Test C shell script was provided with all job specifications in advance and did not require communication from between the worker @@ -1411,7 +1415,7 @@ & \\ \end{tabular} } - \caption{Swift performance figures.} + \caption{Swift performance figures.\label{fig:swift-performance}} \end{center} \end{figure} @@ -1419,9 +1423,10 @@ \subsection{Prior performance measures} \mikenote{Remove above caption} -Published measurements of Swift performance provide evidence that its parallel distributed programming model can be implemented with sufficient scalability and efficiency to make it a practical tool for large-scale parallel application scripting. +Published measurements of Swift performance +provide evidence that its parallel distributed programming model can be implemented with sufficient scalability and efficiency to make it a practical tool for large-scale parallel application scripting. -The performance of Swift, submitting jobs over the wide area network from UChicago to the TeraGrid Ranger cluster at TACC, as published in \cite{CNARI_2009}are shown in figure \ref{SEMplots}, which shows an SEM workload of 131,072 jobs for 4 brain regions and two experimental conditions. This workflow completed in approximately 3 hours. The logs from the {\tt swift\_plot\_log} utility show the high degree of concurrent overlap between job execution and input and output file staging to remote computing resources. 
+The performance of Swift submitting jobs over the wide area network from UChicago to the TeraGrid Ranger cluster at TACC are shown in Figure~\ref{SEMplots} (from \cite{CNARI_2009}), which shows an SEM workload of 131,072 jobs for 4 brain regions and two experimental conditions. This workflow completed in approximately 3 hours. The logs from the {\tt swift\_plot\_log} utility show the high degree of concurrent overlap between job execution and input and output file staging to remote computing resources. The workflows were developed on and submitted (to Ranger) from a single-core Linux workstation at UChicago running an Intel® Xeon® 3.20 GHz CPU. Data staging was performed using the Globus GridFTP protocol and job execution was performed over the Globus GRAM 2 protocol. During the third hour of the workflow, Swift achieved very high utilization of the 2,048 allocated processor cores and a steady rate of input and output transfers. The first two hours of the run were more bursty, due to fluctuating grid conditions and data server loads. @@ -1438,11 +1443,11 @@ \end{center} \end{figure} -Prior work also showed Swift's ability to achieve ample task rates for local and remote submission to high performance clusters\cite{PetascaleScripting_2009}. These prior results are shown in figure \ref{TaskPlots}. +Prior work also showed Swift's ability to achieve ample task rates for local and remote submission to high performance clusters. These prior results are shown in Figure~\ref{TaskPlots} (from~\cite{PetascaleScripting_2009}). -The left figure shows the PTMap application running the stage 1 processing of the E.coli K12 genome (4,127 sequences) on 2,048 Intrepid cores. The lower plot shows processor utilization as time progresses; Overall, the average per task execution time was 64 seconds, with a standard deviation of 14 seconds. These 4,127 tasks consumed a total of 73 CPU hours, in a span of 161 seconds on 2,048 processor cores, achieving 80 percent utilization.
+Figure~\ref{TaskPlots} left shows the PTMap application running the stage 1 processing of the E.coli K12 genome (4,127 sequences) on 2,048 Intrepid cores. The lower plot shows processor utilization as time progresses; Overall, the average per task execution time was 64 seconds, with a standard deviation of 14 seconds. These 4,127 tasks consumed a total of 73 CPU hours, in a span of 161 seconds on 2,048 processor cores, achieving 80 percent utilization. -The right figure below shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data\cite{CNARI_2009}. The left figure shows the active jobs for a larger version of the problem type shown in figure \ref{SEMplots}. This shows an SEM script executing ~ 418,000 jobs. The red line represents job execution on Ranger; +Figure~\ref{TaskPlots} right shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data\cite{CNARI_2009}. The left \katznote{lower?} figure shows the active jobs for a larger version of the problem type shown in Figure~\ref{SEMplots}. This shows an SEM script executing ~ 418,000 jobs. The red line represents job execution on Ranger; \begin{figure} \begin{center} From noreply at svn.ci.uchicago.edu Mon Jan 10 12:56:34 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 12:56:34 -0600 (CST) Subject: [Swift-commit] r3936 - text/parco10submission/plots Message-ID: <20110110185634.AC46D9CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-10 12:56:34 -0600 (Mon, 10 Jan 2011) New Revision: 3936 Added: text/parco10submission/plots/PTMap_bottom.pdf text/parco10submission/plots/PTMap_top.pdf text/parco10submission/plots/SEM_bottom.pdf text/parco10submission/plots/SEM_top.pdf Log: Add perf graphics. 
Added: text/parco10submission/plots/PTMap_bottom.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/plots/PTMap_bottom.pdf ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: text/parco10submission/plots/PTMap_top.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/plots/PTMap_top.pdf ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: text/parco10submission/plots/SEM_bottom.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/plots/SEM_bottom.pdf ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: text/parco10submission/plots/SEM_top.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/plots/SEM_top.pdf ___________________________________________________________________ Name: svn:mime-type + application/octet-stream From noreply at svn.ci.uchicago.edu Mon Jan 10 12:57:54 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 12:57:54 -0600 (CST) Subject: [Swift-commit] r3937 - trunk Message-ID: <20110110185754.849489CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-10 12:57:54 -0600 (Mon, 10 Jan 2011) New Revision: 3937 Modified: trunk/.classpath Log: Don't rely on provider-wonky Modified: trunk/.classpath =================================================================== --- trunk/.classpath 2011-01-10 18:56:34 UTC (rev 3936) +++ trunk/.classpath 2011-01-10 18:57:54 UTC (rev 3937) @@ -18,7 +18,6 @@ - From noreply at svn.ci.uchicago.edu Mon Jan 10 13:02:45 2011 From: 
noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:02:45 -0600 (CST) Subject: [Swift-commit] r3938 - trunk/docs Message-ID: <20110110190245.E16289CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-10 13:02:45 -0600 (Mon, 10 Jan 2011) New Revision: 3938 Modified: trunk/docs/ Log: Set ignores Property changes on: trunk/docs ___________________________________________________________________ Name: svn:ignore + docbook userguide *.php index.html From noreply at svn.ci.uchicago.edu Mon Jan 10 13:04:22 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:04:22 -0600 (CST) Subject: [Swift-commit] r3939 - trunk/docs/formatting Message-ID: <20110110190422.E0B099CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-10 13:04:22 -0600 (Mon, 10 Jan 2011) New Revision: 3939 Modified: trunk/docs/formatting/ Log: Set ignore Property changes on: trunk/docs/formatting ___________________________________________________________________ Name: svn:ignore + docbook From noreply at svn.ci.uchicago.edu Mon Jan 10 13:04:46 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:04:46 -0600 (CST) Subject: [Swift-commit] r3940 - text/parco10submission Message-ID: <20110110190446.12EB29CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-10 13:04:45 -0600 (Mon, 10 Jan 2011) New Revision: 3940 Modified: text/parco10submission/paper.tex Log: 2 small changes Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 19:04:22 UTC (rev 3939) +++ text/parco10submission/paper.tex 2011-01-10 19:04:45 UTC (rev 3940) @@ -1355,7 +1355,7 @@ core-time consumed to report a utilization ratio, which is plotted in Figure~\ref{fig:swift-performance}, case A. \katznote{what knowledge should I gain from the figure? is the data good or bad? 
why?} -Second, we measure the ability of Swift to support many tasks on a +Second, we measured the ability of Swift to support many tasks on a large, distributed memory system without considering the effect on the underlying file services. We used Swift/Coasters to submit up to 20,480 tasks to Intrepid, the 40,000-node IBM BlueGene/P system @@ -1369,7 +1369,7 @@ \katznote{what knowledge should I gain from the figure? is the data good or bad? why?} -Third, we measure the ability of Swift to support many tasks on a +Third, we measured the ability of Swift to support many tasks on a large, distributed memory system including application use of the underlying GPFS filesystem. We used Swift/Coasters to submit up to 10,240 tasks to Intrepid. Each job in the batch was an From noreply at svn.ci.uchicago.edu Mon Jan 10 13:05:06 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:05:06 -0600 (CST) Subject: [Swift-commit] r3941 - trunk/docs/historical Message-ID: <20110110190506.5D86D9CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-10 13:05:06 -0600 (Mon, 10 Jan 2011) New Revision: 3941 Modified: trunk/docs/historical/ Log: Set ignores Property changes on: trunk/docs/historical ___________________________________________________________________ Name: svn:ignore + *.php From noreply at svn.ci.uchicago.edu Mon Jan 10 13:19:04 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:19:04 -0600 (CST) Subject: [Swift-commit] r3942 - text/parco10submission/plots Message-ID: <20110110191904.BB6619CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-10 13:19:04 -0600 (Mon, 10 Jan 2011) New Revision: 3942 Added: text/parco10submission/plots/SEM_IO.pdf text/parco10submission/plots/SEM_IO.png Log: Added SEM perf image. 
Added: text/parco10submission/plots/SEM_IO.pdf =================================================================== (Binary files differ) Property changes on: text/parco10submission/plots/SEM_IO.pdf ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: text/parco10submission/plots/SEM_IO.png =================================================================== (Binary files differ) Property changes on: text/parco10submission/plots/SEM_IO.png ___________________________________________________________________ Name: svn:mime-type + application/octet-stream From noreply at svn.ci.uchicago.edu Mon Jan 10 13:20:00 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:20:00 -0600 (CST) Subject: [Swift-commit] r3943 - text/parco10submission Message-ID: <20110110192000.416389CC9B@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-10 13:20:00 -0600 (Mon, 10 Jan 2011) New Revision: 3943 Modified: text/parco10submission/paper.tex Log: Update for missing SEM image. Need to see if we lost text in conflct resolutions. Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 19:19:04 UTC (rev 3942) +++ text/parco10submission/paper.tex 2011-01-10 19:20:00 UTC (rev 3943) @@ -1342,18 +1342,17 @@ \section{Performance Characteristics} \label{Performance} -We present here a few additional measurements to supplement -those previously published. \katznote{need to site something here, maybe \cite{Swift_2007}?} +We present here a few additional measurements of Swift performance, and highlight and include a few previously published results. -First, we measured the ability of Swift to support many user tasks on a -single system image. 
We used Swift to submit up to 2,000 -tasks to Thwomp, a 16-core x86-based Linux compute server at Argonne +\subsection{Synthetic benchmark results} +First, we measure the ability of Swift to support many user tasks on a +single local system. In Test A, we used Swift to submit up to 2,000 +tasks to a 16-core x86-based Linux compute server at Argonne National Laboratory. Each job in the batch was an identical, simple single-processor job that executed for the given duration and performed application input and output at 1 byte each. The total execution time was measured. This was compared to the total amount of -core-time consumed to report a utilization ratio, which is plotted in Figure~\ref{fig:swift-performance}, case A. -\katznote{what knowledge should I gain from the figure? is the data good or bad? why?} +core-time consumed to report a utilization ratio, which is plotted. We observe that for tasks of only 5 seconds in duration, Swift can sustain 100 concurrent application executions at a CPU utilization of 90\%, and 200 concurrent executions at a utilization of 85\%. Second, we measured the ability of Swift to support many tasks on a large, distributed memory system without considering the effect on the @@ -1365,8 +1364,9 @@ concurrent job, thus, the user task had 4 cores at its disposal. The total execution time was measured. This was compared to the total amount of node-time consumed to report a utilization ratio, which is -plotted in Figure~\ref{fig:swift-performance}, case B. -\katznote{what knowledge should I gain from the figure? is the data good or bad? why?} +plotted. We observe that for tasks of 100 second duration, Swift achieves +a 95\% CPU utilization of 2,048 compute nodes. Even for 30 second tasks, +it can sustain an 80\% utilization at this level of concurrency. 
Third, we measured the ability of Swift to support many tasks on a @@ -1383,9 +1383,10 @@ by an equivalent shell script-based application to report an efficiency ratio, which is plotted in Figure~\ref{fig:swift-performance}, case C. \katznote{what knowledge should I gain from the figure? is the data good or bad? why?} +\emph{Note: this test will be refined with adequate data points before publication.} The Test C shell script was provided with all job specifications in -advance and did not require communication from between the worker +advance and did not require communication between the worker nodes and the Swift/Coasters runtime. Thus, this test measures the overhead involved in the dynamic job creation and scheduling functionality offered by Swift. @@ -1420,11 +1421,9 @@ \end{figure} -\subsection{Prior performance measures} -\mikenote{Remove above caption} +\subsection{Application performance measurements} -Published measurements of Swift performance -provide evidence that its parallel distributed programming model can be implemented with sufficient scalability and efficiency to make it a practical tool for large-scale parallel application scripting. +Previously published measurements of Swift performance performance on several scientific applications provide evidence that its parallel distributed programming model can be implemented with sufficient scalability and efficiency to make it a practical tool for large-scale parallel application scripting. The performance of Swift submitting jobs over the wide area network from UChicago to the TeraGrid Ranger cluster at TACC are shown in Figure~\ref{SEMplots} (from \cite{CNARI_2009}), which shows an SEM workload of 131,072 jobs for 4 brain regions and two experimental conditions. This workflow completed in approximately 3 hours. The logs from the {\tt swift\_plot\_log} utility show the high degree of concurrent overlap between job execution and input and output file staging to remote computing resources. 
The workflows were developed on and submitted (to Ranger) from a single-core Linux workstation at UChicago running an Intel? Xeon? 3.20 GHz CPU. Data staging was performed using the Globus GridFTP protocol and job execution was performed over the Globus GRAM 2 protocol. @@ -1435,19 +1434,19 @@ \begin{center} {\footnotesize \begin{tabular}{p{14 cm}} - \includegraphics[scale=.4]{plots/SEM_left} + \includegraphics[scale=.4]{plots/SEM_IO} \end{tabular} } - \caption{128K-job SEM fMRI application execution on the Ranger Constellation. Red=active compute jobs, blue=data stage in, green=stage out.} + \caption{128K-job SEM fMRI application execution on the Ranger Constellation (From publication \cite{CNARI_2009}). Red=active compute jobs, blue=data stage in, green=stage out. } \label{SEMplots} \end{center} \end{figure} Prior work also showed Swift's ability to achieve ample task rates for local and remote submission to high performance clusters. These prior results are shown in Figure~\ref{TaskPlots} (from~\cite{PetascaleScripting_2009}). -Figure~\ref{TaskPlots} left shows the PTMap application running the stage 1 processing of the E.coli K12 genome (4,127 sequences) on 2,048 Intrepid cores. The lower plot shows processor utilization as time progresses; Overall, the average per task execution time was 64 seconds, with a standard deviation of 14 seconds. These 4,127 tasks consumed a total of 73 CPU hours, in a span of 161 seconds on 2,048 processor cores, achieving 80 percent utilization. +The left plot in figure \ref{TaskPlots} shows the PTMap application running the stage 1 processing of the E.coli K12 genome (4,127 sequences) on 2,048 Intrepid cores. The lower plot shows processor utilization as time progresses; Overall, the average per task execution time was 64 seconds, with a standard deviation of 14 seconds. These 4,127 tasks consumed a total of 73 CPU hours, in a span of 161 seconds on 2,048 processor cores, achieving 80 percent utilization. 
-Figure~\ref{TaskPlots} right shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data\cite{CNARI_2009}. The left \katznote{lower?} figure shows the active jobs for a larger version of the problem type shown in Figure~\ref{SEMplots}. This shows an SEM script executing ~ 418,000 jobs. The red line represents job execution on Ranger; +The right plot in figure \ref{TaskPlots} shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data\cite{CNARI_2009}. The left figure shows the active jobs for a larger version of the problem type shown in figure \ref{SEMplots}. This shows a Swift script executing 418,000 structural equation modeling jobs over a 40 hour period. \begin{figure} \begin{center} @@ -1461,7 +1460,7 @@ B. SEM application on varying-size processing allocations on Ranger\\ \end{tabular} } - \caption{Swift task rates for PTMap and SEM applications on the Blue Gene/P and Ranger} + \caption{Swift task rates for PTMap and SEM applications on the Blue Gene/P and Ranger. 
(From reference \cite{PetascaleScripting_2009} ) } \label{TaskPlots} \end{center} \end{figure} From noreply at svn.ci.uchicago.edu Mon Jan 10 13:31:51 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:31:51 -0600 (CST) Subject: [Swift-commit] r3944 - text/parco10submission Message-ID: <20110110193151.634E39CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-10 13:31:51 -0600 (Mon, 10 Jan 2011) New Revision: 3944 Modified: text/parco10submission/paper.tex Log: updated Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 19:20:00 UTC (rev 3943) +++ text/parco10submission/paper.tex 2011-01-10 19:31:51 UTC (rev 3944) @@ -1345,14 +1345,14 @@ We present here a few additional measurements of Swift performance, and highlight and include a few previously published results. \subsection{Synthetic benchmark results} -First, we measure the ability of Swift to support many user tasks on a -single local system. In Test A, we used Swift to submit up to 2,000 +First, we measured the ability of Swift to support many user tasks on a +single local system. We used Swift to submit up to 2,000 tasks to a 16-core x86-based Linux compute server at Argonne National Laboratory. Each job in the batch was an identical, simple single-processor job that executed for the given duration and performed application input and output at 1 byte each. The total execution time was measured. This was compared to the total amount of -core-time consumed to report a utilization ratio, which is plotted. We observe that for tasks of only 5 seconds in duration, Swift can sustain 100 concurrent application executions at a CPU utilization of 90\%, and 200 concurrent executions at a utilization of 85\%. +core-time consumed to report a utilization ratio, which is plotted in Figure~\ref{fig:swift-performance}, case A. 
We observe that for tasks of only 5 seconds in duration, Swift can sustain 100 concurrent application executions at a CPU utilization of 90\%, and 200 concurrent executions at a utilization of 85\%. Second, we measured the ability of Swift to support many tasks on a large, distributed memory system without considering the effect on the @@ -1364,33 +1364,33 @@ concurrent job, thus, the user task had 4 cores at its disposal. The total execution time was measured. This was compared to the total amount of node-time consumed to report a utilization ratio, which is -plotted. We observe that for tasks of 100 second duration, Swift achieves +plotted in Figure~\ref{fig:swift-performance}, case B. +We observe that for tasks of 100 second duration, Swift achieves a 95\% CPU utilization of 2,048 compute nodes. Even for 30 second tasks, it can sustain an 80\% utilization at this level of concurrency. -Third, we measured the ability of Swift to support many tasks on a -large, distributed memory system including application use of the -underlying GPFS filesystem. We used Swift/Coasters to -submit up to 10,240 tasks to Intrepid. Each job in the batch was an -identical, simple single-processor job that executed for 30 seconds -and performed the given amount of input and output. Coasters provider -staging was used to distribute application data to workers, except in -the case marked ``direct'', in which case the I/O was performed -directly to GPFS. Each node was limited to one concurrent job, thus, -the user task had 4 cores at its disposal. The total execution time -was measured. This was compared to the total amount of time consumed -by an equivalent shell script-based application to report an -efficiency ratio, which is plotted in Figure~\ref{fig:swift-performance}, case C. -\katznote{what knowledge should I gain from the figure? is the data good or bad? 
why?} -\emph{Note: this test will be refined with adequate data points before publication.} +%Third, we measured the ability of Swift to support many tasks on a +%large, distributed memory system including application use of the +%underlying GPFS filesystem. We used Swift/Coasters to +%submit up to 10,240 tasks to Intrepid. Each job in the batch was an +%identical, simple single-processor job that executed for 30 seconds +%and performed the given amount of input and output. Coasters provider +%staging was used to distribute application data to workers, except in +%the case marked ``direct'', in which case the I/O was performed +%directly to GPFS. Each node was limited to one concurrent job, thus, +%the user task had 4 cores at its disposal. The total execution time +%was measured. This was compared to the total amount of time consumed +%by an equivalent shell script-based application to report an +%efficiency ratio, which is plotted in Figure~\ref{fig:swift-performance}, case C. +%\emph{Note: this test will be refined with adequate data points before publication.} +% +%The Test C shell script was provided with all job specifications in +%advance and did not require communication between the worker +%nodes and the Swift/Coasters runtime. Thus, this test measures the +%overhead involved in the dynamic job creation and scheduling +%functionality offered by Swift. -The Test C shell script was provided with all job specifications in -advance and did not require communication between the worker -nodes and the Swift/Coasters runtime. Thus, this test measures the -overhead involved in the dynamic job creation and scheduling -functionality offered by Swift. - \newcommand{\plotscale}{0.60} \begin{figure} @@ -1407,13 +1407,13 @@ Application CPU utilization for 3 task durations (in seconds) at up to 2,048 nodes of the Blue Gene/P. at varying system size. \\ - \includegraphics[scale=\plotscale]{plots/dds} - & \\ - Test C. 
- Efficiency for a fixed number of tasks with varying data sizes. - Input and out data was one file in each direction of the size - indicated. - & \\ +% \includegraphics[scale=\plotscale]{plots/dds} +% & \\ +% Test C. +% Efficiency for a fixed number of tasks with varying data sizes. +% Input and out data was one file in each direction of the size +% indicated. +% & \\ \end{tabular} } \caption{Swift performance figures.\label{fig:swift-performance}} @@ -1437,16 +1437,16 @@ \includegraphics[scale=.4]{plots/SEM_IO} \end{tabular} } - \caption{128K-job SEM fMRI application execution on the Ranger Constellation (From publication \cite{CNARI_2009}). Red=active compute jobs, blue=data stage in, green=stage out. } + \caption{128K-job SEM fMRI application execution on the Ranger Constellation (From \cite{CNARI_2009}). Red=active compute jobs, blue=data stage in, green=stage out. } \label{SEMplots} \end{center} \end{figure} -Prior work also showed Swift's ability to achieve ample task rates for local and remote submission to high performance clusters. These prior results are shown in Figure~\ref{TaskPlots} (from~\cite{PetascaleScripting_2009}). +Prior work also showed Swift's ability to achieve ample task rates for local and remote submission to high performance clusters. These prior results are shown in Figure~\ref{TaskPlots} (from \cite{PetascaleScripting_2009}). -The left plot in figure \ref{TaskPlots} shows the PTMap application running the stage 1 processing of the E.coli K12 genome (4,127 sequences) on 2,048 Intrepid cores. The lower plot shows processor utilization as time progresses; Overall, the average per task execution time was 64 seconds, with a standard deviation of 14 seconds. These 4,127 tasks consumed a total of 73 CPU hours, in a span of 161 seconds on 2,048 processor cores, achieving 80 percent utilization. 
+The top plot in Figure~\ref{TaskPlots}-A shows the PTMap application running the stage 1 processing of the E.coli K12 genome (4,127 sequences) on 2,048 Intrepid cores. The lower plot shows processor utilization as time progresses; overall, the average per task execution time was 64 seconds, with a standard deviation of 14 seconds. These 4,127 tasks consumed a total of 73 CPU hours, in a span of 161 seconds on 2,048 processor cores, achieving 80 percent utilization. -The right plot in figure \ref{TaskPlots} shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data\cite{CNARI_2009}. The left figure shows the active jobs for a larger version of the problem type shown in figure \ref{SEMplots}. This shows a Swift script executing 418,000 structural equation modeling jobs over a 40 hour period. +The top plot in Figure~\ref{TaskPlots}-B shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data~\cite{CNARI_2009}. The lower plot shows the active jobs for a larger version of the problem type shown in Figure~\ref{SEMplots}. This shows a Swift script executing 418,000 structural equation modeling jobs over a 40 hour period. \begin{figure} \begin{center} @@ -1460,7 +1460,7 @@ B. SEM application on varying-size processing allocations on Ranger\\ \end{tabular} } - \caption{Swift task rates for PTMap and SEM applications on the Blue Gene/P and Ranger. (From reference \cite{PetascaleScripting_2009} ) } + \caption{Swift task rates for PTMap and SEM applications on the Blue Gene/P and Ranger. 
(From \cite{PetascaleScripting_2009}) } \label{TaskPlots} \end{center} \end{figure} From noreply at svn.ci.uchicago.edu Mon Jan 10 13:35:26 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:35:26 -0600 (CST) Subject: [Swift-commit] r3945 - text/parco10submission Message-ID: <20110110193526.162F89CC9B@svn.ci.uchicago.edu> Author: dsk Date: 2011-01-10 13:35:25 -0600 (Mon, 10 Jan 2011) New Revision: 3945 Modified: text/parco10submission/paper.tex Log: adding hyphen Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 19:31:51 UTC (rev 3944) +++ text/parco10submission/paper.tex 2011-01-10 19:35:25 UTC (rev 3945) @@ -1366,7 +1366,7 @@ amount of node-time consumed to report a utilization ratio, which is plotted in Figure~\ref{fig:swift-performance}, case B. We observe that for tasks of 100 second duration, Swift achieves -a 95\% CPU utilization of 2,048 compute nodes. Even for 30 second tasks, +a 95\% CPU utilization of 2,048 compute nodes. Even for 30-second tasks, it can sustain an 80\% utilization at this level of concurrency. @@ -1426,7 +1426,7 @@ Previously published measurements of Swift performance performance on several scientific applications provide evidence that its parallel distributed programming model can be implemented with sufficient scalability and efficiency to make it a practical tool for large-scale parallel application scripting. The performance of Swift submitting jobs over the wide area network from UChicago to the TeraGrid Ranger cluster at TACC are shown in Figure~\ref{SEMplots} (from \cite{CNARI_2009}), which shows an SEM workload of 131,072 jobs for 4 brain regions and two experimental conditions. This workflow completed in approximately 3 hours. 
The logs from the {\tt swift\_plot\_log} utility show the high degree of concurrent overlap between job execution and input and output file staging to remote computing resources. -The workflows were developed on and submitted (to Ranger) from a single-core Linux workstation at UChicago running an Intel? Xeon? 3.20 GHz CPU. Data staging was performed using the Globus GridFTP protocol and job execution was performed over the Globus GRAM 2 protocol. +The workflows were developed on and submitted (to Ranger) from a single-core Linux workstation at UChicago running an Intel Xeon 3.20-GHz CPU. Data staging was performed using the Globus GridFTP protocol and job execution was performed over the Globus GRAM~2 protocol. During the third hour of the workflow, Swift achieved very high utilization of the 2,048 allocated processor cores and a steady rate of input and output transfers. The first two hours of the run were more bursty, due to fluctuating grid conditions and data server loads. @@ -1446,7 +1446,7 @@ The top plot in Figure~\ref{TaskPlots}-A shows the PTMap application running the stage 1 processing of the E.coli K12 genome (4,127 sequences) on 2,048 Intrepid cores. The lower plot shows processor utilization as time progresses; overall, the average per task execution time was 64 seconds, with a standard deviation of 14 seconds. These 4,127 tasks consumed a total of 73 CPU hours, in a span of 161 seconds on 2,048 processor cores, achieving 80 percent utilization. -The top plot in Figure~\ref{TaskPlots}-B shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data~\cite{CNARI_2009}. The lower plot shows the active jobs for a larger version of the problem type shown in Figure~\ref{SEMplots}. This shows a Swift script executing 418,000 structural equation modeling jobs over a 40 hour period. 
+The top plot in Figure~\ref{TaskPlots}-B shows performance of Swift running structural equation modeling problem at large scale using on the Ranger Constellation to model neural pathway connectivity from experimental fMRI data~\cite{CNARI_2009}. The lower plot shows the active jobs for a larger version of the problem type shown in Figure~\ref{SEMplots}. This shows a Swift script executing 418,000 structural equation modeling jobs over a 40-hour period. \begin{figure} \begin{center} From noreply at svn.ci.uchicago.edu Mon Jan 10 13:58:53 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 13:58:53 -0600 (CST) Subject: [Swift-commit] r3946 - text/parco10submission Message-ID: <20110110195853.CFD2D9CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-10 13:58:53 -0600 (Mon, 10 Jan 2011) New Revision: 3946 Modified: text/parco10submission/paper.tex Log: Minor corrections Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 19:35:25 UTC (rev 3945) +++ text/parco10submission/paper.tex 2011-01-10 19:58:53 UTC (rev 3946) @@ -184,7 +184,7 @@ %external programs on clusters, grids and other parallel platforms, providing %automated site selection, data management, and reliability. 
-We choose to make the Swift language purely functional (i.e., all operations +We chose to make the Swift language purely functional (i.e., all operations have a well-defined set of inputs and outputs, all variables are write-once, and no script-level side effects are permitted by the language) in order to prevent the difficulties that arise from having to track side effects to ensure determinism in complex @@ -1280,8 +1280,8 @@ \subsection{Simulation of glass cavity dynamics and thermodynamics.} Many recent theoretical chemistry studies of the glass transition in model systems have focused -on calculating from theory or simulation what is known as the ?Mosaic -length?. Glen Hocky of the Reichman Group at Columbia is evaluating a new +on calculating from theory or simulation what is known as the Mosaic +length. Glen Hocky of the Reichman Group at Columbia is evaluating a new cavity method \cite{GlassMethods_2008} for measuring this length scale, where particles are simulated by molecular dynamics or Monte Carlo methods within cavities having amorphous boundary conditions. @@ -1401,7 +1401,7 @@ \includegraphics[scale=\plotscale]{plots/sleep} \\ Test A. Application CPU utilization for 3 task durations - (in seconds) with up to 200 concurrent processes on an 8-core + (in seconds) with up to 200 concurrent processes on an 16-core local host. & Test B. Application CPU utilization for 3 task durations @@ -1425,7 +1425,7 @@ Previously published measurements of Swift performance performance on several scientific applications provide evidence that its parallel distributed programming model can be implemented with sufficient scalability and efficiency to make it a practical tool for large-scale parallel application scripting. 
-The performance of Swift submitting jobs over the wide area network from UChicago to the TeraGrid Ranger cluster at TACC are shown in Figure~\ref{SEMplots} (from \cite{CNARI_2009}), which shows an SEM workload of 131,072 jobs for 4 brain regions and two experimental conditions. This workflow completed in approximately 3 hours. The logs from the {\tt swift\_plot\_log} utility show the high degree of concurrent overlap between job execution and input and output file staging to remote computing resources. +The performance of Swift submitting jobs over the wide area network from UChicago to the TeraGrid Ranger cluster at TACC are shown in Figure~\ref{SEMplots} (from \cite{CNARI_2009}), which shows an SEM workload of 131,072 jobs for 4 brain regions and two experimental conditions. This workflow completed in approximately 3 hours. The logs from the {\tt swift\_plot\_log} utility show the high degree of concurrent overlap between job execution and input and output file staging to remote computing resources. The workflows were developed on and submitted (to Ranger) from a single-core Linux workstation at UChicago running an Intel Xeon 3.20-GHz CPU. Data staging was performed using the Globus GridFTP protocol and job execution was performed over the Globus GRAM~2 protocol. During the third hour of the workflow, Swift achieved very high utilization of the 2,048 allocated processor cores and a steady rate of input and output transfers. The first two hours of the run were more bursty, due to fluctuating grid conditions and data server loads. 
From noreply at svn.ci.uchicago.edu Mon Jan 10 14:01:21 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 14:01:21 -0600 (CST) Subject: [Swift-commit] r3947 - text/parco10submission Message-ID: <20110110200121.811A39CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-10 14:01:21 -0600 (Mon, 10 Jan 2011) New Revision: 3947 Modified: text/parco10submission/paper.tex Log: Spelling Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-10 19:58:53 UTC (rev 3946) +++ text/parco10submission/paper.tex 2011-01-10 20:01:21 UTC (rev 3947) @@ -1600,7 +1600,7 @@ the knowledge of the whole workflow graph, while in Swift, the structure of a workflow is constructed and expanded dynamically. -Drayd~\cite{Dryad} is an infrastructure for running data-parallel programs on a parallel or distributed system. In addition to allowing files to be used for passing data between +Dryad~\cite{Dryad} is an infrastructure for running data-parallel programs on a parallel or distributed system. In addition to allowing files to be used for passing data between tasks (like Swift), it also allows TCP pipes and shared memory FIFOs to be used. Dryad tasks are written in C++, while Swift tasks can be written in any language. Dryad graphs are explicitly developed by the programmer; Swift graphs are implicit and the programmer doesn't worry about them. 
A scripting language called Nebula was originally developed From noreply at svn.ci.uchicago.edu Mon Jan 10 14:42:57 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 14:42:57 -0600 (CST) Subject: [Swift-commit] r3948 - trunk/tests Message-ID: <20110110204257.1DF089CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-10 14:42:56 -0600 (Mon, 10 Jan 2011) New Revision: 3948 Modified: trunk/tests/meta.sh trunk/tests/nightly.sh trunk/tests/run-nightly.sh Log: Notes... Modified: trunk/tests/meta.sh =================================================================== --- trunk/tests/meta.sh 2011-01-10 20:01:21 UTC (rev 3947) +++ trunk/tests/meta.sh 2011-01-10 20:42:56 UTC (rev 3948) @@ -10,3 +10,11 @@ # Retrieve results # scp ... + +# Repeat for other sites... + + + +# Build nice HTML index of results from all sites + +return 0 Modified: trunk/tests/nightly.sh =================================================================== --- trunk/tests/nightly.sh 2011-01-10 20:01:21 UTC (rev 3947) +++ trunk/tests/nightly.sh 2011-01-10 20:42:56 UTC (rev 3948) @@ -77,6 +77,14 @@ # via make_sites_sed() -> group_sites_xml() # Note that some schedulers restrict your choice of RUNDIR +# NAMING +# Site-specific test groups are in providers/ . +# These are named: +# providers// +# or: +# providers// +# E.g., providers/local-pbs/PADS + printhelp() { echo "nightly.sh " echo "" Modified: trunk/tests/run-nightly.sh =================================================================== --- trunk/tests/run-nightly.sh 2011-01-10 20:01:21 UTC (rev 3947) +++ trunk/tests/run-nightly.sh 2011-01-10 20:42:56 UTC (rev 3948) @@ -14,4 +14,6 @@ ./nightly.sh -g -o topdir $GROUPLISTFILE [ $? 
!= 0 ] && exit 1 +# Bundle results with tar cfz + exit 0 From noreply at svn.ci.uchicago.edu Mon Jan 10 21:59:49 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 10 Jan 2011 21:59:49 -0600 (CST) Subject: [Swift-commit] r3950 - in trunk/tests/providers: . local-cobalt local-cobalt/intrepid local-cobalt/surveyor Message-ID: <20110111035949.706699CC9B@svn.ci.uchicago.edu> Author: davidk Date: 2011-01-10 21:59:49 -0600 (Mon, 10 Jan 2011) New Revision: 3950 Added: trunk/tests/providers/local-cobalt/ trunk/tests/providers/local-cobalt/intrepid/ trunk/tests/providers/local-cobalt/intrepid/100-cp.check.sh trunk/tests/providers/local-cobalt/intrepid/100-cp.clean.sh trunk/tests/providers/local-cobalt/intrepid/100-cp.setup.sh trunk/tests/providers/local-cobalt/intrepid/100-cp.swift trunk/tests/providers/local-cobalt/intrepid/100-cp.timeout trunk/tests/providers/local-cobalt/intrepid/sites.template.xml trunk/tests/providers/local-cobalt/intrepid/tc.template.data trunk/tests/providers/local-cobalt/intrepid/title.txt trunk/tests/providers/local-cobalt/surveyor/ trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.swift trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.timeout trunk/tests/providers/local-cobalt/surveyor/README trunk/tests/providers/local-cobalt/surveyor/catsn.0001.out.expected trunk/tests/providers/local-cobalt/surveyor/catsn.0002.out.expected trunk/tests/providers/local-cobalt/surveyor/catsn.0003.out.expected trunk/tests/providers/local-cobalt/surveyor/catsn.0004.out.expected trunk/tests/providers/local-cobalt/surveyor/catsn.0005.out.expected trunk/tests/providers/local-cobalt/surveyor/catsn.0006.out.expected trunk/tests/providers/local-cobalt/surveyor/catsn.0007.out.expected trunk/tests/providers/local-cobalt/surveyor/catsn.0008.out.expected 
trunk/tests/providers/local-cobalt/surveyor/catsn.0009.out.expected trunk/tests/providers/local-cobalt/surveyor/catsn.0010.out.expected trunk/tests/providers/local-cobalt/surveyor/data.txt trunk/tests/providers/local-cobalt/surveyor/sites.template.xml trunk/tests/providers/local-cobalt/surveyor/tc.template.data trunk/tests/providers/local-cobalt/surveyor/title.txt Log: Adding some of the provider tests from 0.92 to trunk Added: trunk/tests/providers/local-cobalt/intrepid/100-cp.check.sh =================================================================== --- trunk/tests/providers/local-cobalt/intrepid/100-cp.check.sh (rev 0) +++ trunk/tests/providers/local-cobalt/intrepid/100-cp.check.sh 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,7 @@ +#!/bin/sh + +set -x + +grep $( uname -m ) 100-cp-output.txt || exit 1 + +exit 0 Property changes on: trunk/tests/providers/local-cobalt/intrepid/100-cp.check.sh ___________________________________________________________________ Name: svn:executable + * Added: trunk/tests/providers/local-cobalt/intrepid/100-cp.clean.sh =================================================================== --- trunk/tests/providers/local-cobalt/intrepid/100-cp.clean.sh (rev 0) +++ trunk/tests/providers/local-cobalt/intrepid/100-cp.clean.sh 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,7 @@ +#!/bin/sh + +set -x + +rm -v 100-cp-input.txt 100-cp-output.txt || exit 1 + +exit 0 Property changes on: trunk/tests/providers/local-cobalt/intrepid/100-cp.clean.sh ___________________________________________________________________ Name: svn:executable + * Added: trunk/tests/providers/local-cobalt/intrepid/100-cp.setup.sh =================================================================== --- trunk/tests/providers/local-cobalt/intrepid/100-cp.setup.sh (rev 0) +++ trunk/tests/providers/local-cobalt/intrepid/100-cp.setup.sh 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,7 @@ +#!/bin/sh + +set -x + +uname -a > 100-cp-input.txt || exit 1 + +exit 0 Property changes on: 
trunk/tests/providers/local-cobalt/intrepid/100-cp.setup.sh ___________________________________________________________________ Name: svn:executable + * Added: trunk/tests/providers/local-cobalt/intrepid/100-cp.swift =================================================================== --- trunk/tests/providers/local-cobalt/intrepid/100-cp.swift (rev 0) +++ trunk/tests/providers/local-cobalt/intrepid/100-cp.swift 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,13 @@ + +type file; + +app (file o) cp(file i) +{ + cp @i @o; +} + +file input<"100-cp-input.txt">; +file output<"100-cp-output.txt">; + +output = cp(input); + Added: trunk/tests/providers/local-cobalt/intrepid/100-cp.timeout =================================================================== --- trunk/tests/providers/local-cobalt/intrepid/100-cp.timeout (rev 0) +++ trunk/tests/providers/local-cobalt/intrepid/100-cp.timeout 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +3000 Added: trunk/tests/providers/local-cobalt/intrepid/sites.template.xml =================================================================== --- trunk/tests/providers/local-cobalt/intrepid/sites.template.xml (rev 0) +++ trunk/tests/providers/local-cobalt/intrepid/sites.template.xml 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,32 @@ + + + + + + /scratch/wozniak/work + + 0.04 + file + + + + + + + _HOST_ + _PROJECT_ + _QUEUE_ + zeptoos + true + 21 + 10000 + 1 + DEBUG + 1 + 900 + 64 + 64 + _WORK_ + + + Added: trunk/tests/providers/local-cobalt/intrepid/tc.template.data =================================================================== --- trunk/tests/providers/local-cobalt/intrepid/tc.template.data (rev 0) +++ trunk/tests/providers/local-cobalt/intrepid/tc.template.data 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +coasters_alcfbgp cp /bin/cp INSTALLED INTEL32::LINUX null Added: trunk/tests/providers/local-cobalt/intrepid/title.txt =================================================================== --- 
trunk/tests/providers/local-cobalt/intrepid/title.txt (rev 0) +++ trunk/tests/providers/local-cobalt/intrepid/title.txt 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Site Test: BG/P: Intrepid Added: trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,12 @@ +#!/bin/bash + +set -x + +for count in `seq --format "%04.f" 1 1 10` +do + [ -f catsn.$count.out ] || exit 1 + CONTENTS1=$( cat catsn.$count.out.expected ) + CONTENTS2=$( cat catsn.$count.out ) + [[ $CONTENTS1 == $CONTENTS2 ]] || exit 1 +done +exit 0 Property changes on: trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.check.sh ___________________________________________________________________ Name: svn:executable + * Added: trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,4 @@ +#!/bin/bash + +cp -v $GROUP/data.txt . || exit 1 +cp -v $GROUP/*expected . 
|| exit 1 Property changes on: trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.setup.sh ___________________________________________________________________ Name: svn:executable + * Added: trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.swift =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.swift (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.swift 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,15 @@ +type file; + +app (file o) cat (file i) +{ + cat @i stdout=@o; +} + +string t = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +string char[] = @strsplit(t, ""); + +file out[]; +foreach j in [1:@toint(@arg("n","10"))] { + file data<"data.txt">; + out[j] = cat(data); +} Added: trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.timeout =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.timeout (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/001-catsn-surveyor.timeout 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +1000 Added: trunk/tests/providers/local-cobalt/surveyor/README =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/README (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/README 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,3 @@ +Be sure to set PROJECT and QUEUE. 
These settings worked for me, but unsure if they are universal +export PROJECT=HTCScienceApps +export QUEUE=default Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0001.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0001.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0001.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0002.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0002.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0002.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0003.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0003.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0003.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0004.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0004.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0004.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0005.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0005.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0005.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0006.out.expected 
=================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0006.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0006.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0007.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0007.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0007.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0008.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0008.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0008.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0009.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0009.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0009.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/catsn.0010.out.expected =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/catsn.0010.out.expected (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/catsn.0010.out.expected 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/data.txt =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/data.txt (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/data.txt 2011-01-11 03:59:49 UTC (rev 3950) @@ 
-0,0 +1 @@ +Hello world Added: trunk/tests/providers/local-cobalt/surveyor/sites.template.xml =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/sites.template.xml (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/sites.template.xml 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,22 @@ + + + + + + _HOST_ + _PROJECT_ + _QUEUE_ + zeptoos + true + 21 + 10000 + 1 + DEBUG + 1 + 900 + 64 + 64 + _WORK_ + + + Added: trunk/tests/providers/local-cobalt/surveyor/tc.template.data =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/tc.template.data (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/tc.template.data 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1,7 @@ +surveyor echo /bin/echo INSTALLED INTEL32::LINUX +surveyor cat /bin/cat INSTALLED INTEL32::LINUX +surveyor ls /bin/ls INSTALLED INTEL32::LINUX +surveyor grep /bin/grep INSTALLED INTEL32::LINUX +surveyor sort /bin/sort INSTALLED INTEL32::LINUX +surveyor paste /bin/paste INSTALLED INTEL32::LINUX +surveyor wc /usr/bin/wc INSTALLED INTEL32::LINUX Added: trunk/tests/providers/local-cobalt/surveyor/title.txt =================================================================== --- trunk/tests/providers/local-cobalt/surveyor/title.txt (rev 0) +++ trunk/tests/providers/local-cobalt/surveyor/title.txt 2011-01-11 03:59:49 UTC (rev 3950) @@ -0,0 +1 @@ +Surveyor Site Configuration Test From noreply at svn.ci.uchicago.edu Tue Jan 11 10:27:13 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 11 Jan 2011 10:27:13 -0600 (CST) Subject: [Swift-commit] r3951 - text/parco10submission Message-ID: <20110111162713.6AC1A9CC94@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-11 10:27:13 -0600 (Tue, 11 Jan 2011) New Revision: 3951 Modified: text/parco10submission/paper.tex Log: Put perf plots back in with comments around figure-disabling code. 
Modified: text/parco10submission/paper.tex =================================================================== --- text/parco10submission/paper.tex 2011-01-11 03:59:49 UTC (rev 3950) +++ text/parco10submission/paper.tex 2011-01-11 16:27:13 UTC (rev 3951) @@ -1452,14 +1452,14 @@ \begin{center} {\footnotesize \begin{tabular}{p{7cm}p{7cm}} - %\includegraphics[scale=.3]{plots/PTMap_top} & - %\includegraphics[scale=.3]{plots/SEM_top} \\ - %\includegraphics[scale=.3]{plots/PTMap_bottom} & - %\includegraphics[scale=.3]{plots/SEM_bottom} \\ - IMAGE HERE: plots/PTMap-top & - IMAGE HERE: plots/SEM-top \\ - IMAGE HERE: plots/PTMap-bottom & - IMAGE HERE: plots/SEM-bottom \\ + \includegraphics[scale=.3]{plots/PTMap_top} & + \includegraphics[scale=.3]{plots/SEM_top} \\ + \includegraphics[scale=.3]{plots/PTMap_bottom} & + \includegraphics[scale=.3]{plots/SEM_bottom} \\ + %IMAGE HERE: plots/PTMap-top & + %IMAGE HERE: plots/SEM-top \\ + %IMAGE HERE: plots/PTMap-bottom & + %IMAGE HERE: plots/SEM-bottom \\ A. PTMap application on 2,048 nodes of the Blue Gene/P & B. 
SEM application on varying-size processing allocations on Ranger\\ \end{tabular} From noreply at svn.ci.uchicago.edu Wed Jan 12 09:51:20 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 09:51:20 -0600 (CST) Subject: [Swift-commit] r3952 - trunk/tests Message-ID: <20110112155120.F03AF9CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 09:51:20 -0600 (Wed, 12 Jan 2011) New Revision: 3952 Added: trunk/tests/persistent-coasters/ Log: Adding From noreply at svn.ci.uchicago.edu Wed Jan 12 10:22:46 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 10:22:46 -0600 (CST) Subject: [Swift-commit] r3953 - trunk/tests Message-ID: <20110112162246.EECA39CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 10:22:46 -0600 (Wed, 12 Jan 2011) New Revision: 3953 Modified: trunk/tests/nightly.sh Log: Allow nightly.sh to test shell scripts in addition to SwiftScripts Modified: trunk/tests/nightly.sh =================================================================== --- trunk/tests/nightly.sh 2011-01-12 15:51:20 UTC (rev 3952) +++ trunk/tests/nightly.sh 2011-01-12 16:22:46 UTC (rev 3953) @@ -85,6 +85,11 @@ # providers// # E.g., providers/local-pbs/PADS +# WARNINGS +# nightly.sh uses shopt + +shopt -s nullglob + printhelp() { echo "nightly.sh " echo "" @@ -163,7 +168,7 @@ HTML_COMMENTS=1 fi -# Iterations +# Iterations per test (may want to run each test multiple times?) 
ITERS_LOCAL=1 LOGCOUNT=0 @@ -185,7 +190,7 @@ SCRIPTDIR=$( cd $( dirname $0 ) ; /bin/pwd ) -SWIFTCOUNT=0 +TESTCOUNT=0 echo "RUNNING_IN: $RUNDIR" echo "HTML_OUTPUT: $HTML" @@ -470,7 +475,7 @@ start_row() { html_tr testline html_td align right width 50 - html "$SWIFTCOUNT" + html "$TESTCOUNT" html " " html_~td html_td align right @@ -651,7 +656,7 @@ TIMEOUT=$2 # seconds OUTPUT=$3 - V=$SWIFTCOUNT + V=$TESTCOUNT # Use background so kill/trap is immediate sleep $TIMEOUT > /dev/null 2>&1 & @@ -756,7 +761,7 @@ CDM= [ -r fs.data ] && CDM="-cdm.file fs.data" - (( SWIFTCOUNT++ )) + (( TESTCOUNT++ )) TIMEOUT=$( gettimeout $GROUP/$TIMEOUTFILE ) @@ -779,6 +784,40 @@ fi } +# Execute shell test case w/ setup, check, clean +script_test_case() { + SHELLSCRIPT=$1 + SETUPSCRIPT=${SHELLSCRIPT%.test.sh}.setup.sh + CHECKSCRIPT=${SHELLSCRIPT%.test.sh}.check.sh + CLEANSCRIPT=${SHELLSCRIPT%.test.sh}.clean.sh + TIMEOUTFILE=${SHELLSCRIPT%.test.sh}.timeout + + TEST_SHOULD_FAIL=0 + if [ -x $GROUP/$SETUPSCRIPT ]; then + script_exec $GROUP/$SETUPSCRIPT "S" + fi + + (( TESTCOUNT++ )) + + # Not using background for script tests yet + # TIMEOUT=$( gettimeout $GROUP/$TIMEOUTFILE ) + + if [ -x $GROUP/$SETUPSCRIPT ]; then + script_exec $GROUP/$SETUPSCRIPT "S" + fi + + if [ -x $GROUP/$SHELLSCRIPT ]; then + script_exec $SHELLSCRIPT "X" + fi + + if [ -x $GROUP/$CHECKSCRIPT ]; then + script_exec $GROUP/$CHECKSCRIPT "√" + fi + if [ -x $GROUP/$CLEANSCRIPT ]; then + script_exec $GROUP/$CLEANSCRIPT "C" + fi +} + # All timeouts in this script are in seconds gettimeout() { FILE=$1 @@ -832,12 +871,12 @@ } # Setup coasters variables -if which ifconfig > /dev/null; then +if which ifconfig > /dev/null 2>&1; then IFCONFIG=ifconfig else IFCONFIG=/sbin/ifconfig fi -$IFCONFIG > /dev/null || crash "Cannot run ifconfig!" +$IFCONFIG > /dev/null 2>&1 || crash "Cannot run ifconfig!" GLOBUS_HOSTNAME=$( $IFCONFIG | grep inet | head -1 | cut -d ':' -f 2 | \ awk '{print $1}' ) [ $? 
!= 0 ] && crash "Could not obtain GLOBUS_HOSTNAME!" @@ -907,24 +946,42 @@ group_fs_data group_swift_properties - SWIFTS=$( ls $GROUP/*.swift ) - checkfail "Could not ls: $GROUP" + SWIFTS=$( echo $GROUP/*.swift ) + checkfail "Could not list: $GROUP" for TEST in $SWIFTS; do (( SKIP_COUNTER++ < SKIP_TESTS )) && continue - TESTNAME=$( basename $TEST) + TESTNAME=$( basename $TEST ) cp -v $GROUP/$TESTNAME . TESTLINK=$TESTNAME start_row - for ((i=0; $i<$ITERS_LOCAL; i=$i+1)); do + for (( i=0; $i<$ITERS_LOCAL; i=$i+1 )); do swift_test_case $TESTNAME - (( $SWIFTCOUNT >= $NUMBER_OF_TESTS )) && return + (( $TESTCOUNT >= $NUMBER_OF_TESTS )) && return done end_row done + + SCRIPTS=$( echo $GROUP/*.test.sh ) + checkfail "Could not list: $GROUP" + for TEST in $SCRIPTS; do + + (( SKIP_COUNTER++ < SKIP_TESTS )) && continue + + TESTNAME=$( basename $TEST ) + cp -v $GROUP/$TESTNAME . + TESTLINK=$TESTNAME + + start_row + for ((i=0; $i<$ITERS_LOCAL; i=$i+1)); do + script_test_case $TESTNAME + (( $TESTCOUNT >= $NUMBER_OF_TESTS )) && return + done + end_row + done } if [[ $WORK == "" ]] @@ -1004,7 +1061,7 @@ start_part "Part $GROUPCOUNT: $TITLE" test_group (( GROUPCOUNT++ )) - (( $SWIFTCOUNT >= $NUMBER_OF_TESTS )) && break + (( $TESTCOUNT >= $NUMBER_OF_TESTS )) && break done if [ $GRID_TESTS == "0" ]; then From noreply at svn.ci.uchicago.edu Wed Jan 12 10:24:09 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 10:24:09 -0600 (CST) Subject: [Swift-commit] r3954 - in trunk/tests: groups persistent-coasters Message-ID: <20110112162409.A020C9CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 10:24:09 -0600 (Wed, 12 Jan 2011) New Revision: 3954 Added: trunk/tests/groups/group-pc.sh trunk/tests/persistent-coasters/000-sanity.test.sh trunk/tests/persistent-coasters/README.txt trunk/tests/persistent-coasters/title.txt Log: Sketch of persistent coasters GROUP and first shell script test Added: trunk/tests/groups/group-pc.sh 
=================================================================== --- trunk/tests/groups/group-pc.sh (rev 0) +++ trunk/tests/groups/group-pc.sh 2011-01-12 16:24:09 UTC (rev 3954) @@ -0,0 +1,6 @@ + +# GROUPLIST definition to run persistent coasters tests + +GROUPLIST=( $TESTDIR/persistent-coasters ) + +checkvars WORK Added: trunk/tests/persistent-coasters/000-sanity.test.sh =================================================================== --- trunk/tests/persistent-coasters/000-sanity.test.sh (rev 0) +++ trunk/tests/persistent-coasters/000-sanity.test.sh 2011-01-12 16:24:09 UTC (rev 3954) @@ -0,0 +1,10 @@ +#!/bin/bash + +# This is just a sanity check for the nightly.sh shell script test +# mechanics + +echo sane + +/bin/true || exit 1 + +exit 0 Property changes on: trunk/tests/persistent-coasters/000-sanity.test.sh ___________________________________________________________________ Name: svn:executable + * Added: trunk/tests/persistent-coasters/README.txt =================================================================== --- trunk/tests/persistent-coasters/README.txt (rev 0) +++ trunk/tests/persistent-coasters/README.txt 2011-01-12 16:24:09 UTC (rev 3954) @@ -0,0 +1,4 @@ +Persistent Coasters tests. + +This will probably involve somewhat complicated shell scripts +but simple SwiftScripts. 
Added: trunk/tests/persistent-coasters/title.txt =================================================================== --- trunk/tests/persistent-coasters/title.txt (rev 0) +++ trunk/tests/persistent-coasters/title.txt 2011-01-12 16:24:09 UTC (rev 3954) @@ -0,0 +1 @@ +Persistent Coasters Tests From noreply at svn.ci.uchicago.edu Wed Jan 12 10:52:57 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 10:52:57 -0600 (CST) Subject: [Swift-commit] r3955 - provenancedb Message-ID: <20110112165257.D4BA39CC7F@svn.ci.uchicago.edu> Author: lgadelha Date: 2011-01-12 10:52:57 -0600 (Wed, 12 Jan 2011) New Revision: 3955 Modified: provenancedb/pql_functions.sql provenancedb/prov-init.sql Log: Added support for lineage queries using SQL recursion. Modified: provenancedb/pql_functions.sql =================================================================== --- provenancedb/pql_functions.sql 2011-01-12 16:24:09 UTC (rev 3954) +++ provenancedb/pql_functions.sql 2011-01-12 16:52:57 UTC (rev 3955) @@ -149,3 +149,18 @@ ds_usage.param_name='prot' OR ds_usage.param_name='prepTarFile' OR ds_usage.param_name='nSim') AND annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value=$1; $$ LANGUAGE SQL; + +-- recursive query to find ancestor entities in a provenance graph + +create or replace function ancestors(varchar) returns setof varchar as $$ + with recursive anc(ancestor,descendant) as + ( + select parent as ancestor, child as descendant from parent_of where child=$1 + union all + select parent_of.parent as ancestor, anc.descendant as descendant + from anc,parent_of + where anc.ancestor=parent_of.child + ) + select ancestor from anc +$$ language sql; + Modified: provenancedb/prov-init.sql =================================================================== --- provenancedb/prov-init.sql 2011-01-12 16:24:09 UTC (rev 3954) +++ provenancedb/prov-init.sql 2011-01-12 16:52:57 UTC (rev 3955) @@ -198,6 +198,13 @@ primary key (id, name) ); +create view 
parent_of as + select process_id as parent,dataset_id as child from ds_usage where direction='O' + union + select dataset_id as parent,process_id as child from ds_usage where direction='I' + union + select out_id as parent,in_id as child from ds_containment; + -- extrainfo stores lines generated by the SWIFT_EXTRA_INFO feature -- extrainfo will be processes into annotation tables -- CREATE TABLE extrainfo From noreply at svn.ci.uchicago.edu Wed Jan 12 11:14:49 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 11:14:49 -0600 (CST) Subject: [Swift-commit] r3956 - usertools Message-ID: <20110112171449.A42649CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 11:14:49 -0600 (Wed, 12 Jan 2011) New Revision: 3956 Added: usertools/persistent-coasters/ Log: Adding From noreply at svn.ci.uchicago.edu Wed Jan 12 11:18:31 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 11:18:31 -0600 (CST) Subject: [Swift-commit] r3957 - usertools/persistent-coasters Message-ID: <20110112171831.317BE9CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 11:18:30 -0600 (Wed, 12 Jan 2011) New Revision: 3957 Added: usertools/persistent-coasters/passivate.swift usertools/persistent-coasters/run.zsh usertools/persistent-coasters/settings.sh usertools/persistent-coasters/setup.sh usertools/persistent-coasters/sites.passivate-template.xml usertools/persistent-coasters/sites.persistent.xml usertools/persistent-coasters/swift.properties usertools/persistent-coasters/tc.data usertools/persistent-coasters/workers-cobalt.zsh usertools/persistent-coasters/workers-local.zsh usertools/persistent-coasters/workers-ssh.zsh Log: Import persistent coasters scripts from cdm/scripts/cpc Added: usertools/persistent-coasters/passivate.swift =================================================================== --- usertools/persistent-coasters/passivate.swift (rev 0) +++ 
usertools/persistent-coasters/passivate.swift 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,17 @@ +type file; + +app passivate () +{ + sh "-c" "echo dummy swift job;"; +} + +app (file h) hostname () +{ + hostname stdout=@h; +} + +file h<"hostname.txt">; + +passivate(); + +h = hostname(); Added: usertools/persistent-coasters/run.zsh =================================================================== --- usertools/persistent-coasters/run.zsh (rev 0) +++ usertools/persistent-coasters/run.zsh 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,126 @@ +#!/bin/zsh + +set -x + +APP=$( cd $( dirname $0 ) ; /bin/pwd ) +TOOLS=${APP}/../tools +source ${TOOLS}/helpers.zsh +[[ $? != 0 ]] && print "Could not load helpers.zsh!" && exit 1 + +[[ ${SWIFT} == "" ]] && SWIFT=$( which swift ) +SWIFT_BIN=$( dirname ${SWIFT} ) +COASTER_RESOURCES=${SWIFT_BIN}/../../../../provider-coaster/resources +WORKER=${COASTER_RESOURCES}/worker.pl + +export SERVICE_PORT=10985 + +LOGDIR=logs +mkdir -p ${LOGDIR} +exitcode + +SWIFT_OUT=${LOGDIR}/swift.out + +local OUT=$1 + +# source workers-local.zsh +source workers-ssh.zsh + +# Obtain the URL to which Swift should connect +get_service_coasters() +{ + local OUT=$1 + local LINE=() + COUNT=0 + for COUNT in {1..3} + do + LINE=( $( grep --text "Started coaster service:" ${OUT} ) ) + if [[ ${#LINE} == 0 ]] then + sleep 3 + else + break + fi + done + [[ ${#LINE} == 0 ]] && return 1 + + CONTACT=${LINE[-1]} + print ${CONTACT} + return 0 +} + +# Obtain the URL to which the workers should connect +get_service_local() +{ + local OUT=$1 + local LINE=() + COUNT=0 + for COUNT in {1..4} + do + LINE=( $( grep "Passive queue processor" ${OUT} ) ) + if [[ ${#LINE} == 0 ]] then + sleep 4 + else + break + fi + if grep "Error" ${OUT} >& /dev/null + then + print "coaster-service error!" 
>&2 + cat ${OUT} >&2 + return 1 + fi + done + [[ ${#LINE} == 0 ]] && return 1 + + CONTACT=${LINE[-1]} + print ${CONTACT} + return 0 +} + +SIGNALS="EXIT INT QUIT" +cleanup_trap() +{ + print "cleanup_trap()..." + eval trap - ${SIGNALS} + [[ ${COASTER_SERVICE_PID} != "" ]] && kill ${COASTER_SERVICE_PID} + [[ ${STARTWORKERS_PID} != "" ]] && kill ${START_WORKERS_PID} +} +# eval trap cleanup_trap ${SIGNALS} + +SERVICE_LOG=${LOGDIR}/coaster-service.log +coaster-service -nosec -p ${SERVICE_PORT} >& ${SERVICE_LOG} & +COASTER_SERVICE_PID=${!} + +sleep 1 + +SERVICE_COASTERS=$( get_service_coasters ${SERVICE_LOG} ) +exitcode "Could not get coasters service!" +export SERVICE_COASTERS + +source setup.sh +exitcode "setup.sh failed!" + +sleep 1 + +{ ${SWIFT} -config swift.properties \ + -sites.file sites.passivate.xml \ + -tc.file tc.passivate.data \ + passivate.swift < /dev/null >& ${SWIFT_OUT} + exitcode "Swift failed!" + print "Swift finished." +} & + +sleep 1 + +pwd +SERVICE_LOCAL=$( get_service_local ${SWIFT_OUT} ) +exitcode "get_service_local failed!" +export SERVICE_LOCAL + +sleep 1 + +print "Starting workers..." 
+start_workers ${SERVICE_LOCAL} & +START_WORKERS_PID=${!} + +sleep 1 + +exit 0 Property changes on: usertools/persistent-coasters/run.zsh ___________________________________________________________________ Name: svn:executable + * Added: usertools/persistent-coasters/settings.sh =================================================================== --- usertools/persistent-coasters/settings.sh (rev 0) +++ usertools/persistent-coasters/settings.sh 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,7 @@ + +WORK=${HOME}/work + +NODES=64 + +QUEUE=prod-devel +MAXTIME=$(( 20 )) Added: usertools/persistent-coasters/setup.sh =================================================================== --- usertools/persistent-coasters/setup.sh (rev 0) +++ usertools/persistent-coasters/setup.sh 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,12 @@ + +source ${TOOLS}/coasters-setup.sh +source ./settings.sh + +export WORK +${TOOLS}/gensites.sh ${WORK} sites.passivate-template.xml \ + sites.passivate.xml +[[ $? != 0 ]] && bail "sites problem (passivate)" && return +${TOOLS}/gensites.sh ${WORK} sites.persistent.xml sites.xml +[[ $? 
!= 0 ]] && bail "sites problem (persistent)" && return + +return 0 Property changes on: usertools/persistent-coasters/setup.sh ___________________________________________________________________ Name: svn:executable + * Added: usertools/persistent-coasters/sites.passivate-template.xml =================================================================== --- usertools/persistent-coasters/sites.passivate-template.xml (rev 0) +++ usertools/persistent-coasters/sites.passivate-template.xml 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,13 @@ + + + + passive + 4 + .03 + 10000 + + _WORK_ + + Added: usertools/persistent-coasters/sites.persistent.xml =================================================================== --- usertools/persistent-coasters/sites.persistent.xml (rev 0) +++ usertools/persistent-coasters/sites.persistent.xml 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,33 @@ + + + + + passive + 4 + 3.00 + 10000 + + _WORK_ + + + + Added: usertools/persistent-coasters/swift.properties =================================================================== --- usertools/persistent-coasters/swift.properties (rev 0) +++ usertools/persistent-coasters/swift.properties 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,7 @@ +wrapperlog.always.transfer=true +sitedir.keep=true +execution.retries=0 +lazy.errors=false +status.mode=provider +use.provider.staging=true +provider.staging.pin.swiftfiles=false Added: usertools/persistent-coasters/tc.data =================================================================== --- usertools/persistent-coasters/tc.data (rev 0) +++ usertools/persistent-coasters/tc.data 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1 @@ +cpc sh /bin/sh null null null Added: usertools/persistent-coasters/workers-cobalt.zsh =================================================================== --- usertools/persistent-coasters/workers-cobalt.zsh (rev 0) +++ usertools/persistent-coasters/workers-cobalt.zsh 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,23 @@ + +# Source this to 
get start_workers() for cobalt + +start_workers() +{ + local URI=$1 + local TIMESTAMP=$(date "+%Y.%m%d.%H%M%S") + local -Z 5 R=${RANDOM} + ID="${TIMESTAMP}.${R}" + + cqsub -q ${QUEUE} \ + -k zeptoos \ + -t ${MAXTIME} \ # minutes + -n ${NODES} \ + --cwd ${LOGDIR} \ + -E ${LOGDIR}/cobalt.${$}.stderr \ + -o ${LOGDIR}/cobalt.${$}.stdout \ + -e "WORKER_LOGGING_LEVEL=DEBUG:ZOID_ENABLE_NAT=true" \ + ${WORKER} ${URI} ${ID} ${LOGDIR} + + START_WORKERS_PID= + return 0 +} Added: usertools/persistent-coasters/workers-local.zsh =================================================================== --- usertools/persistent-coasters/workers-local.zsh (rev 0) +++ usertools/persistent-coasters/workers-local.zsh 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,15 @@ + +# Source this to get start_workers() for local +# Just starts a local worker.pl process + +start_workers() +{ + local URI=$1 + + ${WORKER} ${URI} LOCAL ${LOGDIR} & + + # TODO: manage these PIDs + # START_WORKERS_PID= + + return 0 +} Added: usertools/persistent-coasters/workers-ssh.zsh =================================================================== --- usertools/persistent-coasters/workers-ssh.zsh (rev 0) +++ usertools/persistent-coasters/workers-ssh.zsh 2011-01-12 17:18:30 UTC (rev 3957) @@ -0,0 +1,23 @@ + +# Source this to get start_workers() for ssh +# hostnames must be in environment variable WORKER_HOSTS + +start_workers() +{ + local URI=$1 + + local TIMESTAMP=$(date "+%Y.%m%d.%H%M%S") + local -Z 5 R=${RANDOM} + ID="${TIMESTAMP}.${R}" + + for MACHINE in ${WORKER_HOSTS} + do + pwd + ssh ${MACHINE} ${WORKER} ${URI} ${MACHINE} ${LOGDIR} & + done + + # TODO: manage these PIDs + # START_WORKERS_PID= + + return 0 +} From noreply at svn.ci.uchicago.edu Wed Jan 12 11:19:10 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 11:19:10 -0600 (CST) Subject: [Swift-commit] r3958 - usertools/persistent-coasters Message-ID: <20110112171910.668D99CC7F@svn.ci.uchicago.edu> Author: 
wozniak Date: 2011-01-12 11:19:10 -0600 (Wed, 12 Jan 2011) New Revision: 3958 Removed: usertools/persistent-coasters/sites.persistent.xml Log: This file is generated Deleted: usertools/persistent-coasters/sites.persistent.xml =================================================================== --- usertools/persistent-coasters/sites.persistent.xml 2011-01-12 17:18:30 UTC (rev 3957) +++ usertools/persistent-coasters/sites.persistent.xml 2011-01-12 17:19:10 UTC (rev 3958) @@ -1,33 +0,0 @@ - - - - - passive - 4 - 3.00 - 10000 - - _WORK_ - - - - From noreply at svn.ci.uchicago.edu Wed Jan 12 11:27:25 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 11:27:25 -0600 (CST) Subject: [Swift-commit] r3959 - usertools/persistent-coasters Message-ID: <20110112172725.3F7629CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 11:27:25 -0600 (Wed, 12 Jan 2011) New Revision: 3959 Modified: usertools/persistent-coasters/passivate.swift usertools/persistent-coasters/settings.sh usertools/persistent-coasters/setup.sh usertools/persistent-coasters/workers-cobalt.zsh Log: Some additional usage notes Modified: usertools/persistent-coasters/passivate.swift =================================================================== --- usertools/persistent-coasters/passivate.swift 2011-01-12 17:19:10 UTC (rev 3958) +++ usertools/persistent-coasters/passivate.swift 2011-01-12 17:27:25 UTC (rev 3959) @@ -1,3 +1,12 @@ + +/** + * Simple script that does nothing of consequence to + * the CoasterService. Settings are passed along + * which configure the CoasterService. + * + * We can do some diagnostic stuff here. 
+ * */ + type file; app passivate () Modified: usertools/persistent-coasters/settings.sh =================================================================== --- usertools/persistent-coasters/settings.sh 2011-01-12 17:19:10 UTC (rev 3958) +++ usertools/persistent-coasters/settings.sh 2011-01-12 17:27:25 UTC (rev 3959) @@ -1,4 +1,6 @@ +# Keep all interesting settings in one place + WORK=${HOME}/work NODES=64 Modified: usertools/persistent-coasters/setup.sh =================================================================== --- usertools/persistent-coasters/setup.sh 2011-01-12 17:19:10 UTC (rev 3958) +++ usertools/persistent-coasters/setup.sh 2011-01-12 17:27:25 UTC (rev 3959) @@ -1,4 +1,8 @@ +# Obtain settings from settings.sh, +# generate sites file +# Should extend to generate tc as well + source ${TOOLS}/coasters-setup.sh source ./settings.sh Modified: usertools/persistent-coasters/workers-cobalt.zsh =================================================================== --- usertools/persistent-coasters/workers-cobalt.zsh 2011-01-12 17:19:10 UTC (rev 3958) +++ usertools/persistent-coasters/workers-cobalt.zsh 2011-01-12 17:27:25 UTC (rev 3959) @@ -1,5 +1,6 @@ # Source this to get start_workers() for cobalt +# This is not complete start_workers() { From noreply at svn.ci.uchicago.edu Wed Jan 12 11:31:26 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 11:31:26 -0600 (CST) Subject: [Swift-commit] r3960 - usertools/persistent-coasters Message-ID: <20110112173126.10C4B9CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 11:31:25 -0600 (Wed, 12 Jan 2011) New Revision: 3960 Added: usertools/persistent-coasters/README.txt Log: Adding Added: usertools/persistent-coasters/README.txt =================================================================== --- usertools/persistent-coasters/README.txt (rev 0) +++ usertools/persistent-coasters/README.txt 2011-01-12 17:31:25 UTC (rev 3960) @@ -0,0 +1,10 @@ + +Overview of persistent 
CoasterService process + +1) Start coaster service +2) Get URL to which Swift should connect from service output +3) Run Swift once to send settings to CoasterService, + putting CoasterService in passive mode +4) Get URL to which workers should connect from Swift output +5) Connect workers to CoasterService +6) Run Swift for application From noreply at svn.ci.uchicago.edu Wed Jan 12 11:34:43 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 11:34:43 -0600 (CST) Subject: [Swift-commit] r3961 - usertools/persistent-coasters Message-ID: <20110112173443.7F42A9CC7F@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 11:34:43 -0600 (Wed, 12 Jan 2011) New Revision: 3961 Added: usertools/persistent-coasters/coasters-setup.sh usertools/persistent-coasters/gensites.sh usertools/persistent-coasters/gentc.sh usertools/persistent-coasters/helpers.zsh Modified: usertools/persistent-coasters/run.zsh Log: Additional required scripts from cdm/scripts/tools Added: usertools/persistent-coasters/coasters-setup.sh =================================================================== --- usertools/persistent-coasters/coasters-setup.sh (rev 0) +++ usertools/persistent-coasters/coasters-setup.sh 2011-01-12 17:34:43 UTC (rev 3961) @@ -0,0 +1,5 @@ + +# Source this to setup GLOBUS_HOSTNAME + +ipaddr=$( ifconfig | grep inet | head -1 | cut -d ':' -f 2 | awk '{print $1}' ) +export GLOBUS_HOSTNAME=$ipaddr Added: usertools/persistent-coasters/gensites.sh =================================================================== --- usertools/persistent-coasters/gensites.sh (rev 0) +++ usertools/persistent-coasters/gensites.sh 2011-01-12 17:34:43 UTC (rev 3961) @@ -0,0 +1,55 @@ +#!/bin/sh + +# Certain sites files may require environment variables, +# see below. 
+# In particular, URL for the coasters persistent service + +# Swift work directory, e.g., ${HOME}/work +WORK=$1 +# Input sites file, e.g., sites.local.xml +INPUT=$2 +# Output sites file, e.g., sites.xml +OUTPUT=$3 + +crash() +{ + MSG=$1 + echo ${MSG} + exit 1 +} + +[[ ${WORK} == "" ]] && crash "Not specified: WORK" +[[ ${INPUT} == "" ]] && crash "Not specified: INPUT" +[[ ${OUTPUT} == "" ]] && crash "Not specified: OUTPUT" + +[[ ${GLOBUS_HOSTNAME} == "" ]] && crash "Not specified: GLOBUS_HOSTNAME" + +# If the sites file requires a PROJECT, QUEUE, N_GRAN, or N_MAX and +# it is not set in the environment, crash: +for TOKEN in PROJECT QUEUE N_GRAN N_MAX SLOTS + do + if grep _${TOKEN}_ ${INPUT} > /dev/null + then + if ! declare -p ${TOKEN} > /dev/null + then + printenv + crash "Not specified: ${TOKEN}" + fi + fi +done + +{ + echo "s/_NODES_/${NODES}/" + echo "s/_HOST_/${GLOBUS_HOSTNAME}/" + echo "s@_WORK_@${WORK}@" + echo "s/_PROJECT_/${PROJECT}/" + echo "s/_QUEUE_/${QUEUE}/" + echo "s/_N_GRAN_/${N_GRAN}/" + echo "s/_N_MAX_/${N_MAX}/" + echo "s/_SLOTS_/${SLOTS}/" + echo "s/_MAXTIME_/${MAXTIME}/" + echo "s@_SERVICE_COASTERS_@${SERVICE_COASTERS:-NO_URL_GIVEN}@" + echo "s@_SERVICE_PORT_@${SERVICE_PORT:-NO_PORT_GIVEN}@" +} > gensites.sed + +sed -f gensites.sed < ${INPUT} > ${OUTPUT} Property changes on: usertools/persistent-coasters/gensites.sh ___________________________________________________________________ Name: svn:executable + * Added: usertools/persistent-coasters/gentc.sh =================================================================== --- usertools/persistent-coasters/gentc.sh (rev 0) +++ usertools/persistent-coasters/gentc.sh 2011-01-12 17:34:43 UTC (rev 3961) @@ -0,0 +1,26 @@ +#!/bin/sh + +# Substitutes PWD for _DIR_ in tc files +# example usage: gentc.sh tc.something.data tc.data ${PWD} + +# Input tc file, e.g., tc.local.data +INPUT=$1 +# Output tc file, e.g., tc.data +OUTPUT=$2 +DIR=$3 + +[[ ${DIR} == "" ]] && echo "No DIR!" 
&& exit 1 + +crash() +{ + MSG=$1 + echo ${MSG} + exit 1 +} + +[[ ${INPUT} == "" ]] && crash "Not specified: INPUT" +[[ ${OUTPUT} == "" ]] && crash "Not specified: OUTPUT" + +{ + sed "s@_DIR_@${DIR}@" +} < ${INPUT} > ${OUTPUT} Property changes on: usertools/persistent-coasters/gentc.sh ___________________________________________________________________ Name: svn:executable + * Added: usertools/persistent-coasters/helpers.zsh =================================================================== --- usertools/persistent-coasters/helpers.zsh (rev 0) +++ usertools/persistent-coasters/helpers.zsh 2011-01-12 17:34:43 UTC (rev 3961) @@ -0,0 +1,241 @@ + +KB=1024 +MB=$(( 1024*KB )) +GB=$(( 1024*MB )) + +assert() { + ERR=$1 + shift + MSG="${*}" + check ${ERR} "${MSG}" || exit ${ERR} + return 0 +} + +exitcode() +{ + ERR=$? + MSG="${*}" + assert ${ERR} "${MSG}" +} + +# If CODE is non-zero, print MSG and return CODE +check() +{ + CODE=$1 + shift + MSG=${*} + + if [[ ${CODE} != 0 ]] + then + print ${MSG} + return ${CODE} + fi + return 0 +} + +bail() +{ + CODE=$1 + shift + MSG="${*}" + print "${MSG}" + set +x +} + +crash() +{ + CODE=$1 + shift + MSG="${*}" + bail ${CODE} ${MSG} + exit ${CODE} +} + +checkvar() +{ + local VAR=$1 + + if [[ ${(P)VAR} == "" ]] + then + crash 1 "Not set: ${VAR}" + fi + return 0 +} + +checkvars() +{ + local VARS + VARS=( ${*} ) + for V in ${VARS} + do + checkvar ${V} + done + return 0 +} + +nanos() +{ + date +%s.%N +} + +within() +{ + local TIME=$1 + shift + local START STOP DIFF LEFT + START=$( nanos ) + ${*} + STOP=$( nanos ) + DIFF=$(( STOP-START )) + if (( DIFF < 0 )) + then + print "TIME exceeded (${DIFF} > ${TIME})!" 
+ return 1 + fi + LEFT=$(( TIME-LEFT )) + sleep ${LEFT} + return 0 +} + + +scan() +# Use shoot to output the contents of a scan +{ + [[ $1 == "" ]] && return + typeset -g -a $1 + local i=1 + local T + while read T + do + eval "${1}[${i}]='${T}'" + (( i++ )) + done +} + +shoot() +# print out an array loaded by scan() +{ + local i + local N + N=$( eval print '${#'$1'}' ) + # print N $N + for (( i=1 ; i <= N ; i++ )) + do + eval print -- "$"${1}"["${i}"]" + done +} + +scan_kv() +{ + [[ $1 == "" ]] && return 1 + typeset -g -A $1 + while read T + do + A=( ${T} ) + KEY=${A[1]%:} # Strip any tailing : + VALUE=${A[2,-1]} + eval "${1}[${KEY}]='${VALUE}'" + done + return 0 +} + +shoot_kv() +{ + local VAR=$1 + eval print -a -C 2 \"'${(kv)'$VAR'[@]}'\" + return 0 +} + +tformat() +# Convert seconds to hh:mm:ss +{ + local -Z 2 T=$1 + local -Z 2 M + + if (( T <= 60 )) + then + print "${T}" + elif (( T <= 3600 )) + then + M=$(( T/60 )) + print "${M}:$( tformat $(( T%60 )) )" + else + print "$(( T/3600 )):$( tformat $(( T%3600 )) )" + fi +} + +bformat() +# Format byte counts +{ + local BYTES=$1 + local LENGTH=${2:-3} + local UNIT + local UNITS + UNITS=( "B" "KB" "MB" "GB" "TB" ) + + local B=${BYTES} + for (( UNIT=0 ; UNIT < 4 ; UNIT++ )) + do + (( B /= 1024 )) + (( B == 0 )) && break + done + + local RESULT=${UNITS[UNIT+1]} + if [[ ${RESULT} == "B" ]] + then + print "${BYTES} B" + else + local -F BF=${BYTES} + local MANTISSA=$(( BF / (1024 ** UNIT ) )) + MANTISSA=$( significant ${LENGTH} ${MANTISSA} ) + print "${MANTISSA} ${RESULT}" + fi + + return 0 +} + +significant() +# Report significant digits from floating point number +{ + local DIGITS=$1 + local NUMBER=$2 + + local -F FLOAT=${NUMBER} + local RESULT + local DOT=0 + local LZ=1 # Leading zeros + local C + local i=1 + while (( 1 )) + do + C=${FLOAT[i]} + [[ ${C} != "0" ]] && [[ ${C} != "." ]] && break + [[ ${C} == "." 
]] && DOT=1 + RESULT+=${C} + (( i++ )) + done + while (( ${DIGITS} > 0 )) + do + C=${FLOAT[i]} + if [[ ${C} == "" ]] + then + (( ! DOT )) && RESULT+="." && DOT=1 + C="0" + fi + RESULT+=${C} + [[ ${C} == "." ]] && (( DIGITS++ )) && DOT=1 + (( i++ )) + (( DIGITS-- )) + done + if (( ! DOT )) # Extra zeros to finish out integer + then + local -i J=${NUMBER} + # J=${J} + while (( ${#RESULT} < ${#J} )) + do + RESULT+="0" + done + fi + print ${RESULT} + return 0 +} Modified: usertools/persistent-coasters/run.zsh =================================================================== --- usertools/persistent-coasters/run.zsh 2011-01-12 17:31:25 UTC (rev 3960) +++ usertools/persistent-coasters/run.zsh 2011-01-12 17:34:43 UTC (rev 3961) @@ -1,5 +1,8 @@ #!/bin/zsh +# Process management is incomplete here, check for +# processes with ps + set -x APP=$( cd $( dirname $0 ) ; /bin/pwd ) From noreply at svn.ci.uchicago.edu Wed Jan 12 11:40:47 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 11:40:47 -0600 (CST) Subject: [Swift-commit] r3962 - usertools/persistent-coasters Message-ID: <20110112174047.56B029CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 11:40:47 -0600 (Wed, 12 Jan 2011) New Revision: 3962 Added: usertools/persistent-coasters/sites.persistent.xml Log: This is converted into the sites.xml used by the application Added: usertools/persistent-coasters/sites.persistent.xml =================================================================== --- usertools/persistent-coasters/sites.persistent.xml (rev 0) +++ usertools/persistent-coasters/sites.persistent.xml 2011-01-12 17:40:47 UTC (rev 3962) @@ -0,0 +1,33 @@ + + + + + passive + 4 + 3.00 + 10000 + + _WORK_ + + + + From noreply at svn.ci.uchicago.edu Wed Jan 12 11:57:55 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 11:57:55 -0600 (CST) Subject: [Swift-commit] r3963 - usertools/persistent-coasters Message-ID: 
<20110112175755.9492A9CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 11:57:55 -0600 (Wed, 12 Jan 2011) New Revision: 3963 Modified: usertools/persistent-coasters/README.txt Log: Additional notes Modified: usertools/persistent-coasters/README.txt =================================================================== --- usertools/persistent-coasters/README.txt 2011-01-12 17:40:47 UTC (rev 3962) +++ usertools/persistent-coasters/README.txt 2011-01-12 17:57:55 UTC (rev 3963) @@ -8,3 +8,9 @@ 4) Get URL to which workers should connect from Swift output 5) Connect workers to CoasterService 6) Run Swift for application + +Script layout: + +Start run.zsh + - run.zsh does the above steps +Start your application SwiftScripts with the generated sites.xml From noreply at svn.ci.uchicago.edu Wed Jan 12 13:34:07 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 13:34:07 -0600 (CST) Subject: [Swift-commit] r3964 - usertools/persistent-coasters Message-ID: <20110112193407.EFE989CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 13:34:07 -0600 (Wed, 12 Jan 2011) New Revision: 3964 Modified: usertools/persistent-coasters/run.zsh Log: Correct TOOLS location and add notes Modified: usertools/persistent-coasters/run.zsh =================================================================== --- usertools/persistent-coasters/run.zsh 2011-01-12 17:57:55 UTC (rev 3963) +++ usertools/persistent-coasters/run.zsh 2011-01-12 19:34:07 UTC (rev 3964) @@ -1,12 +1,15 @@ #!/bin/zsh +# If you get "Could not get coasters service" the +# service may already be running on SERVICE_PORT + # Process management is incomplete here, check for # processes with ps set -x -APP=$( cd $( dirname $0 ) ; /bin/pwd ) -TOOLS=${APP}/../tools +PC=$( cd $( dirname $0 ) ; /bin/pwd ) +TOOLS=${PC} # In the future this might be ${SWIFT_HOME}/tools source ${TOOLS}/helpers.zsh [[ $? != 0 ]] && print "Could not load helpers.zsh!" 
&& exit 1 @@ -38,6 +41,7 @@ do LINE=( $( grep --text "Started coaster service:" ${OUT} ) ) if [[ ${#LINE} == 0 ]] then + grep Error ${OUT} >& /dev/null && return 1 sleep 3 else break From noreply at svn.ci.uchicago.edu Wed Jan 12 13:35:11 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 12 Jan 2011 13:35:11 -0600 (CST) Subject: [Swift-commit] r3965 - usertools/persistent-coasters Message-ID: <20110112193511.4F0579CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-12 13:35:11 -0600 (Wed, 12 Jan 2011) New Revision: 3965 Modified: usertools/persistent-coasters/ usertools/persistent-coasters/setup.sh Log: Set ignores Property changes on: usertools/persistent-coasters ___________________________________________________________________ Name: svn:ignore + logs sites.passivate.xml sites.xml gensites.sed passivate.kml passivate.xml Modified: usertools/persistent-coasters/setup.sh =================================================================== --- usertools/persistent-coasters/setup.sh 2011-01-12 19:34:07 UTC (rev 3964) +++ usertools/persistent-coasters/setup.sh 2011-01-12 19:35:11 UTC (rev 3965) @@ -7,9 +7,11 @@ source ./settings.sh export WORK + ${TOOLS}/gensites.sh ${WORK} sites.passivate-template.xml \ sites.passivate.xml [[ $? != 0 ]] && bail "sites problem (passivate)" && return + ${TOOLS}/gensites.sh ${WORK} sites.persistent.xml sites.xml [[ $? != 0 ]] && bail "sites problem (persistent)" && return From noreply at svn.ci.uchicago.edu Fri Jan 14 17:27:10 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 14 Jan 2011 17:27:10 -0600 (CST) Subject: [Swift-commit] r3966 - provenancedb Message-ID: <20110114232710.B9B7A9CC7F@svn.ci.uchicago.edu> Author: lgadelha Date: 2011-01-14 17:27:10 -0600 (Fri, 14 Jan 2011) New Revision: 3966 Modified: provenancedb/pql_functions.sql provenancedb/prov-to-sql.sh Log: Added generic annotation process using SWIFT_EXTRA_INFO from env profile. 
Modified: provenancedb/pql_functions.sql =================================================================== --- provenancedb/pql_functions.sql 2011-01-12 19:35:11 UTC (rev 3965) +++ provenancedb/pql_functions.sql 2011-01-14 23:27:10 UTC (rev 3966) @@ -152,15 +152,17 @@ -- recursive query to find ancestor entities in a provenance graph -create or replace function ancestors(varchar) returns setof varchar as $$ - with recursive anc(ancestor,descendant) as +CREATE OR REPLACE FUNCTION ancestors(varchar) RETURNS SETOF varchar AS $$ + WITH RECURSIVE anc(ancestor,descendant) AS ( - select parent as ancestor, child as descendant from parent_of where child=$1 - union all - select parent_of.parent as ancestor, anc.descendant as descendant - from anc,parent_of - where anc.ancestor=parent_of.child + SELECT parent AS ancestor, child AS descendant FROM parent_of WHERE child=$1 + UNION + SELECT parent_of.parent AS ancestor, anc.descendant AS descendant + FROM anc,parent_of + WHERE anc.ancestor=parent_of.child ) - select ancestor from anc -$$ language sql; + SELECT ancestor FROM anc +$$ LANGUAGE SQL; + + Modified: provenancedb/prov-to-sql.sh =================================================================== --- provenancedb/prov-to-sql.sh 2011-01-12 19:35:11 UTC (rev 3965) +++ provenancedb/prov-to-sql.sh 2011-01-14 23:27:10 UTC (rev 3966) @@ -9,22 +9,21 @@ echo Generating SQL for $RUNID -rm -f tmp-u.sql tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql # this gives a distinction between the root process for a workflow and the # workflow itself. 
perhaps better to model the workflow as a process -echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-p.sql +echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" > tmp-p.sql while read time duration thread localthread endstate tr_name scratch; do echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-p.sql - echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-e.sql + echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" > tmp-e.sql done < execute.global.event while read start_time duration globalid id endstate thread site scratch; do # cut off the last component of the thread, so that we end up at the # parent thread id which should correspond with the execute-level ID inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')" - echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-e2.sql + echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" > tmp-e2.sql done < execute2.global.event while read col1 col2 col3 col4 col5 thread name lhs rhs result; do @@ -42,11 +41,11 @@ result=$(echo $result | sed -e 's/tag:benc@ci.uchicago.edu,2008:swift://g') fi - echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql + echo "INSERT INTO dataset (id) VALUES ('$lhs');" > tmp-ds.sql echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', 
'$name', '$WF');" >> tmp-p.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-dsu.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" > tmp-dsu.sql echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-dsu.sql echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-dsu.sql done < operators.txt @@ -85,7 +84,7 @@ echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql - echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-dsc.sql + echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" > tmp-dsc.sql done < tie-containers.txt while read dataset filename; do @@ -95,7 +94,7 @@ fi echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql - echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-f.sql + echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" > tmp-f.sql done < dataset-filenames.txt while read dataset value; do @@ -105,19 +104,39 @@ fi echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql - echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-v.sql + echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" > tmp-v.sql done < dataset-values.txt while read start duration wfid rest; do - echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-u.sql + echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" > tmp-u.sql echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-u.sql done < workflow.event -#while read id extrainfo ; do -# TODO this will not like quotes and things like that in extrainfo -# 
echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql -#done < extrainfo.txt + +while read id extrainfo; do + echo $extrainfo | awk -F ";" '{ for (i = 1; i <= NF; i++) + print $i + }' | awk -F "=" '{ print $1 " " $2 }' | awk -F ":" '{ print $1 " " $2 }' > fields.txt + while read name type value; do + if [ "$type" = "num" ]; then + echo "INSERT INTO annot_p_num (id, name, value) VALUES ('$id', '$name', $value);" > tmp-import.sql + fi + if [ "$type" = "txt" ]; then + echo "INSERT INTO annot_p_txt (id, name, value) VALUES ('$id', '$name', '$value');" >> tmp-import.sql + fi + if [ "$type" = "bool" ]; then + echo "INSERT INTO annot_p_bool (id, name, value) VALUES ('$id', '$name', $value);" >> tmp-import.sql + fi + done < fields.txt +done < extrainfo.txt + + +while read id extrainfo ; do + + echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql +done < extrainfo.txt + # TODO this could merge with other naming tables while read start duration thread final_state procname ; do if [ "$duration" != "last-event-line" ]; then @@ -174,6 +193,7 @@ echo Sending SQL to DB $SQLCMD < import.sql +rm -f tmp-u.sql tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql fields.txt echo Finished sending SQL to DB From noreply at svn.ci.uchicago.edu Fri Jan 14 21:08:22 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 14 Jan 2011 21:08:22 -0600 (CST) Subject: [Swift-commit] r3967 - SwiftApps/SwiftR/Swift/exec Message-ID: <20110115030822.770759CC94@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-14 21:08:22 -0600 (Fri, 14 Jan 2011) New Revision: 3967 Added: SwiftApps/SwiftR/Swift/exec/configure-server-sge Modified: SwiftApps/SwiftR/Swift/exec/start-swift Log: Changes for SGE on siraf. Needs further testing. 
Added: SwiftApps/SwiftR/Swift/exec/configure-server-sge =================================================================== --- SwiftApps/SwiftR/Swift/exec/configure-server-sge (rev 0) +++ SwiftApps/SwiftR/Swift/exec/configure-server-sge 2011-01-15 03:08:22 UTC (rev 3967) @@ -0,0 +1,85 @@ +#! /bin/bash + +# Generate Swift configuration files for SGE with manually-started Swift workers (passive coasters) + +cores=$1 +throttle=5.0 # allow approximately 500 concurrent jobs + +cat >tc <sites.xml < + + + + 0.15 + 10000 + + $(pwd)/swiftwork + file + + + + + passive + + 1 + $throttle + 10000 + + $HOME/swiftwork + /tmp/$USER/swiftwork + proxy + + + +END + +# Note abve: workdirectory for cluster must be on shared filesystem + +cat >cf < + + 300000 + 4 + 1 + 1 + 0.15 + 10000 + + $(pwd)/swiftwork + proxy + + + + + + + 8 + 1 + 1 + .15 + 10000 + proxy + /scratch/local/wilde/pstest/swiftwork + + +END Property changes on: SwiftApps/SwiftR/Swift/exec/configure-server-sge ___________________________________________________________________ Name: svn:executable + * Modified: SwiftApps/SwiftR/Swift/exec/start-swift =================================================================== --- SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-14 23:27:10 UTC (rev 3966) +++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-15 03:08:22 UTC (rev 3967) @@ -60,7 +60,7 @@ make-pbs-submit-file() { - if [ $queue != default ]; then + if [ $queue != NONE ]; then queueDirective="#PBS -q $queue" else queueDirective="" @@ -98,7 +98,7 @@ usage exit 1 fi - if [ $queue != default ]; then # FIXME: this will interfere if user really wants to use "-q default" + if [ $queue != NONE ]; then queueDirective="#PBS -q $queue" else queueDirective="" @@ -148,17 +148,23 @@ make-sge-submit-file() { echo in $0 - if [ $queue != default ]; then + if [ $queue != NONE ]; then queueDirective="#$ -q $queue" else queueDirective="" fi - if [ $project != default ]; then + if [ $project != NONE ]; then projectDirective="#$ -A $project" 
else projectDirective="" fi - rcmd="qrsh" # FIXME - need to set on system basis; qrsh works for siraf + if [ $perEnv != NONE ]; then + parEnvDirective="#$ -pe $parEnv $(($nodes*$cores)) + else + parEnvDirective="" + fi + +# rcmd="qrsh" # FIXME - need to set on system basis; qrsh works for siraf cat >batch.sub < Author: wilde Date: 2011-01-15 17:28:25 -0600 (Sat, 15 Jan 2011) New Revision: 3968 Modified: SwiftApps/SwiftR/Swift/exec/start-swift Log: Interim changes for SGE support, from siraf. Modified: SwiftApps/SwiftR/Swift/exec/start-swift =================================================================== --- SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-15 03:08:22 UTC (rev 3967) +++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-15 23:28:25 UTC (rev 3968) @@ -1,6 +1,6 @@ #! /bin/bash -set -x +# set -x export TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap @@ -143,11 +143,10 @@ # FIXME: for big systems like Ranger, need to use ssh_tree to avoid socket FD exhastion? -echo about to define make-sge +echo def make-sge-submit-file make-sge-submit-file() { -echo in $0 if [ $queue != NONE ]; then queueDirective="#$ -q $queue" else @@ -158,14 +157,14 @@ else projectDirective="" fi - if [ $perEnv != NONE ]; then - parEnvDirective="#$ -pe $parEnv $(($nodes*$cores)) + if [ $parEnv != NONE ]; then + parEnvDirective="#$ -pe $parEnv $(($nodes*$cores))" else parEnvDirective="" fi # rcmd="qrsh" # FIXME - need to set on system basis; qrsh works for siraf - + cat >batch.sub <&2 "usage: $0 -c cores -h 'host1 ... 
hostN' -n nodes -q queue -s server -p parallelJobs -t walltime" - echo >&2 " valid servers: local, ssh, pbs, pbsf (pbs with firewalled workers)" - echo >&2 " defaults: cores=2 nodes=1 queue=none server=local parallelJobs=10 walltime=00:30:00 (hh:mm:ss) " + echo >&2 " valid servers: local, ssh, pbs, pbsf (for firewalled worker nodes), sge" + echo >&2 " defaults: cores=2 nodes=1 queue=none server=local parallelJobs=10 walltime=00:30:00 (hh:mm:ss)" } verify-is-one-of() @@ -294,7 +293,7 @@ parEnv=NONE rcmd=ssh # rcmd: ssh (typical) or qrsh (eg for siraf with node login restrictions) -workmode=slot # slot: start one worker on each slot; node: start on worker for all slots on a node +workmode=slot # slot: start one worker on each slot; node: start one worker for all slots on a node while [ $# -gt 0 ] do From noreply at svn.ci.uchicago.edu Sun Jan 16 15:55:36 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Sun, 16 Jan 2011 15:55:36 -0600 (CST) Subject: [Swift-commit] r3969 - provenancedb Message-ID: <20110116215536.7863D9CC84@svn.ci.uchicago.edu> Author: lgadelha Date: 2011-01-16 15:55:36 -0600 (Sun, 16 Jan 2011) New Revision: 3969 Modified: provenancedb/prov-to-sql.sh Log: Minor fixes. 
Modified: provenancedb/prov-to-sql.sh =================================================================== --- provenancedb/prov-to-sql.sh 2011-01-15 23:28:25 UTC (rev 3968) +++ provenancedb/prov-to-sql.sh 2011-01-16 21:55:36 UTC (rev 3969) @@ -131,12 +131,6 @@ done < fields.txt done < extrainfo.txt - -while read id extrainfo ; do - - echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql -done < extrainfo.txt - # TODO this could merge with other naming tables while read start duration thread final_state procname ; do if [ "$duration" != "last-event-line" ]; then From noreply at svn.ci.uchicago.edu Mon Jan 17 09:55:02 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 09:55:02 -0600 (CST) Subject: [Swift-commit] r3970 - SwiftApps/SwiftR Message-ID: <20110117155502.73D1F9CC94@svn.ci.uchicago.edu> Author: tga Date: 2011-01-17 09:55:02 -0600 (Mon, 17 Jan 2011) New Revision: 3970 Modified: SwiftApps/SwiftR/TODO Log: Test commit - added blank lines to TODO Modified: SwiftApps/SwiftR/TODO =================================================================== --- SwiftApps/SwiftR/TODO 2011-01-16 21:55:36 UTC (rev 3969) +++ SwiftApps/SwiftR/TODO 2011-01-17 15:55:02 UTC (rev 3970) @@ -1,5 +1,7 @@ *** TO DO LIST: + + Thu: why script.pl appears in coasters dir for manual coasters? From noreply at svn.ci.uchicago.edu Mon Jan 17 13:18:58 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 13:18:58 -0600 (CST) Subject: [Swift-commit] r3971 - SwiftApps/SwiftR Message-ID: <20110117191858.E60049CC84@svn.ci.uchicago.edu> Author: tga Date: 2011-01-17 13:18:58 -0600 (Mon, 17 Jan 2011) New Revision: 3971 Added: SwiftApps/SwiftR/JOINT-TODO Log: Wrote up a TODO list based on discussion at today's meeting with MW. 
Added: SwiftApps/SwiftR/JOINT-TODO =================================================================== --- SwiftApps/SwiftR/JOINT-TODO (rev 0) +++ SwiftApps/SwiftR/JOINT-TODO 2011-01-17 19:18:58 UTC (rev 3971) @@ -0,0 +1,66 @@ + +SHORT-TERM GOALS +--------------- +* Stable on the following platforms: + - SGE + - Local multi-core + - PBS + - SSH +* No hanging on FIFO reads/writes +* Working on the following platforms for development use: + - Cobalt + - Slurm +* Speed competitive with Snowfall on basic tasks + + +TEST SUITE +----------- +1. get and try openmx examples +2. look at openmx test examples for test structure conventions +3. Test cleanup +4. performance tests (compare with Snowfall) + - vary durations, batching, and target environs + - vary input and output sizes + - plot performance surfaces + + +MANUAL TESTING +-------------- +1. test on local, ssh, pbs (then SGE) + - need SGE accounts for Tim +2. test fast branch + +PERFORMANCE OPTIMISATION +------------------------ +1. increase speed on basic perf tests + +ARCHITECTURAL CHANGES +-------------------------- +1. add readSocket/writeSocket function to swift +2. replace client-R-to-rserver.swift fifo with socket +3. replace worker.pl-to-appScript.sh-to-serverR fifo with socket (later?) +4. consider karajan instead of swift program (discuss w/ Mihael and Allan) + +MISC SWIFTR ISSUES +------------------ +* replace ack with fprintf + +MISC SWIFT ISSUES +---------- +- coaster service & worker timeout - disable +- use fprintf() +- test fast branch +- eval raw swift and worker speed + + +BACKGROUND - TIM +---------------- +1. 
Read OpenMX Paper, Wiki + +ADMIN - TIM +---------- +* SGE accounts + -Get acct on IBI cluster - ci support + - apply for TeraGrid account to test on Ranger SGE system + - Eureka account + From noreply at svn.ci.uchicago.edu Mon Jan 17 14:29:26 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 14:29:26 -0600 (CST) Subject: [Swift-commit] r3972 - usertools/persistent-coasters Message-ID: <20110117202926.2B3589CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 14:29:26 -0600 (Mon, 17 Jan 2011) New Revision: 3972 Added: usertools/persistent-coasters/tc.passivate.data Log: Adding Added: usertools/persistent-coasters/tc.passivate.data =================================================================== --- usertools/persistent-coasters/tc.passivate.data (rev 0) +++ usertools/persistent-coasters/tc.passivate.data 2011-01-17 20:29:26 UTC (rev 3972) @@ -0,0 +1,2 @@ +passivate sh /bin/sh null null null +passivate hostname /bin/hostname null null null From noreply at svn.ci.uchicago.edu Mon Jan 17 14:32:07 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 14:32:07 -0600 (CST) Subject: [Swift-commit] r3973 - usertools/persistent-coasters Message-ID: <20110117203207.952279CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 14:32:07 -0600 (Mon, 17 Jan 2011) New Revision: 3973 Modified: usertools/persistent-coasters/run.zsh Log: Strange ZSH issue Modified: usertools/persistent-coasters/run.zsh =================================================================== --- usertools/persistent-coasters/run.zsh 2011-01-17 20:29:26 UTC (rev 3972) +++ usertools/persistent-coasters/run.zsh 2011-01-17 20:32:07 UTC (rev 3973) @@ -28,14 +28,14 @@ local OUT=$1 -# source workers-local.zsh -source workers-ssh.zsh +source workers-local.zsh +# source workers-ssh.zsh # Obtain the URL to which Swift should connect get_service_coasters() { local OUT=$1 - local LINE=() + LINE=() COUNT=0 for COUNT 
in {1..3} do @@ -58,7 +58,7 @@ get_service_local() { local OUT=$1 - local LINE=() + LINE=() COUNT=0 for COUNT in {1..4} do @@ -114,6 +114,7 @@ exitcode "Swift failed!" print "Swift finished." } & +SWIFT_PID=${!} sleep 1 From noreply at svn.ci.uchicago.edu Mon Jan 17 14:32:43 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 14:32:43 -0600 (CST) Subject: [Swift-commit] r3974 - usertools/persistent-coasters Message-ID: <20110117203243.ABEBE9CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 14:32:43 -0600 (Mon, 17 Jan 2011) New Revision: 3974 Modified: usertools/persistent-coasters/workers-ssh.zsh Log: Probable fix for WORKER_HOSTS loop issue reported by David Modified: usertools/persistent-coasters/workers-ssh.zsh =================================================================== --- usertools/persistent-coasters/workers-ssh.zsh 2011-01-17 20:32:07 UTC (rev 3973) +++ usertools/persistent-coasters/workers-ssh.zsh 2011-01-17 20:32:43 UTC (rev 3974) @@ -10,7 +10,7 @@ local -Z 5 R=${RANDOM} ID="${TIMESTAMP}.${R}" - for MACHINE in ${WORKER_HOSTS} + for MACHINE in ${=WORKER_HOSTS} do pwd ssh ${MACHINE} ${WORKER} ${URI} ${MACHINE} ${LOGDIR} & From noreply at svn.ci.uchicago.edu Mon Jan 17 14:33:08 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 14:33:08 -0600 (CST) Subject: [Swift-commit] r3975 - usertools/persistent-coasters Message-ID: <20110117203308.16CFA9CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 14:33:07 -0600 (Mon, 17 Jan 2011) New Revision: 3975 Modified: usertools/persistent-coasters/settings.sh Log: Put WORKER_HOSTS example in settings.sh Modified: usertools/persistent-coasters/settings.sh =================================================================== --- usertools/persistent-coasters/settings.sh 2011-01-17 20:32:43 UTC (rev 3974) +++ usertools/persistent-coasters/settings.sh 2011-01-17 20:33:07 UTC (rev 3975) @@ -5,5 +5,8 @@ NODES=64 +# 
WORKER_HOSTS="login1 login2" +export WORKER_HOSTS="$( print login{1,2}.mcs.anl.gov )" + QUEUE=prod-devel MAXTIME=$(( 20 )) From noreply at svn.ci.uchicago.edu Mon Jan 17 14:33:30 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 14:33:30 -0600 (CST) Subject: [Swift-commit] r3976 - usertools/persistent-coasters Message-ID: <20110117203330.234949CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 14:33:30 -0600 (Mon, 17 Jan 2011) New Revision: 3976 Modified: usertools/persistent-coasters/README.txt Log: Minor README change Modified: usertools/persistent-coasters/README.txt =================================================================== --- usertools/persistent-coasters/README.txt 2011-01-17 20:33:07 UTC (rev 3975) +++ usertools/persistent-coasters/README.txt 2011-01-17 20:33:30 UTC (rev 3976) @@ -9,7 +9,7 @@ 5) Connect workers to CoasterService 6) Run Swift for application -Script layout: +Usage: Start run.zsh - run.zsh does the above steps From noreply at svn.ci.uchicago.edu Mon Jan 17 14:34:14 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 14:34:14 -0600 (CST) Subject: [Swift-commit] r3977 - usertools/persistent-coasters Message-ID: <20110117203414.52A419CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 14:34:14 -0600 (Mon, 17 Jan 2011) New Revision: 3977 Added: usertools/persistent-coasters/start-service.zsh Removed: usertools/persistent-coasters/run.zsh Log: Rename main script Deleted: usertools/persistent-coasters/run.zsh =================================================================== --- usertools/persistent-coasters/run.zsh 2011-01-17 20:33:30 UTC (rev 3976) +++ usertools/persistent-coasters/run.zsh 2011-01-17 20:34:14 UTC (rev 3977) @@ -1,134 +0,0 @@ -#!/bin/zsh - -# If you get "Could not get coasters service" the -# service may already be running on SERVICE_PORT - -# Process management is incomplete here, check for -# processes with ps - 
-set -x - -PC=$( cd $( dirname $0 ) ; /bin/pwd ) -TOOLS=${PC} # In the future this might be ${SWIFT_HOME}/tools -source ${TOOLS}/helpers.zsh -[[ $? != 0 ]] && print "Could not load helpers.zsh!" && exit 1 - -[[ ${SWIFT} == "" ]] && SWIFT=$( which swift ) -SWIFT_BIN=$( dirname ${SWIFT} ) -COASTER_RESOURCES=${SWIFT_BIN}/../../../../provider-coaster/resources -WORKER=${COASTER_RESOURCES}/worker.pl - -export SERVICE_PORT=10985 - -LOGDIR=logs -mkdir -p ${LOGDIR} -exitcode - -SWIFT_OUT=${LOGDIR}/swift.out - -local OUT=$1 - -source workers-local.zsh -# source workers-ssh.zsh - -# Obtain the URL to which Swift should connect -get_service_coasters() -{ - local OUT=$1 - LINE=() - COUNT=0 - for COUNT in {1..3} - do - LINE=( $( grep --text "Started coaster service:" ${OUT} ) ) - if [[ ${#LINE} == 0 ]] then - grep Error ${OUT} >& /dev/null && return 1 - sleep 3 - else - break - fi - done - [[ ${#LINE} == 0 ]] && return 1 - - CONTACT=${LINE[-1]} - print ${CONTACT} - return 0 -} - -# Obtain the URL to which the workers should connect -get_service_local() -{ - local OUT=$1 - LINE=() - COUNT=0 - for COUNT in {1..4} - do - LINE=( $( grep "Passive queue processor" ${OUT} ) ) - if [[ ${#LINE} == 0 ]] then - sleep 4 - else - break - fi - if grep "Error" ${OUT} >& /dev/null - then - print "coaster-service error!" >&2 - cat ${OUT} >&2 - return 1 - fi - done - [[ ${#LINE} == 0 ]] && return 1 - - CONTACT=${LINE[-1]} - print ${CONTACT} - return 0 -} - -SIGNALS="EXIT INT QUIT" -cleanup_trap() -{ - print "cleanup_trap()..." - eval trap - ${SIGNALS} - [[ ${COASTER_SERVICE_PID} != "" ]] && kill ${COASTER_SERVICE_PID} - [[ ${STARTWORKERS_PID} != "" ]] && kill ${START_WORKERS_PID} -} -# eval trap cleanup_trap ${SIGNALS} - -SERVICE_LOG=${LOGDIR}/coaster-service.log -coaster-service -nosec -p ${SERVICE_PORT} >& ${SERVICE_LOG} & -COASTER_SERVICE_PID=${!} - -sleep 1 - -SERVICE_COASTERS=$( get_service_coasters ${SERVICE_LOG} ) -exitcode "Could not get coasters service!" 
-export SERVICE_COASTERS - -source setup.sh -exitcode "setup.sh failed!" - -sleep 1 - -{ ${SWIFT} -config swift.properties \ - -sites.file sites.passivate.xml \ - -tc.file tc.passivate.data \ - passivate.swift < /dev/null >& ${SWIFT_OUT} - exitcode "Swift failed!" - print "Swift finished." -} & -SWIFT_PID=${!} - -sleep 1 - -pwd -SERVICE_LOCAL=$( get_service_local ${SWIFT_OUT} ) -exitcode "get_service_local failed!" -export SERVICE_LOCAL - -sleep 1 - -print "Starting workers..." -start_workers ${SERVICE_LOCAL} & -START_WORKERS_PID=${!} - -sleep 1 - -exit 0 Copied: usertools/persistent-coasters/start-service.zsh (from rev 3973, usertools/persistent-coasters/run.zsh) =================================================================== --- usertools/persistent-coasters/start-service.zsh (rev 0) +++ usertools/persistent-coasters/start-service.zsh 2011-01-17 20:34:14 UTC (rev 3977) @@ -0,0 +1,134 @@ +#!/bin/zsh + +# If you get "Could not get coasters service" the +# service may already be running on SERVICE_PORT + +# Process management is incomplete here, check for +# processes with ps + +set -x + +PC=$( cd $( dirname $0 ) ; /bin/pwd ) +TOOLS=${PC} # In the future this might be ${SWIFT_HOME}/tools +source ${TOOLS}/helpers.zsh +[[ $? != 0 ]] && print "Could not load helpers.zsh!" 
&& exit 1 + +[[ ${SWIFT} == "" ]] && SWIFT=$( which swift ) +SWIFT_BIN=$( dirname ${SWIFT} ) +COASTER_RESOURCES=${SWIFT_BIN}/../../../../provider-coaster/resources +WORKER=${COASTER_RESOURCES}/worker.pl + +export SERVICE_PORT=10985 + +LOGDIR=logs +mkdir -p ${LOGDIR} +exitcode + +SWIFT_OUT=${LOGDIR}/swift.out + +local OUT=$1 + +source workers-local.zsh +# source workers-ssh.zsh + +# Obtain the URL to which Swift should connect +get_service_coasters() +{ + local OUT=$1 + LINE=() + COUNT=0 + for COUNT in {1..3} + do + LINE=( $( grep --text "Started coaster service:" ${OUT} ) ) + if [[ ${#LINE} == 0 ]] then + grep Error ${OUT} >& /dev/null && return 1 + sleep 3 + else + break + fi + done + [[ ${#LINE} == 0 ]] && return 1 + + CONTACT=${LINE[-1]} + print ${CONTACT} + return 0 +} + +# Obtain the URL to which the workers should connect +get_service_local() +{ + local OUT=$1 + LINE=() + COUNT=0 + for COUNT in {1..4} + do + LINE=( $( grep "Passive queue processor" ${OUT} ) ) + if [[ ${#LINE} == 0 ]] then + sleep 4 + else + break + fi + if grep "Error" ${OUT} >& /dev/null + then + print "coaster-service error!" >&2 + cat ${OUT} >&2 + return 1 + fi + done + [[ ${#LINE} == 0 ]] && return 1 + + CONTACT=${LINE[-1]} + print ${CONTACT} + return 0 +} + +SIGNALS="EXIT INT QUIT" +cleanup_trap() +{ + print "cleanup_trap()..." + eval trap - ${SIGNALS} + [[ ${COASTER_SERVICE_PID} != "" ]] && kill ${COASTER_SERVICE_PID} + [[ ${STARTWORKERS_PID} != "" ]] && kill ${START_WORKERS_PID} +} +# eval trap cleanup_trap ${SIGNALS} + +SERVICE_LOG=${LOGDIR}/coaster-service.log +coaster-service -nosec -p ${SERVICE_PORT} >& ${SERVICE_LOG} & +COASTER_SERVICE_PID=${!} + +sleep 1 + +SERVICE_COASTERS=$( get_service_coasters ${SERVICE_LOG} ) +exitcode "Could not get coasters service!" +export SERVICE_COASTERS + +source setup.sh +exitcode "setup.sh failed!" 
+ +sleep 1 + +{ ${SWIFT} -config swift.properties \ + -sites.file sites.passivate.xml \ + -tc.file tc.passivate.data \ + passivate.swift < /dev/null >& ${SWIFT_OUT} + exitcode "Swift failed!" + print "Swift finished." +} & +SWIFT_PID=${!} + +sleep 1 + +pwd +SERVICE_LOCAL=$( get_service_local ${SWIFT_OUT} ) +exitcode "get_service_local failed!" +export SERVICE_LOCAL + +sleep 1 + +print "Starting workers..." +start_workers ${SERVICE_LOCAL} & +START_WORKERS_PID=${!} + +sleep 1 + +exit 0 From noreply at svn.ci.uchicago.edu Mon Jan 17 14:35:12 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 14:35:12 -0600 (CST) Subject: [Swift-commit] r3978 - usertools/persistent-coasters Message-ID: <20110117203512.F38B19CC94@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 14:35:11 -0600 (Mon, 17 Jan 2011) New Revision: 3978 Modified: usertools/persistent-coasters/start-service.zsh usertools/persistent-coasters/workers-local.zsh usertools/persistent-coasters/workers-ssh.zsh Log: Rename API to start-workers() Modified: usertools/persistent-coasters/start-service.zsh =================================================================== --- usertools/persistent-coasters/start-service.zsh 2011-01-17 20:34:14 UTC (rev 3977) +++ usertools/persistent-coasters/start-service.zsh 2011-01-17 20:35:11 UTC (rev 3978) @@ -126,7 +126,7 @@ sleep 1 print "Starting workers..." 
-start_workers ${SERVICE_LOCAL} & +start-workers ${SERVICE_LOCAL} & START_WORKERS_PID=${!} sleep 1 Modified: usertools/persistent-coasters/workers-local.zsh =================================================================== --- usertools/persistent-coasters/workers-local.zsh 2011-01-17 20:34:14 UTC (rev 3977) +++ usertools/persistent-coasters/workers-local.zsh 2011-01-17 20:35:11 UTC (rev 3978) @@ -2,7 +2,7 @@ # Source this to get start_workers() for local # Just starts a local worker.pl process -start_workers() +start-workers() { local URI=$1 Modified: usertools/persistent-coasters/workers-ssh.zsh =================================================================== --- usertools/persistent-coasters/workers-ssh.zsh 2011-01-17 20:34:14 UTC (rev 3977) +++ usertools/persistent-coasters/workers-ssh.zsh 2011-01-17 20:35:11 UTC (rev 3978) @@ -2,7 +2,7 @@ # Source this to get start_workers() for ssh # hostnames must be in environment variable WORKER_HOSTS -start_workers() +start-workers() { local URI=$1 From noreply at svn.ci.uchicago.edu Mon Jan 17 15:22:52 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 15:22:52 -0600 (CST) Subject: [Swift-commit] r3979 - in SwiftApps/SwiftR/Swift: R exec Message-ID: <20110117212252.46F269CC84@svn.ci.uchicago.edu> Author: wilde Date: 2011-01-17 15:22:52 -0600 (Mon, 17 Jan 2011) New Revision: 3979 Modified: SwiftApps/SwiftR/Swift/R/Swift.R SwiftApps/SwiftR/Swift/exec/rserver.swift SwiftApps/SwiftR/Swift/exec/start-swift Log: changed ack to request fifo from app() to fprintf(). Added first perf test. Improvements to start-swift. 
Modified: SwiftApps/SwiftR/Swift/R/Swift.R =================================================================== --- SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-17 20:35:11 UTC (rev 3978) +++ SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-17 21:22:52 UTC (rev 3979) @@ -273,6 +273,37 @@ } } +# Performance tests + +swiftTest_6.1 <- function(delay,ncalls) +{ + options(swift.initialexpr="initVar3 <<- 123; initVar4 <<- 100"); + + timed <- function(delay) { Sys.sleep(delay); delay } + + args=list(delay) + arglist = rep(list(args),ncalls) + + cat("\nTest of swiftapply(delay,arglist)\n") + + startTime = proc.time()[["elapsed"]] + swiftres = swiftapply(timed,arglist) + endTime = proc.time()[["elapsed"]] + runTime <- endTime - startTime + + cat("\n\n ===> Total elapsed unit test time = ",runTime," seconds.\n\n") + + cat("Swift result:\n") + print(swiftres[[1]]) + + if(identical(delay,swiftres[[1]])) { + cat("\n==> test 6.1 passed\n") + } else { + cat("\n==> test 6.1 FAILED !!!!!\n") + } + +} + runAllSwiftTests <- function() { Modified: SwiftApps/SwiftR/Swift/exec/rserver.swift =================================================================== --- SwiftApps/SwiftR/Swift/exec/rserver.swift 2011-01-17 20:35:11 UTC (rev 3978) +++ SwiftApps/SwiftR/Swift/exec/rserver.swift 2011-01-17 21:22:52 UTC (rev 3979) @@ -52,7 +52,7 @@ # FIXME: read swiftserver dir via @args dir = readData(requestPipeName); # Reads direct from this local pipe. Assumes Swift started in right dir. - trace("rserver: got dir", dir); + # trace("rserver: got dir", dir); external wait[]; wait = apply(dir); @@ -64,6 +64,8 @@ # want: tracef("%k completed\n", "my/responsepipe.fifo", wait); # %k waits for wait to be fully closed; then sends formatted string to specified file(doing open, write, close). 
- ack(wait); + // ack(wait); + fprintf(resultPipeName, "%kdone\n", wait); + } until (done); Modified: SwiftApps/SwiftR/Swift/exec/start-swift =================================================================== --- SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-17 20:35:11 UTC (rev 3978) +++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-17 21:22:52 UTC (rev 3979) @@ -48,7 +48,7 @@ # fixme:send worker.pl to remote host via stdin or scp. ssh $host /bin/sh -c \'"mkdir -p $LOGDIR"\' scp $SWIFTBIN/worker.pl $host:$LOGDIR - ssh $host '/bin/sh -c '\'"WORKER_LOGGING_ENABLED=true $LOGDIR/worker.pl $CONTACT $ID $LOGDIR $IDLETIMEOUT 2>&1 & echo PID=\$!"\' >remotepid.$host &1 & echo PID=\$!"\' >remotepid.$host &2 "usage: $0 -c cores -h 'host1 ... hostN' -n nodes -q queue -s server -p parallelJobs -t walltime" - echo >&2 " valid servers: local, ssh, pbs, pbsf (for firewalled worker nodes), sge" - echo >&2 " defaults: cores=2 nodes=1 queue=none server=local parallelJobs=10 walltime=00:30:00 (hh:mm:ss)" -} - verify-is-one-of() { argname=$1 @@ -271,6 +262,40 @@ fi } +usage() +{ + cat <= 1 (default is: local 2; ssh 4; cluster 8) + -e parEnv site specific, SGE only + -h hosts 1 list of hosts, quoted as one argument, space separated + -m workmode node node: start one worker for all slots on a node; slot (one worker on each slot) + -n nodes 1 + -p throttle 10 >= 1 + -q queue site speific (PBS, SGE) + -r rcmd ssh site specific, SGE only, typically ssh. 
qrsh for siraf cluster + -s server local local, pbs, sge, ssh, pbsf (for firewalled worker nodes) + -t time 00:30:00 hh:mm:ss, for PBS and SGE only + -w wkloglvl NONE NONE, ERROR, WARN, INFO, DEBUG, TRACE + + Examples: + + Local: start-swift -c 4 + ssh: start-swift -s ssh -c 8 -h "crush stomp thwomp" + PBS: start-swift -s pbs -c 8 -n 4 -A CI-87654 -q short -t "02:00:00" + SGE: start-swift -s sge -c 8 -n 4 -A CI-87654 -q short -t "02:00:00" -m node -e smp + siraf: start-swift -s sge -c 8 -n 4 -A CI-87654 -q bigmem.q -t "02:00:00" -m node -e shm -r qrsh + +END +} + # main script tmp=${SWIFTR_TMP:-/tmp} @@ -283,14 +308,16 @@ queue=short #throttleOneCore="-0.001" FIXME: Remove #throttleOneCore="0.00" FIXME: Remove -localcores=5 cores=0 -defaultCores=4 +defaultLocalCores=2 +defaultSshCores=4 +defaultClusterCores=8 throttle=10 hosts=no-hosts-specified queue=NONE project=NONE parEnv=NONE +workerLogging=ERROR rcmd=ssh # rcmd: ssh (typical) or qrsh (eg for siraf with node login restrictions) workmode=slot # slot: start one worker on each slot; node: start one worker for all slots on a node @@ -309,13 +336,12 @@ -r) rcmd=$2; verify-is-one-of rcmd $rcmd ssh qrsh; shift ;; -s) server=$2; verify-is-one-of server $server local ssh pbs pbsf sge; shift ;; -t) time=$2; verify-not-null time $time; shift ;; + -w) workerLogging=$2; verify-is-one-of workerLoggingLevel $workerLogging NONE ERROR WARN INFO DEBUG TRACE; shift ;; *) usage; exit 1 ;; esac shift done -echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle - SWIFTRBIN=$(cd $(dirname $0); pwd) SWIFTBIN=$SWIFTRBIN/../swift/bin # This depends on ~/SwiftR/Swift/swift being a symlink to swift in RLibrary/Swift @@ -346,13 +372,20 @@ if [ $server = local ]; then + if [ $cores -eq 0 ]; then + cores=$defaultLocalCores + fi + echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle + + source $SWIFTRBIN/configure-server-local $cores elif [ $server = ssh ]; 
then if [ $cores -eq 0 ]; then - cores=$defaultRemoteCores + cores=$defaultSshCores fi + echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle source $SWIFTRBIN/configure-server-ssh $cores $time @@ -384,6 +417,11 @@ elif [ \( $server = pbs \) -o \( $server = pbsf \) -o \( $server = sge \) ]; then + if [ $cores -eq 0 ]; then + cores=$defaultClusterCores + fi + echo project=$project cores=$cores nodes=$nodes queue=$queue server=$server throttle=$throttle + source $SWIFTRBIN/configure-server-${server} $cores jobidfile=${out/stdouterr/jobid} From noreply at svn.ci.uchicago.edu Mon Jan 17 15:36:28 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 15:36:28 -0600 (CST) Subject: [Swift-commit] r3980 - usertools/persistent-coasters Message-ID: <20110117213628.37D209CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 15:36:28 -0600 (Mon, 17 Jan 2011) New Revision: 3980 Modified: usertools/persistent-coasters/settings.sh Log: Enable control of worker logging Modified: usertools/persistent-coasters/settings.sh =================================================================== --- usertools/persistent-coasters/settings.sh 2011-01-17 21:22:52 UTC (rev 3979) +++ usertools/persistent-coasters/settings.sh 2011-01-17 21:36:28 UTC (rev 3980) @@ -5,6 +5,8 @@ NODES=64 +export WORKER_LOGGING=INFO + # WORKER_HOSTS="login1 login2" export WORKER_HOSTS="$( print login{1,2}.mcs.anl.gov )" From noreply at svn.ci.uchicago.edu Mon Jan 17 15:37:11 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 15:37:11 -0600 (CST) Subject: [Swift-commit] r3981 - usertools/persistent-coasters Message-ID: <20110117213711.65F939CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 15:37:11 -0600 (Mon, 17 Jan 2011) New Revision: 3981 Modified: usertools/persistent-coasters/start-service.zsh Log: Create user sites and tc files (may become more interesting later) 
Modified: usertools/persistent-coasters/start-service.zsh =================================================================== --- usertools/persistent-coasters/start-service.zsh 2011-01-17 21:36:28 UTC (rev 3980) +++ usertools/persistent-coasters/start-service.zsh 2011-01-17 21:37:11 UTC (rev 3981) @@ -131,4 +131,9 @@ sleep 1 +cp sites.passivate.xml sites.xml +print "Created user sites file: sites.xml" +cp tc.passivate.data tc.data +print "Created user tc file: tc.data" + exit 0 From noreply at svn.ci.uchicago.edu Mon Jan 17 15:39:34 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 15:39:34 -0600 (CST) Subject: [Swift-commit] r3982 - usertools/persistent-coasters Message-ID: <20110117213934.01EE49CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 15:39:33 -0600 (Mon, 17 Jan 2011) New Revision: 3982 Added: usertools/persistent-coasters/run-job.zsh usertools/persistent-coasters/system-info.sh usertools/persistent-coasters/system-info.swift Log: Add a user job to test the service once started Added: usertools/persistent-coasters/run-job.zsh =================================================================== --- usertools/persistent-coasters/run-job.zsh (rev 0) +++ usertools/persistent-coasters/run-job.zsh 2011-01-17 21:39:33 UTC (rev 3982) @@ -0,0 +1,16 @@ +#!/bin/zsh + +# Launch a user job +# Use after running start-service.zsh + +APP=$( cd $( dirname $0 ) ; /bin/pwd ) +TOOLS=${APP} # In the future this might be ${SWIFT_HOME}/tools +source ${TOOLS}/helpers.zsh +[[ $? != 0 ]] && print "Could not load helpers.zsh!" && exit 1 + +SWIFT_OUT=logs/swift-user.out +swift -config swift.properties \ + -sites.file sites.xml \ + -tc.file tc.data \ + system-info.swift < /dev/null # >& ${SWIFT_OUT} +exitcode "Swift user job failed!" 
Property changes on: usertools/persistent-coasters/run-job.zsh ___________________________________________________________________ Name: svn:executable + * Added: usertools/persistent-coasters/system-info.sh =================================================================== --- usertools/persistent-coasters/system-info.sh (rev 0) +++ usertools/persistent-coasters/system-info.sh 2011-01-17 21:39:33 UTC (rev 3982) @@ -0,0 +1,10 @@ +#!/bin/bash + +# User script to be submitted by Swift using persistent Coasters +# Simply report some system info to stdout for diagnostics + +hostname +echo +uname -a + +exit 0 Property changes on: usertools/persistent-coasters/system-info.sh ___________________________________________________________________ Name: svn:executable + * Added: usertools/persistent-coasters/system-info.swift =================================================================== --- usertools/persistent-coasters/system-info.swift (rev 0) +++ usertools/persistent-coasters/system-info.swift 2011-01-17 21:39:33 UTC (rev 3982) @@ -0,0 +1,20 @@ + +/** + * Simple script that does nothing of consequence to + * the CoasterService. Settings are passed along + * which configure the CoasterService. + * + * We can do some diagnostic stuff here. 
+ * */ + +type file; + +app (file output) passivate (file script) +{ + sh @script stdout=@output; +} + +file s<"system-info.sh">; +file o<"system-info.out">; + +o = passivate(s); From noreply at svn.ci.uchicago.edu Mon Jan 17 15:39:48 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 15:39:48 -0600 (CST) Subject: [Swift-commit] r3983 - usertools/persistent-coasters Message-ID: <20110117213948.9F8529CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 15:39:48 -0600 (Mon, 17 Jan 2011) New Revision: 3983 Removed: usertools/persistent-coasters/tc.data Log: This is now a generated file Deleted: usertools/persistent-coasters/tc.data =================================================================== --- usertools/persistent-coasters/tc.data 2011-01-17 21:39:33 UTC (rev 3982) +++ usertools/persistent-coasters/tc.data 2011-01-17 21:39:48 UTC (rev 3983) @@ -1 +0,0 @@ -cpc sh /bin/sh null null null From noreply at svn.ci.uchicago.edu Mon Jan 17 15:41:39 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 15:41:39 -0600 (CST) Subject: [Swift-commit] r3984 - usertools/persistent-coasters Message-ID: <20110117214139.AE0349CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 15:41:39 -0600 (Mon, 17 Jan 2011) New Revision: 3984 Modified: usertools/persistent-coasters/ Log: Set ignores Property changes on: usertools/persistent-coasters ___________________________________________________________________ Name: svn:ignore - logs sites.passivate.xml sites.xml gensites.sed passivate.kml passivate.xml + logs sites.passivate.xml sites.xml gensites.sed passivate.kml passivate.xml system-info.kml system-info.xml From noreply at svn.ci.uchicago.edu Mon Jan 17 15:48:16 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 15:48:16 -0600 (CST) Subject: [Swift-commit] r3985 - usertools/persistent-coasters Message-ID: 
<20110117214816.4D3659CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 15:48:16 -0600 (Mon, 17 Jan 2011) New Revision: 3985 Modified: usertools/persistent-coasters/README.txt usertools/persistent-coasters/settings.sh usertools/persistent-coasters/setup.sh usertools/persistent-coasters/start-service.zsh Log: Make the user source settings.sh manually. Add WORKER_MODE Modified: usertools/persistent-coasters/README.txt =================================================================== --- usertools/persistent-coasters/README.txt 2011-01-17 21:41:39 UTC (rev 3984) +++ usertools/persistent-coasters/README.txt 2011-01-17 21:48:16 UTC (rev 3985) @@ -1,6 +1,7 @@ Overview of persistent CoasterService process +0) Source settings.sh or a similar file 1) Start coaster service 2) Get URL to which Swift should connect from service output 3) Run Swift once to send settings to CoasterService, Modified: usertools/persistent-coasters/settings.sh =================================================================== --- usertools/persistent-coasters/settings.sh 2011-01-17 21:41:39 UTC (rev 3984) +++ usertools/persistent-coasters/settings.sh 2011-01-17 21:48:16 UTC (rev 3985) @@ -5,6 +5,9 @@ NODES=64 +# How to launch workers- local or ssh +export WORKER_MODE=local + export WORKER_LOGGING=INFO # WORKER_HOSTS="login1 login2" Modified: usertools/persistent-coasters/setup.sh =================================================================== --- usertools/persistent-coasters/setup.sh 2011-01-17 21:41:39 UTC (rev 3984) +++ usertools/persistent-coasters/setup.sh 2011-01-17 21:48:16 UTC (rev 3985) @@ -4,7 +4,6 @@ # Should extend to generate tc as well source ${TOOLS}/coasters-setup.sh -source ./settings.sh export WORK Modified: usertools/persistent-coasters/start-service.zsh =================================================================== --- usertools/persistent-coasters/start-service.zsh 2011-01-17 21:41:39 UTC (rev 3984) +++ usertools/persistent-coasters/start-service.zsh 
2011-01-17 21:48:16 UTC (rev 3985) @@ -28,8 +28,12 @@ local OUT=$1 -source workers-local.zsh -# source workers-ssh.zsh +if [[ ${WORKER_MODE} == "local" ]] + then + source workers-local.zsh +elif [[ ${WORKER_MODE} == "ssh" ]] + source workers-ssh.zsh +fi # Obtain the URL to which Swift should connect get_service_coasters() From noreply at svn.ci.uchicago.edu Mon Jan 17 15:49:43 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 15:49:43 -0600 (CST) Subject: [Swift-commit] r3986 - usertools/persistent-coasters Message-ID: <20110117214943.BE14E9CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 15:49:43 -0600 (Mon, 17 Jan 2011) New Revision: 3986 Modified: usertools/persistent-coasters/settings.sh Log: Document settings Modified: usertools/persistent-coasters/settings.sh =================================================================== --- usertools/persistent-coasters/settings.sh 2011-01-17 21:48:16 UTC (rev 3985) +++ usertools/persistent-coasters/settings.sh 2011-01-17 21:49:43 UTC (rev 3986) @@ -1,17 +1,20 @@ # Keep all interesting settings in one place +# sites file workDirectory location WORK=${HOME}/work -NODES=64 - # How to launch workers- local or ssh export WORKER_MODE=local +# Worker logging setting passed to worker.pl export WORKER_LOGGING=INFO +# Worker host names for ssh # WORKER_HOSTS="login1 login2" export WORKER_HOSTS="$( print login{1,2}.mcs.anl.gov )" +# Some settings known to gensites +NODES=64 QUEUE=prod-devel MAXTIME=$(( 20 )) From noreply at svn.ci.uchicago.edu Mon Jan 17 16:11:02 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 16:11:02 -0600 (CST) Subject: [Swift-commit] r3987 - usertools/persistent-coasters Message-ID: <20110117221102.D9FC89CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-17 16:11:02 -0600 (Mon, 17 Jan 2011) New Revision: 3987 Modified: usertools/persistent-coasters/settings.sh 
usertools/persistent-coasters/start-service.zsh usertools/persistent-coasters/system-info.sh usertools/persistent-coasters/workers-ssh.zsh Log: Improvements to ssh mode Modified: usertools/persistent-coasters/settings.sh =================================================================== --- usertools/persistent-coasters/settings.sh 2011-01-17 21:49:43 UTC (rev 3986) +++ usertools/persistent-coasters/settings.sh 2011-01-17 22:11:02 UTC (rev 3987) @@ -1,11 +1,11 @@ # Keep all interesting settings in one place -# sites file workDirectory location -WORK=${HOME}/work +# Where to place/launch worker.pl on the remote machine +export WORKER_WORK=/home/${USER}/work # How to launch workers- local or ssh -export WORKER_MODE=local +export WORKER_MODE=ssh # Worker logging setting passed to worker.pl export WORKER_LOGGING=INFO @@ -18,3 +18,4 @@ NODES=64 QUEUE=prod-devel MAXTIME=$(( 20 )) +WORK=${HOME}/work Modified: usertools/persistent-coasters/start-service.zsh =================================================================== --- usertools/persistent-coasters/start-service.zsh 2011-01-17 21:49:43 UTC (rev 3986) +++ usertools/persistent-coasters/start-service.zsh 2011-01-17 22:11:02 UTC (rev 3987) @@ -6,8 +6,6 @@ # Process management is incomplete here, check for # processes with ps -set -x - PC=$( cd $( dirname $0 ) ; /bin/pwd ) TOOLS=${PC} # In the future this might be ${SWIFT_HOME}/tools source ${TOOLS}/helpers.zsh @@ -26,14 +24,17 @@ SWIFT_OUT=${LOGDIR}/swift.out -local OUT=$1 - if [[ ${WORKER_MODE} == "local" ]] then source workers-local.zsh elif [[ ${WORKER_MODE} == "ssh" ]] + then source workers-ssh.zsh +else + print "Unknown WORKER_MODE: ${WORKER_MODE}" + false fi +exitcode "Could not find start-workers()!" 
# Obtain the URL to which Swift should connect get_service_coasters() Modified: usertools/persistent-coasters/system-info.sh =================================================================== --- usertools/persistent-coasters/system-info.sh 2011-01-17 21:49:43 UTC (rev 3986) +++ usertools/persistent-coasters/system-info.sh 2011-01-17 22:11:02 UTC (rev 3987) @@ -4,6 +4,7 @@ # Simply report some system info to stdout for diagnostics hostname +hostname -d echo uname -a Modified: usertools/persistent-coasters/workers-ssh.zsh =================================================================== --- usertools/persistent-coasters/workers-ssh.zsh 2011-01-17 21:49:43 UTC (rev 3986) +++ usertools/persistent-coasters/workers-ssh.zsh 2011-01-17 22:11:02 UTC (rev 3987) @@ -13,7 +13,8 @@ for MACHINE in ${=WORKER_HOSTS} do pwd - ssh ${MACHINE} ${WORKER} ${URI} ${MACHINE} ${LOGDIR} & + scp ${WORKER} ${MACHINE}:${WORKER_WORK} + ssh ${MACHINE} ${WORKER_WORK}/worker.pl ${URI} ${MACHINE} ${LOGDIR} & done # TODO: manage these PIDs From noreply at svn.ci.uchicago.edu Mon Jan 17 19:45:05 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 19:45:05 -0600 (CST) Subject: [Swift-commit] r3988 - branches/release-0.92/tests/providers/local-pbs-coasters Message-ID: <20110118014505.9BC909CC84@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-17 19:45:05 -0600 (Mon, 17 Jan 2011) New Revision: 3988 Modified: branches/release-0.92/tests/providers/local-pbs-coasters/001-catsn-pads-local-pbs-coasters.timeout Log: longer wait time needed to accommodate for longer queues Modified: branches/release-0.92/tests/providers/local-pbs-coasters/001-catsn-pads-local-pbs-coasters.timeout =================================================================== --- branches/release-0.92/tests/providers/local-pbs-coasters/001-catsn-pads-local-pbs-coasters.timeout 2011-01-17 22:11:02 UTC (rev 3987) +++ 
branches/release-0.92/tests/providers/local-pbs-coasters/001-catsn-pads-local-pbs-coasters.timeout 2011-01-18 01:45:05 UTC (rev 3988) @@ -1 +1,2 @@ -180 +7200 + From noreply at svn.ci.uchicago.edu Mon Jan 17 19:46:45 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 19:46:45 -0600 (CST) Subject: [Swift-commit] r3989 - branches/release-0.92/tests/providers/local-pbs-coasters Message-ID: <20110118014645.E766E9CC84@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-17 19:46:45 -0600 (Mon, 17 Jan 2011) New Revision: 3989 Modified: branches/release-0.92/tests/providers/local-pbs-coasters/sites.template.xml Log: queue and project env variables added Modified: branches/release-0.92/tests/providers/local-pbs-coasters/sites.template.xml =================================================================== --- branches/release-0.92/tests/providers/local-pbs-coasters/sites.template.xml 2011-01-18 01:45:05 UTC (rev 3988) +++ branches/release-0.92/tests/providers/local-pbs-coasters/sites.template.xml 2011-01-18 01:46:45 UTC (rev 3989) @@ -2,14 +2,16 @@ - 3000 + 1 + 7200 1 1 1 1 - fast + _QUEUE_ 5.99 10000 + _PROJECT_ _WORK_ From noreply at svn.ci.uchicago.edu Mon Jan 17 19:48:33 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 19:48:33 -0600 (CST) Subject: [Swift-commit] r3990 - branches/release-0.92/tests/providers/local-pbs-coasters Message-ID: <20110118014833.2850A9CC84@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-17 19:48:33 -0600 (Mon, 17 Jan 2011) New Revision: 3990 Modified: branches/release-0.92/tests/providers/local-pbs-coasters/tc.template.data Log: walltime set in sites file Modified: branches/release-0.92/tests/providers/local-pbs-coasters/tc.template.data =================================================================== --- branches/release-0.92/tests/providers/local-pbs-coasters/tc.template.data 2011-01-18 01:46:45 UTC (rev 3989) +++ 
branches/release-0.92/tests/providers/local-pbs-coasters/tc.template.data 2011-01-18 01:48:33 UTC (rev 3990) @@ -1,8 +1,8 @@ -local-pbs-coasters echo /bin/echo INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:05:00" -local-pbs-coasters cat /bin/cat INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:05:00" -local-pbs-coasters ls /bin/ls INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:05:00" -local-pbs-coasters grep /bin/grep INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:05:00" -local-pbs-coasters sort /bin/sort INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:05:00" -local-pbs-coasters paste /bin/paste INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:05:00" -local-pbs-coasters wc /usr/bin/wc INSTALLED INTEL32::LINUX GLOBUS::maxwalltime="00:05:00" +local-pbs-coasters echo /bin/echo INSTALLED INTEL32::LINUX null +local-pbs-coasters cat /bin/cat INSTALLED INTEL32::LINUX null +local-pbs-coasters ls /bin/ls INSTALLED INTEL32::LINUX null +local-pbs-coasters grep /bin/grep INSTALLED INTEL32::LINUX null +local-pbs-coasters sort /bin/sort INSTALLED INTEL32::LINUX null +local-pbs-coasters paste /bin/paste INSTALLED INTEL32::LINUX null +local-pbs-coasters wc /usr/bin/wc INSTALLED INTEL32::LINUX null From noreply at svn.ci.uchicago.edu Mon Jan 17 20:03:30 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 17 Jan 2011 20:03:30 -0600 (CST) Subject: [Swift-commit] r3991 - branches/release-0.92/tests/sites Message-ID: <20110118020330.644E39CC84@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-17 20:03:30 -0600 (Mon, 17 Jan 2011) New Revision: 3991 Added: branches/release-0.92/tests/sites/pads-pbs-coasters.sh Log: for testing single site Added: branches/release-0.92/tests/sites/pads-pbs-coasters.sh =================================================================== --- branches/release-0.92/tests/sites/pads-pbs-coasters.sh (rev 0) +++ branches/release-0.92/tests/sites/pads-pbs-coasters.sh 2011-01-18 02:03:30 UTC (rev 3991) @@ -0,0 +1,6 @@ + +# 
GROUPLIST definition to run pbs tests + +GROUPLIST=( + $TESTDIR/providers/local-pbs-coasters \ +) From noreply at svn.ci.uchicago.edu Tue Jan 18 00:24:56 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 18 Jan 2011 00:24:56 -0600 (CST) Subject: [Swift-commit] r3992 - branches/release-0.92/tests Message-ID: <20110118062456.93E2B9CC9A@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-18 00:24:55 -0600 (Tue, 18 Jan 2011) New Revision: 3992 Removed: branches/release-0.92/tests/README.txt Log: changing to standard README Deleted: branches/release-0.92/tests/README.txt =================================================================== --- branches/release-0.92/tests/README.txt 2011-01-18 02:03:30 UTC (rev 3991) +++ branches/release-0.92/tests/README.txt 2011-01-18 06:24:55 UTC (rev 3992) @@ -1 +0,0 @@ -See nightly.sh for usage From noreply at svn.ci.uchicago.edu Tue Jan 18 00:25:36 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 18 Jan 2011 00:25:36 -0600 (CST) Subject: [Swift-commit] r3993 - branches/release-0.92/tests Message-ID: <20110118062536.425489CC9A@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-18 00:25:35 -0600 (Tue, 18 Jan 2011) New Revision: 3993 Added: branches/release-0.92/tests/README Log: quick and dirty explanation of the scripts for running nightly tests Added: branches/release-0.92/tests/README =================================================================== --- branches/release-0.92/tests/README (rev 0) +++ branches/release-0.92/tests/README 2011-01-18 06:25:35 UTC (rev 3993) @@ -0,0 +1,15 @@ +nightly.sh: + See nightly.sh for usage + +run-nightly.sh: + wrapper for nightly.sh + env variables set may be customized by user + +meta.sh: + wrapper for run-nightly.sh + used to execute run-nightly/nightly.sh from a remote site using ssh + example usage: + meta.sh login.pads.ci.uchicago.edu /home/skenny/swift_runs/tests sites/pads-pbs-coasters.sh + + + From noreply at
svn.ci.uchicago.edu Tue Jan 18 00:35:03 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 18 Jan 2011 00:35:03 -0600 (CST) Subject: [Swift-commit] r3994 - branches/release-0.92/tests Message-ID: <20110118063503.457549CC9A@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-18 00:35:03 -0600 (Tue, 18 Jan 2011) New Revision: 3994 Modified: branches/release-0.92/tests/meta.sh Log: tested submitting from uci to pads Modified: branches/release-0.92/tests/meta.sh =================================================================== --- branches/release-0.92/tests/meta.sh 2011-01-18 06:25:35 UTC (rev 3993) +++ branches/release-0.92/tests/meta.sh 2011-01-18 06:35:03 UTC (rev 3994) @@ -1,12 +1,16 @@ #!/bin/bash -# Sketch of meta script +# runs run-nightly.sh (wrapper for nightly.sh) on a given site based on login -# Runs nightly.sh on various sites +SITE_LOGIN=$1 # e.g. login.pads.ci.uchicago.edu -DIR=$1 # E.g., /home/wozniak/nightly-tests +DIR=$2 # e.g., /home/skenny/swift_runs/tests -ssh intrepid.alcf.anl.gov $DIR/run-nightly.sh groups/group-intrepid.sh +TEST=$3 # e.g. sites/pads-pbs-coasters.sh -# Retrieve results -# scp ... +# run test and retrieve results + +RUNDIR=run-$( date +"%Y-%m-%d" ) + +ssh $SITE_LOGIN $DIR/run-nightly.sh $DIR/$TEST +scp -r $SITE_LOGIN:$RUNDIR . From noreply at svn.ci.uchicago.edu Tue Jan 18 10:29:25 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 18 Jan 2011 10:29:25 -0600 (CST) Subject: [Swift-commit] r3995 - SwiftApps/SwiftR Message-ID: <20110118162925.273879CC9A@svn.ci.uchicago.edu> Author: tga Date: 2011-01-18 10:29:24 -0600 (Tue, 18 Jan 2011) New Revision: 3995 Added: SwiftApps/SwiftR/Makefile Removed: SwiftApps/SwiftR/install.sh Log: Replace install.sh with a makefile, with separate build, install and publish targets. Also now it gets the version number automatically from the R package description file. 
Added: SwiftApps/SwiftR/Makefile =================================================================== --- SwiftApps/SwiftR/Makefile (rev 0) +++ SwiftApps/SwiftR/Makefile 2011-01-18 16:29:24 UTC (rev 3995) @@ -0,0 +1,35 @@ + +# Extract the version number from the R package description file +# There should be a line in the file of the form: +# Version: (major).(minor) +SWIFTR_VERSION=$(shell sed -n -r 's/^Version:(\W*)([[:digit:]]+\.[[:digit:]])/\2/p' Swift/DESCRIPTION) + +SW := $(shell which swift) +SWIFT_PATH := $(shell cd `dirname $(SW)`/..; pwd) + +TBALL=SWIFT_$(SWIFTR_VERSION).tar.gz + +all: build + + +build: $(TBALL) + + +publish: $(TBALL) + cp $(TBALL) ~/public_html + +install: $(TBALL) + R CMD INSTALL $(TBALL) + +clean: + rm -rf Swift/inst/swift/* + +$(TBALL): + mkdir -p Swift/inst/swift + cp -pr $(SWIFT_PATH)/* Swift/inst/swift + R CMD build Swift + +checkversion: + echo SwiftR version is $(SWIFTR_VERSION) according to Swift/DESCRIPTION file + + Deleted: SwiftApps/SwiftR/install.sh =================================================================== --- SwiftApps/SwiftR/install.sh 2011-01-18 06:35:03 UTC (rev 3994) +++ SwiftApps/SwiftR/install.sh 2011-01-18 16:29:24 UTC (rev 3995) @@ -1,8 +0,0 @@ -ver=0.1 -rm -rf Swift/inst/swift/* -mkdir -p Swift/inst/swift -SWIFTREL=$(cd $(dirname $(which swift))/..; pwd) -cp -pr $SWIFTREL/* Swift/inst/swift -R CMD build Swift -R CMD INSTALL Swift_${ver}.tar.gz -cp Swift_${ver}.tar.gz ~/public_html From noreply at svn.ci.uchicago.edu Tue Jan 18 10:35:23 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 18 Jan 2011 10:35:23 -0600 (CST) Subject: [Swift-commit] r3996 - SwiftApps/SwiftR Message-ID: <20110118163523.6C3E59CC9A@svn.ci.uchicago.edu> Author: tga Date: 2011-01-18 10:35:23 -0600 (Tue, 18 Jan 2011) New Revision: 3996 Modified: SwiftApps/SwiftR/Makefile Log: Bugfixes to makefile: clean now ensures that tarball is removed, and install now uses the correct package name. 
Modified: SwiftApps/SwiftR/Makefile =================================================================== --- SwiftApps/SwiftR/Makefile 2011-01-18 16:29:24 UTC (rev 3995) +++ SwiftApps/SwiftR/Makefile 2011-01-18 16:35:23 UTC (rev 3996) @@ -7,7 +7,7 @@ SW := $(shell which swift) SWIFT_PATH := $(shell cd `dirname $(SW)`/..; pwd) -TBALL=SWIFT_$(SWIFTR_VERSION).tar.gz +TBALL=Swift_$(SWIFTR_VERSION).tar.gz all: build @@ -23,6 +23,7 @@ clean: rm -rf Swift/inst/swift/* + rm $(TBALL) $(TBALL): mkdir -p Swift/inst/swift From noreply at svn.ci.uchicago.edu Tue Jan 18 11:30:56 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 18 Jan 2011 11:30:56 -0600 (CST) Subject: [Swift-commit] r3997 - SwiftApps/SwiftR Message-ID: <20110118173056.D39A09CC84@svn.ci.uchicago.edu> Author: tga Date: 2011-01-18 11:30:56 -0600 (Tue, 18 Jan 2011) New Revision: 3997 Modified: SwiftApps/SwiftR/Makefile Log: Tweaks to makefile: * Forces rebuild of SwiftR package always, without checking for modification * Avoids copying over swift directory unless you run clean Modified: SwiftApps/SwiftR/Makefile =================================================================== --- SwiftApps/SwiftR/Makefile 2011-01-18 16:35:23 UTC (rev 3996) +++ SwiftApps/SwiftR/Makefile 2011-01-18 17:30:56 UTC (rev 3997) @@ -12,8 +12,13 @@ all: build -build: $(TBALL) +build: Swift/inst/swift/bin/swift + R CMD build Swift +# Target to make sure that swift exists +Swift/inst/swift/bin/swift: + mkdir -p Swift/inst/swift + cp -pr $(SWIFT_PATH)/* Swift/inst/swift publish: $(TBALL) cp $(TBALL) ~/public_html @@ -25,10 +30,7 @@ rm -rf Swift/inst/swift/* rm $(TBALL) -$(TBALL): - mkdir -p Swift/inst/swift - cp -pr $(SWIFT_PATH)/* Swift/inst/swift - R CMD build Swift +$(TBALL): build checkversion: echo SwiftR version is $(SWIFTR_VERSION) according to Swift/DESCRIPTION file From noreply at svn.ci.uchicago.edu Tue Jan 18 17:59:39 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) 
Date: Tue, 18 Jan 2011 17:59:39 -0600 (CST) Subject: [Swift-commit] r3998 - branches/release-0.92/tests/groups Message-ID: <20110118235939.642329CC9A@svn.ci.uchicago.edu> Author: skenny Date: 2011-01-18 17:59:39 -0600 (Tue, 18 Jan 2011) New Revision: 3998 Added: branches/release-0.92/tests/groups/pads-pbs-coasters.sh Log: single test for pads coasters Added: branches/release-0.92/tests/groups/pads-pbs-coasters.sh =================================================================== --- branches/release-0.92/tests/groups/pads-pbs-coasters.sh (rev 0) +++ branches/release-0.92/tests/groups/pads-pbs-coasters.sh 2011-01-18 23:59:39 UTC (rev 3998) @@ -0,0 +1,6 @@ + +# GROUPLIST definition to run pbs tests + +GROUPLIST=( + $TESTDIR/providers/local-pbs-coasters \ +) From noreply at svn.ci.uchicago.edu Wed Jan 19 13:32:48 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 19 Jan 2011 13:32:48 -0600 (CST) Subject: [Swift-commit] r3999 - in SwiftApps/SwiftR/Swift: R exec Message-ID: <20110119193248.33C2E9CC84@svn.ci.uchicago.edu> Author: tga Date: 2011-01-19 13:32:47 -0600 (Wed, 19 Jan 2011) New Revision: 3999 Added: SwiftApps/SwiftR/Swift/R/Workers.R SwiftApps/SwiftR/Swift/exec/start-swift-daemon Modified: SwiftApps/SwiftR/Swift/R/Swift.R Log: Checking in work on launching start-swift within R. All logic to launch and track processes is present, but TERM signal is not correctly taking down the worker processes at this stage: need to fix. 
Modified: SwiftApps/SwiftR/Swift/R/Swift.R =================================================================== --- SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-18 23:59:39 UTC (rev 3998) +++ SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-19 19:32:47 UTC (rev 3999) @@ -193,6 +193,9 @@ options(swift.initialexpr=initcmds) # Set here; used in test group 4 } + + + swiftTest_1.1 <- function() { Added: SwiftApps/SwiftR/Swift/R/Workers.R =================================================================== --- SwiftApps/SwiftR/Swift/R/Workers.R (rev 0) +++ SwiftApps/SwiftR/Swift/R/Workers.R 2011-01-19 19:32:47 UTC (rev 3999) @@ -0,0 +1,147 @@ + + + +swiftInit <- function( cores=NULL, server=NULL, + hosts=NULL, nodes=NULL, project=NULL, + parEnv=NULL, workmode=NULL, + throttle=NULL, queue=NULL, + rcmd=NULL, time=NULL, + workerLogging=NULL ) +{ + #TODO: document function + # server: which server backend to use to acquire workers + # for example, local runs tasks on the local machine + # pbs, uses the PBS scheduler to obtain nodes on a cluster, + # etc + # hosts: list of hosts to use (for ssh backend) + # nodes: number of hosts to use (for cluster-based backends) + # cores: number of cores per host to use #TODO: check + # time: (pbs and sge servers only) walltime in hh:mm:ss + # Default is 30 minutes on these servers, unlimited + # elsewhere + # wkloglvl: logging level. Settings are NONE, ERROR, WARn, INFO, + # DEBUG, TRACE + # Options which are server and site-specific: + # project, queue + + + # In case it was somehow deleted + if (is.null(.swift.workers)) { + .swift.workers <<- list() + } + + # Find out where start-swift script lives in this + # R installation + # Presume UNIX path names - start-swift script + cmdString <- file.path(.find.package("Swift"), "exec/start-swift-daemon") + + if(! is.null(cores) ) { + cmdString <- paste(cmdString, "-c", cores) + } + if(! is.null(server) ) { + cmdString <- paste(cmdString, "-s", server) + } + if(! 
is.null(hosts) ) { + cmdString <- paste(cmdString, "-h", hosts) + } + if(! is.null(parEnv) ) { + cmdString <- paste(cmdString, "-e", parEnv) + } + if(! is.null(workmode) ) { + cmdString <- paste(cmdString, "-m", workmode) + } + if(! is.null(nodes) ) { + cmdString <- paste(cmdString, "-n", nodes) + } + if(! is.null(throttle) ) { + cmdString <- paste(cmdString, "-p", throttle) + } + if(! is.null(queue) ) { + cmdString <- paste(cmdString, "-q", queue) + } + if(! is.null(rcmd) ) { + cmdString <- paste(cmdString, "-r", rcmd) + } + if(! is.null(time) ) { + cmdString <- paste(cmdString, "-t", time) + } + if(! is.null(workerLogging) ) { + cmdString <- paste(cmdString, "-w", workerLogging) + } + + + + # launch asynchronously + # for now, we will rely on the shell script's output to inform + # the user if there was a problem with the workers + output <- system(cmdString, intern=TRUE) + cat("Started worker manager with pid ", output, "\n") + + # store pid + .swift.workers[[length(.swift.workers) + 1]] <<- output + + # add hook to ensure child process will be killed when + # this process exits + addHook() +} + +swiftShutdown <- function() +{ + if (is.null(.swift.workers)) { + return + } + cat("Shutting down Swift worker processes\n") + # shut down all worker processes using kill + for (pid in .swift.workers) { + cat("Killing ", pid, "\n") + killCmd <- paste("kill","-1", pid, " &> /dev/null") + cat(killCmd, "\n") + system(killCmd, wait=FALSE) + } + + .swift.workers <<- list() + +} + +.First.lib <- function(libname, packagename) { + # When the library is loaded, set up the + # list of workers + .swift.workers <<- list() +} + +.Last.lib <- function(p) +{ + # If the library is unloaded we need to do cleanup + swiftShutdown() +} + +# Hook to perform cleanup of workers upon shutting down an R +# session +addHook <- function() { + # Replace the user's last function with ours + # If .UserLast already exists don't worry about it + # as we've already added our hook + if 
(!exists(".UserLast")) { + if (!exists(".Last")) { + # Create a dummy function + .UserLast <<- function () {} + + } + else { + .UserLast <<- .Last + } + + .Last <<- function () { + swiftShutdown() + .UserLast() + removeHook() + } + } +} + +removeHook <- function() { + if (exists(".UserLast", where=".GlobalEnv")) { + .Last <<- .UserLast + rm(".UserLast", pos=".GlobalEnv") + } +} Added: SwiftApps/SwiftR/Swift/exec/start-swift-daemon =================================================================== --- SwiftApps/SwiftR/Swift/exec/start-swift-daemon (rev 0) +++ SwiftApps/SwiftR/Swift/exec/start-swift-daemon 2011-01-19 19:32:47 UTC (rev 3999) @@ -0,0 +1,14 @@ +#!/bin/sh +# This script is intended as a helper script to let +# R launch start-swift. It works around the limitations +# of the R system command, which cannot retrieve +# the process id of the child process. +# This script forks off a child process, detaches it +# and then, as the only thing written to stdout, echoes +# the pid of start-swift +ssscript=`dirname $0`/start-swift +# Start as detached daemon, with output going to stdout +$ssscript "$@" 1>&2 & +childpid=$! 
+echo ${childpid} +disown $childpid Property changes on: SwiftApps/SwiftR/Swift/exec/start-swift-daemon ___________________________________________________________________ Name: svn:executable + * From noreply at svn.ci.uchicago.edu Wed Jan 19 16:30:37 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 19 Jan 2011 16:30:37 -0600 (CST) Subject: [Swift-commit] r4000 - in SwiftApps/SwiftR/Swift: R exec Message-ID: <20110119223037.768BA9CC94@svn.ci.uchicago.edu> Author: tga Date: 2011-01-19 16:30:37 -0600 (Wed, 19 Jan 2011) New Revision: 4000 Added: SwiftApps/SwiftR/Swift/exec/killtree Modified: SwiftApps/SwiftR/Swift/R/Swift.R SwiftApps/SwiftR/Swift/R/Workers.R SwiftApps/SwiftR/Swift/exec/start-swift SwiftApps/SwiftR/Swift/exec/start-swift-daemon Log: Several improvements: * R now correctly sends signals to the appropriate worker processes when the session exits * worker processes remove their request pipes when they quit * if you run a swiftapply, the R code now checks whether a valid request pipe exists and can detect that nothing is running. 
Modified: SwiftApps/SwiftR/Swift/R/Swift.R =================================================================== --- SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-19 19:32:47 UTC (rev 3999) +++ SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-19 22:30:37 UTC (rev 4000) @@ -127,16 +127,22 @@ requestPipeName=paste(swiftServerDir,"/requestpipe",sep="") resultPipeName=paste(swiftServerDir,"/resultpipe",sep="") + if (file.exists(requestPipeName)) { + requestPipe <- fifo(requestPipeName,open="w",blocking=TRUE) + cat(file=requestPipe,paste(reqdir,"\n",sep="")) + close(requestPipe) - requestPipe <- fifo(requestPipeName,open="w",blocking=TRUE) - cat(file=requestPipe,paste(reqdir,"\n",sep="")) - close(requestPipe) + # Wait for reply from service - # Wait for reply from service - - resultPipe <- fifo(resultPipeName,open="r",blocking=TRUE) - resultStatus <- readLines(con=resultPipe,n=1,ok=TRUE) - close(resultPipe) + resultPipe <- fifo(resultPipeName,open="r",blocking=TRUE) + resultStatus <- readLines(con=resultPipe,n=1,ok=TRUE) + close(resultPipe) + } + else { + stop(paste("It appears no SwiftR servers of type", swiftserver, + "are running, as no request pipe exists in", + swiftServerDir,"exists")) + } } # Fetch the batch results Modified: SwiftApps/SwiftR/Swift/R/Workers.R =================================================================== --- SwiftApps/SwiftR/Swift/R/Workers.R 2011-01-19 19:32:47 UTC (rev 3999) +++ SwiftApps/SwiftR/Swift/R/Workers.R 2011-01-19 22:30:37 UTC (rev 4000) @@ -93,9 +93,8 @@ cat("Shutting down Swift worker processes\n") # shut down all worker processes using kill for (pid in .swift.workers) { - cat("Killing ", pid, "\n") - killCmd <- paste("kill","-1", pid, " &> /dev/null") - cat(killCmd, "\n") + cmdString <- file.path(.find.package("Swift"), "exec/killtree") + killCmd <- paste(cmdString,pid) system(killCmd, wait=FALSE) } Added: SwiftApps/SwiftR/Swift/exec/killtree =================================================================== --- 
SwiftApps/SwiftR/Swift/exec/killtree (rev 0) +++ SwiftApps/SwiftR/Swift/exec/killtree 2011-01-19 22:30:37 UTC (rev 4000) @@ -0,0 +1,19 @@ +#!/bin/sh +tokill=$1 +echo $tokill +while [ ! -z $tokill ] +do + children="" + for pid in $tokill + do + #echo $pid + newkids=`ps h -o pid --ppid $pid` + if [ ! -z $newkids ] + then + children="$children $newkids" + fi + #echo kids $children + kill $pid + done + tokill=$children +done Property changes on: SwiftApps/SwiftR/Swift/exec/killtree ___________________________________________________________________ Name: svn:executable + * Modified: SwiftApps/SwiftR/Swift/exec/start-swift =================================================================== --- SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-19 19:32:47 UTC (rev 3999) +++ SwiftApps/SwiftR/Swift/exec/start-swift 2011-01-19 22:30:37 UTC (rev 4000) @@ -1,6 +1,6 @@ #! /bin/bash -# set -x +#set -x export TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap @@ -370,6 +370,15 @@ out=swift.stdouterr touch $out + +# Reset signal handling behaviour to default: +# sending a HUP to this process should then trigger termination +# of all subprocess unless we change it by hand. 
This is added +# here to work around problems where parent processes had changed +# signal handlers +#trap " echo quitting; exit 0 " $TRAPEVENTS + + if [ $server = local ]; then if [ $cores -eq 0 ]; then @@ -379,7 +388,18 @@ source $SWIFTRBIN/configure-server-local $cores + function onexit { + # don't accept any more requests: unlink fifo from filesystem + if [ -p requestpipe ]; then + rm requestpipe + fi + exit 0 + } + TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap + trap onexit $TRAPEVENTS + + elif [ $server = ssh ]; then if [ $cores -eq 0 ]; then @@ -394,6 +414,10 @@ TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap function onexit { + # don't accept any more requests: unlink fifo from filesystem + if [ -p requestpipe ]; then + rm requestpipe + fi coasterservicepid="" # null: saved in case we go back to using coaster servers trap - $TRAPEVENTS sshpids=$(cat $sshpidfile) @@ -429,6 +453,10 @@ TRAPEVENTS="EXIT 1 2 3 15" # Signals and conditions to trap function onexit { + # don't accept any more requests: unlink fifo from filesystem + if [ -p requestpipe ]; then + rm requestpipe + fi coasterservicepid="" # null: saved in case we go back to using coaster servers trap - $TRAPEVENTS jobid=$(cat $jobidfile) Modified: SwiftApps/SwiftR/Swift/exec/start-swift-daemon =================================================================== --- SwiftApps/SwiftR/Swift/exec/start-swift-daemon 2011-01-19 19:32:47 UTC (rev 3999) +++ SwiftApps/SwiftR/Swift/exec/start-swift-daemon 2011-01-19 22:30:37 UTC (rev 4000) @@ -7,8 +7,14 @@ # and then, as the only thing written to stdout, echoes # the pid of start-swift ssscript=`dirname $0`/start-swift + # Start as detached daemon, with output going to stdout + + +# Start up a subprocess with a new process group +# childpid will be of form '[jobno] pid' $ssscript "$@" 1>&2 & childpid=$! 
+ + echo ${childpid} -disown $childpid From noreply at svn.ci.uchicago.edu Thu Jan 20 08:51:55 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 20 Jan 2011 08:51:55 -0600 (CST) Subject: [Swift-commit] r4001 - SwiftApps/SwiftR/Swift/R Message-ID: <20110120145155.C82ED9CC84@svn.ci.uchicago.edu> Author: tga Date: 2011-01-20 08:51:54 -0600 (Thu, 20 Jan 2011) New Revision: 4001 Modified: SwiftApps/SwiftR/Swift/R/Swift.R SwiftApps/SwiftR/Swift/R/Workers.R Log: Minor changes: documented a race condition to be fixed in the future, made sure swiftInit got the server setting from the global options. Modified: SwiftApps/SwiftR/Swift/R/Swift.R =================================================================== --- SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-19 22:30:37 UTC (rev 4000) +++ SwiftApps/SwiftR/Swift/R/Swift.R 2011-01-20 14:51:54 UTC (rev 4001) @@ -127,7 +127,16 @@ requestPipeName=paste(swiftServerDir,"/requestpipe",sep="") resultPipeName=paste(swiftServerDir,"/resultpipe",sep="") + + # fifo will block irrecoverably if there is no reader on the + # other end of the requestPipe. This is bad. The swift worker + # script is responsible for deleting the request pipe when it + # shuts down, so we know if the requestPipe still exists there + # should still be a worker (or the worker crashed in a funny way). if (file.exists(requestPipeName)) { + #TODO: there is a race condition here if the fifo disappears in + # between checking for existence and opening the fifo + requestPipe <- fifo(requestPipeName,open="w",blocking=TRUE) cat(file=requestPipe,paste(reqdir,"\n",sep="")) close(requestPipe) Modified: SwiftApps/SwiftR/Swift/R/Workers.R =================================================================== --- SwiftApps/SwiftR/Swift/R/Workers.R 2011-01-19 22:30:37 UTC (rev 4000) +++ SwiftApps/SwiftR/Swift/R/Workers.R 2011-01-20 14:51:54 UTC (rev 4001) @@ -38,6 +38,9 @@ if(! 
is.null(cores) ) { cmdString <- paste(cmdString, "-c", cores) } + + if(is.null(server)) + server <- getOption("swift.server") if(! is.null(server) ) { cmdString <- paste(cmdString, "-s", server) } From noreply at svn.ci.uchicago.edu Thu Jan 20 09:16:09 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 20 Jan 2011 09:16:09 -0600 (CST) Subject: [Swift-commit] r4002 - usertools Message-ID: <20110120151609.DD53F9CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-20 09:16:09 -0600 (Thu, 20 Jan 2011) New Revision: 4002 Added: usertools/worker-profile/ Log: Adding From noreply at svn.ci.uchicago.edu Thu Jan 20 09:18:28 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 20 Jan 2011 09:18:28 -0600 (CST) Subject: [Swift-commit] r4003 - usertools/worker-profile Message-ID: <20110120151828.0DBA29CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-20 09:18:27 -0600 (Thu, 20 Jan 2011) New Revision: 4003 Added: usertools/worker-profile/src/ Log: Adding From noreply at svn.ci.uchicago.edu Thu Jan 20 09:21:46 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 20 Jan 2011 09:21:46 -0600 (CST) Subject: [Swift-commit] r4004 - in usertools: . 
plotter plotter/src Message-ID: <20110120152146.CE4159CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-20 09:21:46 -0600 (Thu, 20 Jan 2011) New Revision: 4004 Added: usertools/plotter/ usertools/plotter/build.xml usertools/plotter/classpath.zsh usertools/plotter/lines.zsh usertools/plotter/src/ usertools/plotter/src/Bits.java usertools/plotter/src/LineReader.java usertools/plotter/src/Lines.java usertools/plotter/src/Util.java Log: Copy simple JFreeChart-based plotter from CDM Added: usertools/plotter/build.xml =================================================================== --- usertools/plotter/build.xml (rev 0) +++ usertools/plotter/build.xml 2011-01-20 15:21:46 UTC (rev 4004) @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + Added: usertools/plotter/classpath.zsh =================================================================== --- usertools/plotter/classpath.zsh (rev 0) +++ usertools/plotter/classpath.zsh 2011-01-20 15:21:46 UTC (rev 4004) @@ -0,0 +1,15 @@ + +# Source this to set up classpath for plotter runs +# classpath stored in ${CP} + +if [[ $( uname ) == CYGWIN* ]] +then + typeset -T CP cp ";" + cp+=( ${classpath} ) + cp+=${PLOTTERS}/src + cp+=( ${PLOTTERS}/lib/*.jar ) +else + typeset -T CP cp + CP=${CLASSPATH}:${PLOTTERS}/src + cp+=( ${PLOTTERS}/lib/*.jar ) +fi Added: usertools/plotter/lines.zsh =================================================================== --- usertools/plotter/lines.zsh (rev 0) +++ usertools/plotter/lines.zsh 2011-01-20 15:21:46 UTC (rev 4004) @@ -0,0 +1,14 @@ +#!/bin/zsh + +# LINES.ZSH +# Arguments passed directly to Lines.java +# usage: lines.zsh * + +PLOTTERS=$( dirname $0 ) + +# declare -p PLOTDIR + +source ${PLOTTERS}/classpath.zsh +[[ $? != 0 ]] && print "Could not build CLASSPATH!" 
&& return 1 + +java -ea -cp ${CP} Lines ${*} Property changes on: usertools/plotter/lines.zsh ___________________________________________________________________ Name: svn:executable + * Added: usertools/plotter/src/Bits.java =================================================================== --- usertools/plotter/src/Bits.java (rev 0) +++ usertools/plotter/src/Bits.java 2011-01-20 15:21:46 UTC (rev 4004) @@ -0,0 +1,149 @@ + +import java.math.BigInteger; +import java.util.*; + +/** + * Provides random bits, statically. + * Kills VM exit code 2 on misuse. + * */ + +public class Bits +{ + public static Random rng = null; + static boolean locked = false; + static boolean ready = false; + + /** + If not locked, seed the generator with the clock. + */ + public static void init() + { + init(System.currentTimeMillis()); + } + + /** + If not locked, seed the generator with the clock mod 1000. + Useful for debugging. + @param print If true, print the seed. + */ + public static long init(boolean print) + { + long seed = System.currentTimeMillis() % 1000; + init(seed); + + if (print) + System.out.println("Bits.seed: " + seed); + + return seed; + } + + /** + If not locked, seed the generator + @param seed The seed. + */ + public static void init(long seed) + { + assert (!locked) : "Bits locked!"; + // System.out.println("Seeding RNG..."); + rng = new Random(seed); + ready = true; + } + + /** + Lock the generator to prevent seeding + */ + public static void lock() + { + locked = true; + } + + /** + Unlock the generator + */ + public static void unlock() + { + locked = false; + } + + public static double nextDouble() + { + if (! ready) + { + System.err.println("Bits not ready!"); + System.exit(2); + } + return rng.nextDouble(); + } + + public static double nextDouble(double d) + { + return rng.nextDouble() * d; + } + + public static int nextInt() + { + if (! 
ready) + { + System.err.println("Bits not ready!"); + System.exit(2); + } + return rng.nextInt(); + } + + /** + Return a integer in [0..t). + */ + public static int nextInt(int t) + { + double d = nextDouble(); + int i = (new Double(d * t)).intValue(); + return i; + } + + public static boolean nextBoolean() + { + if (! ready) + { + System.err.println("Bits not ready!"); + System.exit(2); + } + return rng.nextBoolean(); + } + + public static long nextLong() + { + if (! ready) + { + System.err.println("Bits not ready!"); + System.exit(2); + } + return rng.nextLong(); + } + + public static long nextLong(long t) + { + double d = nextDouble(); + long i = (new Double(d * t)).longValue(); + return i; + } + + public static BigInteger nextBigInteger(BigInteger i) + { + if (i.equals(BigInteger.ZERO)) + return BigInteger.ZERO; + + int b = i.bitLength()+1; + BigInteger result; + do + { + result = BigInteger.valueOf(b); + } while (result.compareTo(i) >= 0); + + return result; + } + + public static void nextBytes(byte[] bytes) + { + rng.nextBytes(bytes); + } +} Added: usertools/plotter/src/LineReader.java =================================================================== --- usertools/plotter/src/LineReader.java (rev 0) +++ usertools/plotter/src/LineReader.java 2011-01-20 15:21:46 UTC (rev 4004) @@ -0,0 +1,147 @@ + +import java.util.*; + +public class LineReader +{ + LineReader() + {} + + public static List read(java.io.File file) + throws java.io.FileNotFoundException + { + java.io.BufferedReader reader = + new java.io.BufferedReader(new java.io.FileReader(file)); + return read(reader); + } + + public static List read(String s) + { + java.io.BufferedReader reader = + new java.io.BufferedReader(new java.io.StringReader(s)); + return read(reader); + } + + public static List read(java.io.BufferedReader reader) + { + List result = new ArrayList(); + try + { + String prevline = ""; + String line = ""; + while ((line = reader.readLine()) != null) + { + int hash = line.indexOf("#"); 
+ if (hash >= 0) + line = line.substring(0,hash); + line = spaces(line); + line = (prevline + " " + line).trim(); + if (line.endsWith("\\")) + { + line = line.substring(0, line.length()-2); + prevline = line; + continue; + } + else + { + prevline = ""; + line = line.trim(); + if (line.length() > 0) + result.add(line); + } + } + reader.close(); + } + catch (java.io.IOException e) + { + System.out.println("LineReader: I/O problem."); + return null; + } + return result; + } + + public static List tokens(List lines) + { + List result = new ArrayList(lines.size()); + for (String line : lines) + { + String[] tokens = tokenize(line); + result.add(tokens); + } + return result; + } + + public static int maxTokens(List tokens) + { + int result = 0; + for (String[] t : tokens) + if (t.length > result) + result = t.length; + return result; + } + + public static double[][] array(List lines) + { + List tokens = tokens(lines); + int columns = maxTokens(tokens); + int rows = lines.size(); + double[][] result = new double[rows][columns]; + for (int i = 0; i < rows; i++) + for (int j = 0; j < columns; j++) + { + try + { + String v = tokens.get(i)[j]; + double d = Double.parseDouble(v); + result[i][j] = d; + } + catch (NumberFormatException e) + { + throw new RuntimeException("Problem in row: " + i); + } + } + return result; + } + + public static String spaces(String line) + { + String result = ""; + for (int i = 0; i < line.length(); i++) + { + if (line.charAt(i) == '=' && + line.charAt(i+1) != '=' && + line.charAt(i-1) != '=') + { + result += " = "; + } + else + result += line.substring(i,i+1); + } + return result; + } + + public static String[] tokenize(String line) + { + if (line == null) + return null; + List words = new ArrayList(); + String[] ws = line.split("\\s"); + for (int i = 0; i < ws.length; i++) + if (ws[i].length() > 0) + words.add(ws[i]); + String[] result = new String[words.size()]; + for (int i = 0; i < words.size(); i++) + result[i] = words.get(i); + return 
result; + } + + /* + public static List list(String line) + { + List result = new ArrayList(); + String[] tokens = tokenize(line); + for (int i = 0; i < tokens.length; i++) + result.add(tokens[i]); + return result; + } + */ +} Added: usertools/plotter/src/Lines.java =================================================================== --- usertools/plotter/src/Lines.java (rev 0) +++ usertools/plotter/src/Lines.java 2011-01-20 15:21:46 UTC (rev 4004) @@ -0,0 +1,327 @@ + +import java.awt.Color; +import java.awt.geom.Rectangle2D; + +import java.io.*; +import java.util.*; + +import gnu.getopt.Getopt; + +import org.apache.commons.io.IOUtils; +import org.apache.xmlgraphics.java2d.ps.EPSDocumentGraphics2D; + +import org.jfree.chart.ChartFactory; +import org.jfree.chart.JFreeChart; +import org.jfree.chart.axis.NumberAxis; +import org.jfree.chart.plot.PlotOrientation; +import org.jfree.chart.plot.XYPlot; +import org.jfree.chart.renderer.xy.XYLineAndShapeRenderer; +import org.jfree.data.Range; +import org.jfree.data.general.Series; +import org.jfree.data.xy.XYSeriesCollection; + +public class Lines +{ + static Properties properties; + + public static boolean bw = false; + + static int w = 400; + static int h = 400; + + // null indicates the value was not set by the user + static Double xmin = null; + static Double xmax = null; + static Double ymin = null; + static Double ymax = null; + + /** + Generate simple plot. + @param collection The x,y data. + @param title Plot title. + @param xlabel X label text. + @param ylabel Y label text. + @param output EPS filename. 
+ */ + public static boolean plot(XYSeriesCollection collection, + String title, String xlabel, + String ylabel, String output) + { + EPSDocumentGraphics2D g2d = null; + Rectangle2D.Double rectangle = null; + OutputStream out = null; + + try + { + out = new FileOutputStream(output); + out = new BufferedOutputStream(out); + + g2d = new EPSDocumentGraphics2D(false); + g2d.setGraphicContext + (new org.apache.xmlgraphics.java2d.GraphicContext()); + + rectangle = new Rectangle2D.Double(0, 0, w, h); + + g2d.setGraphicContext + (new org.apache.xmlgraphics.java2d.GraphicContext()); + g2d.setupDocument(out, w, h); + } + catch (IOException e) + { + System.out.println("Problem with file: " + output); + return false; + } + + final boolean withLegend = true; + + JFreeChart chart = + ChartFactory.createXYLineChart + (title, xlabel, ylabel, collection, + PlotOrientation.VERTICAL, withLegend, false, false); + + setupPlot(chart, collection); + chart.draw(g2d, rectangle); + + try + { + g2d.finish(); + } + catch (Exception e) + { + System.out.println("Err!"); + } + + IOUtils.closeQuietly(out); + System.out.println("PLOTTED: " + output); + + return true; + } + + private static void setupPlot(JFreeChart chart, + XYSeriesCollection collection) + { + XYPlot plot = chart.getXYPlot(); + XYLineAndShapeRenderer renderer = new XYLineAndShapeRenderer(); + if (bw) + for (int i = 0; i < plot.getSeriesCount(); i++) + renderer.setSeriesPaint(i, Color.BLACK); + for (int i = 0; i < plot.getSeriesCount(); i++) + { + Series series = collection.getSeries(i); + if (! 
showShape(series.getDescription())) + { + // System.out.println("invis"); + renderer.setSeriesShapesVisible(i, false); + } + } + + setAxes(plot); + plot.setRenderer(renderer); + plot.setBackgroundPaint(Color.WHITE); + } + + static void setAxes(XYPlot plot) + { + // Actual values: modify if necessary + double axmin, axmax, aymin, aymax; + + if (xmin != null || xmax != null) + { + NumberAxis axis = (NumberAxis) plot.getDomainAxis(); + Range range = axis.getRange(); + axmin = range.getLowerBound(); + axmax = range.getUpperBound(); + if (xmin != null) axmin = xmin; + if (xmax != null) axmax = xmax; + axis.setRange(axmin, axmax); + } + + if (ymin != null || ymax != null) + { + NumberAxis axis = (NumberAxis) plot.getRangeAxis(); + Range range = axis.getRange(); + aymin = range.getLowerBound(); + aymax = range.getUpperBound(); + if (ymin != null) aymin = ymin; + if (ymax != null) aymax = ymax; + axis.setRange(aymin, aymax); + } + } + + /** + Debugging only. + Args: Lines * + Reads title, xlabel, ylabel, and legend labels from properties: + e.g.: + title = Plot + xlabel = size + ylabel = speed + label.file.data = legend text + */ + public static void main(String[] args) + { + // Settings: + boolean verbose = false; + + Getopt g = new Getopt("testprog", args, "v"); + int c = -1; + while ((c = g.getopt()) != -1) + { + switch (c) + { + case 'v': + verbose = true; + } + } + + if (args.length < 3) + { + System.out.println + ("usage: [] *"); + System.exit(2); + } + + Bits.init(); + Util.verbose(verbose); + + String propFile = args[0]; + String output = args[1]; + List names = new ArrayList(); + for (int i = 2; i < args.length; i++) + names.add(args[i]); + + String title = null; + String xlabel = null; + String ylabel = null; + List data = new ArrayList(); + List labels = new ArrayList(); + + properties = new Properties(); + load(propFile); + title = properties.getProperty("title"); + xlabel = properties.getProperty("xlabel"); + ylabel = properties.getProperty("ylabel"); + + 
scanProperties(); + + for (String name : names) + { + File file = new File(name); + Util.verbose("open: " + file); + List lines = null; + try + { + lines = LineReader.read(file); + } + catch (FileNotFoundException e) + { + System.out.println("not found: " + file); + System.exit(1); + } + double[][] array = LineReader.array(lines); + data.add(array); + addLabel(name, labels); + Util.verbose("array:\n" + toString(array)); + } + + XYSeriesCollection collection = Util.collection(data, labels, + names); + + plot(collection, title, xlabel, ylabel, output); + } + + static void load(String propFile) + { + try + { + if (propFile.equals("-")) + properties.load(System.in); + else + properties.load(new FileInputStream(propFile)); + } + catch (FileNotFoundException e) + { + System.out.println(e); + System.exit(1); + } + catch (IOException e) + { + e.printStackTrace(); + System.exit(1); + } + } + + static void scanProperties() + { + String tmp; + tmp = properties.getProperty("width"); + if (tmp != null) + w = Integer.parseInt(tmp.trim()); + tmp = properties.getProperty("height"); + if (tmp != null) + h = Integer.parseInt(tmp.trim()); + tmp = properties.getProperty("xmin"); + if (tmp != null) + xmin = Double.parseDouble(tmp); + tmp = properties.getProperty("xmax"); + if (tmp != null) + xmax = Double.parseDouble(tmp); + tmp = properties.getProperty("ymin"); + if (tmp != null) + ymin = Double.parseDouble(tmp); + tmp = properties.getProperty("ymax"); + if (tmp != null) + ymax = Double.parseDouble(tmp); + } + + /** + Arrays.copyOfRange is a Java 1.6 feature. + This has the same signature. 
+ */ + /* + static String[] select(String[] s, int p, int q) + { + String[] result = new String[q-p]; + int j = 0; + for (int i = p; i < q; i++) + result[j++] = s[i]; + return result; + } + */ + + static void addLabel(String name, + List labels) + { + String label = properties.getProperty("label."+name); + if (label == null) + label = ""; + labels.add(label); + } + + static boolean showShape(String name) + { + // System.out.println(name); + String mode = properties.getProperty("shape."+name); + // System.out.println(mode); + if ("none".equals(mode)) + return false; + return true; + } + + static String toString(double[][] array) + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < array.length; i++) + { + double[] row = array[i]; + for (int j = 0; j < row.length; j++) + { + sb.append(array[i][j]); + if (j < row.length-1) + sb.append(" "); + } + sb.append("\n"); + } + return sb.toString(); + } +} Added: usertools/plotter/src/Util.java =================================================================== --- usertools/plotter/src/Util.java (rev 0) +++ usertools/plotter/src/Util.java 2011-01-20 15:21:46 UTC (rev 4004) @@ -0,0 +1,57 @@ + +/** + * Plot data helpers. + * */ + +import java.util.List; + +import org.jfree.data.xy.XYSeries; +import org.jfree.data.xy.XYSeriesCollection; + +public class Util +{ + static boolean v = false; + + /** + Stores the label as the Series key. + Stores the filename as the Series description. 
+ */ + static XYSeriesCollection collection(List data, + List labels, + List names) + { + final XYSeriesCollection collection = new XYSeriesCollection(); + + int count = 0; + for (double[][] d : data) + { + String label = "data: " + count; + try + { + label = labels.get(count); + } + catch (IndexOutOfBoundsException e) + {} + + XYSeries series = new XYSeries(label); + for (int i = 0; i < d.length; i++) + series.add(d[i][0], d[i][1]); + + series.setDescription(names.get(count)); + collection.addSeries(series); + count++; + } + return collection; + } + + public static void verbose(String s) + { + if (v) + System.out.println(s); + } + + public static void verbose(boolean status) + { + v = status; + } +} From noreply at svn.ci.uchicago.edu Thu Jan 20 09:22:06 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 20 Jan 2011 09:22:06 -0600 (CST) Subject: [Swift-commit] r4005 - usertools/worker-profile Message-ID: <20110120152206.614369CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-20 09:22:06 -0600 (Thu, 20 Jan 2011) New Revision: 4005 Removed: usertools/worker-profile/src/ Log: Drop src From noreply at svn.ci.uchicago.edu Thu Jan 20 09:35:55 2011 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 20 Jan 2011 09:35:55 -0600 (CST) Subject: [Swift-commit] r4006 - usertools/worker-profile Message-ID: <20110120153555.450859CC84@svn.ci.uchicago.edu> Author: wozniak Date: 2011-01-20 09:35:55 -0600 (Thu, 20 Jan 2011) New Revision: 4006 Added: usertools/worker-profile/worker_jobs.zsh usertools/worker-profile/worker_jobs_duration.zsh usertools/worker-profile/worker_jobs_lib.zsh usertools/worker-profile/worker_profile_extract.zsh usertools/worker-profile/worker_profile_util.zsh Log: Import worker profile plot data tools Added: usertools/worker-profile/worker_jobs.zsh =================================================================== --- usertools/worker-profile/worker_jobs.zsh (rev 0) +++ 
usertools/worker-profile/worker_jobs.zsh 2011-01-20 15:35:55 UTC (rev 4006) @@ -0,0 +1,22 @@ +#!/bin/zsh + +# Lists worker job counts +# Uses worker_jobs_lib.zsh + +if [[ ${#*} == 0 ]] +then + print "Lists worker job counts" + print "usage: worker_jobs.zsh " +fi + +PLOTS=$( dirname $0 ) +TOOLS=${PLOTS}/../tools + +source ${TOOLS}/helpers.zsh +[[ $? != 0 ]] && print "Could not source helpers.zsh!" && exit 1 + +source ${PLOTS}/worker_jobs_lib.zsh + +worker_jobs ${*} > ${RESULT} + +return 0 Property changes on: usertools/worker-profile/worker_jobs.zsh ___________________________________________________________________ Name: svn:executable + * Added: usertools/worker-profile/worker_jobs_duration.zsh =================================================================== --- usertools/worker-profile/worker_jobs_duration.zsh (rev 0) +++ usertools/worker-profile/worker_jobs_duration.zsh 2011-01-20 15:35:55 UTC (rev 4006) @@ -0,0 +1,30 @@ +#!/bin/zsh + +# Lists worker job counts, sorted for plotting +# usage: worker_jobs_spectrum.zsh +# The result is a tabular file compatible with plotter/lines.zsh + +if [[ ${#*} == 0 ]] +then + print "Lists worker job counts, sorted for plotting" + print "usage: worker_jobs_spectrum.zsh " + return 1 +fi + +PLOTS=$( dirname $0 ) +TOOLS=${PLOTS}/../tools + +source ${TOOLS}/helpers.zsh +[[ $? != 0 ]] && print "Could not source helpers.zsh!" 
&& exit 1 + +source ${PLOTS}/worker_jobs_lib.zsh + +RESULT=$1 +shift +LOGS=${*} + +# Output: +worker_jobs ${LOGS} | sort -k 2 | nl -w1 | \ + awk '{ print $1 " " $3 " # " $2 " " $4 }' > ${RESULT} + +return 0 Property changes on: usertools/worker-profile/worker_jobs_duration.zsh ___________________________________________________________________ Name: svn:executable + * Added: usertools/worker-profile/worker_jobs_lib.zsh =================================================================== --- usertools/worker-profile/worker_jobs_lib.zsh (rev 0) +++ usertools/worker-profile/worker_jobs_lib.zsh 2011-01-20 15:35:55 UTC (rev 4006) @@ -0,0 +1,23 @@ + +# Lists worker job counts from all given worker logs +# Should just require log level INFO +worker_jobs() +{ + if [[ ${#*} == 0 ]] + then + print "Lists worker job counts" + print "usage: worker_jobs.zsh " + fi + + RESULT=$1 + shift + LOGS=${*} + for LOG in ${LOGS} + do + ID=$( sed -n '/.*ID.*/{s/.*ID=\(.*\)/\1/;p;q}' ${LOG} ) + JOBS=$( sed -n '/.*Ran a total.*/{s/.*of \(.*\) jobs/\1/;p;q}' ${LOG} ) + print ${ID} ${JOBS} ${LOG} + done + + return 0 +} Added: usertools/worker-profile/worker_profile_extract.zsh =================================================================== --- usertools/worker-profile/worker_profile_extract.zsh (rev 0) +++ usertools/worker-profile/worker_profile_extract.zsh 2011-01-20 15:35:55 UTC (rev 4006) @@ -0,0 +1,19 @@ +#!/bin/zsh + +# Extract the PROFILE: lines from the end of the worker log +# Requires you to set $PROFILE=1 in worker.pl + +PLOTS=$( dirname $0 ) +TOOLS=${PLOTS}/../tools + +source ${TOOLS}/helpers.zsh +[[ $? != 0 ]] && print "Could not source helpers.zsh!" 
&& exit 1 + +LOG=$1 +PROFILE=$2 + +checkvars LOG PROFILE + +grep "PROFILE:" ${LOG} | awk '{ print $5 " " $6 " " $7 }' > ${PROFILE} + +exit 0 Property changes on: usertools/worker-profile/worker_profile_extract.zsh ___________________________________________________________________ Name: svn:executable + * Added: usertools/worker-profile/worker_profile_util.zsh =================================================================== --- usertools/worker-profile/worker_profile_util.zsh (rev 0) +++ usertools/worker-profile/worker_profile_util.zsh 2011-01-20 15:35:55 UTC (rev 4006) @@ -0,0 +1,95 @@ +#!/bin/zsh + +# WORKER PROFILE UTILIZATION +# Build a data file for plotting from a worker.pl profile log +# 2-column output format: