[Swift-commit] r5980 - SwiftApps/EpiSnp

wilde at ci.uchicago.edu wilde at ci.uchicago.edu
Fri Oct 19 15:43:18 CDT 2012


Author: wilde
Date: 2012-10-19 15:43:18 -0500 (Fri, 19 Oct 2012)
New Revision: 5980

Added:
   SwiftApps/EpiSnp/cf.midway
   SwiftApps/EpiSnp/local.xml
   SwiftApps/EpiSnp/midway.xml
Removed:
   SwiftApps/EpiSnp/cf
   SwiftApps/EpiSnp/sites.xml
Modified:
   SwiftApps/EpiSnp/README
   SwiftApps/EpiSnp/episnp.swift
   SwiftApps/EpiSnp/runepisnp.sh
   SwiftApps/EpiSnp/tc
Log:
Add midway capability and extra app parameters

Modified: SwiftApps/EpiSnp/README
===================================================================
--- SwiftApps/EpiSnp/README	2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/README	2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,5 +1,10 @@
 To run:
 
-  ./runepisnp.sh N
+  ./runepisnp.sh site nRuns initSingles initPairs
 
-where N is the number of app calls to run.
+where:
+
+  site is local or midway (default local)
+  nRuns is the number of EpiSnp application invocations to run (default 1)
+  initSingles is the base value of the output-results parameter for single SNP tests (default 1000)
+  initPairs is the base value of the output-results parameter for pairwise SNP tests (default 10000)

Deleted: SwiftApps/EpiSnp/cf
===================================================================
--- SwiftApps/EpiSnp/cf	2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/cf	2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,361 +0,0 @@
-sites.file=${swift.home}/etc/sites.xml
-tc.file=${swift.home}/etc/tc.data
-
-#
-# The host name of the submit machine is used by GRAM as a callback
-# address to report the status of submitted jobs. In general, Swift
-# can automatically detect the host name of the local machine. 
-# However, if the machine host name is improperly configured or if
-# it does not represent a valid DNS entry, certain services (such as
-# GRAM) will not be able to send job status notifications back to 
-# the client. The value of this property can be an IP address.
-#
-# Format:
-#    hostname=string
-#
-
-
-#hostname=localhost
-
-#
-# A TCP port range can be specified to restrict the ports on which GRAM
-# callback services are started. This is likely needed if your submit
-# host is behind a firewall, in which case the firewall should be 
-# configured to allow incoming connections on ports in the range.
-#
-# Format:
-#     tcp.port.range=start,end
-#
-
-#tcp.port.range=50000,50100
-
-#
-# false	- means an error will be immediately reported and cause the
-# 		workflow to abort. At this time remote jobs that are already
-#		running will not be canceled
-# true	- means that Swift will try to do as much work as possible and 
-#		report all errors encountered at the end. However, "errors"
-#		here only applies to job execution errors. Certain errors
-#		that are related to the Swift implementation (should such 
-#		errors occur) will still be reported eagerly.
-#
-# Default: false
-#
-lazy.errors=false
-
-#
-# What algorithm to use for caching of remote files. LRU (as in what
-# files to purge) is the only implementation right now. One can set
-# a target size (in bytes) for a host by using the swift:storagesize 
-# profile for a host in sites.xml
-#
-# Default: LRU
-#
-caching.algorithm=LRU
-
-#
-# true       - generate a provenance graph in .dot format (Swift will
-#			 choose a random file name)
-# false      - do not generate a provenance graph 
-# <filename> - generate a provenange graph in the give file name
-#
-# Default: false
-#
-pgraph=false
-
-
-#
-# graph properties for the provenance graph (.dot specific) 
-#
-# Default: splines="compound", rankdir="TB"
-#
-pgraph.graph.options=splines="compound", rankdir="TB"
-
-
-#
-# node properties for the provenance graph (.dot specific) 
-#
-# Default: color="seagreen", style="filled"
-#
-pgraph.node.options=color="seagreen", style="filled"
-
-#
-# true	- clustering of small jobs is enabled. Clustering works in the 
-#       following way: If a job is clusterable (meaning that it has the
-#       GLOBUS::maxwalltime profile specified in tc.data and its value
-#       is less than the value of the "clustering.min.time" property) it will
-#       be put in a clustering queue. The queue is processed at intervals 
-#       specified by the "clustering.queue.delay" property. The processing
-#       of the clustering queue consists of selecting compatible jobs and
-#		grouping them in clusters whose max wall time does not exceed twice
-#       the value of the "clustering.min.time" property. Two or more jobs are 
-#       considered compatible if they share the same site and do not have
-#       conflicting profiles (e.g. different values for the same environment
-#       variable). 
-# false	- clustering of small jobs is disabled.
-#
-# Default: false
-#
-clustering.enabled=false
-
-
-#
-# <seconds>	- the intervals at which the clustering queue is processed
-#
-# Default: 4
-#
-clustering.queue.delay=4
-
-#
-# <seconds>	- the threshold time for clustering
-#
-# Default: 60
-#
-clustering.min.time=60
-
-#
-# Kickstart is a useful tool that can be used to gather various information
-# about a remote process. Before it can be used it must be installed on the
-# remote site and the corresponding entry be set in the sites file.
-# This option allows controlling of how Swift uses Kickstart. The following
-# values are possible:
-# false - do not use Kickstart
-# true  - use Kickstart. If a job is scheduled on a site that does not have
-#       Kickstart installed, that job will fail.
-# maybe - Use Kickstart if installed (i.e. the entry is present in the sites
-#       file) 
-#
-# Default: maybe
-#
-
-kickstart.enabled=maybe
-
-#
-# Indicates when Kickstart records should be fetched from the remote site:
-# true	- always transfer Kickstart records if Kickstart was used (see
-#		kickstart.enabled)
-# false	- only transfer Kickstart records if the job fails
-#
-# Default: false
-#
-
-kickstart.always.transfer=false
-
-#
-# Indicates when wrapper logs should be fetched from the remote site:
-# true	- always transfer wrapper logs
-# false	- only transfer wrapper logs if the job fails
-#
-# Default: false
-#
-
-wrapperlog.always.transfer=false
-
-###########################################################################
-#                          Throttling options                             #
-###########################################################################
-#
-# For the throttling parameters, valid values are either a positive integer
-# or "off" (without the quotes).
-#
-
-#
-# Limits the number of concurrent submissions for a workflow instance. This
-# throttle only limits the number of concurrent tasks (jobs) that are being
-# sent to sites, not the total number of concurrent jobs that can be run.
-# The submission stage in GRAM is one of the most CPU expensive stages (due
-# mostly to the mutual authentication and delegation). Having too many 
-# concurrent submissions can overload either or both the submit host CPU
-# and the remote host/head node causing degraded performance.     
-#
-# Default: 4
-#
-
-throttle.submit=4
-#throttle.submit=off
-
-#
-# Limits the number of concurrent submissions for any of the sites Swift will
-# try to send jobs to. In other words it guarantees that no more than the 
-# value of this throttle jobs sent to any site will be concurrently in a state
-# of being submitted.
-#
-# Default: 2
-#
-
-throttle.host.submit=2
-#throttle.host.submit=off
-
-#
-# The Swift scheduler has the ability to limit the number of concurrent jobs
-# allowed on a site based on the performance history of that site. Each site
-# is assigned a score (initially 1), which can increase or decrease based
-# on whether the site yields successful or faulty job runs. The score for a
-# site can take values in the (0.1, 100) interval. The number of allowed jobs
-# is calculated using the following formula: 
-# 	2 + score*throttle.score.job.factor
-# This means a site will always be allowed at least two concurrent jobs and
-# at most 2 + 100*throttle.score.job.factor. With a default of 4 this means
-# at least 2 jobs and at most 402.
-#
-# Default: 4
-#
-
-throttle.score.job.factor=0.2
-#throttle.score.job.factor=off
-
-
-#
-# Limits the total number of concurrent file transfers that can happen at any
-# given time. File transfers consume bandwidth. Too many concurrent transfers
-# can cause the network to be overloaded preventing various other signalling
-# traffic from flowing properly.
-#
-# Default: 4
-#
-
-throttle.transfers=4
-#throttle.transfers=off
-
-# Limits the total number of concurrent file operations that can happen at any
-# given time. File operations (like transfers) require an exclusive connection
-# to a site. These connections can be expensive to establish. A large number
-# of concurrent file operations may cause Swift to attempt to establish many 
-# such expensive connections to various sites. Limiting the number of concurrent
-# file operations causes Swift to use a small number of cached connections and
-# achieve better overall performance. 
-# 
-# Default: 8
-#
-
-throttle.file.operations=8
-#throttle.file.operations=off
-
-# Indicates whether the working directory on the remote site should be
-# left intact even when the workflow completes successfully. This can be
-# used to inspect the site working directory for debugging purposes.
-#
-# Default: false
-#
-
-sitedir.keep=false
-
-# number of time a job will be retried if it fails (giving a maximum of 
-# 1 + execution.retries attempts at execution)
-#
-
-execution.retries=2
-
-
-# Enables/disables replication. Replication is used to deal with jobs sitting
-# in batch queues for abnormally large amounts of time. If replication is enabled
-# and certain conditions are met, Swift creates and submits replicas of jobs, and
-# allows multiple instances of a job to compete.
-#
-
-replication.enabled=false
-
-# If replication is enabled, this value specifies the minimum time, in seconds,
-# a job needs to be queued in a batch queue in order to be considered for 
-# replication
-#
-
-replication.min.queue.time=60
-
-# The maximum number of replicas that Swift should attempt.
-
-replication.limit=3
-
-#
-# WARNING: This option is deprecated. Please use the hostname option.
-#
-# The IP address of the submit machine is used by GRAM as a callback
-# address to report the status of submitted jobs. In general, Swift
-# can automatically detect the IP address of the local machine. 
-# However, if the machine has more than one network interface, Swift
-# will pick the first one, which may not be the right choice. It is
-# recommended that this property is set properly before attempting to
-# run jobs through GRAM.
-#
-# Format:
-#    ip.address=x.y.z.w
-#
-
-#ip.address=127.0.0.1
-
-
-# Controls how Swift will communicate the result code of running user programs
-# from workers to the submit side. In files mode, a file
-# indicating success or failure will be created on the site shared filesystem.
-# In provider mode, the execution provider job status will
-# be used. Notably, GRAM2 does not return job statuses correctly, and so
-# provider mode will not work with GRAM2. With other
-# providers, it can be used to reduce the amount of filesystem access compared
-# to files mode.
-#
-# status.mode=files
-
-# Controls how swift will supply parameters to the remote wrapper script.
-# 'args' mode will pass parameters on the command line
-# 'files' mode will pass parameters through an additional input file
-#
-# valid values: args, files
-# Default: files
-#
-# wrapper.parameter.mode=args
-
-# Determines if Swift remote wrappers will be executed by specifying an
-# absolute path, or a path relative to the job initial working directory
-#
-# valid values: absolute, relative
-# wrapper.invocation.mode=absolute
-
-#
-# Limits the number of concurrent iterations that each foreach statement
-# can have at one time. This conserves memory for swift programs that 
-# have large numbers of iterations (which would otherwise all be executed
-# in parallel).
-#
-# Default: 1024
-#
-
-foreach.max.threads=16384
-
-# controls whether the log file will contain provenance information
-# enabling this will increase the size of log files, sometimes
-# significantly.
-
-provenance.log=false
-
-# Controls whether file staging is done by swift or by the execution 
-# provider. If set to false, the standard swift staging mechanism is
-# used. If set to true, swift does not stage files. Instead, the 
-# execution provider is instructed to stage files in and out.
-# 
-# Provider staging is experimental.
-#
-# When enabled, and when coasters are used as an execution provider,
-# a staging mechanism can be selected for each site
-# using the swift:stagingMethod site profile in sites.xml. The
-# following is a list of accepted mechanisms:
-#
-# * file:  Staging is done from a filesystem accessible to the 
-#          coaster service (typically running on the head node) 
-# * proxy: Staging is done from a filesystem accessible to the
-#          client machine that swift is running on, and is proxied
-#          through the coaster service
-# * sfs:   (short for "shared filesystem") Staging is done by
-#          copying files to and from a filesystem accessible
-#          by the compute node (such as an NFS or GPFS mount).   
- 
-
-use.provider.staging=false
-
-# Changed settings:
-
-wrapperlog.always.transfer=true
-sitedir.keep=true
-execution.retries=0
-lazy.errors=false
-status.mode=provider
-use.wrapper.staging=false
\ No newline at end of file

Copied: SwiftApps/EpiSnp/cf.midway (from rev 5979, SwiftApps/EpiSnp/cf)
===================================================================
--- SwiftApps/EpiSnp/cf.midway	                        (rev 0)
+++ SwiftApps/EpiSnp/cf.midway	2012-10-19 20:43:18 UTC (rev 5980)
@@ -0,0 +1,9 @@
+
+use.provider.staging=true
+provider.staging.pin.swiftfiles=true
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.wrapper.staging=false
\ No newline at end of file

Modified: SwiftApps/EpiSnp/episnp.swift
===================================================================
--- SwiftApps/EpiSnp/episnp.swift	2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/episnp.swift	2012-10-19 20:43:18 UTC (rev 5980)
@@ -14,11 +14,10 @@
 file figout[]<simple_mapper; location="output", prefix="single_locus_fig.",suffix=".out">;
 file sigout[]<simple_mapper; location="output", prefix="single_locus_sig.",suffix=".out">;
 
-int nRuns = @toint(@arg("nRuns","1"));
+int nRuns =       @toInt(@arg("nRuns","1"));
+int initSingles = @toInt(@arg("initSingles","1000"));
+int initPairs =   @toInt(@arg("initPairs","10000"));
 
 foreach incr, i in [0:nRuns-1] {
-  (figout[i], sigout[i], logout[i]) = episnp(epiwrapper, epiexec, epitrait, epichroms, 1000+i, 10000+i);
+  (figout[i], sigout[i], logout[i]) = episnp(epiwrapper, epiexec, epitrait, epichroms, initSingles+i, initPairs+i);
 }
-
-
-

Copied: SwiftApps/EpiSnp/local.xml (from rev 5979, SwiftApps/EpiSnp/sites.xml)
===================================================================
--- SwiftApps/EpiSnp/local.xml	                        (rev 0)
+++ SwiftApps/EpiSnp/local.xml	2012-10-19 20:43:18 UTC (rev 5980)
@@ -0,0 +1,9 @@
+<config>
+  <pool handle="local">
+    <execution provider="local"/>
+    <profile namespace="karajan" key="jobThrottle">.07</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+    <filesystem provider="local"/>
+    <workdirectory>/tmp/swiftwork</workdirectory>
+  </pool>
+</config>

Added: SwiftApps/EpiSnp/midway.xml
===================================================================
--- SwiftApps/EpiSnp/midway.xml	                        (rev 0)
+++ SwiftApps/EpiSnp/midway.xml	2012-10-19 20:43:18 UTC (rev 5980)
@@ -0,0 +1,25 @@
+<config>
+  <pool handle="midway">
+    <execution provider="coaster" url="none" jobmanager="local:slurm"/>
+
+    <profile namespace="globus" key="jobsPerNode">16</profile>
+    <profile namespace="globus" key="maxTime">3500</profile>
+    <profile namespace="globus" key="maxWallTime">00:10:00</profile>
+    <profile namespace="globus" key="slots">20</profile>
+    <profile namespace="globus" key="nodeGranularity">1</profile>
+    <profile namespace="globus" key="maxNodes">1</profile>
+
+    <profile namespace="globus" key="lowoverallocation">100</profile>
+    <profile namespace="globus" key="highoverallocation">100</profile>
+
+
+    <profile namespace="globus" key="queue">sandyb</profile>
+
+    <profile namespace="karajan" key="jobThrottle">3.20</profile>
+    <profile namespace="karajan" key="initialScore">10000</profile>
+
+    <filesystem provider="local" url="none"/>
+    <workdirectory>/tmp/wilde/swiftwork</workdirectory>
+    <!-- <workdirectory>/home/wilde/swift/lab/slurm/swiftwork</workdirectory> -->
+  </pool>
+</config>

Modified: SwiftApps/EpiSnp/runepisnp.sh
===================================================================
--- SwiftApps/EpiSnp/runepisnp.sh	2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/runepisnp.sh	2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,3 +1,5 @@
 #! /bin/sh
 
-swift -config cf -tc.file tc -sites.file sites.xml episnp.swift -nRuns=${1:-1}
+site=${1:-local}
+
+swift -config cf.$site -tc.file tc -sites.file $site.xml episnp.swift -nRuns=${2:-1} -initSingles=${3:-1000} -initPairs=${4:-10000}

Deleted: SwiftApps/EpiSnp/sites.xml
===================================================================
--- SwiftApps/EpiSnp/sites.xml	2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/sites.xml	2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,7 +0,0 @@
-<config>
-  <pool handle="localhost">
-    <execution provider="local"/>
-    <filesystem provider="local"/>
-    <workdirectory >/home/wilde/swiftwork</workdirectory>
-  </pool>
-</config>

Modified: SwiftApps/EpiSnp/tc
===================================================================
--- SwiftApps/EpiSnp/tc	2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/tc	2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,46 +1,3 @@
-#This is the transformation catalog.
-#
-#It comes pre-configured with a number of simple transformations with
-#paths that are likely to work on a linux box. However, on some systems,
-#the paths to these executables will be different (for example, sometimes
-#some of these programs are found in /usr/bin rather than in /bin)
-#
-#NOTE WELL: fields in this file must be separated by tabs, not spaces; and
-#there must be no trailing whitespace at the end of each line.
-#
-# sitename  transformation  path   INSTALLED  platform  profiles
-localhost 	sh 		/bin/sh	INSTALLED	INTEL32::LINUX	null
-localhost 	echo 		/bin/echo	INSTALLED	INTEL32::LINUX	null
-localhost 	cat 		/bin/cat	INSTALLED	INTEL32::LINUX	null
-localhost 	ls 		/bin/ls		INSTALLED	INTEL32::LINUX	null
-localhost 	grep 		/bin/grep	INSTALLED	INTEL32::LINUX	null
-localhost 	sort 		/bin/sort	INSTALLED	INTEL32::LINUX	null
-localhost 	paste 		/bin/paste	INSTALLED	INTEL32::LINUX	null
-localhost 	pwd 		/bin/pwd	INSTALLED	INTEL32::LINUX	null
-#
-ranger 	cat 		/bin/cat	INSTALLED	INTEL32::LINUX	null
-abe 	cat 		/bin/cat	INSTALLED	INTEL32::LINUX	null
-qb 	cat 		/bin/cat	INSTALLED	INTEL32::LINUX	null
-firefly 	cat 		/bin/cat	INSTALLED	INTEL32::LINUX	null
-teraport 	cat 		/bin/cat	INSTALLED	INTEL32::LINUX	null
-pbs	cat	/bin/cat	null	null	null
-ssh	cat	/bin/cat	null	null	null
-rssh	scat	/home/wilde/swift/lab/scat	null	null	null
-sico cat /bin/cat null null null
-sico scat /home/wilde/swift/lab/scat null null null
-
-#########
-
-crush	cat	/bin/cat	null	null	null
-thwomp	cat	/bin/cat	null	null	null
-stomp	cat	/bin/cat	null	null	null
-crank	cat	/bin/cat	null	null	null
-steamroller cat	/bin/cat	null	null	null
-grind	cat	/bin/cat	null	null	null
-churn	cat	/bin/cat	null	null	null
-trounce	cat	/bin/cat	null	null	null
-thrash	cat	/bin/cat	null	null	null
-vanquish cat	/bin/cat	null	null	null
-octagon	cat	/bin/cat	null	null	null
-octopus	cat	/bin/cat	null	null	null
-triumph	cat	/bin/cat	null	null	null
+local  sh /bin/sh null null null
+midway sh /bin/sh null null null
+pads   sh /bin/sh null null null




More information about the Swift-commit mailing list