[Swift-commit] r5980 - SwiftApps/EpiSnp
wilde at ci.uchicago.edu
wilde at ci.uchicago.edu
Fri Oct 19 15:43:18 CDT 2012
Author: wilde
Date: 2012-10-19 15:43:18 -0500 (Fri, 19 Oct 2012)
New Revision: 5980
Added:
SwiftApps/EpiSnp/cf.midway
SwiftApps/EpiSnp/local.xml
SwiftApps/EpiSnp/midway.xml
Removed:
SwiftApps/EpiSnp/cf
SwiftApps/EpiSnp/sites.xml
Modified:
SwiftApps/EpiSnp/README
SwiftApps/EpiSnp/episnp.swift
SwiftApps/EpiSnp/runepisnp.sh
SwiftApps/EpiSnp/tc
Log:
Add midway capability and extra app parameters
Modified: SwiftApps/EpiSnp/README
===================================================================
--- SwiftApps/EpiSnp/README 2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/README 2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,5 +1,10 @@
To run:
- ./runepisnp.sh N
+ ./runepisnp.sh sitename nRuns initSingle initPairs
-where N is the number of app calls to run.
+where:
+
+ sitename is local or midway
+ nRuns is the number of EpiSnp application invocations to run (default 1)
+ initSingle is the base value of parameter output results for single SNP tests (default:1000)
+ initPairs is the base value of parameter output results for pairwise SNP tests (default:10000)
Deleted: SwiftApps/EpiSnp/cf
===================================================================
--- SwiftApps/EpiSnp/cf 2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/cf 2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,361 +0,0 @@
-sites.file=${swift.home}/etc/sites.xml
-tc.file=${swift.home}/etc/tc.data
-
-#
-# The host name of the submit machine is used by GRAM as a callback
-# address to report the status of submitted jobs. In general, Swift
-# can automatically detect the host name of the local machine.
-# However, if the machine host name is improperly configured or if
-# it does not represent a valid DNS entry, certain services (such as
-# GRAM) will not be able to send job status notifications back to
-# the client. The value of this property can be an IP address.
-#
-# Format:
-# hostname=string
-#
-
-
-#hostname=localhost
-
-#
-# A TCP port range can be specified to restrict the ports on which GRAM
-# callback services are started. This is likely needed if your submit
-# host is behind a firewall, in which case the firewall should be
-# configured to allow incoming connections on ports in the range.
-#
-# Format:
-# tcp.port.range=start,end
-#
-
-#tcp.port.range=50000,50100
-
-#
-# false - means an error will be immediately reported and cause the
-# workflow to abort. At this time remote jobs that are already
-# running will not be canceled
-# true - means that Swift will try to do as much work as possible and
-# report all errors encountered at the end. However, "errors"
-# here only applies to job execution errors. Certain errors
-# that are related to the Swift implementation (should such
-# errors occur) will still be reported eagerly.
-#
-# Default: false
-#
-lazy.errors=false
-
-#
-# What algorithm to use for caching of remote files. LRU (as in what
-# files to purge) is the only implementation right now. One can set
-# a target size (in bytes) for a host by using the swift:storagesize
-# profile for a host in sites.xml
-#
-# Default: LRU
-#
-caching.algorithm=LRU
-
-#
-# true - generate a provenance graph in .dot format (Swift will
-# choose a random file name)
-# false - do not generate a provenance graph
-# <filename> - generate a provenance graph in the given file name
-#
-# Default: false
-#
-pgraph=false
-
-
-#
-# graph properties for the provenance graph (.dot specific)
-#
-# Default: splines="compound", rankdir="TB"
-#
-pgraph.graph.options=splines="compound", rankdir="TB"
-
-
-#
-# node properties for the provenance graph (.dot specific)
-#
-# Default: color="seagreen", style="filled"
-#
-pgraph.node.options=color="seagreen", style="filled"
-
-#
-# true - clustering of small jobs is enabled. Clustering works in the
-# following way: If a job is clusterable (meaning that it has the
-# GLOBUS::maxwalltime profile specified in tc.data and its value
-# is less than the value of the "clustering.min.time" property) it will
-# be put in a clustering queue. The queue is processed at intervals
-# specified by the "clustering.queue.delay" property. The processing
-# of the clustering queue consists of selecting compatible jobs and
-# grouping them in clusters whose max wall time does not exceed twice
-# the value of the "clustering.min.time" property. Two or more jobs are
-# considered compatible if they share the same site and do not have
-# conflicting profiles (e.g. different values for the same environment
-# variable).
-# false - clustering of small jobs is disabled.
-#
-# Default: false
-#
-clustering.enabled=false
-
-
-#
-# <seconds> - the intervals at which the clustering queue is processed
-#
-# Default: 4
-#
-clustering.queue.delay=4
-
-#
-# <seconds> - the threshold time for clustering
-#
-# Default: 60
-#
-clustering.min.time=60
-
-#
-# Kickstart is a useful tool that can be used to gather various information
-# about a remote process. Before it can be used it must be installed on the
-# remote site and the corresponding entry be set in the sites file.
-# This option allows controlling of how Swift uses Kickstart. The following
-# values are possible:
-# false - do not use Kickstart
-# true - use Kickstart. If a job is scheduled on a site that does not have
-# Kickstart installed, that job will fail.
-# maybe - Use Kickstart if installed (i.e. the entry is present in the sites
-# file)
-#
-# Default: maybe
-#
-
-kickstart.enabled=maybe
-
-#
-# Indicates when Kickstart records should be fetched from the remote site:
-# true - always transfer Kickstart records if Kickstart was used (see
-# kickstart.enabled)
-# false - only transfer Kickstart records if the job fails
-#
-# Default: false
-#
-
-kickstart.always.transfer=false
-
-#
-# Indicates when wrapper logs should be fetched from the remote site:
-# true - always transfer wrapper logs
-# false - only transfer wrapper logs if the job fails
-#
-# Default: false
-#
-
-wrapperlog.always.transfer=false
-
-###########################################################################
-# Throttling options #
-###########################################################################
-#
-# For the throttling parameters, valid values are either a positive integer
-# or "off" (without the quotes).
-#
-
-#
-# Limits the number of concurrent submissions for a workflow instance. This
-# throttle only limits the number of concurrent tasks (jobs) that are being
-# sent to sites, not the total number of concurrent jobs that can be run.
-# The submission stage in GRAM is one of the most CPU expensive stages (due
-# mostly to the mutual authentication and delegation). Having too many
-# concurrent submissions can overload either or both the submit host CPU
-# and the remote host/head node causing degraded performance.
-#
-# Default: 4
-#
-
-throttle.submit=4
-#throttle.submit=off
-
-#
-# Limits the number of concurrent submissions for any of the sites Swift will
-# try to send jobs to. In other words it guarantees that no more than the
-# value of this throttle jobs sent to any site will be concurrently in a state
-# of being submitted.
-#
-# Default: 2
-#
-
-throttle.host.submit=2
-#throttle.host.submit=off
-
-#
-# The Swift scheduler has the ability to limit the number of concurrent jobs
-# allowed on a site based on the performance history of that site. Each site
-# is assigned a score (initially 1), which can increase or decrease based
-# on whether the site yields successful or faulty job runs. The score for a
-# site can take values in the (0.1, 100) interval. The number of allowed jobs
-# is calculated using the following formula:
-# 2 + score*throttle.score.job.factor
-# This means a site will always be allowed at least two concurrent jobs and
-# at most 2 + 100*throttle.score.job.factor. With a default of 4 this means
-# at least 2 jobs and at most 402.
-#
-# Default: 4
-#
-
-throttle.score.job.factor=0.2
-#throttle.score.job.factor=off
-
-
-#
-# Limits the total number of concurrent file transfers that can happen at any
-# given time. File transfers consume bandwidth. Too many concurrent transfers
-# can cause the network to be overloaded preventing various other signalling
-# traffic from flowing properly.
-#
-# Default: 4
-#
-
-throttle.transfers=4
-#throttle.transfers=off
-
-# Limits the total number of concurrent file operations that can happen at any
-# given time. File operations (like transfers) require an exclusive connection
-# to a site. These connections can be expensive to establish. A large number
-# of concurrent file operations may cause Swift to attempt to establish many
-# such expensive connections to various sites. Limiting the number of concurrent
-# file operations causes Swift to use a small number of cached connections and
-# achieve better overall performance.
-#
-# Default: 8
-#
-
-throttle.file.operations=8
-#throttle.file.operations=off
-
-# Indicates whether the working directory on the remote site should be
-# left intact even when the workflow completes successfully. This can be
-# used to inspect the site working directory for debugging purposes.
-#
-# Default: false
-#
-
-sitedir.keep=false
-
-# number of times a job will be retried if it fails (giving a maximum of
-# 1 + execution.retries attempts at execution)
-#
-
-execution.retries=2
-
-
-# Enables/disables replication. Replication is used to deal with jobs sitting
-# in batch queues for abnormally large amounts of time. If replication is enabled
-# and certain conditions are met, Swift creates and submits replicas of jobs, and
-# allows multiple instances of a job to compete.
-#
-
-replication.enabled=false
-
-# If replication is enabled, this value specifies the minimum time, in seconds,
-# a job needs to be queued in a batch queue in order to be considered for
-# replication
-#
-
-replication.min.queue.time=60
-
-# The maximum number of replicas that Swift should attempt.
-
-replication.limit=3
-
-#
-# WARNING: This option is deprecated. Please use the hostname option.
-#
-# The IP address of the submit machine is used by GRAM as a callback
-# address to report the status of submitted jobs. In general, Swift
-# can automatically detect the IP address of the local machine.
-# However, if the machine has more than one network interface, Swift
-# will pick the first one, which may not be the right choice. It is
-# recommended that this property is set properly before attempting to
-# run jobs through GRAM.
-#
-# Format:
-# ip.address=x.y.z.w
-#
-
-#ip.address=127.0.0.1
-
-
-# Controls how Swift will communicate the result code of running user programs
-# from workers to the submit side. In files mode, a file
-# indicating success or failure will be created on the site shared filesystem.
-# In provider mode, the execution provider job status will
-# be used. Notably, GRAM2 does not return job statuses correctly, and so
-# provider mode will not work with GRAM2. With other
-# providers, it can be used to reduce the amount of filesystem access compared
-# to files mode.
-#
-# status.mode=files
-
-# Controls how swift will supply parameters to the remote wrapper script.
-# 'args' mode will pass parameters on the command line
-# 'files' mode will pass parameters through an additional input file
-#
-# valid values: args, files
-# Default: files
-#
-# wrapper.parameter.mode=args
-
-# Determines if Swift remote wrappers will be executed by specifying an
-# absolute path, or a path relative to the job initial working directory
-#
-# valid values: absolute, relative
-# wrapper.invocation.mode=absolute
-
-#
-# Limits the number of concurrent iterations that each foreach statement
-# can have at one time. This conserves memory for swift programs that
-# have large numbers of iterations (which would otherwise all be executed
-# in parallel).
-#
-# Default: 1024
-#
-
-foreach.max.threads=16384
-
-# controls whether the log file will contain provenance information
-# enabling this will increase the size of log files, sometimes
-# significantly.
-
-provenance.log=false
-
-# Controls whether file staging is done by swift or by the execution
-# provider. If set to false, the standard swift staging mechanism is
-# used. If set to true, swift does not stage files. Instead, the
-# execution provider is instructed to stage files in and out.
-#
-# Provider staging is experimental.
-#
-# When enabled, and when coasters are used as an execution provider,
-# a staging mechanism can be selected for each site
-# using the swift:stagingMethod site profile in sites.xml. The
-# following is a list of accepted mechanisms:
-#
-# * file: Staging is done from a filesystem accessible to the
-# coaster service (typically running on the head node)
-# * proxy: Staging is done from a filesystem accessible to the
-# client machine that swift is running on, and is proxied
-# through the coaster service
-# * sfs: (short for "shared filesystem") Staging is done by
-# copying files to and from a filesystem accessible
-# by the compute node (such as an NFS or GPFS mount).
-
-
-use.provider.staging=false
-
-# Changed settings:
-
-wrapperlog.always.transfer=true
-sitedir.keep=true
-execution.retries=0
-lazy.errors=false
-status.mode=provider
-use.wrapper.staging=false
\ No newline at end of file
Copied: SwiftApps/EpiSnp/cf.midway (from rev 5979, SwiftApps/EpiSnp/cf)
===================================================================
--- SwiftApps/EpiSnp/cf.midway (rev 0)
+++ SwiftApps/EpiSnp/cf.midway 2012-10-19 20:43:18 UTC (rev 5980)
@@ -0,0 +1,9 @@
+
+use.provider.staging=true
+provider.staging.pin.swiftfiles=true
+wrapperlog.always.transfer=true
+sitedir.keep=true
+execution.retries=0
+lazy.errors=false
+status.mode=provider
+use.wrapper.staging=false
\ No newline at end of file
Modified: SwiftApps/EpiSnp/episnp.swift
===================================================================
--- SwiftApps/EpiSnp/episnp.swift 2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/episnp.swift 2012-10-19 20:43:18 UTC (rev 5980)
@@ -14,11 +14,10 @@
file figout[]<simple_mapper; location="output", prefix="single_locus_fig.",suffix=".out">;
file sigout[]<simple_mapper; location="output", prefix="single_locus_sig.",suffix=".out">;
-int nRuns = @toint(@arg("nRuns","1"));
+int nRuns = @toInt(@arg("nRuns","1"));
+int initSingles = @toInt(@arg("initSingles","1000"));
+int initPairs = @toInt(@arg("initPairs","1000"));
foreach incr, i in [0:nRuns-1] {
- (figout[i], sigout[i], logout[i]) = episnp(epiwrapper, epiexec, epitrait, epichroms, 1000+i, 10000+i);
+ (figout[i], sigout[i], logout[i]) = episnp(epiwrapper, epiexec, epitrait, epichroms, initSingles+i, initPairs+i);
}
-
-
-
Copied: SwiftApps/EpiSnp/local.xml (from rev 5979, SwiftApps/EpiSnp/sites.xml)
===================================================================
--- SwiftApps/EpiSnp/local.xml (rev 0)
+++ SwiftApps/EpiSnp/local.xml 2012-10-19 20:43:18 UTC (rev 5980)
@@ -0,0 +1,9 @@
+<config>
+ <pool handle="local">
+ <execution provider="local"/>
+ <profile namespace="karajan" key="jobThrottle">.07</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+ <filesystem provider="local"/>
+ <workdirectory>/tmp/swiftwork</workdirectory>
+ </pool>
+</config>
Added: SwiftApps/EpiSnp/midway.xml
===================================================================
--- SwiftApps/EpiSnp/midway.xml (rev 0)
+++ SwiftApps/EpiSnp/midway.xml 2012-10-19 20:43:18 UTC (rev 5980)
@@ -0,0 +1,25 @@
+<config>
+ <pool handle="midway">
+ <execution provider="coaster" url="none" jobmanager="local:slurm"/>
+
+ <profile namespace="globus" key="jobsPerNode">16</profile>
+ <profile namespace="globus" key="maxTime">3500</profile>
+ <profile namespace="globus" key="maxWallTime">00:10:00</profile>
+ <profile namespace="globus" key="slots">20</profile>
+ <profile namespace="globus" key="nodeGranularity">1</profile>
+ <profile namespace="globus" key="maxNodes">1</profile>
+
+ <profile namespace="globus" key="lowoverallocation">100</profile>
+ <profile namespace="globus" key="highoverallocation">100</profile>
+
+
+ <profile namespace="globus" key="queue">sandyb</profile>
+
+ <profile namespace="karajan" key="jobThrottle">3.20</profile>
+ <profile namespace="karajan" key="initialScore">10000</profile>
+
+ <filesystem provider="local" url="none"/>
+ <workdirectory>/tmp/wilde/swiftwork</workdirectory>
+ <!-- <workdirectory>/home/wilde/swift/lab/slurm/swiftwork</workdirectory> -->
+ </pool>
+</config>
Modified: SwiftApps/EpiSnp/runepisnp.sh
===================================================================
--- SwiftApps/EpiSnp/runepisnp.sh 2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/runepisnp.sh 2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,3 +1,5 @@
#! /bin/sh
-swift -config cf -tc.file tc -sites.file sites.xml episnp.swift -nRuns=${1:-1}
+site=${1:-local}
+
+swift -config cf.$site -tc.file tc -sites.file $site.xml episnp.swift -nRuns=${2:-1} -initSingles=${3:-1000} -initPairs=${4:-10000}
Deleted: SwiftApps/EpiSnp/sites.xml
===================================================================
--- SwiftApps/EpiSnp/sites.xml 2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/sites.xml 2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,7 +0,0 @@
-<config>
- <pool handle="localhost">
- <execution provider="local"/>
- <filesystem provider="local"/>
- <workdirectory >/home/wilde/swiftwork</workdirectory>
- </pool>
-</config>
Modified: SwiftApps/EpiSnp/tc
===================================================================
--- SwiftApps/EpiSnp/tc 2012-10-19 17:34:48 UTC (rev 5979)
+++ SwiftApps/EpiSnp/tc 2012-10-19 20:43:18 UTC (rev 5980)
@@ -1,46 +1,3 @@
-#This is the transformation catalog.
-#
-#It comes pre-configured with a number of simple transformations with
-#paths that are likely to work on a linux box. However, on some systems,
-#the paths to these executables will be different (for example, sometimes
-#some of these programs are found in /usr/bin rather than in /bin)
-#
-#NOTE WELL: fields in this file must be separated by tabs, not spaces; and
-#there must be no trailing whitespace at the end of each line.
-#
-# sitename transformation path INSTALLED platform profiles
-localhost sh /bin/sh INSTALLED INTEL32::LINUX null
-localhost echo /bin/echo INSTALLED INTEL32::LINUX null
-localhost cat /bin/cat INSTALLED INTEL32::LINUX null
-localhost ls /bin/ls INSTALLED INTEL32::LINUX null
-localhost grep /bin/grep INSTALLED INTEL32::LINUX null
-localhost sort /bin/sort INSTALLED INTEL32::LINUX null
-localhost paste /bin/paste INSTALLED INTEL32::LINUX null
-localhost pwd /bin/pwd INSTALLED INTEL32::LINUX null
-#
-ranger cat /bin/cat INSTALLED INTEL32::LINUX null
-abe cat /bin/cat INSTALLED INTEL32::LINUX null
-qb cat /bin/cat INSTALLED INTEL32::LINUX null
-firefly cat /bin/cat INSTALLED INTEL32::LINUX null
-teraport cat /bin/cat INSTALLED INTEL32::LINUX null
-pbs cat /bin/cat null null null
-ssh cat /bin/cat null null null
-rssh scat /home/wilde/swift/lab/scat null null null
-sico cat /bin/cat null null null
-sico scat /home/wilde/swift/lab/scat null null null
-
-#########
-
-crush cat /bin/cat null null null
-thwomp cat /bin/cat null null null
-stomp cat /bin/cat null null null
-crank cat /bin/cat null null null
-steamroller cat /bin/cat null null null
-grind cat /bin/cat null null null
-churn cat /bin/cat null null null
-trounce cat /bin/cat null null null
-thrash cat /bin/cat null null null
-vanquish cat /bin/cat null null null
-octagon cat /bin/cat null null null
-octopus cat /bin/cat null null null
-triumph cat /bin/cat null null null
+local sh /bin/sh null null null
+midway sh /bin/sh null null null
+pads sh /bin/sh null null null
More information about the Swift-commit
mailing list