From noreply at svn.ci.uchicago.edu Mon Nov 1 19:07:58 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 1 Nov 2010 19:07:58 -0500 (CDT) Subject: [Swift-commit] r3704 - trunk/bin Message-ID: <20101102000758.A71A59CCA3@svn.ci.uchicago.edu> Author: davidk Date: 2010-11-01 19:07:58 -0500 (Mon, 01 Nov 2010) New Revision: 3704 Modified: trunk/bin/swift Log: Updated shell script to ignore error messages from bash when compiled with --disable-net-redirections Modified: trunk/bin/swift =================================================================== --- trunk/bin/swift 2010-10-30 21:33:13 UTC (rev 3703) +++ trunk/bin/swift 2010-11-02 00:07:58 UTC (rev 3704) @@ -139,7 +139,7 @@ USERID=`echo $USERID $HOSTNAME $MAC | $MD5SUM |cut -d ' ' -f1` DATE=`date` KEY=`echo $USERID $DATE $$ | $MD5SUM | cut -d ' ' -f1` - $BASH -c "echo $KEY $USERID $SCRIPTID $SCRIPTLENGTH -1 0 > /dev/udp/$LISTENER/$PORT" & + $BASH -c "echo $KEY $USERID $SCRIPTID $SCRIPTLENGTH -1 0 > /dev/udp/$LISTENER/$PORT" > /dev/null 2>&1 & fi ### EXECUTE ############ @@ -154,7 +154,7 @@ return_code=$? if [ "$SWIFT_USAGE_STATS" = 1 ]; then - $BASH -c "echo $KEY $USERID $SCRIPTID $SCRIPTLENGTH $return_code 1 > /dev/udp/$LISTENER/$PORT" & + $BASH -c "echo $KEY $USERID $SCRIPTID $SCRIPTLENGTH $return_code 1 > /dev/udp/$LISTENER/$PORT" > /dev/null 2>&1 & fi exit $return_code From noreply at svn.ci.uchicago.edu Mon Nov 8 14:30:10 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 8 Nov 2010 14:30:10 -0600 (CST) Subject: [Swift-commit] r3705 - trunk/tests/language/should-not-work Message-ID: <20101108203010.1C2B29CC86@svn.ci.uchicago.edu> Author: skenny Date: 2010-11-08 14:30:09 -0600 (Mon, 08 Nov 2010) New Revision: 3705 Added: trunk/tests/language/should-not-work/119-missing-semi.swift Log: tests for missing semi-colon at the end of mapping statement Added: trunk/tests/language/should-not-work/119-missing-semi.swift =================================================================== --- trunk/tests/language/should-not-work/119-missing-semi.swift (rev 0) +++ trunk/tests/language/should-not-work/119-missing-semi.swift 2010-11-08 20:30:09 UTC (rev 3705) @@ -0,0 +1,14 @@ +type file {}; +type student { + file name; + file age; + file gpa; +} +app (file t) getname(string n) { + echo n stdout=@filename(t); +} + +file results ; +student fnames[] +results = getname(@filename(fnames[0])); + From noreply at svn.ci.uchicago.edu Wed Nov 10 12:18:39 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 10 Nov 2010 12:18:39 -0600 (CST) Subject: [Swift-commit] r3706 - provenancedb Message-ID: <20101110181839.DD312FAC2@svn.ci.uchicago.edu> Author: lgadelha Date: 2010-11-10 12:18:39 -0600 (Wed, 10 Nov 2010) New Revision: 3706 Modified: provenancedb/README provenancedb/prepare-provenance-chart provenancedb/prov-init.sql provenancedb/prov-sql-generate-transitive-closures.sql provenancedb/prov-to-sql.sh provenancedb/swift-prov-import-all-logs Log: New database schema, and corrections to import scripts. Modified: provenancedb/README =================================================================== --- provenancedb/README 2010-11-08 20:30:09 UTC (rev 3705) +++ provenancedb/README 2010-11-10 18:18:39 UTC (rev 3706) @@ -1,12 +1,9 @@ -provenance database prototype +The file etc/provenance.config should be edited to define the local configuration. The location of the directory containing the log files should be defined in the variable LOGREPO. 
For instance:
-At the moment, you have pretty much no chance of getting this working.
+export LOGREPO=~/swift-logs/
-Working notes are in docbook form in provenance.xml
+The command used for connecting to the database should be defined in the variable SQLCMD. For example, to connect to CI's PostgreSQL database:
-To import, run:
+export SQLCMD="psql -h db.ci.uchicago.edu -U provdb provdb"
- ./import-all
-
-with no parameters.
-
+The script ./swift-prov-import-all-logs will import provenance information from the log files in $LOGREPO into the database. The command line option -rebuild will initialize the database before importing provenance information. The file prov-init.sql contains the database schema.

Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart	2010-11-08 20:30:09 UTC (rev 3705)
+++ provenancedb/prepare-provenance-chart	2010-11-10 18:18:39 UTC (rev 3706)
@@ -8,8 +8,8 @@
 export RUNID=$(basename $1 .log)

-export WFID="tag:benc at ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:"
-export EXECUTE2PREFIX="tag:benc at ci.uchicago.edu,2008:swiftlogs:execute2:${RUNID}:"
+export WFID="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:"
+export EXECUTE2PREFIX="tag:ci.uchicago.edu,2008:swiftlogs:execute2:${RUNID}:"

 # will output log information about datasets from a log file passed as $1

@@ -30,8 +30,8 @@

 cat $1 | grep ' OPERATOR ' | sed 's/^.*thread=\([^ ]*\) operator="\([^ ]*\)" lhs=\([^ ]*\) rhs=\([^ ]*\) result=\([^ ]*\).*$/\1 \2 \3 \4 \5/' > operators.txt

-# 2009-03-19 19:15:35,244+0100 INFO vdl:arguments FUNCTION id=88000-0-4-4 name="f ilename" result=tag:benc at ci.uchicago.edu,2008:swift:dataset:20090319-1915-xj8flg 13:720000000060
-# 2009-03-19 19:15:35,246+0100 INFO vdl:arguments FUNCTIONPARAMETER id=88001-0-4- 4 input=tag:benc at ci.uchicago.edu,2008:swift:dataset:20090319-1915-xj8flg13:72000 0000058
+# 2009-03-19 19:15:35,244+0100 INFO vdl:arguments FUNCTION id=88000-0-4-4 name="f ilename" result=tag:ci.uchicago.edu,2008:swift:dataset:20090319-1915-xj8flg 13:720000000060
+# 2009-03-19 19:15:35,246+0100 INFO vdl:arguments FUNCTIONPARAMETER id=88001-0-4- 4 input=tag:ci.uchicago.edu,2008:swift:dataset:20090319-1915-xj8flg13:72000 0000058
 cat $1 | grep ' FUNCTION ' | sed "s/^.*id=\([^ ]*\) name=\([^ ]*\) result=\([^ ]*\).*\$/$WFID\1 \2 \3/" > functions.txt

 # the IDs in functions.txt should be unique...
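
As a concrete illustration of the import workflow described in the README hunk above — a minimal sketch, assuming etc/provenance.config has been edited as described, with illustrative values for LOGREPO and SQLCMD — a first import session might look like:

    export LOGREPO=~/swift-logs/                                # directory holding the Swift run logs
    export SQLCMD="psql -h db.ci.uchicago.edu -U provdb provdb" # client command for the provenance database
    ./swift-prov-import-all-logs rebuild                        # initialize the schema (prov-init.sql), then import
    ./swift-prov-import-all-logs                                # later runs import only logs not already recorded

Note that the importer itself tests for the literal first argument "rebuild" (see swift-prov-import-all-logs below), while the README text calls the option -rebuild; the bare form is what the code accepts.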
Modified: provenancedb/prov-init.sql =================================================================== --- provenancedb/prov-init.sql 2010-11-08 20:30:09 UTC (rev 3705) +++ provenancedb/prov-init.sql 2010-11-10 18:18:39 UTC (rev 3706) @@ -1,253 +1,259 @@ - -- this is the schema definition used for the main relational provenance -- implementation (in both sqlite3 and postgres) - -DROP TABLE processes; -DROP TABLE executes; -DROP TABLE execute2s; -DROP TABLE dataset_usage; -DROP TABLE invocation_procedure_names; -DROP TABLE dataset_containment; -DROP TABLE dataset_filenames; -DROP TABLE processes_in_workflows; -DROP TABLE dataset_values; -DROP TABLE known_workflows; -DROP TABLE workflow_events; + +DROP TABLE dataset; +DROP TABLE file; +DROP TABLE variable; +DROP TABLE ds_containment; +DROP TABLE process; +DROP TABLE execute; +DROP TABLE execute2; +DROP TABLE workflow; +DROP TABLE ds_usage; +DROP TABLE annot_ds_num; +DROP TABLE annot_ds_txt; +DROP TABLE annot_ds_bool; +DROP TABLE annot_p_num; +DROP TABLE annot_p_txt; +DROP TABLE annot_p_bool; +DROP TABLE annot_wf_num; +DROP TABLE annot_wf_txt; +DROP TABLE annot_wf_bool; DROP TABLE extrainfo; DROP TABLE createarray; DROP TABLE createarray_member; DROP TABLE array_range; -DROP TABLE dataset_annotations_numeric; -DROP TABLE dataset_annotations_varchar; -DROP TABLE dataset_annotations_boolean; -DROP TABLE process_annotations_numeric; -DROP TABLE process_annotations_varchar; -DROP TABLE process_annotations_boolean; -DROP TABLE workflow_annotations_numeric; -DROP TABLE workflow_annotations_varchar; -DROP TABLE workflow_annotations_boolean; --- associates each process with its containing workflow --- TODO - perhaps a workflow is itself a big big process? --- in which case this looks very much like a compound/app --- containment? -CREATE TABLE processes_in_workflows - (workflow_id varchar(2048), - process_id varchar(2048), - primary key (workflow_id, process_id) - ); --- processes gives information about each process (in the OPM sense) --- it is augmented by information in other tables -CREATE TABLE processes - (id varchar(2048) PRIMARY KEY, -- a uri - type varchar(16) -- specifies the type of process. for any type, it - -- must be the case that the specific type table - -- has an entry for this process. - -- Having this type here seems poor normalisation, though? +-- workflow stores some information about each workflow log that has +-- been seen by the importer: the log filename, swift version and import +-- status. +-- Might be interesting to store xml translation of the swiftscript code +-- here for prospective provenance/versioning +CREATE TABLE workflow + (id varchar(256) PRIMARY KEY, + log_filename varchar(2048), + swift_version varchar(16), + import_status varchar(16), + start_time numeric, + duration numeric ); +-- workflow_run stores the start time and duration for each workflow +-- that has been successfully imported. +--CREATE TABLE wf_run +-- (id varchar(256) PRIMARY KEY REFERENCES workflow (id) ON DELETE CASCADE, +-- start_time numeric, +-- duration numeric +-- ); --- this gives information about each execute. --- each execute is identified by a unique URI. other information from --- swift logs is also stored here. an execute is an OPM process. -CREATE TABLE executes - (id varchar(2048) PRIMARY KEY, -- actually foreign key to processes - starttime numeric, - duration numeric, - finalstate varchar(2048), - app varchar(2048), - scratch varchar(2048) +-- dataset stores all dataset identifiers. 
+CREATE TABLE dataset + (id varchar(256) PRIMARY KEY ); --- this gives information about each execute2, which is an attempt to --- perform an execution. the execute2 id is tied to per-execution-attempt --- information such as wrapper logs - -CREATE TABLE execute2s - (id varchar(2048) PRIMARY KEY, - execute_id varchar(2048), -- secondary key to executes and processes tables - starttime numeric, - duration numeric, - finalstate varchar(2048), - site varchar(2048) +-- file stores the filename mapped to each dataset. +CREATE TABLE file + ( id varchar(256) PRIMARY KEY REFERENCES dataset (id) ON DELETE CASCADE, + filename varchar(2048) ); --- dataset_usage records usage relationships between processes and datasets; --- in SwiftScript terms, the input and output parameters for each --- application procedure invocation; in OPM terms, the artificts which are --- input to and output from each process that is a Swift execution - --- TODO: no primary key here. should probably index both on execute_id and on --- dataset_id for common queries? maybe add arbitrary ID for sake of it? - -CREATE TABLE dataset_usage - (process_id varchar(2048), -- foreign key but not enforced because maybe process - -- doesn't exist at time. same type as processes.id - direction char(1), -- I or O for input or output - dataset_id varchar(2048), -- this will perhaps key against dataset table - param_name varchar(2048) -- the name of the parameter in this execute that - -- this dataset was bound to. sometimes this must - -- be contrived (for example, in positional varargs) +-- dataset_values stores the value for each dataset which is known to have +-- a value (which is all assigned primitive types). No attempt is made here +-- to expose that value as an SQL type other than a string, and so (for +-- example) SQL numerical operations should not be expected to work, even +-- though the user knows that a particular dataset stores a numeric value. +CREATE TABLE variable + ( id varchar(256) PRIMARY KEY REFERENCES dataset (id) ON DELETE CASCADE, + value varchar(2048) ); - --- invocation_procedure_name maps each execute ID to the name of its --- SwiftScript procedure - --- TODO probably desirable that this is part of executes table --- but for now this is the easiest to pull data from logs. -CREATE TABLE invocation_procedure_names - (execute_id varchar(2048) PRIMARY KEY, - procedure_name varchar(2048) - ); - - -- dataset_containment stores the containment hierarchy between -- container datasets (arrays and structs) and their contents. - --- outer_dataset_id contains inner_dataset_id - +-- out_id contains in_id -- TODO this should perhaps be replaced with a more OPM-like model of -- constructors and accessors, rather than, or in addition to, -- a containment hierarchy. The relationship (such as array index or -- structure member name) should also be stored in this table. -CREATE TABLE dataset_containment - ( outer_dataset_id varchar(2048), - inner_dataset_id varchar(2048) +CREATE TABLE ds_containment + ( out_id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE, + in_id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE, + PRIMARY KEY (out_id,in_id) ); - --- dataset_filenames stores the filename mapped to each dataset. 
As some --- datasets do not have filenames, it should not be expected that --- every dataset will have a row in this table -CREATE TABLE dataset_filenames - ( dataset_id varchar(2048) PRIMARY KEY, - filename varchar(2048) +-- process gives information about each process (in the OPM sense) +-- it is augmented by information in other tables +-- specifies the type of process. for any type, it +-- must be the case that the specific type table +-- has an entry for this process. +-- Having this type here seems poor normalisation, though? +-- process types: internal, rootthread, execute, function, compound, scope, operator +-- maybe create a table for each type? +CREATE TABLE process + (id varchar(256) PRIMARY KEY, + type varchar(16), + name varchar(256), -- in the case of an execute this refers to the transformation name in tc.data + workflow_id varchar(256) REFERENCES workflow (id) ON DELETE CASCADE -- normalize: workflow_id of sub-procedure determined + -- by compound procedure ); --- dataset_values stores the value for each dataset which is known to have --- a value (which is all assigned primitive types). No attempt is made here --- to expose that value as an SQL type other than a string, and so (for --- example) SQL numerical operations should not be expected to work, even --- though the user knows that a particular dataset stores a numeric value. -CREATE TABLE dataset_values - ( dataset_id varchar(2048) PRIMARY KEY, - value varchar(2048) +-- this gives information about each execute. +-- each execute is identified by a unique URI. other information from +-- swift logs is also stored here. an execute is an OPM process. +CREATE TABLE execute + (id varchar(256) PRIMARY KEY REFERENCES process (id) ON DELETE CASCADE, + procedure_name varchar(256), -- name of the app procedure that invokes the transformation + start_time numeric, + duration numeric, + final_state varchar(16), + scratch varchar(2048) ); --- The above dataset_* tables are the original containment representation --- of datasets. --- Now here are some tables to represent complex datasets differently, in --- terms of accessors and constructors. --- It is unclear which is the better representation. - -CREATE TABLE createarray - ( array_id varchar(2048) +-- this gives information about each execute2, which is an attempt to +-- perform an execution. the execute2 id is tied to per-execution-attempt +-- information such as wrapper logs +CREATE TABLE execute2 + (id varchar(256) PRIMARY KEY, + execute_id varchar(256) REFERENCES execute (id) ON DELETE CASCADE, + start_time numeric, + duration numeric, + final_state varchar(16), + site varchar(256) ); -CREATE TABLE createarray_member - ( array_id varchar(2048), - ix varchar(2048), - member_id varchar(2048) +-- dataset_usage records usage relationships between processes and datasets; +-- in SwiftScript terms, the input and output parameters for each +-- application procedure invocation; in OPM terms, the artificts which are +-- input to and output from each process that is a Swift execution +CREATE TABLE ds_usage + (process_id varchar(256) REFERENCES process(id) ON DELETE CASCADE, + direction char(1), -- I or O for input or output + dataset_id varchar(256) REFERENCES dataset(id) ON DELETE CASCADE, + param_name varchar(256), -- the name of the parameter in this execute that + -- this dataset was bound to. 
sometimes this must + -- be contrived (for example, in positional varargs) + PRIMARY KEY (process_id,direction,dataset_id,param_name) ); --- TODO step -CREATE TABLE array_range - ( array_id varchar(2048), - from_id varchar(2048), - to_id varchar(2048), - step_id varchar(2048) -- nullable, if step is unspecified - ); - --- known_workflows stores some information about each workflow log that has --- been seen by the importer: the log filename, swift version and import --- status. -CREATE TABLE known_workflows - ( - workflow_id varchar(2048) PRIMARY KEY, - workflow_log_filename varchar(2048), - version varchar(2048), - importstatus varchar(2048) - ); - - --- workflow_events stores the start time and duration for each workflow --- that has been successfully imported. -CREATE TABLE workflow_events - ( workflow_id varchar(2048) PRIMARY KEY, - starttime numeric, - duration numeric - ); - --- extrainfo stores lines generated by the SWIFT_EXTRA_INFO feature -CREATE TABLE extrainfo - ( execute2id varchar(2048), - extrainfo varchar(1024) - ); - -- annotations -CREATE TABLE dataset_annotations_numeric - ( dataset_id varchar(2048), - name varchar(2048), +CREATE TABLE annot_ds_num + ( id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE, + name varchar(256), value numeric, - primary key(dataset_id, name) + PRIMARY KEY (id, name) ); -CREATE TABLE dataset_annotations_varchar - ( dataset_id varchar(2048), - name varchar(2048), - value varchar(4096), - primary key(dataset_id, name) +CREATE TABLE annot_ds_txt + ( id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE, + name varchar(256), + value varchar(2048), + PRIMARY KEY (id, name) ); -CREATE TABLE dataset_annotations_boolean - ( dataset_id varchar(2048), - name varchar(2048), +CREATE TABLE annot_ds_bool + ( id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE, + name varchar(256), value boolean, - primary key(dataset_id, name) + PRIMARY KEY (id, name) ); -CREATE TABLE process_annotations_numeric - ( process_id varchar(2048), - name varchar(2048), +CREATE TABLE annot_p_num + ( id varchar(256) REFERENCES process (id) ON DELETE CASCADE, + name varchar(256), value numeric, - primary key(process_id, name) + PRIMARY KEY (id, name) ); -CREATE TABLE process_annotations_varchar - ( process_id varchar(2048), - name varchar(2048), - value varchar(1024), - primary key(process_id, name) +CREATE TABLE annot_p_txt + ( id varchar(256) REFERENCES process (id) ON DELETE CASCADE, + name varchar(256), + value varchar(2048), + PRIMARY KEY (id, name) ); -CREATE TABLE process_annotations_boolean - ( process_id varchar(2048), - name varchar(2048), +CREATE TABLE annot_p_bool + ( id varchar(256) REFERENCES process (id) ON DELETE CASCADE, + name varchar(256), value boolean, - primary key(process_id, name) + PRIMARY KEY (id, name) ); -CREATE TABLE workflow_annotations_numeric - ( workflow_id varchar(2048), - name varchar(2048), +CREATE TABLE annot_wf_num + ( id varchar(256) REFERENCES workflow (id) ON DELETE CASCADE, + name varchar(256), value numeric, - primary key(workflow_id, name) + PRIMARY KEY (id, name) ); -CREATE TABLE workflow_annotations_varchar - ( workflow_id varchar(2048), - name varchar(2048), - value varchar(1024), - primary key(workflow_id, name) +CREATE TABLE annot_wf_txt + ( id varchar(256) REFERENCES workflow (id) ON DELETE CASCADE, + name varchar(256), + value varchar(2048), + PRIMARY KEY (id, name) ); -CREATE TABLE workflow_annotations_boolean - ( workflow_id varchar(2048), - name varchar(2048), +CREATE TABLE annot_wf_bool + ( id varchar(256) 
REFERENCES workflow (id) ON DELETE CASCADE, + name varchar(2048), value boolean, - primary key(workflow_id, name) + PRIMARY KEY (id, name) ); + +-- extrainfo stores lines generated by the SWIFT_EXTRA_INFO feature +-- extrainfo will be processes into annotation tables +-- CREATE TABLE extrainfo +-- ( id varchar(256) PRIMARY KEY REFERENCES execute (id) ON DELETE CASCADE, +-- extrainfo varchar(2048) +-- ); + +-- The above dataset_* tables are the original containment representation +-- of datasets. +-- Now here are some tables to represent complex datasets differently, in +-- terms of accessors and constructors. +-- It is unclear which is the better representation. + +CREATE TABLE createarray + ( id varchar(256) PRIMARY KEY + ); + +CREATE TABLE createarray_member + ( array_id varchar(256) REFERENCES createarray (id) ON DELETE CASCADE, + ix varchar(256), + member_id varchar(256), + PRIMARY KEY (array_id, ix) + ); + +-- TODO step +CREATE TABLE array_range + ( array_id varchar(256) REFERENCES createarray (id) ON DELETE CASCADE, + from_id varchar(256), + to_id varchar(256), + step_id varchar(256), -- nullable, if step is unspecified + PRIMARY KEY (array_id,from_id,to_id,step_id) + ); + -- this GRANT does not work for sqlite; you'll get a syntax error but -- ignore it, as it is not needed in sqlite -grant all on dataset_containment, dataset_filenames, dataset_usage, processes_in_workflows, invocation_procedure_names, known_workflows, workflow_events to public, operators; +grant all on + dataset, + file, + variable, + ds_containment, + process, + execute, + execute2, + workflow, + ds_usage, + annot_ds_num, + annot_ds_txt, + annot_ds_bool, + annot_p_num, + annot_p_txt, + annot_p_bool, + annot_wf_num, + annot_wf_txt, + annot_wf_bool, + extrainfo, + createarray, + createarray_member, + array_range +to public, operators; Modified: provenancedb/prov-sql-generate-transitive-closures.sql =================================================================== --- provenancedb/prov-sql-generate-transitive-closures.sql 2010-11-08 20:30:09 UTC (rev 3705) +++ provenancedb/prov-sql-generate-transitive-closures.sql 2010-11-10 18:18:39 UTC (rev 3706) @@ -10,13 +10,13 @@ -- of same? CREATE TABLE trans - (before char(128), - after char(128), + (before varchar(2048), + after varchar(2048), CONSTRAINT no_duplicate_arcs_in_trans UNIQUE (before, after)); CREATE TABLE transbase - (before char(128), - after char(128), + (before varchar(2048), + after varchar(2048), CONSTRAINT no_duplicate_arcs_in_transbase UNIQUE (before, after) ); Modified: provenancedb/prov-to-sql.sh =================================================================== --- provenancedb/prov-to-sql.sh 2010-11-08 20:30:09 UTC (rev 3705) +++ provenancedb/prov-to-sql.sh 2010-11-10 18:18:39 UTC (rev 3706) @@ -1,12 +1,11 @@ #!/bin/bash - export RUNID=$(basename $1 .log) -export WFID="tag:benc at ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:" +export WFID="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:" # TODO is there already a URI form for identifying workflows? -export WF="tag:benc at ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:run" +export WF="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:run" echo Generating SQL for $RUNID @@ -14,110 +13,147 @@ # this gives a distinction between the root process for a workflow and the # workflow itself. 
perhaps better to model the workflow as a process -echo "INSERT INTO processes_in_workflows (process_id, workflow_id) VALUES ('${WFID}0', '$WF');" >> tmp-import.sql - echo "INSERT INTO processes (id, type) VALUES ('${WFID}0', 'rootthread');" >> tmp-import.sql +echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-import.sql -while read time duration thread localthread endstate app scratch; do - echo "INSERT INTO processes (id, type) VALUES ('$thread', 'execute');" >> tmp-import.sql - echo "INSERT INTO processes_in_workflows (process_id, workflow_id) VALUES ('$thread', '$WF');" >> tmp-import.sql - echo "INSERT INTO executes (id, starttime, duration, finalstate, app, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$app', '$scratch');" >> tmp-import.sql + +while read time duration thread localthread endstate tr_name scratch; do + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-import.sql + echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-import.sql done < execute.global.event -while read starttime duration globalid id endstate thread site scratch; do +while read start_time duration globalid id endstate thread site scratch; do + # cut off the last component of the thread, so that we end up at the + # parent thread id which should correspond with the execute-level ID + inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')" + echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-import.sql +done < execute2.global.event - # cut off the last component of the thread, so that we end up at the - # parent thread id which should correspond with the execute-level ID - execute_id="$WFID$(echo $thread | sed 's/-[^-]*$//')" - echo "INSERT INTO execute2s (id, execute_id, starttime, duration, finalstate, site) VALUES ('$globalid', '$execute_id', '$starttime', '$duration', '$endstate', '$site');" >> tmp-import.sql -done < execute2.global.event -while read thread name lhs rhs result; do - operatorid="${WFID}operator:$thread" - echo "INSERT INTO processes (id, type) VALUES ('$operatorid', 'operator');" >> tmp-import.sql - echo "INSERT INTO invocation_procedure_names (execute_id, procedure_name) VALUES ('$operatorid','$name'); " >> tmp-import.sql - echo "INSERT INTO processes_in_workflows (process_id, workflow_id) VALUES ('$operatorid', '$WF');" >> tmp-import.sql - echo "INSERT INTO dataset_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-import.sql - echo "INSERT INTO dataset_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-import.sql - echo "INSERT INTO dataset_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-import.sql +while read col1 col2 col3 col4 col5 threadst namest lhsst rhsst resultst; do + thread=`echo $threadst | awk 'BEGIN { FS = "=" }; {print $2}'` + name=`echo $name | awk 'BEGIN { FS = "=" }; {print $2}'` + lhs=`echo $lhsst | awk 'BEGIN { FS = "=" }; {print $2}'` + rhs=`echo $rhsst | awk 'BEGIN { FS = "=" }; {print $2}'` + result=`echo $resultst | awk 'BEGIN { FS = "=" }; {print $2}'` + + operatorid="${WFID}operator:$thread" + + EXISTING=$($SQLCMD --tuples-only -c "select count(*) 
from dataset where id='$lhs';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-import.sql + fi + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$rhs';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-import.sql + fi + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$result';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-import.sql + fi + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-import.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-import.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-import.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-import.sql done < operators.txt while read id name output; do - echo "INSERT INTO processes (id, type) VALUES ('$id', 'function');" >> tmp-import.sql - echo "INSERT INTO processes_in_workflows (process_id, workflow_id) VALUES ('$id', '$WF');" >> tmp-import.sql - echo "INSERT INTO dataset_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-import.sql - echo "INSERT INTO invocation_procedure_names (execute_id, procedure_name) VALUES ('$id', '$name');" >> tmp-import.sql + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$id';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$id');" >> tmp-import.sql + fi + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-import.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-import.sql done < functions.txt while read id value; do -# TODO need ordering/naming - echo "INSERT INTO dataset_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-import.sql + # TODO need ordering/naming + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$id';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$id');" >> tmp-import.sql + fi + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-import.sql done < function-inputs.txt while read thread direction dataset variable rest; do - if [ "$direction" == "input" ] ; then - dir=I - else - dir=O - fi - echo "INSERT INTO dataset_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql + if [ "$direction" == "input" ] ; then + dir=I + else + dir=O + fi + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql + fi + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql done < tie-data-invocs.txt while read thread appname; do - echo "INSERT INTO invocation_procedure_names (execute_id, procedure_name) VALUES ('$thread', '$appname');" >> tmp-import.sql - + 
echo "UPDATE execute SET procedure_name='$appname' WHERE id='$thread';" >> tmp-import.sql done < invocation-procedure-names.txt while read outer inner; do - echo "INSERT INTO dataset_containment (outer_dataset_id, inner_dataset_id) VALUES ('$outer', '$inner');" >> tmp-import.sql + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$outer';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-import.sql + fi + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$inner';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-import.sql + fi + echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-import.sql done < tie-containers.txt while read dataset filename; do - echo "INSERT INTO dataset_filenames (dataset_id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql + fi + echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql done < dataset-filenames.txt while read dataset value; do - echo "INSERT INTO dataset_values (dataset_id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';") + if [ "$EXISTING" -eq "0" ]; then + echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql + fi + echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql done < dataset-values.txt -while read start duration wfid; do - echo "INSERT INTO workflow_events (workflow_id,starttime, duration) VALUES ('$wfid', '$start', '$duration');" >> tmp-import.sql +while read start duration wfid rest; do + echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-import.sql + echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-import.sql done < workflow.event while read id extrainfo ; do -# TODO this will not like quotes and things like that in extrainfo - echo "INSERT INTO extrainfo (execute2id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql + # TODO this will not like quotes and things like that in extrainfo + echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql done < extrainfo.txt # TODO this could merge with other naming tables -while read start duration thread finalstate procname ; do - compoundid=$WFID$thread - echo "INSERT INTO processes (id, type) VALUES ('$compoundid', 'compound');" >> tmp-import.sql - echo "INSERT INTO processes_in_workflows (process_id, workflow_id) VALUES ('$compound', '$WF');" >> tmp-import.sql - echo "INSERT INTO invocation_procedure_names (execute_id, procedure_name) VALUES ('$compoundid', '$procname');" >> tmp-import.sql +while read start duration thread final_state procname ; do + compoundid=$WFID$thread + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> tmp-import.sql done < compound.event -while read start duration thread finalstate procname ; do - fqid=$WFID$thread - echo "INSERT INTO processes (id, type) VALUES ('$fqid', 'internal');" >> tmp-import.sql - echo "INSERT INTO processes_in_workflows (process_id, workflow_id) VALUES ('$fqid', '$WF');" >> tmp-import.sql - echo "INSERT INTO 
invocation_procedure_names (execute_id, procedure_name) VALUES ('$fqid', '$procname');" >> tmp-import.sql +while read start duration thread final_state procname ; do + fqid=$WFID$thread + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> tmp-import.sql done < internalproc.event +while read t ; do + thread="${WFID}$t" + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> tmp-import.sql +done < scopes.txt + while read id ; do - echo "INSERT INTO createarray (array_id) VALUES ('$id');" >> tmp-import.sql + echo "INSERT INTO createarray (array_id) VALUES ('$id');" >> tmp-import.sql done < createarray.txt while read arrayid index memberid ; do - echo "INSERT INTO createarray_member (array_id, ix, member_id) VALUES ('$arrayid', '$index', '$memberid');" >> tmp-import.sql + echo "INSERT INTO createarray_member (array_id, ix, member_id) VALUES ('$arrayid', '$index', '$memberid');" >> tmp-import.sql done < createarray-members.txt -while read t ; do - thread="${WFID}$t" - echo "INSERT INTO processes (id, type) VALUES ('$thread', 'scope');" >> tmp-import.sql - echo "INSERT INTO processes_in_workflows (process_id, workflow_id) VALUES ('$thread', '$WF');" >> tmp-import.sql -done < scopes.txt - echo Sending SQL to DB $SQLCMD < tmp-import.sql Modified: provenancedb/swift-prov-import-all-logs =================================================================== --- provenancedb/swift-prov-import-all-logs 2010-11-08 20:30:09 UTC (rev 3705) +++ provenancedb/swift-prov-import-all-logs 2010-11-10 18:18:39 UTC (rev 3706) @@ -11,11 +11,13 @@ source $PROVDIR/etc/provenance.config export PATH=$PROVDIR:$PATH +# this generates a file with pairs like: +# swift-plot-log $LOGREPO everylog-vs-versions.data if [ "$?" != "0" ]; then - echo swift-plot-log failed when building everylog-vs-versions.data - exit 1 + echo swift-plot-log failed when building everylog-vs-versions.data + exit 1 fi # TODO better tmp handling that always using the same name in a shared @@ -24,53 +26,55 @@ echo first commandline param is $1 if [ "$1" == "rebuild" ]; then - echo CLEANING DATABASE - $SQLCMD < $PROVDIR/prov-init.sql + echo CLEANING DATABASE + $SQLCMD < $PROVDIR/prov-init.sql fi while read start version filename; do + + export IDIR=$(echo $filename | sed 's/\.log$/.d/') + echo IDIR=$IDIR + if [ $version -ge 1538 ]; then + echo -n "Log: $filename ... " + + # TODO: does not work in sqlite + EXISTING=$($SQLCMD --tuples-only -c "select count(*) from workflow where log_filename='$filename';") + + if [ "$EXISTING" -eq "0" ]; then + echo IMPORTING + + if grep --silent "DEBUG Loader Swift finished with no errors" $filename; then + wfstatus="SUCCESS" + else + wfstatus="FAIL" + fi - export IDIR=$(echo $filename | sed 's/\.log$/.d/') - echo IDIR=$IDIR - if [ $version -ge 1538 ]; then - echo -n "Log: $filename ... " + export RUNID=$(basename $filename .log) + # changing tag authority from e-mail benc at ci.uchicago.edu to dnsname ci.uchicago.edu + export WF="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:run" + + echo "INSERT INTO workflow (id, log_filename, swift_version, import_status) VALUES ('$WF','$filename','$version','$wfstatus');" | $SQLCMD -# TODO: make it work in general (sqlite, mysql, ...) 
-# Works only with PostgreSQL -EXISTING=$($SQLCMD --tuples-only -c "select count(*) from known_workflows where workflow_log_filename='$filename';") - -if [ "$EXISTING" -eq "0" ]; then - echo IMPORTING - - if grep --silent "DEBUG Loader Swift finished with no errors" $filename; then - wfstatus="SUCCESS" - else - wfstatus="FAIL" + + echo version $version in log file $filename + echo ============= will import ============= + prepare-for-import $filename + if [ "$?" != "0" ]; then + echo prepare-for-import failed + exit 2 + fi + import-run-to-sql $filename + if [ "$?" != "0" ]; then + echo import-run-to-sql failed + exit 3 + fi + + # import-run-to-xml $filename + + else + echo SKIP: Already known in workflow + fi fi - - echo version $version in log file $filename - echo ============= will import ============= - prepare-for-import $filename - if [ "$?" != "0" ]; then - echo prepare-for-import failed - exit 2 - fi - import-run-to-sql $filename - if [ "$?" != "0" ]; then - echo import-run-to-sql failed - exit 3 - fi - -# import-run-to-xml $filename - - export RUNID=$(basename $filename .log) - export WF="tag:benc at ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:run" - - echo "INSERT INTO known_workflows (workflow_id, workflow_log_filename, version, importstatus) VALUES ('$WF','$filename','$version','$wfstatus');" | $SQLCMD -else - echo SKIP: Already known in workflow -fi - fi done < /tmp/everylog-vs-versions.data # now pull the XML data into eXist, in as few runs as possible to avoid From noreply at svn.ci.uchicago.edu Fri Nov 12 17:27:31 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Fri, 12 Nov 2010 17:27:31 -0600 (CST) Subject: [Swift-commit] r3707 - in provenancedb: . apps/oops Message-ID: <20101112232731.197929CCBF@svn.ci.uchicago.edu> Author: lgadelha Date: 2010-11-12 17:27:30 -0600 (Fri, 12 Nov 2010) New Revision: 3707 Modified: provenancedb/apps/oops/oops_extractor.sh provenancedb/prov-to-sql.sh Log: Updated OOPS annotation extraction scripts Modified: provenancedb/apps/oops/oops_extractor.sh =================================================================== --- provenancedb/apps/oops/oops_extractor.sh 2010-11-10 18:18:39 UTC (rev 3706) +++ provenancedb/apps/oops/oops_extractor.sh 2010-11-12 23:27:30 UTC (rev 3707) @@ -6,7 +6,7 @@ # OOPS' Swift logs. PROVDB_HOME=~/provenancedb -PROTESTS_HOME=/home/aashish/CASP +PROTESTS_HOME=//CASP IMPORT_HOME=~/protests source $PROVDB_HOME/etc/provenance.config @@ -41,12 +41,14 @@ OOPS_RUN_ID=`echo $i | awk -F . 
'{print $3}'` cd $PROTESTS_HOME/$k/$i LOG_FILENAME=`ls | grep psim.loops- | grep "\."log$` - WORKFLOW_ID=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'` + WORKFLOW_ID=`echo "select id from workflow where log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'` cd $IMPORT_HOME/swift-logs - echo "insert into workflow_annotations_varchar values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD + echo "insert into annot_wf_txt (id, name, value) values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD - # using this as a workaround for the problem above, it will return nSim identical tuples - echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql + echo "select id,filename from file where filename like '%params%' and id in (select in_id from ds_containment where out_id in (select ds_usage.dataset_id from ds_usage,process,execute where ds_usage.process_id=process.id and process.id=execute.id and execute.procedure_name='loopPrepare' and ds_usage.direction='I' and process.workflow_id like '%$WORKFLOW_ID%'));" > query.sql + + #query for the previous database schema + #echo "select dataset_id,filename from dataset_filenames where filename like '%params%' and dataset_id in (select inner_dataset_id from dataset_containment where outer_dataset_id in (select dataset_usage.dataset_id from invocation_procedure_names,dataset_usage,processes_in_workflows where invocation_procedure_names.execute_id=dataset_usage.process_id and dataset_usage.process_id=processes_in_workflows.process_id and invocation_procedure_names.procedure_name='loopPrepare' and dataset_usage.direction='I' and processes_in_workflows.workflow_id like '%$WORKFLOW_ID%'));" > query.sql $SQLCMD -t -A -F " " -f query.sql -o result.txt @@ -63,18 +65,18 @@ if [ "$NAME" = "SAMPLE RANGE" ]; then VALUE1=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "-" }; {print $1}'` VALUE2=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "-" }; {print $2}'` - echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME BEGIN', $VALUE1);" | $SQLCMD - echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME END', $VALUE2);" | $SQLCMD + echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME BEGIN', $VALUE1);" | $SQLCMD + echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME END', $VALUE2);" | $SQLCMD fi if [ "$NAME" = "RESTRAIN DISTANCE" ]; then VALUE1=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "," }; {print $1}'` VALUE2=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "," }; {print $2}'` - echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME 1', $VALUE1);" | $SQLCMD - echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME 2', $VALUE2);" | $SQLCMD + echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME 1', $VALUE1);" | 
$SQLCMD
+          echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME 2', $VALUE2);" | $SQLCMD
 fi
 if [ "$NAME" = "MAXIMUM NUMBER OF STEPS" ]; then
 VALUE=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}'`
-          echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME', $VALUE);" | $SQLCMD
+          echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME', $VALUE);" | $SQLCMD
 fi
 done < $FILENAME
 fi

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2010-11-10 18:18:39 UTC (rev 3706)
+++ provenancedb/prov-to-sql.sh	2010-11-12 23:27:30 UTC (rev 3707)
@@ -125,10 +125,10 @@
 	echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-import.sql
 done < workflow.event

-while read id extrainfo ; do
-	# TODO this will not like quotes and things like that in extrainfo
-	echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql
-done < extrainfo.txt
+#while read id extrainfo ; do
+# TODO this will not like quotes and things like that in extrainfo
+#	echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql
+#done < extrainfo.txt

 # TODO this could merge with other naming tables
 while read start duration thread final_state procname ; do

From noreply at svn.ci.uchicago.edu  Fri Nov 19 10:02:41 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Fri, 19 Nov 2010 10:02:41 -0600 (CST)
Subject: [Swift-commit] r3708 - SwiftApps/SwiftR/Swift/exec
Message-ID: <20101119160241.A5C009D64A@svn.ci.uchicago.edu>

Author: wilde
Date: 2010-11-19 10:02:41 -0600 (Fri, 19 Nov 2010)
New Revision: 3708

Modified:
   SwiftApps/SwiftR/Swift/exec/start-swift
Log:
Fix pbs server - adapted newer pbsf logic to pbs, with needed adjustments to the '&' operator.
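
The diff below adapts the per-node worker launch from the firewalled pbsf variant to the plain pbs case: rather than running a single worker.pl from the job script, it derives the coaster service host and port from $CONTACT, lists the unique hosts in $PBS_NODEFILE, and starts one worker per node over ssh, backgrounding each launch with '&' and then waiting for all of them. A minimal sketch of that pattern, assuming a hypothetical contact URL and worker path (the real script computes these from its environment):

    CONTACT=http://login1.example.edu:50100                      # illustrative coaster service URL
    HOST=$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//')   # -> login1.example.edu
    PORT=$(echo $CONTACT | sed -e 's,^.*:,,')                    # -> 50100
    for h in $(sort < $PBS_NODEFILE | uniq); do                  # one launch per distinct node
        ssh $h "/usr/bin/perl worker.pl $CONTACT SwiftR-$h $HOME/.globus/coasters" &
    done
    wait    # hold the PBS job open until every backgrounded ssh exits

The '&' after each ssh is what lets the launches run in parallel, and the final wait keeps the batch job alive while the workers run — the adjustment the log message refers to.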
Modified: SwiftApps/SwiftR/Swift/exec/start-swift =================================================================== --- SwiftApps/SwiftR/Swift/exec/start-swift 2010-11-12 23:27:30 UTC (rev 3707) +++ SwiftApps/SwiftR/Swift/exec/start-swift 2010-11-19 16:02:41 UTC (rev 3708) @@ -67,11 +67,21 @@ #PBS -m n #PBS -l nodes=$nodes #PBS -l walltime=$time -#PBS -o pbs.stdout -#PBS -e pbs.stderr +#PBS -o $HOME +#PBS -e $HOME $queueDirective WORKER_LOGGING_ENABLED=true # FIXME: parameterize; fix w PBS -v -cd / && /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-workers $HOME/.globus/coasters $IDLETIMEOUT +#cd / && /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-workers $HOME/.globus/coasters $IDLETIMEOUT +HOST=\$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//') +PORT=\$(echo $CONTACT | sed -e 's,^.*:,,') +echo '***' PBS_NODEFILE file: \$PBS_NODEFILE CONTACT:$CONTACT +cat \$PBS_NODEFILE +echo '***' unique nodes are: +sort < \$PBS_NODEFILE|uniq +for h in \$(sort < \$PBS_NODEFILE|uniq); do + ssh \$h "echo Swift R startup running on host; hostname; cd /; /usr/bin/perl $SWIFTBIN/worker.pl $CONTACT SwiftR-\$h $HOME/.globus/coasters $IDLETIMEOUT" & +done +wait END } @@ -82,7 +92,7 @@ usage exit 1 fi - if [ $queue != default ]; then + if [ $queue != default ]; then # FIXME: this will interfere if user really wants to use "-q default" queueDirective="#PBS -q $queue" else queueDirective="" @@ -93,14 +103,14 @@ #PBS -m n #PBS -l nodes=$nodes:ppn=$cores #PBS -l walltime=$time -#PBS -o $HOME/mw/work/pbs.stdout -#PBS -e $HOME/mw/work/pbs.stderr +#PBS -o $HOME +#PBS -e $HOME $queueDirective WORKER_LOGGING_ENABLED=true # FIXME: parameterize; fix w PBS -v HOST=\$(echo $CONTACT | sed -e 's,^http://,,' -e 's/:.*//') PORT=\$(echo $CONTACT | sed -e 's,^.*:,,') CONTACT=http://localhost:\$PORT -echo '***' PBS_NODEFILE file is \$PBS_NODEFILE +echo '***' PBS_NODEFILE file: \$PBS_NODEFILE CONTACT:$CONTACT cat \$PBS_NODEFILE echo '***' unique nodes are: sort < \$PBS_NODEFILE|uniq @@ -144,9 +154,9 @@ usage() { - echo >&2 "usage: $0 -c cores -h 'host1 ... hostN' -n nodes -q queue -s server -t throttle" + echo >&2 "usage: $0 -c cores -h 'host1 ... 
hostN' -n nodes -q queue -s server -p parallelJobs -t walltime" echo >&2 " valid servers: local, ssh, pbs, pbsf (pbs with firewalled workers)" - echo >&2 " defaults: cores=2 nodes=1 queue=none server=local throttle=10" + echo >&2 " defaults: cores=2 nodes=1 queue=none server=local parallelJobs=10 walltime=00:30:00 (hh:mm:ss) " } verify-is-one-of() From noreply at svn.ci.uchicago.edu Mon Nov 22 16:23:05 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 22 Nov 2010 16:23:05 -0600 (CST) Subject: [Swift-commit] r3709 - trunk/tests Message-ID: <20101122222305.ED1309CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-22 16:23:05 -0600 (Mon, 22 Nov 2010) New Revision: 3709 Modified: trunk/tests/nightly.sh Log: Restore CDM coasters pinned test Modified: trunk/tests/nightly.sh =================================================================== --- trunk/tests/nightly.sh 2010-11-19 16:02:41 UTC (rev 3708) +++ trunk/tests/nightly.sh 2010-11-22 22:23:05 UTC (rev 3709) @@ -155,6 +155,7 @@ SWIFTCOUNT=0 +echo "RUNNING_IN: $RUNDIR" echo "HTML_OUTPUT: $HTML" cd $TOPDIR @@ -911,10 +912,10 @@ $TESTDIR/language/should-not-work \ $TESTDIR/cdm \ $TESTDIR/cdm/ps \ - $TESTDIR/cdm/star ) + $TESTDIR/cdm/star + $TESTDIR/cdm/ps/pinned + ) -# $TESTDIR/cdm/ps/pinned \ # Currently broken - GROUPCOUNT=1 for G in ${GROUPLIST[@]}; do export GROUP=$G From noreply at svn.ci.uchicago.edu Mon Nov 22 16:29:17 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 22 Nov 2010 16:29:17 -0600 (CST) Subject: [Swift-commit] r3710 - in trunk/tests: cdm cdm/ps cdm/ps/pinned cdm/star functions language/should-not-work language-behaviour Message-ID: <20101122222917.1BD909CC9B@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-22 16:29:16 -0600 (Mon, 22 Nov 2010) New Revision: 3710 Modified: trunk/tests/cdm/ps/pinned/title.txt trunk/tests/cdm/ps/title.txt trunk/tests/cdm/star/title.txt trunk/tests/cdm/title.txt trunk/tests/functions/title.txt trunk/tests/language-behaviour/title.txt trunk/tests/language/should-not-work/title.txt Log: Title corrections Modified: trunk/tests/cdm/ps/pinned/title.txt =================================================================== --- trunk/tests/cdm/ps/pinned/title.txt 2010-11-22 22:23:05 UTC (rev 3709) +++ trunk/tests/cdm/ps/pinned/title.txt 2010-11-22 22:29:16 UTC (rev 3710) @@ -1 +1 @@ -CDM tests with provider staging and pinned +CDM Tests w/ Provider Staging & Pinned Modified: trunk/tests/cdm/ps/title.txt =================================================================== --- trunk/tests/cdm/ps/title.txt 2010-11-22 22:23:05 UTC (rev 3709) +++ trunk/tests/cdm/ps/title.txt 2010-11-22 22:29:16 UTC (rev 3710) @@ -1 +1 @@ -CDM tests with provider staging +CDM Tests with Provider Staging Modified: trunk/tests/cdm/star/title.txt =================================================================== --- trunk/tests/cdm/star/title.txt 2010-11-22 22:23:05 UTC (rev 3709) +++ trunk/tests/cdm/star/title.txt 2010-11-22 22:29:16 UTC (rev 3710) @@ -1 +1 @@ -CDM tests w/ "star" +CDM Tests w/ "star" Modified: trunk/tests/cdm/title.txt =================================================================== --- trunk/tests/cdm/title.txt 2010-11-22 22:23:05 UTC (rev 3709) +++ trunk/tests/cdm/title.txt 2010-11-22 22:29:16 UTC (rev 3710) @@ -1 +1 @@ -CDM tests +CDM Tests Modified: trunk/tests/functions/title.txt =================================================================== --- trunk/tests/functions/title.txt 2010-11-22 22:23:05 UTC (rev 3709) +++ 
trunk/tests/functions/title.txt 2010-11-22 22:29:16 UTC (rev 3710) @@ -1 +1 @@ -Swift functions +Swift Functions Modified: trunk/tests/language/should-not-work/title.txt =================================================================== --- trunk/tests/language/should-not-work/title.txt 2010-11-22 22:23:05 UTC (rev 3709) +++ trunk/tests/language/should-not-work/title.txt 2010-11-22 22:29:16 UTC (rev 3710) @@ -1 +1 @@ -Should-Not-Work tests +Should-Not-Work Tests Modified: trunk/tests/language-behaviour/title.txt =================================================================== --- trunk/tests/language-behaviour/title.txt 2010-11-22 22:23:05 UTC (rev 3709) +++ trunk/tests/language-behaviour/title.txt 2010-11-22 22:29:16 UTC (rev 3710) @@ -1 +1 @@ -Language-behaviour tests +Language-Behaviour Tests From noreply at svn.ci.uchicago.edu Tue Nov 23 00:38:34 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 23 Nov 2010 00:38:34 -0600 (CST) Subject: [Swift-commit] r3711 - trunk/libexec Message-ID: <20101123063834.A898AFC41@svn.ci.uchicago.edu> Author: skenny Date: 2010-11-23 00:38:31 -0600 (Tue, 23 Nov 2010) New Revision: 3711 Modified: trunk/libexec/vdl-int.k Log: prints errors to stdout so they can be read by the user during workflow execution Modified: trunk/libexec/vdl-int.k =================================================================== --- trunk/libexec/vdl-int.k 2010-11-22 22:29:16 UTC (rev 3710) +++ trunk/libexec/vdl-int.k 2010-11-23 06:38:31 UTC (rev 3711) @@ -555,6 +555,7 @@ catch("^(?!Abort$).*" vdl:setprogress("Failed but can retry") log(LOG:DEBUG, "APPLICATION_EXCEPTION jobid={jobid} - Application exception: ", exception) + echo(exception) if(matches(exception,".*executable bit.*") generateError(exception) ) From noreply at svn.ci.uchicago.edu Tue Nov 23 00:39:25 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 23 Nov 2010 00:39:25 -0600 (CST) Subject: [Swift-commit] r3712 - trunk/libexec Message-ID: <20101123063925.48329FC41@svn.ci.uchicago.edu> Author: skenny Date: 2010-11-23 00:39:25 -0600 (Tue, 23 Nov 2010) New Revision: 3712 Modified: trunk/libexec/execute-default.k Log: print errors to stdout so they can be seen by the user during workflow execution Modified: trunk/libexec/execute-default.k =================================================================== --- trunk/libexec/execute-default.k 2010-11-23 06:38:31 UTC (rev 3711) +++ trunk/libexec/execute-default.k 2010-11-23 06:39:25 UTC (rev 3712) @@ -47,6 +47,7 @@ else ( to(errors, exception) log(LOG:INFO, exception) + echo(exception) mark(restartout, err=true, mapping=false) graphStuff(tr, stagein, stageout, err=true, maybe(args=arguments)) ) From noreply at svn.ci.uchicago.edu Tue Nov 23 13:35:18 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 23 Nov 2010 13:35:18 -0600 (CST) Subject: [Swift-commit] r3713 - provenancedb Message-ID: <20101123193518.57110FC41@svn.ci.uchicago.edu> Author: lgadelha Date: 2010-11-23 13:35:18 -0600 (Tue, 23 Nov 2010) New Revision: 3713 Modified: provenancedb/prov-init.sql provenancedb/prov-to-sql.sh Log: Modified: provenancedb/prov-init.sql =================================================================== --- provenancedb/prov-init.sql 2010-11-23 06:39:25 UTC (rev 3712) +++ provenancedb/prov-init.sql 2010-11-23 19:35:18 UTC (rev 3713) @@ -1,28 +1,28 @@ -- this is the schema definition used for the main relational provenance -- implementation (in both sqlite3 and postgres) -DROP 
TABLE dataset; -DROP TABLE file; -DROP TABLE variable; -DROP TABLE ds_containment; -DROP TABLE process; -DROP TABLE execute; -DROP TABLE execute2; -DROP TABLE workflow; -DROP TABLE ds_usage; -DROP TABLE annot_ds_num; -DROP TABLE annot_ds_txt; -DROP TABLE annot_ds_bool; -DROP TABLE annot_p_num; -DROP TABLE annot_p_txt; -DROP TABLE annot_p_bool; -DROP TABLE annot_wf_num; -DROP TABLE annot_wf_txt; -DROP TABLE annot_wf_bool; -DROP TABLE extrainfo; -DROP TABLE createarray; -DROP TABLE createarray_member; -DROP TABLE array_range; +DROP TABLE dataset CASCADE; +DROP TABLE file CASCADE; +DROP TABLE variable CASCADE; +DROP TABLE ds_containment CASCADE; +DROP TABLE process CASCADE; +DROP TABLE execute CASCADE; +DROP TABLE execute2 CASCADE; +DROP TABLE workflow CASCADE; +DROP TABLE ds_usage CASCADE; +DROP TABLE annot_ds_num CASCADE; +DROP TABLE annot_ds_txt CASCADE; +DROP TABLE annot_ds_bool CASCADE; +DROP TABLE annot_p_num CASCADE; +DROP TABLE annot_p_txt CASCADE; +DROP TABLE annot_p_bool CASCADE; +DROP TABLE annot_wf_num CASCADE; +DROP TABLE annot_wf_txt CASCADE; +DROP TABLE annot_wf_bool CASCADE; +DROP TABLE extrainfo CASCADE; +DROP TABLE createarray CASCADE; +DROP TABLE createarray_member CASCADE; +DROP TABLE array_range CASCADE; -- workflow stores some information about each workflow log that has -- been seen by the importer: the log filename, swift version and import @@ -231,6 +231,81 @@ PRIMARY KEY (array_id,from_id,to_id,step_id) ); + +-- lists distinct processes by name in a workflow + +CREATE OR REPLACE FUNCTION process_names(varchar) RETURNS SETOF varchar AS $$ + SELECT DISTINCT(process.name) + FROM process + WHERE process.workflow_id=$1; +$$ LANGUAGE SQL; + + +-- OOPS-specific functions + +CREATE OR REPLACE FUNCTION list_oops_runs() RETURNS SETOF varchar AS $$ + SELECT DISTINCT(value) FROM annot_wf_txt WHERE annot_wf_txt.name=('oops_run_id'); +$$ LANGUAGE SQL; + +CREATE TYPE oops_summary AS (oops_run_id varchar, start_time timestamp with time zone, duration_sec numeric, swift_version varchar); + +CREATE OR REPLACE FUNCTION oops_run_summary(varchar) RETURNS SETOF oops_summary AS $$ + SELECT annot_wf_txt.value as oops_run_id, to_timestamp(workflow.start_time) as start_time, + workflow.duration as duration_sec,workflow.swift_version as swift_version + FROM annot_wf_txt,workflow + WHERE annot_wf_txt.id=workflow.id and annot_wf_txt.name='oops_run_id' and annot_wf_txt.value=$1; +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION oops_process_names(varchar) RETURNS SETOF varchar AS $$ + SELECT DISTINCT(process.name) + FROM process, annot_wf_txt + WHERE process.workflow_id=annot_wf_txt.id AND + annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value=$1; +$$ LANGUAGE SQL; + +CREATE TYPE oops_variable_summary AS (oops_run_id varchar, param_name varchar, value varchar); + +CREATE OR REPLACE FUNCTION oops_variable_summary(varchar) RETURNS SETOF oops_variable_summary $$ + SELECT annot_wf_txt.value,ds_usage.param_name,variable.value + FROM variable,ds_usage,process,annot_wf_txt + WHERE variable.id=ds_usage.dataset_id and ds_usage.process_id=process.id and process.workflow_id=annot_wf_txt.id; +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION oops_science_summary(varchar) RETURNS SETOF oops_variable_summary AS $$ + SELECT ds_usage.param_name,variable.value + FROM variable,ds_usage,process,annot_wf_txt + WHERE variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND process.workflow_id=annot_wf_txt.id AND + (ds_usage.param_name='proteinId' OR ds_usage.param_name='targetId' OR 
ds_usage.param_name='seqFile' OR + ds_usage.param_name='prot' OR ds_usage.param_name='prepTarFile' OR ds_usage.param_name='nSim') AND + annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value='h733'; +$$ LANGUAGE SQL; + + + + + + + + + + +CREATE OR REPLACE FUNCTION annotation(entity varchar, name varchar) RETURNS anyelement AS $$ + IF entity = 'workflow' THEN + + ELSE + IF entity = 'process' THEN + + ELSE + IF entity = 'dataset' THEN + + ELSE + + END IF; + END IF; + END IF; +$$ LANGUAGE 'plpgsql'; + + -- this GRANT does not work for sqlite; you'll get a syntax error but -- ignore it, as it is not needed in sqlite grant all on Modified: provenancedb/prov-to-sql.sh =================================================================== --- provenancedb/prov-to-sql.sh 2010-11-23 06:39:25 UTC (rev 3712) +++ provenancedb/prov-to-sql.sh 2010-11-23 19:35:18 UTC (rev 3713) @@ -9,13 +9,12 @@ echo Generating SQL for $RUNID -rm -f tmp-import.sql +rm -f tmp-import.sql import.sql tmp-ds.sql # this gives a distinction between the root process for a workflow and the # workflow itself. perhaps better to model the workflow as a process echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-import.sql - while read time duration thread localthread endstate tr_name scratch; do echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-import.sql echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-import.sql @@ -32,25 +31,16 @@ while read col1 col2 col3 col4 col5 threadst namest lhsst rhsst resultst; do thread=`echo $threadst | awk 'BEGIN { FS = "=" }; {print $2}'` - name=`echo $name | awk 'BEGIN { FS = "=" }; {print $2}'` + name=`echo $namest | awk 'BEGIN { FS = "=" }; {print $2}'` lhs=`echo $lhsst | awk 'BEGIN { FS = "=" }; {print $2}'` rhs=`echo $rhsst | awk 'BEGIN { FS = "=" }; {print $2}'` result=`echo $resultst | awk 'BEGIN { FS = "=" }; {print $2}'` operatorid="${WFID}operator:$thread" - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$lhs';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-import.sql - fi - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$rhs';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-import.sql - fi - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$result';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-import.sql - fi + echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql + echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql + echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-import.sql echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-import.sql echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-import.sql @@ -58,20 +48,14 @@ done < operators.txt while read id name output; do - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$id';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$id');" >> 
tmp-import.sql - fi + echo "INSERT INTO dataset (id) VALUES ('$output');" >> tmp-ds.sql echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-import.sql echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-import.sql done < functions.txt while read id value; do # TODO need ordering/naming - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$id';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$id');" >> tmp-import.sql - fi + echo "INSERT INTO dataset (id) VALUES ('$value');" >> tmp-ds.sql echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-import.sql done < function-inputs.txt @@ -81,10 +65,7 @@ else dir=O fi - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql - fi + echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql done < tie-data-invocs.txt @@ -93,30 +74,18 @@ done < invocation-procedure-names.txt while read outer inner; do - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$outer';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-import.sql - fi - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$inner';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-import.sql - fi + echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql + echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-import.sql done < tie-containers.txt while read dataset filename; do - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql - fi + echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql done < dataset-filenames.txt while read dataset value; do - EXISTING=$($SQLCMD --tuples-only -c "select count(*) from dataset where id='$dataset';") - if [ "$EXISTING" -eq "0" ]; then - echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-import.sql - fi + echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql done < dataset-values.txt @@ -154,9 +123,13 @@ echo "INSERT INTO createarray_member (array_id, ix, member_id) VALUES ('$arrayid', '$index', '$memberid');" >> tmp-import.sql done < createarray-members.txt +echo "BEGIN;" > import.sql +cat tmp-ds.sql | sort | uniq >> import.sql +cat tmp-import.sql >> import.sql +echo "COMMIT;" >> import.sql echo Sending SQL to DB -$SQLCMD < tmp-import.sql +$SQLCMD < import.sql echo Finished sending SQL to DB From noreply at svn.ci.uchicago.edu Tue Nov 23 15:36:41 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Tue, 23 Nov 2010 15:36:41 -0600 (CST) Subject: [Swift-commit] r3714 - provenancedb Message-ID: 
<20101123213641.0DCF8FC41@svn.ci.uchicago.edu> Author: lgadelha Date: 2010-11-23 15:36:40 -0600 (Tue, 23 Nov 2010) New Revision: 3714 Modified: provenancedb/prov-init.sql provenancedb/prov-to-sql.sh Log: Modified: provenancedb/prov-init.sql =================================================================== --- provenancedb/prov-init.sql 2010-11-23 19:35:18 UTC (rev 3713) +++ provenancedb/prov-init.sql 2010-11-23 21:36:40 UTC (rev 3714) @@ -281,31 +281,6 @@ $$ LANGUAGE SQL; - - - - - - - - -CREATE OR REPLACE FUNCTION annotation(entity varchar, name varchar) RETURNS anyelement AS $$ - IF entity = 'workflow' THEN - - ELSE - IF entity = 'process' THEN - - ELSE - IF entity = 'dataset' THEN - - ELSE - - END IF; - END IF; - END IF; -$$ LANGUAGE 'plpgsql'; - - -- this GRANT does not work for sqlite; you'll get a syntax error but -- ignore it, as it is not needed in sqlite grant all on Modified: provenancedb/prov-to-sql.sh =================================================================== --- provenancedb/prov-to-sql.sh 2010-11-23 19:35:18 UTC (rev 3713) +++ provenancedb/prov-to-sql.sh 2010-11-23 21:36:40 UTC (rev 3714) @@ -9,22 +9,22 @@ echo Generating SQL for $RUNID -rm -f tmp-import.sql import.sql tmp-ds.sql +rm -f tmp-ds.sql tmp-p.sql tmp-e.sql tmp-e2.sql tmp-dsu.sql tmp-dsc.sql tmp-f.sql tmp-v.sql tmp-import.sql import.sql # this gives a distinction between the root process for a workflow and the # workflow itself. perhaps better to model the workflow as a process -echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-import.sql +echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('${WFID}0', 'rootthread', '$RUNID', '$WF');" >> tmp-p.sql while read time duration thread localthread endstate tr_name scratch; do - echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-import.sql - echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-import.sql + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'execute', '$tr_name', '$WF');" >> tmp-p.sql + echo "INSERT INTO execute (id, start_time, duration, final_state, scratch) VALUES ('$thread', $time, $duration, '$endstate', '$scratch');" >> tmp-e.sql done < execute.global.event while read start_time duration globalid id endstate thread site scratch; do # cut off the last component of the thread, so that we end up at the # parent thread id which should correspond with the execute-level ID inv_id="$WFID$(echo $thread | sed 's/-[^-]*$//')" - echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-import.sql + echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-e2.sql done < execute2.global.event @@ -41,57 +41,48 @@ echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql - echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-import.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-import.sql - echo 
"INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-import.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-import.sql + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-p.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-dsu.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-dsu.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-dsu.sql done < operators.txt while read id name output; do echo "INSERT INTO dataset (id) VALUES ('$output');" >> tmp-ds.sql - echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-import.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-import.sql + echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-p.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-dsu.sql done < functions.txt while read id value; do # TODO need ordering/naming echo "INSERT INTO dataset (id) VALUES ('$value');" >> tmp-ds.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-import.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-dsu.sql done < function-inputs.txt -while read thread direction dataset variable rest; do - if [ "$direction" == "input" ] ; then - dir=I - else - dir=O - fi - echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-import.sql -done < tie-data-invocs.txt while read thread appname; do - echo "UPDATE execute SET procedure_name='$appname' WHERE id='$thread';" >> tmp-import.sql + echo "UPDATE execute SET procedure_name='$appname' WHERE id='$thread';" >> tmp-e.sql done < invocation-procedure-names.txt while read outer inner; do echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql - echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-import.sql + echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-dsc.sql done < tie-containers.txt while read dataset filename; do echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql - echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-import.sql + echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-f.sql done < dataset-filenames.txt while read dataset value; do echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql - echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-import.sql + echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-v.sql done < dataset-values.txt while read start duration wfid rest; do - echo "UPDATE workflow 
SET start_time=$start WHERE id='$WF';" >> tmp-import.sql
- echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-import.sql
+ echo "UPDATE workflow SET start_time=$start WHERE id='$WF';" >> tmp-u.sql
+ echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-u.sql
 done < workflow.event

 #while read id extrainfo ; do
@@ -102,19 +93,29 @@
 # TODO this could merge with other naming tables
 while read start duration thread final_state procname ; do
   compoundid=$WFID$thread
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$compoundid', 'compound', '$procname', '$WF');" >> tmp-p.sql
 done < compound.event

 while read start duration thread final_state procname ; do
   fqid=$WFID$thread
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$fqid', 'internal', '$procname', '$WF');" >> tmp-p.sql
 done < internalproc.event

 while read t ; do
   thread="${WFID}$t"
- echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> tmp-import.sql
+ echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$thread', 'scope', 'scope', '$WF');" >> tmp-p.sql
 done < scopes.txt

+while read thread direction dataset variable rest; do
+  if [ "$direction" == "input" ] ; then
+    dir=I
+  else
+    dir=O
+  fi
+  echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql
+  echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-dsu.sql
+done < tie-data-invocs.txt
+
 while read id ; do
   echo "INSERT INTO createarray (array_id) VALUES ('$id');" >> tmp-import.sql
 done < createarray.txt
@@ -123,10 +124,17 @@
   echo "INSERT INTO createarray_member (array_id, ix, member_id) VALUES ('$arrayid', '$index', '$memberid');" >> tmp-import.sql
 done < createarray-members.txt

-echo "BEGIN;" > import.sql
-cat tmp-ds.sql | sort | uniq >> import.sql
-cat tmp-import.sql >> import.sql
-echo "COMMIT;" >> import.sql
+cat tmp-ds.sql | sort | uniq > import.sql
+cat tmp-f.sql | sort | uniq >> import.sql
+cat tmp-v.sql | sort | uniq >> import.sql
+cat tmp-p.sql | sort | uniq >> import.sql
+cat tmp-e.sql | sort | uniq >> import.sql
+cat tmp-e2.sql | sort | uniq >> import.sql
+cat tmp-dsu.sql | sort | uniq >> import.sql
+cat tmp-dsc.sql | sort | uniq >> import.sql
+cat tmp-import.sql | sort | uniq >> import.sql
+cat tmp-u.sql | sort | uniq >> import.sql
+
 echo Sending SQL to DB
 $SQLCMD < import.sql
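
(Illustrative sketch, not part of r3714: with every per-table staging file piped through sort | uniq, the assembled import.sql is a deduplicated batch ordered so that rows referenced by foreign keys load first and the workflow UPDATEs run last; this replaces the per-row existence checks that previously queried the database through $SQLCMD for each dataset. With made-up identifiers, the generated file looks roughly like:

    INSERT INTO dataset (id) VALUES ('dataset:example-1');
    INSERT INTO file (id, filename) VALUES ('dataset:example-1', 'out.txt');
    INSERT INTO process (id, type, name, workflow_id) VALUES ('execute:example-1', 'execute', 'cat', 'wf:example');
    INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('execute:example-1', 'O', 'dataset:example-1', 'result');
    UPDATE workflow SET start_time=1290494400 WHERE id='wf:example';

where repeated INSERT INTO dataset lines emitted by the different log readers collapse into a single row.)

From noreply at svn.ci.uchicago.edu Wed Nov 24 10:54:14 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Wed, 24 Nov 2010 10:54:14 -0600 (CST)
Subject: [Swift-commit] r3715 - trunk/tests/language/working
Message-ID: <20101124165414.E2A4F9CC92@svn.ci.uchicago.edu>
Author: wozniak
Date: 2010-11-24 10:54:14 -0600 (Wed, 24 Nov 2010)
New Revision: 3715
Modified: trunk/tests/language/working/033-assign.swift
Log: Script should fail
Modified: trunk/tests/language/working/033-assign.swift
===================================================================
--- trunk/tests/language/working/033-assign.swift 2010-11-23 21:36:40 UTC (rev 3714)
+++ trunk/tests/language/working/033-assign.swift 2010-11-24 16:54:14 UTC (rev 3715)
@@ -1,2 +1,6 @@
+
+// THIS-SCRIPT-SHOULD-FAIL
+// Hangs waiting for a,b
+
 int a,b;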
int i=(a + b) * 5; From noreply at svn.ci.uchicago.edu Wed Nov 24 10:54:45 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 10:54:45 -0600 (CST) Subject: [Swift-commit] r3716 - in trunk/tests/language: should-not-work working Message-ID: <20101124165445.6A9B29CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 10:54:45 -0600 (Wed, 24 Nov 2010) New Revision: 3716 Added: trunk/tests/language/should-not-work/033-assign.swift Removed: trunk/tests/language/working/033-assign.swift Log: Script should fail Copied: trunk/tests/language/should-not-work/033-assign.swift (from rev 3715, trunk/tests/language/working/033-assign.swift) =================================================================== --- trunk/tests/language/should-not-work/033-assign.swift (rev 0) +++ trunk/tests/language/should-not-work/033-assign.swift 2010-11-24 16:54:45 UTC (rev 3716) @@ -0,0 +1,6 @@ + +// THIS-SCRIPT-SHOULD-FAIL +// Hangs waiting for a,b + +int a,b; +int i=(a + b) * 5; Deleted: trunk/tests/language/working/033-assign.swift =================================================================== --- trunk/tests/language/working/033-assign.swift 2010-11-24 16:54:14 UTC (rev 3715) +++ trunk/tests/language/working/033-assign.swift 2010-11-24 16:54:45 UTC (rev 3716) @@ -1,6 +0,0 @@ - -// THIS-SCRIPT-SHOULD-FAIL -// Hangs waiting for a,b - -int a,b; -int i=(a + b) * 5; From noreply at svn.ci.uchicago.edu Wed Nov 24 10:55:37 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 10:55:37 -0600 (CST) Subject: [Swift-commit] r3717 - trunk/tests/language/working Message-ID: <20101124165537.D3F979CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 10:55:37 -0600 (Wed, 24 Nov 2010) New Revision: 3717 Removed: trunk/tests/language/working/027-initializer.swift Log: Remove duplicate test Deleted: trunk/tests/language/working/027-initializer.swift =================================================================== --- trunk/tests/language/working/027-initializer.swift 2010-11-24 16:54:45 UTC (rev 3716) +++ trunk/tests/language/working/027-initializer.swift 2010-11-24 16:55:37 UTC (rev 3717) @@ -1,3 +0,0 @@ -int a,b; -int i = (a + b) * 5; - From noreply at svn.ci.uchicago.edu Wed Nov 24 10:57:08 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 10:57:08 -0600 (CST) Subject: [Swift-commit] r3718 - trunk/tests/language/working Message-ID: <20101124165708.2E07D9CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 10:57:08 -0600 (Wed, 24 Nov 2010) New Revision: 3718 Modified: trunk/tests/language/working/0341-whitespace.swift Log: Script should fail Modified: trunk/tests/language/working/0341-whitespace.swift =================================================================== --- trunk/tests/language/working/0341-whitespace.swift 2010-11-24 16:55:37 UTC (rev 3717) +++ trunk/tests/language/working/0341-whitespace.swift 2010-11-24 16:57:08 UTC (rev 3718) @@ -1,7 +1,12 @@ - int a; +// THIS-SCRIPT-SHOULD-FAIL +// Hangs waiting for a,b +// Same as 033-assign +int a; + + int b; From noreply at svn.ci.uchicago.edu Wed Nov 24 10:57:34 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 10:57:34 -0600 (CST) Subject: [Swift-commit] r3719 - in trunk/tests/language: should-not-work working Message-ID: <20101124165734.EB69A9CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 10:57:34 -0600 (Wed, 24 Nov 2010) New Revision: 3719 Added: 
trunk/tests/language/should-not-work/0341-whitespace.swift Removed: trunk/tests/language/working/0341-whitespace.swift Log: Script should fail Copied: trunk/tests/language/should-not-work/0341-whitespace.swift (from rev 3718, trunk/tests/language/working/0341-whitespace.swift) =================================================================== --- trunk/tests/language/should-not-work/0341-whitespace.swift (rev 0) +++ trunk/tests/language/should-not-work/0341-whitespace.swift 2010-11-24 16:57:34 UTC (rev 3719) @@ -0,0 +1,30 @@ + +// THIS-SCRIPT-SHOULD-FAIL +// Hangs waiting for a,b +// Same as 033-assign + +int a; + + + + int +b; + +int + + + + + + i =( a + /* dfdfsdfds*/ b ) // + * 5 + + + + + +; + + + + Deleted: trunk/tests/language/working/0341-whitespace.swift =================================================================== --- trunk/tests/language/working/0341-whitespace.swift 2010-11-24 16:57:08 UTC (rev 3718) +++ trunk/tests/language/working/0341-whitespace.swift 2010-11-24 16:57:34 UTC (rev 3719) @@ -1,30 +0,0 @@ - -// THIS-SCRIPT-SHOULD-FAIL -// Hangs waiting for a,b -// Same as 033-assign - -int a; - - - - int -b; - -int - - - - - - i =( a + /* dfdfsdfds*/ b ) // - * 5 - - - - - -; - - - - From noreply at svn.ci.uchicago.edu Wed Nov 24 10:59:23 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 10:59:23 -0600 (CST) Subject: [Swift-commit] r3720 - trunk/tests Message-ID: <20101124165923.2DBE39CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 10:59:23 -0600 (Wed, 24 Nov 2010) New Revision: 3720 Added: trunk/tests/apps/ Log: Tests that are very similar to real applications From noreply at svn.ci.uchicago.edu Wed Nov 24 11:00:14 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 11:00:14 -0600 (CST) Subject: [Swift-commit] r3721 - in trunk/tests: apps language/working Message-ID: <20101124170014.BADF09CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 11:00:14 -0600 (Wed, 24 Nov 2010) New Revision: 3721 Added: trunk/tests/apps/039-montage.swift Removed: trunk/tests/language/working/039-montage.swift Log: Move 039-montage to apps Copied: trunk/tests/apps/039-montage.swift (from rev 3708, trunk/tests/language/working/039-montage.swift) =================================================================== --- trunk/tests/apps/039-montage.swift (rev 0) +++ trunk/tests/apps/039-montage.swift 2010-11-24 17:00:14 UTC (rev 3721) @@ -0,0 +1,186 @@ + +type Image {} +type Header {} +type Table {} +type DiffStruct { + int cntr1; + int cntr2; + Image plus; + Image minus; + Image diff; +} + +type TxtFile {} +type JPEG {}; + + +( Image projectedImage, Image projectedArea ) mProjectPP ( Image rawImage, Header template ) { + app { + mProjectPP "-X" @rawImage @projectedImage @template; + } +} + +( Image projectedImages[], Image projectedAreas[] ) mProjectPPBatch ( Image rawImages[], Header template ) { + foreach img, i in rawImages { + Image projImg; + Image areaImg; + ( projImg, areaImg ) = mProjectPP ( img, template ); + projectedImages[i] = projImg; + projectedAreas[i] = areaImg; + } +} + + +( Table diffsTbl ) mOverlaps ( Table imagesTbl ) { + app { + mOverlaps @imagesTbl @diffsTbl; + } +} + + +( Image diffImage, TxtFile statusFile ) mDiffFit ( Image projectedImage1, Image projectedArea1, Image projectedImage2, Image projectedArea2, Header template) { + app { + mDiffFit "-s" @statusFile @projectedImage1 @projectedImage2 @diffImage @template; + } +} + +( Image diffImages[], TxtFile 
statusFiles[] ) mDiffFitBatch ( Table diffsTbl, Header template) { + //read overlap image pairs from diffsTbl + DiffStruct diffs[]; + + foreach d, i in diffs { + Image image1 = d.plus; + Image area1; + Image image2 = d.minus; + Image area2; + Image diffImg; + TxtFile statusFile; + + ( diffImg, statusFile ) = mDiffFit ( image1, area1, image2, area2, template ); + diffImages[i] = diffImg; + statusFiles[i] = statusFile; + } +} + +( Table statusFilesTbl ) mStatTbl ( Table diffsTbl ) { + app { + mStatTbl @diffsTbl @statusFilesTbl; + } +} + +( Table fitsTbl ) mConcatFit ( Table statusFilesTbl, TxtFile statusFiles[], string statusDir ) { + app { + mConcatFit @statusFilesTbl @fitsTbl statusDir; + } +} + +( Table correctionsTbl ) mBgModel ( Table projectedImagesTbl, Table fitsTbl ) { + app { + mBgModel @projectedImagesTbl @fitsTbl @correctionsTbl; + } +} + +( Image correctedImage, Image correctedArea ) mBackground ( Image projectedImage, Image projectedArea, Table projectedImagesTbl, Table correctionsTbl ) { + app { + mBackground "-t" @projectedImage @correctedImage @projectedImagesTbl @correctionsTbl; + } +} + +( Image correctedImages[], Image correctedAreas[] ) mBackgroundBatch ( Image projectedImages[], Image projectedAreas[], Table projectedImagesTbl, Table correctionsTbl ) { + foreach projImg, i in projectedImages { + Image projArea = projectedAreas[i]; + Image corrImg; + Image corrArea; + ( corrImg, corrArea ) = mBackground ( projImg, projArea, projectedImagesTbl, correctionsTbl ); + correctedImages[i] = corrImg; + correctedAreas[i] = corrArea; + } +} + +( Table imagesTbl ) mImgtbl ( string imageDir, Image images[] ) { + app { + mImgtbl imageDir @imagesTbl; + } +} + +( Table newImagesTbl ) mImgtbl_t ( string imageDir, Image images[], Table oldImagesTbl ) { + app { + mImgtbl imageDir "-t" @oldImagesTbl @newImagesTbl; + } +} + +( Image mosaic, Image mosaicArea ) mAdd ( Table imagesTbl, Header template, Image images[], Image imageAreas[] ) { + app { + mAdd "-e" @imagesTbl @template @mosaic; + } +} + +( Image shrunkImage ) mShrink ( Image image, float factor ) { + app { + mShrink @image @shrunkImage factor; + } +} + +( JPEG jpeg ) mJPEG ( Image image ) { + app { + mJPEG "-ct" 1 + "-gray" @image + "-1.5s" "60s" "gaussian" + "-out" @jpeg; + } +} + +// get raw images +Image rawImages[]; + +// template header file +Header template<"template.hdr">; + +// fast project raw images +Image projectedImages[], projectedAreas[]; +( projectedImages, projectedAreas ) = mProjectPPBatch ( rawImages, template ); + +// table of projected images +Table projImgTbl<"projImg.tbl">; +projImgTbl = mImgtbl ( ".", projectedImages ); + +// table of overlapping images +Table diffsTbl<"diffs.tbl">; +diffsTbl = mOverlaps ( projImgTbl ); + +Image diffImgs[]; +TxtFile statusFiles[]; +( diffImgs, statusFiles ) = mDiffFitBatch ( diffsTbl, template ); + +Table statusFilesTbl<"statfile.tbl">; +statusFilesTbl = mStatTbl ( diffsTbl ); + +// fit to plane +Table fitsTbl<"fits.tbl">; +fitsTbl = mConcatFit ( statusFilesTbl, statusFiles, "." 
); + +// corrections +Table correctionsTbl<"corrections.tbl">; +correctionsTbl = mBgModel ( projImgTbl, fitsTbl ); + +// background adjustment +Image correctedImages[], correctedAreas[]; +( correctedImages, correctedAreas ) = mBackgroundBatch ( projectedImages, projectedAreas, projImgTbl, correctionsTbl ); + +// table of corrected images +Table corrImgTbl<"corrImg.tbl">; +corrImgTbl = mImgtbl ( ".", correctedImages ); + +// generate mosaic +Image mosaic<"mosaic.fits">; +Image mosaicArea<"mosaic_area.fits">; +( mosaic, mosaicArea ) = mAdd ( corrImgTbl, template, correctedImages, correctedAreas ); + +// shrink the image +Image smallMosaic<"smallMosaic.fits">; +smallMosaic = mShrink ( mosaic, 3.0 ); + +// convert to jpeg +JPEG jpeg<"mosaic.jpg">; +jpeg = mJPEG( smallMosaic ); + Deleted: trunk/tests/language/working/039-montage.swift =================================================================== --- trunk/tests/language/working/039-montage.swift 2010-11-24 16:59:23 UTC (rev 3720) +++ trunk/tests/language/working/039-montage.swift 2010-11-24 17:00:14 UTC (rev 3721) @@ -1,186 +0,0 @@ - -type Image {} -type Header {} -type Table {} -type DiffStruct { - int cntr1; - int cntr2; - Image plus; - Image minus; - Image diff; -} - -type TxtFile {} -type JPEG {}; - - -( Image projectedImage, Image projectedArea ) mProjectPP ( Image rawImage, Header template ) { - app { - mProjectPP "-X" @rawImage @projectedImage @template; - } -} - -( Image projectedImages[], Image projectedAreas[] ) mProjectPPBatch ( Image rawImages[], Header template ) { - foreach img, i in rawImages { - Image projImg; - Image areaImg; - ( projImg, areaImg ) = mProjectPP ( img, template ); - projectedImages[i] = projImg; - projectedAreas[i] = areaImg; - } -} - - -( Table diffsTbl ) mOverlaps ( Table imagesTbl ) { - app { - mOverlaps @imagesTbl @diffsTbl; - } -} - - -( Image diffImage, TxtFile statusFile ) mDiffFit ( Image projectedImage1, Image projectedArea1, Image projectedImage2, Image projectedArea2, Header template) { - app { - mDiffFit "-s" @statusFile @projectedImage1 @projectedImage2 @diffImage @template; - } -} - -( Image diffImages[], TxtFile statusFiles[] ) mDiffFitBatch ( Table diffsTbl, Header template) { - //read overlap image pairs from diffsTbl - DiffStruct diffs[]; - - foreach d, i in diffs { - Image image1 = d.plus; - Image area1; - Image image2 = d.minus; - Image area2; - Image diffImg; - TxtFile statusFile; - - ( diffImg, statusFile ) = mDiffFit ( image1, area1, image2, area2, template ); - diffImages[i] = diffImg; - statusFiles[i] = statusFile; - } -} - -( Table statusFilesTbl ) mStatTbl ( Table diffsTbl ) { - app { - mStatTbl @diffsTbl @statusFilesTbl; - } -} - -( Table fitsTbl ) mConcatFit ( Table statusFilesTbl, TxtFile statusFiles[], string statusDir ) { - app { - mConcatFit @statusFilesTbl @fitsTbl statusDir; - } -} - -( Table correctionsTbl ) mBgModel ( Table projectedImagesTbl, Table fitsTbl ) { - app { - mBgModel @projectedImagesTbl @fitsTbl @correctionsTbl; - } -} - -( Image correctedImage, Image correctedArea ) mBackground ( Image projectedImage, Image projectedArea, Table projectedImagesTbl, Table correctionsTbl ) { - app { - mBackground "-t" @projectedImage @correctedImage @projectedImagesTbl @correctionsTbl; - } -} - -( Image correctedImages[], Image correctedAreas[] ) mBackgroundBatch ( Image projectedImages[], Image projectedAreas[], Table projectedImagesTbl, Table correctionsTbl ) { - foreach projImg, i in projectedImages { - Image projArea = projectedAreas[i]; - Image corrImg; - Image corrArea; 
- ( corrImg, corrArea ) = mBackground ( projImg, projArea, projectedImagesTbl, correctionsTbl ); - correctedImages[i] = corrImg; - correctedAreas[i] = corrArea; - } -} - -( Table imagesTbl ) mImgtbl ( string imageDir, Image images[] ) { - app { - mImgtbl imageDir @imagesTbl; - } -} - -( Table newImagesTbl ) mImgtbl_t ( string imageDir, Image images[], Table oldImagesTbl ) { - app { - mImgtbl imageDir "-t" @oldImagesTbl @newImagesTbl; - } -} - -( Image mosaic, Image mosaicArea ) mAdd ( Table imagesTbl, Header template, Image images[], Image imageAreas[] ) { - app { - mAdd "-e" @imagesTbl @template @mosaic; - } -} - -( Image shrunkImage ) mShrink ( Image image, float factor ) { - app { - mShrink @image @shrunkImage factor; - } -} - -( JPEG jpeg ) mJPEG ( Image image ) { - app { - mJPEG "-ct" 1 - "-gray" @image - "-1.5s" "60s" "gaussian" - "-out" @jpeg; - } -} - -// get raw images -Image rawImages[]; - -// template header file -Header template<"template.hdr">; - -// fast project raw images -Image projectedImages[], projectedAreas[]; -( projectedImages, projectedAreas ) = mProjectPPBatch ( rawImages, template ); - -// table of projected images -Table projImgTbl<"projImg.tbl">; -projImgTbl = mImgtbl ( ".", projectedImages ); - -// table of overlapping images -Table diffsTbl<"diffs.tbl">; -diffsTbl = mOverlaps ( projImgTbl ); - -Image diffImgs[]; -TxtFile statusFiles[]; -( diffImgs, statusFiles ) = mDiffFitBatch ( diffsTbl, template ); - -Table statusFilesTbl<"statfile.tbl">; -statusFilesTbl = mStatTbl ( diffsTbl ); - -// fit to plane -Table fitsTbl<"fits.tbl">; -fitsTbl = mConcatFit ( statusFilesTbl, statusFiles, "." ); - -// corrections -Table correctionsTbl<"corrections.tbl">; -correctionsTbl = mBgModel ( projImgTbl, fitsTbl ); - -// background adjustment -Image correctedImages[], correctedAreas[]; -( correctedImages, correctedAreas ) = mBackgroundBatch ( projectedImages, projectedAreas, projImgTbl, correctionsTbl ); - -// table of corrected images -Table corrImgTbl<"corrImg.tbl">; -corrImgTbl = mImgtbl ( ".", correctedImages ); - -// generate mosaic -Image mosaic<"mosaic.fits">; -Image mosaicArea<"mosaic_area.fits">; -( mosaic, mosaicArea ) = mAdd ( corrImgTbl, template, correctedImages, correctedAreas ); - -// shrink the image -Image smallMosaic<"smallMosaic.fits">; -smallMosaic = mShrink ( mosaic, 3.0 ); - -// convert to jpeg -JPEG jpeg<"mosaic.jpg">; -jpeg = mJPEG( smallMosaic ); - From noreply at svn.ci.uchicago.edu Wed Nov 24 11:02:18 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 11:02:18 -0600 (CST) Subject: [Swift-commit] r3722 - in trunk/tests: apps language/working Message-ID: <20101124170218.6A04F9CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 11:02:17 -0600 (Wed, 24 Nov 2010) New Revision: 3722 Added: trunk/tests/apps/046-tibi.swift trunk/tests/apps/047-LQCD.swift trunk/tests/apps/0471-fmri.swift Removed: trunk/tests/language/working/046-tibi.swift trunk/tests/language/working/047-LQCD.swift trunk/tests/language/working/0471-fmri.swift Log: Move app tests to apps Copied: trunk/tests/apps/046-tibi.swift (from rev 3708, trunk/tests/language/working/046-tibi.swift) =================================================================== --- trunk/tests/apps/046-tibi.swift (rev 0) +++ trunk/tests/apps/046-tibi.swift 2010-11-24 17:02:17 UTC (rev 3722) @@ -0,0 +1,39 @@ +// this is from Tibi's message +// on 8th feb 2007777777 + +type file {} + +//define the wavelet procedure +(file wavelets) waveletTransf (file waveletScript, 
int subjNo, +string trialType, file dataFiles) { + app { + cwtsmall @filename(waveletScript) subjNo trialType; + } +} + +(file outputs[]) batchTrials ( string trialTypes[] ){ + file waveletScript; + //file dataFiles[]; + + foreach s,i in trialTypes { + //file output; + file dataFiles; + outputs[i] = waveletTransf(waveletScript,101,s,dataFiles); + } +} + +//string trialTypes[] = ["FB", "FC", "FI", "SB", "SC", "SI" ]; +string trialTypes[] = [ "FB" ]; + +file allOutputs[]; +file namedOutputs[]; + + +//the MAIN program +//file waveletScript; + + file gauge = stageIn (template, config); + + # need config to be put into filenames too + #string fn = "m0.005 m0.007 m0.01 m0.02 m0.03"; + file stags[]; + + stags = stagSolve(gauge, mass, source); + + file stagTar; + stagTar = archiveStag(mass, stags); + + file clover0 = cloverSolve(kappaQ, cSW, gauge, source); + file q0 = cvt12x12(clover0); + file cvtArch0 = archive(q0); + + string source1 = "wavefunction,0,1S"; + file clover1 = cloverSolve(kappaQ, cSW, gauge, source1); + file q1 = cvt12x12(clover1); + file cvtArch1 = archive(q1); + + string source2 = "wavefunction,0,2S"; + file clover2 = cloverSolve(kappaQ, cSW, gauge, source2); + file q2 = cvt12x12(clover2); + file cvtArch2 = archive(q2); + + file antiQ = q0; + file pStdout = twoPtHH(gauge, antiQ, q0, q1, q2); + foreach stag in stags { + file sStdout = twoPtSH(gauge, stag, antiQ, q0, q1, q2); + } +} Copied: trunk/tests/apps/0471-fmri.swift (from rev 3708, trunk/tests/language/working/0471-fmri.swift) =================================================================== --- trunk/tests/apps/0471-fmri.swift (rev 0) +++ trunk/tests/apps/0471-fmri.swift 2010-11-24 17:02:17 UTC (rev 3722) @@ -0,0 +1,90 @@ +type voxelfile; +type headerfile; + +type pgmfile; +type imagefile; + +type warpfile; + +type volume { + voxelfile img; + headerfile hdr; +}; + +(warpfile warp) align_warp(volume reference, volume subject, string model, string quick) { + app { + align_warp @reference.img @subject.img @warp "-m " model quick; + } +} + +(volume sliced) reslice(warpfile warp, volume subject) +{ + app { + reslice @warp @sliced.img; + } +} + +(volume sliced) align_and_reslice(volume reference, volume subject, string model, string quick) { + warpfile warp; + warp = align_warp(reference, subject, model, quick); + sliced = reslice(warp, subject); +} + + +(volume atlas) softmean(volume sliced[]) +{ + app { + softmean @atlas.img "y" "null" @filenames(sliced[*].img); + } +} + + +(pgmfile outslice) slicer(volume input, string axis, string position) +{ + app { + slicer @input.img axis position @outslice; + } +} + +(imagefile outimg) convert(pgmfile inpgm) +{ + app { + convert @inpgm @outimg; + } +} + +(imagefile outimg) slice_to_jpeg(volume inp, string axis, string position) +{ + pgmfile outslice; + outslice = slicer(inp, axis, position); + outimg = convert(outslice); +} + +(volume s[]) all_align_reslices(volume reference, volume subjects[]) { + + foreach subject, i in subjects { + s[i] = align_and_reslice(reference, subjects[i], "12", "-q"); + } + +} + + +volume references[] ; +volume reference=references[0]; + +volume subjects[] ; + +volume slices[] ; +slices = all_align_reslices(reference, subjects); + +volume atlas ; +atlas = softmean(slices); + +string directions[] = [ "x", "y", "z"]; + +foreach direction in directions { + imagefile o ; + string option = @strcat("-",direction); + o = slice_to_jpeg(atlas, option, ".5"); +} + Deleted: trunk/tests/language/working/046-tibi.swift 
=================================================================== --- trunk/tests/language/working/046-tibi.swift 2010-11-24 17:00:14 UTC (rev 3721) +++ trunk/tests/language/working/046-tibi.swift 2010-11-24 17:02:17 UTC (rev 3722) @@ -1,39 +0,0 @@ -// this is from Tibi's message -// on 8th feb 2007777777 - -type file {} - -//define the wavelet procedure -(file wavelets) waveletTransf (file waveletScript, int subjNo, -string trialType, file dataFiles) { - app { - cwtsmall @filename(waveletScript) subjNo trialType; - } -} - -(file outputs[]) batchTrials ( string trialTypes[] ){ - file waveletScript; - //file dataFiles[]; - - foreach s,i in trialTypes { - //file output; - file dataFiles; - outputs[i] = waveletTransf(waveletScript,101,s,dataFiles); - } -} - -//string trialTypes[] = ["FB", "FC", "FI", "SB", "SC", "SI" ]; -string trialTypes[] = [ "FB" ]; - -file allOutputs[]; -file namedOutputs[]; - - -//the MAIN program -//file waveletScript; - - file gauge = stageIn (template, config); - - # need config to be put into filenames too - #string fn = "m0.005 m0.007 m0.01 m0.02 m0.03"; - file stags[]; - - stags = stagSolve(gauge, mass, source); - - file stagTar; - stagTar = archiveStag(mass, stags); - - file clover0 = cloverSolve(kappaQ, cSW, gauge, source); - file q0 = cvt12x12(clover0); - file cvtArch0 = archive(q0); - - string source1 = "wavefunction,0,1S"; - file clover1 = cloverSolve(kappaQ, cSW, gauge, source1); - file q1 = cvt12x12(clover1); - file cvtArch1 = archive(q1); - - string source2 = "wavefunction,0,2S"; - file clover2 = cloverSolve(kappaQ, cSW, gauge, source2); - file q2 = cvt12x12(clover2); - file cvtArch2 = archive(q2); - - file antiQ = q0; - file pStdout = twoPtHH(gauge, antiQ, q0, q1, q2); - foreach stag in stags { - file sStdout = twoPtSH(gauge, stag, antiQ, q0, q1, q2); - } -} Deleted: trunk/tests/language/working/0471-fmri.swift =================================================================== --- trunk/tests/language/working/0471-fmri.swift 2010-11-24 17:00:14 UTC (rev 3721) +++ trunk/tests/language/working/0471-fmri.swift 2010-11-24 17:02:17 UTC (rev 3722) @@ -1,90 +0,0 @@ -type voxelfile; -type headerfile; - -type pgmfile; -type imagefile; - -type warpfile; - -type volume { - voxelfile img; - headerfile hdr; -}; - -(warpfile warp) align_warp(volume reference, volume subject, string model, string quick) { - app { - align_warp @reference.img @subject.img @warp "-m " model quick; - } -} - -(volume sliced) reslice(warpfile warp, volume subject) -{ - app { - reslice @warp @sliced.img; - } -} - -(volume sliced) align_and_reslice(volume reference, volume subject, string model, string quick) { - warpfile warp; - warp = align_warp(reference, subject, model, quick); - sliced = reslice(warp, subject); -} - - -(volume atlas) softmean(volume sliced[]) -{ - app { - softmean @atlas.img "y" "null" @filenames(sliced[*].img); - } -} - - -(pgmfile outslice) slicer(volume input, string axis, string position) -{ - app { - slicer @input.img axis position @outslice; - } -} - -(imagefile outimg) convert(pgmfile inpgm) -{ - app { - convert @inpgm @outimg; - } -} - -(imagefile outimg) slice_to_jpeg(volume inp, string axis, string position) -{ - pgmfile outslice; - outslice = slicer(inp, axis, position); - outimg = convert(outslice); -} - -(volume s[]) all_align_reslices(volume reference, volume subjects[]) { - - foreach subject, i in subjects { - s[i] = align_and_reslice(reference, subjects[i], "12", "-q"); - } - -} - - -volume references[] ; -volume reference=references[0]; - -volume 
subjects[] ; - -volume slices[] ; -slices = all_align_reslices(reference, subjects); - -volume atlas ; -atlas = softmean(slices); - -string directions[] = [ "x", "y", "z"]; - -foreach direction in directions { - imagefile o ; - string option = @strcat("-",direction); - o = slice_to_jpeg(atlas, option, ".5"); -} - From noreply at svn.ci.uchicago.edu Wed Nov 24 11:03:13 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 11:03:13 -0600 (CST) Subject: [Swift-commit] r3723 - trunk/tests/language/should-not-work Message-ID: <20101124170313.37A029CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 11:03:13 -0600 (Wed, 24 Nov 2010) New Revision: 3723 Modified: trunk/tests/language/should-not-work/119-missing-semi.swift Log: Script should fail Modified: trunk/tests/language/should-not-work/119-missing-semi.swift =================================================================== --- trunk/tests/language/should-not-work/119-missing-semi.swift 2010-11-24 17:02:17 UTC (rev 3722) +++ trunk/tests/language/should-not-work/119-missing-semi.swift 2010-11-24 17:03:13 UTC (rev 3723) @@ -1,10 +1,13 @@ + +// THIS-SCRIPT-SHOULD-FAIL + type file {}; -type student { - file name; - file age; - file gpa; -} -app (file t) getname(string n) { +type student { + file name; + file age; + file gpa; +} +app (file t) getname(string n) { echo n stdout=@filename(t); } From noreply at svn.ci.uchicago.edu Wed Nov 24 11:18:13 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Wed, 24 Nov 2010 11:18:13 -0600 (CST) Subject: [Swift-commit] r3724 - trunk/tests/cdm Message-ID: <20101124171813.1E4729CC92@svn.ci.uchicago.edu> Author: wozniak Date: 2010-11-24 11:18:12 -0600 (Wed, 24 Nov 2010) New Revision: 3724 Added: trunk/tests/cdm/401-external.check.sh trunk/tests/cdm/401-external.clean.sh trunk/tests/cdm/401-external.setup.sh trunk/tests/cdm/401-external.swift Removed: trunk/tests/cdm/210-external.check.sh trunk/tests/cdm/210-external.clean.sh trunk/tests/cdm/210-external.setup.sh trunk/tests/cdm/210-external.swift Modified: trunk/tests/cdm/README.txt Log: Move 210-external to 401-external Deleted: trunk/tests/cdm/210-external.check.sh =================================================================== --- trunk/tests/cdm/210-external.check.sh 2010-11-24 17:03:13 UTC (rev 3723) +++ trunk/tests/cdm/210-external.check.sh 2010-11-24 17:18:12 UTC (rev 3724) @@ -1,11 +0,0 @@ -#!/bin/sh - -set -x - -cat external.out || exit 1 -rm -v external.out || exit 1 - -grep $( uname -m ) 210-input.txt || exit 1 -grep $( uname -m ) 210-output.txt || exit 1 - -exit 0 Deleted: trunk/tests/cdm/210-external.clean.sh =================================================================== --- trunk/tests/cdm/210-external.clean.sh 2010-11-24 17:03:13 UTC (rev 3723) +++ trunk/tests/cdm/210-external.clean.sh 2010-11-24 17:18:12 UTC (rev 3724) @@ -1,7 +0,0 @@ -#!/bin/sh - -set -x - -rm -v 210-input.txt 210-output.txt - -exit 0 Deleted: trunk/tests/cdm/210-external.setup.sh =================================================================== --- trunk/tests/cdm/210-external.setup.sh 2010-11-24 17:03:13 UTC (rev 3723) +++ trunk/tests/cdm/210-external.setup.sh 2010-11-24 17:18:12 UTC (rev 3724) @@ -1,12 +0,0 @@ -#!/bin/sh - -set -x - -{ - uname -a - date -} > 210-input.txt - -cp -v $GROUP/external.sh . 
- -exit 0 Deleted: trunk/tests/cdm/210-external.swift =================================================================== --- trunk/tests/cdm/210-external.swift 2010-11-24 17:03:13 UTC (rev 3723) +++ trunk/tests/cdm/210-external.swift 2010-11-24 17:18:12 UTC (rev 3724) @@ -1,11 +0,0 @@ - -type file; - -app (file o) copy (file i) -{ - cp @i @o; -} - -file f1<"210-input.txt">; -file f2<"210-output.txt">; -f2 = copy(f1); Copied: trunk/tests/cdm/401-external.check.sh (from rev 3708, trunk/tests/cdm/210-external.check.sh) =================================================================== --- trunk/tests/cdm/401-external.check.sh (rev 0) +++ trunk/tests/cdm/401-external.check.sh 2010-11-24 17:18:12 UTC (rev 3724) @@ -0,0 +1,11 @@ +#!/bin/sh + +set -x + +cat external.out || exit 1 +rm -v external.out || exit 1 + +grep $( uname -m ) 210-input.txt || exit 1 +grep $( uname -m ) 210-output.txt || exit 1 + +exit 0 Copied: trunk/tests/cdm/401-external.clean.sh (from rev 3708, trunk/tests/cdm/210-external.clean.sh) =================================================================== --- trunk/tests/cdm/401-external.clean.sh (rev 0) +++ trunk/tests/cdm/401-external.clean.sh 2010-11-24 17:18:12 UTC (rev 3724) @@ -0,0 +1,7 @@ +#!/bin/sh + +set -x + +rm -v 210-input.txt 210-output.txt + +exit 0 Copied: trunk/tests/cdm/401-external.setup.sh (from rev 3708, trunk/tests/cdm/210-external.setup.sh) =================================================================== --- trunk/tests/cdm/401-external.setup.sh (rev 0) +++ trunk/tests/cdm/401-external.setup.sh 2010-11-24 17:18:12 UTC (rev 3724) @@ -0,0 +1,12 @@ +#!/bin/sh + +set -x + +{ + uname -a + date +} > 210-input.txt + +cp -v $GROUP/external.sh . + +exit 0 Copied: trunk/tests/cdm/401-external.swift (from rev 3708, trunk/tests/cdm/210-external.swift) =================================================================== --- trunk/tests/cdm/401-external.swift (rev 0) +++ trunk/tests/cdm/401-external.swift 2010-11-24 17:18:12 UTC (rev 3724) @@ -0,0 +1,11 @@ + +type file; + +app (file o) copy (file i) +{ + cp @i @o; +} + +file f1<"210-input.txt">; +file f2<"210-output.txt">; +f2 = copy(f1); Modified: trunk/tests/cdm/README.txt =================================================================== --- trunk/tests/cdm/README.txt 2010-11-24 17:03:13 UTC (rev 3723) +++ trunk/tests/cdm/README.txt 2010-11-24 17:18:12 UTC (rev 3724) @@ -1,5 +1,5 @@ 000 series : noop tests 200 series : DIRECT tests -300 series : BROADCAST test - +300 series : BROADCAST tests +400 series : EXTERNAL tests From noreply at svn.ci.uchicago.edu Thu Nov 25 16:57:40 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Thu, 25 Nov 2010 16:57:40 -0600 (CST) Subject: [Swift-commit] r3725 - provenancedb Message-ID: <20101125225740.E6E3B9CCE6@svn.ci.uchicago.edu> Author: lgadelha Date: 2010-11-25 16:57:39 -0600 (Thu, 25 Nov 2010) New Revision: 3725 Added: provenancedb/pql_functions.sql Modified: provenancedb/prov-init.sql provenancedb/prov-to-sql.sh provenancedb/swift-prov-import-all-logs Log: Added: provenancedb/pql_functions.sql =================================================================== --- provenancedb/pql_functions.sql (rev 0) +++ provenancedb/pql_functions.sql 2010-11-25 22:57:39 UTC (rev 3725) @@ -0,0 +1,95 @@ +-- SQL Functions + +CREATE OR REPLACE FUNCTION list_runs() RETURNS SETOF VARCHAR AS $$ + SELECT DISTINCT(log_filename) FROM workflow; +$$ LANGUAGE SQL; + +-- lists distinct processes by name in a workflow + +CREATE OR REPLACE FUNCTION process_names(wf_id 
VARCHAR) RETURNS SETOF VARCHAR AS $$ + SELECT DISTINCT(process.name) + FROM process + WHERE process.workflow_id=$1; +$$ LANGUAGE SQL; + +-- lists variations in a parameter's value across workflows + +DROP TYPE param_across_wf_type CASCADE; +CREATE TYPE param_across_wf_type AS (workflow VARCHAR, parameter VARCHAR, value VARCHAR); + +CREATE OR REPLACE FUNCTION param_across_wf(param_name VARCHAR) RETURNS SETOF param_across_wf_type AS $$ + SELECT workflow.log_filename,ds_usage.param_name,variable.value + FROM variable,ds_usage,process,workflow + WHERE variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND + process.workflow_id=workflow.id AND ds_usage.param_name=$1 + GROUP BY workflow.log_filename,ds_usage.param_name,variable.value; +$$ LANGUAGE SQL; + +-- correlate a parameter with workflow runtime statistics +DROP TYPE correlate_param_runtime_type CASCADE; +CREATE TYPE correlate_param_runtime_type AS (workflow VARCHAR, workflow_starttime TIMESTAMP WITH TIME ZONE, workflow_duration NUMERIC, parameter VARCHAR, parameter_value VARCHAR); + +CREATE OR REPLACE FUNCTION correlate_param_runtime(param_name VARCHAR) RETURNS SETOF correlate_param_runtime_type AS $$ + SELECT A.workflow,to_timestamp(B.start_time),B.duration,A.parameter,A.value + FROM param_across_wf($1) AS A, workflow AS B + WHERE A.workflow=B.log_filename; +$$ LANGUAGE SQL; + +-- OOPS-specific functions + +CREATE OR REPLACE FUNCTION list_oops_runs() RETURNS SETOF VARCHAR AS $$ + SELECT DISTINCT(value) FROM annot_wf_txt WHERE annot_wf_txt.name=('oops_run_id'); +$$ LANGUAGE SQL; + +DROP TYPE oops_param_across_wf_type CASCADE; +CREATE TYPE oops_param_across_wf_type AS (oops_run_id VARCHAR, param_name VARCHAR, variable VARCHAR); + +CREATE OR REPLACE FUNCTION oops_param_across_wf(VARCHAR) RETURNS SETOF oops_param_across_wf_type AS $$ + SELECT annot_wf_txt.value,ds_usage.param_name,variable.value + FROM variable,ds_usage,process,annot_wf_txt + WHERE variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND process.workflow_id=annot_wf_txt.id AND + ds_usage.param_name=$1 AND annot_wf_txt.name='oops_run_id' + GROUP BY annot_wf_txt.name,annot_wf_txt.value,ds_usage.param_name,variable.value; +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION list_oops_runs() RETURNS SETOF VARCHAR AS $$ + SELECT DISTINCT(value) FROM annot_wf_txt WHERE annot_wf_txt.name=('oops_run_id'); +$$ LANGUAGE SQL; + +DROP TYPE oops_summary CASCADE; +CREATE TYPE oops_summary AS (oops_run_id VARCHAR, start_time TIMESTAMP WITH TIME ZONE, duration_sec NUMERIC, swift_version VARCHAR); + +CREATE OR REPLACE FUNCTION oops_run_summary(varchar) RETURNS SETOF oops_summary AS $$ + SELECT annot_wf_txt.value as oops_run_id, to_timestamp(workflow.start_time) as start_time, + workflow.duration as duration_sec,workflow.swift_version as swift_version + FROM annot_wf_txt,workflow + WHERE annot_wf_txt.id=workflow.id and annot_wf_txt.name='oops_run_id' and annot_wf_txt.value=$1; +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION oops_process_names(varchar) RETURNS SETOF varchar AS $$ + SELECT DISTINCT(process.name) + FROM process, annot_wf_txt + WHERE process.workflow_id=annot_wf_txt.id AND + annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value=$1; +$$ LANGUAGE SQL; + +DROP TYPE oops_wf_param_summary CASCADE; +CREATE TYPE oops_wf_param_summary AS (oops_run_id varchar, param_name varchar, value varchar); + +CREATE OR REPLACE FUNCTION oops_variable_summary() RETURNS SETOF oops_wf_param_summary AS $$ + SELECT annot_wf_txt.value,ds_usage.param_name,variable.value + FROM 
variable,ds_usage,process,annot_wf_txt
+  WHERE variable.id=ds_usage.dataset_id and ds_usage.process_id=process.id and process.workflow_id=annot_wf_txt.id;
+$$ LANGUAGE SQL;
+
+DROP TYPE oops_param_summary CASCADE;
+CREATE TYPE oops_param_summary AS (param_name varchar, value varchar);
+
+CREATE OR REPLACE FUNCTION oops_science_summary(varchar) RETURNS SETOF oops_param_summary AS $$
+  SELECT ds_usage.param_name,variable.value
+  FROM variable,ds_usage,process,annot_wf_txt
+  WHERE variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND process.workflow_id=annot_wf_txt.id AND
+        (ds_usage.param_name='proteinId' OR ds_usage.param_name='targetId' OR ds_usage.param_name='seqFile' OR
+         ds_usage.param_name='prot' OR ds_usage.param_name='prepTarFile' OR ds_usage.param_name='nSim') AND
+        annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value=$1;
+$$ LANGUAGE SQL;

Modified: provenancedb/prov-init.sql
===================================================================
--- provenancedb/prov-init.sql 2010-11-24 17:18:12 UTC (rev 3724)
+++ provenancedb/prov-init.sql 2010-11-25 22:57:39 UTC (rev 3725)
@@ -19,7 +19,7 @@
 DROP TABLE annot_wf_num CASCADE;
 DROP TABLE annot_wf_txt CASCADE;
 DROP TABLE annot_wf_bool CASCADE;
-DROP TABLE extrainfo CASCADE;
+-- DROP TABLE extrainfo CASCADE;
 DROP TABLE createarray CASCADE;
 DROP TABLE createarray_member CASCADE;
 DROP TABLE array_range CASCADE;
@@ -30,12 +30,12 @@
 -- Might be interesting to store xml translation of the swiftscript code
 -- here for prospective provenance/versioning
 CREATE TABLE workflow
-    (id varchar(256) PRIMARY KEY,
-     log_filename varchar(2048),
-     swift_version varchar(16),
-     import_status varchar(16),
-     start_time numeric,
-     duration numeric
+    (id VARCHAR(256) PRIMARY KEY,
+     log_filename VARCHAR(2048),
+     swift_version VARCHAR(16),
+     import_status VARCHAR(16),
+     start_time NUMERIC,
+     duration NUMERIC
     );

 -- workflow_run stores the start time and duration for each workflow
@@ -48,13 +48,13 @@

 -- dataset stores all dataset identifiers.
 CREATE TABLE dataset
-    (id varchar(256) PRIMARY KEY
+    (id VARCHAR(256) PRIMARY KEY
    );

 -- file stores the filename mapped to each dataset.
 CREATE TABLE file
-    ( id varchar(256) PRIMARY KEY REFERENCES dataset (id) ON DELETE CASCADE,
-      filename varchar(2048)
+    ( id VARCHAR(256) PRIMARY KEY REFERENCES dataset (id) ON DELETE CASCADE,
+      filename VARCHAR(2048)
    );

 -- dataset_values stores the value for each dataset which is known to have
@@ -63,8 +63,8 @@
 -- example) SQL numerical operations should not be expected to work, even
 -- though the user knows that a particular dataset stores a numeric value.
 CREATE TABLE variable
-    ( id varchar(256) PRIMARY KEY REFERENCES dataset (id) ON DELETE CASCADE,
-      value varchar(2048)
+    ( id VARCHAR(256) PRIMARY KEY REFERENCES dataset (id) ON DELETE CASCADE,
+      value VARCHAR(2048)
    );

 -- dataset_containment stores the containment hierarchy between
@@ -75,8 +75,8 @@
 -- a containment hierarchy. The relationship (such as array index or
 -- structure member name) should also be stored in this table.
 CREATE TABLE ds_containment
-    ( out_id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE,
-      in_id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE,
+    ( out_id VARCHAR(256) REFERENCES dataset (id) ON DELETE CASCADE,
+      in_id VARCHAR(256) REFERENCES dataset (id) ON DELETE CASCADE,
      PRIMARY KEY (out_id,in_id)
    );

@@ -89,10 +89,10 @@
 -- process types: internal, rootthread, execute, function, compound, scope, operator
 -- maybe create a table for each type?
 CREATE TABLE process
-    (id varchar(256) PRIMARY KEY,
-     type varchar(16),
-     name varchar(256), -- in the case of an execute this refers to the transformation name in tc.data
-     workflow_id varchar(256) REFERENCES workflow (id) ON DELETE CASCADE -- normalize: workflow_id of sub-procedure determined
+    (id VARCHAR(256) PRIMARY KEY,
+     type VARCHAR(16),
+     name VARCHAR(256), -- in the case of an execute this refers to the transformation name in tc.data
+     workflow_id VARCHAR(256) REFERENCES workflow (id) ON DELETE CASCADE -- normalize: workflow_id of sub-procedure determined
                                                                          -- by compound procedure
    );

@@ -100,24 +100,24 @@
 -- this gives information about each execute.
 -- each execute is identified by a unique URI. other information from
 -- swift logs is also stored here. an execute is an OPM process.
 CREATE TABLE execute
-    (id varchar(256) PRIMARY KEY REFERENCES process (id) ON DELETE CASCADE,
-     procedure_name varchar(256), -- name of the app procedure that invokes the transformation
-     start_time numeric,
-     duration numeric,
-     final_state varchar(16),
-     scratch varchar(2048)
+    (id VARCHAR(256) PRIMARY KEY REFERENCES process (id) ON DELETE CASCADE,
+     procedure_name VARCHAR(256), -- name of the app procedure that invokes the transformation
+     start_time NUMERIC,
+     duration NUMERIC,
+     final_state VARCHAR(16),
+     scratch VARCHAR(2048)
    );

 -- this gives information about each execute2, which is an attempt to
 -- perform an execution. the execute2 id is tied to per-execution-attempt
 -- information such as wrapper logs
 CREATE TABLE execute2
-    (id varchar(256) PRIMARY KEY,
-     execute_id varchar(256) REFERENCES execute (id) ON DELETE CASCADE,
-     start_time numeric,
-     duration numeric,
-     final_state varchar(16),
-     site varchar(256)
+    (id VARCHAR(256) PRIMARY KEY,
+     execute_id VARCHAR(256) REFERENCES execute (id) ON DELETE CASCADE,
+     start_time NUMERIC,
+     duration NUMERIC,
+     final_state VARCHAR(16),
+     site VARCHAR(256)
    );

 -- dataset_usage records usage relationships between processes and datasets;
@@ -125,10 +125,10 @@
 -- application procedure invocation; in OPM terms, the artificts which are
 -- input to and output from each process that is a Swift execution
 CREATE TABLE ds_usage
-    (process_id varchar(256) REFERENCES process(id) ON DELETE CASCADE,
-     direction char(1), -- I or O for input or output
-     dataset_id varchar(256) REFERENCES dataset(id) ON DELETE CASCADE,
-     param_name varchar(256), -- the name of the parameter in this execute that
+    (process_id VARCHAR(256) REFERENCES process(id) ON DELETE CASCADE,
+     direction CHAR(1), -- I or O for input or output
+     dataset_id VARCHAR(256) REFERENCES dataset(id) ON DELETE CASCADE,
+     param_name VARCHAR(256), -- the name of the parameter in this execute that
                               -- this dataset was bound to. sometimes this must
                               -- be contrived (for example, in positional varargs)
      PRIMARY KEY (process_id,direction,dataset_id,param_name)
@@ -136,65 +136,65 @@

 -- annotations
 CREATE TABLE annot_ds_num
-    ( id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE,
-      name varchar(256),
-      value numeric,
+    ( id VARCHAR(256) REFERENCES dataset (id) ON DELETE CASCADE,
+      name VARCHAR(256),
+      value NUMERIC,
      PRIMARY KEY (id, name)
    );

 CREATE TABLE annot_ds_txt
-    ( id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE,
-      name varchar(256),
-      value varchar(2048),
+    ( id VARCHAR(256) REFERENCES dataset (id) ON DELETE CASCADE,
+      name VARCHAR(256),
+      value VARCHAR(2048),
      PRIMARY KEY (id, name)
    );

 CREATE TABLE annot_ds_bool
-    ( id varchar(256) REFERENCES dataset (id) ON DELETE CASCADE,
-      name varchar(256),
-      value boolean,
+    ( id VARCHAR(256) REFERENCES dataset (id) ON DELETE CASCADE,
+      name VARCHAR(256),
+      value BOOLEAN,
      PRIMARY KEY (id, name)
    );

 CREATE TABLE annot_p_num
-    ( id varchar(256) REFERENCES process (id) ON DELETE CASCADE,
-      name varchar(256),
-      value numeric,
+    ( id VARCHAR(256) REFERENCES process (id) ON DELETE CASCADE,
+      name VARCHAR(256),
+      value NUMERIC,
      PRIMARY KEY (id, name)
    );

 CREATE TABLE annot_p_txt
-    ( id varchar(256) REFERENCES process (id) ON DELETE CASCADE,
-      name varchar(256),
-      value varchar(2048),
+    ( id VARCHAR(256) REFERENCES process (id) ON DELETE CASCADE,
+      name VARCHAR(256),
+      value VARCHAR(2048),
      PRIMARY KEY (id, name)
    );

 CREATE TABLE annot_p_bool
-    ( id varchar(256) REFERENCES process (id) ON DELETE CASCADE,
-      name varchar(256),
-      value boolean,
+    ( id VARCHAR(256) REFERENCES process (id) ON DELETE CASCADE,
+      name VARCHAR(256),
+      value BOOLEAN,
      PRIMARY KEY (id, name)
    );

 CREATE TABLE annot_wf_num
-    ( id varchar(256) REFERENCES workflow (id) ON DELETE CASCADE,
-      name varchar(256),
-      value numeric,
+    ( id VARCHAR(256) REFERENCES workflow (id) ON DELETE CASCADE,
+      name VARCHAR(256),
+      value NUMERIC,
      PRIMARY KEY (id, name)
    );

 CREATE TABLE annot_wf_txt
-    ( id varchar(256) REFERENCES workflow (id) ON DELETE CASCADE,
-      name varchar(256),
-      value varchar(2048),
+    ( id VARCHAR(256) REFERENCES workflow (id) ON DELETE CASCADE,
+      name VARCHAR(256),
+      value VARCHAR(2048),
      PRIMARY KEY (id, name)
    );

 CREATE TABLE annot_wf_bool
-    ( id varchar(256) REFERENCES workflow (id) ON DELETE CASCADE,
-      name varchar(2048),
-      value boolean,
+    ( id VARCHAR(256) REFERENCES workflow (id) ON DELETE CASCADE,
+      name VARCHAR(2048),
+      value BOOLEAN,
      PRIMARY KEY (id, name)
    );

@@ -212,78 +212,30 @@
 -- It is unclear which is the better representation.
 CREATE TABLE createarray
-    ( id varchar(256) PRIMARY KEY
+    ( id VARCHAR(256) PRIMARY KEY
    );

 CREATE TABLE createarray_member
-    ( array_id varchar(256) REFERENCES createarray (id) ON DELETE CASCADE,
-      ix varchar(256),
-      member_id varchar(256),
+    ( array_id VARCHAR(256) REFERENCES createarray (id) ON DELETE CASCADE,
+      ix VARCHAR(256),
+      member_id VARCHAR(256),
      PRIMARY KEY (array_id, ix)
    );

 -- TODO step
 CREATE TABLE array_range
-    ( array_id varchar(256) REFERENCES createarray (id) ON DELETE CASCADE,
-      from_id varchar(256),
-      to_id varchar(256),
-      step_id varchar(256), -- nullable, if step is unspecified
+    ( array_id VARCHAR(256) REFERENCES createarray (id) ON DELETE CASCADE,
+      from_id VARCHAR(256),
+      to_id VARCHAR(256),
+      step_id VARCHAR(256), -- nullable, if step is unspecified
      PRIMARY KEY (array_id,from_id,to_id,step_id)
    );

--- lists distinct processes by name in a workflow
-CREATE OR REPLACE FUNCTION process_names(varchar) RETURNS SETOF varchar AS $$
-    SELECT DISTINCT(process.name)
-    FROM process
-    WHERE process.workflow_id=$1;
-$$ LANGUAGE SQL;
-
-
--- OOPS-specific functions
-
-CREATE OR REPLACE FUNCTION list_oops_runs() RETURNS SETOF varchar AS $$
-    SELECT DISTINCT(value) FROM annot_wf_txt WHERE annot_wf_txt.name=('oops_run_id');
-$$ LANGUAGE SQL;
-
-CREATE TYPE oops_summary AS (oops_run_id varchar, start_time timestamp with time zone, duration_sec numeric, swift_version varchar);
-
-CREATE OR REPLACE FUNCTION oops_run_summary(varchar) RETURNS SETOF oops_summary AS $$
-    SELECT annot_wf_txt.value as oops_run_id, to_timestamp(workflow.start_time) as start_time,
-           workflow.duration as duration_sec,workflow.swift_version as swift_version
-    FROM annot_wf_txt,workflow
-    WHERE annot_wf_txt.id=workflow.id and annot_wf_txt.name='oops_run_id' and annot_wf_txt.value=$1;
-$$ LANGUAGE SQL;
-
-CREATE OR REPLACE FUNCTION oops_process_names(varchar) RETURNS SETOF varchar AS $$
-    SELECT DISTINCT(process.name)
-    FROM process, annot_wf_txt
-    WHERE process.workflow_id=annot_wf_txt.id AND
-          annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value=$1;
-$$ LANGUAGE SQL;
-
-CREATE TYPE oops_variable_summary AS (oops_run_id varchar, param_name varchar, value varchar);
-
-CREATE OR REPLACE FUNCTION oops_variable_summary(varchar) RETURNS SETOF oops_variable_summary $$
-    SELECT annot_wf_txt.value,ds_usage.param_name,variable.value
-    FROM variable,ds_usage,process,annot_wf_txt
-    WHERE variable.id=ds_usage.dataset_id and ds_usage.process_id=process.id and process.workflow_id=annot_wf_txt.id;
-$$ LANGUAGE SQL;
-
-CREATE OR REPLACE FUNCTION oops_science_summary(varchar) RETURNS SETOF oops_variable_summary AS $$
-    SELECT ds_usage.param_name,variable.value
-    FROM variable,ds_usage,process,annot_wf_txt
-    WHERE variable.id=ds_usage.dataset_id AND ds_usage.process_id=process.id AND process.workflow_id=annot_wf_txt.id AND
-          (ds_usage.param_name='proteinId' OR ds_usage.param_name='targetId' OR ds_usage.param_name='seqFile' OR
-           ds_usage.param_name='prot' OR ds_usage.param_name='prepTarFile' OR ds_usage.param_name='nSim') AND
-          annot_wf_txt.name='oops_run_id' AND annot_wf_txt.value='h733';
-$$ LANGUAGE SQL;
-
-
 -- this GRANT does not work for sqlite; you'll get a syntax error but
 -- ignore it, as it is not needed in sqlite
-grant all on
+GRANT ALL ON
   dataset,
   file,
   variable,
@@ -306,4 +258,4 @@
   createarray,
   createarray_member,
   array_range
-to public, operators;
+TO public, operators;
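A quick way to exercise the schema above, once a run has been imported, is to join process, ds_usage and file to see which files each process read and wrote. The following is only a sketch: the run identifier is a hypothetical example in the tag-URI form used at this revision, and SQLCMD is assumed to be configured as described in the README.

#!/bin/bash
# Sketch: list the files consumed and produced by each process of one run.
# Assumes SQLCMD is configured as in etc/provenance.config; the WF value
# below is a hypothetical run identifier, not one from a real import.
WF="tag:ci.uchicago.edu,2008:swiftlogs:execute:first-r2522:run"
$SQLCMD <<EOF
SELECT process.name, ds_usage.direction, ds_usage.param_name, file.filename
FROM process, ds_usage, file
WHERE process.workflow_id='$WF'
  AND ds_usage.process_id=process.id
  AND ds_usage.dataset_id=file.id
ORDER BY process.name, ds_usage.direction;
EOF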
Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2010-11-24 17:18:12 UTC (rev 3724)
+++ provenancedb/prov-to-sql.sh 2010-11-25 22:57:39 UTC (rev 3725)
@@ -27,8 +27,6 @@
     echo "INSERT INTO execute2 (id, execute_id, start_time, duration, final_state, site) VALUES ('$globalid', '$inv_id', $start_time, $duration, '$endstate', '$site');" >> tmp-e2.sql
 done < execute2.global.event

-
-
 while read col1 col2 col3 col4 col5 threadst namest lhsst rhsst resultst; do
     thread=`echo $threadst | awk 'BEGIN { FS = "=" }; {print $2}'`
     name=`echo $namest | awk 'BEGIN { FS = "=" }; {print $2}'`

Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs 2010-11-24 17:18:12 UTC (rev 3724)
+++ provenancedb/swift-prov-import-all-logs 2010-11-25 22:57:39 UTC (rev 3725)
@@ -50,7 +50,6 @@
     fi

     export RUNID=$(basename $filename .log)
-    # changing tag authority from e-mail benc at ci.uchicago.edu to dnsname ci.uchicago.edu
     export WF="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:run"

     echo "INSERT INTO workflow (id, log_filename, swift_version, import_status) VALUES ('$WF','$filename','$version','$wfstatus');" | $SQLCMD

From noreply at svn.ci.uchicago.edu Fri Nov 26 20:23:51 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Fri, 26 Nov 2010 20:23:51 -0600 (CST)
Subject: [Swift-commit] r3726 - provenancedb
Message-ID: <20101127022351.B66899CC92 at svn.ci.uchicago.edu>

Author: lgadelha
Date: 2010-11-26 20:23:51 -0600 (Fri, 26 Nov 2010)
New Revision: 3726

Modified:
   provenancedb/pql_functions.sql
   provenancedb/prepare-provenance-chart
   provenancedb/prov-to-sql.sh
   provenancedb/swift-prov-import-all-logs
Log:
Simplification of the provenance identifiers.

Modified: provenancedb/pql_functions.sql
===================================================================
--- provenancedb/pql_functions.sql 2010-11-25 22:57:39 UTC (rev 3725)
+++ provenancedb/pql_functions.sql 2010-11-27 02:23:51 UTC (rev 3726)
@@ -27,7 +27,12 @@
 -- correlate a parameter with workflow runtime statistics

 DROP TYPE correlate_param_runtime_type CASCADE;
-CREATE TYPE correlate_param_runtime_type AS (workflow VARCHAR, workflow_starttime TIMESTAMP WITH TIME ZONE, workflow_duration NUMERIC, parameter VARCHAR, parameter_value VARCHAR);
+CREATE TYPE correlate_param_runtime_type
+AS (workflow VARCHAR,
+    workflow_starttime TIMESTAMP WITH TIME ZONE,
+    workflow_duration NUMERIC,
+    parameter VARCHAR,
+    parameter_value VARCHAR);

 CREATE OR REPLACE FUNCTION correlate_param_runtime(param_name VARCHAR) RETURNS SETOF correlate_param_runtime_type AS $$
     SELECT A.workflow,to_timestamp(B.start_time),B.duration,A.parameter,A.value

Modified: provenancedb/prepare-provenance-chart
===================================================================
--- provenancedb/prepare-provenance-chart 2010-11-25 22:57:39 UTC (rev 3725)
+++ provenancedb/prepare-provenance-chart 2010-11-27 02:23:51 UTC (rev 3726)
@@ -8,8 +8,8 @@

 export RUNID=$(basename $1 .log)

-export WFID="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:"
-export EXECUTE2PREFIX="tag:ci.uchicago.edu,2008:swiftlogs:execute2:${RUNID}:"
+export WFID="execute:${RUNID}:"
+export EXECUTE2PREFIX="execute2:${RUNID}:"

 # will output log information about datasets from a log file passed as $1

@@ -30,8 +30,8 @@

 cat $1 | grep ' OPERATOR ' | sed 's/^.*thread=\([^ ]*\) operator="\([^ ]*\)" lhs=\([^ ]*\) rhs=\([^ ]*\) result=\([^ ]*\).*$/\1 \2 \3 \4 \5/' > operators.txt

-# 2009-03-19 19:15:35,244+0100 INFO vdl:arguments FUNCTION id=88000-0-4-4 name="filename" result=tag:ci.uchicago.edu,2008:swift:dataset:20090319-1915-xj8flg13:720000000060
-# 2009-03-19 19:15:35,246+0100 INFO vdl:arguments FUNCTIONPARAMETER id=88001-0-4-4 input=tag:ci.uchicago.edu,2008:swift:dataset:20090319-1915-xj8flg13:720000000058
+# 2009-03-19 19:15:35,244+0100 INFO vdl:arguments FUNCTION id=88000-0-4-4 name="filename" result=dataset:20090319-1915-xj8flg13:720000000060
+# 2009-03-19 19:15:35,246+0100 INFO vdl:arguments FUNCTIONPARAMETER id=88001-0-4-4 input=dataset:20090319-1915-xj8flg13:720000000058
 cat $1 | grep ' FUNCTION ' | sed "s/^.*id=\([^ ]*\) name=\([^ ]*\) result=\([^ ]*\).*\$/$WFID\1 \2 \3/" > functions.txt

 # the IDs in functions.txt should be unique...

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh 2010-11-25 22:57:39 UTC (rev 3725)
+++ provenancedb/prov-to-sql.sh 2010-11-27 02:23:51 UTC (rev 3726)
@@ -2,10 +2,10 @@

 export RUNID=$(basename $1 .log)

-export WFID="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:"
+export WFID="execute:${RUNID}:"

 # TODO is there already a URI form for identifying workflows?
-export WF="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:run"
+export WF="execute:${RUNID}:run"

 echo Generating SQL for $RUNID

Modified: provenancedb/swift-prov-import-all-logs
===================================================================
--- provenancedb/swift-prov-import-all-logs 2010-11-25 22:57:39 UTC (rev 3725)
+++ provenancedb/swift-prov-import-all-logs 2010-11-27 02:23:51 UTC (rev 3726)
@@ -50,7 +50,7 @@
     fi

     export RUNID=$(basename $filename .log)
-    export WF="tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:run"
+    export WF="execute:${RUNID}:run"

     echo "INSERT INTO workflow (id, log_filename, swift_version, import_status) VALUES ('$WF','$filename','$version','$wfstatus');" | $SQLCMD

From noreply at svn.ci.uchicago.edu Sat Nov 27 07:04:30 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Sat, 27 Nov 2010 07:04:30 -0600 (CST)
Subject: [Swift-commit] r3727 - in trunk: libexec libexec/log-processing src/org/griphyn/vdl/mapping tests/log-processing/make-targets
Message-ID: <20101127130430.6A4D29CCE6 at svn.ci.uchicago.edu>

Author: lgadelha
Date: 2010-11-27 07:04:29 -0600 (Sat, 27 Nov 2010)
New Revision: 3727

Modified:
   trunk/libexec/log-processing/add-runid-as-prefix
   trunk/libexec/log-processing/whole-workflow-event
   trunk/libexec/vdl.k
   trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java
   trunk/src/org/griphyn/vdl/mapping/ExternalDataNode.java
   trunk/tests/log-processing/make-targets/execute.global.event.first-r2522.expected
Log:
- Re-enable the generation of provenance information in the log files.
- Simplification of provenance identifiers; this makes the output of provenance queries cleaner. Tag URIs can be used when exporting provenance information, to OPM for instance.
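In concrete terms, a dataset that was previously identified as tag:benc at ci.uchicago.edu,2008:swift:dataset:20090319-1915-xj8flg13:720000000060 is now identified as dataset:20090319-1915-xj8flg13:720000000060. One plausible sketch of reattaching a tag authority at export time, for OPM say, follows; the authority strings are assumptions modeled on the prefixes this commit removes, not an existing tool.

#!/bin/bash
# Sketch: expand the simplified provenance identifiers back into tag URIs
# at export time. Reads identifiers on stdin, one per line. The authority
# strings are assumptions based on the prefixes removed in this commit.
sed -e 's|^dataset:|tag:ci.uchicago.edu,2008:swift:dataset:|' \
    -e 's|^execute2:|tag:ci.uchicago.edu,2008:swiftlogs:execute2:|' \
    -e 's|^execute:|tag:ci.uchicago.edu,2008:swiftlogs:execute:|' \
    -e 's|^run:|tag:ci.uchicago.edu,2007:swift:run:|'

For example, the identifier execute:first-r2522:0 in the test expectation below would round-trip back to the long form used before this commit.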
Modified: trunk/libexec/log-processing/add-runid-as-prefix
===================================================================
--- trunk/libexec/log-processing/add-runid-as-prefix 2010-11-27 02:23:51 UTC (rev 3726)
+++ trunk/libexec/log-processing/add-runid-as-prefix 2010-11-27 13:04:29 UTC (rev 3727)
@@ -4,7 +4,7 @@

 export EVENTSTREAM=$(basename $1 .event)

-export WFID="tag:benc at ci.uchicago.edu,2008:swiftlogs:${EVENTSTREAM}:${RUNID}:"
+export WFID="${EVENTSTREAM}:${RUNID}:"

 while read one two thread rest; do
   echo $one $two ${WFID}$thread $thread $rest

Modified: trunk/libexec/log-processing/whole-workflow-event
===================================================================
--- trunk/libexec/log-processing/whole-workflow-event 2010-11-27 02:23:51 UTC (rev 3726)
+++ trunk/libexec/log-processing/whole-workflow-event 2010-11-27 13:04:29 UTC (rev 3727)
@@ -3,7 +3,7 @@
 export st=$(cat start-time.tmp)
 export et=$(cat end-time.tmp)

-#2008-08-30 08:59:26,994-0500 INFO unknown RUNID id=tag:benc at ci.uchicago.edu,2007:swift:run:20080830-0859-q9bqd8r4
+#2008-08-30 08:59:26,994-0500 INFO unknown RUNID id=run:20080830-0859-q9bqd8r4

 WFID=$(grep -E '^[^ ]* [^ ]* INFO unknown RUNID id=' $1 | sed 's/^.*INFO unknown RUNID id=\(.*\)$/\1/')

Modified: trunk/libexec/vdl.k
===================================================================
--- trunk/libexec/vdl.k 2010-11-27 02:23:51 UTC (rev 3726)
+++ trunk/libexec/vdl.k 2010-11-27 13:04:29 UTC (rev 3727)
@@ -20,11 +20,18 @@
     log("info",sys:file:read("{swift.home}/libexec/version.txt"))
     echo(sys:file:read("{swift.home}/libexec/version.txt"))

-    log("info","RUNID id=tag:benc at ci.uchicago.edu,2007:swift:run:{VDL:RUNID}")
+    log("info","RUNID id=run:{VDL:RUNID}")
     echo("RunID: {VDL:RUNID}")
 )

 export(
+
+    element(parameterlog, [direction, variable, id, thread],
+        if(
+            vdl:configProperty("provenance.log") == "true"
+            log("info","PARAM thread={thread} direction={direction} variable={variable} provenanceid={id}")
+        )
+    )

     element(split, [var], each(str:split(vdl:getFieldValue(var), " ")))
     element(quote, [var, optional(path)],

Modified: trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java
===================================================================
--- trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-11-27 02:23:51 UTC (rev 3726)
+++ trunk/src/org/griphyn/vdl/mapping/AbstractDataNode.java 2010-11-27 13:04:29 UTC (rev 3727)
@@ -25,7 +25,7 @@

 public abstract class AbstractDataNode implements DSHandle {

-    static final String DATASET_URI_PREFIX = "tag:benc at ci.uchicago.edu,2008:swift:dataset:";
+    static final String DATASET_URI_PREFIX = "dataset:";

     public static final Logger logger = Logger
             .getLogger(AbstractDataNode.class);

Modified: trunk/src/org/griphyn/vdl/mapping/ExternalDataNode.java
===================================================================
--- trunk/src/org/griphyn/vdl/mapping/ExternalDataNode.java 2010-11-27 02:23:51 UTC (rev 3726)
+++ trunk/src/org/griphyn/vdl/mapping/ExternalDataNode.java 2010-11-27 13:04:29 UTC (rev 3727)
@@ -22,7 +22,7 @@
         this.params = params;
     }

-    static final String DATASET_URI_PREFIX = "tag:benc at ci.uchicago.edu,2008:swift:dataset:external:";
+    static final String DATASET_URI_PREFIX = "dataset:external:";

     public static final Logger logger = Logger.getLogger(ExternalDataNode.class);

Modified: trunk/tests/log-processing/make-targets/execute.global.event.first-r2522.expected
===================================================================
--- trunk/tests/log-processing/make-targets/execute.global.event.first-r2522.expected 2010-11-27 02:23:51 UTC (rev 3726)
+++ trunk/tests/log-processing/make-targets/execute.global.event.first-r2522.expected 2010-11-27 13:04:29 UTC (rev 3727)
@@ -1 +1 @@
-1236540520.203 0.352999925613403 tag:benc at ci.uchicago.edu,2008:swiftlogs:execute:first-r2522:0 0 END_SUCCESS echo
+1236540520.203 0.352999925613403 execute:first-r2522:0 0 END_SUCCESS echo

From noreply at svn.ci.uchicago.edu Sun Nov 28 15:55:44 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Sun, 28 Nov 2010 15:55:44 -0600 (CST)
Subject: [Swift-commit] r3728 - trunk/docs
Message-ID: <20101128215544.337A09CC95 at svn.ci.uchicago.edu>

Author: skenny
Date: 2010-11-28 15:55:43 -0600 (Sun, 28 Nov 2010)
New Revision: 3728

Removed:
   trunk/docs/guides/
Log:
need to remove docbook-generated docs

From noreply at svn.ci.uchicago.edu Mon Nov 29 17:25:14 2010
From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu)
Date: Mon, 29 Nov 2010 17:25:14 -0600 (CST)
Subject: [Swift-commit] r3729 - provenancedb
Message-ID: <20101129232514.BCF75FC41 at svn.ci.uchicago.edu>

Author: lgadelha
Date: 2010-11-29 17:25:14 -0600 (Mon, 29 Nov 2010)
New Revision: 3729

Modified:
   provenancedb/import-run-to-sql
   provenancedb/prepare-provenance-chart
   provenancedb/prov-to-sql.sh
   provenancedb/swift-prov-import-all-logs
Log:
Make imports use both the new and the old provenance identifiers.
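Because runs imported before and after the identifier change now share one database, a lookup by run id has to allow for either generation of identifier. A minimal sketch, assuming SQLCMD is configured as in the README and using a made-up RUNID:

#!/bin/bash
# Sketch: locate a workflow row under either identifier generation.
# RUNID is a placeholder value; SQLCMD as in etc/provenance.config.
RUNID=20101129-1725-abcdef12
$SQLCMD <<EOF
SELECT id, import_status
FROM workflow
WHERE id='execute:${RUNID}:run'
   OR id='tag:ci.uchicago.edu,2008:swiftlogs:execute:${RUNID}:run';
EOF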
-export WF="execute:${RUNID}:run" +export WF=$PROVIDPREFIX"execute:${RUNID}:run" echo Generating SQL for $RUNID Modified: provenancedb/swift-prov-import-all-logs =================================================================== --- provenancedb/swift-prov-import-all-logs 2010-11-28 21:55:43 UTC (rev 3728) +++ provenancedb/swift-prov-import-all-logs 2010-11-29 23:25:14 UTC (rev 3729) @@ -48,21 +48,24 @@ else wfstatus="FAIL" fi + + export RUNID=$(basename $filename .log) + if [ $version -le 3726 ]; then + PROVIDPREFIX="tag:ci.uchicago.edu,2008:swiftlogs:" + fi - export RUNID=$(basename $filename .log) - export WF="execute:${RUNID}:run" + export WF=$PROVIDPREFIX"execute:${RUNID}:run" echo "INSERT INTO workflow (id, log_filename, swift_version, import_status) VALUES ('$WF','$filename','$version','$wfstatus');" | $SQLCMD - echo version $version in log file $filename echo ============= will import ============= - prepare-for-import $filename + PROVIDPREFIX=$PROVIDPREFIX prepare-for-import $filename if [ "$?" != "0" ]; then echo prepare-for-import failed exit 2 fi - import-run-to-sql $filename + PROVIDPREFIX=$PROVIDPREFIX import-run-to-sql $filename if [ "$?" != "0" ]; then echo import-run-to-sql failed exit 3 From noreply at svn.ci.uchicago.edu Mon Nov 29 17:59:05 2010 From: noreply at svn.ci.uchicago.edu (noreply at svn.ci.uchicago.edu) Date: Mon, 29 Nov 2010 17:59:05 -0600 (CST) Subject: [Swift-commit] r3730 - provenancedb Message-ID: <20101129235905.3A3639CC7F@svn.ci.uchicago.edu> Author: lgadelha Date: 2010-11-29 17:59:05 -0600 (Mon, 29 Nov 2010) New Revision: 3730 Modified: provenancedb/prov-to-sql.sh Log: Make import scripts work with old and new dataset identifiers Modified: provenancedb/prov-to-sql.sh =================================================================== --- provenancedb/prov-to-sql.sh 2010-11-29 23:25:14 UTC (rev 3729) +++ provenancedb/prov-to-sql.sh 2010-11-29 23:59:05 UTC (rev 3730) @@ -36,25 +36,25 @@ operatorid="${WFID}operator:$thread" - echo "INSERT INTO dataset (id) VALUES ('$lhs');" >> tmp-ds.sql - echo "INSERT INTO dataset (id) VALUES ('$rhs');" >> tmp-ds.sql - echo "INSERT INTO dataset (id) VALUES ('$result');" >> tmp-ds.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$lhs');" >> tmp-ds.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$rhs');" >> tmp-ds.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$result');" >> tmp-ds.sql echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$operatorid', 'operator', '$name', '$WF');" >> tmp-p.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$lhs', 'lhs');" >> tmp-dsu.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$rhs', 'rhs');" >> tmp-dsu.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$result', 'result');" >> tmp-dsu.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$PROVIDPREFIX$lhs', 'lhs');" >> tmp-dsu.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'I', '$PROVIDPREFIX$rhs', 'rhs');" >> tmp-dsu.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$operatorid', 'O', '$PROVIDPREFIX$result', 'result');" >> tmp-dsu.sql done < operators.txt while read id name output; do - echo "INSERT INTO dataset (id) VALUES ('$output');" >> tmp-ds.sql + echo 
"INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$output');" >> tmp-ds.sql echo "INSERT INTO process (id, type, name, workflow_id) VALUES ('$id', 'function', '$name', '$WF');" >> tmp-p.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$output', 'result');" >> tmp-dsu.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'O', '$PROVIDPREFIX$output', 'result');" >> tmp-dsu.sql done < functions.txt while read id value; do # TODO need ordering/naming - echo "INSERT INTO dataset (id) VALUES ('$value');" >> tmp-ds.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$value', 'undefined');" >> tmp-dsu.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$value');" >> tmp-ds.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$id', 'I', '$PROVIDPREFIX$value', 'undefined');" >> tmp-dsu.sql done < function-inputs.txt @@ -63,19 +63,19 @@ done < invocation-procedure-names.txt while read outer inner; do - echo "INSERT INTO dataset (id) VALUES ('$outer');" >> tmp-ds.sql - echo "INSERT INTO dataset (id) VALUES ('$inner');" >> tmp-ds.sql - echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$outer', '$inner');" >> tmp-dsc.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$outer');" >> tmp-ds.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$inner');" >> tmp-ds.sql + echo "INSERT INTO ds_containment (out_id, in_id) VALUES ('$PROVIDPREFIX$outer', '$PROVIDPREFIX$inner');" >> tmp-dsc.sql done < tie-containers.txt while read dataset filename; do - echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql - echo "INSERT INTO file (id, filename) VALUES ('$dataset', '$filename');" >> tmp-f.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$dataset');" >> tmp-ds.sql + echo "INSERT INTO file (id, filename) VALUES ('$PROVIDPREFIX$dataset', '$filename');" >> tmp-f.sql done < dataset-filenames.txt while read dataset value; do - echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql - echo "INSERT INTO variable (id, value) VALUES ('$dataset', '$value');" >> tmp-v.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$dataset');" >> tmp-ds.sql + echo "INSERT INTO variable (id, value) VALUES ('$PROVIDPREFIX$dataset', '$value');" >> tmp-v.sql done < dataset-values.txt while read start duration wfid rest; do @@ -110,8 +110,8 @@ else dir=O fi - echo "INSERT INTO dataset (id) VALUES ('$dataset');" >> tmp-ds.sql - echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$dataset', '$variable');" >> tmp-dsu.sql + echo "INSERT INTO dataset (id) VALUES ('$PROVIDPREFIX$dataset');" >> tmp-ds.sql + echo "INSERT INTO ds_usage (process_id, direction, dataset_id, param_name) VALUES ('$thread', '$dir', '$PROVIDPREFIX$dataset', '$variable');" >> tmp-dsu.sql done < tie-data-invocs.txt while read id ; do