[Swift-commit] r3339 - provenancedb/apps/oops
noreply at svn.ci.uchicago.edu
noreply at svn.ci.uchicago.edu
Tue Jun 1 17:03:57 CDT 2010
Author: lgadelha
Date: 2010-06-01 17:03:56 -0500 (Tue, 01 Jun 2010)
New Revision: 3339
Modified:
provenancedb/apps/oops/oops_extractor.sh
Log:
Modified: provenancedb/apps/oops/oops_extractor.sh
===================================================================
--- provenancedb/apps/oops/oops_extractor.sh 2010-05-31 10:57:08 UTC (rev 3338)
+++ provenancedb/apps/oops/oops_extractor.sh 2010-06-01 22:03:56 UTC (rev 3339)
@@ -1,19 +1,15 @@
#!/bin/bash
# Annotation extractor for the OOPS application
-# Author: Luiz Gadelha
-# Date: 2010-05-25
#
# The directory $PROTESTS/swift-logs contains symbolic links to
# OOPS' Swift logs.
PROVDB_HOME=~/provenancedb
PROTESTS_HOME=~/protests
-workflow_id=$1
source $PROVDB_HOME/etc/provenance.config
-
# provdb_imported records runs already imported to the provenance database
cd $PROTESTS_HOME
if [ ! -a provdb_imported ]; then
@@ -23,6 +19,7 @@
for i in `ls | grep run.loops`;
do
+ cd $PROTESTS_HOME
if ! grep $i provdb_imported; then
if grep "Swift finished with no errors" $i/psim.loops-*.log; then
cd swift-logs
@@ -37,21 +34,35 @@
echo $i >> provdb_imported
# annotate workflows with their oops runid
- oops_run_id=`echo $i | awk -F . '{print $3}'`
- log_filename=`ls $i | grep psim.loops- | grep "\."log$`
- workflow_id=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$log_filename%'" | $SQLCMD -t | awk '{print $1}'`
- echo "insert into annotations values ('$workflow_id','oops_run_id','$oops_run_id');" | $SQLCMD
+ OOPS_RUN_ID=`echo $i | awk -F . '{print $3}'`
+ LOG_FILENAME=`ls $i | grep psim.loops- | grep "\."log$`
+ WORKFLOW_ID=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'`
+ echo "insert into annotations values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD
# annotate dataset with scientific parameters passed to doLoopRound
- #echo "\pset border 0;" > query.sql
- #echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='doLoopRound' and param_name='modelData' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%';" >> query.sql
- #$SQLCMD -t -A -F " " -f query.sql -o result.txt
- #dataset_id=`awk '{print $1}' result.txt`
- #filename=`awk '{print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
- #TODO extract name-value pairs
+
+ # TODO: check why it is not recording doLoopRound in processes_in_workflows
+ #echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql
+
+ # using this as a workaround for the problem above, it will return nSim identical tuples
+ echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql
+
+ $SQLCMD -t -A -F " " -f query.sql -o result.txt
+
+ #DATASET_ID=`awk '{print $1}' result.txt`
+ DATASET_ID=`awk '{if (NR==1) print $1}' result.txt`
+
+ #FILENAME=`awk '{print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
+ FILENAME=`awk '{if (NR==1) print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
+
+ cd $PROTESTS_HOME/run.loops.$OOPS_RUN_ID
+
+ while read line
+ do
+ NAME=`echo $line | awk 'BEGIN { FS = "=" }; {print $1}'`
+ VALUE=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}'`
+ echo "insert into annotations values ('$DATASET_ID', '$NAME', '$VALUE');" | $SQLCMD
+ done < $FILENAME
fi
fi
done
-
-
-
More information about the Swift-commit mailing list