[Swift-commit] r3339 - provenancedb/apps/oops

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Tue Jun 1 17:03:57 CDT 2010


Author: lgadelha
Date: 2010-06-01 17:03:56 -0500 (Tue, 01 Jun 2010)
New Revision: 3339

Modified:
   provenancedb/apps/oops/oops_extractor.sh
Log:


Modified: provenancedb/apps/oops/oops_extractor.sh
===================================================================
--- provenancedb/apps/oops/oops_extractor.sh	2010-05-31 10:57:08 UTC (rev 3338)
+++ provenancedb/apps/oops/oops_extractor.sh	2010-06-01 22:03:56 UTC (rev 3339)
@@ -1,19 +1,15 @@
 #!/bin/bash
 
 # Annotation extractor for the OOPS application
-# Author: Luiz Gadelha
-# Date: 2010-05-25
 # 
 # The directory $PROTESTS/swift-logs contains symbolic links to 
 # OOPS' Swift logs. 
 
 PROVDB_HOME=~/provenancedb
 PROTESTS_HOME=~/protests
-workflow_id=$1
 
 source $PROVDB_HOME/etc/provenance.config
 
-
 # provdb_imported records runs already imported to the provenance database
 cd $PROTESTS_HOME
 if [ ! -a provdb_imported ]; then
@@ -23,6 +19,7 @@
 
 for i in `ls | grep run.loops`;
 do 
+    cd $PROTESTS_HOME
     if ! grep $i provdb_imported; then
 	if grep "Swift finished with no errors" $i/psim.loops-*.log; then
 	    cd swift-logs
@@ -37,21 +34,35 @@
 	    echo $i >> provdb_imported
 
 	    # annotate workflows with their oops runid
-	    oops_run_id=`echo $i | awk -F . '{print $3}'`
-	    log_filename=`ls $i | grep psim.loops- | grep "\."log$`
-	    workflow_id=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$log_filename%'" | $SQLCMD -t | awk '{print $1}'`
-	    echo "insert into annotations values ('$workflow_id','oops_run_id','$oops_run_id');" | $SQLCMD
+	    OOPS_RUN_ID=`echo $i | awk -F . '{print $3}'`
+	    LOG_FILENAME=`ls $i | grep psim.loops- | grep "\."log$`
+	    WORKFLOW_ID=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'`
+	    echo "insert into annotations values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD
 
 	    # annotate dataset with scientific parameters passed to doLoopRound
-	    #echo "\pset border 0;" > query.sql
-            #echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='doLoopRound' and param_name='modelData' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%';" >> query.sql
-            #$SQLCMD -t -A -F " " -f query.sql -o result.txt
-            #dataset_id=`awk '{print $1}' result.txt`
-            #filename=`awk '{print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
-            #TODO extract name-value pairs
+
+	    # TODO: check why it is not recording doLoopRound in processes_in_workflows
+	    #echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql
+
+	    # Workaround for the problem above: this query returns nSim identical tuples, so only the first row of result.txt is used below
+            echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql
+
+	    $SQLCMD -t -A -F " " -f query.sql -o result.txt
+
+            #DATASET_ID=`awk '{print $1}' result.txt`
+            DATASET_ID=`awk '{if (NR==1) print $1}' result.txt`
+
+            #FILENAME=`awk '{print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
+            FILENAME=`awk '{if (NR==1) print $2}' result.txt | sed 's/file:\/\/localhost\///g'`
+
+	    cd $PROTESTS_HOME/run.loops.$OOPS_RUN_ID
+	    
+	    while read line
+	    do
+		NAME=`echo $line | awk 'BEGIN { FS = "=" }; {print $1}'`
+		VALUE=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}'`
+		echo "insert into annotations values ('$DATASET_ID', '$NAME', '$VALUE');" | $SQLCMD
+	    done < $FILENAME
 	fi 
     fi 
 done
-
-
-




More information about the Swift-commit mailing list