[Swift-commit] r3707 - in provenancedb: . apps/oops

noreply at svn.ci.uchicago.edu noreply at svn.ci.uchicago.edu
Fri Nov 12 17:27:31 CST 2010


Author: lgadelha
Date: 2010-11-12 17:27:30 -0600 (Fri, 12 Nov 2010)
New Revision: 3707

Modified:
   provenancedb/apps/oops/oops_extractor.sh
   provenancedb/prov-to-sql.sh
Log:
Updated OOPS annotation extraction scripts


Modified: provenancedb/apps/oops/oops_extractor.sh
===================================================================
--- provenancedb/apps/oops/oops_extractor.sh	2010-11-10 18:18:39 UTC (rev 3706)
+++ provenancedb/apps/oops/oops_extractor.sh	2010-11-12 23:27:30 UTC (rev 3707)
@@ -6,7 +6,7 @@
 # OOPS' Swift logs. 
 
 PROVDB_HOME=~/provenancedb
-PROTESTS_HOME=/home/aashish/CASP
+PROTESTS_HOME=//CASP
 IMPORT_HOME=~/protests
 
 source $PROVDB_HOME/etc/provenance.config
@@ -41,12 +41,14 @@
 		OOPS_RUN_ID=`echo $i | awk -F . '{print $3}'`
 		cd $PROTESTS_HOME/$k/$i
 		LOG_FILENAME=`ls | grep psim.loops- | grep "\."log$`
-		WORKFLOW_ID=`echo "select workflow_id from known_workflows where workflow_log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'`
+		WORKFLOW_ID=`echo "select id from workflow where log_filename like '%$LOG_FILENAME%'" | $SQLCMD -t | awk '{print $1}'`
 		cd $IMPORT_HOME/swift-logs
-		echo "insert into workflow_annotations_varchar values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD
+		echo "insert into annot_wf_txt (id, name, value) values ('$WORKFLOW_ID','oops_run_id','$OOPS_RUN_ID');" | $SQLCMD
 		
-	        # using this as a workaround for the problem above, it will return nSim identical tuples
-		echo "select dataset_filenames.dataset_id,dataset_filenames.filename from dataset_usage,invocation_procedure_names,dataset_containment,dataset_filenames,processes_in_workflows where dataset_usage.process_id=invocation_procedure_names.execute_id and dataset_containment.inner_dataset_id=dataset_filenames.dataset_id and procedure_name='loopModel' and param_name='d' and dataset_containment.outer_dataset_id=dataset_usage.dataset_id and dataset_filenames.filename like '%.params%' and processes_in_workflows.process_id=dataset_usage.process_id and processes_in_workflows.workflow_id='$WORKFLOW_ID';" > query.sql
+		echo "select id,filename from file where filename like '%params%' and id in (select in_id from ds_containment where out_id in (select ds_usage.dataset_id from ds_usage,process,execute where ds_usage.process_id=process.id and process.id=execute.id and execute.procedure_name='loopPrepare' and ds_usage.direction='I' and process.workflow_id like '%$WORKFLOW_ID%'));" > query.sql
+
+		#query for the previous database schema
+		#echo "select dataset_id,filename from dataset_filenames where filename like '%params%' and dataset_id in (select inner_dataset_id from dataset_containment where outer_dataset_id in (select dataset_usage.dataset_id from invocation_procedure_names,dataset_usage,processes_in_workflows where invocation_procedure_names.execute_id=dataset_usage.process_id and dataset_usage.process_id=processes_in_workflows.process_id and invocation_procedure_names.procedure_name='loopPrepare' and dataset_usage.direction='I' and processes_in_workflows.workflow_id like '%$WORKFLOW_ID%'));" > query.sql
 		
 		$SQLCMD -t -A -F " " -f query.sql -o result.txt
 		
@@ -63,18 +65,18 @@
 		    if [ "$NAME" = "SAMPLE RANGE" ]; then
 			VALUE1=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "-" }; {print $1}'`
 			VALUE2=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "-" }; {print $2}'`
-			echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME BEGIN', $VALUE1);" | $SQLCMD
-			echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME END', $VALUE2);" | $SQLCMD
+			echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME BEGIN', $VALUE1);" | $SQLCMD
+			echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME END', $VALUE2);" | $SQLCMD
 		    fi 
 		    if [ "$NAME" = "RESTRAIN DISTANCE" ]; then
 			VALUE1=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "," }; {print $1}'`
 			VALUE2=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}' | awk 'BEGIN { FS = "," }; {print $2}'`
-			echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME 1', $VALUE1);" | $SQLCMD
-			echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME 2', $VALUE2);" | $SQLCMD
+			echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME 1', $VALUE1);" | $SQLCMD
+			echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME 2', $VALUE2);" | $SQLCMD
 		    fi 
 		    if [ "$NAME" = "MAXIMUM NUMBER OF STEPS" ]; then
 			VALUE=`echo $line | awk 'BEGIN { FS = "=" }; {print $2}'`
-			echo "insert into dataset_annotations_numeric values ('$DATASET_ID', '$NAME', $VALUE);" | $SQLCMD
+			echo "insert into annot_ds_num values ('$DATASET_ID', '$NAME', $VALUE);" | $SQLCMD
 		    fi 
 		done < $FILENAME
 	    fi

Modified: provenancedb/prov-to-sql.sh
===================================================================
--- provenancedb/prov-to-sql.sh	2010-11-10 18:18:39 UTC (rev 3706)
+++ provenancedb/prov-to-sql.sh	2010-11-12 23:27:30 UTC (rev 3707)
@@ -125,10 +125,10 @@
     echo "UPDATE workflow SET duration=$duration WHERE id='$WF';" >> tmp-import.sql
 done < workflow.event
 
-while read id extrainfo ; do
-    # TODO this will not like quotes and things like that in extrainfo
-    echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql
-done < extrainfo.txt
+#while read id extrainfo ; do
+# TODO this will not like quotes and things like that in extrainfo
+#    echo "INSERT INTO extrainfo (id, extrainfo) VALUES ('$id', '$extrainfo');" >> tmp-import.sql
+#done < extrainfo.txt
 
 # TODO this could merge with other naming tables
 while read start duration thread final_state procname ; do




More information about the Swift-commit mailing list