[Swift-commit] r5522 - SwiftApps/SciColSim

Sat Jan 28 15:11:00 CST 2012

Author: wilde
Date: 2012-01-28 15:10:59 -0600 (Sat, 28 Jan 2012)
New Revision: 5522

Modified:
   SwiftApps/SciColSim/Makefile
   SwiftApps/SciColSim/TODO
   SwiftApps/SciColSim/annealing.swift
   SwiftApps/SciColSim/evolve.sh
   SwiftApps/SciColSim/optimizer.cpp
   SwiftApps/SciColSim/tc
   SwiftApps/SciColSim/testopt1.py
Log:
Getting closer to C++ output for Andrey's graph optimizer.

Modified: SwiftApps/SciColSim/Makefile
===================================================================

--- SwiftApps/SciColSim/Makefile	2012-01-25 17:29:58 UTC (rev 5521)
+++ SwiftApps/SciColSim/Makefile	2012-01-28 21:10:59 UTC (rev 5522)
@@ -2,19 +2,14 @@
 
 ifeq ($(UNAME), Linux)
 
-all:	toptimizer
+all:	openmp-optimizer
 
-protoall:	toptimizer optimizer Optimizer
+openmp-optimizer: optimizer.cpp
+	g++ -DP_OPENMP -fopenmp -I boost_1_47_0 -o openmp-optimizer optimizer.cpp
 
-optimizer:	optimizer.snap01.cpp
-	g++ -static -fopenmp -I boost_1_47_0 -o optimizer optimizer.snap01.cpp
+clean:
+	rm -rf mac-openmp-optimizer mac-dispatch-optimizer mac-orig-optimizer
 
-Optimizer:	optimizer.snap01.cpp
-	g++ -static -O3 -fopenmp -I boost_1_47_0 -o Optimizer optimizer.snap01.cpp
-
-toptimizer:	optimizer.cpp
-	g++ -static -fopenmp -I boost_1_47_0 -o toptimizer optimizer.cpp
-
 endif
 
 ifeq ($(UNAME), Darwin)

Modified: SwiftApps/SciColSim/TODO
===================================================================
--- SwiftApps/SciColSim/TODO	2012-01-25 17:29:58 UTC (rev 5521)
+++ SwiftApps/SciColSim/TODO	2012-01-28 21:10:59 UTC (rev 5522)
@@ -1,12 +1,37 @@
-[x] Determine if one evolve() run in the optimizer.cpp code is sufficient. (yes -it does 1 woth reruns = 1
+[x] Determine if one evolve() run in the optimizer.cpp code is
+sufficient. (yes - it does 1 with reruns = 1)
 
-Determine if and how to let the optimizer.cpp code do N reruns and how to set NWorkers accordingly (must evenly divide in current code).
+[ ] Determine correct test parameter sets - and if we are passing these
+through correctly.
 
-Determine annealing.swift output files needed.
+[ ] Check correctness of rejection tracking logic and j of k logic for the 5 params
 
-Align Swift and .cp code and do a correctness test.
+[ ] address fixme's
 
-Do performance tests on Beagle and then other platforms (OSG, TG)
+[ ] Determine how to find max_reject
 
-Package code for ExM research.
+[ ] Determine if and how to let the optimizer.cpp code do N reruns and
+how to set NWorkers accordingly (must evenly divide in current code).
+Check if floor() is correct and how load is partitioned; what N makes
+sense in swift usage?
 
+[x] Determine annealing.swift output files needed.
+
+[_x_ ] Make annealing.swift stdout/err output match that of optimizer.cpp
+
+[x] Integrate code to start with usec seed
+
+[x] Add code to set specific seed for reproducibility.  Will Swift use same from Java Random?
+
+[ ] Align Swift and .cpp code enough to do a correctness test.
+
+[ ] Enable precision control for %f formatting (to match c++
+output). Why are we getting some long and some short? Is swift
+truncating the constant or the tracef output?
+
+[ ] Do performance tests on Beagle and then other platforms (OSG, TG)
+- at scale. Ensure we are doing 100's to 1000's of evolve() calls in
+parallel.
+
+[ ] Package code for ExM research.
+

Modified: SwiftApps/SciColSim/annealing.swift
===================================================================
--- SwiftApps/SciColSim/annealing.swift	2012-01-25 17:29:58 UTC (rev 5521)
+++ SwiftApps/SciColSim/annealing.swift	2012-01-28 21:10:59 UTC (rev 5522)
@@ -64,7 +64,7 @@
     
     Res mlres[][];
     mlres[0][0] = multi_loss( 0, 0, params0, target_innov, evolve_reruns ); // Only done once, not 5x; serves for all evolve-params ???
-tracef("in multi_annealing: i=%i j=%i ret vals from iniital multi_loss=(%f,%f)\n",0,0,mlres[0][0].loss,mlres[0][0].sdev);
+    tracef("multi_annealing: AR: %f +- %f\n",mlres[0][0].loss,mlres[0][0].sdev);
 
     foreach j in [0:NEVOPARAMS-1] {
         x[0][j]=params0[j];
@@ -77,7 +77,7 @@
     foreach i in [1:annealing_cycles] {
         // set new temperature, rejection threshold, and dx values for this cycle
         float temperature = T_start*exp( @tofloat(i)*(jlog(T_end)-jlog(T_start))/@tofloat(annealing_cycles));
-        tracef("in multi_annealing: i=%i T=%f\n", i, temperature);
+        tracef("in multi_annealing: AR: i=%i ....T=%f\n", i, temperature);
 	// On each new "major" cycle within the annealing_cycles (other than the first) set new rejection and dx values
         if ( i %% cycle == 1 && i > 1 ){ 
             tracef("multi_annealing: new cycle at i=%i\n",i);
@@ -93,6 +93,7 @@
                 }
                 trace ("Recomputed rejection: i=%d k=%d dx[i][k]=%f\n", i, k, dx[i][k]);
             }
+            tracef("in multi_annealing: AR: %f %f %f %f %f\n",dx[i][0],dx[i][1],dx[i][2],dx[i][3],dx[i][4],dx[i][5]);
         }
         else { # If not new cycle, set dx from previous dx (i-1)
             foreach  k in [0:NEVOPARAMS-1] {
@@ -103,8 +104,9 @@
             // float try_x[];
             int curr = (i * NEVOPARAMS) + j;
             int prev = curr-1;
-tracef("in multi_annealing: i=%i j=%i curr=%i prev=%i\n", i, j, curr, prev);
-            if ( /*(!FIX_VARIABLES) || */ (var_fixed[j]==0) ) {  # fixed=1,1,0,0,0: FIXME: FIX_VARIABLES flag has faulty logic but OK when TRUE
+            tracef("in multi_annealing: i=%i j=%i curr=%i prev=%i\n", i, j, curr, prev);
+            if ( /*(!FIX_VARIABLES) || */ (var_fixed[j]==0) ) {
+                # fixed=1,1,0,0,0: FIXME: FIX_VARIABLES flag has faulty logic but OK when TRUE
                 float try_x[];
                 foreach k in [0:NEVOPARAMS-1] { // Select the evolve params to try
                     if ( k < j ) {
@@ -122,35 +124,65 @@
                     tracef("in multi_annealing: i=%i j=%i k=%i\n", i, j, k);
                 }
                 // Up to here, x[] and dx[] are only set for previous i
-tracef( @strcat(color(3,"in multi_annealing: "),"i=%i j=%i calling multi_loss\n"),i,j);
-#tracef("in multi_annealing: i=%i j=%i calling multi_loss try_x=%q\n",i,j,try_x);
+                tracef( @strcat(color(3,"in multi_annealing: "),"i=%i j=%i calling multi_loss\n"),i,j);
+                #tracef("multi_annealing: i=%i j=%i calling multi_loss try_x=%q\n",i,j,try_x);
                 mlres[i][j] = multi_loss(i,j,try_x, target_innov, evolve_reruns); # do the N evolve()'s, N=evolve_reruns
-tracef("in multi_annealing: i=%i j=%i ret vals from multi_loss=(%f,%f)\n",i,j,mlres[i][j].loss,mlres[i][j].sdev);
+                tracef("multi_annealing: i=%i j=%i ret vals from multi_loss=(%f,%f)\n",i,j,mlres[i][j].loss,mlres[i][j].sdev);
+                tracef("in multi_annealing: AR: %f+-%f\n",mlres[i][j].loss,mlres[i][j].sdev);
                 // Beyond this point, x[] and dx[] are being set for this i,j
                 float ratio = min(1.0, exp( -(mlres[i][j].loss-curr_loss[prev]) /temperature));
                 float r = (random()) / (pow(2.0,31.0)-1.0);  // why all the 2^31's ???
+                tracef("in multi_annealing: AR: %f vs %f\n", r, ratio);
 
                 float ALOT=100000000000.0; // 100,000,000,000. = 10^11
                 if (mlres[i][j].loss < ALOT) { 
-                    tracef("multi_annealing: Writing: filestr.open (best_opt_some.txt, ofstream::app);\n");
-                    # un[0]->get_target() Res.loss un[0]->get_parameter[0:4] Res.error
-                    # filestr.open ("max_dist.txt", ofstream::app);
+                    tracef("multi_annealing: AF: best_opt_some.txt: %f,%f,%f,%f,%f,%f,%f,%f\n",
+                           target_innov,mlres[i][j].loss,try_x[0],try_x[1],try_x[2],try_x[3],try_x[4],mlres[i][j].sdev);
+                    tracef(color(Red,"multi_annealing: AR: max_dist.txt - tbd\n"));
+                           // FIXME: max_dist is global set inside evolve() - need to get to cli - perhaps as 3rd output value
                 }
                 else {  // does this ever occur? if so did we want to still do the ratio computation above???
                     tracef("multi_annealing: Loss %f > ALOT at [i][j] = [%d][%d]\n", mlres[i][j].loss, i ,j);
                 }
                 if (r > ratio) {  // Reject new parameter
                     x[i][j] = x[i-1][j];
-                    rejection[i][j] = rejection[i-1][j] + 1.0;  // Is this correct? incr rejection? 
+                    rejection[i][j] = rejection[i-1][j] + 1.0;  // FIXME: AR: Is this correct? incr rejection? 
                     curr_loss[curr] = curr_loss[prev];
                     curr_sdev[curr] = curr_sdev[prev];
+                    # FIXME: AR: the following prints seem to replicate values in the .cpp version - please clarify.
+                    tracef("multi_annealing: AR: %i,%i %i Did not accept: %f (%i)\n", i, j, i, try_x[j], j);
+                    tracef("multi_annealing: AR: %f %f %f %f %f\n", try_x[0],try_x[1],try_x[2],try_x[3],try_x[4]);
                 }
                 else {           // Accept new parameter
-tracef("multi_annealing: Accepting try_x[j], i=%i j=%i\n",i,j);
+                    tracef("multi_annealing: Accepting try_x[j], i=%i j=%i\n",i,j);
                     x[i][j] = try_x[j];
-tracef("multi_annealing: Accepting try_x[j], i=%i j=%i try_x[j]=%f\n",i,j,try_x[j]);
+                    rejection[i][j] = rejection[i-1][j];  // FIXME: AR: Is this correct? no incr of rejection? 
+                    tracef("multi_annealing: Accepting try_x[j], i=%i j=%i try_x[j]=%f\n",i,j,try_x[j]);
                     curr_loss[curr] = mlres[i][j].loss;
                     curr_sdev[curr] = mlres[i][j].sdev;
+                    tracef(@strcat("multi_annealing: AR: %i ", color(8,"Rejection counts: "),
+                                   color( /* 2 */ 1," %f"), "\n\n"),
+                                   i, rejection[i][j] ); // , rejection[i][1], rejection[i][2], rejection[i][3], rejection[i][4]);
+# FIXME: determine correct rejection[] values to avoid hanging:
+#                    tracef(@strcat("multi_annealing: AR: %i ", color(8,"Rejection counts: "),
+#                                   color( /* 2 */ 1," %f"), color(7," %f"), color(5," %f"), color(9," %f"), color(6," %f"), "\n\n"),
+#                                   rejection[i][0], rejection[i][1], rejection[i][2], rejection[i][3], rejection[i][4]);
+# END FIXME
+                    float rj[];
+                    foreach k in [0:NEVOPARAMS-1] {  # FIXME!!!
+                        if (k==j) {
+                           rj[k] = rejection[i-1][k];
+                        }
+                        else {
+                           rj[k] = rejection[i-1][k];
+                        }
+                        tracef(@strcat("multi_annealing: AR: [%i][%i] ", color(8,"Rejection counts: "),
+                                   color(/*2*/ 1," %f"), color(7," %f"), color(5," %f"), color(9," %f"), color(6," %f"), "\n\n"),
+                                   i, j, rj[0], rj[1], rj[2], rj[3], rj[4]);
+                    }
+                    tracef(@strcat("multi_annealing: AR: %i ", color(8,"***** Did accept! "),
+                                   color(/*2*/ 1," %f"), color(7," %f"), color(5," %f"), color(9," %f"), color(6," %f"), "\n\n"),
+                                   i, try_x[0], try_x[1], try_x[2], try_x[3], try_x[4]);
                 }
             }
             else {
@@ -165,13 +197,8 @@
 (Res r) multi_loss( int ci, int cj, float x[], float target_innov, int evolve_reruns )
 {
   file rfile[];
-  tracef("In multi_loss: entered: ci=%i cj=%i target_innov=%f evolve_reruns=%i x=%q\n",ci, cj, target_innov,evolve_reruns,x);
-  tracef("In multi_loss: entered: ci=%i cj=%i target_innov=%f evolve_reruns=%i\n",ci, cj, target_innov,evolve_reruns);
-/* This hangs:
-  foreach f,i in x {
-     tracef("In multi_loss: x[]: ci=%i cj=%i x[%i]=%f\n",ci, cj, i, f);
-  }
-*/
+  tracef("multi_loss: entered: ci=%i cj=%i target_innov=%f evolve_reruns=%i x=%q\n",ci, cj, target_innov,evolve_reruns,x);
+
   foreach i in [1:evolve_reruns] {  // repeats of the evolove() - same as n_reruns
     file outfile; // FIXME: map and save in future 
     string args[] = [ // FIXME: move this to a setargs() function
@@ -196,11 +223,11 @@
 
     file graph <"movie_graph.txt">;
     (outfile, rfile[i]) = evolve(args,graph); 
-    tracef("in multi_loss: after evolve: i=%i %k %k\n", i, outfile, rfile[i]);
+    tracef("multi_loss: after evolve: i=%i %k %k\n", i, outfile, rfile[i]);
   }
   file sumfile = sumloss(rfile);
   r = readData(sumfile);
-  tracef("in multi_loss: returning: ci=%i cj=%i r.loss=%f r.sdev=%f\n",ci,cj,r.loss,r.sdev);
+  tracef("multi_loss: returning: ci=%i cj=%i r.loss=%f r.sdev=%f\n",ci,cj,r.loss,r.sdev);
 }
 
 optimizer_sweep() // Implements logic of python driver script
@@ -229,7 +256,7 @@
          T_start          = 2.0,
          T_end            = 0.01,
          Target_rejection = 0.3,
-         evolve_reruns    = 2,
+         evolve_reruns    = 10,
          starting_jump    = 2.3,
          params0[]        = [0.0, 0.0, 4.0, 50.0, -1.0],
          @tofloat(target_innov),
@@ -239,7 +266,7 @@
          2.0,
          0.01,
          0.3,
-         2,
+         10,
          2.3,
          [0.0, 0.0, 4.0, 50.0, -1.0],
          @tofloat(target_innov),

Modified: SwiftApps/SciColSim/evolve.sh
===================================================================
--- SwiftApps/SciColSim/evolve.sh	2012-01-25 17:29:58 UTC (rev 5521)
+++ SwiftApps/SciColSim/evolve.sh	2012-01-28 21:10:59 UTC (rev 5522)
@@ -2,5 +2,5 @@
 datafile=$1
 touch multi_loss.data
 shift 1
-$(dirname $0)/toptimizer $* 2>&1
+$(dirname $0)/openmp-optimizer $* 2>&1
 mv multi_loss.data $datafile

Modified: SwiftApps/SciColSim/optimizer.cpp
===================================================================
--- SwiftApps/SciColSim/optimizer.cpp	2012-01-25 17:29:58 UTC (rev 5521)
+++ SwiftApps/SciColSim/optimizer.cpp	2012-01-28 21:10:59 UTC (rev 5522)
@@ -16,6 +16,8 @@
 char operation = 'n'; // n: normal; m: do one multi_loss (with n_reruns).
                       // Not used: a: analyze and generate next annealing parameter set. g: tbd
 
+unsigned initSeed = 0;
+
 #include <fstream>
 #include <iostream>
 #include <stdio.h>
@@ -512,7 +514,7 @@
         if (verbose_level>=3){
             std::cout << id << " failed " << r << std::endl;
 		}
-		return r;
+	return (int) round(r);  // FIXME: Andrey: please verify that round() is correct.
 		
 	}
     
@@ -618,9 +620,12 @@
 						Dist[*vi][j]=d[*vi];
 						Dist[j][*vi]=d[*vi];
                         
-                        if (Dist[*vi][j]>max_dist){
-                            max_dist=Dist[*vi][j];
-                        }
+						if ( (int)round(Dist[*vi][j]>max_dist)) {
+						  // FIXME: Andrey: please verify that (int) cast is correct. Do we need to round()?
+						  // also, the indent on this iff statement was way off - 
+						  // perhaps due to space v. tab?
+						  max_dist=(int)round(Dist[*vi][j]);
+						}
                         
                         
 					} else {
@@ -1240,7 +1245,7 @@
         
     }
     
-#endif notdef
+#endif
 	
 };
 
@@ -1262,7 +1267,7 @@
 #endif
     
     int N = un[0]->get_reruns();
-    int step = (int)(double)N/(double)(Nworkers);
+    int step = (int)floor((double)N/(double)(Nworkers)); // FIXME: Andrey: please check change in cast grouping and use of floor
     int istart=0;
     int iend = istart+step;
     
@@ -1296,10 +1301,18 @@
     //dispatch_release(group);
 #else
     int i;
-    #pragma omp parallel for private (i)
+
+    // Print debug in serial code to get coherent trace output
+
     for(i=0; i<Nworkers; i++){
         std::cout<<"multi_loss: Calling evolve_to_target_and_save i=" << i << " N=" << N << " step=" << step << " istart=" << i*step\
 	       << " iend=" << (i+1)*step << "\n";
+     }
+
+    // Execute actual loop in parallel
+
+    #pragma omp parallel for private (i)
+    for(i=0; i<Nworkers; i++){
         un[i]->evolve_to_target_and_save(i*step, min((i+1)*step,N), Results, Counters);
      }
 #endif
@@ -1564,7 +1577,7 @@
     string par_names1[4] = {"n_epochs", "n_steps", "n_reruns", "range"};
     string par_names2[5] = {"T_start", "T_end", "Annealing_steps","Target_rejection","Starting_jump"};
     string par_names3[5] = {"FREEZE_alpha_i", "FREEZE_alpha_m", "FREEZE_beta", "FREEZE_gamma", "FREEZE_delta"};    
-    string par_names4[2] = {"Operation", "Nworkers"};
+    string par_names4[3] = {"Operation", "Nworkers", "initSeed"};
 
     int params1[4] = {300, 50, 1000, 10};
     int params3[5] = { 0, 0, 0, 0, 0};
@@ -1617,8 +1630,8 @@
                 std::cout << par_names2[nArg-12] << ": " << params2[nArg-12] <<  std::endl;
             }
             if (nArg > 16 && nArg < 22){
-                params3[nArg-17]= atof(argv[nArg]);
-                var_fixed[nArg-17]= atof(argv[nArg]);
+	        params3[nArg-17]= (int)round(atof(argv[nArg]));   // FIXME: Andrey: please verify that round() is correct.
+	        var_fixed[nArg-17]= (int)round(atof(argv[nArg])); // FIXME: ditto
                 std::cout << par_names3[nArg-17] << ": " << var_fixed[nArg-17] <<  std::endl;
             }
             if (nArg == 22 ){
@@ -1629,6 +1642,10 @@
                 Nworkers = atoi(argv[nArg]);
                 std::cout << par_names4[1] << ": " << Nworkers <<  std::endl;
             }
+            if (nArg == 24 ){
+                initSeed = atoi(argv[nArg]);
+                std::cout << par_names4[2] << ": " << initSeed <<  std::endl;
+            }
             
             
         }
@@ -1672,9 +1689,18 @@
         
     }
     //...............................
-    srand(time(0));
+    //srand(time(0));
     //srandomdev();
     
+    if ( initSeed != 0.0 ) {
+      srand(initSeed);
+    }
+    else {
+        timeval t;
+        gettimeofday(&t, NULL);
+        srand(t.tv_usec);
+    }
+
     {
         double r=0;
         for (int j=0; j<100; j++){
@@ -1684,7 +1710,7 @@
             r = rand()/(double)(pow(2.,31)-1.);
             std::cout << r << " ";
         }
-        std::cout << "\n ";
+        std::cout << "\n";
     }
   	//random initiation of starting parameters
     

Modified: SwiftApps/SciColSim/tc
===================================================================
--- SwiftApps/SciColSim/tc	2012-01-25 17:29:58 UTC (rev 5521)
+++ SwiftApps/SciColSim/tc	2012-01-28 21:10:59 UTC (rev 5522)
@@ -11,5 +11,5 @@
 beagle evolve /home/wilde/AndreysOptimizer/evolve.sh null null GLOBUS::maxwalltime="02:00:00"
 localhost evolve /home/wilde/AndreysOptimizer/src/evolve.sh null null GLOBUS::maxwalltime="02:00:00"
 
-beagle sumloss /home/wilde/AndreysOptimizer/evolve.sh null null GLOBUS::maxwalltime="02:00:00"
+beagle sumloss /home/wilde/AndreysOptimizer/src/sumloss.sh null null GLOBUS::maxwalltime="02:00:00"
 localhost sumloss /home/wilde/AndreysOptimizer/src/sumloss.sh null null GLOBUS::maxwalltime="02:00:00"

Modified: SwiftApps/SciColSim/testopt1.py
===================================================================
--- SwiftApps/SciColSim/testopt1.py	2012-01-25 17:29:58 UTC (rev 5521)
+++ SwiftApps/SciColSim/testopt1.py	2012-01-28 21:10:59 UTC (rev 5522)
@@ -1,13 +1,24 @@
-#! /usr/bin/env python -u
+#! /usr/bin/env python 
 #                      -u => run unbuffered
 
+
+
 import os, sys
 
+
+#_-----------
+
+unbuffered = os.fdopen(sys.stdout.fileno(), 'w', 0)
+sys.stdout = unbuffered
+
+#--------------
+
+
 app              = "./orig-optimizer";     # For Mac only: original code (+1 loop fix) using Grnd Central Dispatch
 app              = "./dispatch-optimizer"; # For Mac only: sing Grand Central Dispatch
 app              = "./openmp-optimizer";   # For Mac or Linux: Using OpenMP (Default)
 
-app              = "./dispatch-optimizer";
+app              = "./openmp-optimizer";
 
 # FULL INITIAL TEST PARAMETERS from Andrey
 
@@ -31,6 +42,7 @@
 
 NWorkers         = "2"
 operation        = "n"  # n=normal, m=manual (runs 1 multi_loss call)
+seed             = "1234567";
 
 if app == "./orig-optimizer":
   NWorkers = ""
@@ -39,7 +51,7 @@
 for target in range(startTarget,endTarget,incrTarget):
   for i in range(optimizerRepeats):
     args = app + " 0 0 4 50 -1 " + str(target) + " 40000 20 " + str(evolveReruns) + \
-      " 2 1 2. 0.01 " + str(annealingSteps) + " 0.3 2.3 1 1 0 0 0 " + operation + " " + NWorkers
+      " 2 1 2. 0.01 " + str(annealingSteps) + " 0.3 2.3 1 1 0 0 0 " + operation + " " + NWorkers + " " + seed;
     print("\n**** Calling optimizer: "+args+"\n")
     os.system(args);
 print sys.argv[0] + " Done!"