Main Page   Class Hierarchy   Data Structures   File List   Data Fields   Globals  

hdyn_schedule.C

Go to the documentation of this file.
00001 
00002        //=======================================================//    _\|/_
00003       //  __  _____           ___                    ___       //      /|\ ~
00004      //  /      |      ^     |   \  |         ^     |   \     //          _\|/_
00005     //   \__    |     / \    |___/  |        / \    |___/    //            /|\ ~
00006    //       \   |    /___\   |  \   |       /___\   |   \   // _\|/_
00007   //     ___/   |   /     \  |   \  |____  /     \  |___/  //   /|\ ~
00008  //                                                       //            _\|/_
00009 //=======================================================//              /|\ ~
00010 
00011 //  hdyn_schedule.C: scheduling routines
00012 //.............................................................................
00013 //    version 1:  Nov 1996   Jun Makino
00014 //    version 2:
00015 //.............................................................................
00016 //
00017 //  Externally visible functions:
00018 //
00019 //      void print_sort_counters
00020 //      void fast_get_nodes_to_move
00021 //              - a single function which takes care of everything...
00022 //      void dump_node_list
00023 //      void clean_up_hdyn_schedule
00024 //
00025 //.............................................................................
00026 
00027 #include "hdyn.h"
00028 
00029 typedef struct {hdynptr b;
00030                 xreal next_time;}       // Save next_time here because
00031 node_time;                              // get_next_time() can be EXTREMELY
00032                                         // slow -- avoiding it gains nearly
00033                                         // a factor of 5!  (Steve 5/99)
00034 
00035 // Note that xreal comparisons likely take longer, so may need to
00036 // optimize this... (Steve, 5/00)
00037 
00038 //-----------------------------------------------------------------------------
00039 //
00040 // Static variables used to define and manage the scheduling:
00041 //
00042 //      static  node_time       *nodes                  // list of node/time
00043 //                                                      // pairs for scheduling
00044 //
00045 //      static  int             work_size               // allocated size of
00046 //                                                      // the nodes array
00047 //
00048 //      static  int             nbody                   // current number of
00049 //                                                      // nodes on the list
00050 //
00051 //      static  int             nprev                   // number of nodes on
00052 //                                                      // previous "to_move"
00053 //                                                      // list
00054 //
00055 //      static  int             istack[NSTACK]          // used in quicksort_nr
00056 //      static  node_time       arr_copy[MAX_COPY]      // used in merge_sort
00057 //
00058 //      static  int             counter[NC]             // used in statistics
00059 //      static  int             total_count[NC]         // gathering
00060 //
00061 //-----------------------------------------------------------------------------
00062 
00063 local inline void swap(node_time a[], int i, int j)
00064 {
00065     // ASSUME assignment a = b is OK for structures.
00066 
00067     node_time tmp;
00068 
00069     tmp = a[i];
00070     a[i] = a[j];
00071     a[j] = tmp;
00072 }
00073 
00074 // Makino's quicksort -- recursive.
00075 
00076 local void quicksort(node_time a[], int left, int right)
00077 {
00078     // Sort nodes in array a, according to get_next_time().
00079 
00080     int i,j;
00081     xreal v;
00082 
00083     if (right > left) {
00084         bool is_sorted = true;
00085         for (i=left; i< right; i++) {
00086             if (a[i].next_time > a[i+1].next_time) {
00087                 is_sorted = false;
00088                 i = right;
00089             }
00090         }
00091         if (is_sorted) return;
00092         i = (left + right)/2;
00093         swap(a, i, right);
00094         v = a[right].next_time; i = left- 1; j = right;
00095         for (;;) {
00096             while (a[++i].next_time < v && i < right) ;
00097             while (a[--j].next_time > v && j > left) ;
00098             if (i >= j) break;
00099             swap(a, i, j);
00100         }
00101         swap(a, i, right);
00102         quicksort(a, left, i - 1);
00103         quicksort(a, i + 1, right);
00104     }
00105 }
00106 
00107 // Copy of Makino's quicksort.  Use of separate identical functions
00108 // allows us to keep track of sorting for profiling purposes.
00109 
00110 local void quicksort2(node_time a[], int left, int right)
00111 {
00112     // Sort nodes in array a, according to get_next_time().
00113 
00114     int i,j;
00115     xreal v;
00116 
00117     if (right > left) {
00118         bool is_sorted = true;
00119         for (i=left; i< right; i++) {
00120             if (a[i].next_time > a[i+1].next_time) {
00121                 is_sorted = false;
00122                 i = right;
00123             }
00124         }
00125         if (is_sorted) return;
00126         i = (left + right)/2;
00127         swap(a, i, right);
00128         v = a[right].next_time; i = left- 1; j = right;
00129         for (;;) {
00130             while (a[++i].next_time < v && i < right) ;
00131             while (a[--j].next_time > v && j > left) ;
00132             if (i >= j) break;
00133             swap(a, i, j);
00134         }
00135         swap(a, i, right);
00136         quicksort2(a, left, i - 1);
00137         quicksort2(a, i + 1, right);
00138     }
00139 }
00140 
00141 // Numerical Recipes non-recursive quicksort.  VERY slow for nearly
00142 // ordered data...  (Steve 5/99)
00143 
00144 #define M       7
00145 #define NSTACK  128
00146 
00147 static int istack[NSTACK];
00148 
00149 local void quicksort_nr(node_time arr[], int n)
00150 
00151 // Note Numerical Recipes unit-offset arrays!  Array arr[] runs
00152 // from 1 to n.  Handle in the calling sequence...
00153 
00154 {
00155     int i, l = 1, ir = n, j, k;
00156     int jstack = 0;
00157     node_time a;
00158 
00159     for (;;) {
00160         if (ir-l < M) {
00161 
00162             for (j = l+1; j <= ir; j++) {
00163                 a = arr[j];
00164                 for (i = j-1; i >= 1; i--) {
00165                     if (arr[i].next_time <= a.next_time) break;
00166                     arr[i+1] = arr[i];
00167                 }
00168                 arr[i+1] = a;
00169             }
00170             if (jstack == 0) break;
00171             ir = istack[jstack--];
00172             l = istack[jstack--];
00173 
00174         } else {
00175 
00176             k = (l+ir) >> 1;
00177 
00178             swap(arr, k, l+1);
00179 
00180             if (arr[l+1].next_time > arr[ir].next_time)
00181                 swap(arr, l+1, ir);
00182 
00183             if (arr[l].next_time > arr[ir].next_time)
00184                 swap(arr, l, ir);
00185 
00186             if (arr[l+1].next_time > arr[l].next_time)
00187                 swap(arr, l+1, l);
00188 
00189             i = l+1;
00190             j = ir;
00191             a = arr[l];
00192             for (;;) {
00193                 do i++; while (arr[i].next_time < a.next_time);
00194                 do j--; while (arr[j].next_time > a.next_time);
00195                 if (j < i) break;
00196                 swap(arr, i, j);
00197             }
00198             arr[l] = arr[j];
00199             arr[j] = a;
00200             jstack += 2;
00201 
00202             if (jstack > NSTACK)
00203                 err_exit("NSTACK too small in sort_quicknr.");
00204 
00205             if (ir-i+1 >= j-l) {
00206                 istack[jstack] = ir;
00207                 istack[jstack-1] = i;
00208                 ir = j-1;
00209             } else {
00210                 istack[jstack] = j-1;
00211                 istack[jstack-1] = l;
00212                 l = i;
00213             }
00214         }
00215     }
00216 }
00217 
00218 // Heapsort from Numerical Recipes.  Note Numerical Recipes unit-offset
00219 // arrays!  Array arr[] runs from 1 to n...  Faster than NR quicksort,
00220 // but still slower than Makino's version for our purposes.  (Steve 5/99)
00221 
00222 local void heapsort(node_time arr[], int n)
00223 {
00224     int i, ir, j, l;
00225     node_time a;
00226 
00227     if (n < 2) return;
00228 
00229     l = (n >> 1)+1;
00230     ir = n;
00231 
00232     for (;;) {
00233         if (l > 1) {
00234             a = arr[--l];
00235         } else {
00236             a = arr[ir];
00237             arr[ir] = arr[1];
00238             if (--ir == 1) {
00239                 arr[1] = a;
00240                 break;
00241             }
00242         }
00243         i = l;
00244         j = l+l;
00245         while (j <= ir) {
00246             if (j < ir && arr[j].next_time < arr[j+1].next_time)
00247                 j++;
00248             if (a.next_time < arr[j].next_time) {
00249                 arr[i] = arr[j];
00250                 i = j;
00251                 j <<= 1;
00252             } else j = ir+1;
00253         }
00254         arr[i] = a;
00255     }
00256 }
00257 
00258 // Insertion sort from Numerical Recipes, except that arrays start at 0.
00259 
00260 inline void insertion_sort(node_time arr[], int n)      // sort from 0 to n-1
00261 {
00262     int i, j;
00263     node_time a;
00264 
00265     for (j = 1; j < n; j++) {
00266         a = arr[j];
00267         i = j-1;
00268         while (i >= 0 && arr[i].next_time > a.next_time) {
00269             arr[i+1] = arr[i];
00270             i--;
00271         }
00272         arr[i+1] = a;
00273     }
00274 }
00275 
00276 // Quick and dirty merging of an array consisting of two ordered
00277 // subarrays.  2-3 times faster than Makino's quicksort.  (Steve 5/99)
00278 
00279 #define MAX_COPY        256
00280 static node_time        arr_copy[MAX_COPY];
00281 
00282 local void merge_sort(node_time arr[], int n, int np)
00283 {
00284     // Ordered subarrays are 0 to np-1, np to n-1.  We expect np << n
00285     // in typical use, and arr[np-1] > arr[n-1] by construction..
00286 
00287     if (np > MAX_COPY) {
00288         quicksort2(arr, 0, n-1);
00289         return;
00290     }
00291 
00292     // Work with a copy of the first subarray (simplest to code).
00293 
00294     int i, j;
00295 
00296     for (i = 0; i < np; i++) arr_copy[i] = arr[i];
00297 
00298     // This could probably be accelerated using the block structure...
00299 
00300     i = 0, j = np;
00301 
00302     for (int k = 0; k < n; k++) {
00303 
00304         if (j >= n || arr_copy[i].next_time < arr[j].next_time)
00305             arr[k] = arr_copy[i++];
00306         else
00307             arr[k] = arr[j++];          // note that k < j always
00308 
00309     }
00310 }
00311 
00312 //------------------------------------------------------------------------
00313 
00314 #define NC 12
00315 static int counter[NC] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
00316 static int total_count[NC] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
00317 
00318 local void update_sort_counters(int n)
00319 {
00320     int nc = 1, ic = 0;
00321     while (nc < n) {
00322         nc *= 2;
00323         ic++;
00324     }
00325     if (ic >= NC) ic = NC - 1;
00326     counter[ic]++;
00327     total_count[ic]++;
00328 }
00329 
00330 void print_sort_counters()
00331 {
00332     cerr << endl << "Sort counters:" << endl << "    ";
00333     for (int i = 0; i < NC; i++) cerr << counter[i] << " ";
00334     cerr << endl << "    ";
00335     for (int i = 0; i < NC; i++) cerr << total_count[i] << " ";
00336     cerr << endl << endl;
00337     for (int i = 0; i < NC; i++) counter[i] = 0;
00338 }
00339 
00340 local void print_sort_compare(node_time nodes[], int iprev, int icurr,
00341                               char* s)
00342 {
00343     // iprev was the last element in the previously sorted list.
00344     // icurr  is the last element in the new sorted list.
00345     // The list has not yet been resorted.
00346 
00347     // Count how many elements of the "prev" list have moved into
00348     // the rest of the list.  On entry, the list is ordered from 0
00349     // to iprev, and from iprev+1 to icurr.
00350 
00351     int n_moved = 0;
00352     xreal tp = nodes[iprev+1].next_time;
00353 
00354     for (int i = iprev; i >= 0; i--) {
00355         if (nodes[i].next_time <= tp) break;
00356         n_moved++;
00357     }   
00358 
00359     fprintf(stderr, "quicksort: %20.12f %8d %8d %8d %s\n",
00360             (real)nodes[0].b->get_system_time(), iprev+1, icurr+1, n_moved, s);
00361 }
00362 
00363 //------------------------------------------------------------------------
00364 
00365 #define WHICH_SORT      4               // 4 is preferred (Steve 5/99)
00366 
00367 #define N_ISORT         10
00368 
00369 // Convenient to separate sorts by length for debugging/profiling...
00370 
00371 inline local void QS(node_time a[], int n, int np = 0)
00372 {
00373     if (n <= N_ISORT && (WHICH_SORT != 4 || np <= 0)) {
00374         insertion_sort(a, n);
00375         return;
00376     }
00377 
00378     if (WHICH_SORT == 1) {
00379 
00380         // Makino's quicksort:
00381 
00382         if (np < 0)
00383             quicksort(a, 0, n-1);
00384         else
00385             quicksort2(a, 0, n-1);
00386 
00387     } else if (WHICH_SORT == 2) {
00388 
00389         // NR quicksort:
00390 
00391         quicksort_nr(a-1, n);                   // note NR offset!
00392 
00393     } else if (WHICH_SORT == 3) {
00394 
00395         // NR heapsort:
00396 
00397         heapsort(a-1, n);                       // note NR offset!
00398 
00399     } else if (WHICH_SORT == 4) {
00400 
00401         // Steve's merge_sort:
00402 
00403         if (np > 0)
00404 
00405             merge_sort(a, n, np);               // np is the start of the
00406                                                 // second ordered subarray
00407         else {
00408 
00409             if (np < 0)
00410                 quicksort(a, 0, n-1);
00411             else
00412                 quicksort2(a, 0, n-1);
00413 
00414         }
00415     }
00416 }
00417 
00418 local void sort_node_array(node_time nodes[], int nprev, int nbody)
00419 {
00420 
00421 #if 0
00422     cerr << "before sort\n";
00423     for (int i = 0; i < nprev; i++) {
00424         PRC(i); nodes[i].b->print_label(cerr);
00425         PRL(nodes[i].next_time);
00426     }
00427 #endif
00428 
00429     // Update times in the node_time structure.
00430 
00431     for (int i = 0; i < nprev; i++)
00432         nodes[i].next_time = nodes[i].b->get_next_time();
00433 
00434     // Sort the previous list (bodies last stepped forward).
00435     // (No need to sort if nprev = 1.)
00436 
00437     int p = cerr.precision(13);         // nonstandard precision
00438 
00439     if (nprev > 1) {
00440 
00441         // Sort nodes array from 0 to nprev-1.
00442 
00443         update_sort_counters(nprev);
00444         QS(nodes, nprev, -1);
00445     }
00446 
00447 #if 0
00448     for (int i = 0; i < nprev; i++) {
00449         PRC(i); nodes[i].b->print_label(cerr);
00450         PRL(nodes[i].next_time);
00451     }
00452 #endif
00453 
00454     // Perhaps some check is necessary for unperturbed motion,
00455     // whose timestep is integer multiple of the basic step...
00456 
00457     if (nprev < nbody) {
00458 
00459         // The list from 0 to nprev-1 is ordered, as is the list from
00460         // nprev to nbody-1.  See if the two parts need to be merged.
00461 
00462         // int p = cerr.precision(HIGH_PRECISION);
00463         // PRC(nodes[nprev-1].next_time); PRL(nodes[nprev].next_time);
00464         // cerr.precision(p);
00465 
00466         if (nodes[nprev-1].next_time > nodes[nprev].next_time) {
00467 
00468             // Subarrays overlap.  Need to sort or merge.
00469             // These searches could probably be accelerated using
00470             // bisection (see NR, p. 117).
00471 
00472             int ilow = nprev-1;
00473             for (int i = nprev-2; i >= 0; i--) {
00474                 if (nodes[i].next_time <= nodes[nprev].next_time) break;
00475                 ilow = i;
00476             }
00477         
00478             int ihigh = nprev;
00479             for (int i = nprev+1; i < nbody; i++) {
00480                 if (nodes[i].next_time > nodes[nprev-1].next_time) break;
00481                 ihigh = i;
00482             }
00483 
00484             // Out-of-order region runs from ilow to ihigh, inclusive.
00485 
00486 #if 0
00487             int p = cerr.precision(HIGH_PRECISION);
00488             cerr << "sort_node_array: correct sequence from ";
00489             cerr << ilow  << " ("; nodes[ilow].b->print_label(cerr);
00490             cerr << ") to ";
00491             cerr << ihigh << " ("; nodes[ihigh].b->print_label(cerr);
00492             cerr << ")" << endl << "                 at time "
00493                  << nodes[nprev-1].next_time << "  nprev = " << nprev
00494                  << endl;
00495 
00496             for (int i = 0; i < 10; i++)
00497                 cerr << i << " " << nodes[i].b->format_label()
00498                      << " " << nodes[i].next_time << endl;
00499 
00500             print_sort_compare(nodes, nprev-1, ihigh, "");
00501             cerr.precision(p);
00502 #endif
00503 
00504             // Sort from ilow to ihigh, possibly using the additional
00505             // information that the subarrays below and above nprev are
00506             // already ordered.
00507 
00508             update_sort_counters(ihigh-ilow+1);
00509             QS(nodes+ilow, ihigh-ilow+1, nprev-ilow);
00510 
00511         }
00512     }
00513     cerr.precision(p);
00514 }
00515 
00516 // Static data:
00517 
00518 static int work_size = 0;
00519 static node_time * nodes = NULL;
00520 
00521 static int nbody = 0;
00522 static int nprev = 0;
00523 
00524 // Allow possibility of cleaning up if necessary:
00525 
00526 void clean_up_hdyn_schedule() {if (nodes) delete [] nodes;}
00527 
00528 // ***** NOTE that the scheduling may become badly corrupted if bodies not
00529 // ***** on the list have been integrated (e.g. by synchronize_tree()).
00530 // ***** If this is done, then a reset *must* be forced.
00531 
00532 void fast_get_nodes_to_move(hdyn * b,
00533                             hdyn * list[],
00534                             int &nlist,
00535                             xreal & tmin,
00536                             bool & reset)
00537 {
00538     if (nbody == 0) reset = true;
00539 
00540     if (reset) {
00541 
00542         // Initialize the array of node pointers.
00543 
00544         int n = 0;
00545         for_all_nodes(hdyn, b, bb) n++;
00546 
00547         if (work_size < n) {
00548             work_size = n*2;
00549             if (nodes) delete [] nodes;
00550             nodes = new node_time[work_size];
00551         }
00552 
00553         n = 0;
00554         for_all_nodes(hdyn, b, bbb) {
00555             if (!bbb->is_root()
00556                 && (bbb->is_top_level_node()
00557                     || bbb->get_elder_sister() == NULL)) {
00558                 nodes[n].b = bbb;
00559                 nodes[n].next_time = bbb->get_next_time();
00560                 n++ ;
00561             }
00562         }
00563         nbody = nprev = n;
00564     }
00565 
00566     reset = false;
00567 
00568     bool debug = false;
00569     // debug = (b->get_system_time() > 100);
00570 
00571     sort_node_array(nodes, nprev, nbody);
00572 
00573     nlist = 0;
00574     tmin = nodes[0].next_time;
00575     for (int i = 0; i < nbody; i++) {
00576         if (nodes[i].next_time < tmin) {
00577 
00578             // Fatal scheduling error...
00579 
00580             cerr << endl << "Error in fast_get_nodes_to_move():" << endl;
00581             cerr.precision(HIGH_PRECISION);
00582 
00583             PRC(tmin), PRL((real)tmin);
00584             PRC(i); cerr << "node " << nodes[i].b->format_label() << endl;
00585             PRL(nodes[i].next_time);
00586 
00587             for (int j = 0; j <= i; j++) {
00588                 PRC(j);
00589                 cerr << nodes[j].b->format_label() << "  "
00590                      << nodes[j].b->get_time() << "  "
00591                      << nodes[j].next_time << endl;
00592             }
00593 
00594             err_exit("internal error: sort failed\n");
00595 
00596         } else if (nodes[i].next_time > tmin) {
00597 
00598             i = nbody;  // aka break!
00599 
00600         } else {
00601 
00602             list[i] = nodes[i].b;
00603             nlist = i + 1;
00604         }
00605     }
00606     nprev = nlist;
00607 
00608     if (debug) {
00609       cerr << endl;
00610 
00611       int p = cerr.precision(HIGH_PRECISION);
00612       PRC(nbody); PRC(nlist); PRC(tmin);
00613       cerr.precision(p);
00614 
00615       cerr << list[0]->format_label() << endl;
00616     }
00617 }
00618 
00619 
00620 void dump_node_list(int n) // default = 1000000000
00621 {
00622     int p = cerr.precision(HIGH_PRECISION);
00623     cerr << endl << "Current node list (nbody = " << nbody << "):\n";
00624     for (int i = 0; i < nbody && i < n; i++) {
00625         cerr << i << "  " << nodes[i].b->format_label()
00626              << "  " << nodes[i].next_time << endl;
00627     }
00628     cerr << endl << flush;
00629     cerr.precision(p);
00630 }
00631 
00632 
00633 void dump_node_list_for(char *s)
00634 {
00635     int p = cerr.precision(HIGH_PRECISION);
00636     cerr << "Current node list entry for " << s << endl;
00637     for (int i = 0; i < nbody; i++) {
00638         if (streq(nodes[i].b->format_label(), s)) {
00639             if (i > 0)
00640                 cerr << i-1 << "  " << nodes[i-1].b->format_label()
00641                      << "  " << nodes[i-1].next_time << endl;
00642             cerr << i << "  " << nodes[i].b->format_label()
00643                  << "  " << nodes[i].next_time << endl;
00644             if (i < nbody-1)
00645                 cerr << i+1 << "  " << nodes[i+1].b->format_label()
00646                      << "  " << nodes[i+1].next_time << endl;
00647         }
00648     }
00649     cerr << endl << flush;
00650     cerr.precision(p);
00651 }

Generated at Sun Feb 24 09:57:03 2002 for STARLAB by doxygen1.2.6 written by Dimitri van Heesch, © 1997-2001