summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: spiros <andyspiros@gmail.com> 2011-07-18 02:36:22 +0200
committer: spiros <andyspiros@gmail.com> 2011-07-18 02:36:22 +0200
commit: 63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9 (patch)
tree: b233a6e387be3cfcfa3261d62d0c67358aa044ee /btl/generic_bench
parent: Initial work on distributed-memory benchmarks. (diff)
download: auto-numerical-bench-63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9.tar.gz
auto-numerical-bench-63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9.tar.bz2
auto-numerical-bench-63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9.zip
Much work on distributed-memory BTL.
Diffstat (limited to 'btl/generic_bench')
-rw-r--r-- btl/generic_bench/bench.hh 112
-rw-r--r-- btl/generic_bench/timers/distributed_perf_analyzer_node.hh 78
-rw-r--r-- btl/generic_bench/timers/distributed_perf_analyzer_root.hh 94
-rw-r--r-- btl/generic_bench/timers/portable_perf_analyzer.hh 10
4 files changed, 194 insertions, 100 deletions
diff --git a/btl/generic_bench/bench.hh b/btl/generic_bench/bench.hh
index d9906a4..2a5ba36 100644
--- a/btl/generic_bench/bench.hh
+++ b/btl/generic_bench/bench.hh
@@ -29,21 +29,19 @@
#include <vector>
#include <string>
#include "timers/portable_perf_analyzer.hh"
+#include "timers/distributed_perf_analyzer_root.hh"
+#include "timers/distributed_perf_analyzer_node.hh"
// #include "timers/mixed_perf_analyzer.hh"
// #include "timers/x86_perf_analyzer.hh"
// #include "timers/STL_perf_analyzer.hh"
#ifdef HAVE_MKL
extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
#endif
-using namespace std;
template <template<class> class Perf_Analyzer, class Action>
BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent = false )
{
- if (BtlConfig::skipAction(Action::name()))
- return;
-
- string filename="bench_"+Action::name()+".dat";
+ std::string filename = "bench_"+Action::name()+".dat";
if (!silent) { INFOS("starting " <<filename); }
@@ -55,116 +53,44 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silen
// matrices and vector size calculations
size_lin_log(nb_point,size_min,size_max,tab_sizes);
- std::vector<int> oldSizes;
- std::vector<double> oldFlops;
- bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true);
- int oldi = oldSizes.size() - 1;
-
// loop on matrix size
Perf_Analyzer<Action> perf_action;
- for (int i=nb_point-1;i>=0;i--)
+ for (int i=nb_point-1; i>=0; i--)
{
- //INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
if (!silent)
std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
BTL_DISABLE_SSE_EXCEPTIONS();
- #ifdef HAVE_MKL
- {
- float dummy;
- cblas_saxpy(1,0,&dummy,1,&dummy,1);
- }
- #endif
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i], silent);
- if (!silent) std::cout << tab_mflops[i];
-
- if (hasOldResults)
- {
- while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
- --oldi;
- if (oldi>=0 && oldSizes[oldi]==tab_sizes[i] && !silent)
- {
- if (oldFlops[oldi]<tab_mflops[i])
- std::cout << "\t > ";
- else
- std::cout << "\t < ";
- std::cout << oldFlops[oldi];
- }
- --oldi;
- }
if (!silent)
- std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
- }
-
- if (!BtlConfig::Instance.overwriteResults)
- {
- if (hasOldResults)
- {
- // merge the two data
- std::vector<int> newSizes;
- std::vector<double> newFlops;
- int i=0;
- int j=0;
- while (i<tab_sizes.size() && j<oldSizes.size())
- {
- if (tab_sizes[i] == oldSizes[j])
- {
- newSizes.push_back(tab_sizes[i]);
- newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
- ++i;
- ++j;
- }
- else if (tab_sizes[i] < oldSizes[j])
- {
- newSizes.push_back(tab_sizes[i]);
- newFlops.push_back(tab_mflops[i]);
- ++i;
- }
- else
- {
- newSizes.push_back(oldSizes[j]);
- newFlops.push_back(oldFlops[j]);
- ++j;
- }
- }
- while (i<tab_sizes.size())
- {
- newSizes.push_back(tab_sizes[i]);
- newFlops.push_back(tab_mflops[i]);
- ++i;
- }
- while (j<oldSizes.size())
- {
- newSizes.push_back(oldSizes[j]);
- newFlops.push_back(oldFlops[j]);
- ++j;
- }
- tab_mflops = newFlops;
- tab_sizes = newSizes;
- }
+ std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
}
// dump the result in a file :
- if (!silent) dump_xy_file(tab_sizes,tab_mflops,filename);
+ if (!silent) dump_xy_file(tab_sizes, tab_mflops, filename);
}
// default Perf Analyzer
template <class Action>
-BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent ){
-
- // if the rdtsc is not available :
+BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent = false)
+{
bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point,silent);
- // if the rdtsc is available :
-// bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-
+}
- // Only for small problem size. Otherwize it will be too long
-// bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-// bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+// Distributed Perf Analyzer entry point: the process whose BLACS id is 0 benchmarks with the Root analyzer, every other process with the Node analyzer.
+template <class Action>
+BTL_DONT_INLINE void distr_bench( int size_min, int size_max, int nb_point, bool silent = false)
+{
+ int myid, nproc; // filled in by blacs_pinfo_ below; nproc is not used afterwards
+ blacs_pinfo_(&myid, &nproc);
+ if (myid) // non-zero id: this process only mirrors the root's calculations
+ bench<Distributed_Perf_Analyzer_Node, Action>(size_min, size_max, nb_point, silent); // NOTE(review): the Node analyzer's eval_mflops returns 0., and bench() prints and calls dump_xy_file whenever !silent -- confirm non-root ranks are always invoked with silent == true
+ else
+ bench<Distributed_Perf_Analyzer_Root, Action>(size_min, size_max, nb_point, silent); // id 0: performs the timing and reports MFlops
}
#endif
diff --git a/btl/generic_bench/timers/distributed_perf_analyzer_node.hh b/btl/generic_bench/timers/distributed_perf_analyzer_node.hh
new file mode 100644
index 0000000..7399d30
--- /dev/null
+++ b/btl/generic_bench/timers/distributed_perf_analyzer_node.hh
@@ -0,0 +1,78 @@
+#ifndef _PORTABLE_PERF_ANALYZER_NODE_HH
+#define _PORTABLE_PERF_ANALYZER_NODE_HH
+
+#include "utilities.h"
+#include "timers/portable_timer.hh"
+#include "blacs.h"
+
+template <class Action> // Action must provide Action(int), initialize(), calculate() and check_result()
+class Distributed_Perf_Analyzer_Node{ // Perf analyzer for non-root processes: replays the repetition counts it receives and measures nothing
+public:
+ Distributed_Perf_Analyzer_Node( ):_nb_calc(0){
+ MESSAGE("Distributed_Perf_Analyzer_Node Ctor");
+ int temp, what = 0; // temp is only a placeholder first argument
+ blacs_get_(&temp, &what, &context); // NOTE(review): what = 0 requests the default system context per the BLACS_GET docs -- confirm a grid is set up on it before the broadcasts below
+ };
+ Distributed_Perf_Analyzer_Node( const Distributed_Perf_Analyzer_Node& ){ // copying is unsupported: logs and terminates the process
+ INFOS("Copy Ctor not implemented");
+ exit(0); // NOTE(review): exits with status 0 although this is an error path -- confirm intended
+ };
+ ~Distributed_Perf_Analyzer_Node(){
+ MESSAGE("Distributed_Perf_Analyzer_Node Dtor");
+ };
+
+ BTL_DONT_INLINE double eval_mflops(int size, bool silent = false) // silent is accepted for interface parity but never read here; always returns 0.
+ {
+ Action action(size);
+
+ /* Calibration phase: each positive value received is a repetition count to run; a non-positive value ends the phase */
+ int bcast_receive, iZERO = 0, iONE = 1;
+ igebr2d_(&context, "A", " ", &iONE, &iONE, &bcast_receive, &iONE, &iZERO, &iZERO); // 1x1 integer broadcast receive; trailing zeros name the sender's coordinates (presumably the root at (0,0) -- verify)
+ while (bcast_receive > 0) {
+ _nb_calc = bcast_receive;
+ action.initialize();
+ time_calculate(action);
+ igebr2d_(&context, "A", " ", &iONE, &iONE, &bcast_receive, &iONE, &iZERO, &iZERO);
+ }
+ int tries = -bcast_receive; // the terminating value is the negated number of tries
+
+ /* Optimization phase: tries-1 further runs, each on a freshly constructed Action */
+ for (int i = 1; i < tries; ++i) {
+ Action _action(size);
+ _action.initialize();
+ time_calculate(_action);
+ }
+
+ /* Check phase: run the result check only when the received flag is positive */
+ int do_check;
+ igebr2d_(&context, "A", " ", &iONE, &iONE, &do_check, &iONE, &iZERO, &iZERO);
+ if (do_check > 0) {
+ action.initialize();
+ action.calculate();
+ action.check_result();
+ }
+
+ /* Placeholder return value: no time is measured in this class */
+ return 0.;
+ }
+
+ BTL_DONT_INLINE void time_calculate(Action & action) // runs calculate() once, then _nb_calc more times; returns nothing
+ {
+ // no time measurement is taken here; only the calculations are executed
+ action.calculate();
+ for (int i = 0; i < _nb_calc; ++i) // NOTE(review): int index compared against unsigned long long _nb_calc
+ action.calculate();
+ }
+
+ unsigned long long get_nb_calc() // last repetition count received during calibration (0 until then)
+ {
+ return _nb_calc;
+ }
+
+
+private:
+ int context; // value written by blacs_get_ in the constructor; passed to every igebr2d_ call
+ unsigned long long _nb_calc; // number of calculate() repetitions per time_calculate() call (after the first untimed one)
+};
+
+#endif //_PORTABLE_PERF_ANALYZER_NODE_HH
diff --git a/btl/generic_bench/timers/distributed_perf_analyzer_root.hh b/btl/generic_bench/timers/distributed_perf_analyzer_root.hh
new file mode 100644
index 0000000..ca59738
--- /dev/null
+++ b/btl/generic_bench/timers/distributed_perf_analyzer_root.hh
@@ -0,0 +1,94 @@
+#ifndef _PORTABLE_PERF_ANALYZER_ROOT_HH
+#define _PORTABLE_PERF_ANALYZER_ROOT_HH
+
+#include "utilities.h"
+#include "timers/portable_timer.hh"
+#include "blacs.h"
+
+template <class Action> // Action must provide Action(int), initialize(), calculate(), check_result() and nb_op_base()
+class Distributed_Perf_Analyzer_Root{ // Perf analyzer for the root process: chooses the repetition count, broadcasts it, times the runs and returns MFlops
+public:
+ Distributed_Perf_Analyzer_Root( ):_nb_calc(0), m_time_action(0), _chronos(){
+ MESSAGE("Distributed_Perf_Analyzer_Root Ctor");
+ int temp, what = 0; // temp is only a placeholder first argument
+ blacs_get_(&temp, &what, &context); // NOTE(review): what = 0 requests the default system context per the BLACS_GET docs -- confirm a grid is set up on it before the broadcasts below
+ };
+ Distributed_Perf_Analyzer_Root( const Distributed_Perf_Analyzer_Root & ){ // copying is unsupported: logs and terminates the process
+ INFOS("Copy Ctor not implemented");
+ exit(0); // NOTE(review): exits with status 0 although this is an error path -- confirm intended
+ };
+ ~Distributed_Perf_Analyzer_Root(){
+ MESSAGE("Distributed_Perf_Analyzer_Root Dtor");
+ };
+
+ BTL_DONT_INLINE double eval_mflops(int size, bool silent = false) // returns nb_op_base() / (best time per calculation * 1e6); silent suppresses the per-try printout
+ {
+ Action action(size);
+ m_time_action = 0; // reset per call so the calibration loop below always runs
+ _nb_calc = 0;
+
+ /* Calibration phase: double _nb_calc (starting at 1) until one timed batch reaches MIN_TIME, broadcasting each count before running it */
+ int bcast_send = _nb_calc; // NOTE(review): _nb_calc is unsigned long long, bcast_send is int -- narrowing
+ int iONE = 1;
+ while (m_time_action < MIN_TIME) {
+ _nb_calc = _nb_calc ? 2*_nb_calc : 1;
+ bcast_send = _nb_calc;
+ igebs2d_(&context, "A", " ", &iONE, &iONE, &bcast_send, &iONE); // 1x1 integer broadcast send with scope "A"
+ action.initialize();
+ m_time_action = time_calculate(action);
+ }
+ int tries = BtlConfig::Instance.tries;
+ bcast_send = -tries; // the negated try count is sent as the end-of-calibration message
+ igebs2d_(&context, "A", " ", &iONE, &iONE, &bcast_send, &iONE);
+
+ /* Optimization phase: tries-1 further timed runs on fresh Actions, keeping the smallest batch time */
+ for (int i = 1; i < tries; ++i) {
+ Action _action(size);
+ if (!silent)
+ std::cout << " " << _action.nb_op_base()*_nb_calc/(m_time_action*1e6) << " "; // prints the rate for the best batch time so far, before this try runs
+ _action.initialize();
+ m_time_action = std::min(m_time_action, time_calculate(_action));
+ }
+ double time_action = m_time_action / (double(_nb_calc)); // best batch time divided by the repetition count
+
+ /* Check phase: the flag is 1 only when checkResults is set and size < 128; it is broadcast before being acted on */
+ int do_check = (BtlConfig::Instance.checkResults && size<128) ? 1 : 0;
+ igebs2d_(&context, "A", " ", &iONE, &iONE, &do_check, &iONE);
+ if (do_check > 0) {
+ action.initialize();
+ action.calculate();
+ action.check_result();
+ }
+
+ return action.nb_op_base()/(time_action*1e6);
+ }
+
+ BTL_DONT_INLINE double time_calculate(Action & action) // returns _chronos.user_time() for _nb_calc calls to calculate()
+ {
+ // one call outside the timed region, then _nb_calc calls between start() and stop()
+ action.calculate();
+ _chronos.start();
+ for (int ii=0; ii<_nb_calc; ii++) // NOTE(review): int index compared against unsigned long long _nb_calc
+ {
+ action.calculate();
+ }
+ _chronos.stop();
+ return _chronos.user_time();
+ }
+
+ unsigned long long get_nb_calc() // repetition count chosen by the most recent calibration
+ {
+ return _nb_calc;
+ }
+
+
+private:
+ int context; // value written by blacs_get_ in the constructor; passed to every igebs2d_ call
+ unsigned long long _nb_calc; // number of timed calculate() repetitions per batch
+ double m_time_action; // smallest batch time observed so far, as returned by time_calculate()
+ Portable_Timer _chronos; // timer started and stopped around the timed loop
+
+};
+
+#endif //_PORTABLE_PERF_ANALYZER_ROOT_HH
+
diff --git a/btl/generic_bench/timers/portable_perf_analyzer.hh b/btl/generic_bench/timers/portable_perf_analyzer.hh
index 161992f..a8c261f 100644
--- a/btl/generic_bench/timers/portable_perf_analyzer.hh
+++ b/btl/generic_bench/timers/portable_perf_analyzer.hh
@@ -42,12 +42,8 @@ public:
{
Action action(size);
-// action.initialize();
-// time_action = time_calculate(action);
- while (m_time_action < MIN_TIME)
- {
- if(_nb_calc==0) _nb_calc = 1;
- else _nb_calc *= 2;
+ while (m_time_action < MIN_TIME) {
+ _nb_calc = _nb_calc ? 2*_nb_calc : 1;
action.initialize();
m_time_action = time_calculate(action);
}
@@ -79,7 +75,7 @@ public:
// time measurement
action.calculate();
_chronos.start();
- for (int ii=0;ii<_nb_calc;ii++)
+ for (int ii=0; ii<_nb_calc; ii++)
{
action.calculate();
}