diff options
author | spiros <andyspiros@gmail.com> | 2011-07-18 02:36:22 +0200 |
---|---|---|
committer | spiros <andyspiros@gmail.com> | 2011-07-18 02:36:22 +0200 |
commit | 63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9 (patch) | |
tree | b233a6e387be3cfcfa3261d62d0c67358aa044ee /btl/generic_bench | |
parent | Initial work on distributed-memory benchmarks. (diff) | |
download | auto-numerical-bench-63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9.tar.gz auto-numerical-bench-63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9.tar.bz2 auto-numerical-bench-63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9.zip |
Much work on distributed-memory BTL.
Diffstat (limited to 'btl/generic_bench')
-rw-r--r-- | btl/generic_bench/bench.hh | 112 | ||||
-rw-r--r-- | btl/generic_bench/timers/distributed_perf_analyzer_node.hh | 78 | ||||
-rw-r--r-- | btl/generic_bench/timers/distributed_perf_analyzer_root.hh | 94 | ||||
-rw-r--r-- | btl/generic_bench/timers/portable_perf_analyzer.hh | 10 |
4 files changed, 194 insertions, 100 deletions
diff --git a/btl/generic_bench/bench.hh b/btl/generic_bench/bench.hh index d9906a4..2a5ba36 100644 --- a/btl/generic_bench/bench.hh +++ b/btl/generic_bench/bench.hh @@ -29,21 +29,19 @@ #include <vector> #include <string> #include "timers/portable_perf_analyzer.hh" +#include "timers/distributed_perf_analyzer_root.hh" +#include "timers/distributed_perf_analyzer_node.hh" // #include "timers/mixed_perf_analyzer.hh" // #include "timers/x86_perf_analyzer.hh" // #include "timers/STL_perf_analyzer.hh" #ifdef HAVE_MKL extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int); #endif -using namespace std; template <template<class> class Perf_Analyzer, class Action> BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent = false ) { - if (BtlConfig::skipAction(Action::name())) - return; - - string filename="bench_"+Action::name()+".dat"; + std::string filename = "bench_"+Action::name()+".dat"; if (!silent) { INFOS("starting " <<filename); } @@ -55,116 +53,44 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silen // matrices and vector size calculations size_lin_log(nb_point,size_min,size_max,tab_sizes); - std::vector<int> oldSizes; - std::vector<double> oldFlops; - bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true); - int oldi = oldSizes.size() - 1; - // loop on matrix size Perf_Analyzer<Action> perf_action; - for (int i=nb_point-1;i>=0;i--) + for (int i=nb_point-1; i>=0; i--) { - //INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")"); if (!silent) std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush; BTL_DISABLE_SSE_EXCEPTIONS(); - #ifdef HAVE_MKL - { - float dummy; - cblas_saxpy(1,0,&dummy,1,&dummy,1); - } - #endif tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i], silent); - if (!silent) std::cout << tab_mflops[i]; - - if (hasOldResults) - { - while (oldi>=0 && oldSizes[oldi]>tab_sizes[i]) - --oldi; - if (oldi>=0 && oldSizes[oldi]==tab_sizes[i] && !silent) - { - if (oldFlops[oldi]<tab_mflops[i]) - std::cout << "\t > "; - else - std::cout << "\t < "; - std::cout << oldFlops[oldi]; - } - --oldi; - } if (!silent) - std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl; - } - - if (!BtlConfig::Instance.overwriteResults) - { - if (hasOldResults) - { - // merge the two data - std::vector<int> newSizes; - std::vector<double> newFlops; - int i=0; - int j=0; - while (i<tab_sizes.size() && j<oldSizes.size()) - { - if (tab_sizes[i] == oldSizes[j]) - { - newSizes.push_back(tab_sizes[i]); - newFlops.push_back(std::max(tab_mflops[i], oldFlops[j])); - ++i; - ++j; - } - else if (tab_sizes[i] < oldSizes[j]) - { - newSizes.push_back(tab_sizes[i]); - newFlops.push_back(tab_mflops[i]); - ++i; - } - else - { - newSizes.push_back(oldSizes[j]); - newFlops.push_back(oldFlops[j]); - ++j; - } - } - while (i<tab_sizes.size()) - { - newSizes.push_back(tab_sizes[i]); - newFlops.push_back(tab_mflops[i]); - ++i; - } - while (j<oldSizes.size()) - { - newSizes.push_back(oldSizes[j]); - newFlops.push_back(oldFlops[j]); - ++j; - } - tab_mflops = newFlops; - tab_sizes = newSizes; - } + std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl; } // dump the result in a file : - if (!silent) dump_xy_file(tab_sizes,tab_mflops,filename); + if (!silent) dump_xy_file(tab_sizes, tab_mflops, filename); } // default Perf Analyzer template <class Action> -BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent ){ - - // if the rdtsc is not available : +BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent = false) +{ bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point,silent); - // if the rdtsc is available : -// bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point); - +} - // Only for small problem size. Otherwize it will be too long -// bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point); -// bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point); +// distributed Perf Analyzer +template <class Action> +BTL_DONT_INLINE void distr_bench( int size_min, int size_max, int nb_point, bool silent = false) +{ + int myid, nproc; + blacs_pinfo_(&myid, &nproc); + if (myid) + bench<Distributed_Perf_Analyzer_Node, Action>(size_min, size_max, nb_point, silent); + else + bench<Distributed_Perf_Analyzer_Root, Action>(size_min, size_max, nb_point, silent); } #endif diff --git a/btl/generic_bench/timers/distributed_perf_analyzer_node.hh b/btl/generic_bench/timers/distributed_perf_analyzer_node.hh new file mode 100644 index 0000000..7399d30 --- /dev/null +++ b/btl/generic_bench/timers/distributed_perf_analyzer_node.hh @@ -0,0 +1,78 @@ +#ifndef _PORTABLE_PERF_ANALYZER_NODE_HH +#define _PORTABLE_PERF_ANALYZER_NODE_HH + +#include "utilities.h" +#include "timers/portable_timer.hh" +#include "blacs.h" + +template <class Action> +class Distributed_Perf_Analyzer_Node{ +public: + Distributed_Perf_Analyzer_Node( ):_nb_calc(0){ + MESSAGE("Distributed_Perf_Analyzer_Node Ctor"); + int temp, what = 0; + blacs_get_(&temp, &what, &context); + }; + Distributed_Perf_Analyzer_Node( const Distributed_Perf_Analyzer_Node& ){ + INFOS("Copy Ctor not implemented"); + exit(0); + }; + ~Distributed_Perf_Analyzer_Node(){ + MESSAGE("Distributed_Perf_Analyzer_Node Dtor"); + }; + + BTL_DONT_INLINE double eval_mflops(int size, bool silent = false) + { + Action action(size); + + /* Find best _nb_calc_ */ + int bcast_receive, iZERO = 0, iONE = 1; + igebr2d_(&context, "A", " ", &iONE, &iONE, &bcast_receive, &iONE, &iZERO, &iZERO); + while (bcast_receive > 0) { + _nb_calc = bcast_receive; + action.initialize(); + time_calculate(action); + igebr2d_(&context, "A", " ", &iONE, &iONE, &bcast_receive, &iONE, &iZERO, &iZERO); + } + int tries = -bcast_receive; + + /* Optimize */ + for (int i = 1; i < tries; ++i) { + Action _action(size); + _action.initialize(); + time_calculate(_action); + } + + /* Check */ + int do_check; + igebr2d_(&context, "A", " ", &iONE, &iONE, &do_check, &iONE, &iZERO, &iZERO); + if (do_check > 0) { + action.initialize(); + action.calculate(); + action.check_result(); + } + + /* Return a void value */ + return 0.; + } + + BTL_DONT_INLINE void time_calculate(Action & action) + { + // no need for time measurement + action.calculate(); + for (int i = 0; i < _nb_calc; ++i) + action.calculate(); + } + + unsigned long long get_nb_calc() + { + return _nb_calc; + } + + +private: + int context; + unsigned long long _nb_calc; +}; + +#endif //_PORTABLE_PERF_ANALYZER_NODE_HH diff --git a/btl/generic_bench/timers/distributed_perf_analyzer_root.hh b/btl/generic_bench/timers/distributed_perf_analyzer_root.hh new file mode 100644 index 0000000..ca59738 --- /dev/null +++ b/btl/generic_bench/timers/distributed_perf_analyzer_root.hh @@ -0,0 +1,94 @@ +#ifndef _PORTABLE_PERF_ANALYZER_ROOT_HH +#define _PORTABLE_PERF_ANALYZER_ROOT_HH + +#include "utilities.h" +#include "timers/portable_timer.hh" +#include "blacs.h" + +template <class Action> +class Distributed_Perf_Analyzer_Root{ +public: + Distributed_Perf_Analyzer_Root( ):_nb_calc(0), m_time_action(0), _chronos(){ + MESSAGE("Distributed_Perf_Analyzer_Root Ctor"); + int temp, what = 0; + blacs_get_(&temp, &what, &context); + }; + Distributed_Perf_Analyzer_Root( const Distributed_Perf_Analyzer_Root & ){ + INFOS("Copy Ctor not implemented"); + exit(0); + }; + ~Distributed_Perf_Analyzer_Root(){ + MESSAGE("Distributed_Perf_Analyzer_Root Dtor"); + }; + + BTL_DONT_INLINE double eval_mflops(int size, bool silent = false) + { + Action action(size); + m_time_action = 0; + _nb_calc = 0; + + /* Find best _nb_calc_ */ + int bcast_send = _nb_calc; + int iONE = 1; + while (m_time_action < MIN_TIME) { + _nb_calc = _nb_calc ? 2*_nb_calc : 1; + bcast_send = _nb_calc; + igebs2d_(&context, "A", " ", &iONE, &iONE, &bcast_send, &iONE); + action.initialize(); + m_time_action = time_calculate(action); + } + int tries = BtlConfig::Instance.tries; + bcast_send = -tries; + igebs2d_(&context, "A", " ", &iONE, &iONE, &bcast_send, &iONE); + + /* Optimize */ + for (int i = 1; i < tries; ++i) { + Action _action(size); + if (!silent) + std::cout << " " << _action.nb_op_base()*_nb_calc/(m_time_action*1e6) << " "; + _action.initialize(); + m_time_action = std::min(m_time_action, time_calculate(_action)); + } + double time_action = m_time_action / (double(_nb_calc)); + + /* Check */ + int do_check = (BtlConfig::Instance.checkResults && size<128) ? 1 : 0; + igebs2d_(&context, "A", " ", &iONE, &iONE, &do_check, &iONE); + if (do_check > 0) { + action.initialize(); + action.calculate(); + action.check_result(); + } + + return action.nb_op_base()/(time_action*1e6); + } + + BTL_DONT_INLINE double time_calculate(Action & action) + { + // time measurement + action.calculate(); + _chronos.start(); + for (int ii=0; ii<_nb_calc; ii++) + { + action.calculate(); + } + _chronos.stop(); + return _chronos.user_time(); + } + + unsigned long long get_nb_calc() + { + return _nb_calc; + } + + +private: + int context; + unsigned long long _nb_calc; + double m_time_action; + Portable_Timer _chronos; + +}; + +#endif //_PORTABLE_PERF_ANALYZER_ROOT_HH + diff --git a/btl/generic_bench/timers/portable_perf_analyzer.hh b/btl/generic_bench/timers/portable_perf_analyzer.hh index 161992f..a8c261f 100644 --- a/btl/generic_bench/timers/portable_perf_analyzer.hh +++ b/btl/generic_bench/timers/portable_perf_analyzer.hh @@ -42,12 +42,8 @@ public: { Action action(size); -// action.initialize(); -// time_action = time_calculate(action); - while (m_time_action < MIN_TIME) - { - if(_nb_calc==0) _nb_calc = 1; - else _nb_calc *= 2; + while (m_time_action < MIN_TIME) { + _nb_calc = _nb_calc ? 2*_nb_calc : 1; action.initialize(); m_time_action = time_calculate(action); } @@ -79,7 +75,7 @@ public: // time measurement action.calculate(); _chronos.start(); - for (int ii=0;ii<_nb_calc;ii++) + for (int ii=0; ii<_nb_calc; ii++) { action.calculate(); } |