Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
HostScaling.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "Kokkos_Core.hpp"

#include "Teuchos_CommandLineProcessor.hpp"
#include "Teuchos_StandardCatchMacros.hpp"

#include "TestStochastic.hpp"
52 
54 
// Algorithms
//
// The three tables below are parallel: entry i of sg_alg_values and entry i of
// sg_alg_names describe the same algorithm, and num_sg_alg is their common
// length. main() also indexes sg_alg_names directly with an SG_Alg value, so
// the enumerator order must match the order of the name table.
enum SG_Alg { ORIG_MAT_FREE, PROD_CRS };
const int num_sg_alg = 2;
const SG_Alg sg_alg_values[] = { ORIG_MAT_FREE, PROD_CRS };
const char *sg_alg_names[] = { "Original Matrix-Free", "Product CRS" };
60 
61 std::vector<double>
62 run_test(const size_t num_cpu, const size_t num_core_per_cpu,
63  const size_t num_threads_per_core,
64  const size_t p, const size_t d, const size_t nGrid, const size_t nIter,
65  const bool symmetric, SG_Alg sg_alg,
66  const std::vector<double>& perf1 = std::vector<double>())
67 {
68  typedef double Scalar;
69  typedef Kokkos::Threads Device;
70  const size_t team_count = num_cpu * num_core_per_cpu;
71  const size_t threads_per_team = num_threads_per_core;
72  Kokkos::Threads::initialize( team_count * threads_per_team );
73 
74  std::vector<int> var_degree( d , p );
75 
76  std::vector<double> perf;
77  if (sg_alg == PROD_CRS)
78  perf =
79  unit_test::test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(var_degree , nGrid , nIter , symmetric );
80  else if (sg_alg == ORIG_MAT_FREE)
81  perf =
82  unit_test::test_original_matrix_free_vec<Scalar,Device,Stokhos::DefaultMultiply>(
83  var_degree , nGrid , nIter , true , symmetric );
84 
85  Kokkos::Threads::finalize();
86 
87  double speed_up;
88  if (perf1.size() > 0)
89  speed_up = perf1[1] / perf[1];
90  else
91  speed_up = perf[1] / perf[1];
92  double efficiency = speed_up / team_count;
93 
94  std::cout << team_count << " , "
95  << nGrid << " , "
96  << d << " , "
97  << p << " , "
98  << perf[1] << " , "
99  << perf[2] << " , "
100  << speed_up << " , "
101  << 100.0 * efficiency << " , "
102  << std::endl;
103 
104  return perf;
105 }
106 
107 int main(int argc, char *argv[])
108 {
109  bool success = true;
110 
111  try {
112  // Setup command line options
113  Teuchos::CommandLineProcessor CLP;
114  int p = 3;
115  CLP.setOption("p", &p, "Polynomial order");
116  int d = 4;
117  CLP.setOption("d", &d, "Stochastic dimension");
118  int nGrid = 64;
119  CLP.setOption("n", &nGrid, "Number of spatial grid points in each dimension");
120  int nIter = 1;
121  CLP.setOption("niter", &nIter, "Number of iterations");
122  int n_thread_per_core = 1;
123  CLP.setOption("nthread", &n_thread_per_core, "Number of threads per core to use");
124  int n_hyperthreads = 2;
125  CLP.setOption("nht", &n_hyperthreads, "Number of hyperthreads per core available");
126  SG_Alg sg_alg = PROD_CRS;
127  CLP.setOption("alg", &sg_alg, num_sg_alg, sg_alg_values, sg_alg_names,
128  "SG Mat-Vec Algorithm");
129  bool symmetric = true;
130  CLP.setOption("symmetric", "asymmetric", &symmetric, "Use symmetric PDF");
131  CLP.parse( argc, argv );
132 
133  // Detect number of CPUs and number of cores
134  const size_t num_cpu = Kokkos::hwloc::get_available_numa_count();
135  const size_t num_core_per_cpu = Kokkos::hwloc::get_available_cores_per_numa();
136  const size_t core_capacity = Kokkos::hwloc::get_available_threads_per_core();
137  if (static_cast<size_t>(n_thread_per_core) > core_capacity )
138  n_thread_per_core = core_capacity;
139 
140  // Print header
141  std::cout << std::endl
142  << "\"#nCore\" , "
143  << "\"#nGrid\" , "
144  << "\"#Variable\" , "
145  << "\"PolyDegree\" , "
146  << "\"" << sg_alg_names[sg_alg] << " MXV Time\" , "
147  << "\"" << sg_alg_names[sg_alg] << " MXV GFLOPS\" , "
148  << "\"" << sg_alg_names[sg_alg] << " MXV Speedup\" , "
149  << "\"" << sg_alg_names[sg_alg] << " MXV Efficiency\" , "
150  << std::endl ;
151 
152  // Do a serial run to base speedup & efficiency from
153  const std::vector<double> perf1 =
154  run_test(1, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg);
155 
156  // First do 1 core per cpu
157  for (size_t n=2; n<=num_cpu; ++n) {
158  const std::vector<double> perf =
159  run_test(n, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
160  }
161 
162  // Now do all cpus, increasing number of cores
163  for (size_t n=2; n<=num_core_per_cpu; ++n) {
164  const std::vector<double> perf =
165  run_test(num_cpu, n, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
166  }
167 
168  // Now do all cpus, all cores, with nthreads/core
169  const std::vector<double> perf =
170  run_test(num_cpu, num_core_per_cpu, n_thread_per_core, p, d, nGrid,
171  nIter, symmetric, sg_alg, perf1);
172 
173 
174  }
175  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
176 
177  if (!success)
178  return -1;
179  return 0 ;
180 }
const int num_sg_alg
Definition: HostScaling.cpp:57
SG_Alg
Definition: HostScaling.cpp:56
const SG_Alg sg_alg_values[]
Definition: HostScaling.cpp:58
std::vector< double > run_test(const size_t num_cpu, const size_t num_core_per_cpu, const size_t num_threads_per_core, const size_t p, const size_t d, const size_t nGrid, const size_t nIter, const bool symmetric, SG_Alg sg_alg, const std::vector< double > &perf1=std::vector< double >())
Definition: HostScaling.cpp:62
pce_type Scalar
const char * sg_alg_names[]
Definition: HostScaling.cpp:59
int main(int argc, char *argv[])