casacore
PrecTimer.h
Go to the documentation of this file.
1 //# PrecTimer.h: Precision timer to measure elapsed times in a cumulative way
2 //# Copyright (C) 2006
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef CASA_PRECTIMER_H
29 #define CASA_PRECTIMER_H
30 
31 
32 #include <casacore/casa/aips.h>
33 #include <cstdlib>
34 #include <iostream>
35 
36 #if defined __ia64__ && defined __INTEL_COMPILER
37 #include <ia64regs.h>
38 #endif
39 
40 
41 namespace casacore { //# NAMESPACE CASACORE - BEGIN
42 
43 // Forward Declaration.
44 class String;
45 
46 
47 // <summary>
48 // Precision timer to measure elapsed times in a cumulative way
49 // </summary>
50 
51 // <use visibility=export>
52 
53 // <reviewed reviewer="" date="" tests="tPrecTimer" demos="">
54 // </reviewed>
55 
56 // <synopsis>
57 // The PrecTimer supplements the <linkto class=Timer>Timer</linkto> class.
58 // It offers a low-overhead and high-resolution interval timer for use
59 // on i386, x86_64, ia64, and powerpc platforms, using the processor's
60 // timestamp counter that is incremented each cycle.
61 // Put timer.start() and timer.stop() calls around the piece of
62 // code to be timed. Because the timer is cumulative, the total time of
63 // a particular piece of code can be timed.
64 // <note role=caution>
65 // Make sure that start() and stop() calls alternate,
66 // otherwise very strange times will be the result.
67 // </note>
68 //
69 // A timer can be started and stopped multiple times; both the average and
70 // total time, as well as the number of iterations are printed.
71 // The measured time is real time (as opposed to user or system time).
72 // The timer can be used to measure intervals ranging from 10 nanoseconds to a century.
73 //
74 // Multiple timers can be used in a nested way as long as each of them
75 // has independent (matching) start and stop calls.
76 //
77 // The class is more or less a copy of the original written by John Romein
78 // at ASTRON, Dwingeloo, the Netherlands.
79 // </synopsis>
80 
81 // <example>
82 // Here's how to create a timer, start it (the 'start' member function)
83 // and display a breakdown.
84 // <srcblock>
85 // PrecTimer ttimer; // the timer is reset at construction time
86 // PrecTimer ctimer;
87 // ttimer.reset(); // if you want to reset the timer (not needed here)
88 // ttimer.start(); // start the total timer
89 // for (int i=0; i<n; ++i) {
90 // ... do something ...
91 // ctimer.start(); // start the calc timer
92 // ...do some calculation which will be timed...
93 // ctimer.stop(); // and stop it
94 // }
95 // ttimer.stop();
96 // ttimer.show (cout, "Total ");
97 // ctimer.show (cout, "Calculations");
98 // </srcblock>
99 // </example>
100 
101  class PrecTimer {
102  public:
103  // Construct. The timer starts out reset (zero total time, zero count).
104  PrecTimer();
105 
106  // Destruct.
107  ~PrecTimer();
108 
109  // Start the timer. Time accumulates over successive start/stop pairs; use reset() to zero it.
110  void start();
111  // Stop the timer and count one more start/stop interval.
112  void stop();
113 
114  // Reset the timer to zero (both the accumulated time and the count).
115  void reset();
116 
117  // Show real time on cout or a user supplied stream.
118  // <group>
119  void show() const;
120  void show (std::ostream& os) const;
121  // </group>
122 
123  // Show real time on cout or a user supplied
124  // stream preceded by the string parameter.
125  // <group>
126  void show (const String&) const;
127  void show (std::ostream& os, const String& prefix) const;
128  // </group>
129 
130  // Get the real time (in seconds).
131  double getReal() const;
132 
133  // Get the number of times stop() has been called since the last reset.
134  unsigned long long getCount() const;
135 
136  private:
137  void print_time (std::ostream&, double time) const;
138 
// NOTE(review): start()/stop() access u1.s1.total_time_low and
// u1.s1.total_time_high; the corresponding member declarations (listing
// lines 141 and 143) are not visible here. Presumably the two branches
// declare the halves in a different order for PPC - TODO confirm.
139  struct TimeStruct {
140 #if defined __PPC__
142 #else
144 #endif
145  };
// Accumulated time. start() subtracts the current counter value from
// total_time and stop() adds it. Presumably a TimeStruct member s1 overlays
// the same storage (listing line 148 is not visible) - TODO confirm.
146  union Union1 {
147  long long total_time;
149  };
150 
// Start/stop counter. In the i386 + Intel compiler + OpenMP build the 64-bit
// count is overlaid with two ints so stop() can update each half separately.
151 #if defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
152  struct CountStruct {
153  int count_low, count_high;
154  };
155  union Union2 {
156  unsigned long long count;
157  CountStruct s2;
158  };
159 #else
160  struct Union2 {
161  unsigned long long count;
162  };
163 #endif
164 
// NOTE(review): the data members u1 and u2 used by the inline functions are
// presumably declared on listing lines 165-166, which are not visible here.
167 
// NOTE(review): presumably used to convert counter ticks to seconds; both
// are only declared here - verify against the implementation file.
168  static double CPU_speed_in_MHz;
169  static double get_CPU_speed_in_MHz();
170  };
171 
172 
173 
// Put the timer back in its initial state: clear the start/stop count
// and the accumulated time.
174  inline void PrecTimer::reset()
175  {
176  u2.count = 0;
177  u1.total_time = 0;
178  }
179 
// Return the current value of the start/stop counter (incremented by
// stop(), cleared by reset()).
180  inline unsigned long long PrecTimer::getCount() const
181  {
182  return this->u2.count;
183  }
184 
186  {
187  reset();
188  }
189 
191  {}
192 
193 
194  inline void PrecTimer::start()
195  {
196 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
197  asm volatile
198  (
199  "rdtsc\n\t"
200  "shlq $32,%%rdx\n\t"
201  "leaq (%%rax,%%rdx),%%rax\n\t"
202  "lock;subq %%rax,%0"
203  :
204  "+m" (u1.total_time)
205  :
206  :
207  "rax", "rdx"
208  );
209 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
210  asm volatile
211  (
212  "rdtsc\n\t"
213  "lock;subl %%eax,%0\n\t"
214  "lock;sbbl %%edx,%1"
215  :
216  "+m" (u1.s1.total_time_low), "+m" (u1.s1total_time_high)
217  :
218  :
219  "eax", "edx"
220  );
221 #elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
222  unsigned eax, edx;
223 
224  asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));
225 
226  u1.total_time -= ((unsigned long long) edx << 32) + eax;
227 #elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER)
228  asm volatile
229  (
230  "rdtsc\n\t"
231  "subl %%eax, %0\n\t"
232  "sbbl %%edx, %1"
233  :
234  "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
235  :
236  :
237  "eax", "edx"
238  );
239 #elif defined __ia64__ && defined __INTEL_COMPILER
240  u1.total_time -= __getReg(_IA64_REG_AR_ITC);
241 #elif defined __ia64__ && defined __GNUC__
242  long long time;
243  asm volatile ("mov %0=ar.itc" : "=r" (time));
244  u1.total_time -= time;
245 #elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
246  int high, low, retry;
247 
248  asm
249  (
250  "0:\n\t"
251  "mftbu %0\n\t"
252  "mftb %1\n\t"
253  "mftbu %2\n\t"
254  "cmpw %2,%0\n\t"
255  "bne 0b\n\t"
256  "subfc %3,%1,%3\n\t"
257  "subfe %4,%0,%4"
258  :
259  "=r" (high), "=r" (low), "=r" (retry),
260  "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
261  :
262  "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
263  );
264 #endif
265  }
266 
267 
// Stop timing: read the processor's cycle/time-base counter and add it to the
// accumulated total (start() subtracted the counter value earlier), then
// increment the start/stop count. The implementation is selected at compile
// time from the platform and compiler macros; when no time-reading branch
// matches, only the count is incremented.
268  inline void PrecTimer::stop()
269  {
270 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP // 64-bit, lock-prefixed update
271  asm volatile
272  (
273  "rdtsc\n\t"
274  "shlq $32,%%rdx\n\t"
275  "leaq (%%rax,%%rdx),%%rax\n\t"
276  "lock;addq %%rax,%0"
277  :
278  "+m" (u1.total_time)
279  :
280  :
281  "rax", "rdx"
282  );
283 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP // 32-bit, lock-prefixed add-with-carry on both halves
284  asm volatile
285  (
286  "rdtsc\n\t"
287  "lock;addl %%eax, %0\n\t"
288  "lock;adcl %%edx, %1"
289  :
290  "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
291  :
292  :
293  "eax", "edx"
294  );
295 #elif (defined __i386__ || defined __x86_64__) && (defined __PATHSCALE__ || (defined __APPLE__ && defined __APPLE_CC__ && __APPLE_CC__ == 5531))
// These compilers only get the bare rdtsc; the 64-bit addition is done in C++.
296  unsigned eax, edx;
297 
298  asm volatile ("rdtsc\n\t" : "=a" (eax), "=d" (edx));
299  u1.total_time += ((unsigned long long) edx << 32) + eax;
300 #elif (defined __i386__ || defined __x86_64__) && (defined __GNUC__ || defined __INTEL_COMPILER) // x86 default: add-with-carry on both 32-bit halves, no lock prefix
301  asm volatile
302  (
303  "rdtsc\n\t"
304  "addl %%eax, %0\n\t"
305  "adcl %%edx, %1"
306  :
307  "+m" (u1.s1.total_time_low), "+m" (u1.s1.total_time_high)
308  :
309  :
310  "eax", "edx"
311  );
312 #elif defined __ia64__ && defined __INTEL_COMPILER // ia64: interval time counter via intrinsic
313  u1.total_time += __getReg(_IA64_REG_AR_ITC);
314 #elif defined __ia64__ && defined __GNUC__ // ia64: interval time counter via asm
315  long long time;
316  asm volatile ("mov %0=ar.itc" : "=r" (time));
317  u1.total_time += time;
318 #elif defined __PPC__ && (defined __GNUC__ || defined __xlC__)
// PowerPC: read upper, lower, upper time-base halves and loop back to label 0
// until both upper reads agree, then add (with carry) to the total.
319  int high, low, retry;
320 
321  asm
322  (
323  "0:\n\t"
324  "mftbu %0\n\t"
325  "mftb %1\n\t"
326  "mftbu %2\n\t"
327  "cmpw %2,%0\n\t"
328  "bne 0b\n\t"
329  "addc %3,%3,%1\n\t"
330  "adde %4,%4,%0"
331  :
332  "=r" (high), "=r" (low), "=r" (retry),
333  "=r" (u1.s1.total_time_low), "=r" (u1.s1.total_time_high)
334  :
335  "3" (u1.s1.total_time_low), "4" (u1.s1.total_time_high)
336  );
337 #endif
338 
// Second step: count this start/stop interval. The Intel compiler + OpenMP
// builds use lock-prefixed instructions; all other builds use a plain increment.
339 #if defined __x86_64__ && defined __INTEL_COMPILER && defined _OPENMP
340  asm volatile ("lock;addq $1,%0" : "+m" (u2.count));
341 #elif defined __i386__ && defined __INTEL_COMPILER && defined _OPENMP
342  asm volatile
343  (
344  "lock;addl $1,%0\n\t"
345  "lock;adcl $0,%1"
346  :
347  "+m" (u2.s2.count_low), "+m" (u2.s2.count_high)
348  );
349 #else
350  ++u2.count;
351 #endif
352  }
353 
354 } //# NAMESPACE CASACORE - END
355 
356 
357 #endif
static double CPU_speed_in_MHz
Definition: PrecTimer.h:168
void show(std::ostream &os, const String &prefix) const
void reset()
Reset the timer to zero.
Definition: PrecTimer.h:174
void start()
Restart the timer.
Definition: PrecTimer.h:194
void show(std::ostream &os) const
~PrecTimer()
Destruct.
Definition: PrecTimer.h:190
unsigned long long getCount() const
Get the total number of times start/stop is done.
Definition: PrecTimer.h:180
static double get_CPU_speed_in_MHz()
PrecTimer()
Construct.
Definition: PrecTimer.h:185
void show(const String &) const
Show real time on cout or a user supplied stream preceded by the string parameter.
void print_time(std::ostream &, double time) const
double getReal() const
Get the real time (in seconds).
void stop()
Stop the timer.
Definition: PrecTimer.h:268
void show() const
Show real time on cout or a user supplied stream.
String: the storage and methods of handling collections of characters.
Definition: String.h:225
this file contains all the compiler specific defines
Definition: mainpage.dox:28
TableExprNode time(const TableExprNode &node)
Definition: ExprNode.h:1580
unsigned long long count
Definition: PrecTimer.h:161