Grid 0.7.0
PerfCount.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/PerfCount.h
6
7 Copyright (C) 2015
8
9Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
10Author: Peter Boyle <peterboyle@MacBook-Pro.local>
11Author: paboyle <paboyle@ph.ed.ac.uk>
12
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26
27 See the full license in the file "LICENSE" in the top level distribution directory
28*************************************************************************************/
29/* END LEGAL */
30#ifndef GRID_PERFCOUNT_H
31#define GRID_PERFCOUNT_H
32
33
34#ifndef __SSC_START
35#define __SSC_START
36#define __SSC_STOP
37#endif
38
39#include <sys/time.h>
40#include <ctime>
41#include <chrono>
42#include <string.h>
43#include <unistd.h>
44#include <sys/ioctl.h>
45
46#ifdef __linux__
47#include <syscall.h>
48#include <linux/perf_event.h>
49#else
50#include <sys/syscall.h>
51#endif
52#ifdef __x86_64__
53#ifdef GRID_CUDA
54accelerator_inline uint64_t __rdtsc(void) { return 0; }
55accelerator_inline uint64_t __rdpmc(int ) { return 0; }
56#else
57#include <x86intrin.h>
58#endif
59#endif
60
62
63#ifdef __linux__
/**
 * Thin wrapper around the Linux perf_event_open system call, which is
 * invoked directly through syscall() with __NR_perf_event_open.
 *
 * @param hw_event  attribute block describing the event to open
 * @param pid       process selector passed through to the kernel
 * @param cpu       cpu selector passed through to the kernel
 * @param group_fd  group leader descriptor passed through to the kernel
 * @param flags     flags word passed through to the kernel
 * @return the raw result of syscall(); callers in this file treat -1 as failure
 */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
  // Hand the kernel's result back untouched: the function is declared to
  // return long, so there is no reason to narrow it through an int first.
  return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
73#endif
74
/*
 * Architecture dependent cycle counter.
 *
 * Exactly one branch is compiled in:
 *   - TIMERS_OFF defined : timing disabled, always reports 0
 *   - __bgq__            : value of special purpose register 0x10C
 *                          (presumably the time base - confirm against the
 *                          BG/Q documentation)
 *   - __x86_64__         : value returned by __rdtsc()
 *   - anything else      : no counter available, always reports 0
 */
inline uint64_t cyclecount(void)
{
#if defined(TIMERS_OFF)
  return 0;
#elif defined(__bgq__)
  uint64_t ticks;
  asm volatile ("mfspr %0,0x10C" : "=&r" (ticks) );
  return ticks;
#elif defined(__x86_64__)
  const uint64_t ticks = __rdtsc();
  return ticks;
#else
  return 0;
#endif
}
108
110private:
111
112 typedef struct {
113 uint32_t type;
114 uint64_t config;
115 const char *name;
118
120
121public:
122
131
132public:
133
134 int PCT;
135
136 long long count;
137 long long cycles;
138 int fd;
140 unsigned long long elapsed;
141 uint64_t begin;
142
143 static int NumTypes(void){
145 }
146
148#ifdef __linux__
149 assert(_pct>=0);
151 fd=-1;
152 cyclefd=-1;
153 count=0;
154 cycles=0;
155 PCT =_pct;
156 Open();
157#endif
158 }
159 void Open(void)
160 {
161#ifdef __linux__
162 struct perf_event_attr pe;
163 memset(&pe, 0, sizeof(struct perf_event_attr));
164 pe.size = sizeof(struct perf_event_attr);
165
166 pe.disabled = 1;
167 pe.exclude_kernel = 1;
168 pe.exclude_hv = 1;
169 pe.inherit = 1;
170
171 pe.type = PerformanceCounterConfigs[PCT].type;
172 pe.config= PerformanceCounterConfigs[PCT].config;
173 const char * name = PerformanceCounterConfigs[PCT].name;
174 fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
175 if (fd == -1) {
176 fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
177 perror("Error is");
178 }
179 int norm = PerformanceCounterConfigs[PCT].normalisation;
180 pe.type = PerformanceCounterConfigs[norm].type;
181 pe.config= PerformanceCounterConfigs[norm].config;
182 name = PerformanceCounterConfigs[norm].name;
183 cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
184 if (cyclefd == -1) {
185 fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
186 perror("Error is");
187 }
188#endif
189 }
190
191 void Start(void)
192 {
193#ifdef __linux__
194 if ( fd!= -1) {
195 ::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
196 ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
197 ::ioctl(cyclefd, PERF_EVENT_IOC_RESET, 0);
198 ::ioctl(cyclefd, PERF_EVENT_IOC_ENABLE, 0);
199 }
200 begin =cyclecount();
201#else
202 begin = 0;
203#endif
204 }
205
206 void Stop(void) {
207 count=0;
208 cycles=0;
209#ifdef __linux__
210 ssize_t ign;
211 if ( fd!= -1) {
212 ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
213 ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
214 ign=::read(fd, &count, sizeof(long long));
215 ign+=::read(cyclefd, &cycles, sizeof(long long));
216 assert(ign==2*sizeof(long long));
217 }
219#else
220 elapsed = 0;
221#endif
222
223 }
224 void Report(void) {
225#ifdef __linux__
226 int N = PerformanceCounterConfigs[PCT].normalisation;
227 const char * sn = PerformanceCounterConfigs[N].name ;
228 const char * sc = PerformanceCounterConfigs[PCT].name;
229 std::printf("tsc = %llu %s = %llu %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles,
230 sc, count, sc,sn, (double)count/(double)cycles);
231#else
232 std::printf("%llu cycles \n", elapsed );
233#endif
234 }
235
237 {
238#ifdef __linux__
239 ::close(fd); ::close(cyclefd);
240#endif
241 }
242
243};
244
246
247#endif
#define accelerator_inline
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
uint64_t cyclecount(void)
Definition PerfCount.h:101
PerformanceCounter(int _pct)
Definition PerfCount.h:147
static const PerformanceCounterConfig PerformanceCounterConfigs[]
Definition PerfCount.h:39
static int NumTypes(void)
Definition PerfCount.h:143
unsigned long long elapsed
Definition PerfCount.h:140
void Start(void)
Definition PerfCount.h:191
void Open(void)
Definition PerfCount.h:159
void Stop(void)
Definition PerfCount.h:206
void Report(void)
Definition PerfCount.h:224