Grid 0.7.0
MemoryManager.cc
Go to the documentation of this file.
1#include <Grid/GridCore.h>
2
4
/*Allocation types, saying which pointer cache should be used*/
#define Cpu         (0)
#define CpuHuge     (1)
#define CpuSmall    (2)
#define Acc         (3)
#define AccHuge     (4)
#define AccSmall    (5)
#define Shared      (6)
#define SharedHuge  (7)
#define SharedSmall (8)
#undef GRID_MM_VERBOSE

// Running totals (bytes) of live allocations in each memory space.
uint64_t total_shared;
uint64_t total_device;
uint64_t total_host;

// Enable a LeakSanitizer hook when the compiler reports the feature.
#if defined(__has_feature)
#if __has_feature(leak_sanitizer)
#define ASAN_LEAK_CHECK
#endif
#endif

#ifdef ASAN_LEAK_CHECK
#include <sanitizer/asan_interface.h>
#include <sanitizer/common_interface_defs.h>
#include <sanitizer/lsan_interface.h>
#define LEAK_CHECK(A) { __lsan_do_recoverable_leak_check(); }
#else
#define LEAK_CHECK(A) { }
#endif
34
36{
37#ifdef __linux__
38 struct mallinfo mi; // really want mallinfo2, but glibc version isn't uniform
39
40 mi = mallinfo();
41
42 std::cout << "MemoryManager: Total non-mmapped bytes (arena): "<< (size_t)mi.arena<<std::endl;
43 std::cout << "MemoryManager: # of free chunks (ordblks): "<< (size_t)mi.ordblks<<std::endl;
44 std::cout << "MemoryManager: # of free fastbin blocks (smblks): "<< (size_t)mi.smblks<<std::endl;
45 std::cout << "MemoryManager: # of mapped regions (hblks): "<< (size_t)mi.hblks<<std::endl;
46 std::cout << "MemoryManager: Bytes in mapped regions (hblkhd): "<< (size_t)mi.hblkhd<<std::endl;
47 std::cout << "MemoryManager: Max. total allocated space (usmblks): "<< (size_t)mi.usmblks<<std::endl;
48 std::cout << "MemoryManager: Free bytes held in fastbins (fsmblks): "<< (size_t)mi.fsmblks<<std::endl;
49 std::cout << "MemoryManager: Total allocated space (uordblks): "<< (size_t)mi.uordblks<<std::endl;
50 std::cout << "MemoryManager: Total free space (fordblks): "<< (size_t)mi.fordblks<<std::endl;
51 std::cout << "MemoryManager: Topmost releasable block (keepcost): "<< (size_t)mi.keepcost<<std::endl;
52#endif
53 LEAK_CHECK();
54
55}
56
58{
59 std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
60 std::cout << " MemoryManager : PrintBytes "<<std::endl;
61 std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
62 std::cout << " MemoryManager : "<<(total_shared>>20)<<" shared Mbytes "<<std::endl;
63 std::cout << " MemoryManager : "<<(total_device>>20)<<" accelerator Mbytes "<<std::endl;
64 std::cout << " MemoryManager : "<<(total_host>>20) <<" cpu Mbytes "<<std::endl;
65 uint64_t cacheBytes;
66 cacheBytes = CacheBytes[Cpu];
67 std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" cpu cache Mbytes "<<std::endl;
68 cacheBytes = CacheBytes[Acc];
69 std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" acc cache Mbytes "<<std::endl;
70 cacheBytes = CacheBytes[Shared];
71 std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" shared cache Mbytes "<<std::endl;
72
73#ifdef GRID_CUDA
74 cuda_mem();
75#endif
77}
78
81
83// Data tables for recently freed pooiniter caches
87int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 0, 8, 8, 0, 16, 8, 0, 16 };
90// Actual allocation and deallocation utils
93{
94 total_device+=bytes;
95 void *ptr = (void *) Lookup(bytes,Acc);
96 if ( ptr == (void *) NULL ) {
97 ptr = (void *) acceleratorAllocDevice(bytes);
98 }
99#ifdef GRID_MM_VERBOSE
100 std::cout <<"AcceleratorAllocate "<<std::endl;
101 PrintBytes();
102#endif
103 return ptr;
104}
105void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
106{
107 total_device-=bytes;
108 void *__freeme = Insert(ptr,bytes,Acc);
109 if ( __freeme ) {
110 acceleratorFreeDevice(__freeme);
111 }
112#ifdef GRID_MM_VERBOSE
113 std::cout <<"AcceleratorFree "<<std::endl;
114 PrintBytes();
115#endif
116}
118{
119 total_shared+=bytes;
120 void *ptr = (void *) Lookup(bytes,Shared);
121 if ( ptr == (void *) NULL ) {
122 ptr = (void *) acceleratorAllocShared(bytes);
123 }
124#ifdef GRID_MM_VERBOSE
125 std::cout <<"SharedAllocate "<<std::endl;
126 PrintBytes();
127#endif
128 return ptr;
129}
130void MemoryManager::SharedFree (void *ptr,size_t bytes)
131{
132 total_shared-=bytes;
133 void *__freeme = Insert(ptr,bytes,Shared);
134 if ( __freeme ) {
135 acceleratorFreeShared(__freeme);
136 }
137#ifdef GRID_MM_VERBOSE
138 std::cout <<"SharedFree "<<std::endl;
139 PrintBytes();
140#endif
141}
142#ifdef GRID_UVM
143void *MemoryManager::CpuAllocate(size_t bytes)
144{
145 total_host+=bytes;
146 void *ptr = (void *) Lookup(bytes,Cpu);
147 if ( ptr == (void *) NULL ) {
148 ptr = (void *) acceleratorAllocShared(bytes);
149 }
150#ifdef GRID_MM_VERBOSE
151 std::cout <<"CpuAllocate "<<std::endl;
152 PrintBytes();
153#endif
154 return ptr;
155}
156void MemoryManager::CpuFree (void *_ptr,size_t bytes)
157{
158 total_host-=bytes;
159 NotifyDeletion(_ptr);
160 void *__freeme = Insert(_ptr,bytes,Cpu);
161 if ( __freeme ) {
162 acceleratorFreeShared(__freeme);
163 }
164#ifdef GRID_MM_VERBOSE
165 std::cout <<"CpuFree "<<std::endl;
166 PrintBytes();
167#endif
168}
169#else
170void *MemoryManager::CpuAllocate(size_t bytes)
171{
172 total_host+=bytes;
173 void *ptr = (void *) Lookup(bytes,Cpu);
174 if ( ptr == (void *) NULL ) {
175 ptr = (void *) acceleratorAllocCpu(bytes);
176 }
177#ifdef GRID_MM_VERBOSE
178 std::cout <<"CpuAllocate "<<std::endl;
179 PrintBytes();
180#endif
181 return ptr;
182}
183void MemoryManager::CpuFree (void *_ptr,size_t bytes)
184{
185 total_host-=bytes;
186 NotifyDeletion(_ptr);
187 void *__freeme = Insert(_ptr,bytes,Cpu);
188 if ( __freeme ) {
189 acceleratorFreeCpu(__freeme);
190 }
191#ifdef GRID_MM_VERBOSE
192 std::cout <<"CpuFree "<<std::endl;
193 PrintBytes();
194#endif
195}
196#endif
197
199// call only once
202{
203
204 char * str;
205 int Nc;
206
207 str= getenv("GRID_ALLOC_NCACHE_LARGE");
208 if ( str ) {
209 Nc = atoi(str);
210 if ( (Nc>=0) && (Nc < NallocCacheMax)) {
211 Ncache[Cpu]=Nc;
212 Ncache[Acc]=Nc;
214 }
215 }
216
217 str= getenv("GRID_ALLOC_NCACHE_HUGE");
218 if ( str ) {
219 Nc = atoi(str);
220 if ( (Nc>=0) && (Nc < NallocCacheMax)) {
224 }
225 }
226
227 str= getenv("GRID_ALLOC_NCACHE_SMALL");
228 if ( str ) {
229 Nc = atoi(str);
230 if ( (Nc>=0) && (Nc < NallocCacheMax)) {
234 }
235 }
236
237}
238
240
241#ifndef GRID_UVM
242 std::cout << GridLogMessage << "MemoryManager Cache "<< MemoryManager::DeviceMaxBytes <<" bytes "<<std::endl;
243#endif
244
245 std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<<std::endl;
246#ifdef ALLOCATION_CACHE
247 std::cout << GridLogMessage<< "MemoryManager::Init() cache pool for recent host allocations: SMALL "<<Ncache[CpuSmall]<<" LARGE "<<Ncache[Cpu]<<" HUGE "<<Ncache[CpuHuge]<<std::endl;
248 std::cout << GridLogMessage<< "MemoryManager::Init() cache pool for recent device allocations: SMALL "<<Ncache[AccSmall]<<" LARGE "<<Ncache[Acc]<<" Huge "<<Ncache[AccHuge]<<std::endl;
249 std::cout << GridLogMessage<< "MemoryManager::Init() cache pool for recent shared allocations: SMALL "<<Ncache[SharedSmall]<<" LARGE "<<Ncache[Shared]<<" Huge "<<Ncache[SharedHuge]<<std::endl;
250#endif
251
252#ifdef GRID_UVM
253 std::cout << GridLogMessage<< "MemoryManager::Init() Unified memory space"<<std::endl;
254#ifdef GRID_CUDA
255 std::cout << GridLogMessage<< "MemoryManager::Init() Using cudaMallocManaged"<<std::endl;
256#endif
257#ifdef GRID_HIP
258 std::cout << GridLogMessage<< "MemoryManager::Init() Using hipMallocManaged"<<std::endl;
259#endif
260#ifdef GRID_SYCL
261 std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
262#endif
263#else
264 std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
265#ifdef GRID_CUDA
266 std::cout << GridLogMessage<< "MemoryManager::Init() Using cudaMalloc"<<std::endl;
267#endif
268#ifdef GRID_HIP
269 std::cout << GridLogMessage<< "MemoryManager::Init() Using hipMalloc"<<std::endl;
270#endif
271#ifdef GRID_SYCL
272 std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
273#endif
274#endif
275
276}
277
278void *MemoryManager::Insert(void *ptr,size_t bytes,int type)
279{
280#ifdef ALLOCATION_CACHE
281 int cache;
282 if (bytes < GRID_ALLOC_SMALL_LIMIT) cache = type + 2;
283 else if (bytes >= GRID_ALLOC_HUGE_LIMIT) cache = type + 1;
284 else cache = type;
285
286 return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache],CacheBytes[cache]);
287#else
288 return ptr;
289#endif
290}
291
292void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim, uint64_t &cacheBytes)
293{
294#ifdef GRID_OMP
295 assert(omp_in_parallel()==0);
296#endif
297
298 if (ncache == 0) return ptr;
299
300 void * ret = NULL;
301 int v = -1;
302
303 for(int e=0;e<ncache;e++) {
304 if ( entries[e].valid==0 ) {
305 v=e;
306 break;
307 }
308 }
309
310 if ( v==-1 ) {
311 v=victim;
312 victim = (victim+1)%ncache;
313 }
314
315 if ( entries[v].valid ) {
316 ret = entries[v].address;
317 cacheBytes -= entries[v].bytes;
318 entries[v].valid = 0;
319 entries[v].address = NULL;
320 entries[v].bytes = 0;
321 }
322
323 entries[v].address=ptr;
324 entries[v].bytes =bytes;
325 entries[v].valid =1;
326 cacheBytes += bytes;
327
328 return ret;
329}
330
331void *MemoryManager::Lookup(size_t bytes,int type)
332{
333#ifdef ALLOCATION_CACHE
334 int cache;
335 if (bytes < GRID_ALLOC_SMALL_LIMIT) cache = type + 2;
336 else if (bytes >= GRID_ALLOC_HUGE_LIMIT) cache = type + 1;
337 else cache = type;
338
339 return Lookup(bytes,Entries[cache],Ncache[cache],CacheBytes[cache]);
340#else
341 return NULL;
342#endif
343}
344
345void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t & cacheBytes)
346{
347#ifdef GRID_OMP
348 assert(omp_in_parallel()==0);
349#endif
350 for(int e=0;e<ncache;e++){
351 if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
352 entries[e].valid = 0;
353 cacheBytes -= entries[e].bytes;
354 return entries[e].address;
355 }
356 }
357 return NULL;
358}
359
360
362
void * acceleratorAllocShared(size_t bytes)
void * acceleratorAllocDevice(size_t bytes)
void acceleratorFreeShared(void *ptr)
void acceleratorFreeCpu(void *ptr)
void * acceleratorAllocCpu(size_t bytes)
void acceleratorFreeDevice(void *ptr)
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
#define SharedSmall
#define AccHuge
uint64_t total_shared
#define Acc
#define LEAK_CHECK(A)
#define Shared
uint64_t total_host
uint64_t total_device
#define AccSmall
#define SharedHuge
#define CpuHuge
#define CpuSmall
#define Cpu
#define GRID_ALLOC_SMALL_LIMIT
#define GRID_ALLOC_HUGE_LIMIT
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
static constexpr int Nc
Definition QCD.h:50
static int Victim[NallocType]
static const int NallocCacheMax
static uint64_t DeviceCacheBytes()
static void * Lookup(size_t bytes, int type)
static void CpuFree(void *ptr, size_t bytes)
static const int NallocType
static void PrintBytes(void)
static void InitMessage(void)
static void DisplayMallinfo(void)
static uint64_t HostCacheBytes()
static void * Insert(void *ptr, size_t bytes, int type)
static uint64_t DeviceMaxBytes
static void * CpuAllocate(size_t bytes)
static int Ncache[NallocType]
static void NotifyDeletion(void *CpuPtr)
static uint64_t CacheBytes[NallocType]
static void Init(void)
static void SharedFree(void *ptr, size_t bytes)
static AllocationCacheEntry Entries[NallocType][NallocCacheMax]
static void * SharedAllocate(size_t bytes)
static void * AcceleratorAllocate(size_t bytes)
static void AcceleratorFree(void *ptr, size_t bytes)