diff options
author | Mike Pall <mike> | 2016-04-18 10:57:49 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2016-04-18 10:57:49 +0200 |
commit | 0c6fdc1039a3a4450d366fba7af4b29de73f0dc6 (patch) | |
tree | b84b2aad8ae2c8ab62752653b4e1688de96d3afe | |
parent | 101115ddd86f6e66f225eee4702793623e683007 (diff) | |
download | luajit-0c6fdc1039a3a4450d366fba7af4b29de73f0dc6.tar.gz luajit-0c6fdc1039a3a4450d366fba7af4b29de73f0dc6.tar.bz2 luajit-0c6fdc1039a3a4450d366fba7af4b29de73f0dc6.zip |
Rewrite memory block allocator.
Use a mix of linear probing and pseudo-random probing.
Workaround for 1GB MAP_32BIT limit on Linux/x64. Now 2GB with !LJ_GC64.
Enforce 128TB LJ_GC64 limit for > 47 bit memory layouts (ARM64).
-rw-r--r-- | src/lj_alloc.c | 255 |
1 files changed, 170 insertions, 85 deletions
diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 32de45ec..5d0834ec 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c | |||
@@ -72,13 +72,56 @@ | |||
72 | 72 | ||
73 | #define IS_DIRECT_BIT (SIZE_T_ONE) | 73 | #define IS_DIRECT_BIT (SIZE_T_ONE) |
74 | 74 | ||
75 | |||
76 | /* Determine system-specific block allocation method. */ | ||
75 | #if LJ_TARGET_WINDOWS | 77 | #if LJ_TARGET_WINDOWS |
76 | 78 | ||
77 | #define WIN32_LEAN_AND_MEAN | 79 | #define WIN32_LEAN_AND_MEAN |
78 | #include <windows.h> | 80 | #include <windows.h> |
79 | 81 | ||
82 | #define LJ_ALLOC_VIRTUALALLOC 1 | ||
83 | |||
80 | #if LJ_64 && !LJ_GC64 | 84 | #if LJ_64 && !LJ_GC64 |
85 | #define LJ_ALLOC_NTAVM 1 | ||
86 | #endif | ||
87 | |||
88 | #else | ||
89 | |||
90 | #include <errno.h> | ||
91 | /* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ | ||
92 | #include <sys/mman.h> | ||
93 | |||
94 | #define LJ_ALLOC_MMAP 1 | ||
95 | |||
96 | #if LJ_64 | ||
97 | |||
98 | #define LJ_ALLOC_MMAP_PROBE 1 | ||
99 | |||
100 | #if LJ_GC64 | ||
101 | #define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ | ||
102 | #elif LJ_TARGET_X64 && LJ_HASJIT | ||
103 | /* Due to limitations in the x64 compiler backend. */ | ||
104 | #define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */ | ||
105 | #else | ||
106 | #define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */ | ||
107 | #endif | ||
108 | |||
109 | #endif | ||
110 | |||
111 | #if LJ_64 && !LJ_GC64 && defined(MAP_32BIT) | ||
112 | #define LJ_ALLOC_MMAP32 1 | ||
113 | #endif | ||
114 | |||
115 | #if LJ_TARGET_LINUX | ||
116 | #define LJ_ALLOC_MREMAP 1 | ||
117 | #endif | ||
118 | |||
119 | #endif | ||
81 | 120 | ||
121 | |||
122 | #if LJ_ALLOC_VIRTUALALLOC | ||
123 | |||
124 | #if LJ_ALLOC_NTAVM | ||
82 | /* Undocumented, but hey, that's what we all love so much about Windows. */ | 125 | /* Undocumented, but hey, that's what we all love so much about Windows. */ |
83 | typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, | 126 | typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, |
84 | size_t *size, ULONG alloctype, ULONG prot); | 127 | size_t *size, ULONG alloctype, ULONG prot); |
@@ -89,14 +132,15 @@ static PNTAVM ntavm; | |||
89 | */ | 132 | */ |
90 | #define NTAVM_ZEROBITS 1 | 133 | #define NTAVM_ZEROBITS 1 |
91 | 134 | ||
92 | static void INIT_MMAP(void) | 135 | static void init_mmap(void) |
93 | { | 136 | { |
94 | ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), | 137 | ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), |
95 | "NtAllocateVirtualMemory"); | 138 | "NtAllocateVirtualMemory"); |
96 | } | 139 | } |
140 | #define INIT_MMAP() init_mmap() | ||
97 | 141 | ||
98 | /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ | 142 | /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ |
99 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 143 | static void *CALL_MMAP(size_t size) |
100 | { | 144 | { |
101 | DWORD olderr = GetLastError(); | 145 | DWORD olderr = GetLastError(); |
102 | void *ptr = NULL; | 146 | void *ptr = NULL; |
@@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) | |||
107 | } | 151 | } |
108 | 152 | ||
109 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ | 153 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ |
110 | static LJ_AINLINE void *DIRECT_MMAP(size_t size) | 154 | static void *DIRECT_MMAP(size_t size) |
111 | { | 155 | { |
112 | DWORD olderr = GetLastError(); | 156 | DWORD olderr = GetLastError(); |
113 | void *ptr = NULL; | 157 | void *ptr = NULL; |
@@ -119,10 +163,8 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) | |||
119 | 163 | ||
120 | #else | 164 | #else |
121 | 165 | ||
122 | #define INIT_MMAP() ((void)0) | ||
123 | |||
124 | /* Win32 MMAP via VirtualAlloc */ | 166 | /* Win32 MMAP via VirtualAlloc */ |
125 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 167 | static void *CALL_MMAP(size_t size) |
126 | { | 168 | { |
127 | DWORD olderr = GetLastError(); | 169 | DWORD olderr = GetLastError(); |
128 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); | 170 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); |
@@ -131,7 +173,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) | |||
131 | } | 173 | } |
132 | 174 | ||
133 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ | 175 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ |
134 | static LJ_AINLINE void *DIRECT_MMAP(size_t size) | 176 | static void *DIRECT_MMAP(size_t size) |
135 | { | 177 | { |
136 | DWORD olderr = GetLastError(); | 178 | DWORD olderr = GetLastError(); |
137 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, | 179 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, |
@@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) | |||
143 | #endif | 185 | #endif |
144 | 186 | ||
145 | /* This function supports releasing coalesced segments */ | 187 | /* This function supports releasing coalesced segments */ |
146 | static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | 188 | static int CALL_MUNMAP(void *ptr, size_t size) |
147 | { | 189 | { |
148 | DWORD olderr = GetLastError(); | 190 | DWORD olderr = GetLastError(); |
149 | MEMORY_BASIC_INFORMATION minfo; | 191 | MEMORY_BASIC_INFORMATION minfo; |
@@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
163 | return 0; | 205 | return 0; |
164 | } | 206 | } |
165 | 207 | ||
166 | #else | 208 | #elif LJ_ALLOC_MMAP |
167 | |||
168 | #include <errno.h> | ||
169 | #include <sys/mman.h> | ||
170 | 209 | ||
171 | #define MMAP_PROT (PROT_READ|PROT_WRITE) | 210 | #define MMAP_PROT (PROT_READ|PROT_WRITE) |
172 | #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) | 211 | #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) |
@@ -174,107 +213,145 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
174 | #endif | 213 | #endif |
175 | #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) | 214 | #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) |
176 | 215 | ||
177 | #if LJ_64 && !LJ_GC64 | 216 | #if LJ_ALLOC_MMAP_PROBE |
178 | /* 64 bit mode with 32 bit pointers needs special support for allocating | ||
179 | ** memory in the lower 2GB. | ||
180 | */ | ||
181 | 217 | ||
182 | #if defined(MAP_32BIT) | 218 | #define LJ_ALLOC_MMAP_PROBE_MAX 30 |
219 | #define LJ_ALLOC_MMAP_PROBE_LINEAR 5 | ||
183 | 220 | ||
184 | #if defined(__sun__) | 221 | #define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) |
185 | #define MMAP_REGION_START ((uintptr_t)0x1000) | 222 | |
186 | #else | 223 | /* No point in a giant ifdef mess. Just try to open /dev/urandom. |
187 | /* Actually this only gives us max. 1GB in current Linux kernels. */ | 224 | ** It doesn't really matter if this fails, since we get some ASLR bits from |
188 | #define MMAP_REGION_START ((uintptr_t)0) | 225 | ** every unsuitable allocation, too. And we prefer linear allocation, anyway. |
189 | #endif | 226 | */ |
227 | #include <fcntl.h> | ||
228 | #include <unistd.h> | ||
190 | 229 | ||
191 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 230 | static uintptr_t mmap_probe_seed(void) |
192 | { | 231 | { |
232 | uintptr_t val; | ||
233 | int fd = open("/dev/urandom", O_RDONLY); | ||
234 | if (fd != -1) { | ||
235 | int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val)); | ||
236 | (void)close(fd); | ||
237 | if (ok) return val; | ||
238 | } | ||
239 | return 1; /* Punt. */ | ||
240 | } | ||
241 | |||
242 | static void *mmap_probe(size_t size) | ||
243 | { | ||
244 | /* Hint for next allocation. Doesn't need to be thread-safe. */ | ||
245 | static uintptr_t hint_addr = 0; | ||
246 | static uintptr_t hint_prng = 0; | ||
193 | int olderr = errno; | 247 | int olderr = errno; |
194 | void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); | 248 | int retry; |
249 | for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { | ||
250 | void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS, -1, 0); | ||
251 | uintptr_t addr = (uintptr_t)p; | ||
252 | if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) { | ||
253 | /* We got a suitable address. Bump the hint address. */ | ||
254 | hint_addr = addr + size; | ||
255 | errno = olderr; | ||
256 | return p; | ||
257 | } | ||
258 | if (p != MFAIL) { | ||
259 | munmap(p, size); | ||
260 | } else if (errno == ENOMEM) { | ||
261 | return MFAIL; | ||
262 | } | ||
263 | if (hint_addr) { | ||
264 | /* First, try linear probing. */ | ||
265 | if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) { | ||
266 | hint_addr += 0x1000000; | ||
267 | if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0) | ||
268 | hint_addr = 0; | ||
269 | continue; | ||
270 | } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) { | ||
271 | /* Next, try a no-hint probe to get back an ASLR address. */ | ||
272 | hint_addr = 0; | ||
273 | continue; | ||
274 | } | ||
275 | } | ||
276 | /* Finally, try pseudo-random probing. */ | ||
277 | if (LJ_UNLIKELY(hint_prng == 0)) { | ||
278 | hint_prng = mmap_probe_seed(); | ||
279 | } | ||
280 | /* The unsuitable address we got has some ASLR PRNG bits. */ | ||
281 | hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1)); | ||
282 | do { /* The PRNG itself is very weak, but see above. */ | ||
283 | hint_prng = hint_prng * 1103515245 + 12345; | ||
284 | hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE; | ||
285 | hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1); | ||
286 | } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); | ||
287 | } | ||
195 | errno = olderr; | 288 | errno = olderr; |
196 | return ptr; | 289 | return MFAIL; |
197 | } | 290 | } |
198 | 291 | ||
199 | #elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN | ||
200 | |||
201 | /* OSX and FreeBSD mmap() use a naive first-fit linear search. | ||
202 | ** That's perfect for us. Except that -pagezero_size must be set for OSX, | ||
203 | ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs | ||
204 | ** to be reduced to 250MB on FreeBSD. | ||
205 | */ | ||
206 | #if LJ_TARGET_OSX || defined(__DragonFly__) | ||
207 | #define MMAP_REGION_START ((uintptr_t)0x10000) | ||
208 | #elif LJ_TARGET_PS4 | ||
209 | #define MMAP_REGION_START ((uintptr_t)0x4000) | ||
210 | #else | ||
211 | #define MMAP_REGION_START ((uintptr_t)0x10000000) | ||
212 | #endif | 292 | #endif |
213 | #define MMAP_REGION_END ((uintptr_t)0x80000000) | ||
214 | 293 | ||
215 | #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 | 294 | #if LJ_ALLOC_MMAP32 |
216 | #include <sys/resource.h> | 295 | |
296 | #if defined(__sun__) | ||
297 | #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) | ||
298 | #else | ||
299 | #define LJ_ALLOC_MMAP32_START ((uintptr_t)0) | ||
217 | #endif | 300 | #endif |
218 | 301 | ||
219 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 302 | static void *mmap_map32(size_t size) |
220 | { | 303 | { |
221 | int olderr = errno; | 304 | #if LJ_ALLOC_MMAP_PROBE |
222 | /* Hint for next allocation. Doesn't need to be thread-safe. */ | 305 | static int fallback = 0; |
223 | static uintptr_t alloc_hint = MMAP_REGION_START; | 306 | if (fallback) |
224 | int retry = 0; | 307 | return mmap_probe(size); |
225 | #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 | ||
226 | static int rlimit_modified = 0; | ||
227 | if (LJ_UNLIKELY(rlimit_modified == 0)) { | ||
228 | struct rlimit rlim; | ||
229 | rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; | ||
230 | setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ | ||
231 | rlimit_modified = 1; | ||
232 | } | ||
233 | #endif | 308 | #endif |
234 | for (;;) { | 309 | { |
235 | void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0); | 310 | int olderr = errno; |
236 | if ((uintptr_t)p >= MMAP_REGION_START && | 311 | void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); |
237 | (uintptr_t)p + size < MMAP_REGION_END) { | 312 | errno = olderr; |
238 | alloc_hint = (uintptr_t)p + size; | 313 | /* This only allows 1GB on Linux. So fall back to probing to get 2GB. */ |
239 | errno = olderr; | 314 | #if LJ_ALLOC_MMAP_PROBE |
240 | return p; | 315 | if (ptr == MFAIL) { |
316 | fallback = 1; | ||
317 | return mmap_probe(size); | ||
241 | } | 318 | } |
242 | if (p != CMFAIL) munmap(p, size); | ||
243 | #if defined(__sun__) || defined(__DragonFly__) | ||
244 | alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */ | ||
245 | if (alloc_hint + size < MMAP_REGION_END) continue; | ||
246 | #endif | 319 | #endif |
247 | if (retry) break; | 320 | return ptr; |
248 | retry = 1; | ||
249 | alloc_hint = MMAP_REGION_START; | ||
250 | } | 321 | } |
251 | errno = olderr; | ||
252 | return CMFAIL; | ||
253 | } | 322 | } |
254 | 323 | ||
255 | #else | ||
256 | |||
257 | #error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS" | ||
258 | |||
259 | #endif | 324 | #endif |
260 | 325 | ||
326 | #if LJ_ALLOC_MMAP32 | ||
327 | #define CALL_MMAP(size) mmap_map32(size) | ||
328 | #elif LJ_ALLOC_MMAP_PROBE | ||
329 | #define CALL_MMAP(size) mmap_probe(size) | ||
261 | #else | 330 | #else |
262 | 331 | static void *CALL_MMAP(size_t size) | |
263 | /* 32 bit mode and GC64 mode is easy. */ | ||
264 | static LJ_AINLINE void *CALL_MMAP(size_t size) | ||
265 | { | 332 | { |
266 | int olderr = errno; | 333 | int olderr = errno; |
267 | void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); | 334 | void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); |
268 | errno = olderr; | 335 | errno = olderr; |
269 | return ptr; | 336 | return ptr; |
270 | } | 337 | } |
271 | |||
272 | #endif | 338 | #endif |
273 | 339 | ||
274 | #define INIT_MMAP() ((void)0) | 340 | #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 |
275 | #define DIRECT_MMAP(s) CALL_MMAP(s) | 341 | |
342 | #include <sys/resource.h> | ||
276 | 343 | ||
277 | static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | 344 | static void init_mmap(void) |
345 | { | ||
346 | struct rlimit rlim; | ||
347 | rlim.rlim_cur = rlim.rlim_max = 0x10000; | ||
348 | setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */ | ||
349 | } | ||
350 | #define INIT_MMAP() init_mmap() | ||
351 | |||
352 | #endif | ||
353 | |||
354 | static int CALL_MUNMAP(void *ptr, size_t size) | ||
278 | { | 355 | { |
279 | int olderr = errno; | 356 | int olderr = errno; |
280 | int ret = munmap(ptr, size); | 357 | int ret = munmap(ptr, size); |
@@ -282,10 +359,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
282 | return ret; | 359 | return ret; |
283 | } | 360 | } |
284 | 361 | ||
285 | #if LJ_TARGET_LINUX | 362 | #if LJ_ALLOC_MREMAP |
286 | /* Need to define _GNU_SOURCE to get the mremap prototype. */ | 363 | /* Need to define _GNU_SOURCE to get the mremap prototype. */ |
287 | static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, | 364 | static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) |
288 | int flags) | ||
289 | { | 365 | { |
290 | int olderr = errno; | 366 | int olderr = errno; |
291 | ptr = mremap(ptr, osz, nsz, flags); | 367 | ptr = mremap(ptr, osz, nsz, flags); |
@@ -305,6 +381,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, | |||
305 | 381 | ||
306 | #endif | 382 | #endif |
307 | 383 | ||
384 | |||
385 | #ifndef INIT_MMAP | ||
386 | #define INIT_MMAP() ((void)0) | ||
387 | #endif | ||
388 | |||
389 | #ifndef DIRECT_MMAP | ||
390 | #define DIRECT_MMAP(s) CALL_MMAP(s) | ||
391 | #endif | ||
392 | |||
308 | #ifndef CALL_MREMAP | 393 | #ifndef CALL_MREMAP |
309 | #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) | 394 | #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) |
310 | #endif | 395 | #endif |