diff options
author    Mike Pall <mike>    2016-11-20 22:16:08 +0100
committer Mike Pall <mike>    2016-11-20 22:18:14 +0100
commit    04b60707d7d117da22b40736a353e2a10179108a (patch)
tree      d11f50b00a8589108f5ebeeb005a12071fe6fcdf
parent    13642b75ac37957d9e2a37b35ebec69d6d4b3bc1 (diff)
download  luajit-04b60707d7d117da22b40736a353e2a10179108a.tar.gz
          luajit-04b60707d7d117da22b40736a353e2a10179108a.tar.bz2
          luajit-04b60707d7d117da22b40736a353e2a10179108a.zip
ARM64: Add JIT compiler backend.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
-rw-r--r--  Makefile               |    4
-rw-r--r--  src/jit/dis_arm64.lua  | 1215
-rw-r--r--  src/lj_arch.h          |    1
-rw-r--r--  src/lj_asm.c           |    4
-rw-r--r--  src/lj_asm_arm64.h     | 1823
-rw-r--r--  src/lj_ccall.c         |    2
-rw-r--r--  src/lj_dispatch.h      |    1
-rw-r--r--  src/lj_emit_arm64.h    |  397
-rw-r--r--  src/lj_gdbjit.c        |   12
-rw-r--r--  src/lj_target.h        |    4
-rw-r--r--  src/lj_target_arm64.h  |  221
-rw-r--r--  src/vm_arm64.dasc      |  227
12 files changed, 3887 insertions, 24 deletions
@@ -86,8 +86,8 @@ FILE_MAN= luajit.1
86 | FILE_PC= luajit.pc | 86 | FILE_PC= luajit.pc |
87 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h | 87 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h |
88 | FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ | 88 | FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ |
89 | dis_x86.lua dis_x64.lua dis_arm.lua dis_ppc.lua \ | 89 | dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ |
90 | dis_mips.lua dis_mipsel.lua vmdef.lua | 90 | dis_ppc.lua dis_mips.lua dis_mipsel.lua vmdef.lua |
91 | 91 | ||
92 | ifeq (,$(findstring Windows,$(OS))) | 92 | ifeq (,$(findstring Windows,$(OS))) |
93 | HOST_SYS:= $(shell uname -s) | 93 | HOST_SYS:= $(shell uname -s) |
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
new file mode 100644
index 00000000..909b33bc
--- /dev/null
+++ b/src/jit/dis_arm64.lua
@@ -0,0 +1,1215 @@
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT ARM64 disassembler module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2016 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | -- | ||
7 | -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
8 | -- Sponsored by Cisco Systems, Inc. | ||
9 | ---------------------------------------------------------------------------- | ||
10 | -- This is a helper module used by the LuaJIT machine code dumper module. | ||
11 | -- | ||
12 | -- It disassembles most user-mode AArch64 instructions. | ||
13 | -- NYI: Advanced SIMD and VFP instructions. | ||
14 | ------------------------------------------------------------------------------ | ||
15 | |||
16 | local type, tonumber = type, tonumber | ||
17 | local sub, byte, format = string.sub, string.byte, string.format | ||
18 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | ||
19 | local rep = string.rep | ||
20 | local concat = table.concat | ||
21 | local bit = require("bit") | ||
22 | local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex | ||
23 | local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift | ||
24 | local ror = bit.ror | ||
25 | |||
26 | ------------------------------------------------------------------------------ | ||
27 | -- Opcode maps | ||
28 | ------------------------------------------------------------------------------ | ||
29 | |||
30 | local map_adr = { -- PC-relative addressing. | ||
31 | shift = 31, mask = 1, | ||
32 | [0] = "adrDBx", "adrpDBx" | ||
33 | } | ||
34 | |||
35 | local map_addsubi = { -- Add/subtract immediate. | ||
36 | shift = 29, mask = 3, | ||
37 | [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg", | ||
38 | } | ||
39 | |||
40 | local map_logi = { -- Logical immediate. | ||
41 | shift = 31, mask = 1, | ||
42 | [0] = { | ||
43 | shift = 22, mask = 1, | ||
44 | [0] = { | ||
45 | shift = 29, mask = 3, | ||
46 | [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" | ||
47 | }, | ||
48 | false -- unallocated | ||
49 | }, | ||
50 | { | ||
51 | shift = 29, mask = 3, | ||
52 | [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" | ||
53 | } | ||
54 | } | ||
55 | |||
56 | local map_movwi = { -- Move wide immediate. | ||
57 | shift = 31, mask = 1, | ||
58 | [0] = { | ||
59 | shift = 22, mask = 1, | ||
60 | [0] = { | ||
61 | shift = 29, mask = 3, | ||
62 | [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" | ||
63 | }, false -- unallocated | ||
64 | }, | ||
65 | { | ||
66 | shift = 29, mask = 3, | ||
67 | [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" | ||
68 | }, | ||
69 | } | ||
70 | |||
71 | local map_bitf = { -- Bitfield. | ||
72 | shift = 31, mask = 1, | ||
73 | [0] = { | ||
74 | shift = 22, mask = 1, | ||
75 | [0] = { | ||
76 | shift = 29, mask = 3, | ||
77 | [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w", | ||
78 | "bfm|bfi|bfxilDN13w", | ||
79 | "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w" | ||
80 | } | ||
81 | }, | ||
82 | { | ||
83 | shift = 22, mask = 1, | ||
84 | { | ||
85 | shift = 29, mask = 3, | ||
86 | [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x", | ||
87 | "bfm|bfi|bfxilDN13x", | ||
88 | "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x" | ||
89 | } | ||
90 | } | ||
91 | } | ||
92 | |||
93 | local map_datai = { -- Data processing - immediate. | ||
94 | shift = 23, mask = 7, | ||
95 | [0] = map_adr, map_adr, map_addsubi, false, | ||
96 | map_logi, map_movwi, map_bitf, | ||
97 | { | ||
98 | shift = 15, mask = 0x1c0c1, | ||
99 | [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x", | ||
100 | [0x10081] = "extr|rorDNM4x" | ||
101 | } | ||
102 | } | ||
103 | |||
104 | local map_logsr = { -- Logical, shifted register. | ||
105 | shift = 31, mask = 1, | ||
106 | [0] = { | ||
107 | shift = 15, mask = 1, | ||
108 | [0] = { | ||
109 | shift = 29, mask = 3, | ||
110 | [0] = { | ||
111 | shift = 21, mask = 7, | ||
112 | [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", | ||
113 | "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" | ||
114 | }, | ||
115 | { | ||
116 | shift = 21, mask = 7, | ||
117 | [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", | ||
118 | "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" | ||
119 | }, | ||
120 | { | ||
121 | shift = 21, mask = 7, | ||
122 | [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", | ||
123 | "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" | ||
124 | }, | ||
125 | { | ||
126 | shift = 21, mask = 7, | ||
127 | [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", | ||
128 | "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" | ||
129 | } | ||
130 | }, | ||
131 | false -- unallocated | ||
132 | }, | ||
133 | { | ||
134 | shift = 29, mask = 3, | ||
135 | [0] = { | ||
136 | shift = 21, mask = 7, | ||
137 | [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", | ||
138 | "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" | ||
139 | }, | ||
140 | { | ||
141 | shift = 21, mask = 7, | ||
142 | [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", | ||
143 | "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" | ||
144 | }, | ||
145 | { | ||
146 | shift = 21, mask = 7, | ||
147 | [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", | ||
148 | "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" | ||
149 | }, | ||
150 | { | ||
151 | shift = 21, mask = 7, | ||
152 | [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", | ||
153 | "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" | ||
154 | } | ||
155 | } | ||
156 | } | ||
157 | |||
158 | local map_assh = { | ||
159 | shift = 31, mask = 1, | ||
160 | [0] = { | ||
161 | shift = 15, mask = 1, | ||
162 | [0] = { | ||
163 | shift = 29, mask = 3, | ||
164 | [0] = { | ||
165 | shift = 22, mask = 3, | ||
166 | [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" | ||
167 | }, | ||
168 | { | ||
169 | shift = 22, mask = 3, | ||
170 | [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", | ||
171 | "adds|cmnD0NMSg", "adds|cmnD0NMg" | ||
172 | }, | ||
173 | { | ||
174 | shift = 22, mask = 3, | ||
175 | [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" | ||
176 | }, | ||
177 | { | ||
178 | shift = 22, mask = 3, | ||
179 | [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", | ||
180 | "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" | ||
181 | }, | ||
182 | }, | ||
183 | false -- unallocated | ||
184 | }, | ||
185 | { | ||
186 | shift = 29, mask = 3, | ||
187 | [0] = { | ||
188 | shift = 22, mask = 3, | ||
189 | [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" | ||
190 | }, | ||
191 | { | ||
192 | shift = 22, mask = 3, | ||
193 | [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg", | ||
194 | "adds|cmnD0NMg" | ||
195 | }, | ||
196 | { | ||
197 | shift = 22, mask = 3, | ||
198 | [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" | ||
199 | }, | ||
200 | { | ||
201 | shift = 22, mask = 3, | ||
202 | [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", | ||
203 | "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" | ||
204 | } | ||
205 | } | ||
206 | } | ||
207 | |||
208 | local map_addsubsh = { -- Add/subtract, shifted register. | ||
209 | shift = 22, mask = 3, | ||
210 | [0] = map_assh, map_assh, map_assh | ||
211 | } | ||
212 | |||
213 | local map_addsubex = { -- Add/subtract, extended register. | ||
214 | shift = 22, mask = 3, | ||
215 | [0] = { | ||
216 | shift = 29, mask = 3, | ||
217 | [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg", | ||
218 | } | ||
219 | } | ||
220 | |||
221 | local map_addsubc = { -- Add/subtract, with carry. | ||
222 | shift = 10, mask = 63, | ||
223 | [0] = { | ||
224 | shift = 29, mask = 3, | ||
225 | [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg", | ||
226 | } | ||
227 | } | ||
228 | |||
229 | local map_ccomp = { | ||
230 | shift = 4, mask = 1, | ||
231 | [0] = { | ||
232 | shift = 10, mask = 3, | ||
233 | [0] = { -- Conditional compare register. | ||
234 | shift = 29, mask = 3, | ||
235 | "ccmnNMVCg", false, "ccmpNMVCg", | ||
236 | }, | ||
237 | [2] = { -- Conditional compare immediate. | ||
238 | shift = 29, mask = 3, | ||
239 | "ccmnN5VCg", false, "ccmpN5VCg", | ||
240 | } | ||
241 | } | ||
242 | } | ||
243 | |||
244 | local map_csel = { -- Conditional select. | ||
245 | shift = 11, mask = 1, | ||
246 | [0] = { | ||
247 | shift = 10, mask = 1, | ||
248 | [0] = { | ||
249 | shift = 29, mask = 3, | ||
250 | [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false, | ||
251 | }, | ||
252 | { | ||
253 | shift = 29, mask = 3, | ||
254 | [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false, | ||
255 | } | ||
256 | } | ||
257 | } | ||
258 | |||
259 | local map_data1s = { -- Data processing, 1 source. | ||
260 | shift = 29, mask = 1, | ||
261 | [0] = { | ||
262 | shift = 31, mask = 1, | ||
263 | [0] = { | ||
264 | shift = 10, mask = 0x7ff, | ||
265 | [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg" | ||
266 | }, | ||
267 | { | ||
268 | shift = 10, mask = 0x7ff, | ||
269 | [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg" | ||
270 | } | ||
271 | } | ||
272 | } | ||
273 | |||
274 | local map_data2s = { -- Data processing, 2 sources. | ||
275 | shift = 29, mask = 1, | ||
276 | [0] = { | ||
277 | shift = 10, mask = 63, | ||
278 | false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg", | ||
279 | "lsrDNMg", "asrDNMg", "rorDNMg" | ||
280 | } | ||
281 | } | ||
282 | |||
283 | local map_data3s = { -- Data processing, 3 sources. | ||
284 | shift = 29, mask = 7, | ||
285 | [0] = { | ||
286 | shift = 21, mask = 7, | ||
287 | [0] = { | ||
288 | shift = 15, mask = 1, | ||
289 | [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g" | ||
290 | } | ||
291 | }, false, false, false, | ||
292 | { | ||
293 | shift = 15, mask = 1, | ||
294 | [0] = { | ||
295 | shift = 21, mask = 7, | ||
296 | [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false, | ||
297 | false, "umaddl|umullDxNMwA0x", "umulhDNMx" | ||
298 | }, | ||
299 | { | ||
300 | shift = 21, mask = 7, | ||
301 | [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false, | ||
302 | false, "umsubl|umneglDxNMwA0x" | ||
303 | } | ||
304 | } | ||
305 | } | ||
306 | |||
307 | local map_datar = { -- Data processing, register. | ||
308 | shift = 28, mask = 1, | ||
309 | [0] = { | ||
310 | shift = 24, mask = 1, | ||
311 | [0] = map_logsr, | ||
312 | { | ||
313 | shift = 21, mask = 1, | ||
314 | [0] = map_addsubsh, map_addsubex | ||
315 | } | ||
316 | }, | ||
317 | { | ||
318 | shift = 21, mask = 15, | ||
319 | [0] = map_addsubc, false, map_ccomp, false, map_csel, false, | ||
320 | { | ||
321 | shift = 30, mask = 1, | ||
322 | [0] = map_data2s, map_data1s | ||
323 | }, | ||
324 | false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s, | ||
325 | map_data3s, map_data3s, map_data3s | ||
326 | } | ||
327 | } | ||
328 | |||
329 | local map_lrl = { -- Load register, literal. | ||
330 | shift = 26, mask = 1, | ||
331 | [0] = { | ||
332 | shift = 30, mask = 3, | ||
333 | [0] = "ldrDwB", "ldrDxB", "ldrswDxB" | ||
334 | }, | ||
335 | { | ||
336 | shift = 30, mask = 3, | ||
337 | [0] = "ldrDsB", "ldrDdB" | ||
338 | } | ||
339 | } | ||
340 | |||
341 | local map_lsriind = { -- Load/store register, immediate pre/post-indexed. | ||
342 | shift = 30, mask = 3, | ||
343 | [0] = { | ||
344 | shift = 26, mask = 1, | ||
345 | [0] = { | ||
346 | shift = 22, mask = 3, | ||
347 | [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL" | ||
348 | } | ||
349 | }, | ||
350 | { | ||
351 | shift = 26, mask = 1, | ||
352 | [0] = { | ||
353 | shift = 22, mask = 3, | ||
354 | [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL" | ||
355 | } | ||
356 | }, | ||
357 | { | ||
358 | shift = 26, mask = 1, | ||
359 | [0] = { | ||
360 | shift = 22, mask = 3, | ||
361 | [0] = "strDwzL", "ldrDwzL", "ldrswDxzL" | ||
362 | }, | ||
363 | { | ||
364 | shift = 22, mask = 3, | ||
365 | [0] = "strDszL", "ldrDszL" | ||
366 | } | ||
367 | }, | ||
368 | { | ||
369 | shift = 26, mask = 1, | ||
370 | [0] = { | ||
371 | shift = 22, mask = 3, | ||
372 | [0] = "strDxzL", "ldrDxzL" | ||
373 | }, | ||
374 | { | ||
375 | shift = 22, mask = 3, | ||
376 | [0] = "strDdzL", "ldrDdzL" | ||
377 | } | ||
378 | } | ||
379 | } | ||
380 | |||
381 | local map_lsriro = { | ||
382 | shift = 21, mask = 1, | ||
383 | [0] = { -- Load/store register immediate. | ||
384 | shift = 10, mask = 3, | ||
385 | [0] = { -- Unscaled immediate. | ||
386 | shift = 26, mask = 1, | ||
387 | [0] = { | ||
388 | shift = 30, mask = 3, | ||
389 | [0] = { | ||
390 | shift = 22, mask = 3, | ||
391 | [0] = "sturbDwK", "ldurbDwK" | ||
392 | }, | ||
393 | { | ||
394 | shift = 22, mask = 3, | ||
395 | [0] = "sturhDwK", "ldurhDwK" | ||
396 | }, | ||
397 | { | ||
398 | shift = 22, mask = 3, | ||
399 | [0] = "sturDwK", "ldurDwK" | ||
400 | }, | ||
401 | { | ||
402 | shift = 22, mask = 3, | ||
403 | [0] = "sturDxK", "ldurDxK" | ||
404 | } | ||
405 | } | ||
406 | }, map_lsriind, false, map_lsriind | ||
407 | }, | ||
408 | { -- Load/store register, register offset. | ||
409 | shift = 10, mask = 3, | ||
410 | [2] = { | ||
411 | shift = 26, mask = 1, | ||
412 | [0] = { | ||
413 | shift = 30, mask = 3, | ||
414 | [1] = { | ||
415 | shift = 22, mask = 3, | ||
416 | [0] = "strhDwO", "ldrhDwO", "ldrshDwO", "ldrshDxO" | ||
417 | }, | ||
418 | [2] = { | ||
419 | shift = 22, mask = 3, | ||
420 | [0] = "strDwO", "ldrDwO", "ldrswDxO" | ||
421 | }, | ||
422 | [3] = { | ||
423 | shift = 22, mask = 3, | ||
424 | [0] = "strDxO", "ldrDxO" | ||
425 | } | ||
426 | }, | ||
427 | { | ||
428 | shift = 30, mask = 3, | ||
429 | [2] = { | ||
430 | shift = 22, mask = 3, | ||
431 | [0] = "strDsO", "ldrDsO" | ||
432 | }, | ||
433 | [3] = { | ||
434 | shift = 22, mask = 3, | ||
435 | [0] = "strDdO", "ldrDdO" | ||
436 | } | ||
437 | } | ||
438 | } | ||
439 | } | ||
440 | } | ||
441 | |||
442 | local map_lsp = { -- Load/store register pair, offset. | ||
443 | shift = 22, mask = 1, | ||
444 | [0] = { | ||
445 | shift = 30, mask = 3, | ||
446 | [0] = { | ||
447 | shift = 26, mask = 1, | ||
448 | [0] = "stpDzAzwP", "stpDzAzsP", | ||
449 | }, | ||
450 | { | ||
451 | shift = 26, mask = 1, | ||
452 | "stpDzAzdP" | ||
453 | }, | ||
454 | { | ||
455 | shift = 26, mask = 1, | ||
456 | [0] = "stpDzAzxP" | ||
457 | } | ||
458 | }, | ||
459 | { | ||
460 | shift = 30, mask = 3, | ||
461 | [0] = { | ||
462 | shift = 26, mask = 1, | ||
463 | [0] = "ldpDzAzwP", "ldpDzAzsP", | ||
464 | }, | ||
465 | { | ||
466 | shift = 26, mask = 1, | ||
467 | [0] = "ldpswDAxP", "ldpDzAzdP" | ||
468 | }, | ||
469 | { | ||
470 | shift = 26, mask = 1, | ||
471 | [0] = "ldpDzAzxP" | ||
472 | } | ||
473 | } | ||
474 | } | ||
475 | |||
476 | local map_ls = { -- Loads and stores. | ||
477 | shift = 24, mask = 0x31, | ||
478 | [0x10] = map_lrl, [0x30] = map_lsriro, | ||
479 | [0x20] = { | ||
480 | shift = 23, mask = 3, | ||
481 | map_lsp, map_lsp, map_lsp | ||
482 | }, | ||
483 | [0x21] = { | ||
484 | shift = 23, mask = 3, | ||
485 | map_lsp, map_lsp, map_lsp | ||
486 | }, | ||
487 | [0x31] = { | ||
488 | shift = 26, mask = 1, | ||
489 | [0] = { | ||
490 | shift = 30, mask = 3, | ||
491 | [0] = { | ||
492 | shift = 22, mask = 3, | ||
493 | [0] = "strbDwzU", "ldrbDwzU" | ||
494 | }, | ||
495 | { | ||
496 | shift = 22, mask = 3, | ||
497 | [0] = "strhDwzU", "ldrhDwzU" | ||
498 | }, | ||
499 | { | ||
500 | shift = 22, mask = 3, | ||
501 | [0] = "strDwzU", "ldrDwzU" | ||
502 | }, | ||
503 | { | ||
504 | shift = 22, mask = 3, | ||
505 | [0] = "strDxzU", "ldrDxzU" | ||
506 | } | ||
507 | }, | ||
508 | { | ||
509 | shift = 30, mask = 3, | ||
510 | [2] = { | ||
511 | shift = 22, mask = 3, | ||
512 | [0] = "strDszU", "ldrDszU" | ||
513 | }, | ||
514 | [3] = { | ||
515 | shift = 22, mask = 3, | ||
516 | [0] = "strDdzU", "ldrDdzU" | ||
517 | } | ||
518 | } | ||
519 | }, | ||
520 | } | ||
521 | |||
522 | local map_datafp = { -- Data processing, SIMD and FP. | ||
523 | shift = 28, mask = 7, | ||
524 | { -- 001 | ||
525 | shift = 24, mask = 1, | ||
526 | [0] = { | ||
527 | shift = 21, mask = 1, | ||
528 | { | ||
529 | shift = 10, mask = 3, | ||
530 | [0] = { | ||
531 | shift = 12, mask = 1, | ||
532 | [0] = { | ||
533 | shift = 13, mask = 1, | ||
534 | [0] = { | ||
535 | shift = 14, mask = 1, | ||
536 | [0] = { | ||
537 | shift = 15, mask = 1, | ||
538 | [0] = { -- FP/int conversion. | ||
539 | shift = 31, mask = 1, | ||
540 | [0] = { | ||
541 | shift = 16, mask = 0xff, | ||
542 | [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs", | ||
543 | [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw", | ||
544 | [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs", | ||
545 | [0x26] = "fmovDwNs", [0x27] = "fmovDsNw", | ||
546 | [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs", | ||
547 | [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs", | ||
548 | [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs", | ||
549 | [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd", | ||
550 | [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw", | ||
551 | [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd", | ||
552 | [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd", | ||
553 | [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd", | ||
554 | [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd" | ||
555 | }, | ||
556 | { | ||
557 | shift = 16, mask = 0xff, | ||
558 | [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs", | ||
559 | [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx", | ||
560 | [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs", | ||
561 | [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs", | ||
562 | [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs", | ||
563 | [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs", | ||
564 | [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd", | ||
565 | [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx", | ||
566 | [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd", | ||
567 | [0x66] = "fmovDxNd", [0x67] = "fmovDdNx", | ||
568 | [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd", | ||
569 | [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd", | ||
570 | [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd" | ||
571 | } | ||
572 | } | ||
573 | }, | ||
574 | { -- FP data-processing, 1 source. | ||
575 | shift = 31, mask = 1, | ||
576 | [0] = { | ||
577 | shift = 22, mask = 3, | ||
578 | [0] = { | ||
579 | shift = 15, mask = 63, | ||
580 | [0] = "fmovDNf", "fabsDNf", "fnegDNf", | ||
581 | "fsqrtDNf", false, "fcvtDdNs", false, false, | ||
582 | "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", | ||
583 | "frintaDNf", false, "frintxDNf", "frintiDNf", | ||
584 | }, | ||
585 | { | ||
586 | shift = 15, mask = 63, | ||
587 | [0] = "fmovDNf", "fabsDNf", "fnegDNf", | ||
588 | "fsqrtDNf", "fcvtDsNd", false, false, false, | ||
589 | "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", | ||
590 | "frintaDNf", false, "frintxDNf", "frintiDNf", | ||
591 | } | ||
592 | } | ||
593 | } | ||
594 | }, | ||
595 | { -- FP compare. | ||
596 | shift = 31, mask = 1, | ||
597 | [0] = { | ||
598 | shift = 14, mask = 3, | ||
599 | [0] = { | ||
600 | shift = 23, mask = 1, | ||
601 | [0] = { | ||
602 | shift = 0, mask = 31, | ||
603 | [0] = "fcmpNMf", [8] = "fcmpNZf", | ||
604 | [16] = "fcmpeNMf", [24] = "fcmpeNZf", | ||
605 | } | ||
606 | } | ||
607 | } | ||
608 | } | ||
609 | }, | ||
610 | { -- FP immediate. | ||
611 | shift = 31, mask = 1, | ||
612 | [0] = { | ||
613 | shift = 5, mask = 31, | ||
614 | [0] = { | ||
615 | shift = 23, mask = 1, | ||
616 | [0] = "fmovDFf" | ||
617 | } | ||
618 | } | ||
619 | } | ||
620 | }, | ||
621 | { -- FP conditional compare. | ||
622 | shift = 31, mask = 1, | ||
623 | [0] = { | ||
624 | shift = 23, mask = 1, | ||
625 | [0] = { | ||
626 | shift = 4, mask = 1, | ||
627 | [0] = "fccmpNMVCf", "fccmpeNMVCf" | ||
628 | } | ||
629 | } | ||
630 | }, | ||
631 | { -- FP data-processing, 2 sources. | ||
632 | shift = 31, mask = 1, | ||
633 | [0] = { | ||
634 | shift = 23, mask = 1, | ||
635 | [0] = { | ||
636 | shift = 12, mask = 15, | ||
637 | [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf", | ||
638 | "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf", | ||
639 | "fnmulDNMf" | ||
640 | } | ||
641 | } | ||
642 | }, | ||
643 | { -- FP conditional select. | ||
644 | shift = 31, mask = 1, | ||
645 | [0] = { | ||
646 | shift = 23, mask = 1, | ||
647 | [0] = "fcselDNMCf" | ||
648 | } | ||
649 | } | ||
650 | } | ||
651 | }, | ||
652 | { -- FP data-processing, 3 sources. | ||
653 | shift = 31, mask = 1, | ||
654 | [0] = { | ||
655 | shift = 15, mask = 1, | ||
656 | [0] = { | ||
657 | shift = 21, mask = 5, | ||
658 | [0] = "fmaddDNMAf", "fnmaddDNMAf" | ||
659 | }, | ||
660 | { | ||
661 | shift = 21, mask = 5, | ||
662 | [0] = "fmsubDNMAf", "fnmsubDNMAf" | ||
663 | } | ||
664 | } | ||
665 | } | ||
666 | } | ||
667 | } | ||
668 | |||
669 | local map_br = { -- Branches, exception generating and system instructions. | ||
670 | shift = 29, mask = 7, | ||
671 | [0] = "bB", | ||
672 | { -- Compare & branch, immediate. | ||
673 | shift = 24, mask = 3, | ||
674 | [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw" | ||
675 | }, | ||
676 | { -- Conditional branch, immediate. | ||
677 | shift = 24, mask = 3, | ||
678 | [0] = { | ||
679 | shift = 4, mask = 1, | ||
680 | [0] = { | ||
681 | shift = 0, mask = 15, | ||
682 | [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB", | ||
683 | "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB" | ||
684 | } | ||
685 | } | ||
686 | }, false, "blB", | ||
687 | { -- Compare & branch, immediate. | ||
688 | shift = 24, mask = 3, | ||
689 | [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx" | ||
690 | }, | ||
691 | { | ||
692 | shift = 24, mask = 3, | ||
693 | [0] = { -- Exception generation. | ||
694 | shift = 0, mask = 0xe0001f, | ||
695 | [0x200000] = "brkW" | ||
696 | }, | ||
697 | { -- System instructions. | ||
698 | shift = 0, mask = 0x3fffff, | ||
699 | [0x03201f] = "nop" | ||
700 | }, | ||
701 | { -- Unconditional branch, register. | ||
702 | shift = 0, mask = 0xfffc1f, | ||
703 | [0x1f0000] = "brNx", [0x3f0000] = "blrNx", | ||
704 | [0x5f0000] = "retNx" | ||
705 | }, | ||
706 | } | ||
707 | } | ||
708 | |||
709 | local map_init = { | ||
710 | shift = 25, mask = 15, | ||
711 | [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp, | ||
712 | map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp | ||
713 | } | ||
714 | |||
715 | ------------------------------------------------------------------------------ | ||
716 | |||
717 | local map_regs = { x = {}, w = {}, d = {}, s = {} } | ||
718 | |||
719 | for i=0,30 do | ||
720 | map_regs.x[i] = "x"..i | ||
721 | map_regs.w[i] = "w"..i | ||
722 | map_regs.d[i] = "d"..i | ||
723 | map_regs.s[i] = "s"..i | ||
724 | end | ||
725 | map_regs.x[31] = "sp" | ||
726 | map_regs.w[31] = "wsp" | ||
727 | map_regs.d[31] = "d31" | ||
728 | map_regs.s[31] = "s31" | ||
729 | |||
730 | local map_cond = { | ||
731 | [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", | ||
732 | "hi", "ls", "ge", "lt", "gt", "le", "al", | ||
733 | } | ||
734 | |||
735 | local map_shift = { [0] = "lsl", "lsr", "asr", } | ||
736 | |||
737 | local map_extend = { | ||
738 | [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", | ||
739 | } | ||
740 | |||
741 | ------------------------------------------------------------------------------ | ||
742 | |||
743 | -- Output a nicely formatted line with an opcode and operands. | ||
744 | local function putop(ctx, text, operands) | ||
745 | local pos = ctx.pos | ||
746 | local extra = "" | ||
747 | if ctx.rel then | ||
748 | local sym = ctx.symtab[ctx.rel] | ||
749 | if sym then | ||
750 | extra = "\t->"..sym | ||
751 | end | ||
752 | end | ||
753 | if ctx.hexdump > 0 then | ||
754 | ctx.out(format("%08x %s %-5s %s%s\n", | ||
755 | ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) | ||
756 | else | ||
757 | ctx.out(format("%08x %-5s %s%s\n", | ||
758 | ctx.addr+pos, text, concat(operands, ", "), extra)) | ||
759 | end | ||
760 | ctx.pos = pos + 4 | ||
761 | end | ||
762 | |||
763 | -- Fallback for unknown opcodes. | ||
764 | local function unknown(ctx) | ||
765 | return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) | ||
766 | end | ||
767 | |||
768 | local function match_reg(p, pat, regnum) | ||
769 | return map_regs[match(pat, p.."%w-([xwds])")][regnum] | ||
770 | end | ||
771 | |||
772 | local function fmt_hex32(x) | ||
773 | if x < 0 then | ||
774 | return tohex(x) | ||
775 | else | ||
776 | return format("%x", x) | ||
777 | end | ||
778 | end | ||
779 | |||
780 | local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 } | ||
781 | |||
782 | local function decode_imm13(op) | ||
783 | local imms = band(rshift(op, 10), 63) | ||
784 | local immr = band(rshift(op, 16), 63) | ||
785 | if band(op, 0x00400000) == 0 then | ||
786 | local len = 5 | ||
787 | if imms >= 56 then | ||
788 | if imms >= 60 then len = 1 else len = 2 end | ||
789 | elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end | ||
790 | local l = lshift(1, len)-1 | ||
791 | local s = band(imms, l) | ||
792 | local r = band(immr, l) | ||
793 | local imm = ror(rshift(-1, 31-s), r) | ||
794 | if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end | ||
795 | imm = imm * imm13_rep[len] | ||
796 | local ix = fmt_hex32(imm) | ||
797 | if rshift(op, 31) ~= 0 then | ||
798 | return ix..tohex(imm) | ||
799 | else | ||
800 | return ix | ||
801 | end | ||
802 | else | ||
803 | local lo, hi = -1, 0 | ||
804 | if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end | ||
805 | if immr ~= 0 then | ||
806 | lo, hi = ror(lo, immr), ror(hi, immr) | ||
807 | local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr)) | ||
808 | lo, hi = bxor(lo, x), bxor(hi, x) | ||
809 | if immr >= 32 then lo, hi = hi, lo end | ||
810 | end | ||
811 | if hi ~= 0 then | ||
812 | return fmt_hex32(hi)..tohex(lo) | ||
813 | else | ||
814 | return fmt_hex32(lo) | ||
815 | end | ||
816 | end | ||
817 | end | ||
818 | |||
819 | local function parse_immpc(op, name) | ||
820 | if name == "b" or name == "bl" then | ||
821 | return arshift(lshift(op, 6), 4) | ||
822 | elseif name == "adr" or name == "adrp" then | ||
823 | local immlo = band(rshift(op, 29), 3) | ||
824 | local immhi = lshift(arshift(lshift(op, 8), 13), 2) | ||
825 | return bor(immhi, immlo) | ||
826 | elseif name == "tbz" or name == "tbnz" then | ||
827 | return lshift(arshift(lshift(op, 13), 18), 2) | ||
828 | else | ||
829 | return lshift(arshift(lshift(op, 8), 13), 2) | ||
830 | end | ||
831 | end | ||
832 | |||
833 | local function parse_fpimm8(op) | ||
834 | local sign = band(op, 0x100000) == 0 and 1 or -1 | ||
835 | local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131 | ||
836 | local frac = 16+band(rshift(op, 13), 15) | ||
837 | return sign * frac * 2^exp | ||
838 | end | ||
839 | |||
840 | local function prefer_bfx(sf, uns, imms, immr) | ||
841 | if imms < immr or imms == 31 or imms == 63 then | ||
842 | return false | ||
843 | end | ||
844 | if immr == 0 then | ||
845 | if sf == 0 and (imms == 7 or imms == 15) then | ||
846 | return false | ||
847 | end | ||
848 | if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then | ||
849 | return false | ||
850 | end | ||
851 | end | ||
852 | return true | ||
853 | end | ||
854 | |||
855 | -- Disassemble a single instruction. | ||
856 | local function disass_ins(ctx) | ||
857 | local pos = ctx.pos | ||
858 | local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) | ||
859 | local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) | ||
860 | local operands = {} | ||
861 | local suffix = "" | ||
862 | local last, name, pat | ||
863 | local vr | ||
864 | local map_reg | ||
865 | ctx.op = op | ||
866 | ctx.rel = nil | ||
867 | last = nil | ||
868 | local opat | ||
869 | opat = map_init[band(rshift(op, 25), 15)] | ||
870 | while type(opat) ~= "string" do | ||
871 | if not opat then return unknown(ctx) end | ||
872 | opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ | ||
873 | end | ||
874 | name, pat = match(opat, "^([a-z0-9]*)(.*)") | ||
875 | local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") | ||
876 | if altname then pat = pat2 end | ||
877 | if sub(pat, 1, 1) == "." then | ||
878 | local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)") | ||
879 | suffix = suffix..s2 | ||
880 | pat = p2 | ||
881 | end | ||
882 | |||
883 | local rt = match(pat, "[gf]") | ||
884 | if rt then | ||
885 | if rt == "g" then | ||
886 | map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w | ||
887 | else | ||
888 | map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s | ||
889 | end | ||
890 | end | ||
891 | |||
892 | local second0, immr | ||
893 | |||
894 | for p in gmatch(pat, ".") do | ||
895 | local x = nil | ||
896 | if p == "D" then | ||
897 | local regnum = band(op, 31) | ||
898 | x = rt and map_reg[regnum] or match_reg(p, pat, regnum) | ||
899 | elseif p == "N" then | ||
900 | local regnum = band(rshift(op, 5), 31) | ||
901 | x = rt and map_reg[regnum] or match_reg(p, pat, regnum) | ||
902 | elseif p == "M" then | ||
903 | local regnum = band(rshift(op, 16), 31) | ||
904 | x = rt and map_reg[regnum] or match_reg(p, pat, regnum) | ||
905 | elseif p == "A" then | ||
906 | local regnum = band(rshift(op, 10), 31) | ||
907 | x = rt and map_reg[regnum] or match_reg(p, pat, regnum) | ||
908 | elseif p == "B" then | ||
909 | local addr = ctx.addr + pos + parse_immpc(op, name) | ||
910 | ctx.rel = addr | ||
911 | x = "0x"..tohex(addr) | ||
912 | elseif p == "T" then | ||
913 | x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) | ||
914 | elseif p == "V" then | ||
915 | x = band(op, 15) | ||
916 | elseif p == "C" then | ||
917 | x = map_cond[band(rshift(op, 12), 15)] | ||
918 | elseif p == "c" then | ||
919 | local rn = band(rshift(op, 5), 31) | ||
920 | local rm = band(rshift(op, 16), 31) | ||
921 | local cond = band(rshift(op, 12), 15) | ||
922 | local invc = bxor(cond, 1) | ||
923 | x = map_cond[cond] | ||
924 | if altname and cond ~= 14 and cond ~= 15 then | ||
925 | local a1, a2 = match(altname, "([^|]*)|(.*)") | ||
926 | if rn == rm then | ||
927 | local n = #operands | ||
928 | operands[n] = nil | ||
929 | x = map_cond[invc] | ||
930 | if rn ~= 31 then | ||
931 | if a1 then name = a1 else name = altname end | ||
932 | else | ||
933 | operands[n-1] = nil | ||
934 | name = a2 | ||
935 | end | ||
936 | end | ||
937 | end | ||
938 | elseif p == "W" then | ||
939 | x = band(rshift(op, 5), 0xffff) | ||
940 | elseif p == "Y" then | ||
941 | x = band(rshift(op, 5), 0xffff) | ||
942 | local hw = band(rshift(op, 21), 3) | ||
943 | if altname and (hw == 0 or x ~= 0) then | ||
944 | name = altname | ||
945 | end | ||
946 | elseif p == "L" then | ||
947 | local rn = map_regs.x[band(rshift(op, 5), 31)] | ||
948 | local imm9 = arshift(lshift(op, 11), 23) | ||
949 | if band(op, 0x800) ~= 0 then | ||
950 | x = "["..rn..", #"..imm9.."]!" | ||
951 | else | ||
952 | x = "["..rn.."], #"..imm9 | ||
953 | end | ||
954 | elseif p == "U" then | ||
955 | local rn = map_regs.x[band(rshift(op, 5), 31)] | ||
956 | local sz = band(rshift(op, 30), 3) | ||
957 | local imm12 = lshift(arshift(lshift(op, 10), 20), sz) | ||
958 | if imm12 ~= 0 then | ||
959 | x = "["..rn..", #"..imm12.."]" | ||
960 | else | ||
961 | x = "["..rn.."]" | ||
962 | end | ||
963 | elseif p == "K" then | ||
964 | local rn = map_regs.x[band(rshift(op, 5), 31)] | ||
965 | local imm9 = arshift(lshift(op, 11), 23) | ||
966 | if imm9 ~= 0 then | ||
967 | x = "["..rn..", #"..imm9.."]" | ||
968 | else | ||
969 | x = "["..rn.."]" | ||
970 | end | ||
971 | elseif p == "O" then | ||
972 | local rn, rm = map_regs.x[band(rshift(op, 5), 31)] | ||
973 | local m = band(rshift(op, 13), 1) | ||
974 | if m == 0 then | ||
975 | rm = map_regs.w[band(rshift(op, 16), 31)] | ||
976 | else | ||
977 | rm = map_regs.x[band(rshift(op, 16), 31)] | ||
978 | end | ||
979 | x = "["..rn..", "..rm | ||
980 | local opt = band(rshift(op, 13), 7) | ||
981 | local s = band(rshift(op, 12), 1) | ||
982 | local sz = band(rshift(op, 30), 3) | ||
983 | -- extension to be applied | ||
984 | if opt == 3 then | ||
985 | if s == 0 then x = nil | ||
986 | else x = x..", lsl #"..sz.."]" end | ||
987 | elseif opt == 2 or opt == 6 or opt == 7 then | ||
988 | if s == 0 then x = x..", "..map_extend[opt].."]" | ||
989 | else x = x..", "..map_extend[opt].." #"..sz.."]" end | ||
990 | else | ||
991 | x = x.."]" | ||
992 | end | ||
993 | elseif p == "P" then | ||
994 | local opcv, sh = rshift(op, 26), 2 | ||
995 | if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end | ||
996 | local imm7 = lshift(arshift(lshift(op, 10), 25), sh) | ||
997 | local rn = map_regs.x[band(rshift(op, 5), 31)] | ||
998 | local ind = band(rshift(op, 23), 3) | ||
999 | if ind == 1 then | ||
1000 | x = "["..rn.."], #"..imm7 | ||
1001 | elseif ind == 2 then | ||
1002 | if imm7 == 0 then | ||
1003 | x = "["..rn.."]" | ||
1004 | else | ||
1005 | x = "["..rn..", #"..imm7.."]" | ||
1006 | end | ||
1007 | elseif ind == 3 then | ||
1008 | x = "["..rn..", #"..imm7.."]!" | ||
1009 | end | ||
1010 | elseif p == "I" then | ||
1011 | local shf = band(rshift(op, 22), 3) | ||
1012 | local imm12 = band(rshift(op, 10), 0x0fff) | ||
1013 | local n = #operands | ||
1014 | local rn, rd = band(rshift(op, 5), 31), band(op, 31) | ||
1015 | if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then | ||
1016 | name = altname | ||
1017 | x = nil | ||
1018 | elseif shf == 0 then | ||
1019 | x = imm12 | ||
1020 | elseif shf == 1 then | ||
1021 | x = imm12..", lsl #12" | ||
1022 | end | ||
1023 | elseif p == "i" then | ||
1024 | x = "#0x"..decode_imm13(op) | ||
1025 | elseif p == "1" then | ||
1026 | immr = band(rshift(op, 16), 63) | ||
1027 | x = immr | ||
1028 | elseif p == "2" then | ||
1029 | x = band(rshift(op, 10), 63) | ||
1030 | if altname then | ||
1031 | local a1, a2, a3, a4, a5, a6 = | ||
1032 | match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)") | ||
1033 | local sf = band(rshift(op, 26), 32) | ||
1034 | local uns = band(rshift(op, 30), 1) | ||
1035 | if prefer_bfx(sf, uns, x, immr) then | ||
1036 | name = a2 | ||
1037 | x = x - immr + 1 | ||
1038 | elseif immr == 0 and x == 7 then | ||
1039 | local n = #operands | ||
1040 | operands[n] = nil | ||
1041 | if sf ~= 0 then | ||
1042 | operands[n-1] = gsub(operands[n-1], "x", "w") | ||
1043 | end | ||
1044 | last = operands[n-1] | ||
1045 | name = a6 | ||
1046 | x = nil | ||
1047 | elseif immr == 0 and x == 15 then | ||
1048 | local n = #operands | ||
1049 | operands[n] = nil | ||
1050 | if sf ~= 0 then | ||
1051 | operands[n-1] = gsub(operands[n-1], "x", "w") | ||
1052 | end | ||
1053 | last = operands[n-1] | ||
1054 | name = a5 | ||
1055 | x = nil | ||
1056 | elseif x == 31 or x == 63 then | ||
1057 | if x == 31 and immr == 0 and name == "sbfm" then | ||
1058 | name = a4 | ||
1059 | local n = #operands | ||
1060 | operands[n] = nil | ||
1061 | if sf ~= 0 then | ||
1062 | operands[n-1] = gsub(operands[n-1], "x", "w") | ||
1063 | end | ||
1064 | last = operands[n-1] | ||
1065 | else | ||
1066 | name = a3 | ||
1067 | end | ||
1068 | x = nil | ||
1069 | elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then | ||
1070 | name = a4 | ||
1071 | last = "#"..(sf+32 - immr) | ||
1072 | operands[#operands] = last | ||
1073 | x = nil | ||
1074 | elseif x < immr then | ||
1075 | name = a1 | ||
1076 | last = "#"..(sf+32 - immr) | ||
1077 | operands[#operands] = last | ||
1078 | x = x + 1 | ||
1079 | end | ||
1080 | end | ||
1081 | elseif p == "3" then | ||
1082 | x = band(rshift(op, 10), 63) | ||
1083 | if altname then | ||
1084 | local a1, a2 = match(altname, "([^|]*)|(.*)") | ||
1085 | if x < immr then | ||
1086 | name = a1 | ||
1087 | local sf = band(rshift(op, 26), 32) | ||
1088 | last = "#"..(sf+32 - immr) | ||
1089 | operands[#operands] = last | ||
1090 | x = x + 1 | ||
1091 | elseif x >= immr then | ||
1092 | name = a2 | ||
1093 | x = x - immr + 1 | ||
1094 | end | ||
1095 | end | ||
1096 | elseif p == "4" then | ||
1097 | x = band(rshift(op, 10), 63) | ||
1098 | local rn = band(rshift(op, 5), 31) | ||
1099 | local rm = band(rshift(op, 16), 31) | ||
1100 | if altname and rn == rm then | ||
1101 | local n = #operands | ||
1102 | operands[n] = nil | ||
1103 | last = operands[n-1] | ||
1104 | name = altname | ||
1105 | end | ||
1106 | elseif p == "5" then | ||
1107 | x = band(rshift(op, 16), 31) | ||
1108 | elseif p == "S" then | ||
1109 | x = band(rshift(op, 10), 63) | ||
1110 | if x == 0 then x = nil | ||
1111 | else x = map_shift[band(rshift(op, 22), 3)].." #"..x end | ||
1112 | elseif p == "X" then | ||
1113 | local opt = band(rshift(op, 13), 7) | ||
1114 | -- Width specifier <R>. | ||
1115 | if opt ~= 3 and opt ~= 7 then | ||
1116 | last = map_regs.w[band(rshift(op, 16), 31)] | ||
1117 | operands[#operands] = last | ||
1118 | end | ||
1119 | x = band(rshift(op, 10), 7) | ||
1120 | -- Extension. | ||
1121 | if opt == 2 + band(rshift(op, 31), 1) and | ||
1122 | band(rshift(op, second0 and 5 or 0), 31) == 31 then | ||
1123 | if x == 0 then x = nil | ||
1124 | else x = "lsl #"..x end | ||
1125 | else | ||
1126 | if x == 0 then x = map_extend[band(rshift(op, 13), 7)] | ||
1127 | else x = map_extend[band(rshift(op, 13), 7)].." #"..x end | ||
1128 | end | ||
1129 | elseif p == "R" then | ||
1130 | x = band(rshift(op,21), 3) | ||
1131 | if x == 0 then x = nil | ||
1132 | else x = "lsl #"..x*16 end | ||
1133 | elseif p == "z" then | ||
1134 | local n = #operands | ||
1135 | if operands[n] == "sp" then operands[n] = "xzr" | ||
1136 | elseif operands[n] == "wsp" then operands[n] = "wzr" | ||
1137 | end | ||
1138 | elseif p == "Z" then | ||
1139 | x = 0 | ||
1140 | elseif p == "F" then | ||
1141 | x = parse_fpimm8(op) | ||
1142 | elseif p == "g" or p == "f" or p == "x" or p == "w" or | ||
1143 | p == "d" or p == "s" then | ||
1144 | -- These are handled in D/N/M/A. | ||
1145 | elseif p == "0" then | ||
1146 | if last == "sp" or last == "wsp" then | ||
1147 | local n = #operands | ||
1148 | operands[n] = nil | ||
1149 | last = operands[n-1] | ||
1150 | if altname then | ||
1151 | local a1, a2 = match(altname, "([^|]*)|(.*)") | ||
1152 | if not a1 then | ||
1153 | name = altname | ||
1154 | elseif second0 then | ||
1155 | name, altname = a2, a1 | ||
1156 | else | ||
1157 | name, altname = a1, a2 | ||
1158 | end | ||
1159 | end | ||
1160 | end | ||
1161 | second0 = true | ||
1162 | else | ||
1163 | assert(false) | ||
1164 | end | ||
1165 | if x then | ||
1166 | last = x | ||
1167 | if type(x) == "number" then x = "#"..x end | ||
1168 | operands[#operands+1] = x | ||
1169 | end | ||
1170 | end | ||
1171 | |||
1172 | return putop(ctx, name..suffix, operands) | ||
1173 | end | ||
1174 | |||
1175 | ------------------------------------------------------------------------------ | ||
1176 | |||
-- Disassemble a block of code, one instruction at a time.
-- ofs defaults to 0; len defaults to the remainder of ctx.code.
local function disass_block(ctx, ofs, len)
  local start = ofs or 0
  local stop = len and start + len or #ctx.code
  ctx.pos = start
  ctx.rel = nil
  while ctx.pos < stop do
    disass_ins(ctx)
  end
end
1185 | |||
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
local function create(code, addr, out)
  return {
    code = code,            -- Machine code string to disassemble.
    addr = addr or 0,       -- Base address for branch targets.
    out = out or io.write,  -- Output sink.
    symtab = {},            -- Optional symbol table (addr -> name).
    disass = disass_block,  -- Method: ctx:disass(ofs, len).
    hexdump = 8,            -- Width of the hex dump column.
  }
end
1197 | |||
-- Simple API: disassemble code (a string) at address and output via out.
local function disass(code, addr, out)
  local ctx = create(code, addr, out)
  ctx:disass()
end
1202 | |||
-- Return register name for RID: 0-31 map to GPRs (x regs), 32+ to FPRs (d regs).
local function regname(r)
  if r >= 32 then return map_regs.d[r-32] end
  return map_regs.x[r]
end
1208 | |||
-- Public module functions.
return {
  create = create,   -- create(code, addr, out) -> disassembler context.
  disass = disass,   -- disass(code, addr, out): one-shot disassembly.
  regname = regname  -- regname(r) -> register name string for a RID.
}
1215 | |||
diff --git a/src/lj_arch.h b/src/lj_arch.h index cc5a0a66..3df602e3 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -226,7 +226,6 @@ | |||
226 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 226 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
227 | #define LJ_TARGET_GC64 1 | 227 | #define LJ_TARGET_GC64 1 |
228 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | 228 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
229 | #define LJ_ARCH_NOJIT 1 /* NYI */ | ||
230 | 229 | ||
231 | #define LJ_ARCH_VERSION 80 | 230 | #define LJ_ARCH_VERSION 80 |
232 | 231 | ||
diff --git a/src/lj_asm.c b/src/lj_asm.c index 7ce58924..2cb5abea 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -171,6 +171,8 @@ IRFLDEF(FLOFS) | |||
171 | #include "lj_emit_x86.h" | 171 | #include "lj_emit_x86.h" |
172 | #elif LJ_TARGET_ARM | 172 | #elif LJ_TARGET_ARM |
173 | #include "lj_emit_arm.h" | 173 | #include "lj_emit_arm.h" |
174 | #elif LJ_TARGET_ARM64 | ||
175 | #include "lj_emit_arm64.h" | ||
174 | #elif LJ_TARGET_PPC | 176 | #elif LJ_TARGET_PPC |
175 | #include "lj_emit_ppc.h" | 177 | #include "lj_emit_ppc.h" |
176 | #elif LJ_TARGET_MIPS | 178 | #elif LJ_TARGET_MIPS |
@@ -1563,6 +1565,8 @@ static void asm_loop(ASMState *as) | |||
1563 | #include "lj_asm_x86.h" | 1565 | #include "lj_asm_x86.h" |
1564 | #elif LJ_TARGET_ARM | 1566 | #elif LJ_TARGET_ARM |
1565 | #include "lj_asm_arm.h" | 1567 | #include "lj_asm_arm.h" |
1568 | #elif LJ_TARGET_ARM64 | ||
1569 | #include "lj_asm_arm64.h" | ||
1566 | #elif LJ_TARGET_PPC | 1570 | #elif LJ_TARGET_PPC |
1567 | #include "lj_asm_ppc.h" | 1571 | #include "lj_asm_ppc.h" |
1568 | #elif LJ_TARGET_MIPS | 1572 | #elif LJ_TARGET_MIPS |
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h new file mode 100644 index 00000000..0a2f5306 --- /dev/null +++ b/src/lj_asm_arm64.h | |||
@@ -0,0 +1,1823 @@ | |||
1 | /* | ||
2 | ** ARM64 IR assembler (SSA IR -> machine code). | ||
3 | ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | ** Sponsored by Cisco Systems, Inc. | ||
7 | */ | ||
8 | |||
9 | /* -- Register allocator extensions --------------------------------------- */ | ||
10 | |||
11 | /* Allocate a register with a hint. */ | ||
12 | static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) | ||
13 | { | ||
14 | Reg r = IR(ref)->r; | ||
15 | if (ra_noreg(r)) { | ||
16 | if (!ra_hashint(r) && !iscrossref(as, ref)) | ||
17 | ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ | ||
18 | r = ra_allocref(as, ref, allow); | ||
19 | } | ||
20 | ra_noweak(as, r); | ||
21 | return r; | ||
22 | } | ||
23 | |||
/* Allocate two source registers for three-operand instructions.
** Keeps any existing assignment; otherwise allocates the operand with a
** register hint first. Returns both registers packed into one value:
** left in bits 0-7, right in bits 8-15.
*/
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    /* Honor the hint on the right operand before allocating the left. */
    right = ra_allocref(as, ir->op2, allow);
    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_allocref(as, ir->op1, allow);
    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}
47 | |||
48 | /* -- Guard handling ------------------------------------------------------ */ | ||
49 | |||
/* Generate an exit stub group at the bottom of the reserved MCode memory.
** Layout: a common prologue (str lr, [sp]; bl ->vm_exit_handler; .long base
** exit number), followed by one unconditional branch per exit in the group,
** each branching back to the prologue. Returns the address of the first stub.
*/
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
{
  MCode *mxp = as->mcbot;
  int i;
  /* Conservatively sized limit check for the whole stub group. */
  if (mxp + 3*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
    asm_mclimit(as);
  /* str lr, [sp]; bl ->vm_exit_handler; .long group. */
  *mxp++ = A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP);
  *mxp = A64I_BL | (((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu);
  mxp++;
  *mxp++ = group*EXITSTUBS_PER_GROUP;  /* Base exit number for this group. */
  /* Stub i branches back -3-i words, i.e. to the common str/bl prologue. */
  for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
    *mxp++ = A64I_B | ((-3-i)&0x03ffffffu);
  lj_mcode_sync(as->mcbot, mxp);
  lj_mcode_commitbot(as->J, mxp);
  as->mcbot = mxp;
  as->mclim = as->mcbot + MCLIM_REDZONE;
  return mxp - EXITSTUBS_PER_GROUP;
}
70 | |||
71 | /* Setup all needed exit stubs. */ | ||
72 | static void asm_exitstub_setup(ASMState *as, ExitNo nexits) | ||
73 | { | ||
74 | ExitNo i; | ||
75 | if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) | ||
76 | lj_trace_err(as->J, LJ_TRERR_SNAPOV); | ||
77 | for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) | ||
78 | if (as->J->exitstubgroup[i] == NULL) | ||
79 | as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); | ||
80 | } | ||
81 | |||
/* Emit conditional branch to exit for guard.
** Emits b.<inverted cc> over a bl to the exit stub (no conditional calls
** on A64).
*/
static void asm_guardcc(ASMState *as, A64CC cc)
{
  MCode *target = exitstub_addr(as->J, as->snapno);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    /* Loop inversion: rewrite the instruction at p as the bl to the exit
    ** stub and place the inverted conditional branch in front of it.
    */
    as->loopinv = 1;
    *p = A64I_BL | ((target-p) & 0x03ffffffu);
    emit_cond_branch(as, cc^1, p-1);  /* cc^1 inverts an A64 condition. */
    return;
  }
  /* No conditional calls. Emit b.cc/bl instead. */
  /* That's a bad idea. NYI: emit per-trace exit stubs instead, see PPC. */
  emit_branch(as, A64I_BL, target);
  emit_cond_branch(as, cc^1, p);
}
98 | |||
99 | /* -- Operand fusion ------------------------------------------------------ */ | ||
100 | |||
101 | /* Limit linear search to this distance. Avoids O(n^2) behavior. */ | ||
102 | #define CONFLICT_SEARCH_LIM 31 | ||
103 | |||
104 | static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | ||
105 | { | ||
106 | if (irref_isk(ref)) { | ||
107 | IRIns *ir = IR(ref); | ||
108 | if (ir->o == IR_KNULL || !irt_is64(ir->t)) { | ||
109 | *k = ir->i; | ||
110 | return 1; | ||
111 | } else if (checki32((int64_t)ir_k64(ir)->u64)) { | ||
112 | *k = (int32_t)ir_k64(ir)->u64; | ||
113 | return 1; | ||
114 | } | ||
115 | } | ||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | /* Check if there's no conflicting instruction between curins and ref. */ | ||
120 | static int noconflict(ASMState *as, IRRef ref, IROp conflict) | ||
121 | { | ||
122 | IRIns *ir = as->ir; | ||
123 | IRRef i = as->curins; | ||
124 | if (i > ref + CONFLICT_SEARCH_LIM) | ||
125 | return 0; /* Give up, ref is too far away. */ | ||
126 | while (--i > ref) | ||
127 | if (ir[i].o == conflict) | ||
128 | return 0; /* Conflict found. */ | ||
129 | return 1; /* Ok, no conflict. */ | ||
130 | } | ||
131 | |||
132 | /* Fuse the array base of colocated arrays. */ | ||
133 | static int32_t asm_fuseabase(ASMState *as, IRRef ref) | ||
134 | { | ||
135 | IRIns *ir = IR(ref); | ||
136 | if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && | ||
137 | !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) | ||
138 | return (int32_t)sizeof(GCtab); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
/* Flag in *ofsp: low bits hold a fused index register, not an offset. */
#define FUSE_REG	0x40000000

/* Fuse array/hash/upvalue reference into register+offset operand.
** Returns the base register; *ofsp receives either an immediate offset
** or FUSE_REG|index-register. Falls back to a plain register and offset 0.
*/
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
			  A64Ins ins)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
	if (irref_isk(ir->op2)) {
	  /* Constant array index: fold colocated base plus 8*index. */
	  IRRef tab = IR(ir->op1)->op1;
	  int32_t ofs = asm_fuseabase(as, tab);
	  IRRef refa = ofs ? tab : ir->op1;
	  ofs += 8*IR(ir->op2)->i;
	  if (emit_checkofs(ins, ofs)) {
	    *ofsp = ofs;
	    return ra_alloc1(as, refa, allow);
	  }
	} else {
	  /* Variable array index: base register plus index register. */
	  Reg base = ra_alloc1(as, ir->op1, allow);
	  *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
	  return base;
	}
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
	/* Constant hash slot: offset is slot index times node size. */
	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
	if (emit_checkofs(ins, ofs)) {
	  *ofsp = ofs;
	  return ra_alloc1(as, ir->op1, allow);
	}
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
	/* Closed upvalue of a constant function: address via RID_GL. */
	GCfunc *fn = ir_kfunc(IR(ir->op1));
	GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
	int64_t ofs = glofs(as, &uv->tv);
	if (emit_checkofs(ins, ofs)) {
	  *ofsp = (int32_t)ofs;
	  return RID_GL;
	}
      }
    }
  }
  *ofsp = 0;
  return ra_alloc1(as, ref, allow);
}
190 | |||
/* Fuse m operand into arithmetic/logic instructions.
** Returns the encoded M-operand field: a plain register, an immediate,
** or a register with a fused shift/extend.
*/
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_hasreg(ir->r)) {
    ra_noweak(as, ir->r);
    return A64F_M(ir->r);
  } else if (irref_isk(ref)) {
    uint32_t m;
    int64_t k = get_k64val(ir);
    /* Logical-op class takes a bitmask immediate (k13), the rest take a
    ** 12-bit add/sub immediate (k12).
    */
    if ((ai & 0x1f000000) == 0x0a000000)
      m = emit_isk13(k, irt_is64(ir->t));
    else
      m = emit_isk12(k);
    if (m)
      return m;  /* m == 0 means the constant cannot be encoded. */
  } else if (mayfuse(as, ref)) {
    /* Fuse a constant shift, or x+x as x << 1. */
    if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
	(ir->o == IR_ADD && ir->op1 == ir->op2)) {
      A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
		    ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
      int shift = ir->o == IR_ADD ? 1 :
		  (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
      IRIns *irl = IR(ir->op1);
      if (sh == A64SH_LSL &&
	  irl->o == IR_CONV &&
	  irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
	  shift <= 4 &&
	  mayfuse(as, ir->op1)) {
	/* Also fuse the sign-extending CONV below the shift: sxtw #shift. */
	Reg m = ra_alloc1(as, irl->op1, allow);
	return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
      } else {
	Reg m = ra_alloc1(as, ir->op1, allow);
	return A64F_M(m) | A64F_SH(sh, shift);
      }
    } else if (ir->o == IR_CONV &&
	       ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
      /* Fuse a 32 to 64 bit sign extension as an sxtw extend operand. */
      Reg m = ra_alloc1(as, ir->op1, allow);
      return A64F_M(m) | A64F_EX(A64EX_SXTW);
    }
  }
  return A64F_M(ra_allocref(as, ref, allow));  /* Plain register operand. */
}
234 | |||
/* Fuse XLOAD/XSTORE reference into load/store operand.
** Emits the final load/store (and any needed address computation) itself.
*/
static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
			 RegSet allow)
{
  IRIns *ir = IR(ref);
  Reg base;
  int32_t ofs = 0;
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {
      /* Fold base + constant into the load/store offset. */
      if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs))
	ref = ir->op1;
      /* NYI: Fuse add with two registers. */
    } else if (ir->o == IR_STRREF) {
      /* String bytes live at a fixed offset past the GCstr header. */
      if (asm_isk32(as, ir->op2, &ofs)) {
	ref = ir->op1;
      } else if (asm_isk32(as, ir->op1, &ofs)) {
	ref = ir->op2;
      } else {
	/* NYI: Fuse ADD with constant. */
	Reg rn = ra_alloc1(as, ir->op1, allow);
	uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
	emit_lso(as, ai, rd, rd, sizeof(GCstr));
	emit_dn(as, A64I_ADDx^m, rd, rn);
	return;
      }
      ofs += sizeof(GCstr);
      if (!emit_checkofs(ai, ofs)) {
	/* Offset out of range: materialize it and use register-offset
	** addressing. NOTE(review): ai ^ 0x01204800 presumably flips the
	** encoding to the register-offset form -- confirm against the ISA.
	*/
	Reg rn = ra_alloc1(as, ref, allow);
	Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
	emit_dnm(as, (ai ^ 0x01204800), rd, rn, rm);
	return;
      }
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_lso(as, ai, (rd & 31), base, ofs);
}
272 | |||
273 | /* -- Calls --------------------------------------------------------------- */ | ||
274 | |||
/* Generate a call to a C function.
** FP args go to FPR argument registers, everything else to GPR argument
** registers; overflow arguments are stored to consecutive 8-byte stack
** slots. NOTE(review): machine code is emitted bottom-up here, so the
** call itself is emitted before (i.e. placed after) the argument setup.
*/
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t ofs = 0;  /* Stack offset for overflow arguments. */
  Reg gpr, fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  /* Block all GPR argument registers for the allocator. */
  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  gpr = REGARG_FIRSTGPR;
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    IRIns *ir = IR(ref);
    if (ref) {
      if (irt_isfp(ir->t)) {
	if (fpr <= REGARG_LASTFPR) {
	  lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
	  ra_leftov(as, fpr, ref);
	  fpr++;
	} else {
	  /* Out of FPR argument registers: pass on the stack. */
	  Reg r = ra_alloc1(as, ref, RSET_FPR);
	  emit_spstore(as, ir, r, ofs);
	  ofs += 8;
	}
      } else {
	if (gpr <= REGARG_LASTGPR) {
	  lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
	  ra_leftov(as, gpr, ref);
	  gpr++;
	} else {
	  /* Out of GPR argument registers: pass on the stack. */
	  Reg r = ra_alloc1(as, ref, RSET_GPR);
	  emit_spstore(as, ir, r, ofs);
	  ofs += 8;
	}
      }
    }
  }
}
314 | |||
/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r); /* Dest reg handled below. */
  ra_evictset(as, drop); /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (irt_isfp(ir->t)) {
      if (ci->flags & CCI_CASTU64) {
	/* FP result returned in a GPR: bit-move RID_RET into the FP reg. */
	Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
	emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
		dest, RID_RET);
      } else {
	ra_destreg(as, ir, RID_FPRET);
      }
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
  UNUSED(ci);
}
338 | |||
/* Assemble a call with a computed or constant target address. */
static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  /* Skip past a trailing CARG link to find the function operand. */
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(ir_k64(irf)->u64);
  } else {  /* Need a non-argument register for indirect calls. */
    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
    emit_n(as, A64I_BLR, freg);
    /* NULL func suppresses the emit_call in asm_gencall. */
    ci.func = (ASMFunction)(void *)0;
  }
  asm_gencall(as, &ci, args);
}
359 | |||
360 | /* -- Returns ------------------------------------------------------------- */ | ||
361 | |||
/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  /* Frame delta in stack slots back to the lower frame's base. */
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  /* Need to force a spill on REF_BASE now to update the stack slot. */
  emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);  /* New BASE = old BASE - 8*delta. */
  asm_guardcc(as, CC_NE);
  /* Compare the frame link at base[-1] against the expected constant pc. */
  emit_nm(as, A64I_CMPx, RID_TMP,
	  ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
}
380 | |||
381 | /* -- Type conversions ---------------------------------------------------- */ | ||
382 | |||
/* Checked conversion of a number to an integer.
** Converts to int32, converts back, and guards on FP equality so that
** inexact (non-integral or out-of-range) values exit the trace.
*/
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  asm_guardcc(as, CC_NE);  /* Exit if the round-trip compares unequal. */
  emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
  emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);  /* tmp = (double)dest. */
  emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); /* dest = (int)left. */
}
392 | |||
/* Convert a number to its 32 bit bit pattern (IR_TOBIT).
** Adds op2 to op1 and moves the low 32 bits of the FP result to a GPR.
** NOTE(review): op2 is presumably the 2^52+2^51 bias constant used by
** the TOBIT idiom -- confirm against the IR emitter.
*/
static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));  /* Low 32 bits to GPR. */
  emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
}
403 | |||
/* Assemble an IR_CONV: conversion between the IR numeric types.
** Handles FP<->FP, int<->FP, subword extension and 32<->64 bit moves.
*/
static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);  /* Source type. */
  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lua_assert(irt_type(ir->t) != st);
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
	      (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
    } else {  /* Integer to FP conversion. */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      /* Pick the convert variant by dest FP width and source
      ** width/signedness.
      */
      A64Ins ai = irt_isfloat(ir->t) ?
	(((IRT_IS64 >> st) & 1) ?
	 (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
	 (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
	(((IRT_IS64 >> st) & 1) ?
	 (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
	 (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
      emit_dn(as, ai, (dest & 31), left);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg dest = ra_dest(as, ir, RSET_GPR);
      A64Ins ai = irt_is64(ir->t) ?
	(st == IRT_NUM ?
	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
	(st == IRT_NUM ?
	 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
	 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
      emit_dn(as, ai, dest, (left & 31));
    }
  } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_alloc1(as, lref, RSET_GPR);
    A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
		st == IRT_U8 ? A64I_UXTBw :
		st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
    lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
    emit_dn(as, ai, dest, left);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (irt_is64(ir->t)) {
      if (st64 || !(ir->op2 & IRCONV_SEXT)) {
	/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
      } else {  /* 32 to 64 bit sign extension. */
	Reg left = ra_alloc1(as, lref, RSET_GPR);
	emit_dn(as, A64I_SXTW, dest, left);
      }
    } else {
      if (st64) {
	/* This is either a 32 bit reg/reg mov which zeroes the hiword
	** or a load of the loword from a 64 bit address.
	*/
	Reg left = ra_alloc1(as, lref, RSET_GPR);
	emit_dm(as, A64I_MOVw, dest, left);
      } else {  /* 32/32 bit no-op (cast). */
	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
      }
    }
  }
}
475 | |||
/* Assemble IR_STRTO: string to number via lj_strscan_num, guarded.
** The callee writes the result TValue through a pointer (TMP1); the trace
** exits if the scan returns 0 (failure).
*/
static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  Reg dest = 0, tmp;
  int destused = ra_used(ir);
  int32_t ofs = 0;
  ra_evictset(as, RSET_SCRATCH);
  if (destused) {
    if (ra_hasspill(ir->s)) {
      /* Let the call write the result directly into the spill slot. */
      ofs = sps_scale(ir->s);
      destused = 0;
      if (ra_hasreg(ir->r)) {
	ra_free(as, ir->r);
	ra_modified(as, ir->r);
	emit_spload(as, ir, ir->r, ofs);
      }
    } else {
      dest = ra_dest(as, ir, RSET_FPR);
    }
  }
  asm_guardcc(as, CC_EQ);  /* Exit if the scan returned 0. */
  if (destused)
    emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
  emit_n(as, (A64I_CMPw^A64I_K12)|A64F_U12(0), RID_RET);  /* cmp ret, #0. */
  args[0] = ir->op1; /* GCstr *str */
  args[1] = ASMREF_TMP1; /* TValue *n */
  asm_gencall(as, ci, args);
  /* TMP1 = sp + ofs: address of the output TValue. */
  tmp = ra_releasetmp(as, ASMREF_TMP1);
  emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
}
507 | |||
508 | /* -- Memory references --------------------------------------------------- */ | ||
509 | |||
/* Get pointer to TValue for ref into register dest.
** Numbers use their constant or spill slot directly as a TValue;
** everything else is stored (tagged) into g->tmptv and its address is used.
** Code is emitted backwards: at runtime dest is loaded with &g->tmptv
** first, then the tagged value is built and stored through it.
*/
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref)) {
      /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
    } else {
      /* Otherwise force a spill and use the spill slot. */
      emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
    }
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg src;
    if (irref_isk(ref)) {
      /* Constant: materialize the full 64 bit tagged value and store it. */
      TValue k;
      lj_ir_kvalue(as->J->L, &k, ir);
      src = ra_allock(as, k.u64, allow);
      emit_lso(as, A64I_STRx, src, dest, 0);
    } else {
      Reg type;
      if (irt_ispri(ir->t)) {
	/* Primitive types are encoded purely in the tag bits. */
	src = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
	emit_lso(as, A64I_STRx, src, dest, 0);
      } else if (irt_isint(ir->t)) {
	/* Integer: tag | zero-extended 32 bit value. */
	src = ra_alloc1(as, ref, allow);
	type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
	emit_lso(as, A64I_STRx, RID_TMP, dest, 0);
	emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
      } else {
	/* GC object: pointer + (type << 47) tag. */
	src = ra_alloc1(as, ref, allow);
	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
	emit_lso(as, A64I_STRx, RID_TMP, dest, 0);
	emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
      }
    }
    ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
  }
}
551 | |||
/* IR_AREF: compute &t->array[idx] (TValue slots are 8 bytes each).
** For constant indices on colocated arrays this folds into a single
** base-plus-immediate add; otherwise it is base + (idx << 3).
*/
static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
    if (k) {  /* Offset fits in a 12 bit immediate. */
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_dn(as, A64I_ADDx^k, dest, base);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  /* dest = base + zero-extended 32 bit idx shifted left by 3. */
  emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
}
571 | |||
/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
**
** NOTE: code is emitted backwards, so read the emit_* calls bottom-up
** for execution order: hash computation first, then the compare loop,
** then the chain-exhausted epilogue.
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = 0, tmp = RID_TMP;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  int isk = irref_isk(ir->op2);
  IRType1 kt = irkey->t;
  uint32_t k = 0;  /* Nonzero: key fits in a 12 bit immediate (encoded). */
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;
  rset_clear(allow, tab);

  /* Allocate a register (or immediate) for the key, per key kind. */
  if (!isk) {
    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
    rset_clear(allow, key);
    if (!irt_isstr(kt)) {
      tmp = ra_scratch(as, allow);
      rset_clear(allow, tmp);
    }
  } else if (irt_isnum(kt)) {
    int64_t val = (int64_t)ir_knum(irkey)->u64;
    if (!(k = emit_isk12(val))) {
      key = ra_allock(as, val, allow);
      rset_clear(allow, key);
    }
  } else if (!irt_ispri(kt)) {
    if (!(k = emit_isk12(irkey->i))) {
      key = ra_alloc1(as, refkey, allow);
      rset_clear(allow, key);
    }
  }

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guardcc(as, CC_AL);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));

  /* Follow hash chain until the end. */
  l_loop = --as->mcp;  /* Placeholder, patched below (*l_loop = ...). */
  emit_n(as, A64I_CMPx^A64I_K12^0, dest);  /* CMP next, #0. */
  emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ)
    asm_guardcc(as, CC_EQ);
  else
    emit_cond_branch(as, CC_EQ, l_end);

  if (irt_isnum(kt)) {
    if (isk) {
      /* Assumes -0.0 is already canonicalized to +0.0. */
      if (k)
	emit_n(as, A64I_CMPx^k, tmp);
      else
	emit_nm(as, A64I_CMPx, key, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
    } else {
      /* Variable number key: first check the tag is below LJ_TISNUM,
      ** then compare as doubles (handles int/num aliasing).
      */
      Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
      Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
      rset_clear(allow, tisnum);
      emit_nm(as, A64I_FCMPd, key, ftmp);
      emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
      emit_cond_branch(as, CC_LO, l_next);  /* Not a number: next node. */
      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
    }
  } else if (irt_isaddr(kt)) {
    /* GC object key: compare the full 64 bit tagged value at once. */
    Reg scr;
    if (isk) {
      int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
      scr = ra_allock(as, kk, allow);
      emit_nm(as, A64I_CMPx, scr, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
    } else {
      scr = ra_scratch(as, allow);
      emit_nm(as, A64I_CMPx, tmp, scr);
      emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
    }
    rset_clear(allow, scr);
  } else {
    /* Primitive key (true/false): compare the tag word. */
    Reg type, scr;
    lua_assert(irt_ispri(kt) && !irt_isnil(kt));
    type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
    scr = ra_scratch(as, rset_clear(allow, type));
    rset_clear(allow, scr);
    emit_nm(as, A64I_CMPw, scr, type);
    emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
  }

  /* Patch the loop-back conditional branch (B.NE) over the body above. */
  *l_loop = A64I_BCC | A64F_S19((as->mcp-l_loop) & 0x0007ffffu) | CC_NE;
  if (!isk && irt_isaddr(kt)) {
    /* Pre-build the tagged key value in tmp for the in-loop compare. */
    Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
    rset_clear(allow, type);
  }
  /* Load main position relative to tab->node into dest. */
  khash = isk ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
  } else {
    /* dest = node + (hash & hmask) * sizeof(Node); sizeof(Node) == 24,
    ** expressed as x*3*8 via two shifted adds.
    */
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
    emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
    if (isk) {
      Reg tmphash = ra_allock(as, khash, allow);
      emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
    } else if (irt_isstr(kt)) {
      /* Fetch of str->hash is cheaper than ra_allock. */
      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
      emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
      emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
      emit_dnm(as, A64I_SUBw, dest, dest, tmp);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
      emit_dnm(as, A64I_EORw, dest, dest, tmp);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
      emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
      emit_dnm(as, A64I_EORw, tmp, tmp, dest);
      if (irt_isnum(kt)) {
	/* Number key: hash lo^hi words of the double. */
	emit_dnm(as, A64I_ADDw, dest, dest, dest);
	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
	emit_dm(as, A64I_MOVw, tmp, dest);
	emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
      } else {
	checkmclim(as);
	emit_dm(as, A64I_MOVw, tmp, key);
	emit_dnm(as, A64I_EORw, dest, dest,
		 ra_allock(as, irt_toitype(kt) << 15, allow));
	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
	emit_dm(as, A64I_MOVx, dest, key);
      }
    }
  }
}
726 | |||
/* IR_HREFK: hash slot reference with constant key at a known node offset.
** Loads the Node's 64 bit tagged key at the precomputed slot and guards
** (CC_NE exit) that it equals the expected constant key; dest (if used)
** gets the address of the node.
*/
static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  int bigofs = !emit_checkofs(A64I_LDRx, ofs);  /* Offset too big for LDR? */
  RegSet allow = RSET_GPR;
  Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, allow);
  Reg key = ra_scratch(as, rset_clear(allow, node));
  Reg idx = node;
  uint64_t k;
  lua_assert(ofs % sizeof(Node) == 0);
  rset_clear(allow, key);
  if (bigofs) {
    /* Large offset: materialize node+ofs into dest and index off that. */
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
  }
  asm_guardcc(as, CC_NE);
  /* Build the expected 64 bit tagged key constant. */
  if (irt_ispri(irkey->t)) {
    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
  } else if (irt_isnum(irkey->t)) {
    k = ir_knum(irkey)->u64;
  } else {
    k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
  }
  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow));
  emit_lso(as, A64I_LDRx, key, idx, kofs);
  if (bigofs)
    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
}
762 | |||
/* IR_UREF*: compute the address of an upvalue's value.
** For a constant function the upvalue pointer is loaded directly.
** IR_UREFC additionally guards that the upvalue is closed (closed == 1)
** and addresses the embedded uv->tv instead of the indirect uv->v.
*/
static void asm_uref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, A64I_LDRx, dest, v);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      asm_guardcc(as, CC_NE);
      emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
      emit_opk(as, A64I_ADDx, dest, uv,
	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
      emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    /* Load the GCupval pointer from the function's upvalue array. */
    emit_lso(as, A64I_LDRx, uv, func,
	     (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
  }
}
786 | |||
787 | static void asm_fref(ASMState *as, IRIns *ir) | ||
788 | { | ||
789 | UNUSED(as); UNUSED(ir); | ||
790 | lua_assert(!ra_used(ir)); | ||
791 | } | ||
792 | |||
/* IR_STRREF: compute a pointer into string data: str + sizeof(GCstr) + ofs. */
static void asm_strref(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg dest = ra_dest(as, ir, allow);
  Reg base = ra_alloc1(as, ir->op1, allow);
  IRIns *irr = IR(ir->op2);
  int32_t ofs = sizeof(GCstr);  /* Skip the string header. */
  uint32_t m;
  rset_clear(allow, base);
  if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
    /* Constant offset fits in a 12 bit immediate: single add. */
    emit_dn(as, A64I_ADDx^m, dest, base);
  } else {
    /* Two adds: dest = base + ofs_reg, then dest += sizeof(GCstr). */
    emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
    emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
  }
}
809 | |||
810 | /* -- Loads and stores ---------------------------------------------------- */ | ||
811 | |||
812 | static A64Ins asm_fxloadins(IRIns *ir) | ||
813 | { | ||
814 | switch (irt_type(ir->t)) { | ||
815 | case IRT_I8: return A64I_LDRB ^ A64I_LS_S; | ||
816 | case IRT_U8: return A64I_LDRB; | ||
817 | case IRT_I16: return A64I_LDRH ^ A64I_LS_S; | ||
818 | case IRT_U16: return A64I_LDRH; | ||
819 | case IRT_NUM: return A64I_LDRd; | ||
820 | case IRT_FLOAT: return A64I_LDRs; | ||
821 | default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; | ||
822 | } | ||
823 | } | ||
824 | |||
825 | static A64Ins asm_fxstoreins(IRIns *ir) | ||
826 | { | ||
827 | switch (irt_type(ir->t)) { | ||
828 | case IRT_I8: case IRT_U8: return A64I_STRB; | ||
829 | case IRT_I16: case IRT_U16: return A64I_STRH; | ||
830 | case IRT_NUM: return A64I_STRd; | ||
831 | case IRT_FLOAT: return A64I_STRs; | ||
832 | default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; | ||
833 | } | ||
834 | } | ||
835 | |||
/* IR_FLOAD: load an object field, using the field_ofs[] offset table.
** op1 == REF_NIL addresses the global_State relative to RID_GL.
*/
static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx;
  A64Ins ai = asm_fxloadins(ir);
  int32_t ofs;
  if (ir->op1 == REF_NIL) {
    idx = RID_GL;
    ofs = (ir->op2 << 2) - GG_OFS(g);
  } else {
    idx = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->op2 == IRFL_TAB_ARRAY) {
      ofs = asm_fuseabase(as, ir->op1);
      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
	emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
	return;
      }
    }
    ofs = field_ofs[ir->op2];
  }
  emit_lso(as, ai, (dest & 31), idx, ofs);
}
858 | |||
/* IR_FSTORE: store to an object field (skipped if the IR is sunk). */
static void asm_fstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);  /* The FREF supplying object and field. */
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
  }
}
869 | |||
/* IR_XLOAD: load through an arbitrary pointer, fusing the address mode. */
static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));  /* ARM64 path assumes aligned. */
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}
876 | |||
/* IR_XSTORE: store through an arbitrary pointer (skipped if sunk). */
static void asm_xstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
		 rset_exclude(RSET_GPR, src));
  }
}
885 | |||
/* IR_ALOAD/HLOAD/ULOAD/VLOAD: load a TValue and type-check its tag.
** The raw 64 bit tagged value is loaded into a GPR (tmp); the tag check
** guards the trace, then the payload is extracted into dest:
** numbers via FMOV, GC refs by masking with LJ_GCVMASK, ints by a 32 bit mov.
*/
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  Reg idx, tmp, type;
  int32_t ofs = 0;
  RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
  lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
	     irt_isint(ir->t));
  if (ra_used(ir)) {
    Reg dest = ra_dest(as, ir, allow);
    tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
    if (irt_isaddr(ir->t)) {
      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
    } else if (irt_isnum(ir->t)) {
      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
    } else if (irt_isint(ir->t)) {
      emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend loword. */
    }
  } else {
    tmp = ra_scratch(as, gpr);
  }
  type = ra_scratch(as, rset_clear(gpr, tmp));
  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
  /* Always do the type check, even if the load result is unused. */
  asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
  if (irt_type(ir->t) >= IRT_NUM) {
    lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
    /* Number check: hiword tag must be below LJ_TISNUM (guard on CC_LS). */
    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
	    ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
  } else if (irt_isaddr(ir->t)) {
    /* GC object check: arithmetic-shift out the payload, compare the tag. */
    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
    emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
  } else if (irt_isnil(ir->t)) {
    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);  /* tag == -1? */
  } else {
    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
	    ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
  }
  if (ofs & FUSE_REG)
    emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx, tmp, idx, (ofs & 31));
  else
    emit_lso(as, A64I_LDRx, tmp, idx, ofs);
}
928 | |||
/* IR_ASTORE/HSTORE/USTORE: store a TValue (skipped if sunk).
** Numbers are stored directly from an FPR; otherwise a full 64 bit
** tagged value is assembled in RID_TMP (or is a pure-tag constant for
** primitives) and stored as one 64 bit word.
*/
static void asm_ahustore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    RegSet allow = RSET_GPR;
    Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
    int32_t ofs = 0;
    if (irt_isnum(ir->t)) {
      src = ra_alloc1(as, ir->op2, RSET_FPR);
      idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
      if (ofs & FUSE_REG)
	emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx, (src & 31), idx, (ofs &31));
      else
	emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
    } else {
      if (!irt_ispri(ir->t)) {
	src = ra_alloc1(as, ir->op2, allow);
	rset_clear(allow, src);
	if (irt_isinteger(ir->t))
	  type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow);
	else
	  type = ra_allock(as, irt_toitype(ir->t), allow);
      } else {
	/* Primitive: the tag constant is the whole TValue. */
	tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
      }
      idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
			   A64I_STRx);
      if (ofs & FUSE_REG)
	emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx, tmp, idx, (ofs & 31));
      else
	emit_lso(as, A64I_STRx, tmp, idx, ofs);
      if (ra_hasreg(src)) {
	/* Merge tag and payload into RID_TMP before the store (runs first). */
	if (irt_isinteger(ir->t)) {
	  emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
	} else {
	  emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
	}
      }
    }
  }
}
969 | |||
/* IR_SLOAD: load a stack slot, with optional type check and optional
** int<->num conversion (IRSLOAD_CONVERT). Slot offset is relative to
** BASE; slot 0 lives at ofs 8*(-2) per the frame layout used here.
*/
static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-2);
  IRType1 t = ir->t;
  Reg dest = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    /* Guarded num->int conversion: done by asm_tointg(). */
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    Reg tmp = RID_NONE;
    if ((ir->op2 & IRSLOAD_CONVERT))
      tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
    lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
    base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
    if (irt_isaddr(t)) {
      /* Mask off the tag bits to get the GC pointer. */
      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
    } else if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
	emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
	/* If value is already loaded for type check, move it to FPR. */
	if ((ir->op2 & IRSLOAD_TYPECHECK))
	  emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
	else
	  dest = tmp;
	t.irt = IRT_NUM;  /* Check for original type. */
      } else {
	emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
	dest = tmp;
	t.irt = IRT_INT;  /* Check for original type. */
      }
    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
      emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend integer loword. */
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
dotypecheck:
  rset_clear(allow, base);
  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
    Reg tmp;
    if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
      tmp = dest;  /* Reuse the GPR destination for the raw tagged value. */
    } else {
      tmp = ra_scratch(as, allow);
      rset_clear(allow, tmp);
    }
    if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
    /* Need type check, even if the load result is unused. */
    asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
    if (irt_type(t) >= IRT_NUM) {
      lua_assert(irt_isinteger(t) || irt_isnum(t));
      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
	      ra_allock(as, LJ_TISNUM << 15, allow), tmp);
    } else if (irt_isnil(t)) {
      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
    } else if (irt_ispri(t)) {
      emit_nm(as, A64I_CMPx,
	      ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
    } else {
      /* GC object: arithmetic-shift out the payload, compare the tag. */
      Reg type = ra_scratch(as, allow);
      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
      emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
    }
    emit_lso(as, A64I_LDRx, tmp, base, ofs);
    return;
  }
  if (ra_hasreg(dest)) {
    emit_lso(as, irt_isnum(t) ? A64I_LDRd :
	     (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs);
  }
}
1047 | |||
1048 | /* -- Allocations --------------------------------------------------------- */ | ||
1049 | |||
#if LJ_HASFFI
/* IR_CNEW/CNEWI: allocate a cdata object via lj_mem_newgco() (or
** lj_cdata_newv() for VLA/VLS/aligned cdata) and initialize its header;
** CNEWI additionally stores the immutable 4/8 byte payload.
*/
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));

  as->gcsteps++;  /* Allocation may trigger a GC step. */
  asm_setupresult(as, ir, ci);  /* GCcdata * */
  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    int32_t ofs = sizeof(GCcdata);
    Reg r = ra_alloc1(as, ir->op2, allow);
    lua_assert(sz == 4 || sz == 8);
    emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  {
    /* Small ctype ids go through RID_X1 with a MOVZ; big ones via allock. */
    Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
    emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
    emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
    emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
    if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
  }
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
	       ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif
1098 | |||
1099 | /* -- Write barriers ------------------------------------------------------ */ | ||
1100 | |||
/* IR_TBAR: table write barrier. If the table is black, clear its black
** bit and link it onto g->gc.grayagain; otherwise skip to l_end.
** Emitted backwards: at runtime the marked byte is loaded/tested first.
*/
static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
		     rset_exclude(rset_exclude(RSET_GPR, tab), link));
  Reg mark = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_lso(as, A64I_STRx, tab, gr,
	   (int32_t)offsetof(global_State, gc.grayagain));
  emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
  emit_lso(as, A64I_LDRx, link, gr,
	   (int32_t)offsetof(global_State, gc.grayagain));
  emit_cond_branch(as, CC_EQ, l_end);  /* Not black: nothing to do. */
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
  emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
}
1120 | |||
/* IR_OBAR: object write barrier for closed upvalues. Calls
** lj_gc_barrieruv() only if the upvalue is black AND the stored value
** is white; otherwise branches to l_end past the call.
*/
static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  RegSet allow = RSET_GPR;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
  obj = IR(ir->op1)->r;  /* UREFC result holds &uv->tv. */
  tmp = ra_scratch(as, rset_exclude(allow, obj));
  emit_cond_branch(as, CC_EQ, l_end);
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);  /* uv black? */
  emit_cond_branch(as, CC_EQ, l_end);
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);  /* val white? */
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  /* uv->marked lives at a negative offset from &uv->tv. */
  emit_lso(as, A64I_LDRB, tmp, obj,
	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}
1147 | |||
1148 | /* -- Arithmetic and logic operations ------------------------------------- */ | ||
1149 | |||
1150 | static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) | ||
1151 | { | ||
1152 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1153 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | ||
1154 | right = (left >> 8); left &= 255; | ||
1155 | emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); | ||
1156 | } | ||
1157 | |||
1158 | static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) | ||
1159 | { | ||
1160 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1161 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); | ||
1162 | emit_dn(as, ai, (dest & 31), (left & 31)); | ||
1163 | } | ||
1164 | |||
/* IR_FPMATH: sqrt/floor/ceil/trunc map to single A64 instructions;
** exp2 may be fused into a pow sequence; everything else becomes a call.
*/
static void asm_fpmath(ASMState *as, IRIns *ir)
{
  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
  if (fpm == IRFPM_SQRT) {
    asm_fpunary(as, ir, A64I_FSQRTd);
  } else if (fpm <= IRFPM_TRUNC) {
    /* floor/ceil/trunc: round with the matching FRINT rounding mode. */
    asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
			fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
    return;  /* Joined into a pow call. */
  } else {
    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
  }
}
1179 | |||
1180 | static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) | ||
1181 | { | ||
1182 | IRIns *ir; | ||
1183 | if (irref_isk(rref)) | ||
1184 | return 0; /* Don't swap constants to the left. */ | ||
1185 | if (irref_isk(lref)) | ||
1186 | return 1; /* But swap constants to the right. */ | ||
1187 | ir = IR(rref); | ||
1188 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || | ||
1189 | (ir->o == IR_ADD && ir->op1 == ir->op2) || | ||
1190 | (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) | ||
1191 | return 0; /* Don't swap fusable operands to the left. */ | ||
1192 | ir = IR(lref); | ||
1193 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || | ||
1194 | (ir->o == IR_ADD && ir->op1 == ir->op2) || | ||
1195 | (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) | ||
1196 | return 1; /* But swap fusable operands to the right. */ | ||
1197 | return 0; /* Otherwise don't swap. */ | ||
1198 | } | ||
1199 | |||
/* Integer ALU op with right-operand fusion. Sets the A64I_S (flags)
** bit and emits an overflow guard for IR_ADDOV/SUBOV/etc.
*/
static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
{
  IRRef lref = ir->op1, rref = ir->op2;
  Reg left, dest = ra_dest(as, ir, RSET_GPR);
  uint32_t m;
  /* SUB is not commutative: never swap its operands. */
  if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
    IRRef tmp = lref; lref = rref; rref = tmp;
  }
  left = ra_hintalloc(as, lref, dest, RSET_GPR);
  if (irt_is64(ir->t)) ai |= A64I_X;  /* Widen to the 64 bit form. */
  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
  if (irt_isguard(ir->t)) {  /* For IR_ADDOV etc. */
    asm_guardcc(as, CC_VS);
    ai |= A64I_S;
  }
  emit_dn(as, ai^m, dest, left);
}
1217 | |||
/* Integer ALU op that can absorb a following compare-with-zero:
** if the last emitted instruction was that cmp (as->flagmcp), drop it
** and emit the flag-setting variant of the op instead.
*/
static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
{
  if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
    as->flagmcp = NULL;
    as->mcp++;  /* Remove the already-emitted cmp (code grows downwards). */
    ai |= A64I_S;
  }
  asm_intop(as, ir, ai);
}
1227 | |||
1228 | static void asm_intneg(ASMState *as, IRIns *ir) | ||
1229 | { | ||
1230 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1231 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1232 | emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); | ||
1233 | } | ||
1234 | |||
/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
/* Integer multiply. IR_MULOV computes the full 64 bit product with
** SMULL and guards that it equals the sign-extended 32 bit result.
*/
static void asm_intmul(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  if (irt_isguard(ir->t)) {  /* IR_MULOV */
    asm_guardcc(as, CC_NE);
    emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend. */
    /* Compare hiword of the product against sign-bit replication. */
    emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
    emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
    emit_dnm(as, A64I_SMULL, dest, right, left);
  } else {
    emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
  }
}
1251 | |||
1252 | static void asm_add(ASMState *as, IRIns *ir) | ||
1253 | { | ||
1254 | if (irt_isnum(ir->t)) { | ||
1255 | asm_fparith(as, ir, A64I_FADDd); | ||
1256 | return; | ||
1257 | } | ||
1258 | asm_intop_s(as, ir, A64I_ADDw); | ||
1259 | } | ||
1260 | |||
1261 | static void asm_sub(ASMState *as, IRIns *ir) | ||
1262 | { | ||
1263 | if (irt_isnum(ir->t)) { | ||
1264 | asm_fparith(as, ir, A64I_FSUBd); | ||
1265 | return; | ||
1266 | } | ||
1267 | asm_intop_s(as, ir, A64I_SUBw); | ||
1268 | } | ||
1269 | |||
1270 | static void asm_mul(ASMState *as, IRIns *ir) | ||
1271 | { | ||
1272 | if (irt_isnum(ir->t)) { | ||
1273 | asm_fparith(as, ir, A64I_FMULd); | ||
1274 | return; | ||
1275 | } | ||
1276 | asm_intmul(as, ir); | ||
1277 | } | ||
1278 | |||
1279 | static void asm_div(ASMState *as, IRIns *ir) | ||
1280 | { | ||
1281 | #if LJ_HASFFI | ||
1282 | if (!irt_isnum(ir->t)) | ||
1283 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
1284 | IRCALL_lj_carith_divu64); | ||
1285 | else | ||
1286 | #endif | ||
1287 | asm_fparith(as, ir, A64I_FDIVd); | ||
1288 | } | ||
1289 | |||
1290 | static void asm_pow(ASMState *as, IRIns *ir) | ||
1291 | { | ||
1292 | #if LJ_HASFFI | ||
1293 | if (!irt_isnum(ir->t)) | ||
1294 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
1295 | IRCALL_lj_carith_powu64); | ||
1296 | else | ||
1297 | #endif | ||
1298 | asm_callid(as, ir, IRCALL_lj_vm_powi); | ||
1299 | } | ||
1300 | |||
1301 | #define asm_addov(as, ir) asm_add(as, ir) | ||
1302 | #define asm_subov(as, ir) asm_sub(as, ir) | ||
1303 | #define asm_mulov(as, ir) asm_mul(as, ir) | ||
1304 | |||
1305 | #define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) | ||
1306 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) | ||
1307 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | ||
1308 | |||
1309 | static void asm_mod(ASMState *as, IRIns *ir) | ||
1310 | { | ||
1311 | #if LJ_HASFFI | ||
1312 | if (!irt_isint(ir->t)) | ||
1313 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
1314 | IRCALL_lj_carith_modu64); | ||
1315 | else | ||
1316 | #endif | ||
1317 | asm_callid(as, ir, IRCALL_lj_vm_modi); | ||
1318 | } | ||
1319 | |||
1320 | static void asm_neg(ASMState *as, IRIns *ir) | ||
1321 | { | ||
1322 | if (irt_isnum(ir->t)) { | ||
1323 | asm_fpunary(as, ir, A64I_FNEGd); | ||
1324 | return; | ||
1325 | } | ||
1326 | asm_intneg(as, ir); | ||
1327 | } | ||
1328 | |||
1329 | static void asm_bitop(ASMState *as, IRIns *ir, A64Ins ai) | ||
1330 | { | ||
1331 | if (as->flagmcp == as->mcp && ai == A64I_ANDw) { | ||
1332 | /* Try to drop cmp r, #0. */ | ||
1333 | as->flagmcp = NULL; | ||
1334 | as->mcp++; | ||
1335 | ai += A64I_ANDSw - A64I_ANDw; | ||
1336 | } | ||
1337 | if (ir->op2 == 0) { | ||
1338 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1339 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); | ||
1340 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
1341 | emit_d(as, ai^m, dest); | ||
1342 | } else { | ||
1343 | asm_intop(as, ir, ai); | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | #define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw) | ||
1348 | #define asm_band(as, ir) asm_bitop(as, ir, A64I_ANDw) | ||
1349 | #define asm_bor(as, ir) asm_bitop(as, ir, A64I_ORRw) | ||
1350 | #define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw) | ||
1351 | |||
1352 | static void asm_bswap(ASMState *as, IRIns *ir) | ||
1353 | { | ||
1354 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1355 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1356 | emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); | ||
1357 | } | ||
1358 | |||
1359 | static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) | ||
1360 | { | ||
1361 | int shmask = irt_is64(ir->t) ? 63 : 31; | ||
1362 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | ||
1363 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1364 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1365 | int32_t shift = (IR(ir->op2)->i & shmask); | ||
1366 | |||
1367 | if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; | ||
1368 | switch (sh) { | ||
1369 | case A64SH_LSL: | ||
1370 | emit_dn(as, ai | A64F_IMMS(shmask-shift) | A64F_IMMR(shmask-shift+1), dest, left); | ||
1371 | break; | ||
1372 | case A64SH_LSR: case A64SH_ASR: | ||
1373 | emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); | ||
1374 | break; | ||
1375 | case A64SH_ROR: | ||
1376 | emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); | ||
1377 | break; | ||
1378 | } | ||
1379 | } else { /* Variable-length shifts. */ | ||
1380 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1381 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1382 | Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1383 | emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); | ||
1384 | } | ||
1385 | } | ||
1386 | |||
1387 | #define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL) | ||
1388 | #define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) | ||
1389 | #define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) | ||
1390 | #define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) | ||
1391 | #define asm_brol(as, ir) lua_assert(0) | ||
1392 | |||
1393 | static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) | ||
1394 | { | ||
1395 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1396 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1397 | Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1398 | emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); | ||
1399 | emit_nm(as, A64I_CMPw, left, right); | ||
1400 | } | ||
1401 | |||
1402 | static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) | ||
1403 | { | ||
1404 | Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); | ||
1405 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | ||
1406 | right = ((left >> 8) & 31); left &= 31; | ||
1407 | emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); | ||
1408 | emit_nm(as, A64I_FCMPd, left, right); | ||
1409 | } | ||
1410 | |||
1411 | static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) | ||
1412 | { | ||
1413 | if (irt_isnum(ir->t)) | ||
1414 | asm_fpmin_max(as, ir, fcc); | ||
1415 | else | ||
1416 | asm_intmin_max(as, ir, cc); | ||
1417 | } | ||
1418 | |||
1419 | #define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) | ||
1420 | #define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) | ||
1421 | |||
1422 | /* -- Comparisons --------------------------------------------------------- */ | ||
1423 | |||
1424 | /* Map of comparisons to flags. ORDER IR. */ | ||
1425 | static const uint8_t asm_compmap[IR_ABC+1] = { | ||
1426 | /* op FP swp int cc FP cc */ | ||
1427 | /* LT */ CC_GE + (CC_HS << 4), | ||
1428 | /* GE x */ CC_LT + (CC_HI << 4), | ||
1429 | /* LE */ CC_GT + (CC_HI << 4), | ||
1430 | /* GT x */ CC_LE + (CC_HS << 4), | ||
1431 | /* ULT x */ CC_HS + (CC_LS << 4), | ||
1432 | /* UGE */ CC_LO + (CC_LO << 4), | ||
1433 | /* ULE x */ CC_HI + (CC_LO << 4), | ||
1434 | /* UGT */ CC_LS + (CC_LS << 4), | ||
1435 | /* EQ */ CC_NE + (CC_NE << 4), | ||
1436 | /* NE */ CC_EQ + (CC_EQ << 4), | ||
1437 | /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ | ||
1438 | }; | ||
1439 | |||
1440 | /* FP comparisons. */ | ||
1441 | static void asm_fpcomp(ASMState *as, IRIns *ir) | ||
1442 | { | ||
1443 | Reg left, right; | ||
1444 | A64Ins ai; | ||
1445 | int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); | ||
1446 | if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { | ||
1447 | left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); | ||
1448 | right = 0; | ||
1449 | ai = A64I_FCMPZd; | ||
1450 | } else { | ||
1451 | left = ra_alloc2(as, ir, RSET_FPR); | ||
1452 | if (swp) { | ||
1453 | right = (left & 31); left = ((left >> 8) & 31); | ||
1454 | } else { | ||
1455 | right = ((left >> 8) & 31); left &= 31; | ||
1456 | } | ||
1457 | ai = A64I_FCMPd; | ||
1458 | } | ||
1459 | asm_guardcc(as, (asm_compmap[ir->o] >> 4)); | ||
1460 | emit_nm(as, ai, left, right); | ||
1461 | } | ||
1462 | |||
1463 | /* Integer comparisons. */ | ||
1464 | static void asm_intcomp(ASMState *as, IRIns *ir) | ||
1465 | { | ||
1466 | A64CC oldcc, cc = (asm_compmap[ir->o] & 15); | ||
1467 | A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; | ||
1468 | IRRef lref = ir->op1, rref = ir->op2; | ||
1469 | Reg left; | ||
1470 | uint32_t m; | ||
1471 | int cmpprev0 = 0; | ||
1472 | lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || | ||
1473 | irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); | ||
1474 | if (asm_swapops(as, lref, rref)) { | ||
1475 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
1476 | if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ | ||
1477 | else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ | ||
1478 | } | ||
1479 | oldcc = cc; | ||
1480 | if (irref_isk(rref) && IR(rref)->i == 0) { | ||
1481 | IRIns *irl = IR(lref); | ||
1482 | if (cc == CC_GE) cc = CC_PL; | ||
1483 | else if (cc == CC_LT) cc = CC_MI; | ||
1484 | else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */ | ||
1485 | cmpprev0 = (irl+1 == ir); | ||
1486 | /* Combine comp(BAND(left, right), 0) into tst left, right. */ | ||
1487 | if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { | ||
1488 | IRRef blref = irl->op1, brref = irl->op2; | ||
1489 | uint32_t m2 = 0; | ||
1490 | Reg bleft; | ||
1491 | if (asm_swapops(as, blref, brref)) { | ||
1492 | Reg tmp = blref; blref = brref; brref = tmp; | ||
1493 | } | ||
1494 | if (irref_isk(brref)) { | ||
1495 | /* NYI: use tbz/tbnz, if applicable. */ | ||
1496 | m2 = emit_isk13(IR(brref)->i, irt_is64(irl->t)); | ||
1497 | if (!m2) | ||
1498 | goto notst; /* Not beneficial if we miss a constant operand. */ | ||
1499 | } | ||
1500 | bleft = ra_alloc1(as, blref, RSET_GPR); | ||
1501 | ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); | ||
1502 | if (!m2) | ||
1503 | m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); | ||
1504 | asm_guardcc(as, cc); | ||
1505 | emit_n(as, ai^m2, bleft); | ||
1506 | return; | ||
1507 | } | ||
1508 | /* NYI: use cbz/cbnz for EQ/NE 0. */ | ||
1509 | } | ||
1510 | notst: | ||
1511 | left = ra_alloc1(as, lref, RSET_GPR); | ||
1512 | m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); | ||
1513 | asm_guardcc(as, cc); | ||
1514 | emit_n(as, ai^m, left); | ||
1515 | /* Signed comparison with zero and referencing previous ins? */ | ||
1516 | if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) | ||
1517 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ | ||
1518 | } | ||
1519 | |||
1520 | static void asm_comp(ASMState *as, IRIns *ir) | ||
1521 | { | ||
1522 | if (irt_isnum(ir->t)) | ||
1523 | asm_fpcomp(as, ir); | ||
1524 | else | ||
1525 | asm_intcomp(as, ir); | ||
1526 | } | ||
1527 | |||
1528 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1529 | |||
1530 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | ||
1531 | |||
1532 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | ||
1533 | static void asm_hiop(ASMState *as, IRIns *ir) | ||
1534 | { | ||
1535 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ | ||
1536 | } | ||
1537 | |||
1538 | /* -- Profiling ----------------------------------------------------------- */ | ||
1539 | |||
1540 | static void asm_prof(ASMState *as, IRIns *ir) | ||
1541 | { | ||
1542 | uint32_t k = emit_isk13(HOOK_PROFILE, 0); | ||
1543 | lua_assert(k != 0); | ||
1544 | UNUSED(ir); | ||
1545 | asm_guardcc(as, CC_NE); | ||
1546 | emit_n(as, A64I_TSTw^k, RID_TMP); | ||
1547 | emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); | ||
1548 | } | ||
1549 | |||
1550 | /* -- Stack handling ------------------------------------------------------ */ | ||
1551 | |||
1552 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | ||
1553 | static void asm_stack_check(ASMState *as, BCReg topslot, | ||
1554 | IRIns *irp, RegSet allow, ExitNo exitno) | ||
1555 | { | ||
1556 | Reg pbase; | ||
1557 | uint32_t k; | ||
1558 | if (irp) { | ||
1559 | if (!ra_hasspill(irp->s)) { | ||
1560 | pbase = irp->r; | ||
1561 | lua_assert(ra_hasreg(pbase)); | ||
1562 | } else if (allow) { | ||
1563 | pbase = rset_pickbot(allow); | ||
1564 | } else { | ||
1565 | pbase = RID_RET; | ||
1566 | emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ | ||
1567 | } | ||
1568 | } else { | ||
1569 | pbase = RID_BASE; | ||
1570 | } | ||
1571 | emit_branch(as, A64I_BL, exitstub_addr(as->J, exitno)); | ||
1572 | emit_cond_branch(as, CC_LS^1, as->mcp+1); | ||
1573 | k = emit_isk12((8*topslot)); | ||
1574 | lua_assert(k); | ||
1575 | emit_n(as, A64I_CMPx^k, RID_TMP); | ||
1576 | emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); | ||
1577 | emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, | ||
1578 | (int32_t)offsetof(lua_State, maxstack)); | ||
1579 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ | ||
1580 | if (ra_hasspill(irp->s)) | ||
1581 | emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); | ||
1582 | emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); | ||
1583 | if (ra_hasspill(irp->s) && !allow) | ||
1584 | emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ | ||
1585 | } else { | ||
1586 | emit_getgl(as, RID_TMP, cur_L); | ||
1587 | } | ||
1588 | } | ||
1589 | |||
1590 | /* Restore Lua stack from on-trace state. */ | ||
1591 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | ||
1592 | { | ||
1593 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | ||
1594 | #ifdef LUA_USE_ASSERT | ||
1595 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; | ||
1596 | #endif | ||
1597 | MSize n, nent = snap->nent; | ||
1598 | /* Store the value of all modified slots to the Lua stack. */ | ||
1599 | for (n = 0; n < nent; n++) { | ||
1600 | SnapEntry sn = map[n]; | ||
1601 | BCReg s = snap_slot(sn); | ||
1602 | int32_t ofs = 8*((int32_t)s-1-LJ_FR2); | ||
1603 | IRRef ref = snap_ref(sn); | ||
1604 | IRIns *ir = IR(ref); | ||
1605 | if ((sn & SNAP_NORESTORE)) | ||
1606 | continue; | ||
1607 | if (irt_isnum(ir->t)) { | ||
1608 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
1609 | emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); | ||
1610 | } else { | ||
1611 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
1612 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); | ||
1613 | if (!irref_isk(ref)) { | ||
1614 | Reg type, src; | ||
1615 | if (irt_is64(ir->t)) { | ||
1616 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
1617 | src = ra_alloc1(as, ref, rset_exclude(allow, type)); | ||
1618 | emit_lso(as, A64I_STRx, RID_TMP, RID_BASE, ofs); | ||
1619 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); | ||
1620 | } else if (irt_isinteger(ir->t)) { | ||
1621 | type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow); | ||
1622 | src = ra_alloc1(as, ref, rset_exclude(allow, type)); | ||
1623 | emit_lso(as, A64I_STRx, RID_TMP, RID_BASE, ofs); | ||
1624 | emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); | ||
1625 | } else { | ||
1626 | type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); | ||
1627 | emit_lso(as, A64I_STRx, type, RID_BASE, ofs); | ||
1628 | } | ||
1629 | } else { | ||
1630 | TValue k; | ||
1631 | lj_ir_kvalue(as->J->L, &k, ir); | ||
1632 | emit_lso(as, A64I_STRx, | ||
1633 | ra_allock(as, tvisnil(&k) ? -1 : (int64_t)k.u64, allow), | ||
1634 | RID_BASE, ofs); | ||
1635 | } | ||
1636 | } | ||
1637 | checkmclim(as); | ||
1638 | } | ||
1639 | lua_assert(map + nent == flinks); | ||
1640 | } | ||
1641 | |||
1642 | /* -- GC handling --------------------------------------------------------- */ | ||
1643 | |||
1644 | /* Check GC threshold and do one or more GC steps. */ | ||
1645 | static void asm_gc_check(ASMState *as) | ||
1646 | { | ||
1647 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; | ||
1648 | IRRef args[2]; | ||
1649 | MCLabel l_end; | ||
1650 | Reg tmp1, tmp2; | ||
1651 | ra_evictset(as, RSET_SCRATCH); | ||
1652 | l_end = emit_label(as); | ||
1653 | /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ | ||
1654 | asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ | ||
1655 | emit_n(as, A64I_CMPx^A64I_K12, RID_RET); | ||
1656 | args[0] = ASMREF_TMP1; /* global_State *g */ | ||
1657 | args[1] = ASMREF_TMP2; /* MSize steps */ | ||
1658 | asm_gencall(as, ci, args); | ||
1659 | tmp1 = ra_releasetmp(as, ASMREF_TMP1); | ||
1660 | tmp2 = ra_releasetmp(as, ASMREF_TMP2); | ||
1661 | emit_loadi(as, tmp2, as->gcsteps); | ||
1662 | /* Jump around GC step if GC total < GC threshold. */ | ||
1663 | emit_cond_branch(as, CC_LS, l_end); | ||
1664 | emit_nm(as, A64I_CMPx, RID_TMP, tmp2); | ||
1665 | emit_lso(as, A64I_LDRx, tmp2, tmp1, | ||
1666 | (int32_t)offsetof(global_State, gc.threshold)); | ||
1667 | emit_lso(as, A64I_LDRx, RID_TMP, tmp1, | ||
1668 | (int32_t)offsetof(global_State, gc.total)); | ||
1669 | ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); | ||
1670 | as->gcsteps = 0; | ||
1671 | checkmclim(as); | ||
1672 | } | ||
1673 | |||
1674 | /* -- Loop handling ------------------------------------------------------- */ | ||
1675 | |||
1676 | /* Fixup the loop branch. */ | ||
1677 | static void asm_loop_fixup(ASMState *as) | ||
1678 | { | ||
1679 | MCode *p = as->mctop; | ||
1680 | MCode *target = as->mcp; | ||
1681 | if (as->loopinv) { /* Inverted loop branch? */ | ||
1682 | ptrdiff_t delta = target - (p - 2); | ||
1683 | lua_assert(((delta + 0x40000) >> 19) == 0); | ||
1684 | /* asm_guardcc already inverted the b.cc and patched the final bl. */ | ||
1685 | p[-2] |= ((uint32_t)delta & 0x7ffff) << 5; | ||
1686 | } else { | ||
1687 | ptrdiff_t delta = target - (p - 1); | ||
1688 | p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); | ||
1689 | } | ||
1690 | } | ||
1691 | |||
1692 | /* -- Head of trace ------------------------------------------------------- */ | ||
1693 | |||
1694 | /* Reload L register from g->cur_L. */ | ||
1695 | static void asm_head_lreg(ASMState *as) | ||
1696 | { | ||
1697 | IRIns *ir = IR(ASMREF_L); | ||
1698 | if (ra_used(ir)) { | ||
1699 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1700 | emit_getgl(as, r, cur_L); | ||
1701 | ra_evictk(as); | ||
1702 | } | ||
1703 | } | ||
1704 | |||
1705 | /* Coalesce BASE register for a root trace. */ | ||
1706 | static void asm_head_root_base(ASMState *as) | ||
1707 | { | ||
1708 | IRIns *ir; | ||
1709 | asm_head_lreg(as); | ||
1710 | ir = IR(REF_BASE); | ||
1711 | if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) | ||
1712 | ra_spill(as, ir); | ||
1713 | ra_destreg(as, ir, RID_BASE); | ||
1714 | } | ||
1715 | |||
1716 | /* Coalesce BASE register for a side trace. */ | ||
1717 | static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) | ||
1718 | { | ||
1719 | IRIns *ir; | ||
1720 | asm_head_lreg(as); | ||
1721 | ir = IR(REF_BASE); | ||
1722 | if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) | ||
1723 | ra_spill(as, ir); | ||
1724 | if (ra_hasspill(irp->s)) { | ||
1725 | rset_clear(allow, ra_dest(as, ir, allow)); | ||
1726 | } else { | ||
1727 | Reg r = irp->r; | ||
1728 | lua_assert(ra_hasreg(r)); | ||
1729 | rset_clear(allow, r); | ||
1730 | if (r != ir->r && !rset_test(as->freeset, r)) | ||
1731 | ra_restore(as, regcost_ref(as->cost[r])); | ||
1732 | ra_destreg(as, ir, r); | ||
1733 | } | ||
1734 | return allow; | ||
1735 | } | ||
1736 | |||
1737 | /* -- Tail of trace ------------------------------------------------------- */ | ||
1738 | |||
1739 | /* Fixup the tail code. */ | ||
1740 | static void asm_tail_fixup(ASMState *as, TraceNo lnk) | ||
1741 | { | ||
1742 | MCode *p = as->mctop; | ||
1743 | MCode *target; | ||
1744 | /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ | ||
1745 | int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); | ||
1746 | if (spadj == 0) { | ||
1747 | as->mctop = --p; | ||
1748 | } else { | ||
1749 | /* Patch stack adjustment. */ | ||
1750 | uint32_t k = emit_isk12(spadj); | ||
1751 | lua_assert(k); | ||
1752 | p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); | ||
1753 | } | ||
1754 | /* Patch exit branch. */ | ||
1755 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; | ||
1756 | p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); | ||
1757 | } | ||
1758 | |||
1759 | /* Prepare tail of code. */ | ||
1760 | static void asm_tail_prep(ASMState *as) | ||
1761 | { | ||
1762 | MCode *p = as->mctop - 1; /* Leave room for exit branch. */ | ||
1763 | if (as->loopref) { | ||
1764 | as->invmcp = as->mcp = p; | ||
1765 | } else { | ||
1766 | as->mcp = p-1; /* Leave room for stack pointer adjustment. */ | ||
1767 | as->invmcp = NULL; | ||
1768 | } | ||
1769 | *p = 0; /* Prevent load/store merging. */ | ||
1770 | } | ||
1771 | |||
1772 | /* -- Trace setup --------------------------------------------------------- */ | ||
1773 | |||
1774 | /* Ensure there are enough stack slots for call arguments. */ | ||
1775 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | ||
1776 | { | ||
1777 | IRRef args[CCI_NARGS_MAX*2]; | ||
1778 | uint32_t i, nargs = CCI_XNARGS(ci); | ||
1779 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | ||
1780 | asm_collectargs(as, ir, ci, args); | ||
1781 | for (i = 0; i < nargs; i++) { | ||
1782 | if (args[i] && irt_isfp(IR(args[i])->t)) { | ||
1783 | if (nfpr > 0) nfpr--; else nslots += 2; | ||
1784 | } else { | ||
1785 | if (ngpr > 0) ngpr--; else nslots += 2; | ||
1786 | } | ||
1787 | } | ||
1788 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | ||
1789 | as->evenspill = nslots; | ||
1790 | return REGSP_HINT(RID_RET); | ||
1791 | } | ||
1792 | |||
1793 | static void asm_setup_target(ASMState *as) | ||
1794 | { | ||
1795 | /* May need extra exit for asm_stack_check on side traces. */ | ||
1796 | asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); | ||
1797 | } | ||
1798 | |||
1799 | /* -- Trace patching ------------------------------------------------------ */ | ||
1800 | |||
1801 | /* Patch exit jumps of existing machine code to a new target. */ | ||
1802 | void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | ||
1803 | { | ||
1804 | MCode *p = T->mcode; | ||
1805 | MCode *pe = (MCode *)((char *)p + T->szmcode); | ||
1806 | MCode *cstart = NULL, *cend = p; | ||
1807 | MCode *mcarea = lj_mcode_patch(J, p, 0); | ||
1808 | MCode *px = exitstub_addr(J, exitno); | ||
1809 | for (; p < pe; p++) { | ||
1810 | /* Look for bl exitstub, replace with b target. */ | ||
1811 | uint32_t ins = *p; | ||
1812 | if ((ins & 0xfc000000u) == 0x94000000u && | ||
1813 | ((ins ^ (px-p)) & 0x03ffffffu) == 0) { | ||
1814 | *p = (ins & 0x7c000000u) | ((target-p) & 0x03ffffffu); | ||
1815 | cend = p+1; | ||
1816 | if (!cstart) cstart = p; | ||
1817 | } | ||
1818 | } | ||
1819 | lua_assert(cstart != NULL); | ||
1820 | lj_mcode_sync(cstart, cend); | ||
1821 | lj_mcode_patch(J, mcarea, 1); | ||
1822 | } | ||
1823 | |||
diff --git a/src/lj_ccall.c b/src/lj_ccall.c index b599be33..a3ae8b05 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c | |||
@@ -331,7 +331,7 @@ | |||
331 | 331 | ||
332 | #define CCALL_HANDLE_COMPLEXARG \ | 332 | #define CCALL_HANDLE_COMPLEXARG \ |
333 | /* Pass complex by value in separate (!) FPRs or on stack. */ \ | 333 | /* Pass complex by value in separate (!) FPRs or on stack. */ \ |
334 | isfp = ctr->size == 2*sizeof(float) ? 2 : 1; | 334 | isfp = sz == 2*sizeof(float) ? 2 : 1; |
335 | 335 | ||
336 | #define CCALL_HANDLE_REGARG \ | 336 | #define CCALL_HANDLE_REGARG \ |
337 | if (LJ_TARGET_IOS && isva) { \ | 337 | if (LJ_TARGET_IOS && isva) { \ |
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 82708077..362d6202 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h | |||
@@ -107,6 +107,7 @@ typedef struct GG_State { | |||
107 | #define J2G(J) (&J2GG(J)->g) | 107 | #define J2G(J) (&J2GG(J)->g) |
108 | #define G2J(gl) (&G2GG(gl)->J) | 108 | #define G2J(gl) (&G2GG(gl)->J) |
109 | #define L2J(L) (&L2GG(L)->J) | 109 | #define L2J(L) (&L2GG(L)->J) |
110 | #define GG_G2J (GG_OFS(J) - GG_OFS(g)) | ||
110 | #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) | 111 | #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) |
111 | #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) | 112 | #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) |
112 | #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) | 113 | #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h new file mode 100644 index 00000000..eb8f7fc7 --- /dev/null +++ b/src/lj_emit_arm64.h | |||
@@ -0,0 +1,397 @@ | |||
1 | /* | ||
2 | ** ARM64 instruction emitter. | ||
3 | ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | ** Sponsored by Cisco Systems, Inc. | ||
7 | */ | ||
8 | |||
9 | /* -- Constant encoding --------------------------------------------------- */ | ||
10 | |||
11 | static uint64_t get_k64val(IRIns *ir) | ||
12 | { | ||
13 | if (ir->o == IR_KINT64) { | ||
14 | return ir_kint64(ir)->u64; | ||
15 | } else if (ir->o == IR_KGC) { | ||
16 | return (uint64_t)ir_kgc(ir); | ||
17 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
18 | return (uint64_t)ir_kptr(ir); | ||
19 | } else { | ||
20 | lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); | ||
21 | return ir->i; /* Sign-extended. */ | ||
22 | } | ||
23 | } | ||
24 | |||
25 | /* Encode constant in K12 format for data processing instructions. */ | ||
26 | static uint32_t emit_isk12(int64_t n) | ||
27 | { | ||
28 | uint64_t k = (n < 0) ? -n : n; | ||
29 | uint32_t m = (n < 0) ? 0x40000000 : 0; | ||
30 | if (k < 0x1000) { | ||
31 | return A64I_K12|m|A64F_U12(k); | ||
32 | } else if ((k & 0xfff000) == k) { | ||
33 | return A64I_K12|m|0x400000|A64F_U12(k>>12); | ||
34 | } | ||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | #define emit_clz64(n) __builtin_clzll(n) | ||
39 | #define emit_ctz64(n) __builtin_ctzll(n) | ||
40 | |||
41 | /* Encode constant in K13 format for logical data processing instructions. */ | ||
42 | static uint32_t emit_isk13(uint64_t n, int is64) | ||
43 | { | ||
44 | int inv = 0, w = 128, lz, tz; | ||
45 | if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ | ||
46 | if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ | ||
47 | do { /* Find the repeat width. */ | ||
48 | if (is64 && (uint32_t)(n^(n>>32))) break; | ||
49 | n = (uint32_t)n; w = 32; if ((n^(n>>16)) & 0xffff) break; | ||
50 | n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; | ||
51 | n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; | ||
52 | n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; | ||
53 | n = n & 0x3; w = 2; | ||
54 | } while (0); | ||
55 | lz = emit_clz64(n); | ||
56 | tz = emit_ctz64(n); | ||
57 | if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ | ||
58 | if (inv) | ||
59 | return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); | ||
60 | else | ||
61 | return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); | ||
62 | } | ||
63 | |||
64 | static uint32_t emit_isfpk64(uint64_t n) | ||
65 | { | ||
66 | uint64_t etop9 = ((n >> 54) & 0x1ff); | ||
67 | if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { | ||
68 | return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); | ||
69 | } | ||
70 | return ~0u; | ||
71 | } | ||
72 | |||
73 | /* -- Emit basic instructions --------------------------------------------- */ | ||
74 | |||
75 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) | ||
76 | { | ||
77 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); | ||
78 | } | ||
79 | |||
80 | static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) | ||
81 | { | ||
82 | *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); | ||
83 | } | ||
84 | |||
85 | static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) | ||
86 | { | ||
87 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); | ||
88 | } | ||
89 | |||
90 | static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) | ||
91 | { | ||
92 | *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); | ||
93 | } | ||
94 | |||
95 | static void emit_d(ASMState *as, A64Ins ai, Reg rd) | ||
96 | { | ||
97 | *--as->mcp = ai | A64F_D(rd); | ||
98 | } | ||
99 | |||
100 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) | ||
101 | { | ||
102 | *--as->mcp = ai | A64F_N(rn); | ||
103 | } | ||
104 | |||
105 | static int emit_checkofs(A64Ins ai, int64_t ofs) | ||
106 | { | ||
107 | int scale = (ai >> 30) & 3; | ||
108 | if (ofs < 0 || (ofs & ((1<<scale)-1))) { | ||
109 | return (ofs >= -256 && ofs <= 255) ? -1 : 0; | ||
110 | } else { | ||
111 | return (ofs < (4096<<scale)) ? 1 : 0; | ||
112 | } | ||
113 | } | ||
114 | |||
115 | static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) | ||
116 | { | ||
117 | int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; | ||
118 | lua_assert(ot); | ||
119 | /* Combine LDR/STR pairs to LDP/STP. */ | ||
120 | if ((sc == 2 || sc == 3) && | ||
121 | (!(ai & 0x400000) || rd != rn) && | ||
122 | as->mcp != as->mcloop) { | ||
123 | uint32_t prev = *as->mcp & ~A64F_D(31); | ||
124 | int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc); | ||
125 | A64Ins aip; | ||
126 | if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) || | ||
127 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { | ||
128 | aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); | ||
129 | } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || | ||
130 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { | ||
131 | aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); | ||
132 | ofsm = ofs; | ||
133 | } else { | ||
134 | goto nopair; | ||
135 | } | ||
136 | if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) { | ||
137 | *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | | ||
138 | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); | ||
139 | return; | ||
140 | } | ||
141 | } | ||
142 | nopair: | ||
143 | if (ot == 1) | ||
144 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); | ||
145 | else | ||
146 | *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); | ||
147 | } | ||
148 | |||
149 | /* -- Emit loads/stores --------------------------------------------------- */ | ||
150 | |||
151 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | ||
152 | #define emit_canremat(ref) ((ref) <= ASMREF_L) | ||
153 | |||
154 | /* Try to find an N-step delta relative to other consts with N < lim. */ | ||
155 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | ||
156 | { | ||
157 | RegSet work = ~as->freeset & RSET_GPR; | ||
158 | if (lim <= 1) return 0; /* Can't beat that. */ | ||
159 | while (work) { | ||
160 | Reg r = rset_picktop(work); | ||
161 | IRRef ref = regcost_ref(as->cost[r]); | ||
162 | lua_assert(r != rd); | ||
163 | if (ref < REF_TRUE) { | ||
164 | uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : | ||
165 | get_k64val(IR(ref)); | ||
166 | int64_t delta = (int64_t)(k - kx); | ||
167 | if (delta == 0) { | ||
168 | emit_dm(as, A64I_MOVx, rd, r); | ||
169 | return 1; | ||
170 | } else { | ||
171 | uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); | ||
172 | if (k12) { | ||
173 | emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); | ||
174 | return 1; | ||
175 | } | ||
176 | /* Do other ops or multi-step deltas pay off? Probably not. | ||
177 | ** E.g. XOR rarely helps with pointer consts. | ||
178 | */ | ||
179 | } | ||
180 | } | ||
181 | rset_clear(work, r); | ||
182 | } | ||
183 | return 0; /* Failed. */ | ||
184 | } | ||
185 | |||
186 | static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) | ||
187 | { | ||
188 | uint32_t k13 = emit_isk13(u64, is64); | ||
189 | if (k13) { /* Can the constant be represented as a bitmask immediate? */ | ||
190 | emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); | ||
191 | } else { | ||
192 | int i, zeros = 0, ones = 0, neg; | ||
193 | if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ | ||
194 | /* Count homogeneous 16 bit fragments. */ | ||
195 | for (i = 0; i < 4; i++) { | ||
196 | uint64_t frag = (u64 >> i*16) & 0xffff; | ||
197 | zeros += (frag == 0); | ||
198 | ones += (frag == 0xffff); | ||
199 | } | ||
200 | neg = ones > zeros; /* Use MOVN if it pays off. */ | ||
201 | if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { | ||
202 | int shift = 0, lshift = 0; | ||
203 | uint64_t n64 = neg ? ~u64 : u64; | ||
204 | if (n64 != 0) { | ||
205 | /* Find first/last fragment to be filled. */ | ||
206 | shift = (63-emit_clz64(n64)) & ~15; | ||
207 | lshift = emit_ctz64(n64) & ~15; | ||
208 | } | ||
209 | /* MOVK requires the original value (u64). */ | ||
210 | while (shift > lshift) { | ||
211 | uint32_t u16 = (u64 >> shift) & 0xffff; | ||
212 | /* Skip fragments that are correctly filled by MOVN/MOVZ. */ | ||
213 | if (u16 != (neg ? 0xffff : 0)) | ||
214 | emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); | ||
215 | shift -= 16; | ||
216 | } | ||
217 | /* But MOVN needs an inverted value (n64). */ | ||
218 | emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | | ||
219 | A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); | ||
220 | } | ||
221 | } | ||
222 | } | ||
223 | |||
224 | /* Load a 32 bit constant into a GPR. */ | ||
225 | #define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) | ||
226 | |||
227 | /* Load a 64 bit constant into a GPR. */ | ||
228 | #define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) | ||
229 | |||
230 | #define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) | ||
231 | |||
232 | #define glofs(as, k) \ | ||
233 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) | ||
234 | #define mcpofs(as, k) \ | ||
235 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) | ||
236 | #define checkmcpofs(as, k) \ | ||
237 | ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) | ||
238 | |||
239 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); | ||
240 | |||
241 | /* Get/set from constant pointer. */ | ||
242 | static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) | ||
243 | { | ||
244 | /* First, check if ip + offset is in range. */ | ||
245 | if ((ai & 0x00400000) && checkmcpofs(as, p)) { | ||
246 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); | ||
247 | } else { | ||
248 | Reg base = RID_GL; /* Next, try GL + offset. */ | ||
249 | int64_t ofs = glofs(as, p); | ||
250 | if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ | ||
251 | int64_t i64 = i64ptr(p); | ||
252 | base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); | ||
253 | ofs = i64 & 0x7fffull; | ||
254 | } | ||
255 | emit_lso(as, ai, r, base, ofs); | ||
256 | } | ||
257 | } | ||
258 | |||
259 | /* Load 64 bit IR constant into register. */ | ||
260 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | ||
261 | { | ||
262 | const uint64_t *k = &ir_k64(ir)->u64; | ||
263 | int64_t ofs; | ||
264 | if (r >= RID_MAX_GPR) { | ||
265 | uint32_t fpk = emit_isfpk64(*k); | ||
266 | if (fpk != ~0u) { | ||
267 | emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); | ||
268 | return; | ||
269 | } | ||
270 | } | ||
271 | ofs = glofs(as, k); | ||
272 | if (emit_checkofs(A64I_LDRx, ofs)) { | ||
273 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, | ||
274 | (r & 31), RID_GL, ofs); | ||
275 | } else { | ||
276 | if (r >= RID_MAX_GPR) { | ||
277 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); | ||
278 | r = RID_TMP; | ||
279 | } | ||
280 | if (checkmcpofs(as, k)) | ||
281 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); | ||
282 | else | ||
283 | emit_loadu64(as, r, *k); | ||
284 | } | ||
285 | } | ||
286 | |||
287 | /* Get/set global_State fields. */ | ||
288 | #define emit_getgl(as, r, field) \ | ||
289 | emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) | ||
290 | #define emit_setgl(as, r, field) \ | ||
291 | emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) | ||
292 | |||
293 | /* Trace number is determined from pc of exit instruction. */ | ||
294 | #define emit_setvmstate(as, i) UNUSED(i) | ||
295 | |||
296 | /* -- Emit control-flow instructions -------------------------------------- */ | ||
297 | |||
298 | /* Label for internal jumps. */ | ||
299 | typedef MCode *MCLabel; | ||
300 | |||
301 | /* Return label pointing to current PC. */ | ||
302 | #define emit_label(as) ((as)->mcp) | ||
303 | |||
304 | static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) | ||
305 | { | ||
306 | MCode *p = as->mcp; | ||
307 | ptrdiff_t delta = target - (p - 1); | ||
308 | lua_assert(((delta + 0x40000) >> 19) == 0); | ||
309 | *--p = A64I_BCC | A64F_S19((uint32_t)delta & 0x7ffff) | cond; | ||
310 | as->mcp = p; | ||
311 | } | ||
312 | |||
313 | static void emit_branch(ASMState *as, A64Ins ai, MCode *target) | ||
314 | { | ||
315 | MCode *p = as->mcp; | ||
316 | ptrdiff_t delta = target - (p - 1); | ||
317 | lua_assert(((delta + 0x02000000) >> 26) == 0); | ||
318 | *--p = ai | ((uint32_t)delta & 0x03ffffffu); | ||
319 | as->mcp = p; | ||
320 | } | ||
321 | |||
322 | #define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) | ||
323 | |||
324 | static void emit_call(ASMState *as, void *target) | ||
325 | { | ||
326 | MCode *p = --as->mcp; | ||
327 | ptrdiff_t delta = (char *)target - (char *)p; | ||
328 | if ((((delta>>2) + 0x02000000) >> 26) == 0) { | ||
329 | *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); | ||
330 | } else { /* Target out of range: need indirect call. But don't use R0-R7. */ | ||
331 | Reg r = ra_allock(as, i64ptr(target), | ||
332 | RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); | ||
333 | *p = A64I_BLR | A64F_N(r); | ||
334 | } | ||
335 | } | ||
336 | |||
337 | /* -- Emit generic operations --------------------------------------------- */ | ||
338 | |||
339 | /* Generic move between two regs. */ | ||
340 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | ||
341 | { | ||
342 | if (dst >= RID_MAX_GPR) { | ||
343 | emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, | ||
344 | (dst & 31), (src & 31)); | ||
345 | return; | ||
346 | } | ||
347 | if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ | ||
348 | MCode ins = *as->mcp, swp = (src^dst); | ||
349 | if ((ins & 0xbf800000) == 0xb9000000) { | ||
350 | if (!((ins ^ (dst << 5)) & 0x000003e0)) | ||
351 | *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ | ||
352 | if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) | ||
353 | *as->mcp = ins ^ swp; /* Swap D in store. */ | ||
354 | } | ||
355 | } | ||
356 | emit_dm(as, A64I_MOVx, dst, src); | ||
357 | } | ||
358 | |||
359 | /* Generic load of register with base and (small) offset address. */ | ||
360 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
361 | { | ||
362 | if (r >= RID_MAX_GPR) | ||
363 | emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); | ||
364 | else | ||
365 | emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); | ||
366 | } | ||
367 | |||
368 | /* Generic store of register with base and (small) offset address. */ | ||
369 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
370 | { | ||
371 | if (r >= RID_MAX_GPR) | ||
372 | emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); | ||
373 | else | ||
374 | emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); | ||
375 | } | ||
376 | |||
377 | /* Emit an arithmetic operation with a constant operand. */ | ||
378 | static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, | ||
379 | int32_t i, RegSet allow) | ||
380 | { | ||
381 | uint32_t k = emit_isk12(i); | ||
382 | if (k) | ||
383 | emit_dn(as, ai^k, dest, src); | ||
384 | else | ||
385 | emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); | ||
386 | } | ||
387 | |||
388 | /* Add offset to pointer. */ | ||
389 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | ||
390 | { | ||
391 | if (ofs) | ||
392 | emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, | ||
393 | ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); | ||
394 | } | ||
395 | |||
396 | #define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) | ||
397 | |||
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 8b72be7d..8bc2474c 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c | |||
@@ -296,6 +296,9 @@ enum { | |||
296 | #elif LJ_TARGET_ARM | 296 | #elif LJ_TARGET_ARM |
297 | DW_REG_SP = 13, | 297 | DW_REG_SP = 13, |
298 | DW_REG_RA = 14, | 298 | DW_REG_RA = 14, |
299 | #elif LJ_TARGET_ARM64 | ||
300 | DW_REG_SP = 31, | ||
301 | DW_REG_RA = 30, | ||
299 | #elif LJ_TARGET_PPC | 302 | #elif LJ_TARGET_PPC |
300 | DW_REG_SP = 1, | 303 | DW_REG_SP = 1, |
301 | DW_REG_RA = 65, | 304 | DW_REG_RA = 65, |
@@ -374,6 +377,8 @@ static const ELFheader elfhdr_template = { | |||
374 | .machine = 62, | 377 | .machine = 62, |
375 | #elif LJ_TARGET_ARM | 378 | #elif LJ_TARGET_ARM |
376 | .machine = 40, | 379 | .machine = 40, |
380 | #elif LJ_TARGET_ARM64 | ||
381 | .machine = 183, | ||
377 | #elif LJ_TARGET_PPC | 382 | #elif LJ_TARGET_PPC |
378 | .machine = 20, | 383 | .machine = 20, |
379 | #elif LJ_TARGET_MIPS | 384 | #elif LJ_TARGET_MIPS |
@@ -563,6 +568,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) | |||
563 | int i; | 568 | int i; |
564 | for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } | 569 | for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } |
565 | } | 570 | } |
571 | #elif LJ_TARGET_ARM64 | ||
572 | { | ||
573 | int i; | ||
574 | DB(DW_CFA_offset|31); DUV(2); | ||
575 | for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } | ||
576 | for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } | ||
577 | } | ||
566 | #elif LJ_TARGET_PPC | 578 | #elif LJ_TARGET_PPC |
567 | { | 579 | { |
568 | int i; | 580 | int i; |
diff --git a/src/lj_target.h b/src/lj_target.h index abea8d5b..c069eb95 100644 --- a/src/lj_target.h +++ b/src/lj_target.h | |||
@@ -55,7 +55,7 @@ typedef uint32_t RegSP; | |||
55 | /* Bitset for registers. 32 registers suffice for most architectures. | 55 | /* Bitset for registers. 32 registers suffice for most architectures. |
56 | ** Note that one set holds bits for both GPRs and FPRs. | 56 | ** Note that one set holds bits for both GPRs and FPRs. |
57 | */ | 57 | */ |
58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
59 | typedef uint64_t RegSet; | 59 | typedef uint64_t RegSet; |
60 | #else | 60 | #else |
61 | typedef uint32_t RegSet; | 61 | typedef uint32_t RegSet; |
@@ -69,7 +69,7 @@ typedef uint32_t RegSet; | |||
69 | #define rset_set(rs, r) (rs |= RID2RSET(r)) | 69 | #define rset_set(rs, r) (rs |= RID2RSET(r)) |
70 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) | 70 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) |
71 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) | 71 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) |
72 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 72 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
73 | #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) | 73 | #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) |
74 | #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) | 74 | #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) |
75 | #else | 75 | #else |
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 57ab134f..0cef06d5 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h | |||
@@ -55,7 +55,8 @@ enum { | |||
55 | 55 | ||
56 | /* Make use of all registers, except for x18, fp, lr and sp. */ | 56 | /* Make use of all registers, except for x18, fp, lr and sp. */ |
57 | #define RSET_FIXED \ | 57 | #define RSET_FIXED \ |
58 | (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)) | 58 | (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ |
59 | RID2RSET(RID_GL)) | ||
59 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) | 60 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) |
60 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) | 61 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) |
61 | #define RSET_ALL (RSET_GPR|RSET_FPR) | 62 | #define RSET_ALL (RSET_GPR|RSET_FPR) |
@@ -73,25 +74,235 @@ enum { | |||
73 | #define REGARG_LASTFPR RID_D7 | 74 | #define REGARG_LASTFPR RID_D7 |
74 | #define REGARG_NUMFPR 8 | 75 | #define REGARG_NUMFPR 8 |
75 | 76 | ||
77 | /* -- Spill slots --------------------------------------------------------- */ | ||
78 | |||
79 | /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. | ||
80 | ** | ||
81 | ** SPS_FIXED: Available fixed spill slots in interpreter frame. | ||
82 | ** This definition must match with the vm_arm64.dasc file. | ||
83 | ** Pre-allocate some slots to avoid sp adjust in every root trace. | ||
84 | ** | ||
85 | ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. | ||
86 | */ | ||
87 | #define SPS_FIXED 4 | ||
88 | #define SPS_FIRST 2 | ||
89 | |||
90 | #define SPOFS_TMP 0 | ||
91 | |||
92 | #define sps_scale(slot) (4 * (int32_t)(slot)) | ||
93 | #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) | ||
94 | |||
95 | /* -- Exit state ---------------------------------------------------------- */ | ||
96 | |||
97 | /* This definition must match with the *.dasc file(s). */ | ||
98 | typedef struct { | ||
99 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | ||
100 | intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | ||
101 | int32_t spill[256]; /* Spill slots. */ | ||
102 | } ExitState; | ||
103 | |||
104 | /* PC after instruction that caused an exit. Used to find the trace number. */ | ||
105 | #define EXITSTATE_PCREG RID_LR | ||
106 | /* Highest exit + 1 indicates stack check. */ | ||
107 | #define EXITSTATE_CHECKEXIT 1 | ||
108 | |||
109 | #define EXITSTUB_SPACING 4 | ||
110 | #define EXITSTUBS_PER_GROUP 32 | ||
111 | |||
112 | |||
76 | /* -- Instructions -------------------------------------------------------- */ | 113 | /* -- Instructions -------------------------------------------------------- */ |
77 | 114 | ||
78 | /* Instruction fields. */ | 115 | /* Instruction fields. */ |
79 | #define A64F_D(r) (r) | 116 | #define A64F_D(r) (r) |
80 | #define A64F_N(r) ((r) << 5) | 117 | #define A64F_N(r) ((r) << 5) |
81 | #define A64F_A(r) ((r) << 10) | 118 | #define A64F_A(r) ((r) << 10) |
82 | #define A64F_M(r) ((r) << 16) | 119 | #define A64F_M(r) ((r) << 16) |
120 | #define A64F_IMMS(x) ((x) << 10) | ||
121 | #define A64F_IMMR(x) ((x) << 16) | ||
83 | #define A64F_U16(x) ((x) << 5) | 122 | #define A64F_U16(x) ((x) << 5) |
123 | #define A64F_U12(x) ((x) << 10) | ||
84 | #define A64F_S26(x) (x) | 124 | #define A64F_S26(x) (x) |
85 | #define A64F_S19(x) ((x) << 5) | 125 | #define A64F_S19(x) ((x) << 5) |
126 | #define A64F_S9(x) ((x) << 12) | ||
127 | #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) | ||
128 | #define A64F_EX(ex) (A64I_EX | ((ex) << 13)) | ||
129 | #define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) | ||
130 | #define A64F_FP8(x) ((x) << 13) | ||
131 | #define A64F_CC(cc) ((cc) << 12) | ||
132 | #define A64F_LSL16(x) (((x) / 16) << 21) | ||
133 | #define A64F_BSH(sh) ((sh) << 10) | ||
86 | 134 | ||
87 | typedef enum A64Ins { | 135 | typedef enum A64Ins { |
136 | A64I_S = 0x20000000, | ||
137 | A64I_X = 0x80000000, | ||
138 | A64I_EX = 0x00200000, | ||
139 | A64I_K12 = 0x1a000000, | ||
140 | A64I_K13 = 0x18000000, | ||
141 | A64I_LS_U = 0x01000000, | ||
142 | A64I_LS_S = 0x00800000, | ||
143 | A64I_LS_R = 0x01200800, | ||
144 | A64I_LS_UXTWx = 0x00005000, | ||
145 | A64I_LS_LSLx = 0x00007000, | ||
146 | |||
147 | A64I_ADDw = 0x0b000000, | ||
148 | A64I_ADDx = 0x8b000000, | ||
149 | A64I_ADDSw = 0x2b000000, | ||
150 | A64I_ADDSx = 0xab000000, | ||
151 | A64I_NEGw = 0x4b0003e0, | ||
152 | A64I_NEGx = 0xcb0003e0, | ||
153 | A64I_SUBw = 0x4b000000, | ||
154 | A64I_SUBx = 0xcb000000, | ||
155 | A64I_SUBSw = 0x6b000000, | ||
156 | A64I_SUBSx = 0xeb000000, | ||
157 | |||
158 | A64I_MULw = 0x1b007c00, | ||
159 | A64I_MULx = 0x9b007c00, | ||
160 | A64I_SMULL = 0x9b207c00, | ||
161 | |||
162 | A64I_ANDw = 0x0a000000, | ||
163 | A64I_ANDx = 0x8a000000, | ||
164 | A64I_ANDSw = 0x6a000000, | ||
165 | A64I_ANDSx = 0xea000000, | ||
166 | A64I_EORw = 0x4a000000, | ||
167 | A64I_EORx = 0xca000000, | ||
168 | A64I_ORRw = 0x2a000000, | ||
169 | A64I_ORRx = 0xaa000000, | ||
170 | A64I_TSTw = 0x6a00001f, | ||
171 | A64I_TSTx = 0xea00001f, | ||
172 | |||
173 | A64I_CMPw = 0x6b00001f, | ||
174 | A64I_CMPx = 0xeb00001f, | ||
175 | A64I_CMNw = 0x2b00001f, | ||
176 | A64I_CMNx = 0xab00001f, | ||
177 | A64I_CCMPw = 0x7a400000, | ||
178 | A64I_CCMPx = 0xfa400000, | ||
179 | A64I_CSELw = 0x1a800000, | ||
180 | A64I_CSELx = 0x9a800000, | ||
181 | |||
182 | A64I_ASRw = 0x13007c00, | ||
183 | A64I_ASRx = 0x9340fc00, | ||
184 | A64I_LSLx = 0xd3400000, | ||
185 | A64I_LSRx = 0xd340fc00, | ||
186 | A64I_SHRw = 0x1ac02000, | ||
187 | A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ | ||
188 | A64I_REVw = 0x5ac00800, | ||
189 | A64I_REVx = 0xdac00c00, | ||
190 | |||
191 | A64I_EXTRw = 0x13800000, | ||
192 | A64I_EXTRx = 0x93c00000, | ||
193 | A64I_SBFMw = 0x13000000, | ||
194 | A64I_SBFMx = 0x93400000, | ||
195 | A64I_SXTBw = 0x13001c00, | ||
196 | A64I_SXTHw = 0x13003c00, | ||
197 | A64I_SXTW = 0x93407c00, | ||
198 | A64I_UBFMw = 0x53000000, | ||
199 | A64I_UBFMx = 0xd3400000, | ||
200 | A64I_UXTBw = 0x53001c00, | ||
201 | A64I_UXTHw = 0x53003c00, | ||
202 | |||
203 | A64I_MOVw = 0x2a0003e0, | ||
204 | A64I_MOVx = 0xaa0003e0, | ||
205 | A64I_MVNw = 0x2a2003e0, | ||
206 | A64I_MVNx = 0xaa2003e0, | ||
207 | A64I_MOVKw = 0x72800000, | ||
208 | A64I_MOVKx = 0xf2800000, | ||
88 | A64I_MOVZw = 0x52800000, | 209 | A64I_MOVZw = 0x52800000, |
89 | A64I_MOVZx = 0xd2800000, | 210 | A64I_MOVZx = 0xd2800000, |
211 | A64I_MOVNw = 0x12800000, | ||
212 | A64I_MOVNx = 0x92800000, | ||
213 | |||
214 | A64I_LDRB = 0x39400000, | ||
215 | A64I_LDRH = 0x79400000, | ||
216 | A64I_LDRw = 0xb9400000, | ||
217 | A64I_LDRx = 0xf9400000, | ||
90 | A64I_LDRLw = 0x18000000, | 218 | A64I_LDRLw = 0x18000000, |
91 | A64I_LDRLx = 0x58000000, | 219 | A64I_LDRLx = 0x58000000, |
92 | A64I_NOP = 0xd503201f, | 220 | A64I_STRB = 0x39000000, |
221 | A64I_STRH = 0x79000000, | ||
222 | A64I_STRw = 0xb9000000, | ||
223 | A64I_STRx = 0xf9000000, | ||
224 | A64I_STPw = 0x29000000, | ||
225 | A64I_STPx = 0xa9000000, | ||
226 | A64I_LDPw = 0x29400000, | ||
227 | A64I_LDPx = 0xa9400000, | ||
228 | |||
93 | A64I_B = 0x14000000, | 229 | A64I_B = 0x14000000, |
230 | A64I_BCC = 0x54000000, | ||
231 | A64I_BL = 0x94000000, | ||
94 | A64I_BR = 0xd61f0000, | 232 | A64I_BR = 0xd61f0000, |
233 | A64I_BLR = 0xd63f0000, | ||
234 | |||
235 | A64I_NOP = 0xd503201f, | ||
236 | |||
237 | /* FP */ | ||
238 | A64I_FADDd = 0x1e602800, | ||
239 | A64I_FSUBd = 0x1e603800, | ||
240 | A64I_FMADDd = 0x1f400000, | ||
241 | A64I_FMSUBd = 0x1f408000, | ||
242 | A64I_FNMADDd = 0x1f600000, | ||
243 | A64I_FNMSUBd = 0x1f608000, | ||
244 | A64I_FMULd = 0x1e600800, | ||
245 | A64I_FDIVd = 0x1e601800, | ||
246 | A64I_FNEGd = 0x1e614000, | ||
247 | A64I_FABS = 0x1e60c000, | ||
248 | A64I_FSQRTd = 0x1e61c000, | ||
249 | A64I_LDRs = 0xbd400000, | ||
250 | A64I_LDRd = 0xfd400000, | ||
251 | A64I_STRs = 0xbd000000, | ||
252 | A64I_STRd = 0xfd000000, | ||
253 | A64I_LDPs = 0x2d400000, | ||
254 | A64I_LDPd = 0x6d400000, | ||
255 | A64I_STPs = 0x2d000000, | ||
256 | A64I_STPd = 0x6d000000, | ||
257 | A64I_FCMPd = 0x1e602000, | ||
258 | A64I_FCMPZd = 0x1e602008, | ||
259 | A64I_FCSELd = 0x1e600c00, | ||
260 | A64I_FRINTMd = 0x1e654000, | ||
261 | A64I_FRINTPd = 0x1e64c000, | ||
262 | A64I_FRINTZd = 0x1e65c000, | ||
263 | |||
264 | A64I_FCVT_F32_F64 = 0x1e624000, | ||
265 | A64I_FCVT_F64_F32 = 0x1e22c000, | ||
266 | A64I_FCVT_F32_S32 = 0x1e220000, | ||
267 | A64I_FCVT_F64_S32 = 0x1e620000, | ||
268 | A64I_FCVT_F32_U32 = 0x1e230000, | ||
269 | A64I_FCVT_F64_U32 = 0x1e630000, | ||
270 | A64I_FCVT_F32_S64 = 0x9e220000, | ||
271 | A64I_FCVT_F64_S64 = 0x9e620000, | ||
272 | A64I_FCVT_F32_U64 = 0x9e230000, | ||
273 | A64I_FCVT_F64_U64 = 0x9e630000, | ||
274 | A64I_FCVT_S32_F64 = 0x1e780000, | ||
275 | A64I_FCVT_S32_F32 = 0x1e380000, | ||
276 | A64I_FCVT_U32_F64 = 0x1e790000, | ||
277 | A64I_FCVT_U32_F32 = 0x1e390000, | ||
278 | A64I_FCVT_S64_F64 = 0x9e780000, | ||
279 | A64I_FCVT_S64_F32 = 0x9e380000, | ||
280 | A64I_FCVT_U64_F64 = 0x9e790000, | ||
281 | A64I_FCVT_U64_F32 = 0x9e390000, | ||
282 | |||
283 | A64I_FMOV_S = 0x1e204000, | ||
284 | A64I_FMOV_D = 0x1e604000, | ||
285 | A64I_FMOV_R_S = 0x1e260000, | ||
286 | A64I_FMOV_S_R = 0x1e270000, | ||
287 | A64I_FMOV_R_D = 0x9e660000, | ||
288 | A64I_FMOV_D_R = 0x9e670000, | ||
289 | A64I_FMOV_DI = 0x1e601000, | ||
95 | } A64Ins; | 290 | } A64Ins; |
96 | 291 | ||
292 | typedef enum A64Shift { | ||
293 | A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR | ||
294 | } A64Shift; | ||
295 | |||
296 | typedef enum A64Extend { | ||
297 | A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, | ||
298 | A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, | ||
299 | } A64Extend; | ||
300 | |||
301 | /* ARM condition codes. */ | ||
302 | typedef enum A64CC { | ||
303 | CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, | ||
304 | CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, | ||
305 | CC_HS = CC_CS, CC_LO = CC_CC | ||
306 | } A64CC; | ||
307 | |||
97 | #endif | 308 | #endif |
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 7a881bdd..a6227bf7 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
@@ -236,12 +236,17 @@ | |||
236 | |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro | 236 | |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro |
237 | |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro | 237 | |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro |
238 | | | 238 | | |
239 | #define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field)) | 239 | #define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) |
240 | | | 240 | | |
241 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | 241 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) |
242 | | | 242 | | |
243 | |.macro hotcheck, delta | 243 | |.macro hotcheck, delta |
244 | | NYI | 244 | | lsr CARG1, PC, #1 |
245 | | and CARG1, CARG1, #126 | ||
246 | | add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT | ||
247 | | ldrh CARG2w, [GL, CARG1] | ||
248 | | subs CARG2, CARG2, #delta | ||
249 | | strh CARG2w, [GL, CARG1] | ||
245 | |.endmacro | 250 | |.endmacro |
246 | | | 251 | | |
247 | |.macro hotloop | 252 | |.macro hotloop |
@@ -869,7 +874,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
869 | | bl extern lj_meta_for // (lua_State *L, TValue *base) | 874 | | bl extern lj_meta_for // (lua_State *L, TValue *base) |
870 | | ldr INSw, [PC, #-4] | 875 | | ldr INSw, [PC, #-4] |
871 | |.if JIT | 876 | |.if JIT |
872 | | uxtb TMP0, INS | 877 | | uxtb TMP0w, INSw |
873 | |.endif | 878 | |.endif |
874 | | decode_RA RA, INS | 879 | | decode_RA RA, INS |
875 | | decode_RD RC, INS | 880 | | decode_RD RC, INS |
@@ -1732,7 +1737,20 @@ static void build_subroutines(BuildCtx *ctx) | |||
1732 | |//----------------------------------------------------------------------- | 1737 | |//----------------------------------------------------------------------- |
1733 | | | 1738 | | |
1734 | |->vm_record: // Dispatch target for recording phase. | 1739 | |->vm_record: // Dispatch target for recording phase. |
1735 | | NYI | 1740 | |.if JIT |
1741 | | ldrb CARG1w, GL->hookmask | ||
1742 | | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | ||
1743 | | bne >5 | ||
1744 | | // Decrement the hookcount for consistency, but always do the call. | ||
1745 | | ldr CARG2w, GL->hookcount | ||
1746 | | tst CARG1, #HOOK_ACTIVE | ||
1747 | | bne >1 | ||
1748 | | sub CARG2w, CARG2w, #1 | ||
1749 | | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT | ||
1750 | | beq >1 | ||
1751 | | str CARG2w, GL->hookcount | ||
1752 | | b >1 | ||
1753 | |.endif | ||
1736 | | | 1754 | | |
1737 | |->vm_rethook: // Dispatch target for return hooks. | 1755 | |->vm_rethook: // Dispatch target for return hooks. |
1738 | | ldrb TMP2w, GL->hookmask | 1756 | | ldrb TMP2w, GL->hookmask |
@@ -1774,7 +1792,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
1774 | | b <4 | 1792 | | b <4 |
1775 | | | 1793 | | |
1776 | |->vm_hotloop: // Hot loop counter underflow. | 1794 | |->vm_hotloop: // Hot loop counter underflow. |
1777 | | NYI | 1795 | |.if JIT |
1796 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). | ||
1797 | | add CARG1, GL, #GG_G2DISP+GG_DISP2J | ||
1798 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1799 | | str PC, SAVE_PC | ||
1800 | | ldr CARG3, LFUNC:CARG3->pc | ||
1801 | | mov CARG2, PC | ||
1802 | | str L, [GL, #GL_J(L)] | ||
1803 | | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] | ||
1804 | | str BASE, L->base | ||
1805 | | add CARG3, BASE, CARG3, lsl #3 | ||
1806 | | str CARG3, L->top | ||
1807 | | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
1808 | | b <3 | ||
1809 | |.endif | ||
1778 | | | 1810 | | |
1779 | |->vm_callhook: // Dispatch target for call hooks. | 1811 | |->vm_callhook: // Dispatch target for call hooks. |
1780 | | mov CARG2, PC | 1812 | | mov CARG2, PC |
@@ -1804,7 +1836,54 @@ static void build_subroutines(BuildCtx *ctx) | |||
1804 | | br CRET1 | 1836 | | br CRET1 |
1805 | | | 1837 | | |
1806 | |->cont_stitch: // Trace stitching. | 1838 | |->cont_stitch: // Trace stitching. |
1807 | | NYI | 1839 | |.if JIT |
1840 | | // RA = resultptr, CARG4 = meta base | ||
1841 | | ldr RB, SAVE_MULTRES | ||
1842 | | ldr INSw, [PC, #-4] | ||
1843 | | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. | ||
1844 | | subs RB, RB, #8 | ||
1845 | | decode_RA RC, INS // Call base. | ||
1846 | | and CARG3, CARG3, #LJ_GCVMASK | ||
1847 | | beq >2 | ||
1848 | |1: // Move results down. | ||
1849 | | ldr CARG1, [RA] | ||
1850 | | add RA, RA, #8 | ||
1851 | | subs RB, RB, #8 | ||
1852 | | str CARG1, [BASE, RC, lsl #3] | ||
1853 | | add RC, RC, #1 | ||
1854 | | bne <1 | ||
1855 | |2: | ||
1856 | | decode_RA RA, INS | ||
1857 | | decode_RB RB, INS | ||
1858 | | add RA, RA, RB | ||
1859 | |3: | ||
1860 | | cmp RA, RC | ||
1861 | | bhi >9 // More results wanted? | ||
1862 | | | ||
1863 | | ldrh RAw, TRACE:CARG3->traceno | ||
1864 | | ldrh RCw, TRACE:CARG3->link | ||
1865 | | cmp RCw, RAw | ||
1866 | | beq ->cont_nop // Blacklisted. | ||
1867 | | cmp RCw, #0 | ||
1868 | | bne =>BC_JLOOP // Jump to stitched trace. | ||
1869 | | | ||
1870 | | // Stitch a new trace to the previous trace. | ||
1871 | | mov CARG1, #GL_J(exitno) | ||
1872 | | str RA, [GL, CARG1] | ||
1873 | | mov CARG1, #GL_J(L) | ||
1874 | | str L, [GL, CARG1] | ||
1875 | | str BASE, L->base | ||
1876 | | add CARG1, GL, #GG_G2J | ||
1877 | | mov CARG2, PC | ||
1878 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
1879 | | ldr BASE, L->base | ||
1880 | | b ->cont_nop | ||
1881 | | | ||
1882 | |9: // Fill up results with nil. | ||
1883 | | str TISNIL, [BASE, RC, lsl #3] | ||
1884 | | add RC, RC, #1 | ||
1885 | | b <3 | ||
1886 | |.endif | ||
1808 | | | 1887 | | |
1809 | |->vm_profhook: // Dispatch target for profiler hook. | 1888 | |->vm_profhook: // Dispatch target for profiler hook. |
1810 | #if LJ_HASPROFILE | 1889 | #if LJ_HASPROFILE |
@@ -1822,10 +1901,120 @@ static void build_subroutines(BuildCtx *ctx) | |||
1822 | |//-- Trace exit handler ------------------------------------------------- | 1901 | |//-- Trace exit handler ------------------------------------------------- |
1823 | |//----------------------------------------------------------------------- | 1902 | |//----------------------------------------------------------------------- |
1824 | | | 1903 | | |
1904 | |.macro savex_, a, b | ||
1905 | | stp d..a, d..b, [sp, #a*8] | ||
1906 | | stp x..a, x..b, [sp, #32*8+a*8] | ||
1907 | |.endmacro | ||
1908 | | | ||
1825 | |->vm_exit_handler: | 1909 | |->vm_exit_handler: |
1826 | | NYI | 1910 | |.if JIT |
1911 | | sub sp, sp, #(64*8) | ||
1912 | | savex_, 0, 1 | ||
1913 | | savex_, 2, 3 | ||
1914 | | savex_, 4, 5 | ||
1915 | | savex_, 6, 7 | ||
1916 | | savex_, 8, 9 | ||
1917 | | savex_, 10, 11 | ||
1918 | | savex_, 12, 13 | ||
1919 | | savex_, 14, 15 | ||
1920 | | savex_, 16, 17 | ||
1921 | | savex_, 18, 19 | ||
1922 | | savex_, 20, 21 | ||
1923 | | savex_, 22, 23 | ||
1924 | | savex_, 24, 25 | ||
1925 | | savex_, 26, 27 | ||
1926 | | savex_, 28, 29 | ||
1927 | | stp d30, d31, [sp, #30*8] | ||
1928 | | ldr CARG1, [sp, #64*8] // Load original value of lr. | ||
1929 | | add CARG3, sp, #64*8 // Recompute original value of sp. | ||
1930 | | mv_vmstate CARG4, EXIT | ||
1931 | | ldr CARG2w, [CARG1, #-4]! // Get exit instruction. | ||
1932 | | stp CARG1, CARG3, [sp, #62*8] // Store exit pc/sp in RID_LR/RID_SP. | ||
1933 | | lsl CARG2, CARG2, #38 | ||
1934 | | add CARG1, CARG1, CARG2, asr #36 | ||
1935 | | ldr CARG2w, [lr] // Load exit stub group offset. | ||
1936 | | sub CARG1, CARG1, lr | ||
1937 | | sub CARG1, CARG1, #4 | ||
1938 | | ldr L, GL->cur_L | ||
1939 | | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. | ||
1940 | | ldr BASE, GL->jit_base | ||
1941 | | st_vmstate CARG4 | ||
1942 | | str CARG1w, [GL, #GL_J(exitno)] | ||
1943 | | str BASE, L->base | ||
1944 | | str L, [GL, #GL_J(L)] | ||
1945 | | str xzr, GL->jit_base | ||
1946 | | add CARG1, GL, #GG_G2J | ||
1947 | | mov CARG2, sp | ||
1948 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
1949 | | // Returns MULTRES (unscaled) or negated error code. | ||
1950 | | ldr CARG2, L->cframe | ||
1951 | | ldr BASE, L->base | ||
1952 | | and sp, CARG2, #CFRAME_RAWMASK | ||
1953 | | ldr PC, SAVE_PC // Get SAVE_PC. | ||
1954 | | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). | ||
1955 | | b >1 | ||
1956 | |.endif | ||
1957 | | | ||
1827 | |->vm_exit_interp: | 1958 | |->vm_exit_interp: |
1828 | | NYI | 1959 | | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. |
1960 | |.if JIT | ||
1961 | | ldr L, SAVE_L | ||
1962 | |1: | ||
1963 | | cmp CARG1w, #0 | ||
1964 | | blt >9 // Check for error from exit. | ||
1965 | | lsl RC, CARG1, #3 | ||
1966 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
1967 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
1968 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
1969 | | movn TISNIL, #0 | ||
1970 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
1971 | | str RC, SAVE_MULTRES | ||
1972 | | str BASE, L->base | ||
1973 | | ldr CARG2, LFUNC:CARG2->pc | ||
1974 | | str xzr, GL->jit_base | ||
1975 | | mv_vmstate CARG4, INTERP | ||
1976 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
1977 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
1978 | | ldrb RBw, [PC] | ||
1979 | | ldr INSw, [PC], #4 | ||
1980 | | st_vmstate CARG4 | ||
1981 | | cmp RBw, #BC_FUNCC+2 // Fast function? | ||
1982 | | add TMP1, GL, INS, uxtb #3 | ||
1983 | | bhs >4 | ||
1984 | |2: | ||
1985 | | cmp RBw, #BC_FUNCF // Function header? | ||
1986 | | add TMP0, GL, RB, uxtb #3 | ||
1987 | | ldr RB, [TMP0, #GG_G2DISP] | ||
1988 | | decode_RA RA, INS | ||
1989 | | lsr TMP0, INS, #16 | ||
1990 | | csel RC, TMP0, RC, lo | ||
1991 | | blo >5 | ||
1992 | | ldr CARG3, [BASE, FRAME_FUNC] | ||
1993 | | sub RC, RC, #8 | ||
1994 | | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 | ||
1995 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1996 | |5: | ||
1997 | | br RB | ||
1998 | | | ||
1999 | |4: // Check frame below fast function. | ||
2000 | | ldr CARG1, [BASE, FRAME_PC] | ||
2001 | | ands CARG2, CARG1, #FRAME_TYPE | ||
2002 | | bne <2 // Trace stitching continuation? | ||
2003 | | // Otherwise set KBASE for Lua function below fast function. | ||
2004 | | ldr CARG3, [CARG1, #-4] | ||
2005 | | decode_RA CARG1, CARG3 | ||
2006 | | sub CARG2, BASE, CARG1, lsl #3 | ||
2007 | | ldr LFUNC:CARG3, [CARG2, #-32] | ||
2008 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2009 | | ldr CARG3, LFUNC:CARG3->pc | ||
2010 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
2011 | | b <2 | ||
2012 | | | ||
2013 | |9: // Rethrow error from the right C frame. | ||
2014 | | neg CARG2, CARG1 | ||
2015 | | mov CARG1, L | ||
2016 | | bl extern lj_err_throw // (lua_State *L, int errcode) | ||
2017 | |.endif | ||
1829 | | | 2018 | | |
1830 | |//----------------------------------------------------------------------- | 2019 | |//----------------------------------------------------------------------- |
1831 | |//-- Math helper functions ---------------------------------------------- | 2020 | |//-- Math helper functions ---------------------------------------------- |
@@ -3387,6 +3576,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3387 | if (op == BC_FORI) { | 3576 | if (op == BC_FORI) { |
3388 | | csel PC, RC, PC, gt | 3577 | | csel PC, RC, PC, gt |
3389 | } else if (op == BC_JFORI) { | 3578 | } else if (op == BC_JFORI) { |
3579 | | mov PC, RC | ||
3390 | | ldrh RCw, [RC, #-2] | 3580 | | ldrh RCw, [RC, #-2] |
3391 | } else if (op == BC_IFORL) { | 3581 | } else if (op == BC_IFORL) { |
3392 | | csel PC, RC, PC, le | 3582 | | csel PC, RC, PC, le |
@@ -3488,7 +3678,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3488 | 3678 | ||
3489 | case BC_JLOOP: | 3679 | case BC_JLOOP: |
3490 | |.if JIT | 3680 | |.if JIT |
3491 | | NYI | 3681 | | // RA = base (ignored), RC = traceno |
3682 | | ldr CARG1, [GL, #GL_J(trace)] | ||
3683 | | mov CARG2, #0 // Traces on ARM64 don't store the trace #, so use 0. | ||
3684 | | ldr TRACE:RC, [CARG1, RC, lsl #3] | ||
3685 | | st_vmstate CARG2 | ||
3686 | | ldr RA, TRACE:RC->mcode | ||
3687 | | str BASE, GL->jit_base | ||
3688 | | str L, GL->tmpbuf.L | ||
3689 | | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. | ||
3690 | | br RA | ||
3492 | |.endif | 3691 | |.endif |
3493 | break; | 3692 | break; |
3494 | 3693 | ||
@@ -3546,10 +3745,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3546 | case BC_IFUNCV: | 3745 | case BC_IFUNCV: |
3547 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | 3746 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 |
3548 | | ldr CARG1, L->maxstack | 3747 | | ldr CARG1, L->maxstack |
3748 | | movn TMP0, #~LJ_TFUNC | ||
3549 | | add TMP2, BASE, RC | 3749 | | add TMP2, BASE, RC |
3750 | | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | ||
3550 | | add RA, RA, RC | 3751 | | add RA, RA, RC |
3551 | | add TMP0, RC, #16+FRAME_VARG | 3752 | | add TMP0, RC, #16+FRAME_VARG |
3552 | | str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC. | 3753 | | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. |
3553 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] | 3754 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] |
3554 | | cmp RA, CARG1 | 3755 | | cmp RA, CARG1 |
3555 | | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. | 3756 | | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. |
@@ -3736,8 +3937,8 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
3736 | "\t.uleb128 0x1\n" | 3937 | "\t.uleb128 0x1\n" |
3737 | "\t.sleb128 -8\n" | 3938 | "\t.sleb128 -8\n" |
3738 | "\t.byte 30\n" /* Return address is in lr. */ | 3939 | "\t.byte 30\n" /* Return address is in lr. */ |
3739 | "\t.uleb128 1\n" /* augmentation length */ | 3940 | "\t.uleb128 1\n" /* augmentation length */ |
3740 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | 3941 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ |
3741 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ | 3942 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ |
3742 | "\t.align 3\n" | 3943 | "\t.align 3\n" |
3743 | ".LECIE2:\n\n"); | 3944 | ".LECIE2:\n\n"); |
@@ -3748,7 +3949,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
3748 | "\t.long .LASFDE3-.Lframe2\n" | 3949 | "\t.long .LASFDE3-.Lframe2\n" |
3749 | "\t.long lj_vm_ffi_call-.\n" | 3950 | "\t.long lj_vm_ffi_call-.\n" |
3750 | "\t.long %d\n" | 3951 | "\t.long %d\n" |
3751 | "\t.uleb128 0\n" /* augmentation length */ | 3952 | "\t.uleb128 0\n" /* augmentation length */ |
3752 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ | 3953 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ |
3753 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ | 3954 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ |
3754 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ | 3955 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ |