diff options
| -rw-r--r-- | Makefile | 4 | ||||
| -rw-r--r-- | src/jit/dis_arm64.lua | 1215 | ||||
| -rw-r--r-- | src/lj_arch.h | 1 | ||||
| -rw-r--r-- | src/lj_asm.c | 4 | ||||
| -rw-r--r-- | src/lj_asm_arm64.h | 1823 | ||||
| -rw-r--r-- | src/lj_ccall.c | 2 | ||||
| -rw-r--r-- | src/lj_dispatch.h | 1 | ||||
| -rw-r--r-- | src/lj_emit_arm64.h | 397 | ||||
| -rw-r--r-- | src/lj_gdbjit.c | 12 | ||||
| -rw-r--r-- | src/lj_target.h | 4 | ||||
| -rw-r--r-- | src/lj_target_arm64.h | 221 | ||||
| -rw-r--r-- | src/vm_arm64.dasc | 227 |
12 files changed, 3887 insertions, 24 deletions
| @@ -86,8 +86,8 @@ FILE_MAN= luajit.1 | |||
| 86 | FILE_PC= luajit.pc | 86 | FILE_PC= luajit.pc |
| 87 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h | 87 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h |
| 88 | FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ | 88 | FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ |
| 89 | dis_x86.lua dis_x64.lua dis_arm.lua dis_ppc.lua \ | 89 | dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ |
| 90 | dis_mips.lua dis_mipsel.lua vmdef.lua | 90 | dis_ppc.lua dis_mips.lua dis_mipsel.lua vmdef.lua |
| 91 | 91 | ||
| 92 | ifeq (,$(findstring Windows,$(OS))) | 92 | ifeq (,$(findstring Windows,$(OS))) |
| 93 | HOST_SYS:= $(shell uname -s) | 93 | HOST_SYS:= $(shell uname -s) |
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
new file mode 100644
index 00000000..909b33bc
--- /dev/null
+++ b/src/jit/dis_arm64.lua
| @@ -0,0 +1,1215 @@ | |||
| 1 | ---------------------------------------------------------------------------- | ||
| 2 | -- LuaJIT ARM64 disassembler module. | ||
| 3 | -- | ||
| 4 | -- Copyright (C) 2005-2016 Mike Pall. All rights reserved. | ||
| 5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
| 6 | -- | ||
| 7 | -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
| 8 | -- Sponsored by Cisco Systems, Inc. | ||
| 9 | ---------------------------------------------------------------------------- | ||
| 10 | -- This is a helper module used by the LuaJIT machine code dumper module. | ||
| 11 | -- | ||
| 12 | -- It disassembles most user-mode AArch64 instructions. | ||
| 13 | -- NYI: Advanced SIMD and VFP instructions. | ||
| 14 | ------------------------------------------------------------------------------ | ||
| 15 | |||
| 16 | local type, tonumber = type, tonumber | ||
| 17 | local sub, byte, format = string.sub, string.byte, string.format | ||
| 18 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | ||
| 19 | local rep = string.rep | ||
| 20 | local concat = table.concat | ||
| 21 | local bit = require("bit") | ||
| 22 | local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex | ||
| 23 | local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift | ||
| 24 | local ror = bit.ror | ||
| 25 | |||
| 26 | ------------------------------------------------------------------------------ | ||
| 27 | -- Opcode maps | ||
| 28 | ------------------------------------------------------------------------------ | ||
| 29 | |||
-- Decode-table format: each node selects the bitfield (op >> shift) & mask
-- and indexes the result.  A string entry is an instruction template
-- ("name|altnames" followed by single-character operand format codes), a
-- table entry is a nested decode node, and false marks an unallocated
-- encoding.

local map_adr = { -- PC-relative addressing.
  shift = 31, mask = 1,
  [0] = "adrDBx", "adrpDBx"
}

local map_addsubi = { -- Add/subtract immediate.
  shift = 29, mask = 3,
  [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg",
}

local map_logi = { -- Logical immediate.
  shift = 31, mask = 1,
  [0] = {
    shift = 22, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
    },
    false -- unallocated
  },
  {
    shift = 29, mask = 3,
    [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
  }
}

local map_movwi = { -- Move wide immediate.
  shift = 31, mask = 1,
  [0] = {
    shift = 22, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
    }, false -- unallocated
  },
  {
    shift = 29, mask = 3,
    [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
  },
}

local map_bitf = { -- Bitfield.
  shift = 31, mask = 1,
  [0] = {
    shift = 22, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w",
      "bfm|bfi|bfxilDN13w",
      "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w"
    }
  },
  {
    shift = 22, mask = 1,
    {
      shift = 29, mask = 3,
      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x",
      "bfm|bfi|bfxilDN13x",
      "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x"
    }
  }
}

local map_datai = { -- Data processing - immediate.
  shift = 23, mask = 7,
  [0] = map_adr, map_adr, map_addsubi, false,
  map_logi, map_movwi, map_bitf,
  {
    shift = 15, mask = 0x1c0c1,
    [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x",
    [0x10081] = "extr|rorDNM4x"
  }
}
| 103 | |||
-- Decode tables for the data-processing (register) instruction class.
-- Same node format as the immediate-class tables above: (op >> shift) & mask
-- indexes into nested nodes, template strings, or false (unallocated).

local map_logsr = { -- Logical, shifted register.
  shift = 31, mask = 1,
  [0] = {
    shift = 15, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = {
        shift = 21, mask = 7,
        [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
        "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
      },
      {
        shift = 21, mask = 7,
        [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
        "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
      },
      {
        shift = 21, mask = 7,
        [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
        "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
      },
      {
        shift = 21, mask = 7,
        [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
        "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
      }
    },
    false -- unallocated
  },
  {
    shift = 29, mask = 3,
    [0] = {
      shift = 21, mask = 7,
      [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
      "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
    },
    {
      shift = 21, mask = 7,
      [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
      "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
    },
    {
      shift = 21, mask = 7,
      [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
      "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
    },
    {
      shift = 21, mask = 7,
      [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
      "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
    }
  }
}

-- Shared subtree for add/subtract, shifted register (used for all three
-- valid values of the shift-type field below).
local map_assh = {
  shift = 31, mask = 1,
  [0] = {
    shift = 15, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = {
        shift = 22, mask = 3,
        [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
      },
      {
        shift = 22, mask = 3,
        [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg",
        "adds|cmnD0NMSg", "adds|cmnD0NMg"
      },
      {
        shift = 22, mask = 3,
        [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
      },
      {
        shift = 22, mask = 3,
        [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
        "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
      },
    },
    false -- unallocated
  },
  {
    shift = 29, mask = 3,
    [0] = {
      shift = 22, mask = 3,
      [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
    },
    {
      shift = 22, mask = 3,
      [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg",
      "adds|cmnD0NMg"
    },
    {
      shift = 22, mask = 3,
      [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
    },
    {
      shift = 22, mask = 3,
      [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
      "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
    }
  }
}

local map_addsubsh = { -- Add/subtract, shifted register.
  shift = 22, mask = 3,
  [0] = map_assh, map_assh, map_assh
}

local map_addsubex = { -- Add/subtract, extended register.
  shift = 22, mask = 3,
  [0] = {
    shift = 29, mask = 3,
    [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg",
  }
}

local map_addsubc = { -- Add/subtract, with carry.
  shift = 10, mask = 63,
  [0] = {
    shift = 29, mask = 3,
    [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg",
  }
}

local map_ccomp = {
  shift = 4, mask = 1,
  [0] = {
    shift = 10, mask = 3,
    [0] = { -- Conditional compare register.
      shift = 29, mask = 3,
      "ccmnNMVCg", false, "ccmpNMVCg",
    },
    [2] = { -- Conditional compare immediate.
      shift = 29, mask = 3,
      "ccmnN5VCg", false, "ccmpN5VCg",
    }
  }
}

local map_csel = { -- Conditional select.
  shift = 11, mask = 1,
  [0] = {
    shift = 10, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false,
    },
    {
      shift = 29, mask = 3,
      [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false,
    }
  }
}

local map_data1s = { -- Data processing, 1 source.
  shift = 29, mask = 1,
  [0] = {
    shift = 31, mask = 1,
    [0] = {
      shift = 10, mask = 0x7ff,
      [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg"
    },
    {
      shift = 10, mask = 0x7ff,
      [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg"
    }
  }
}

local map_data2s = { -- Data processing, 2 sources.
  shift = 29, mask = 1,
  [0] = {
    shift = 10, mask = 63,
    false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg",
    "lsrDNMg", "asrDNMg", "rorDNMg"
  }
}

local map_data3s = { -- Data processing, 3 sources.
  shift = 29, mask = 7,
  [0] = {
    shift = 21, mask = 7,
    [0] = {
      shift = 15, mask = 1,
      [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g"
    }
  }, false, false, false,
  {
    shift = 15, mask = 1,
    [0] = {
      shift = 21, mask = 7,
      [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false,
      false, "umaddl|umullDxNMwA0x", "umulhDNMx"
    },
    {
      shift = 21, mask = 7,
      [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false,
      false, "umsubl|umneglDxNMwA0x"
    }
  }
}

local map_datar = { -- Data processing, register.
  shift = 28, mask = 1,
  [0] = {
    shift = 24, mask = 1,
    [0] = map_logsr,
    {
      shift = 21, mask = 1,
      [0] = map_addsubsh, map_addsubex
    }
  },
  {
    shift = 21, mask = 15,
    [0] = map_addsubc, false, map_ccomp, false, map_csel, false,
    {
      shift = 30, mask = 1,
      [0] = map_data2s, map_data1s
    },
    false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s,
    map_data3s, map_data3s, map_data3s
  }
}
| 328 | |||
-- Decode tables for the loads-and-stores instruction class.  Same node
-- format as above: (op >> shift) & mask selects a nested node, a template
-- string, or false (unallocated).

local map_lrl = { -- Load register, literal.
  shift = 26, mask = 1,
  [0] = {
    shift = 30, mask = 3,
    [0] = "ldrDwB", "ldrDxB", "ldrswDxB"
  },
  {
    shift = 30, mask = 3,
    [0] = "ldrDsB", "ldrDdB"
  }
}

local map_lsriind = { -- Load/store register, immediate pre/post-indexed.
  shift = 30, mask = 3,
  [0] = {
    shift = 26, mask = 1,
    [0] = {
      shift = 22, mask = 3,
      [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL"
    }
  },
  {
    shift = 26, mask = 1,
    [0] = {
      shift = 22, mask = 3,
      [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL"
    }
  },
  {
    shift = 26, mask = 1,
    [0] = {
      shift = 22, mask = 3,
      [0] = "strDwzL", "ldrDwzL", "ldrswDxzL"
    },
    {
      shift = 22, mask = 3,
      [0] = "strDszL", "ldrDszL"
    }
  },
  {
    shift = 26, mask = 1,
    [0] = {
      shift = 22, mask = 3,
      [0] = "strDxzL", "ldrDxzL"
    },
    {
      shift = 22, mask = 3,
      [0] = "strDdzL", "ldrDdzL"
    }
  }
}

local map_lsriro = {
  shift = 21, mask = 1,
  [0] = { -- Load/store register immediate.
    shift = 10, mask = 3,
    [0] = { -- Unscaled immediate.
      shift = 26, mask = 1,
      [0] = {
        shift = 30, mask = 3,
        [0] = {
          shift = 22, mask = 3,
          [0] = "sturbDwK", "ldurbDwK"
        },
        {
          shift = 22, mask = 3,
          [0] = "sturhDwK", "ldurhDwK"
        },
        {
          shift = 22, mask = 3,
          [0] = "sturDwK", "ldurDwK"
        },
        {
          shift = 22, mask = 3,
          [0] = "sturDxK", "ldurDxK"
        }
      }
    }, map_lsriind, false, map_lsriind
  },
  { -- Load/store register, register offset.
    shift = 10, mask = 3,
    [2] = {
      shift = 26, mask = 1,
      [0] = {
        shift = 30, mask = 3,
        [1] = {
          shift = 22, mask = 3,
          [0] = "strhDwO", "ldrhDwO", "ldrshDwO", "ldrshDxO"
        },
        [2] = {
          shift = 22, mask = 3,
          [0] = "strDwO", "ldrDwO", "ldrswDxO"
        },
        [3] = {
          shift = 22, mask = 3,
          [0] = "strDxO", "ldrDxO"
        }
      },
      {
        shift = 30, mask = 3,
        [2] = {
          shift = 22, mask = 3,
          [0] = "strDsO", "ldrDsO"
        },
        [3] = {
          shift = 22, mask = 3,
          [0] = "strDdO", "ldrDdO"
        }
      }
    }
  }
}

local map_lsp = { -- Load/store register pair, offset.
  shift = 22, mask = 1,
  [0] = {
    shift = 30, mask = 3,
    [0] = {
      shift = 26, mask = 1,
      [0] = "stpDzAzwP", "stpDzAzsP",
    },
    {
      shift = 26, mask = 1,
      "stpDzAzdP"
    },
    {
      shift = 26, mask = 1,
      [0] = "stpDzAzxP"
    }
  },
  {
    shift = 30, mask = 3,
    [0] = {
      shift = 26, mask = 1,
      [0] = "ldpDzAzwP", "ldpDzAzsP",
    },
    {
      shift = 26, mask = 1,
      [0] = "ldpswDAxP", "ldpDzAzdP"
    },
    {
      shift = 26, mask = 1,
      [0] = "ldpDzAzxP"
    }
  }
}

local map_ls = { -- Loads and stores.
  shift = 24, mask = 0x31,
  [0x10] = map_lrl, [0x30] = map_lsriro,
  [0x20] = {
    shift = 23, mask = 3,
    map_lsp, map_lsp, map_lsp
  },
  [0x21] = {
    shift = 23, mask = 3,
    map_lsp, map_lsp, map_lsp
  },
  [0x31] = { -- Load/store register, unsigned offset.
    shift = 26, mask = 1,
    [0] = {
      shift = 30, mask = 3,
      [0] = {
        shift = 22, mask = 3,
        [0] = "strbDwzU", "ldrbDwzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strhDwzU", "ldrhDwzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strDwzU", "ldrDwzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strDxzU", "ldrDxzU"
      }
    },
    {
      shift = 30, mask = 3,
      [2] = {
        shift = 22, mask = 3,
        [0] = "strDszU", "ldrDszU"
      },
      [3] = {
        shift = 22, mask = 3,
        [0] = "strDdzU", "ldrDdzU"
      }
    }
  },
}
| 521 | |||
-- Decode tables for the scalar FP instruction class, the branch/system
-- class, and the top-level dispatch table.  Same node format as above.
-- NYI: Advanced SIMD encodings are left unallocated and fall back to .long.

local map_datafp = { -- Data processing, SIMD and FP.
  shift = 28, mask = 7,
  { -- 001
    shift = 24, mask = 1,
    [0] = {
      shift = 21, mask = 1,
      {
        shift = 10, mask = 3,
        [0] = {
          shift = 12, mask = 1,
          [0] = {
            shift = 13, mask = 1,
            [0] = {
              shift = 14, mask = 1,
              [0] = {
                shift = 15, mask = 1,
                [0] = { -- FP/int conversion.
                  shift = 31, mask = 1,
                  [0] = {
                    shift = 16, mask = 0xff,
                    [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
                    [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
                    [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
                    [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
                    [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
                    [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
                    [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
                    [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
                    [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
                    [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
                    [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
                    [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
                    [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
                  },
                  {
                    shift = 16, mask = 0xff,
                    [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
                    [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
                    [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
                    [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
                    [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
                    [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
                    [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
                    [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
                    [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
                    [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
                    [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
                    [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
                    [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
                  }
                }
              },
              { -- FP data-processing, 1 source.
                shift = 31, mask = 1,
                [0] = {
                  shift = 22, mask = 3,
                  [0] = {
                    shift = 15, mask = 63,
                    [0] = "fmovDNf", "fabsDNf", "fnegDNf",
                    "fsqrtDNf", false, "fcvtDdNs", false, false,
                    "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
                    "frintaDNf", false, "frintxDNf", "frintiDNf",
                  },
                  {
                    shift = 15, mask = 63,
                    [0] = "fmovDNf", "fabsDNf", "fnegDNf",
                    "fsqrtDNf", "fcvtDsNd", false, false, false,
                    "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
                    "frintaDNf", false, "frintxDNf", "frintiDNf",
                  }
                }
              }
            },
            { -- FP compare.
              shift = 31, mask = 1,
              [0] = {
                shift = 14, mask = 3,
                [0] = {
                  shift = 23, mask = 1,
                  [0] = {
                    shift = 0, mask = 31,
                    [0] = "fcmpNMf", [8] = "fcmpNZf",
                    [16] = "fcmpeNMf", [24] = "fcmpeNZf",
                  }
                }
              }
            }
          },
          { -- FP immediate.
            shift = 31, mask = 1,
            [0] = {
              shift = 5, mask = 31,
              [0] = {
                shift = 23, mask = 1,
                [0] = "fmovDFf"
              }
            }
          }
        },
        { -- FP conditional compare.
          shift = 31, mask = 1,
          [0] = {
            shift = 23, mask = 1,
            [0] = {
              shift = 4, mask = 1,
              [0] = "fccmpNMVCf", "fccmpeNMVCf"
            }
          }
        },
        { -- FP data-processing, 2 sources.
          shift = 31, mask = 1,
          [0] = {
            shift = 23, mask = 1,
            [0] = {
              shift = 12, mask = 15,
              [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
              "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
              "fnmulDNMf"
            }
          }
        },
        { -- FP conditional select.
          shift = 31, mask = 1,
          [0] = {
            shift = 23, mask = 1,
            [0] = "fcselDNMCf"
          }
        }
      }
    },
    { -- FP data-processing, 3 sources.
      shift = 31, mask = 1,
      [0] = {
        shift = 15, mask = 1,
        [0] = {
          shift = 21, mask = 5,
          [0] = "fmaddDNMAf", "fnmaddDNMAf"
        },
        {
          shift = 21, mask = 5,
          [0] = "fmsubDNMAf", "fnmsubDNMAf"
        }
      }
    }
  }
}

local map_br = { -- Branches, exception generating and system instructions.
  shift = 29, mask = 7,
  [0] = "bB",
  { -- Compare & branch, immediate.
    shift = 24, mask = 3,
    [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw"
  },
  { -- Conditional branch, immediate.
    shift = 24, mask = 3,
    [0] = {
      shift = 4, mask = 1,
      [0] = {
        shift = 0, mask = 15,
        [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB",
        "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB"
      }
    }
  }, false, "blB",
  { -- Compare & branch, immediate.
    shift = 24, mask = 3,
    [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx"
  },
  {
    shift = 24, mask = 3,
    [0] = { -- Exception generation.
      shift = 0, mask = 0xe0001f,
      [0x200000] = "brkW"
    },
    { -- System instructions.
      shift = 0, mask = 0x3fffff,
      [0x03201f] = "nop"
    },
    { -- Unconditional branch, register.
      shift = 0, mask = 0xfffc1f,
      [0x1f0000] = "brNx", [0x3f0000] = "blrNx",
      [0x5f0000] = "retNx"
    },
  }
}

-- Top-level dispatch on op[28:25].
local map_init = {
  shift = 25, mask = 15,
  [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp,
  map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp
}
| 714 | |||
| 715 | ------------------------------------------------------------------------------ | ||
| 716 | |||
-- Register name tables, indexed 0..31, keyed by width/type letter:
-- x = 64-bit integer, w = 32-bit integer, d = double FP, s = single FP.
local map_regs = { x = {}, w = {}, d = {}, s = {} }

for i=0,30 do
  map_regs.x[i] = "x"..i
  map_regs.w[i] = "w"..i
  map_regs.d[i] = "d"..i
  map_regs.s[i] = "s"..i
end
-- Integer register 31 is printed as the stack pointer; FP register 31
-- is an ordinary register.
map_regs.x[31] = "sp"
map_regs.w[31] = "wsp"
map_regs.d[31] = "d31"
map_regs.s[31] = "s31"

-- Condition mnemonics, indexed by the 4-bit condition field.
-- Index 15 is intentionally absent (yields nil).
local map_cond = {
  [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al",
}

-- Shift-type names, indexed by the 2-bit shift field (index 3 absent).
local map_shift = { [0] = "lsl", "lsr", "asr", }

-- Extend-type names, indexed by the 3-bit option field.
local map_extend = {
  [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
}
| 740 | |||
| 741 | ------------------------------------------------------------------------------ | ||
| 742 | |||
| 743 | -- Output a nicely formatted line with an opcode and operands. | ||
-- Emit one formatted line of disassembly: address, optional raw opcode
-- hex dump (when ctx.hexdump > 0), mnemonic and comma-separated operands,
-- plus a "->symbol" annotation when the current branch target (ctx.rel)
-- resolves via ctx.symtab.  Advances ctx.pos by 4 (fixed A64 width).
local function putop(ctx, text, operands)
  local here = ctx.pos
  local annot = ""
  local target = ctx.rel
  if target then
    local sym = ctx.symtab[target]
    if sym then annot = "\t->"..sym end
  end
  local ops = concat(operands, ", ")
  local line
  if ctx.hexdump > 0 then
    line = format("%08x %s %-5s %s%s\n",
                  ctx.addr+here, tohex(ctx.op), text, ops, annot)
  else
    line = format("%08x %-5s %s%s\n", ctx.addr+here, text, ops, annot)
  end
  ctx.out(line)
  ctx.pos = here + 4
end
| 762 | |||
| 763 | -- Fallback for unknown opcodes. | ||
-- Fallback for undecodable opcodes: print the raw word as a .long directive.
local function unknown(ctx)
  return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
end
| 767 | |||
-- Resolve a register operand whose width is encoded per-operand: find the
-- width letter (x/w/d/s) that follows placeholder character `p` in the
-- pattern string `pat`, and return the register name for `regnum` from
-- the corresponding name table.
local function match_reg(p, pat, regnum)
  return map_regs[match(pat, p.."%w-([xwds])")][regnum]
end
| 771 | |||
-- Format a 32-bit value as hex: values that are negative as Lua numbers
-- (bit 31 set after bit-library normalization) are printed as full
-- 8-digit two's-complement hex, non-negative values without leading zeros.
local function fmt_hex32(x)
  -- tohex() always returns a (truthy) string, so and/or is safe here.
  return x < 0 and tohex(x) or format("%x", x)
end
| 779 | |||
-- Multipliers that replicate a small element across a 32-bit word,
-- indexed by the element-size class computed in decode_imm13 (len 1..5
-- for 2/4/8/16/32-bit elements).
local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }

-- Decode the 13-bit logical-immediate field (N:immr:imms) of an AArch64
-- logical instruction into its expanded bitmask, returned as a hex string
-- (without "0x" prefix).  For 64-bit results the high and low words are
-- concatenated as hex text rather than computed as one number, since the
-- bit library operates on 32-bit values.
local function decode_imm13(op)
  local imms = band(rshift(op, 10), 63)
  local immr = band(rshift(op, 16), 63)
  if band(op, 0x00400000) == 0 then
    -- N == 0: element size is 2..32 bits, inferred from the leading-ones
    -- pattern of imms; len is the log2 of the element size.
    local len = 5
    if imms >= 56 then
      if imms >= 60 then len = 1 else len = 2 end
    elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end
    local l = lshift(1, len)-1
    local s = band(imms, l)
    local r = band(immr, l)
    -- Build one element (s+1 low ones, rotated right by r), then wrap the
    -- rotation into the element width and replicate across 32 bits.
    local imm = ror(rshift(-1, 31-s), r)
    if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end
    imm = imm * imm13_rep[len]
    local ix = fmt_hex32(imm)
    if rshift(op, 31) ~= 0 then
      -- 64-bit (sf) form: the 32-bit pattern repeats in the upper word.
      return ix..tohex(imm)
    else
      return ix
    end
  else
    -- N == 1: single 64-bit element, kept as two 32-bit halves (lo, hi).
    local lo, hi = -1, 0
    if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end
    if immr ~= 0 then
      -- 64-bit rotate right by immr, implemented with paired 32-bit ops:
      -- rotate both halves, then swap the bits that crossed the word
      -- boundary (and the whole halves when immr >= 32).
      lo, hi = ror(lo, immr), ror(hi, immr)
      local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr))
      lo, hi = bxor(lo, x), bxor(hi, x)
      if immr >= 32 then lo, hi = hi, lo end
    end
    if hi ~= 0 then
      return fmt_hex32(hi)..tohex(lo)
    else
      return fmt_hex32(lo)
    end
  end
end
| 818 | |||
-- Extract the PC-relative offset (in bytes) from a branch or address
-- instruction, sign-extended via shift-left/arithmetic-shift-right.
-- The field position and width depend on the mnemonic.
local function parse_immpc(op, name)
  if name == "b" or name == "bl" then
    -- imm26 at bits [25:0], scaled by 4.
    return arshift(lshift(op, 6), 4)
  elseif name == "adr" or name == "adrp" then
    -- immhi:immlo, 21 bits; note the adrp page scaling (<<12) is NOT
    -- applied here -- presumably handled by the caller, TODO confirm.
    local immlo = band(rshift(op, 29), 3)
    local immhi = lshift(arshift(lshift(op, 8), 13), 2)
    return bor(immhi, immlo)
  elseif name == "tbz" or name == "tbnz" then
    -- imm14 at bits [18:5], scaled by 4.
    return lshift(arshift(lshift(op, 13), 18), 2)
  else
    -- imm19 at bits [23:5] (cbz/cbnz/b.cond/ldr literal), scaled by 4.
    return lshift(arshift(lshift(op, 8), 13), 2)
  end
end
| 832 | |||
-- Decode the 8-bit FP "modified immediate" (sign, 3-bit exponent, 4-bit
-- fraction, located at bits [20:13]) into the Lua number it represents:
-- +/- (16+frac) * 2^exp with the exponent rebiased by -131.
local function parse_fpimm8(op)
  local sign = band(op, 0x100000) == 0 and 1 or -1
  local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131
  local frac = 16+band(rshift(op, 13), 15)
  return sign * frac * 2^exp
end
| 839 | |||
-- Decide whether a bitfield instruction should be printed as sbfx/ubfx
-- rather than one of the more specific aliases (asr/lsr/lsl/sxt*/uxt*).
-- sf is the size flag, uns distinguishes ubfm from sbfm, and imms/immr
-- are the raw bitfield operands.  Returns true when bfx is preferred.
local function prefer_bfx(sf, uns, imms, immr)
  -- Shift aliases win when the field reaches the top bit or wraps.
  if imms < immr or imms == 31 or imms == 63 then return false end
  if immr ~= 0 then return true end
  -- immr == 0: extend aliases may apply.
  if sf == 0 then
    -- 32-bit: sxtb/uxtb (imms 7) and sxth/uxth (imms 15) are preferred.
    return imms ~= 7 and imms ~= 15
  end
  -- 64-bit signed: sxtb/sxth/sxtw cover imms 7/15/31.
  if uns == 0 and (imms == 7 or imms == 15 or imms == 31) then
    return false
  end
  return true
end
| 854 | |||
-- Disassemble a single instruction.
-- Reads 4 bytes (little-endian) at ctx.pos, walks the map_init decode tree
-- down to an opcode pattern string, then interprets each pattern character
-- to build the operand list. Pattern characters select register fields
-- (D/N/M/A), immediates, addressing modes, condition codes, etc.; many of
-- them also rewrite the mnemonic to a preferred alias via the "|"-separated
-- altname list. Output is handed to putop().
local function disass_ins(ctx)
  local pos = ctx.pos
  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
  local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
  local operands = {}
  local suffix = ""
  local last, name, pat
  local vr -- NOTE(review): appears unused in this function.
  local map_reg
  ctx.op = op
  ctx.rel = nil
  last = nil
  local opat
  -- Walk the decode tree: bits 25-28 select the top-level entry; inner
  -- tables carry their own shift/mask until a pattern string is reached.
  opat = map_init[band(rshift(op, 25), 15)]
  while type(opat) ~= "string" do
    if not opat then return unknown(ctx) end
    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
  end
  -- Split "name.pattern|altnames" into its parts.
  name, pat = match(opat, "^([a-z0-9]*)(.*)")
  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
  if altname then pat = pat2 end
  if sub(pat, 1, 1) == "." then
    local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
    suffix = suffix..s2
    pat = p2
  end

  -- "g"/"f" select the register bank for D/N/M/A fields:
  -- g: w/x by the sf bit (31), f: s/d by bit 22.
  local rt = match(pat, "[gf]")
  if rt then
    if rt == "g" then
      map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
    else
      map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
    end
  end

  local second0, immr

  for p in gmatch(pat, ".") do
    local x = nil
    if p == "D" then  -- Destination register Rd (bits 0-4).
      local regnum = band(op, 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "N" then  -- First source register Rn (bits 5-9).
      local regnum = band(rshift(op, 5), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "M" then  -- Second source register Rm (bits 16-20).
      local regnum = band(rshift(op, 16), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "A" then  -- Third source register Ra (bits 10-14).
      local regnum = band(rshift(op, 10), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "B" then  -- PC-relative branch target.
      local addr = ctx.addr + pos + parse_immpc(op, name)
      ctx.rel = addr
      x = "0x"..tohex(addr)
    elseif p == "T" then  -- Bit number for tbz/tbnz (b5:b40).
      x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
    elseif p == "V" then  -- 4-bit immediate.
      x = band(op, 15)
    elseif p == "C" then  -- Condition code field at bits 12-15.
      x = map_cond[band(rshift(op, 12), 15)]
    elseif p == "c" then  -- Conditional select/compare; may alias to
                          -- shorter mnemonics when Rn == Rm.
      local rn = band(rshift(op, 5), 31)
      local rm = band(rshift(op, 16), 31)
      local cond = band(rshift(op, 12), 15)
      local invc = bxor(cond, 1)  -- Inverted condition.
      x = map_cond[cond]
      if altname and cond ~= 14 and cond ~= 15 then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if rn == rm then
          local n = #operands
          operands[n] = nil
          x = map_cond[invc]
          if rn ~= 31 then
            if a1 then name = a1 else name = altname end
          else
            operands[n-1] = nil
            name = a2
          end
        end
      end
    elseif p == "W" then  -- 16-bit immediate at bits 5-20.
      x = band(rshift(op, 5), 0xffff)
    elseif p == "Y" then  -- movz/movk immediate; alias when shifted/zero.
      x = band(rshift(op, 5), 0xffff)
      local hw = band(rshift(op, 21), 3)
      if altname and (hw == 0 or x ~= 0) then
        name = altname
      end
    elseif p == "L" then  -- Pre-/post-indexed [Rn] with signed imm9.
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local imm9 = arshift(lshift(op, 11), 23)
      if band(op, 0x800) ~= 0 then
        x = "["..rn..", #"..imm9.."]!"
      else
        x = "["..rn.."], #"..imm9
      end
    elseif p == "U" then  -- Unsigned scaled offset [Rn, #imm12<<sz].
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local sz = band(rshift(op, 30), 3)
      local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
      if imm12 ~= 0 then
        x = "["..rn..", #"..imm12.."]"
      else
        x = "["..rn.."]"
      end
    elseif p == "K" then  -- Unscaled signed offset [Rn, #imm9].
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local imm9 = arshift(lshift(op, 11), 23)
      if imm9 ~= 0 then
        x = "["..rn..", #"..imm9.."]"
      else
        x = "["..rn.."]"
      end
    elseif p == "O" then  -- Register-offset [Rn, Rm{, ext/lsl #n}].
      local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
      local m = band(rshift(op, 13), 1)
      if m == 0 then
        rm = map_regs.w[band(rshift(op, 16), 31)]
      else
        rm = map_regs.x[band(rshift(op, 16), 31)]
      end
      x = "["..rn..", "..rm
      local opt = band(rshift(op, 13), 7)
      local s = band(rshift(op, 12), 1)
      local sz = band(rshift(op, 30), 3)
      -- extension to be applied
      if opt == 3 then
        if s == 0 then x = nil
        else x = x..", lsl #"..sz.."]" end
      elseif opt == 2 or opt == 6 or opt == 7 then
        if s == 0 then x = x..", "..map_extend[opt].."]"
        else x = x..", "..map_extend[opt].." #"..sz.."]" end
      else
        x = x.."]"
      end
    elseif p == "P" then  -- Load/store pair addressing, imm7 scaled by
                          -- operand size (opcv picks 4/8/16 byte scale).
      local opcv, sh = rshift(op, 26), 2
      if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
      local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local ind = band(rshift(op, 23), 3)
      if ind == 1 then  -- Post-index.
        x = "["..rn.."], #"..imm7
      elseif ind == 2 then  -- Signed offset.
        if imm7 == 0 then
          x = "["..rn.."]"
        else
          x = "["..rn..", #"..imm7.."]"
        end
      elseif ind == 3 then  -- Pre-index.
        x = "["..rn..", #"..imm7.."]!"
      end
    elseif p == "I" then  -- Add/sub 12-bit immediate with optional LSL #12;
                          -- aliases add #0 with sp to "mov".
      local shf = band(rshift(op, 22), 3)
      local imm12 = band(rshift(op, 10), 0x0fff)
      local n = #operands
      local rn, rd = band(rshift(op, 5), 31), band(op, 31)
      if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
        name = altname
        x = nil
      elseif shf == 0 then
        x = imm12
      elseif shf == 1 then
        x = imm12..", lsl #12"
      end
    elseif p == "i" then  -- Logical bitmask immediate (imm13).
      x = "#0x"..decode_imm13(op)
    elseif p == "1" then  -- immr field, remembered for "2"/"3" below.
      immr = band(rshift(op, 16), 63)
      x = immr
    elseif p == "2" then  -- imms field of sbfm/ubfm: pick the best alias
                          -- (lsl/lsr/asr, *bfx, *bfiz, extend, mov).
      x = band(rshift(op, 10), 63)
      if altname then
        local a1, a2, a3, a4, a5, a6 =
          match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
        local sf = band(rshift(op, 26), 32)  -- 32 if 64-bit form, else 0.
        local uns = band(rshift(op, 30), 1)
        if prefer_bfx(sf, uns, x, immr) then
          name = a2
          x = x - immr + 1  -- Width = imms - immr + 1.
        elseif immr == 0 and x == 7 then  -- sxtb/uxtb.
          local n = #operands
          operands[n] = nil
          if sf ~= 0 then
            operands[n-1] = gsub(operands[n-1], "x", "w")
          end
          last = operands[n-1]
          name = a6
          x = nil
        elseif immr == 0 and x == 15 then  -- sxth/uxth.
          local n = #operands
          operands[n] = nil
          if sf ~= 0 then
            operands[n-1] = gsub(operands[n-1], "x", "w")
          end
          last = operands[n-1]
          name = a5
          x = nil
        elseif x == 31 or x == 63 then  -- lsr/asr (or sxtw).
          if x == 31 and immr == 0 and name == "sbfm" then
            name = a4
            local n = #operands
            operands[n] = nil
            if sf ~= 0 then
              operands[n-1] = gsub(operands[n-1], "x", "w")
            end
            last = operands[n-1]
          else
            name = a3
          end
          x = nil
        elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
          name = a4  -- lsl alias.
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = nil
        elseif x < immr then  -- *bfiz alias.
          name = a1
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = x + 1
        end
      end
    elseif p == "3" then  -- imms field of bfm: bfi vs. bfxil alias.
      x = band(rshift(op, 10), 63)
      if altname then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if x < immr then
          name = a1
          local sf = band(rshift(op, 26), 32)
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = x + 1
        elseif x >= immr then
          name = a2
          x = x - immr + 1
        end
      end
    elseif p == "4" then  -- extr imm; aliases to ror when Rn == Rm.
      x = band(rshift(op, 10), 63)
      local rn = band(rshift(op, 5), 31)
      local rm = band(rshift(op, 16), 31)
      if altname and rn == rm then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        name = altname
      end
    elseif p == "5" then  -- 5-bit immediate at bits 16-20.
      x = band(rshift(op, 16), 31)
    elseif p == "S" then  -- Optional shifted-register suffix.
      x = band(rshift(op, 10), 63)
      if x == 0 then x = nil
      else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
    elseif p == "X" then  -- Extended-register operand.
      local opt = band(rshift(op, 13), 7)
      -- Width specifier <R>.
      if opt ~= 3 and opt ~= 7 then
        last = map_regs.w[band(rshift(op, 16), 31)]
        operands[#operands] = last
      end
      x = band(rshift(op, 10), 7)
      -- Extension.
      if opt == 2 + band(rshift(op, 31), 1) and
         band(rshift(op, second0 and 5 or 0), 31) == 31 then
        if x == 0 then x = nil
        else x = "lsl #"..x end
      else
        if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
        else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
      end
    elseif p == "R" then  -- movz/movk hw field -> lsl #0/16/32/48.
      x = band(rshift(op,21), 3)
      if x == 0 then x = nil
      else x = "lsl #"..x*16 end
    elseif p == "z" then  -- Replace sp with the zero register.
      local n = #operands
      if operands[n] == "sp" then operands[n] = "xzr"
      elseif operands[n] == "wsp" then operands[n] = "wzr"
      end
    elseif p == "Z" then  -- Literal #0 operand.
      x = 0
    elseif p == "F" then  -- 8-bit FP immediate.
      x = parse_fpimm8(op)
    elseif p == "g" or p == "f" or p == "x" or p == "w" or
           p == "d" or p == "s" then
      -- These are handled in D/N/M/A.
    elseif p == "0" then  -- Drop a redundant sp operand and re-alias.
      if last == "sp" or last == "wsp" then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        if altname then
          local a1, a2 = match(altname, "([^|]*)|(.*)")
          if not a1 then
            name = altname
          elseif second0 then
            name, altname = a2, a1
          else
            name, altname = a1, a2
          end
        end
      end
      second0 = true
    else
      assert(false)
    end
    if x then
      last = x
      if type(x) == "number" then x = "#"..x end
      operands[#operands+1] = x
    end
  end

  return putop(ctx, name..suffix, operands)
end
| 1174 | |||
| 1175 | ------------------------------------------------------------------------------ | ||
| 1176 | |||
-- Disassemble a block of code.
-- ofs defaults to 0; len defaults to the remainder of ctx.code.
local function disass_block(ctx, ofs, len)
  local start = ofs or 0
  local stop
  if len then stop = start + len else stop = #ctx.code end
  ctx.pos = start
  ctx.rel = nil
  while ctx.pos < stop do
    disass_ins(ctx)
  end
end
| 1185 | |||
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
local function create(code, addr, out)
  return {
    code = code,
    addr = addr or 0,
    out = out or io.write,
    symtab = {},
    disass = disass_block,
    hexdump = 8,
  }
end
| 1197 | |||
-- Simple API: disassemble code (a string) at address and output via out.
local function disass(code, addr, out)
  local ctx = create(code, addr, out)
  ctx:disass()
end
| 1202 | |||
-- Return register name for RID.
-- RIDs 0-31 are the general-purpose x registers, 32+ the FP d registers.
local function regname(r)
  if r >= 32 then
    return map_regs.d[r-32]
  end
  return map_regs.x[r]
end
| 1208 | |||
-- Public module functions.
local M = {}
M.create = create
M.disass = disass
M.regname = regname
return M
| 1215 | |||
diff --git a/src/lj_arch.h b/src/lj_arch.h index cc5a0a66..3df602e3 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
| @@ -226,7 +226,6 @@ | |||
| 226 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 226 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
| 227 | #define LJ_TARGET_GC64 1 | 227 | #define LJ_TARGET_GC64 1 |
| 228 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | 228 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
| 229 | #define LJ_ARCH_NOJIT 1 /* NYI */ | ||
| 230 | 229 | ||
| 231 | #define LJ_ARCH_VERSION 80 | 230 | #define LJ_ARCH_VERSION 80 |
| 232 | 231 | ||
diff --git a/src/lj_asm.c b/src/lj_asm.c index 7ce58924..2cb5abea 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -171,6 +171,8 @@ IRFLDEF(FLOFS) | |||
| 171 | #include "lj_emit_x86.h" | 171 | #include "lj_emit_x86.h" |
| 172 | #elif LJ_TARGET_ARM | 172 | #elif LJ_TARGET_ARM |
| 173 | #include "lj_emit_arm.h" | 173 | #include "lj_emit_arm.h" |
| 174 | #elif LJ_TARGET_ARM64 | ||
| 175 | #include "lj_emit_arm64.h" | ||
| 174 | #elif LJ_TARGET_PPC | 176 | #elif LJ_TARGET_PPC |
| 175 | #include "lj_emit_ppc.h" | 177 | #include "lj_emit_ppc.h" |
| 176 | #elif LJ_TARGET_MIPS | 178 | #elif LJ_TARGET_MIPS |
| @@ -1563,6 +1565,8 @@ static void asm_loop(ASMState *as) | |||
| 1563 | #include "lj_asm_x86.h" | 1565 | #include "lj_asm_x86.h" |
| 1564 | #elif LJ_TARGET_ARM | 1566 | #elif LJ_TARGET_ARM |
| 1565 | #include "lj_asm_arm.h" | 1567 | #include "lj_asm_arm.h" |
| 1568 | #elif LJ_TARGET_ARM64 | ||
| 1569 | #include "lj_asm_arm64.h" | ||
| 1566 | #elif LJ_TARGET_PPC | 1570 | #elif LJ_TARGET_PPC |
| 1567 | #include "lj_asm_ppc.h" | 1571 | #include "lj_asm_ppc.h" |
| 1568 | #elif LJ_TARGET_MIPS | 1572 | #elif LJ_TARGET_MIPS |
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h new file mode 100644 index 00000000..0a2f5306 --- /dev/null +++ b/src/lj_asm_arm64.h | |||
| @@ -0,0 +1,1823 @@ | |||
| 1 | /* | ||
| 2 | ** ARM64 IR assembler (SSA IR -> machine code). | ||
| 3 | ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h | ||
| 4 | ** | ||
| 5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
| 6 | ** Sponsored by Cisco Systems, Inc. | ||
| 7 | */ | ||
| 8 | |||
| 9 | /* -- Register allocator extensions --------------------------------------- */ | ||
| 10 | |||
/* Allocate a register with a hint. */
/* If ref has no register yet, propagate the hint onto the ref (unless it
** already carries a hint or iscrossref() rejects it -- NOTE(review):
** presumably a cross-boundary ref; confirm with ra internals) and then
** allocate from allow. The result is marked non-weak before returning.
*/
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);
  return r;
}
| 23 | |||
/* Allocate two source registers for three-operand instructions. */
/* Whichever operand already holds a register is kept; the other one is
** allocated with that register excluded, so left and right can never
** alias. If neither has a register, a hint on the right operand is
** honored first. Returns both registers packed as left | (right << 8).
*/
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    /* Allocate the hinted side first so the hint can still be satisfied. */
    right = ra_allocref(as, ir->op2, allow);
    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_allocref(as, ir->op1, allow);
    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}
| 47 | |||
| 48 | /* -- Guard handling ------------------------------------------------------ */ | ||
| 49 | |||
/* Generate an exit stub group at the bottom of the reserved MCode memory. */
/* Group layout: a 3-word common head (save lr, call the exit handler,
** the group's base exit number), followed by EXITSTUBS_PER_GROUP
** one-instruction branches back to that head. Returns a pointer to the
** first per-exit branch, so stub i of the group is at return value + i.
*/
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
{
  MCode *mxp = as->mcbot;
  int i;
  if (mxp + 3*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
    asm_mclimit(as);  /* Out of reserved MCode space. */
  /* str lr, [sp]; bl ->vm_exit_handler; .long group. */
  *mxp++ = A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP);
  /* bl displacement is in instruction units, 26 bits. */
  *mxp = A64I_BL | (((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu);
  mxp++;
  *mxp++ = group*EXITSTUBS_PER_GROUP;  /* Base exit number of this group. */
  for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
    *mxp++ = A64I_B | ((-3-i)&0x03ffffffu);  /* Branch back to group head. */
  lj_mcode_sync(as->mcbot, mxp);  /* Make the new code visible to I-fetch. */
  lj_mcode_commitbot(as->J, mxp);
  as->mcbot = mxp;
  as->mclim = as->mcbot + MCLIM_REDZONE;
  return mxp - EXITSTUBS_PER_GROUP;
}
| 70 | |||
| 71 | /* Setup all needed exit stubs. */ | ||
| 72 | static void asm_exitstub_setup(ASMState *as, ExitNo nexits) | ||
| 73 | { | ||
| 74 | ExitNo i; | ||
| 75 | if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) | ||
| 76 | lj_trace_err(as->J, LJ_TRERR_SNAPOV); | ||
| 77 | for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) | ||
| 78 | if (as->J->exitstubgroup[i] == NULL) | ||
| 79 | as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); | ||
| 80 | } | ||
| 81 | |||
/* Emit conditional branch to exit for guard. */
/* Code is emitted backwards: the bl to the exit stub is laid down first,
** then a b.<inverted cc> that skips over it on the non-exit path.
*/
static void asm_guardcc(ASMState *as, A64CC cc)
{
  MCode *target = exitstub_addr(as->J, as->snapno);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    /* Loop inversion: overwrite the branch slot in place and guard it
    ** with the inverted condition (cc^1 flips an A64 condition code).
    */
    as->loopinv = 1;
    *p = A64I_BL | ((target-p) & 0x03ffffffu);
    emit_cond_branch(as, cc^1, p-1);
    return;
  }
  /* No conditional calls. Emit b.cc/bl instead. */
  /* That's a bad idea. NYI: emit per-trace exit stubs instead, see PPC. */
  emit_branch(as, A64I_BL, target);
  emit_cond_branch(as, cc^1, p);
}
| 98 | |||
| 99 | /* -- Operand fusion ------------------------------------------------------ */ | ||
| 100 | |||
| 101 | /* Limit linear search to this distance. Avoids O(n^2) behavior. */ | ||
| 102 | #define CONFLICT_SEARCH_LIM 31 | ||
| 103 | |||
| 104 | static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | ||
| 105 | { | ||
| 106 | if (irref_isk(ref)) { | ||
| 107 | IRIns *ir = IR(ref); | ||
| 108 | if (ir->o == IR_KNULL || !irt_is64(ir->t)) { | ||
| 109 | *k = ir->i; | ||
| 110 | return 1; | ||
| 111 | } else if (checki32((int64_t)ir_k64(ir)->u64)) { | ||
| 112 | *k = (int32_t)ir_k64(ir)->u64; | ||
| 113 | return 1; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | return 0; | ||
| 117 | } | ||
| 118 | |||
| 119 | /* Check if there's no conflicting instruction between curins and ref. */ | ||
| 120 | static int noconflict(ASMState *as, IRRef ref, IROp conflict) | ||
| 121 | { | ||
| 122 | IRIns *ir = as->ir; | ||
| 123 | IRRef i = as->curins; | ||
| 124 | if (i > ref + CONFLICT_SEARCH_LIM) | ||
| 125 | return 0; /* Give up, ref is too far away. */ | ||
| 126 | while (--i > ref) | ||
| 127 | if (ir[i].o == conflict) | ||
| 128 | return 0; /* Conflict found. */ | ||
| 129 | return 1; /* Ok, no conflict. */ | ||
| 130 | } | ||
| 131 | |||
| 132 | /* Fuse the array base of colocated arrays. */ | ||
| 133 | static int32_t asm_fuseabase(ASMState *as, IRRef ref) | ||
| 134 | { | ||
| 135 | IRIns *ir = IR(ref); | ||
| 136 | if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && | ||
| 137 | !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) | ||
| 138 | return (int32_t)sizeof(GCtab); | ||
| 139 | return 0; | ||
| 140 | } | ||
| 141 | |||
| 142 | #define FUSE_REG 0x40000000 | ||
| 143 | |||
/* Fuse array/hash/upvalue reference into register+offset operand. */
/* On success *ofsp holds either an immediate offset valid for ins, or
** FUSE_REG|reg for a register-indexed AREF; the returned Reg is the base.
** Falls back to materializing the full address when fusion isn't possible.
*/
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
                          A64Ins ins)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
        if (irref_isk(ir->op2)) {
          /* Constant index: fold colocated-array base + 8*index. */
          IRRef tab = IR(ir->op1)->op1;
          int32_t ofs = asm_fuseabase(as, tab);
          IRRef refa = ofs ? tab : ir->op1;
          ofs += 8*IR(ir->op2)->i;
          if (emit_checkofs(ins, ofs)) {
            *ofsp = ofs;
            return ra_alloc1(as, refa, allow);
          }
        } else {
          /* Variable index: report it via the FUSE_REG marker. */
          Reg base = ra_alloc1(as, ir->op1, allow);
          *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
          return base;
        }
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
        /* Node slot offset inside the hash part. */
        int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
        if (emit_checkofs(ins, ofs)) {
          *ofsp = ofs;
          return ra_alloc1(as, ir->op1, allow);
        }
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
        /* Closed upvalue of a constant function: address it off RID_GL. */
        GCfunc *fn = ir_kfunc(IR(ir->op1));
        GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
        int64_t ofs = glofs(as, &uv->tv);
        if (emit_checkofs(ins, ofs)) {
          *ofsp = (int32_t)ofs;
          return RID_GL;
        }
      }
    }
  }
  *ofsp = 0;  /* No fusion: use the ref's own register with offset 0. */
  return ra_alloc1(as, ref, allow);
}
| 190 | |||
/* Fuse m operand into arithmetic/logic instructions. */
/* Returns the operand-m encoding bits to XOR/OR into the instruction:
** a plain register, an encodable immediate (bitmask imm13 for the
** logical-op class, imm12 otherwise), or a fused shift/extend of a
** child operation.
*/
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_hasreg(ir->r)) {
    ra_noweak(as, ir->r);
    return A64F_M(ir->r);
  } else if (irref_isk(ref)) {
    uint32_t m;
    int64_t k = get_k64val(ir);
    /* 0x0a000000 selects the logical-immediate encoding class --
    ** NOTE(review): opcode-class mask per the A64 encoding; confirm
    ** against the target header's instruction definitions.
    */
    if ((ai & 0x1f000000) == 0x0a000000)
      m = emit_isk13(k, irt_is64(ir->t));
    else
      m = emit_isk12(k);
    if (m)
      return m;  /* Constant fits as an immediate. */
  } else if (mayfuse(as, ref)) {
    if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
        (ir->o == IR_ADD && ir->op1 == ir->op2)) {
      /* Fuse a constant shift (ADD x,x counts as LSL #1). */
      A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
                    ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
      int shift = ir->o == IR_ADD ? 1 :
                    (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
      IRIns *irl = IR(ir->op1);
      if (sh == A64SH_LSL &&
          irl->o == IR_CONV &&
          irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
          shift <= 4 &&
          mayfuse(as, ir->op1)) {
        /* Also fold a sign-extending int->i64 CONV below the shift. */
        Reg m = ra_alloc1(as, irl->op1, allow);
        return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
      } else {
        Reg m = ra_alloc1(as, ir->op1, allow);
        return A64F_M(m) | A64F_SH(sh, shift);
      }
    } else if (ir->o == IR_CONV &&
               ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
      /* Fuse a bare sign-extending int->i64 CONV as sxtw. */
      Reg m = ra_alloc1(as, ir->op1, allow);
      return A64F_M(m) | A64F_EX(A64EX_SXTW);
    }
  }
  return A64F_M(ra_allocref(as, ref, allow));  /* Fallback: plain register. */
}
| 234 | |||
/* Fuse XLOAD/XSTORE reference into load/store operand. */
/* Tries to fold an ADD/STRREF address computation into the load/store's
** offset; otherwise emits the address arithmetic explicitly before the
** access (code is emitted backwards, so emit_lso comes first).
*/
static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
                         RegSet allow)
{
  IRIns *ir = IR(ref);
  Reg base;
  int32_t ofs = 0;
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {
      if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs))
        ref = ir->op1;  /* Fold base + constant into the offset. */
      /* NYI: Fuse add with two registers. */
    } else if (ir->o == IR_STRREF) {
      if (asm_isk32(as, ir->op2, &ofs)) {
        ref = ir->op1;
      } else if (asm_isk32(as, ir->op1, &ofs)) {
        ref = ir->op2;
      } else {
        /* NYI: Fuse ADD with constant. */
        /* Both operands variable: add them into rd, then access
        ** [rd + sizeof(GCstr)].
        */
        Reg rn = ra_alloc1(as, ir->op1, allow);
        uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
        emit_lso(as, ai, rd, rd, sizeof(GCstr));
        emit_dn(as, A64I_ADDx^m, rd, rn);
        return;
      }
      ofs += sizeof(GCstr);  /* Skip the string header. */
      if (!emit_checkofs(ai, ofs)) {
        /* Offset too large: use the register-offset addressing form --
        ** NOTE(review): the 0x01204800 XOR flips the encoding to
        ** register-offset; confirm against the A64I_* definitions.
        */
        Reg rn = ra_alloc1(as, ref, allow);
        Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
        emit_dnm(as, (ai ^ 0x01204800), rd, rn, rm);
        return;
      }
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_lso(as, ai, (rd & 31), base, ofs);
}
| 272 | |||
| 273 | /* -- Calls --------------------------------------------------------------- */ | ||
| 274 | |||
/* Generate a call to a C function. */
/* Emits the call first (backwards emission), then places each argument:
** FP args in d0..d(LASTFPR), integer/pointer args in x0..x(LASTGPR),
** overflow args spilled to the stack at 8-byte slots.
*/
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t ofs = 0;  /* Next stack slot for overflow arguments. */
  Reg gpr, fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  /* Pin the GPR argument registers -- NOTE(review): REGCOST(~0u, ASMREF_L)
  ** presumably prevents the allocator from reusing them while arguments
  ** are being placed; confirm with the register-allocator internals.
  */
  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  gpr = REGARG_FIRSTGPR;
  for (n = 0; n < nargs; n++) { /* Setup args. */
    IRRef ref = args[n];
    IRIns *ir = IR(ref);
    if (ref) {
      if (irt_isfp(ir->t)) {
        if (fpr <= REGARG_LASTFPR) {
          lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
          ra_leftov(as, fpr, ref);
          fpr++;
        } else {
          /* Out of FP argument registers: pass on the stack. */
          Reg r = ra_alloc1(as, ref, RSET_FPR);
          emit_spstore(as, ir, r, ofs);
          ofs += 8;
        }
      } else {
        if (gpr <= REGARG_LASTGPR) {
          lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
          ra_leftov(as, gpr, ref);
          gpr++;
        } else {
          /* Out of GPR argument registers: pass on the stack. */
          Reg r = ra_alloc1(as, ref, RSET_GPR);
          emit_spstore(as, ir, r, ofs);
          ofs += 8;
        }
      }
    }
  }
}
| 314 | |||
/* Setup result reg/sp for call. Evict scratch regs. */
/* The call result arrives in RID_RET (integer) or RID_FPRET (FP); for
** CCI_CASTU64 the bits come back in the integer return register and are
** moved over to an FP register.
*/
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r); /* Dest reg handled below. */
  ra_evictset(as, drop); /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (irt_isfp(ir->t)) {
      if (ci->flags & CCI_CASTU64) {
        /* Result returned as raw bits in x0: fmov it into the FP dest. */
        Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
        emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
                dest, RID_RET);
      } else {
        ra_destreg(as, ir, RID_FPRET);
      }
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
  UNUSED(ci);
}
| 338 | |||
/* Assemble an indirect call (CALLXS). */
/* A constant target becomes a direct call via ci.func; otherwise a blr
** through a register outside the argument-register range is emitted and
** ci.func is cleared so asm_gencall skips the direct call.
*/
static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(ir_k64(irf)->u64);
  } else {  /* Need a non-argument register for indirect calls. */
    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
    emit_n(as, A64I_BLR, freg);
    ci.func = (ASMFunction)(void *)0;  /* Suppress direct call emission. */
  }
  asm_gencall(as, &ci, args);
}
| 359 | |||
| 360 | /* -- Returns ------------------------------------------------------------- */ | ||
| 361 | |||
/* Return to lower frame. Guard that it goes to the right spot. */
/* Emitted backwards: load the frame link at base[-1], compare it against
** the expected return PC (guard CC_NE to exit on mismatch), then lower
** base by the frame delta and store it back to the globals/stack slot.
*/
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  /* Slots dropped by the return: frame link (+FR2) + call base. */
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
  /* Need to force a spill on REF_BASE now to update the stack slot. */
  emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guardcc(as, CC_NE);
  emit_nm(as, A64I_CMPx, RID_TMP,
          ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
}
| 380 | |||
| 381 | /* -- Type conversions ---------------------------------------------------- */ | ||
| 382 | |||
/* Convert a double to an integer with an exactness guard. */
/* Emitted backwards: fcvtzs to the integer dest, convert back to double
** in tmp, then fcmp tmp against the original -- any fractional part or
** out-of-range value makes the round trip differ and the guard exits.
*/
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  asm_guardcc(as, CC_NE);
  emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
  emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
  emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
}
| 392 | |||
/* Convert a number to its bit pattern for bit operations (TOBIT). */
/* Adds op1 and op2 as doubles and moves the low 32 bits of the sum into
** a GPR -- NOTE(review): op2 is presumably the 2^52+2^51 bias constant
** that makes the mantissa low bits equal the integer value; confirm
** with the IR_TOBIT producers.
*/
static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
  emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
}
| 403 | |||
/* IR_CONV: convert between IR types.
** Covers FP<->FP, integer<->FP, 8/16 bit extension to 32 bit, and
** 32<->64 bit moves/casts.  Instructions are emitted bottom-up.
*/
static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);  /* Source type. */
  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lua_assert(irt_type(ir->t) != st);
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
	      (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
    } else {  /* Integer to FP conversion. */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      /* Pick the scvtf/ucvtf variant from source width/signedness and
      ** destination precision (float vs. double).
      */
      A64Ins ai = irt_isfloat(ir->t) ?
	(((IRT_IS64 >> st) & 1) ?
	 (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
	 (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
	(((IRT_IS64 >> st) & 1) ?
	 (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
	 (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
      emit_dn(as, ai, (dest & 31), left);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg dest = ra_dest(as, ir, RSET_GPR);
      /* Pick the fcvtzs/fcvtzu variant from dest width/signedness and
      ** source precision.
      */
      A64Ins ai = irt_is64(ir->t) ?
	(st == IRT_NUM ?
	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
	 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
	(st == IRT_NUM ?
	 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
	 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
      emit_dn(as, ai, dest, (left & 31));
    }
  } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_alloc1(as, lref, RSET_GPR);
    A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
		st == IRT_U8 ? A64I_UXTBw :
		st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
    lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
    emit_dn(as, ai, dest, left);
  } else {  /* 32/64 bit integer conversions. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (irt_is64(ir->t)) {
      if (st64 || !(ir->op2 & IRCONV_SEXT)) {
	/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
      } else {  /* 32 to 64 bit sign extension. */
	Reg left = ra_alloc1(as, lref, RSET_GPR);
	emit_dn(as, A64I_SXTW, dest, left);
      }
    } else {
      if (st64) {
	/* This is either a 32 bit reg/reg mov which zeroes the hiword
	** or a load of the loword from a 64 bit address.
	*/
	Reg left = ra_alloc1(as, lref, RSET_GPR);
	emit_dm(as, A64I_MOVw, dest, left);
      } else {  /* 32/32 bit no-op (cast). */
	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
      }
    }
  }
}
| 475 | |||
/* IR_STRTO: convert a string to a number via lj_strscan_num().
** The scanned TValue is written to a stack (or spill) slot; the call's
** boolean result guards the trace.  Emission is bottom-up, so the
** runtime order is: compute slot address, call, compare, guard, load.
*/
static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  Reg dest = 0, tmp;
  int destused = ra_used(ir);
  int32_t ofs = 0;
  ra_evictset(as, RSET_SCRATCH);  /* The call clobbers all scratch regs. */
  if (destused) {
    if (ra_hasspill(ir->s)) {
      ofs = sps_scale(ir->s);
      destused = 0;  /* Scanner writes directly into the spill slot. */
      if (ra_hasreg(ir->r)) {
	ra_free(as, ir->r);
	ra_modified(as, ir->r);
	emit_spload(as, ir, ir->r, ofs);
      }
    } else {
      dest = ra_dest(as, ir, RSET_FPR);
    }
  }
  asm_guardcc(as, CC_EQ);  /* Exit if lj_strscan_num() returned 0. */
  if (destused)
    emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
  emit_n(as, (A64I_CMPw^A64I_K12)|A64F_U12(0), RID_RET);
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n */
  asm_gencall(as, ci, args);
  /* TMP1 = sp + ofs: address of the TValue the scanner writes to. */
  tmp = ra_releasetmp(as, ASMREF_TMP1);
  emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
}
| 507 | |||
| 508 | /* -- Memory references --------------------------------------------------- */ | ||
| 509 | |||
/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref)) {
      /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
    } else {
      /* Otherwise force a spill and use the spill slot. */
      emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
    }
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg src;
    if (irref_isk(ref)) {
      /* Store the fully materialized 64 bit constant. */
      TValue k;
      lj_ir_kvalue(as->J->L, &k, ir);
      src = ra_allock(as, k.u64, allow);
      emit_lso(as, A64I_STRx, src, dest, 0);
    } else {
      Reg type;
      if (irt_ispri(ir->t)) {
	/* Primitive: store the canonical tagged constant directly. */
	src = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
	emit_lso(as, A64I_STRx, src, dest, 0);
      } else if (irt_isint(ir->t)) {
	/* Integer: add the zero-extended 32 bit value to the tag. */
	src = ra_alloc1(as, ref, allow);
	type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
	emit_lso(as, A64I_STRx, RID_TMP, dest, 0);
	emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
      } else {
	/* GC object: add the pointer to the tag shifted into bits 47+. */
	src = ra_alloc1(as, ref, allow);
	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
	emit_lso(as, A64I_STRx, RID_TMP, dest, 0);
	emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
      }
    }
    ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
  }
}
| 551 | |||
/* IR_AREF: compute the address of an array part element (8 byte slots). */
static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {  /* Constant index? */
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
    if (k) {  /* Whole offset fits into a 12 bit immediate: single add. */
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_dn(as, A64I_ADDx^k, dest, base);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  /* dest = base + (zero-extended idx) << 3. */
  emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
}
| 571 | |||
/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
** Node *n = hashkey(t, key);
** do {
**   if (lj_obj_equal(&n->key, key)) return &n->val;
** } while ((n = nextnode(n)));
** return niltv(L);
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = 0, tmp = RID_TMP;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  int isk = irref_isk(ir->op2);
  IRType1 kt = irkey->t;
  uint32_t k = 0;  /* Encoded 12 bit immediate for the key, if it fits. */
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;
  rset_clear(allow, tab);

  /* Allocate registers (or a 12 bit immediate) for the key. */
  if (!isk) {
    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
    rset_clear(allow, key);
    if (!irt_isstr(kt)) {
      tmp = ra_scratch(as, allow);
      rset_clear(allow, tmp);
    }
  } else if (irt_isnum(kt)) {
    int64_t val = (int64_t)ir_knum(irkey)->u64;
    if (!(k = emit_isk12(val))) {
      key = ra_allock(as, val, allow);
      rset_clear(allow, key);
    }
  } else if (!irt_ispri(kt)) {
    if (!(k = emit_isk12(irkey->i))) {
      key = ra_alloc1(as, refkey, allow);
      rset_clear(allow, key);
    }
  }

  /* NOTE: everything below is emitted bottom-up; the machine code runs
  ** top-down starting with the hash computation at the end.
  */

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guardcc(as, CC_AL);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));

  /* Follow hash chain until the end. */
  l_loop = --as->mcp;  /* Placeholder, patched to a B.NE further down. */
  emit_n(as, A64I_CMPx^A64I_K12^0, dest);
  emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ)
    asm_guardcc(as, CC_EQ);
  else
    emit_cond_branch(as, CC_EQ, l_end);

  if (irt_isnum(kt)) {
    if (isk) {
      /* Assumes -0.0 is already canonicalized to +0.0. */
      if (k)
	emit_n(as, A64I_CMPx^k, tmp);
      else
	emit_nm(as, A64I_CMPx, key, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
    } else {
      /* Variable number key: check the tag first, then FP-compare. */
      Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
      Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
      rset_clear(allow, tisnum);
      emit_nm(as, A64I_FCMPd, key, ftmp);
      emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
      emit_cond_branch(as, CC_LO, l_next);  /* Not a number: next node. */
      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
    }
  } else if (irt_isaddr(kt)) {
    /* GC object key: compare the full tagged 64 bit value. */
    Reg scr;
    if (isk) {
      int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
      scr = ra_allock(as, kk, allow);
      emit_nm(as, A64I_CMPx, scr, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
    } else {
      scr = ra_scratch(as, allow);
      emit_nm(as, A64I_CMPx, tmp, scr);
      emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
    }
    rset_clear(allow, scr);
  } else {
    /* Primitive key: compare against the canonical tagged constant. */
    Reg type, scr;
    lua_assert(irt_ispri(kt) && !irt_isnil(kt));
    type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
    scr = ra_scratch(as, rset_clear(allow, type));
    rset_clear(allow, scr);
    emit_nm(as, A64I_CMPw, scr, type);
    emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
  }

  /* Patch the placeholder above into B.NE back up to l_next. */
  *l_loop = A64I_BCC | A64F_S19((as->mcp-l_loop) & 0x0007ffffu) | CC_NE;
  if (!isk && irt_isaddr(kt)) {
    /* Pre-merge the type tag into tmp for the comparisons above. */
    Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
    rset_clear(allow, type);
  }
  /* Load main position relative to tab->node into dest. */
  khash = isk ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
  } else {
    /* dest = node + idx*3*8: idx*3 via add-lsl-1, then node + (idx*3)<<3. */
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
    emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
    if (isk) {
      Reg tmphash = ra_allock(as, khash, allow);
      emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
    } else if (irt_isstr(kt)) {
      /* Fetch of str->hash is cheaper than ra_allock. */
      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
      emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
      emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
      emit_dnm(as, A64I_SUBw, dest, dest, tmp);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
      emit_dnm(as, A64I_EORw, dest, dest, tmp);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
      emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
      emit_dnm(as, A64I_EORw, tmp, tmp, dest);
      if (irt_isnum(kt)) {
	/* Number key: mix hiword and 2*loword of the FP bit pattern. */
	emit_dnm(as, A64I_ADDw, dest, dest, dest);
	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
	emit_dm(as, A64I_MOVw, tmp, dest);
	emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
      } else {
	/* GC object key: mix tagged hiword and loword of the pointer. */
	checkmclim(as);
	emit_dm(as, A64I_MOVw, tmp, key);
	emit_dnm(as, A64I_EORw, dest, dest,
		 ra_allock(as, irt_toitype(kt) << 15, allow));
	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
	emit_dm(as, A64I_MOVx, dest, key);
      }
    }
  }
}
| 726 | |||
/* IR_HREFK: access a hash node at a constant slot index, guarded by a
** comparison of the stored 64 bit key against the expected constant.
*/
static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  int bigofs = !emit_checkofs(A64I_LDRx, ofs);  /* Offset beyond LDR range? */
  RegSet allow = RSET_GPR;
  Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, allow);
  Reg key = ra_scratch(as, rset_clear(allow, node));
  Reg idx = node;
  uint64_t k;
  lua_assert(ofs % sizeof(Node) == 0);
  rset_clear(allow, key);
  if (bigofs) {
    /* Materialize node+ofs in dest and address the key relative to it. */
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
  }
  asm_guardcc(as, CC_NE);  /* Exit if the stored key doesn't match. */
  /* Build the expected tagged 64 bit key constant. */
  if (irt_ispri(irkey->t)) {
    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
  } else if (irt_isnum(irkey->t)) {
    k = ir_knum(irkey)->u64;
  } else {
    k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
  }
  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow));
  emit_lso(as, A64I_LDRx, key, idx, kofs);
  if (bigofs)
    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
}
| 762 | |||
/* IR_UREF*: dereference an upvalue.  IR_UREFC additionally guards that
** the upvalue is closed (uv->closed == 1).
*/
static void asm_uref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    /* Constant function: the upvalue address is known at assembly time. */
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, A64I_LDRx, dest, v);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      asm_guardcc(as, CC_NE);  /* Exit if uv->closed != 1. */
      emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
      emit_opk(as, A64I_ADDx, dest, uv,
	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
      emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    /* Load the upvalue object pointer from the function's uvptr array. */
    emit_lso(as, A64I_LDRx, uv, func,
	     (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
  }
}
| 786 | |||
| 787 | static void asm_fref(ASMState *as, IRIns *ir) | ||
| 788 | { | ||
| 789 | UNUSED(as); UNUSED(ir); | ||
| 790 | lua_assert(!ra_used(ir)); | ||
| 791 | } | ||
| 792 | |||
/* IR_STRREF: compute the address of string data at str+ofs.
** The character data starts right after the GCstr header.
*/
static void asm_strref(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg dest = ra_dest(as, ir, allow);
  Reg base = ra_alloc1(as, ir->op1, allow);
  IRIns *irr = IR(ir->op2);
  int32_t ofs = sizeof(GCstr);
  uint32_t m;
  rset_clear(allow, base);
  if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
    /* Constant offset fits into one 12 bit immediate: single add. */
    emit_dn(as, A64I_ADDx^m, dest, base);
  } else {
    /* Two adds (emitted bottom-up): dest = (base + ofs2) + sizeof(GCstr). */
    emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
    emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
  }
}
| 809 | |||
| 810 | /* -- Loads and stores ---------------------------------------------------- */ | ||
| 811 | |||
| 812 | static A64Ins asm_fxloadins(IRIns *ir) | ||
| 813 | { | ||
| 814 | switch (irt_type(ir->t)) { | ||
| 815 | case IRT_I8: return A64I_LDRB ^ A64I_LS_S; | ||
| 816 | case IRT_U8: return A64I_LDRB; | ||
| 817 | case IRT_I16: return A64I_LDRH ^ A64I_LS_S; | ||
| 818 | case IRT_U16: return A64I_LDRH; | ||
| 819 | case IRT_NUM: return A64I_LDRd; | ||
| 820 | case IRT_FLOAT: return A64I_LDRs; | ||
| 821 | default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; | ||
| 822 | } | ||
| 823 | } | ||
| 824 | |||
| 825 | static A64Ins asm_fxstoreins(IRIns *ir) | ||
| 826 | { | ||
| 827 | switch (irt_type(ir->t)) { | ||
| 828 | case IRT_I8: case IRT_U8: return A64I_STRB; | ||
| 829 | case IRT_I16: case IRT_U16: return A64I_STRH; | ||
| 830 | case IRT_NUM: return A64I_STRd; | ||
| 831 | case IRT_FLOAT: return A64I_STRs; | ||
| 832 | default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; | ||
| 833 | } | ||
| 834 | } | ||
| 835 | |||
/* IR_FLOAD: load a C struct field (op1 == REF_NIL loads a field of the
** global state relative to the dedicated RID_GL register).
*/
static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx;
  A64Ins ai = asm_fxloadins(ir);
  int32_t ofs;
  if (ir->op1 == REF_NIL) {
    idx = RID_GL;  /* Address relative to the global state register. */
    ofs = (ir->op2 << 2) - GG_OFS(g);
  } else {
    idx = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->op2 == IRFL_TAB_ARRAY) {
      ofs = asm_fuseabase(as, ir->op1);
      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
	emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
	return;
      }
    }
    ofs = field_ofs[ir->op2];  /* Field offset table indexed by IRFL_*. */
  }
  emit_lso(as, ai, (dest & 31), idx, ofs);
}
| 858 | |||
| 859 | static void asm_fstore(ASMState *as, IRIns *ir) | ||
| 860 | { | ||
| 861 | if (ir->r != RID_SINK) { | ||
| 862 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | ||
| 863 | IRIns *irf = IR(ir->op1); | ||
| 864 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | ||
| 865 | int32_t ofs = field_ofs[irf->op2]; | ||
| 866 | emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); | ||
| 867 | } | ||
| 868 | } | ||
| 869 | |||
| 870 | static void asm_xload(ASMState *as, IRIns *ir) | ||
| 871 | { | ||
| 872 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
| 873 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | ||
| 874 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); | ||
| 875 | } | ||
| 876 | |||
| 877 | static void asm_xstore(ASMState *as, IRIns *ir) | ||
| 878 | { | ||
| 879 | if (ir->r != RID_SINK) { | ||
| 880 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
| 881 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
| 882 | rset_exclude(RSET_GPR, src)); | ||
| 883 | } | ||
| 884 | } | ||
| 885 | |||
/* IR_ALOAD/HLOAD/ULOAD/VLOAD: load a TValue and guard on its type tag. */
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  Reg idx, tmp, type;
  int32_t ofs = 0;
  RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
  lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
	     irt_isint(ir->t));
  if (ra_used(ir)) {
    Reg dest = ra_dest(as, ir, allow);
    tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
    if (irt_isaddr(ir->t)) {
      /* Mask off the type tag, keeping only the GC pointer bits. */
      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
    } else if (irt_isnum(ir->t)) {
      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
    } else if (irt_isint(ir->t)) {
      emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend the loword. */
    }
  } else {
    tmp = ra_scratch(as, gpr);
  }
  type = ra_scratch(as, rset_clear(gpr, tmp));
  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
  /* Always do the type check, even if the load result is unused. */
  asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
  if (irt_type(ir->t) >= IRT_NUM) {
    lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
	    ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
  } else if (irt_isaddr(ir->t)) {
    /* Extract the tag with an arithmetic shift and compare via CMN. */
    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
    emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
  } else if (irt_isnil(ir->t)) {
    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
  } else {
    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
	    ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
  }
  if (ofs & FUSE_REG)  /* Fused register-indexed address? */
    emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx, tmp, idx, (ofs & 31));
  else
    emit_lso(as, A64I_LDRx, tmp, idx, ofs);
}
| 928 | |||
/* IR_ASTORE/HSTORE/USTORE: store a TValue, re-tagging non-number values. */
static void asm_ahustore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {  /* Sunk stores emit nothing. */
    RegSet allow = RSET_GPR;
    Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
    int32_t ofs = 0;
    if (irt_isnum(ir->t)) {
      /* Numbers are stored as-is from an FPR. */
      src = ra_alloc1(as, ir->op2, RSET_FPR);
      idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
      if (ofs & FUSE_REG)
	emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx, (src & 31), idx, (ofs &31));
      else
	emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
    } else {
      if (!irt_ispri(ir->t)) {
	src = ra_alloc1(as, ir->op2, allow);
	rset_clear(allow, src);
	if (irt_isinteger(ir->t))
	  type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow);
	else
	  type = ra_allock(as, irt_toitype(ir->t), allow);
      } else {
	/* Primitives store a ready-made tagged constant directly. */
	tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
      }
      idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
			   A64I_STRx);
      if (ofs & FUSE_REG)  /* Fused register-indexed address? */
	emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx, tmp, idx, (ofs & 31));
      else
	emit_lso(as, A64I_STRx, tmp, idx, ofs);
      if (ra_hasreg(src)) {
	/* Merge value and type tag into tmp before the store above. */
	if (irt_isinteger(ir->t)) {
	  emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
	} else {
	  emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
	}
      }
    }
  }
}
| 969 | |||
/* IR_SLOAD: load a stack slot, with optional type check and/or
** int<->number conversion.
*/
static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-2);  /* Slot offset relative to BASE. */
  IRType1 t = ir->t;
  Reg dest = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    /* Guarded number-to-int conversion: delegate to asm_tointg(). */
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    Reg tmp = RID_NONE;
    if ((ir->op2 & IRSLOAD_CONVERT))
      tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
    lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
    base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
    if (irt_isaddr(t)) {
      /* Mask off the type tag, keeping only the GC pointer bits. */
      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
    } else if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
	emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
	/* If value is already loaded for type check, move it to FPR. */
	if ((ir->op2 & IRSLOAD_TYPECHECK))
	  emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
	else
	  dest = tmp;
	t.irt = IRT_NUM;  /* Check for original type. */
      } else {
	emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
	dest = tmp;
	t.irt = IRT_INT;  /* Check for original type. */
      }
    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
      emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend the loword. */
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
dotypecheck:
  rset_clear(allow, base);
  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
    Reg tmp;
    /* Reuse dest as the GPR holding the raw TValue bits, if possible. */
    if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
      tmp = dest;
    } else {
      tmp = ra_scratch(as, allow);
      rset_clear(allow, tmp);
    }
    if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
    /* Need type check, even if the load result is unused. */
    asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
    if (irt_type(t) >= IRT_NUM) {
      lua_assert(irt_isinteger(t) || irt_isnum(t));
      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
	      ra_allock(as, LJ_TISNUM << 15, allow), tmp);
    } else if (irt_isnil(t)) {
      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
    } else if (irt_ispri(t)) {
      emit_nm(as, A64I_CMPx,
	      ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
    } else {
      /* GC object: extract the tag with an arithmetic shift, compare via CMN. */
      Reg type = ra_scratch(as, allow);
      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
      emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
    }
    emit_lso(as, A64I_LDRx, tmp, base, ofs);
    return;
  }
  if (ra_hasreg(dest)) {
    emit_lso(as, irt_isnum(t) ? A64I_LDRd :
	     (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs);
  }
}
| 1047 | |||
| 1048 | /* -- Allocations --------------------------------------------------------- */ | ||
| 1049 | |||
#if LJ_HASFFI
/* IR_CNEW/CNEWI: allocate a cdata object.
** CNEWI initializes the payload inline; variable-size cdata goes
** through lj_cdata_newv(), everything else through lj_mem_newgco().
*/
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));

  as->gcsteps++;
  asm_setupresult(as, ir, ci);  /* GCcdata * */
  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    int32_t ofs = sizeof(GCcdata);  /* Payload follows the header. */
    Reg r = ra_alloc1(as, ir->op2, allow);
    lua_assert(sz == 4 || sz == 8);
    emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id */
    args[2] = ir->op2;      /* CTSize sz */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  {
    Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
    emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
    emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
    emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
    if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
  }
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
	       ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif
| 1098 | |||
| 1099 | /* -- Write barriers ------------------------------------------------------ */ | ||
| 1100 | |||
/* IR_TBAR: table write barrier.  If the table is black (checked via its
** marked byte), clear the black bit and link it into gc.grayagain;
** otherwise branch straight to l_end.
*/
static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
		     rset_exclude(rset_exclude(RSET_GPR, tab), link));
  Reg mark = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_lso(as, A64I_STRx, tab, gr,
	   (int32_t)offsetof(global_State, gc.grayagain));
  /* Clear the black bit in the marked byte. */
  emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
  emit_lso(as, A64I_LDRx, link, gr,
	   (int32_t)offsetof(global_State, gc.grayagain));
  emit_cond_branch(as, CC_EQ, l_end);  /* Skip barrier if not black. */
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
  emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
}
| 1120 | |||
/* IR_OBAR: upvalue write barrier.  Calls lj_gc_barrieruv() only when the
** upvalue is black and the stored value is white; otherwise both marked
** tests branch to l_end.
*/
static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  RegSet allow = RSET_GPR;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);  /* The barrier call clobbers scratch regs. */
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv */
  asm_gencall(as, ci, args);
  ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(allow, obj));
  emit_cond_branch(as, CC_EQ, l_end);  /* Skip call if upvalue not black. */
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
  emit_cond_branch(as, CC_EQ, l_end);  /* Skip call if value not white. */
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  /* Load both marked bytes: obj's (via its tv address) and val's. */
  emit_lso(as, A64I_LDRB, tmp, obj,
	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}
| 1147 | |||
| 1148 | /* -- Arithmetic and logic operations ------------------------------------- */ | ||
| 1149 | |||
/* Emit a two-operand FP arithmetic instruction (fadd/fsub/fmul/fdiv). */
static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);  /* Packed register pair. */
  right = (left >> 8); left &= 255;  /* Unpack: right in the high byte. */
  emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31));  /* &31: FPR no. */
}
| 1157 | |||
/* Emit a one-operand FP instruction (fneg/fabs/fsqrt/frint*). */
static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);  /* Prefer dest reg. */
  emit_dn(as, ai, (dest & 31), (left & 31));
}
| 1164 | |||
/* IR_FPMATH: dispatch FP math ops.
** sqrt/floor/ceil/trunc map to single A64 instructions; exp2 may be
** fused with a following pow; the rest become runtime calls.
*/
static void asm_fpmath(ASMState *as, IRIns *ir)
{
  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
  if (fpm == IRFPM_SQRT) {
    asm_fpunary(as, ir, A64I_FSQRTd);
  } else if (fpm <= IRFPM_TRUNC) {  /* Rounding ops: frintm/frintp/frintz. */
    asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
			fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
    return;  /* Merged exp2 into a pow; nothing left to emit. */
  } else {
    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);  /* Indexed by fpm. */
  }
}
| 1179 | |||
| 1180 | static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) | ||
| 1181 | { | ||
| 1182 | IRIns *ir; | ||
| 1183 | if (irref_isk(rref)) | ||
| 1184 | return 0; /* Don't swap constants to the left. */ | ||
| 1185 | if (irref_isk(lref)) | ||
| 1186 | return 1; /* But swap constants to the right. */ | ||
| 1187 | ir = IR(rref); | ||
| 1188 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || | ||
| 1189 | (ir->o == IR_ADD && ir->op1 == ir->op2) || | ||
| 1190 | (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) | ||
| 1191 | return 0; /* Don't swap fusable operands to the left. */ | ||
| 1192 | ir = IR(lref); | ||
| 1193 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || | ||
| 1194 | (ir->o == IR_ADD && ir->op1 == ir->op2) || | ||
| 1195 | (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) | ||
| 1196 | return 1; /* But swap fusable operands to the right. */ | ||
| 1197 | return 0; /* Otherwise don't swap. */ | ||
| 1198 | } | ||
| 1199 | |||
/* Emit an integer data-processing op, fusing the RHS operand if possible. */
static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
{
  IRRef lref = ir->op1, rref = ir->op2;
  Reg left, dest = ra_dest(as, ir, RSET_GPR);
  uint32_t m;
  /* SUB is not commutative, so only swap for other ops. */
  if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
    IRRef tmp = lref; lref = rref; rref = tmp;
  }
  left = ra_hintalloc(as, lref, dest, RSET_GPR);
  if (irt_is64(ir->t)) ai |= A64I_X;  /* Select the 64 bit variant. */
  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
  if (irt_isguard(ir->t)) {  /* For IR_ADDOV etc. */
    asm_guardcc(as, CC_VS);  /* Exit on overflow. */
    ai |= A64I_S;  /* Need the flag-setting variant. */
  }
  emit_dn(as, ai^m, dest, left);
}
| 1217 | |||
/* Emit an integer op, eliding an already-emitted cmp r, #0 by using
** the flag-setting variant of the instruction instead.
*/
static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
{
  if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
    as->flagmcp = NULL;
    as->mcp++;  /* Reverse emission: ++ removes the last instruction. */
    ai |= A64I_S;
  }
  asm_intop(as, ir, ai);
}
| 1227 | |||
/* Emit an integer negate (neg, i.e. sub from zero register). */
static void asm_intneg(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
}
| 1234 | |||
/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
/* Integer multiply. For IR_MULOV, do a widening smull and guard that
** the 64 bit product still fits in 32 bits (high word must equal the
** sign-extension of the low word).
*/
static void asm_intmul(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  if (irt_isguard(ir->t)) {  /* IR_MULOV */
    asm_guardcc(as, CC_NE);  /* Exit if high word != sext(low word). */
    emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend. */
    emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
    emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);  /* High word. */
    emit_dnm(as, A64I_SMULL, dest, right, left);
  } else {
    emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
  }
}
| 1251 | |||
| 1252 | static void asm_add(ASMState *as, IRIns *ir) | ||
| 1253 | { | ||
| 1254 | if (irt_isnum(ir->t)) { | ||
| 1255 | asm_fparith(as, ir, A64I_FADDd); | ||
| 1256 | return; | ||
| 1257 | } | ||
| 1258 | asm_intop_s(as, ir, A64I_ADDw); | ||
| 1259 | } | ||
| 1260 | |||
| 1261 | static void asm_sub(ASMState *as, IRIns *ir) | ||
| 1262 | { | ||
| 1263 | if (irt_isnum(ir->t)) { | ||
| 1264 | asm_fparith(as, ir, A64I_FSUBd); | ||
| 1265 | return; | ||
| 1266 | } | ||
| 1267 | asm_intop_s(as, ir, A64I_SUBw); | ||
| 1268 | } | ||
| 1269 | |||
| 1270 | static void asm_mul(ASMState *as, IRIns *ir) | ||
| 1271 | { | ||
| 1272 | if (irt_isnum(ir->t)) { | ||
| 1273 | asm_fparith(as, ir, A64I_FMULd); | ||
| 1274 | return; | ||
| 1275 | } | ||
| 1276 | asm_intmul(as, ir); | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | static void asm_div(ASMState *as, IRIns *ir) | ||
| 1280 | { | ||
| 1281 | #if LJ_HASFFI | ||
| 1282 | if (!irt_isnum(ir->t)) | ||
| 1283 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
| 1284 | IRCALL_lj_carith_divu64); | ||
| 1285 | else | ||
| 1286 | #endif | ||
| 1287 | asm_fparith(as, ir, A64I_FDIVd); | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | static void asm_pow(ASMState *as, IRIns *ir) | ||
| 1291 | { | ||
| 1292 | #if LJ_HASFFI | ||
| 1293 | if (!irt_isnum(ir->t)) | ||
| 1294 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
| 1295 | IRCALL_lj_carith_powu64); | ||
| 1296 | else | ||
| 1297 | #endif | ||
| 1298 | asm_callid(as, ir, IRCALL_lj_vm_powi); | ||
| 1299 | } | ||
| 1300 | |||
/* Overflow-checking variants share the plain ops; the guard is keyed
** off the guard bit in ir->t (see asm_intop/asm_intmul). */
#define asm_addov(as, ir) asm_add(as, ir)
#define asm_subov(as, ir) asm_sub(as, ir)
#define asm_mulov(as, ir) asm_mul(as, ir)

/* FP absolute value is a single instruction; atan2/ldexp are calls. */
#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS)
#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
| 1308 | |||
| 1309 | static void asm_mod(ASMState *as, IRIns *ir) | ||
| 1310 | { | ||
| 1311 | #if LJ_HASFFI | ||
| 1312 | if (!irt_isint(ir->t)) | ||
| 1313 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
| 1314 | IRCALL_lj_carith_modu64); | ||
| 1315 | else | ||
| 1316 | #endif | ||
| 1317 | asm_callid(as, ir, IRCALL_lj_vm_modi); | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | static void asm_neg(ASMState *as, IRIns *ir) | ||
| 1321 | { | ||
| 1322 | if (irt_isnum(ir->t)) { | ||
| 1323 | asm_fpunary(as, ir, A64I_FNEGd); | ||
| 1324 | return; | ||
| 1325 | } | ||
| 1326 | asm_intneg(as, ir); | ||
| 1327 | } | ||
| 1328 | |||
/* Emit a bitwise op. Unary ops (op2 == 0, e.g. IR_BNOT via MVN) take
** the one-operand path; binary ops go through asm_intop. An AND may
** absorb a preceding cmp r, #0 by becoming the flag-setting ANDS.
*/
static void asm_bitop(ASMState *as, IRIns *ir, A64Ins ai)
{
  if (as->flagmcp == as->mcp && ai == A64I_ANDw) {
    /* Try to drop cmp r, #0. */
    as->flagmcp = NULL;
    as->mcp++;  /* Reverse emission: ++ removes the last instruction. */
    ai += A64I_ANDSw - A64I_ANDw;
  }
  if (ir->op2 == 0) {  /* Unary op. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
    if (irt_is64(ir->t)) ai |= A64I_X;
    emit_d(as, ai^m, dest);
  } else {
    asm_intop(as, ir, ai);
  }
}
| 1346 | |||
/* Bitwise IR ops all share asm_bitop; the unary IR_BNOT passes MVN and
** reaches the op2 == 0 path there. */
#define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw)
#define asm_band(as, ir) asm_bitop(as, ir, A64I_ANDw)
#define asm_bor(as, ir) asm_bitop(as, ir, A64I_ORRw)
#define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw)
| 1351 | |||
/* IR_BSWAP: byte-swap a 32 or 64 bit value (rev instruction). */
static void asm_bswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
}
| 1358 | |||
/* Emit a shift or rotate. Constant shift amounts use the immediate
** bitfield (ubfm/sbfm) or extract (extr) encodings; variable amounts
** use the register-shift instructions.
*/
static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
{
  int shmask = irt_is64(ir->t) ? 63 : 31;  /* Shift amounts wrap at width. */
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
    int32_t shift = (IR(ir->op2)->i & shmask);

    if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;  /* 64 bit form. */
    switch (sh) {
    case A64SH_LSL:
      /* lsl #n == ubfm with immr = width-n, imms = width-1-n. */
      emit_dn(as, ai | A64F_IMMS(shmask-shift) | A64F_IMMR(shmask-shift+1), dest, left);
      break;
    case A64SH_LSR: case A64SH_ASR:
      emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
      break;
    case A64SH_ROR:
      /* ror #n == extr with both source registers the same. */
      emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);
      break;
    }
  } else {  /* Variable-length shifts. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
  }
}
| 1386 | |||
/* Shifts map onto ubfm/sbfm/extr encodings. Rotate-left is not emitted
** directly (hence the assert) — presumably canonicalized to IR_BROR
** before assembly; TODO confirm. */
#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
#define asm_brol(as, ir) lua_assert(0)
| 1392 | |||
/* Integer min/max: compare, then csel picks left when cc holds. */
static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  /* Reverse emission: cmp runs first, csel second. */
  emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
  emit_nm(as, A64I_CMPw, left, right);
}
| 1401 | |||
/* FP min/max: fcmp, then fcsel picks left when fcc holds. */
static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
{
  Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);  /* Packed register pair. */
  right = ((left >> 8) & 31); left &= 31;
  /* Reverse emission: fcmp runs first, fcsel second. */
  emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right);
  emit_nm(as, A64I_FCMPd, left, right);
}
| 1410 | |||
| 1411 | static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) | ||
| 1412 | { | ||
| 1413 | if (irt_isnum(ir->t)) | ||
| 1414 | asm_fpmin_max(as, ir, fcc); | ||
| 1415 | else | ||
| 1416 | asm_intmin_max(as, ir, cc); | ||
| 1417 | } | ||
| 1418 | |||
/* The condition selects the left operand. FP variants use HI/LO —
** presumably chosen for their behavior on unordered (NaN) compares;
** TODO confirm against the FP condition flag semantics. */
#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
| 1421 | |||
| 1422 | /* -- Comparisons --------------------------------------------------------- */ | ||
| 1423 | |||
/* Map of comparisons to flags. ORDER IR. */
/* Each entry packs two condition codes: the low nibble is the inverted
** integer condition used by asm_guardcc (exit taken when the IR compare
** is false), the high nibble the inverted FP condition. The "swp" column
** marks ops whose FP operands asm_fpcomp swaps before comparing.
*/
static const uint8_t asm_compmap[IR_ABC+1] = {
  /* op  FP swp  int cc   FP cc */
  /* LT       */ CC_GE + (CC_HS << 4),
  /* GE    x  */ CC_LT + (CC_HI << 4),
  /* LE       */ CC_GT + (CC_HI << 4),
  /* GT    x  */ CC_LE + (CC_HS << 4),
  /* ULT   x  */ CC_HS + (CC_LS << 4),
  /* UGE      */ CC_LO + (CC_LO << 4),
  /* ULE   x  */ CC_HI + (CC_LO << 4),
  /* UGT      */ CC_LS + (CC_LS << 4),
  /* EQ       */ CC_NE + (CC_NE << 4),
  /* NE       */ CC_EQ + (CC_EQ << 4),
  /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
};
| 1439 | |||
/* FP comparisons. */
static void asm_fpcomp(ASMState *as, IRIns *ir)
{
  Reg left, right;
  A64Ins ai;
  /* Bit trick on the opcode selecting the ops that need swapped operands
  ** (presumably GE/GT/ULT/ULE, matching asm_compmap — TODO confirm).
  */
  int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
  if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
    left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
    right = 0;
    ai = A64I_FCMPZd;  /* Compare against immediate #0.0. */
  } else {
    left = ra_alloc2(as, ir, RSET_FPR);  /* Packed register pair. */
    if (swp) {
      right = (left & 31); left = ((left >> 8) & 31);
    } else {
      right = ((left >> 8) & 31); left &= 31;
    }
    ai = A64I_FCMPd;
  }
  asm_guardcc(as, (asm_compmap[ir->o] >> 4));  /* High nibble: FP cond. */
  emit_nm(as, ai, left, right);
}
| 1462 | |||
| 1463 | /* Integer comparisons. */ | ||
| 1464 | static void asm_intcomp(ASMState *as, IRIns *ir) | ||
| 1465 | { | ||
| 1466 | A64CC oldcc, cc = (asm_compmap[ir->o] & 15); | ||
| 1467 | A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; | ||
| 1468 | IRRef lref = ir->op1, rref = ir->op2; | ||
| 1469 | Reg left; | ||
| 1470 | uint32_t m; | ||
| 1471 | int cmpprev0 = 0; | ||
| 1472 | lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || | ||
| 1473 | irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); | ||
| 1474 | if (asm_swapops(as, lref, rref)) { | ||
| 1475 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
| 1476 | if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ | ||
| 1477 | else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ | ||
| 1478 | } | ||
| 1479 | oldcc = cc; | ||
| 1480 | if (irref_isk(rref) && IR(rref)->i == 0) { | ||
| 1481 | IRIns *irl = IR(lref); | ||
| 1482 | if (cc == CC_GE) cc = CC_PL; | ||
| 1483 | else if (cc == CC_LT) cc = CC_MI; | ||
| 1484 | else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */ | ||
| 1485 | cmpprev0 = (irl+1 == ir); | ||
| 1486 | /* Combine comp(BAND(left, right), 0) into tst left, right. */ | ||
| 1487 | if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { | ||
| 1488 | IRRef blref = irl->op1, brref = irl->op2; | ||
| 1489 | uint32_t m2 = 0; | ||
| 1490 | Reg bleft; | ||
| 1491 | if (asm_swapops(as, blref, brref)) { | ||
| 1492 | Reg tmp = blref; blref = brref; brref = tmp; | ||
| 1493 | } | ||
| 1494 | if (irref_isk(brref)) { | ||
| 1495 | /* NYI: use tbz/tbnz, if applicable. */ | ||
| 1496 | m2 = emit_isk13(IR(brref)->i, irt_is64(irl->t)); | ||
| 1497 | if (!m2) | ||
| 1498 | goto notst; /* Not beneficial if we miss a constant operand. */ | ||
| 1499 | } | ||
| 1500 | bleft = ra_alloc1(as, blref, RSET_GPR); | ||
| 1501 | ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); | ||
| 1502 | if (!m2) | ||
| 1503 | m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); | ||
| 1504 | asm_guardcc(as, cc); | ||
| 1505 | emit_n(as, ai^m2, bleft); | ||
| 1506 | return; | ||
| 1507 | } | ||
| 1508 | /* NYI: use cbz/cbnz for EQ/NE 0. */ | ||
| 1509 | } | ||
| 1510 | notst: | ||
| 1511 | left = ra_alloc1(as, lref, RSET_GPR); | ||
| 1512 | m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); | ||
| 1513 | asm_guardcc(as, cc); | ||
| 1514 | emit_n(as, ai^m, left); | ||
| 1515 | /* Signed comparison with zero and referencing previous ins? */ | ||
| 1516 | if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) | ||
| 1517 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ | ||
| 1518 | } | ||
| 1519 | |||
| 1520 | static void asm_comp(ASMState *as, IRIns *ir) | ||
| 1521 | { | ||
| 1522 | if (irt_isnum(ir->t)) | ||
| 1523 | asm_fpcomp(as, ir); | ||
| 1524 | else | ||
| 1525 | asm_intcomp(as, ir); | ||
| 1526 | } | ||
| 1527 | |||
| 1528 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
| 1529 | |||
| 1530 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | ||
| 1531 | |||
/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
/* ARM64 is a 64 bit target, so split ops never reach the assembler. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused on 64 bit. */
}
| 1537 | |||
| 1538 | /* -- Profiling ----------------------------------------------------------- */ | ||
| 1539 | |||
/* IR_PROF: test the profiler hook bit in g->hookmask, exit if set. */
static void asm_prof(ASMState *as, IRIns *ir)
{
  uint32_t k = emit_isk13(HOOK_PROFILE, 0);
  lua_assert(k != 0);  /* HOOK_PROFILE must be a valid logical immediate. */
  UNUSED(ir);
  asm_guardcc(as, CC_NE);
  emit_n(as, A64I_TSTw^k, RID_TMP);
  emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
}
| 1549 | |||
| 1550 | /* -- Stack handling ------------------------------------------------------ */ | ||
| 1551 | |||
/* Check Lua stack size for overflow. Use exit handler as fallback. */
static void asm_stack_check(ASMState *as, BCReg topslot,
			    IRIns *irp, RegSet allow, ExitNo exitno)
{
  Reg pbase;
  uint32_t k;
  if (irp) {  /* Side trace: parent BASE may live in a reg or spill slot. */
    if (!ra_hasspill(irp->s)) {
      pbase = irp->r;
      lua_assert(ra_hasreg(pbase));
    } else if (allow) {
      pbase = rset_pickbot(allow);  /* Use a free register for the reload. */
    } else {  /* No free register: borrow RID_RET via a stack save/restore. */
      pbase = RID_RET;
      emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0);  /* Restore temp register. */
    }
  } else {
    pbase = RID_BASE;
  }
  /* Reverse emission. Runtime order: load maxstack, subtract base,
  ** compare against the needed 8*topslot bytes, bl to the exit stub
  ** on overflow (the b.cc jumps over the bl otherwise).
  */
  emit_branch(as, A64I_BL, exitstub_addr(as->J, exitno));
  emit_cond_branch(as, CC_LS^1, as->mcp+1);  /* cc^1 inverts the condition. */
  k = emit_isk12((8*topslot));
  lua_assert(k);  /* Slot count must fit a K12 immediate. */
  emit_n(as, A64I_CMPx^k, RID_TMP);
  emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
  emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
	   (int32_t)offsetof(lua_State, maxstack));
  if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
    if (ra_hasspill(irp->s))
      emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
    emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
    if (ra_hasspill(irp->s) && !allow)
      emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0);  /* Save temp register. */
  } else {
    emit_getgl(as, RID_TMP, cur_L);
  }
}
| 1589 | |||
/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
#ifdef LUA_USE_ASSERT
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
#endif
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);  /* Byte offset from BASE. */
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if (irt_isnum(ir->t)) {  /* Numbers are stored as raw doubles. */
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
    } else {
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
      if (!irref_isk(ref)) {
	Reg type, src;
	if (irt_is64(ir->t)) {
	  /* Merge the type tag (shifted to bits 47+) with the payload. */
	  type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
	  src = ra_alloc1(as, ref, rset_exclude(allow, type));
	  emit_lso(as, A64I_STRx, RID_TMP, RID_BASE, ofs);
	  emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
	} else if (irt_isinteger(ir->t)) {
	  /* Combine the TISNUM tag with the zero-extended 32 bit int. */
	  type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow);
	  src = ra_alloc1(as, ref, rset_exclude(allow, type));
	  emit_lso(as, A64I_STRx, RID_TMP, RID_BASE, ofs);
	  emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
	} else {
	  /* Primitives are represented by their tag alone. */
	  type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
	  emit_lso(as, A64I_STRx, type, RID_BASE, ofs);
	}
      } else {  /* Constant: materialize the whole TValue. */
	TValue k;
	lj_ir_kvalue(as->J->L, &k, ir);
	emit_lso(as, A64I_STRx,
		 ra_allock(as, tvisnil(&k) ? -1 : (int64_t)k.u64, allow),
		 RID_BASE, ofs);
      }
    }
    checkmclim(as);
  }
  lua_assert(map + nent == flinks);
}
| 1641 | |||
| 1642 | /* -- GC handling --------------------------------------------------------- */ | ||
| 1643 | |||
/* Check GC threshold and do one or more GC steps. */
/* Reverse emission. Runtime order: load gc.total and gc.threshold,
** skip to l_end if total <= threshold, else call lj_gc_step_jit and
** exit the trace if it signals GCSatomic/GCSfinalize.
*/
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp1, tmp2;
  ra_evictset(as, RSET_SCRATCH);  /* The GC step is a full C call. */
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  emit_n(as, A64I_CMPx^A64I_K12, RID_RET);  /* Compare call result with 0. */
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps */
  asm_gencall(as, ci, args);
  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
  tmp2 = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp2, as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_cond_branch(as, CC_LS, l_end);
  emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
  emit_lso(as, A64I_LDRx, tmp2, tmp1,
	   (int32_t)offsetof(global_State, gc.threshold));
  emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
	   (int32_t)offsetof(global_State, gc.total));
  ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
  as->gcsteps = 0;
  checkmclim(as);
}
| 1673 | |||
| 1674 | /* -- Loop handling ------------------------------------------------------- */ | ||
| 1675 | |||
/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    ptrdiff_t delta = target - (p - 2);
    lua_assert(((delta + 0x40000) >> 19) == 0);  /* b.cc range: +-19 bit. */
    /* asm_guardcc already inverted the b.cc and patched the final bl. */
    p[-2] |= ((uint32_t)delta & 0x7ffff) << 5;  /* imm19 field at bit 5. */
  } else {
    ptrdiff_t delta = target - (p - 1);
    p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu);  /* 26 bit imm. */
  }
}
| 1691 | |||
| 1692 | /* -- Head of trace ------------------------------------------------------- */ | ||
| 1693 | |||
/* Reload L register from g->cur_L. */
static void asm_head_lreg(ASMState *as)
{
  IRIns *ir = IR(ASMREF_L);
  if (ra_used(ir)) {  /* Only if the trace actually references L. */
    Reg r = ra_dest(as, ir, RSET_GPR);
    emit_getgl(as, r, cur_L);
    ra_evictk(as);
  }
}
| 1704 | |||
/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
  IRIns *ir;
  asm_head_lreg(as);
  ir = IR(REF_BASE);
  /* Spill if the current holder was modified or is marked. */
  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
    ra_spill(as, ir);
  ra_destreg(as, ir, RID_BASE);  /* BASE must end up in its fixed reg. */
}
| 1715 | |||
/* Coalesce BASE register for a side trace. */
static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
  IRIns *ir;
  asm_head_lreg(as);
  ir = IR(REF_BASE);
  /* Spill if the current holder was modified or is marked. */
  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
    ra_spill(as, ir);
  if (ra_hasspill(irp->s)) {  /* Parent kept BASE in a spill slot. */
    rset_clear(allow, ra_dest(as, ir, allow));
  } else {  /* Match the parent's register assignment. */
    Reg r = irp->r;
    lua_assert(ra_hasreg(r));
    rset_clear(allow, r);
    if (r != ir->r && !rset_test(as->freeset, r))
      ra_restore(as, regcost_ref(as->cost[r]));  /* Free the parent's reg. */
    ra_destreg(as, ir, r);
  }
  return allow;
}
| 1736 | |||
| 1737 | /* -- Tail of trace ------------------------------------------------------- */ | ||
| 1738 | |||
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
  int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
  if (spadj == 0) {
    as->mctop = --p;  /* No adjustment needed: drop the reserved slot. */
  } else {
    /* Patch stack adjustment. */
    uint32_t k = emit_isk12(spadj);
    lua_assert(k);  /* Adjustment must fit a K12 immediate. */
    p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
  }
  /* Patch exit branch: to the linked trace or back to the interpreter. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu);  /* 26 bit branch imm. */
}
| 1758 | |||
/* Prepare tail of code. */
static void asm_tail_prep(ASMState *as)
{
  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
  if (as->loopref) {
    as->invmcp = as->mcp = p;  /* Loop: branch slot may be inverted later. */
  } else {
    as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
    as->invmcp = NULL;
  }
  *p = 0;  /* Prevent load/store merging. */
}
| 1771 | |||
| 1772 | /* -- Trace setup --------------------------------------------------------- */ | ||
| 1773 | |||
/* Ensure there are enough stack slots for call arguments. */
/* Counts args against the GPR/FPR register budgets; each overflow arg
** costs two spill slots (nslots += 2 — presumably 4-byte slot units,
** i.e. 8 bytes per argument; TODO confirm against sps_scale).
*/
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX*2];
  uint32_t i, nargs = CCI_XNARGS(ci);
  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
  asm_collectargs(as, ir, ci, args);
  for (i = 0; i < nargs; i++) {
    if (args[i] && irt_isfp(IR(args[i])->t)) {
      if (nfpr > 0) nfpr--; else nslots += 2;
    } else {
      if (ngpr > 0) ngpr--; else nslots += 2;
    }
  }
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
  return REGSP_HINT(RID_RET);
}
| 1792 | |||
/* Set up exit stubs before assembling the trace. */
static void asm_setup_target(ASMState *as)
{
  /* May need extra exit for asm_stack_check on side traces. */
  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}
| 1798 | |||
| 1799 | /* -- Trace patching ------------------------------------------------------ */ | ||
| 1800 | |||
/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *cstart = NULL, *cend = p;  /* Range of patched code to sync. */
  MCode *mcarea = lj_mcode_patch(J, p, 0);  /* Make mcode writable. */
  MCode *px = exitstub_addr(J, exitno);
  for (; p < pe; p++) {
    /* Look for bl exitstub, replace with b target. */
    uint32_t ins = *p;
    if ((ins & 0xfc000000u) == 0x94000000u &&  /* Opcode is bl. */
	((ins ^ (px-p)) & 0x03ffffffu) == 0) {  /* Offset hits exitstub. */
      /* Clear bit 31 (bl -> b) and insert the new 26 bit offset. */
      *p = (ins & 0x7c000000u) | ((target-p) & 0x03ffffffu);
      cend = p+1;
      if (!cstart) cstart = p;
    }
  }
  lua_assert(cstart != NULL);  /* At least one branch must be patched. */
  lj_mcode_sync(cstart, cend);  /* Flush I-cache for the patched range. */
  lj_mcode_patch(J, mcarea, 1);  /* Restore mcode protection. */
}
| 1823 | |||
diff --git a/src/lj_ccall.c b/src/lj_ccall.c index b599be33..a3ae8b05 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c | |||
| @@ -331,7 +331,7 @@ | |||
| 331 | 331 | ||
| 332 | #define CCALL_HANDLE_COMPLEXARG \ | 332 | #define CCALL_HANDLE_COMPLEXARG \ |
| 333 | /* Pass complex by value in separate (!) FPRs or on stack. */ \ | 333 | /* Pass complex by value in separate (!) FPRs or on stack. */ \ |
| 334 | isfp = ctr->size == 2*sizeof(float) ? 2 : 1; | 334 | isfp = sz == 2*sizeof(float) ? 2 : 1; |
| 335 | 335 | ||
| 336 | #define CCALL_HANDLE_REGARG \ | 336 | #define CCALL_HANDLE_REGARG \ |
| 337 | if (LJ_TARGET_IOS && isva) { \ | 337 | if (LJ_TARGET_IOS && isva) { \ |
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 82708077..362d6202 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h | |||
| @@ -107,6 +107,7 @@ typedef struct GG_State { | |||
| 107 | #define J2G(J) (&J2GG(J)->g) | 107 | #define J2G(J) (&J2GG(J)->g) |
| 108 | #define G2J(gl) (&G2GG(gl)->J) | 108 | #define G2J(gl) (&G2GG(gl)->J) |
| 109 | #define L2J(L) (&L2GG(L)->J) | 109 | #define L2J(L) (&L2GG(L)->J) |
| 110 | #define GG_G2J (GG_OFS(J) - GG_OFS(g)) | ||
| 110 | #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) | 111 | #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) |
| 111 | #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) | 112 | #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) |
| 112 | #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) | 113 | #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h new file mode 100644 index 00000000..eb8f7fc7 --- /dev/null +++ b/src/lj_emit_arm64.h | |||
| @@ -0,0 +1,397 @@ | |||
| 1 | /* | ||
| 2 | ** ARM64 instruction emitter. | ||
| 3 | ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h | ||
| 4 | ** | ||
| 5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
| 6 | ** Sponsored by Cisco Systems, Inc. | ||
| 7 | */ | ||
| 8 | |||
| 9 | /* -- Constant encoding --------------------------------------------------- */ | ||
| 10 | |||
/* Return the 64 bit raw value of a constant IR instruction. */
static uint64_t get_k64val(IRIns *ir)
{
  if (ir->o == IR_KINT64) {
    return ir_kint64(ir)->u64;
  } else if (ir->o == IR_KGC) {
    return (uint64_t)ir_kgc(ir);  /* GC object pointer. */
  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
    return (uint64_t)ir_kptr(ir);
  } else {
    lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
    return ir->i;  /* Sign-extended. */
  }
}
| 24 | |||
| 25 | /* Encode constant in K12 format for data processing instructions. */ | ||
| 26 | static uint32_t emit_isk12(int64_t n) | ||
| 27 | { | ||
| 28 | uint64_t k = (n < 0) ? -n : n; | ||
| 29 | uint32_t m = (n < 0) ? 0x40000000 : 0; | ||
| 30 | if (k < 0x1000) { | ||
| 31 | return A64I_K12|m|A64F_U12(k); | ||
| 32 | } else if ((k & 0xfff000) == k) { | ||
| 33 | return A64I_K12|m|0x400000|A64F_U12(k>>12); | ||
| 34 | } | ||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | |||
| 38 | #define emit_clz64(n) __builtin_clzll(n) | ||
| 39 | #define emit_ctz64(n) __builtin_ctzll(n) | ||
| 40 | |||
| 41 | /* Encode constant in K13 format for logical data processing instructions. */ | ||
| 42 | static uint32_t emit_isk13(uint64_t n, int is64) | ||
| 43 | { | ||
| 44 | int inv = 0, w = 128, lz, tz; | ||
| 45 | if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ | ||
| 46 | if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ | ||
| 47 | do { /* Find the repeat width. */ | ||
| 48 | if (is64 && (uint32_t)(n^(n>>32))) break; | ||
| 49 | n = (uint32_t)n; w = 32; if ((n^(n>>16)) & 0xffff) break; | ||
| 50 | n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; | ||
| 51 | n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; | ||
| 52 | n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; | ||
| 53 | n = n & 0x3; w = 2; | ||
| 54 | } while (0); | ||
| 55 | lz = emit_clz64(n); | ||
| 56 | tz = emit_ctz64(n); | ||
| 57 | if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ | ||
| 58 | if (inv) | ||
| 59 | return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); | ||
| 60 | else | ||
| 61 | return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); | ||
| 62 | } | ||
| 63 | |||
| 64 | static uint32_t emit_isfpk64(uint64_t n) | ||
| 65 | { | ||
| 66 | uint64_t etop9 = ((n >> 54) & 0x1ff); | ||
| 67 | if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { | ||
| 68 | return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); | ||
| 69 | } | ||
| 70 | return ~0u; | ||
| 71 | } | ||
| 72 | |||
| 73 | /* -- Emit basic instructions --------------------------------------------- */ | ||
| 74 | |||
| 75 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) | ||
| 76 | { | ||
| 77 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); | ||
| 78 | } | ||
| 79 | |||
| 80 | static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) | ||
| 81 | { | ||
| 82 | *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); | ||
| 83 | } | ||
| 84 | |||
| 85 | static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) | ||
| 86 | { | ||
| 87 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); | ||
| 88 | } | ||
| 89 | |||
| 90 | static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) | ||
| 91 | { | ||
| 92 | *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); | ||
| 93 | } | ||
| 94 | |||
| 95 | static void emit_d(ASMState *as, A64Ins ai, Reg rd) | ||
| 96 | { | ||
| 97 | *--as->mcp = ai | A64F_D(rd); | ||
| 98 | } | ||
| 99 | |||
| 100 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) | ||
| 101 | { | ||
| 102 | *--as->mcp = ai | A64F_N(rn); | ||
| 103 | } | ||
| 104 | |||
| 105 | static int emit_checkofs(A64Ins ai, int64_t ofs) | ||
| 106 | { | ||
| 107 | int scale = (ai >> 30) & 3; | ||
| 108 | if (ofs < 0 || (ofs & ((1<<scale)-1))) { | ||
| 109 | return (ofs >= -256 && ofs <= 255) ? -1 : 0; | ||
| 110 | } else { | ||
| 111 | return (ofs < (4096<<scale)) ? 1 : 0; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | |||
| 115 | static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) | ||
| 116 | { | ||
| 117 | int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; | ||
| 118 | lua_assert(ot); | ||
| 119 | /* Combine LDR/STR pairs to LDP/STP. */ | ||
| 120 | if ((sc == 2 || sc == 3) && | ||
| 121 | (!(ai & 0x400000) || rd != rn) && | ||
| 122 | as->mcp != as->mcloop) { | ||
| 123 | uint32_t prev = *as->mcp & ~A64F_D(31); | ||
| 124 | int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc); | ||
| 125 | A64Ins aip; | ||
| 126 | if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) || | ||
| 127 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { | ||
| 128 | aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); | ||
| 129 | } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || | ||
| 130 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { | ||
| 131 | aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); | ||
| 132 | ofsm = ofs; | ||
| 133 | } else { | ||
| 134 | goto nopair; | ||
| 135 | } | ||
| 136 | if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) { | ||
| 137 | *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | | ||
| 138 | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); | ||
| 139 | return; | ||
| 140 | } | ||
| 141 | } | ||
| 142 | nopair: | ||
| 143 | if (ot == 1) | ||
| 144 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); | ||
| 145 | else | ||
| 146 | *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); | ||
| 147 | } | ||
| 148 | |||
| 149 | /* -- Emit loads/stores --------------------------------------------------- */ | ||
| 150 | |||
| 151 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | ||
| 152 | #define emit_canremat(ref) ((ref) <= ASMREF_L) | ||
| 153 | |||
| 154 | /* Try to find an N-step delta relative to other consts with N < lim. */ | ||
| 155 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | ||
| 156 | { | ||
| 157 | RegSet work = ~as->freeset & RSET_GPR; | ||
| 158 | if (lim <= 1) return 0; /* Can't beat that. */ | ||
| 159 | while (work) { | ||
| 160 | Reg r = rset_picktop(work); | ||
| 161 | IRRef ref = regcost_ref(as->cost[r]); | ||
| 162 | lua_assert(r != rd); | ||
| 163 | if (ref < REF_TRUE) { | ||
| 164 | uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : | ||
| 165 | get_k64val(IR(ref)); | ||
| 166 | int64_t delta = (int64_t)(k - kx); | ||
| 167 | if (delta == 0) { | ||
| 168 | emit_dm(as, A64I_MOVx, rd, r); | ||
| 169 | return 1; | ||
| 170 | } else { | ||
| 171 | uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); | ||
| 172 | if (k12) { | ||
| 173 | emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); | ||
| 174 | return 1; | ||
| 175 | } | ||
| 176 | /* Do other ops or multi-step deltas pay off? Probably not. | ||
| 177 | ** E.g. XOR rarely helps with pointer consts. | ||
| 178 | */ | ||
| 179 | } | ||
| 180 | } | ||
| 181 | rset_clear(work, r); | ||
| 182 | } | ||
| 183 | return 0; /* Failed. */ | ||
| 184 | } | ||
| 185 | |||
| 186 | static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) | ||
| 187 | { | ||
| 188 | uint32_t k13 = emit_isk13(u64, is64); | ||
| 189 | if (k13) { /* Can the constant be represented as a bitmask immediate? */ | ||
| 190 | emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); | ||
| 191 | } else { | ||
| 192 | int i, zeros = 0, ones = 0, neg; | ||
| 193 | if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ | ||
| 194 | /* Count homogeneous 16 bit fragments. */ | ||
| 195 | for (i = 0; i < 4; i++) { | ||
| 196 | uint64_t frag = (u64 >> i*16) & 0xffff; | ||
| 197 | zeros += (frag == 0); | ||
| 198 | ones += (frag == 0xffff); | ||
| 199 | } | ||
| 200 | neg = ones > zeros; /* Use MOVN if it pays off. */ | ||
| 201 | if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { | ||
| 202 | int shift = 0, lshift = 0; | ||
| 203 | uint64_t n64 = neg ? ~u64 : u64; | ||
| 204 | if (n64 != 0) { | ||
| 205 | /* Find first/last fragment to be filled. */ | ||
| 206 | shift = (63-emit_clz64(n64)) & ~15; | ||
| 207 | lshift = emit_ctz64(n64) & ~15; | ||
| 208 | } | ||
| 209 | /* MOVK requires the original value (u64). */ | ||
| 210 | while (shift > lshift) { | ||
| 211 | uint32_t u16 = (u64 >> shift) & 0xffff; | ||
| 212 | /* Skip fragments that are correctly filled by MOVN/MOVZ. */ | ||
| 213 | if (u16 != (neg ? 0xffff : 0)) | ||
| 214 | emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); | ||
| 215 | shift -= 16; | ||
| 216 | } | ||
| 217 | /* But MOVN needs an inverted value (n64). */ | ||
| 218 | emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | | ||
| 219 | A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | } | ||
| 223 | |||
| 224 | /* Load a 32 bit constant into a GPR. */ | ||
| 225 | #define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) | ||
| 226 | |||
| 227 | /* Load a 64 bit constant into a GPR. */ | ||
| 228 | #define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) | ||
| 229 | |||
| 230 | #define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) | ||
| 231 | |||
| 232 | #define glofs(as, k) \ | ||
| 233 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) | ||
| 234 | #define mcpofs(as, k) \ | ||
| 235 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) | ||
| 236 | #define checkmcpofs(as, k) \ | ||
| 237 | ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) | ||
| 238 | |||
| 239 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); | ||
| 240 | |||
| 241 | /* Get/set from constant pointer. */ | ||
| 242 | static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) | ||
| 243 | { | ||
| 244 | /* First, check if ip + offset is in range. */ | ||
| 245 | if ((ai & 0x00400000) && checkmcpofs(as, p)) { | ||
| 246 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); | ||
| 247 | } else { | ||
| 248 | Reg base = RID_GL; /* Next, try GL + offset. */ | ||
| 249 | int64_t ofs = glofs(as, p); | ||
| 250 | if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ | ||
| 251 | int64_t i64 = i64ptr(p); | ||
| 252 | base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); | ||
| 253 | ofs = i64 & 0x7fffull; | ||
| 254 | } | ||
| 255 | emit_lso(as, ai, r, base, ofs); | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 | /* Load 64 bit IR constant into register. */ | ||
| 260 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | ||
| 261 | { | ||
| 262 | const uint64_t *k = &ir_k64(ir)->u64; | ||
| 263 | int64_t ofs; | ||
| 264 | if (r >= RID_MAX_GPR) { | ||
| 265 | uint32_t fpk = emit_isfpk64(*k); | ||
| 266 | if (fpk != ~0u) { | ||
| 267 | emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); | ||
| 268 | return; | ||
| 269 | } | ||
| 270 | } | ||
| 271 | ofs = glofs(as, k); | ||
| 272 | if (emit_checkofs(A64I_LDRx, ofs)) { | ||
| 273 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, | ||
| 274 | (r & 31), RID_GL, ofs); | ||
| 275 | } else { | ||
| 276 | if (r >= RID_MAX_GPR) { | ||
| 277 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); | ||
| 278 | r = RID_TMP; | ||
| 279 | } | ||
| 280 | if (checkmcpofs(as, k)) | ||
| 281 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); | ||
| 282 | else | ||
| 283 | emit_loadu64(as, r, *k); | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | /* Get/set global_State fields. */ | ||
| 288 | #define emit_getgl(as, r, field) \ | ||
| 289 | emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) | ||
| 290 | #define emit_setgl(as, r, field) \ | ||
| 291 | emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) | ||
| 292 | |||
| 293 | /* Trace number is determined from pc of exit instruction. */ | ||
| 294 | #define emit_setvmstate(as, i) UNUSED(i) | ||
| 295 | |||
| 296 | /* -- Emit control-flow instructions -------------------------------------- */ | ||
| 297 | |||
| 298 | /* Label for internal jumps. */ | ||
| 299 | typedef MCode *MCLabel; | ||
| 300 | |||
| 301 | /* Return label pointing to current PC. */ | ||
| 302 | #define emit_label(as) ((as)->mcp) | ||
| 303 | |||
| 304 | static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) | ||
| 305 | { | ||
| 306 | MCode *p = as->mcp; | ||
| 307 | ptrdiff_t delta = target - (p - 1); | ||
| 308 | lua_assert(((delta + 0x40000) >> 19) == 0); | ||
| 309 | *--p = A64I_BCC | A64F_S19((uint32_t)delta & 0x7ffff) | cond; | ||
| 310 | as->mcp = p; | ||
| 311 | } | ||
| 312 | |||
| 313 | static void emit_branch(ASMState *as, A64Ins ai, MCode *target) | ||
| 314 | { | ||
| 315 | MCode *p = as->mcp; | ||
| 316 | ptrdiff_t delta = target - (p - 1); | ||
| 317 | lua_assert(((delta + 0x02000000) >> 26) == 0); | ||
| 318 | *--p = ai | ((uint32_t)delta & 0x03ffffffu); | ||
| 319 | as->mcp = p; | ||
| 320 | } | ||
| 321 | |||
| 322 | #define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) | ||
| 323 | |||
| 324 | static void emit_call(ASMState *as, void *target) | ||
| 325 | { | ||
| 326 | MCode *p = --as->mcp; | ||
| 327 | ptrdiff_t delta = (char *)target - (char *)p; | ||
| 328 | if ((((delta>>2) + 0x02000000) >> 26) == 0) { | ||
| 329 | *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); | ||
| 330 | } else { /* Target out of range: need indirect call. But don't use R0-R7. */ | ||
| 331 | Reg r = ra_allock(as, i64ptr(target), | ||
| 332 | RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); | ||
| 333 | *p = A64I_BLR | A64F_N(r); | ||
| 334 | } | ||
| 335 | } | ||
| 336 | |||
| 337 | /* -- Emit generic operations --------------------------------------------- */ | ||
| 338 | |||
| 339 | /* Generic move between two regs. */ | ||
| 340 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | ||
| 341 | { | ||
| 342 | if (dst >= RID_MAX_GPR) { | ||
| 343 | emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, | ||
| 344 | (dst & 31), (src & 31)); | ||
| 345 | return; | ||
| 346 | } | ||
| 347 | if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ | ||
| 348 | MCode ins = *as->mcp, swp = (src^dst); | ||
| 349 | if ((ins & 0xbf800000) == 0xb9000000) { | ||
| 350 | if (!((ins ^ (dst << 5)) & 0x000003e0)) | ||
| 351 | *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ | ||
| 352 | if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) | ||
| 353 | *as->mcp = ins ^ swp; /* Swap D in store. */ | ||
| 354 | } | ||
| 355 | } | ||
| 356 | emit_dm(as, A64I_MOVx, dst, src); | ||
| 357 | } | ||
| 358 | |||
| 359 | /* Generic load of register with base and (small) offset address. */ | ||
| 360 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
| 361 | { | ||
| 362 | if (r >= RID_MAX_GPR) | ||
| 363 | emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); | ||
| 364 | else | ||
| 365 | emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); | ||
| 366 | } | ||
| 367 | |||
| 368 | /* Generic store of register with base and (small) offset address. */ | ||
| 369 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
| 370 | { | ||
| 371 | if (r >= RID_MAX_GPR) | ||
| 372 | emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); | ||
| 373 | else | ||
| 374 | emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); | ||
| 375 | } | ||
| 376 | |||
| 377 | /* Emit an arithmetic operation with a constant operand. */ | ||
| 378 | static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, | ||
| 379 | int32_t i, RegSet allow) | ||
| 380 | { | ||
| 381 | uint32_t k = emit_isk12(i); | ||
| 382 | if (k) | ||
| 383 | emit_dn(as, ai^k, dest, src); | ||
| 384 | else | ||
| 385 | emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); | ||
| 386 | } | ||
| 387 | |||
| 388 | /* Add offset to pointer. */ | ||
| 389 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | ||
| 390 | { | ||
| 391 | if (ofs) | ||
| 392 | emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, | ||
| 393 | ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); | ||
| 394 | } | ||
| 395 | |||
| 396 | #define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) | ||
| 397 | |||
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 8b72be7d..8bc2474c 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c | |||
| @@ -296,6 +296,9 @@ enum { | |||
| 296 | #elif LJ_TARGET_ARM | 296 | #elif LJ_TARGET_ARM |
| 297 | DW_REG_SP = 13, | 297 | DW_REG_SP = 13, |
| 298 | DW_REG_RA = 14, | 298 | DW_REG_RA = 14, |
| 299 | #elif LJ_TARGET_ARM64 | ||
| 300 | DW_REG_SP = 31, | ||
| 301 | DW_REG_RA = 30, | ||
| 299 | #elif LJ_TARGET_PPC | 302 | #elif LJ_TARGET_PPC |
| 300 | DW_REG_SP = 1, | 303 | DW_REG_SP = 1, |
| 301 | DW_REG_RA = 65, | 304 | DW_REG_RA = 65, |
| @@ -374,6 +377,8 @@ static const ELFheader elfhdr_template = { | |||
| 374 | .machine = 62, | 377 | .machine = 62, |
| 375 | #elif LJ_TARGET_ARM | 378 | #elif LJ_TARGET_ARM |
| 376 | .machine = 40, | 379 | .machine = 40, |
| 380 | #elif LJ_TARGET_ARM64 | ||
| 381 | .machine = 183, | ||
| 377 | #elif LJ_TARGET_PPC | 382 | #elif LJ_TARGET_PPC |
| 378 | .machine = 20, | 383 | .machine = 20, |
| 379 | #elif LJ_TARGET_MIPS | 384 | #elif LJ_TARGET_MIPS |
| @@ -563,6 +568,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) | |||
| 563 | int i; | 568 | int i; |
| 564 | for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } | 569 | for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } |
| 565 | } | 570 | } |
| 571 | #elif LJ_TARGET_ARM64 | ||
| 572 | { | ||
| 573 | int i; | ||
| 574 | DB(DW_CFA_offset|31); DUV(2); | ||
| 575 | for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } | ||
| 576 | for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } | ||
| 577 | } | ||
| 566 | #elif LJ_TARGET_PPC | 578 | #elif LJ_TARGET_PPC |
| 567 | { | 579 | { |
| 568 | int i; | 580 | int i; |
diff --git a/src/lj_target.h b/src/lj_target.h index abea8d5b..c069eb95 100644 --- a/src/lj_target.h +++ b/src/lj_target.h | |||
| @@ -55,7 +55,7 @@ typedef uint32_t RegSP; | |||
| 55 | /* Bitset for registers. 32 registers suffice for most architectures. | 55 | /* Bitset for registers. 32 registers suffice for most architectures. |
| 56 | ** Note that one set holds bits for both GPRs and FPRs. | 56 | ** Note that one set holds bits for both GPRs and FPRs. |
| 57 | */ | 57 | */ |
| 58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
| 59 | typedef uint64_t RegSet; | 59 | typedef uint64_t RegSet; |
| 60 | #else | 60 | #else |
| 61 | typedef uint32_t RegSet; | 61 | typedef uint32_t RegSet; |
| @@ -69,7 +69,7 @@ typedef uint32_t RegSet; | |||
| 69 | #define rset_set(rs, r) (rs |= RID2RSET(r)) | 69 | #define rset_set(rs, r) (rs |= RID2RSET(r)) |
| 70 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) | 70 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) |
| 71 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) | 71 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) |
| 72 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 72 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
| 73 | #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) | 73 | #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) |
| 74 | #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) | 74 | #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) |
| 75 | #else | 75 | #else |
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 57ab134f..0cef06d5 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h | |||
| @@ -55,7 +55,8 @@ enum { | |||
| 55 | 55 | ||
| 56 | /* Make use of all registers, except for x18, fp, lr and sp. */ | 56 | /* Make use of all registers, except for x18, fp, lr and sp. */ |
| 57 | #define RSET_FIXED \ | 57 | #define RSET_FIXED \ |
| 58 | (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)) | 58 | (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ |
| 59 | RID2RSET(RID_GL)) | ||
| 59 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) | 60 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) |
| 60 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) | 61 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) |
| 61 | #define RSET_ALL (RSET_GPR|RSET_FPR) | 62 | #define RSET_ALL (RSET_GPR|RSET_FPR) |
| @@ -73,25 +74,235 @@ enum { | |||
| 73 | #define REGARG_LASTFPR RID_D7 | 74 | #define REGARG_LASTFPR RID_D7 |
| 74 | #define REGARG_NUMFPR 8 | 75 | #define REGARG_NUMFPR 8 |
| 75 | 76 | ||
| 77 | /* -- Spill slots --------------------------------------------------------- */ | ||
| 78 | |||
| 79 | /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. | ||
| 80 | ** | ||
| 81 | ** SPS_FIXED: Available fixed spill slots in interpreter frame. | ||
| 82 | ** This definition must match with the vm_arm64.dasc file. | ||
| 83 | ** Pre-allocate some slots to avoid sp adjust in every root trace. | ||
| 84 | ** | ||
| 85 | ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. | ||
| 86 | */ | ||
| 87 | #define SPS_FIXED 4 | ||
| 88 | #define SPS_FIRST 2 | ||
| 89 | |||
| 90 | #define SPOFS_TMP 0 | ||
| 91 | |||
| 92 | #define sps_scale(slot) (4 * (int32_t)(slot)) | ||
| 93 | #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) | ||
| 94 | |||
| 95 | /* -- Exit state ---------------------------------------------------------- */ | ||
| 96 | |||
| 97 | /* This definition must match with the *.dasc file(s). */ | ||
| 98 | typedef struct { | ||
| 99 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | ||
| 100 | intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | ||
| 101 | int32_t spill[256]; /* Spill slots. */ | ||
| 102 | } ExitState; | ||
| 103 | |||
| 104 | /* PC after instruction that caused an exit. Used to find the trace number. */ | ||
| 105 | #define EXITSTATE_PCREG RID_LR | ||
| 106 | /* Highest exit + 1 indicates stack check. */ | ||
| 107 | #define EXITSTATE_CHECKEXIT 1 | ||
| 108 | |||
| 109 | #define EXITSTUB_SPACING 4 | ||
| 110 | #define EXITSTUBS_PER_GROUP 32 | ||
| 111 | |||
| 112 | |||
| 76 | /* -- Instructions -------------------------------------------------------- */ | 113 | /* -- Instructions -------------------------------------------------------- */ |
| 77 | 114 | ||
| 78 | /* Instruction fields. */ | 115 | /* Instruction fields. */ |
| 79 | #define A64F_D(r) (r) | 116 | #define A64F_D(r) (r) |
| 80 | #define A64F_N(r) ((r) << 5) | 117 | #define A64F_N(r) ((r) << 5) |
| 81 | #define A64F_A(r) ((r) << 10) | 118 | #define A64F_A(r) ((r) << 10) |
| 82 | #define A64F_M(r) ((r) << 16) | 119 | #define A64F_M(r) ((r) << 16) |
| 120 | #define A64F_IMMS(x) ((x) << 10) | ||
| 121 | #define A64F_IMMR(x) ((x) << 16) | ||
| 83 | #define A64F_U16(x) ((x) << 5) | 122 | #define A64F_U16(x) ((x) << 5) |
| 123 | #define A64F_U12(x) ((x) << 10) | ||
| 84 | #define A64F_S26(x) (x) | 124 | #define A64F_S26(x) (x) |
| 85 | #define A64F_S19(x) ((x) << 5) | 125 | #define A64F_S19(x) ((x) << 5) |
| 126 | #define A64F_S9(x) ((x) << 12) | ||
| 127 | #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) | ||
| 128 | #define A64F_EX(ex) (A64I_EX | ((ex) << 13)) | ||
| 129 | #define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) | ||
| 130 | #define A64F_FP8(x) ((x) << 13) | ||
| 131 | #define A64F_CC(cc) ((cc) << 12) | ||
| 132 | #define A64F_LSL16(x) (((x) / 16) << 21) | ||
| 133 | #define A64F_BSH(sh) ((sh) << 10) | ||
| 86 | 134 | ||
| 87 | typedef enum A64Ins { | 135 | typedef enum A64Ins { |
| 136 | A64I_S = 0x20000000, | ||
| 137 | A64I_X = 0x80000000, | ||
| 138 | A64I_EX = 0x00200000, | ||
| 139 | A64I_K12 = 0x1a000000, | ||
| 140 | A64I_K13 = 0x18000000, | ||
| 141 | A64I_LS_U = 0x01000000, | ||
| 142 | A64I_LS_S = 0x00800000, | ||
| 143 | A64I_LS_R = 0x01200800, | ||
| 144 | A64I_LS_UXTWx = 0x00005000, | ||
| 145 | A64I_LS_LSLx = 0x00007000, | ||
| 146 | |||
| 147 | A64I_ADDw = 0x0b000000, | ||
| 148 | A64I_ADDx = 0x8b000000, | ||
| 149 | A64I_ADDSw = 0x2b000000, | ||
| 150 | A64I_ADDSx = 0xab000000, | ||
| 151 | A64I_NEGw = 0x4b0003e0, | ||
| 152 | A64I_NEGx = 0xcb0003e0, | ||
| 153 | A64I_SUBw = 0x4b000000, | ||
| 154 | A64I_SUBx = 0xcb000000, | ||
| 155 | A64I_SUBSw = 0x6b000000, | ||
| 156 | A64I_SUBSx = 0xeb000000, | ||
| 157 | |||
| 158 | A64I_MULw = 0x1b007c00, | ||
| 159 | A64I_MULx = 0x9b007c00, | ||
| 160 | A64I_SMULL = 0x9b207c00, | ||
| 161 | |||
| 162 | A64I_ANDw = 0x0a000000, | ||
| 163 | A64I_ANDx = 0x8a000000, | ||
| 164 | A64I_ANDSw = 0x6a000000, | ||
| 165 | A64I_ANDSx = 0xea000000, | ||
| 166 | A64I_EORw = 0x4a000000, | ||
| 167 | A64I_EORx = 0xca000000, | ||
| 168 | A64I_ORRw = 0x2a000000, | ||
| 169 | A64I_ORRx = 0xaa000000, | ||
| 170 | A64I_TSTw = 0x6a00001f, | ||
| 171 | A64I_TSTx = 0xea00001f, | ||
| 172 | |||
| 173 | A64I_CMPw = 0x6b00001f, | ||
| 174 | A64I_CMPx = 0xeb00001f, | ||
| 175 | A64I_CMNw = 0x2b00001f, | ||
| 176 | A64I_CMNx = 0xab00001f, | ||
| 177 | A64I_CCMPw = 0x7a400000, | ||
| 178 | A64I_CCMPx = 0xfa400000, | ||
| 179 | A64I_CSELw = 0x1a800000, | ||
| 180 | A64I_CSELx = 0x9a800000, | ||
| 181 | |||
| 182 | A64I_ASRw = 0x13007c00, | ||
| 183 | A64I_ASRx = 0x9340fc00, | ||
| 184 | A64I_LSLx = 0xd3400000, | ||
| 185 | A64I_LSRx = 0xd340fc00, | ||
| 186 | A64I_SHRw = 0x1ac02000, | ||
| 187 | A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ | ||
| 188 | A64I_REVw = 0x5ac00800, | ||
| 189 | A64I_REVx = 0xdac00c00, | ||
| 190 | |||
| 191 | A64I_EXTRw = 0x13800000, | ||
| 192 | A64I_EXTRx = 0x93c00000, | ||
| 193 | A64I_SBFMw = 0x13000000, | ||
| 194 | A64I_SBFMx = 0x93400000, | ||
| 195 | A64I_SXTBw = 0x13001c00, | ||
| 196 | A64I_SXTHw = 0x13003c00, | ||
| 197 | A64I_SXTW = 0x93407c00, | ||
| 198 | A64I_UBFMw = 0x53000000, | ||
| 199 | A64I_UBFMx = 0xd3400000, | ||
| 200 | A64I_UXTBw = 0x53001c00, | ||
| 201 | A64I_UXTHw = 0x53003c00, | ||
| 202 | |||
| 203 | A64I_MOVw = 0x2a0003e0, | ||
| 204 | A64I_MOVx = 0xaa0003e0, | ||
| 205 | A64I_MVNw = 0x2a2003e0, | ||
| 206 | A64I_MVNx = 0xaa2003e0, | ||
| 207 | A64I_MOVKw = 0x72800000, | ||
| 208 | A64I_MOVKx = 0xf2800000, | ||
| 88 | A64I_MOVZw = 0x52800000, | 209 | A64I_MOVZw = 0x52800000, |
| 89 | A64I_MOVZx = 0xd2800000, | 210 | A64I_MOVZx = 0xd2800000, |
| 211 | A64I_MOVNw = 0x12800000, | ||
| 212 | A64I_MOVNx = 0x92800000, | ||
| 213 | |||
| 214 | A64I_LDRB = 0x39400000, | ||
| 215 | A64I_LDRH = 0x79400000, | ||
| 216 | A64I_LDRw = 0xb9400000, | ||
| 217 | A64I_LDRx = 0xf9400000, | ||
| 90 | A64I_LDRLw = 0x18000000, | 218 | A64I_LDRLw = 0x18000000, |
| 91 | A64I_LDRLx = 0x58000000, | 219 | A64I_LDRLx = 0x58000000, |
| 92 | A64I_NOP = 0xd503201f, | 220 | A64I_STRB = 0x39000000, |
| 221 | A64I_STRH = 0x79000000, | ||
| 222 | A64I_STRw = 0xb9000000, | ||
| 223 | A64I_STRx = 0xf9000000, | ||
| 224 | A64I_STPw = 0x29000000, | ||
| 225 | A64I_STPx = 0xa9000000, | ||
| 226 | A64I_LDPw = 0x29400000, | ||
| 227 | A64I_LDPx = 0xa9400000, | ||
| 228 | |||
| 93 | A64I_B = 0x14000000, | 229 | A64I_B = 0x14000000, |
| 230 | A64I_BCC = 0x54000000, | ||
| 231 | A64I_BL = 0x94000000, | ||
| 94 | A64I_BR = 0xd61f0000, | 232 | A64I_BR = 0xd61f0000, |
| 233 | A64I_BLR = 0xd63f0000, | ||
| 234 | |||
| 235 | A64I_NOP = 0xd503201f, | ||
| 236 | |||
| 237 | /* FP */ | ||
| 238 | A64I_FADDd = 0x1e602800, | ||
| 239 | A64I_FSUBd = 0x1e603800, | ||
| 240 | A64I_FMADDd = 0x1f400000, | ||
| 241 | A64I_FMSUBd = 0x1f408000, | ||
| 242 | A64I_FNMADDd = 0x1f600000, | ||
| 243 | A64I_FNMSUBd = 0x1f608000, | ||
| 244 | A64I_FMULd = 0x1e600800, | ||
| 245 | A64I_FDIVd = 0x1e601800, | ||
| 246 | A64I_FNEGd = 0x1e614000, | ||
| 247 | A64I_FABS = 0x1e60c000, | ||
| 248 | A64I_FSQRTd = 0x1e61c000, | ||
| 249 | A64I_LDRs = 0xbd400000, | ||
| 250 | A64I_LDRd = 0xfd400000, | ||
| 251 | A64I_STRs = 0xbd000000, | ||
| 252 | A64I_STRd = 0xfd000000, | ||
| 253 | A64I_LDPs = 0x2d400000, | ||
| 254 | A64I_LDPd = 0x6d400000, | ||
| 255 | A64I_STPs = 0x2d000000, | ||
| 256 | A64I_STPd = 0x6d000000, | ||
| 257 | A64I_FCMPd = 0x1e602000, | ||
| 258 | A64I_FCMPZd = 0x1e602008, | ||
| 259 | A64I_FCSELd = 0x1e600c00, | ||
| 260 | A64I_FRINTMd = 0x1e654000, | ||
| 261 | A64I_FRINTPd = 0x1e64c000, | ||
| 262 | A64I_FRINTZd = 0x1e65c000, | ||
| 263 | |||
| 264 | A64I_FCVT_F32_F64 = 0x1e624000, | ||
| 265 | A64I_FCVT_F64_F32 = 0x1e22c000, | ||
| 266 | A64I_FCVT_F32_S32 = 0x1e220000, | ||
| 267 | A64I_FCVT_F64_S32 = 0x1e620000, | ||
| 268 | A64I_FCVT_F32_U32 = 0x1e230000, | ||
| 269 | A64I_FCVT_F64_U32 = 0x1e630000, | ||
| 270 | A64I_FCVT_F32_S64 = 0x9e220000, | ||
| 271 | A64I_FCVT_F64_S64 = 0x9e620000, | ||
| 272 | A64I_FCVT_F32_U64 = 0x9e230000, | ||
| 273 | A64I_FCVT_F64_U64 = 0x9e630000, | ||
| 274 | A64I_FCVT_S32_F64 = 0x1e780000, | ||
| 275 | A64I_FCVT_S32_F32 = 0x1e380000, | ||
| 276 | A64I_FCVT_U32_F64 = 0x1e790000, | ||
| 277 | A64I_FCVT_U32_F32 = 0x1e390000, | ||
| 278 | A64I_FCVT_S64_F64 = 0x9e780000, | ||
| 279 | A64I_FCVT_S64_F32 = 0x9e380000, | ||
| 280 | A64I_FCVT_U64_F64 = 0x9e790000, | ||
| 281 | A64I_FCVT_U64_F32 = 0x9e390000, | ||
| 282 | |||
| 283 | A64I_FMOV_S = 0x1e204000, | ||
| 284 | A64I_FMOV_D = 0x1e604000, | ||
| 285 | A64I_FMOV_R_S = 0x1e260000, | ||
| 286 | A64I_FMOV_S_R = 0x1e270000, | ||
| 287 | A64I_FMOV_R_D = 0x9e660000, | ||
| 288 | A64I_FMOV_D_R = 0x9e670000, | ||
| 289 | A64I_FMOV_DI = 0x1e601000, | ||
| 95 | } A64Ins; | 290 | } A64Ins; |
| 96 | 291 | ||
| 292 | typedef enum A64Shift { | ||
| 293 | A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR | ||
| 294 | } A64Shift; | ||
| 295 | |||
| 296 | typedef enum A64Extend { | ||
| 297 | A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, | ||
| 298 | A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, | ||
| 299 | } A64Extend; | ||
| 300 | |||
| 301 | /* ARM condition codes. */ | ||
| 302 | typedef enum A64CC { | ||
| 303 | CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, | ||
| 304 | CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, | ||
| 305 | CC_HS = CC_CS, CC_LO = CC_CC | ||
| 306 | } A64CC; | ||
| 307 | |||
| 97 | #endif | 308 | #endif |
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 7a881bdd..a6227bf7 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
| @@ -236,12 +236,17 @@ | |||
| 236 | |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro | 236 | |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro |
| 237 | |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro | 237 | |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro |
| 238 | | | 238 | | |
| 239 | #define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field)) | 239 | #define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) |
| 240 | | | 240 | | |
| 241 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | 241 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) |
| 242 | | | 242 | | |
| 243 | |.macro hotcheck, delta | 243 | |.macro hotcheck, delta |
| 244 | | NYI | 244 | | lsr CARG1, PC, #1 |
| 245 | | and CARG1, CARG1, #126 | ||
| 246 | | add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT | ||
| 247 | | ldrh CARG2w, [GL, CARG1] | ||
| 248 | | subs CARG2, CARG2, #delta | ||
| 249 | | strh CARG2w, [GL, CARG1] | ||
| 245 | |.endmacro | 250 | |.endmacro |
| 246 | | | 251 | | |
| 247 | |.macro hotloop | 252 | |.macro hotloop |
| @@ -869,7 +874,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 869 | | bl extern lj_meta_for // (lua_State *L, TValue *base) | 874 | | bl extern lj_meta_for // (lua_State *L, TValue *base) |
| 870 | | ldr INSw, [PC, #-4] | 875 | | ldr INSw, [PC, #-4] |
| 871 | |.if JIT | 876 | |.if JIT |
| 872 | | uxtb TMP0, INS | 877 | | uxtb TMP0w, INSw |
| 873 | |.endif | 878 | |.endif |
| 874 | | decode_RA RA, INS | 879 | | decode_RA RA, INS |
| 875 | | decode_RD RC, INS | 880 | | decode_RD RC, INS |
| @@ -1732,7 +1737,20 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1732 | |//----------------------------------------------------------------------- | 1737 | |//----------------------------------------------------------------------- |
| 1733 | | | 1738 | | |
| 1734 | |->vm_record: // Dispatch target for recording phase. | 1739 | |->vm_record: // Dispatch target for recording phase. |
| 1735 | | NYI | 1740 | |.if JIT |
| 1741 | | ldrb CARG1w, GL->hookmask | ||
| 1742 | | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | ||
| 1743 | | bne >5 | ||
| 1744 | | // Decrement the hookcount for consistency, but always do the call. | ||
| 1745 | | ldr CARG2w, GL->hookcount | ||
| 1746 | | tst CARG1, #HOOK_ACTIVE | ||
| 1747 | | bne >1 | ||
| 1748 | | sub CARG2w, CARG2w, #1 | ||
| 1749 | | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT | ||
| 1750 | | beq >1 | ||
| 1751 | | str CARG2w, GL->hookcount | ||
| 1752 | | b >1 | ||
| 1753 | |.endif | ||
| 1736 | | | 1754 | | |
| 1737 | |->vm_rethook: // Dispatch target for return hooks. | 1755 | |->vm_rethook: // Dispatch target for return hooks. |
| 1738 | | ldrb TMP2w, GL->hookmask | 1756 | | ldrb TMP2w, GL->hookmask |
| @@ -1774,7 +1792,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1774 | | b <4 | 1792 | | b <4 |
| 1775 | | | 1793 | | |
| 1776 | |->vm_hotloop: // Hot loop counter underflow. | 1794 | |->vm_hotloop: // Hot loop counter underflow. |
| 1777 | | NYI | 1795 | |.if JIT |
| 1796 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). | ||
| 1797 | | add CARG1, GL, #GG_G2DISP+GG_DISP2J | ||
| 1798 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
| 1799 | | str PC, SAVE_PC | ||
| 1800 | | ldr CARG3, LFUNC:CARG3->pc | ||
| 1801 | | mov CARG2, PC | ||
| 1802 | | str L, [GL, #GL_J(L)] | ||
| 1803 | | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] | ||
| 1804 | | str BASE, L->base | ||
| 1805 | | add CARG3, BASE, CARG3, lsl #3 | ||
| 1806 | | str CARG3, L->top | ||
| 1807 | | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
| 1808 | | b <3 | ||
| 1809 | |.endif | ||
| 1778 | | | 1810 | | |
| 1779 | |->vm_callhook: // Dispatch target for call hooks. | 1811 | |->vm_callhook: // Dispatch target for call hooks. |
| 1780 | | mov CARG2, PC | 1812 | | mov CARG2, PC |
| @@ -1804,7 +1836,54 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1804 | | br CRET1 | 1836 | | br CRET1 |
| 1805 | | | 1837 | | |
| 1806 | |->cont_stitch: // Trace stitching. | 1838 | |->cont_stitch: // Trace stitching. |
| 1807 | | NYI | 1839 | |.if JIT |
| 1840 | | // RA = resultptr, CARG4 = meta base | ||
| 1841 | | ldr RB, SAVE_MULTRES | ||
| 1842 | | ldr INSw, [PC, #-4] | ||
| 1843 | | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. | ||
| 1844 | | subs RB, RB, #8 | ||
| 1845 | | decode_RA RC, INS // Call base. | ||
| 1846 | | and CARG3, CARG3, #LJ_GCVMASK | ||
| 1847 | | beq >2 | ||
| 1848 | |1: // Move results down. | ||
| 1849 | | ldr CARG1, [RA] | ||
| 1850 | | add RA, RA, #8 | ||
| 1851 | | subs RB, RB, #8 | ||
| 1852 | | str CARG1, [BASE, RC, lsl #3] | ||
| 1853 | | add RC, RC, #1 | ||
| 1854 | | bne <1 | ||
| 1855 | |2: | ||
| 1856 | | decode_RA RA, INS | ||
| 1857 | | decode_RB RB, INS | ||
| 1858 | | add RA, RA, RB | ||
| 1859 | |3: | ||
| 1860 | | cmp RA, RC | ||
| 1861 | | bhi >9 // More results wanted? | ||
| 1862 | | | ||
| 1863 | | ldrh RAw, TRACE:CARG3->traceno | ||
| 1864 | | ldrh RCw, TRACE:CARG3->link | ||
| 1865 | | cmp RCw, RAw | ||
| 1866 | | beq ->cont_nop // Blacklisted. | ||
| 1867 | | cmp RCw, #0 | ||
| 1868 | | bne =>BC_JLOOP // Jump to stitched trace. | ||
| 1869 | | | ||
| 1870 | | // Stitch a new trace to the previous trace. | ||
| 1871 | | mov CARG1, #GL_J(exitno) | ||
| 1872 | | str RA, [GL, CARG1] | ||
| 1873 | | mov CARG1, #GL_J(L) | ||
| 1874 | | str L, [GL, CARG1] | ||
| 1875 | | str BASE, L->base | ||
| 1876 | | add CARG1, GL, #GG_G2J | ||
| 1877 | | mov CARG2, PC | ||
| 1878 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
| 1879 | | ldr BASE, L->base | ||
| 1880 | | b ->cont_nop | ||
| 1881 | | | ||
| 1882 | |9: // Fill up results with nil. | ||
| 1883 | | str TISNIL, [BASE, RC, lsl #3] | ||
| 1884 | | add RC, RC, #1 | ||
| 1885 | | b <3 | ||
| 1886 | |.endif | ||
| 1808 | | | 1887 | | |
| 1809 | |->vm_profhook: // Dispatch target for profiler hook. | 1888 | |->vm_profhook: // Dispatch target for profiler hook. |
| 1810 | #if LJ_HASPROFILE | 1889 | #if LJ_HASPROFILE |
| @@ -1822,10 +1901,120 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1822 | |//-- Trace exit handler ------------------------------------------------- | 1901 | |//-- Trace exit handler ------------------------------------------------- |
| 1823 | |//----------------------------------------------------------------------- | 1902 | |//----------------------------------------------------------------------- |
| 1824 | | | 1903 | | |
| 1904 | |.macro savex_, a, b | ||
| 1905 | | stp d..a, d..b, [sp, #a*8] | ||
| 1906 | | stp x..a, x..b, [sp, #32*8+a*8] | ||
| 1907 | |.endmacro | ||
| 1908 | | | ||
| 1825 | |->vm_exit_handler: | 1909 | |->vm_exit_handler: |
| 1826 | | NYI | 1910 | |.if JIT |
| 1911 | | sub sp, sp, #(64*8) | ||
| 1912 | | savex_, 0, 1 | ||
| 1913 | | savex_, 2, 3 | ||
| 1914 | | savex_, 4, 5 | ||
| 1915 | | savex_, 6, 7 | ||
| 1916 | | savex_, 8, 9 | ||
| 1917 | | savex_, 10, 11 | ||
| 1918 | | savex_, 12, 13 | ||
| 1919 | | savex_, 14, 15 | ||
| 1920 | | savex_, 16, 17 | ||
| 1921 | | savex_, 18, 19 | ||
| 1922 | | savex_, 20, 21 | ||
| 1923 | | savex_, 22, 23 | ||
| 1924 | | savex_, 24, 25 | ||
| 1925 | | savex_, 26, 27 | ||
| 1926 | | savex_, 28, 29 | ||
| 1927 | | stp d30, d31, [sp, #30*8] | ||
| 1928 | | ldr CARG1, [sp, #64*8] // Load original value of lr. | ||
| 1929 | | add CARG3, sp, #64*8 // Recompute original value of sp. | ||
| 1930 | | mv_vmstate CARG4, EXIT | ||
| 1931 | | ldr CARG2w, [CARG1, #-4]! // Get exit instruction. | ||
| 1932 | | stp CARG1, CARG3, [sp, #62*8] // Store exit pc/sp in RID_LR/RID_SP. | ||
| 1933 | | lsl CARG2, CARG2, #38 | ||
| 1934 | | add CARG1, CARG1, CARG2, asr #36 | ||
| 1935 | | ldr CARG2w, [lr] // Load exit stub group offset. | ||
| 1936 | | sub CARG1, CARG1, lr | ||
| 1937 | | sub CARG1, CARG1, #4 | ||
| 1938 | | ldr L, GL->cur_L | ||
| 1939 | | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. | ||
| 1940 | | ldr BASE, GL->jit_base | ||
| 1941 | | st_vmstate CARG4 | ||
| 1942 | | str CARG1w, [GL, #GL_J(exitno)] | ||
| 1943 | | str BASE, L->base | ||
| 1944 | | str L, [GL, #GL_J(L)] | ||
| 1945 | | str xzr, GL->jit_base | ||
| 1946 | | add CARG1, GL, #GG_G2J | ||
| 1947 | | mov CARG2, sp | ||
| 1948 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
| 1949 | | // Returns MULTRES (unscaled) or negated error code. | ||
| 1950 | | ldr CARG2, L->cframe | ||
| 1951 | | ldr BASE, L->base | ||
| 1952 | | and sp, CARG2, #CFRAME_RAWMASK | ||
| 1953 | | ldr PC, SAVE_PC // Get SAVE_PC. | ||
| 1954 | | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). | ||
| 1955 | | b >1 | ||
| 1956 | |.endif | ||
| 1957 | | | ||
| 1827 | |->vm_exit_interp: | 1958 | |->vm_exit_interp: |
| 1828 | | NYI | 1959 | | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. |
| 1960 | |.if JIT | ||
| 1961 | | ldr L, SAVE_L | ||
| 1962 | |1: | ||
| 1963 | | cmp CARG1w, #0 | ||
| 1964 | | blt >9 // Check for error from exit. | ||
| 1965 | | lsl RC, CARG1, #3 | ||
| 1966 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
| 1967 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
| 1968 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
| 1969 | | movn TISNIL, #0 | ||
| 1970 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
| 1971 | | str RC, SAVE_MULTRES | ||
| 1972 | | str BASE, L->base | ||
| 1973 | | ldr CARG2, LFUNC:CARG2->pc | ||
| 1974 | | str xzr, GL->jit_base | ||
| 1975 | | mv_vmstate CARG4, INTERP | ||
| 1976 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
| 1977 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
| 1978 | | ldrb RBw, [PC] | ||
| 1979 | | ldr INSw, [PC], #4 | ||
| 1980 | | st_vmstate CARG4 | ||
| 1981 | | cmp RBw, #BC_FUNCC+2 // Fast function? | ||
| 1982 | | add TMP1, GL, INS, uxtb #3 | ||
| 1983 | | bhs >4 | ||
| 1984 | |2: | ||
| 1985 | | cmp RBw, #BC_FUNCF // Function header? | ||
| 1986 | | add TMP0, GL, RB, uxtb #3 | ||
| 1987 | | ldr RB, [TMP0, #GG_G2DISP] | ||
| 1988 | | decode_RA RA, INS | ||
| 1989 | | lsr TMP0, INS, #16 | ||
| 1990 | | csel RC, TMP0, RC, lo | ||
| 1991 | | blo >5 | ||
| 1992 | | ldr CARG3, [BASE, FRAME_FUNC] | ||
| 1993 | | sub RC, RC, #8 | ||
| 1994 | | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 | ||
| 1995 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
| 1996 | |5: | ||
| 1997 | | br RB | ||
| 1998 | | | ||
| 1999 | |4: // Check frame below fast function. | ||
| 2000 | | ldr CARG1, [BASE, FRAME_PC] | ||
| 2001 | | ands CARG2, CARG1, #FRAME_TYPE | ||
| 2002 | | bne <2 // Trace stitching continuation? | ||
| 2003 | | // Otherwise set KBASE for Lua function below fast function. | ||
| 2004 | | ldr CARG3, [CARG1, #-4] | ||
| 2005 | | decode_RA CARG1, CARG3 | ||
| 2006 | | sub CARG2, BASE, CARG1, lsl #3 | ||
| 2007 | | ldr LFUNC:CARG3, [CARG2, #-32] | ||
| 2008 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
| 2009 | | ldr CARG3, LFUNC:CARG3->pc | ||
| 2010 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
| 2011 | | b <2 | ||
| 2012 | | | ||
| 2013 | |9: // Rethrow error from the right C frame. | ||
| 2014 | | neg CARG2, CARG1 | ||
| 2015 | | mov CARG1, L | ||
| 2016 | | bl extern lj_err_throw // (lua_State *L, int errcode) | ||
| 2017 | |.endif | ||
| 1829 | | | 2018 | | |
| 1830 | |//----------------------------------------------------------------------- | 2019 | |//----------------------------------------------------------------------- |
| 1831 | |//-- Math helper functions ---------------------------------------------- | 2020 | |//-- Math helper functions ---------------------------------------------- |
| @@ -3387,6 +3576,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3387 | if (op == BC_FORI) { | 3576 | if (op == BC_FORI) { |
| 3388 | | csel PC, RC, PC, gt | 3577 | | csel PC, RC, PC, gt |
| 3389 | } else if (op == BC_JFORI) { | 3578 | } else if (op == BC_JFORI) { |
| 3579 | | mov PC, RC | ||
| 3390 | | ldrh RCw, [RC, #-2] | 3580 | | ldrh RCw, [RC, #-2] |
| 3391 | } else if (op == BC_IFORL) { | 3581 | } else if (op == BC_IFORL) { |
| 3392 | | csel PC, RC, PC, le | 3582 | | csel PC, RC, PC, le |
| @@ -3488,7 +3678,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3488 | 3678 | ||
| 3489 | case BC_JLOOP: | 3679 | case BC_JLOOP: |
| 3490 | |.if JIT | 3680 | |.if JIT |
| 3491 | | NYI | 3681 | | // RA = base (ignored), RC = traceno |
| 3682 | | ldr CARG1, [GL, #GL_J(trace)] | ||
| 3683 | | mov CARG2, #0 // Traces on ARM64 don't store the trace #, so use 0. | ||
| 3684 | | ldr TRACE:RC, [CARG1, RC, lsl #3] | ||
| 3685 | | st_vmstate CARG2 | ||
| 3686 | | ldr RA, TRACE:RC->mcode | ||
| 3687 | | str BASE, GL->jit_base | ||
| 3688 | | str L, GL->tmpbuf.L | ||
| 3689 | | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. | ||
| 3690 | | br RA | ||
| 3492 | |.endif | 3691 | |.endif |
| 3493 | break; | 3692 | break; |
| 3494 | 3693 | ||
| @@ -3546,10 +3745,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3546 | case BC_IFUNCV: | 3745 | case BC_IFUNCV: |
| 3547 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | 3746 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 |
| 3548 | | ldr CARG1, L->maxstack | 3747 | | ldr CARG1, L->maxstack |
| 3748 | | movn TMP0, #~LJ_TFUNC | ||
| 3549 | | add TMP2, BASE, RC | 3749 | | add TMP2, BASE, RC |
| 3750 | | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | ||
| 3550 | | add RA, RA, RC | 3751 | | add RA, RA, RC |
| 3551 | | add TMP0, RC, #16+FRAME_VARG | 3752 | | add TMP0, RC, #16+FRAME_VARG |
| 3552 | | str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC. | 3753 | | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. |
| 3553 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] | 3754 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] |
| 3554 | | cmp RA, CARG1 | 3755 | | cmp RA, CARG1 |
| 3555 | | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. | 3756 | | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. |
| @@ -3736,8 +3937,8 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
| 3736 | "\t.uleb128 0x1\n" | 3937 | "\t.uleb128 0x1\n" |
| 3737 | "\t.sleb128 -8\n" | 3938 | "\t.sleb128 -8\n" |
| 3738 | "\t.byte 30\n" /* Return address is in lr. */ | 3939 | "\t.byte 30\n" /* Return address is in lr. */ |
| 3739 | "\t.uleb128 1\n" /* augmentation length */ | 3940 | "\t.uleb128 1\n" /* augmentation length */ |
| 3740 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | 3941 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ |
| 3741 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ | 3942 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ |
| 3742 | "\t.align 3\n" | 3943 | "\t.align 3\n" |
| 3743 | ".LECIE2:\n\n"); | 3944 | ".LECIE2:\n\n"); |
| @@ -3748,7 +3949,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
| 3748 | "\t.long .LASFDE3-.Lframe2\n" | 3949 | "\t.long .LASFDE3-.Lframe2\n" |
| 3749 | "\t.long lj_vm_ffi_call-.\n" | 3950 | "\t.long lj_vm_ffi_call-.\n" |
| 3750 | "\t.long %d\n" | 3951 | "\t.long %d\n" |
| 3751 | "\t.uleb128 0\n" /* augmentation length */ | 3952 | "\t.uleb128 0\n" /* augmentation length */ |
| 3752 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ | 3953 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ |
| 3753 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ | 3954 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ |
| 3754 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ | 3955 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ |
