summary refs log tree commit diff
path: root/dynasm
diff options
context:
space:
mode:
Diffstat (limited to 'dynasm')
-rw-r--r-- dynasm/dasm_arm.h 22
-rw-r--r-- dynasm/dasm_arm.lua 6
-rw-r--r-- dynasm/dasm_arm64.h 558
-rw-r--r-- dynasm/dasm_arm64.lua 1226
-rw-r--r-- dynasm/dasm_mips.h 52
-rw-r--r-- dynasm/dasm_mips.lua 684
-rw-r--r-- dynasm/dasm_mips64.lua 12
-rw-r--r-- dynasm/dasm_ppc.h 35
-rw-r--r-- dynasm/dasm_ppc.lua 702
-rw-r--r-- dynasm/dasm_proto.h 4
-rw-r--r-- dynasm/dasm_x86.h 79
-rw-r--r-- dynasm/dasm_x86.lua 648
-rw-r--r-- dynasm/dynasm.lua 7
13 files changed, 3607 insertions, 428 deletions
diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
index a93e831e..eaa94d9c 100644
--- a/dynasm/dasm_arm.h
+++ b/dynasm/dasm_arm.h
@@ -70,7 +70,7 @@ struct dasm_State {
70 size_t lgsize; 70 size_t lgsize;
71 int *pclabels; /* PC label chains/pos ptrs. */ 71 int *pclabels; /* PC label chains/pos ptrs. */
72 size_t pcsize; 72 size_t pcsize;
73 void **globals; /* Array of globals (bias -10). */ 73 void **globals; /* Array of globals. */
74 dasm_Section *section; /* Pointer to active section. */ 74 dasm_Section *section; /* Pointer to active section. */
75 size_t codesize; /* Total size of all code sections. */ 75 size_t codesize; /* Total size of all code sections. */
76 int maxsection; /* 0 <= sectionidx < maxsection. */ 76 int maxsection; /* 0 <= sectionidx < maxsection. */
@@ -87,7 +87,6 @@ void dasm_init(Dst_DECL, int maxsection)
87{ 87{
88 dasm_State *D; 88 dasm_State *D;
89 size_t psz = 0; 89 size_t psz = 0;
90 int i;
91 Dst_REF = NULL; 90 Dst_REF = NULL;
92 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 91 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
93 D = Dst_REF; 92 D = Dst_REF;
@@ -98,12 +97,7 @@ void dasm_init(Dst_DECL, int maxsection)
98 D->pcsize = 0; 97 D->pcsize = 0;
99 D->globals = NULL; 98 D->globals = NULL;
100 D->maxsection = maxsection; 99 D->maxsection = maxsection;
101 for (i = 0; i < maxsection; i++) { 100 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
102 D->sections[i].buf = NULL; /* Need this for pass3. */
103 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
104 D->sections[i].bsize = 0;
105 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
106 }
107} 101}
108 102
109/* Free DynASM state. */ 103/* Free DynASM state. */
@@ -123,7 +117,7 @@ void dasm_free(Dst_DECL)
123void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 117void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
124{ 118{
125 dasm_State *D = Dst_REF; 119 dasm_State *D = Dst_REF;
126 D->globals = gl - 10; /* Negative bias to compensate for locals. */ 120 D->globals = gl;
127 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 121 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
128} 122}
129 123
@@ -148,6 +142,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
148 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 142 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
149 for (i = 0; i < D->maxsection; i++) { 143 for (i = 0; i < D->maxsection; i++) {
150 D->sections[i].pos = DASM_SEC2POS(i); 144 D->sections[i].pos = DASM_SEC2POS(i);
145 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
151 D->sections[i].ofs = 0; 146 D->sections[i].ofs = 0;
152 } 147 }
153} 148}
@@ -294,7 +289,7 @@ int dasm_link(Dst_DECL, size_t *szp)
294 289
295 { /* Handle globals not defined in this translation unit. */ 290 { /* Handle globals not defined in this translation unit. */
296 int idx; 291 int idx;
297 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 292 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
298 int n = D->lglabels[idx]; 293 int n = D->lglabels[idx];
299 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 294 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
300 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 295 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -371,7 +366,10 @@ int dasm_encode(Dst_DECL, void *buffer)
371 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; 366 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
372 break; 367 break;
373 case DASM_REL_LG: 368 case DASM_REL_LG:
374 CK(n >= 0, UNDEF_LG); 369 if (n < 0) {
370 n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp - 4);
371 goto patchrel;
372 }
375 /* fallthrough */ 373 /* fallthrough */
376 case DASM_REL_PC: 374 case DASM_REL_PC:
377 CK(n >= 0, UNDEF_PC); 375 CK(n >= 0, UNDEF_PC);
@@ -393,7 +391,7 @@ int dasm_encode(Dst_DECL, void *buffer)
393 } 391 }
394 break; 392 break;
395 case DASM_LABEL_LG: 393 case DASM_LABEL_LG:
396 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 394 ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
397 break; 395 break;
398 case DASM_LABEL_PC: break; 396 case DASM_LABEL_PC: break;
399 case DASM_IMM: 397 case DASM_IMM:
diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
index 960f1fe6..edb57536 100644
--- a/dynasm/dasm_arm.lua
+++ b/dynasm/dasm_arm.lua
@@ -9,9 +9,9 @@
9local _info = { 9local _info = {
10 arch = "arm", 10 arch = "arm",
11 description = "DynASM ARM module", 11 description = "DynASM ARM module",
12 version = "1.3.0", 12 version = "1.5.0",
13 vernum = 10300, 13 vernum = 10500,
14 release = "2011-05-05", 14 release = "2021-05-02",
15 author = "Mike Pall", 15 author = "Mike Pall",
16 license = "MIT", 16 license = "MIT",
17} 17}
diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
new file mode 100644
index 00000000..1c541e5d
--- /dev/null
+++ b/dynasm/dasm_arm64.h
@@ -0,0 +1,558 @@
1/*
2** DynASM ARM64 encoding engine.
3** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
4** Released under the MIT license. See dynasm.lua for full copyright notice.
5*/
6
7#include <stddef.h>
8#include <stdarg.h>
9#include <string.h>
10#include <stdlib.h>
11
12#define DASM_ARCH "arm64"
13
14#ifndef DASM_EXTERN
15#define DASM_EXTERN(a,b,c,d) 0
16#endif
17
18/* Action definitions. */
19enum {
20 DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
25 DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
26 DASM_IMMV, DASM_VREG,
27 DASM__MAX
28};
29
30/* Maximum number of section buffer positions for a single dasm_put() call. */
31#define DASM_MAXSECPOS 25
32
33/* DynASM encoder status codes. Action list offset or number are or'ed in. */
34#define DASM_S_OK 0x00000000
35#define DASM_S_NOMEM 0x01000000
36#define DASM_S_PHASE 0x02000000
37#define DASM_S_MATCH_SEC 0x03000000
38#define DASM_S_RANGE_I 0x11000000
39#define DASM_S_RANGE_SEC 0x12000000
40#define DASM_S_RANGE_LG 0x13000000
41#define DASM_S_RANGE_PC 0x14000000
42#define DASM_S_RANGE_REL 0x15000000
43#define DASM_S_RANGE_VREG 0x16000000
44#define DASM_S_UNDEF_LG 0x21000000
45#define DASM_S_UNDEF_PC 0x22000000
46
47/* Macros to convert positions (8 bit section + 24 bit index). */
48#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
49#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
50#define DASM_SEC2POS(sec) ((sec)<<24)
51#define DASM_POS2SEC(pos) ((pos)>>24)
52#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
53
54/* Action list type. */
55typedef const unsigned int *dasm_ActList;
56
57/* Per-section structure. */
58typedef struct dasm_Section {
59 int *rbuf; /* Biased buffer pointer (negative section bias). */
60 int *buf; /* True buffer pointer. */
61 size_t bsize; /* Buffer size in bytes. */
62 int pos; /* Biased buffer position. */
63 int epos; /* End of biased buffer position - max single put. */
64 int ofs; /* Byte offset into section. */
65} dasm_Section;
66
67/* Core structure holding the DynASM encoding state. */
68struct dasm_State {
69 size_t psize; /* Allocated size of this structure. */
70 dasm_ActList actionlist; /* Current actionlist pointer. */
71 int *lglabels; /* Local/global chain/pos ptrs. */
72 size_t lgsize;
73 int *pclabels; /* PC label chains/pos ptrs. */
74 size_t pcsize;
75 void **globals; /* Array of globals. */
76 dasm_Section *section; /* Pointer to active section. */
77 size_t codesize; /* Total size of all code sections. */
78 int maxsection; /* 0 <= sectionidx < maxsection. */
79 int status; /* Status code. */
80 dasm_Section sections[1]; /* All sections. Alloc-extended. */
81};
82
83/* The size of the core structure depends on the max. number of sections. */
84#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
85
86
87/* Initialize DynASM state. */
88void dasm_init(Dst_DECL, int maxsection)
89{
90 dasm_State *D;
91 size_t psz = 0;
92 Dst_REF = NULL;
93 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
94 D = Dst_REF;
95 D->psize = psz;
96 D->lglabels = NULL;
97 D->lgsize = 0;
98 D->pclabels = NULL;
99 D->pcsize = 0;
100 D->globals = NULL;
101 D->maxsection = maxsection;
102 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
103}
104
105/* Free DynASM state. */
106void dasm_free(Dst_DECL)
107{
108 dasm_State *D = Dst_REF;
109 int i;
110 for (i = 0; i < D->maxsection; i++)
111 if (D->sections[i].buf)
112 DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
113 if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
114 if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
115 DASM_M_FREE(Dst, D, D->psize);
116}
117
118/* Setup global label array. Must be called before dasm_setup(). */
119void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
120{
121 dasm_State *D = Dst_REF;
122 D->globals = gl;
123 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
124}
125
126/* Grow PC label array. Can be called after dasm_setup(), too. */
127void dasm_growpc(Dst_DECL, unsigned int maxpc)
128{
129 dasm_State *D = Dst_REF;
130 size_t osz = D->pcsize;
131 DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
132 memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
133}
134
135/* Setup encoder. */
136void dasm_setup(Dst_DECL, const void *actionlist)
137{
138 dasm_State *D = Dst_REF;
139 int i;
140 D->actionlist = (dasm_ActList)actionlist;
141 D->status = DASM_S_OK;
142 D->section = &D->sections[0];
143 memset((void *)D->lglabels, 0, D->lgsize);
144 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
145 for (i = 0; i < D->maxsection; i++) {
146 D->sections[i].pos = DASM_SEC2POS(i);
147 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
148 D->sections[i].ofs = 0;
149 }
150}
151
152
153#ifdef DASM_CHECKS
154#define CK(x, st) \
155 do { if (!(x)) { \
156 D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
157#define CKPL(kind, st) \
158 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
159 D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
160#else
161#define CK(x, st) ((void)0)
162#define CKPL(kind, st) ((void)0)
163#endif
164
/*
** Encode a 12 bit immediate with optional 12 bit left shift.
** Returns the value itself if it fits in 12 bits, (value >> 12) with bit 12
** set if only bits 12..23 are used, and -1 if neither form applies.
*/
static int dasm_imm12(unsigned int n)
{
  if (n <= 0xfffu)
    return (int)n;
  if ((n & 0xff000fffu) != 0)
    return -1;
  return (int)((n >> 12) | 0x1000);
}
174
/*
** Return the index of the highest set bit of x, or -1 if x is zero.
** (For a value with a single bit set this is simply that bit's index.)
*/
static int dasm_ffs(unsigned long long x)
{
  int idx;
  for (idx = -1; x != 0; x >>= 1)
    idx++;
  return idx;
}
181
/*
** Try to encode the 64 bit value hi:lo as a repeating bit pattern.
**
** The value must consist of one contiguous run of ones (possibly rotated),
** replicated with a period of 2, 4, 8, 16, 32 or 64 bits. On success the
** result packs two fields: one in bits 6 and up, one in the low bits;
** callers shift it left by 10 into the instruction word. Presumably this is
** the A64 N:immr:imms logical immediate field -- confirm against the ISA
** manual. Returns -1 if the value cannot be encoded (this includes 0 and
** all-ones).
*/
static int dasm_imm13(int lo, int hi)
{
  int inv = 0, w = 64, s = 0xfff, xa, xb;
  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
  unsigned long long m = 1ULL, a, b, c;
  /* Normalize so that bit 0 is clear; remember that we inverted. */
  if (n & 1) { n = ~n; inv = 1; }
  /* a: lowest set bit of n, i.e. the start of the first run of ones. */
  a = n & (unsigned long long)-(long long)n;
  /* b: lowest set bit of n+a, i.e. the bit just above that first run. */
  b = (n+a)&(unsigned long long)-(long long)(n+a);
  /* c: lowest set bit left after removing b, i.e. the start of the next run. */
  c = (n+a-b)&(unsigned long long)-(long long)(n+a-b);
  xa = dasm_ffs(a); xb = dasm_ffs(b);
  if (c) {
    /* More than one run: the distance between run starts is the period. */
    w = dasm_ffs(c) - xa;
    /* Use ULL like the 1ULL above: the masks need 64 bits even if long has 32. */
    if (w == 32) m = 0x0000000100000001ULL;
    else if (w == 16) m = 0x0001000100010001ULL;
    else if (w == 8) m = 0x0101010101010101ULL;
    else if (w == 4) m = 0x1111111111111111ULL;
    else if (w == 2) m = 0x5555555555555555ULL;
    else return -1;
    s = (-2*w & 0x3f) - 1;
  } else if (!a) {
    return -1;  /* n is zero (input was 0 or all-ones). */
  } else if (xb == -1) {
    xb = 64;  /* The single run reaches bit 63: n+a wrapped around to 0. */
  }
  /* Replicating the first run with period w must reproduce n exactly. */
  if ((b-a) * m != n) return -1;
  if (inv)
    return ((w - xb) << 6) | (s+w+xa-xb);
  return ((w - xa) << 6) | (s+xb-xa);
}
214
215/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
**
** `start` is the offset into the action list at which this call's actions
** begin. The variadic ints are consumed in action order: one for every
** action code >= DASM_REL_PC, plus a second one for DASM_REL_A and
** DASM_IMM13X. No machine code is written here; the active section's buffer
** only records `start`, the arguments and label/offset bookkeeping.
** With DASM_CHECKS defined, a failed CK/CKPL check stores an error code in
** D->status and returns immediately.
*/
216void dasm_put(Dst_DECL, int start, ...)
217{
218 va_list ap;
219 dasm_State *D = Dst_REF;
220 dasm_ActList p = D->actionlist + start;
221 dasm_Section *sec = D->section;
222 int pos = sec->pos, ofs = sec->ofs;
223 int *b;
224
 /* Grow the buffer once pos reaches epos (buffer end minus DASM_MAXSECPOS). */
225 if (pos >= sec->epos) {
226 DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
227 sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
228 sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
229 sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
230 }
231
232 b = sec->rbuf;
 /* First slot of every put: the action list offset, replayed by later passes. */
233 b[pos++] = start;
234
235 va_start(ap, start);
236 while (1) {
237 unsigned int ins = *p++;
238 unsigned int action = (ins >> 16);
239 if (action >= DASM__MAX) {
 /* Not an action code: a plain word that takes 4 bytes of output. */
240 ofs += 4;
241 } else {
 /* Actions from DASM_REL_PC upwards take an int argument. */
242 int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
243 switch (action) {
244 case DASM_STOP: goto stop;
245 case DASM_SECTION:
246 n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
247 D->section = &D->sections[n]; goto stop;
248 case DASM_ESC: p++; ofs += 4; break;
249 case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
 /* Reserve the maximum padding for now and store the resulting estimate. */
250 case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
251 case DASM_REL_LG:
252 n = (ins & 2047) - 10; pl = D->lglabels + n;
253 /* Bkwd rel or global. */
254 if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
255 pl += 10; n = *pl;
256 if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
257 goto linkrel;
258 case DASM_REL_PC:
259 pl = D->pclabels + n; CKPL(pc, PC);
260 putrel:
261 n = *pl;
262 if (n < 0) { /* Label exists. Get label pos and store it. */
263 b[pos] = -n;
264 } else {
265 linkrel:
266 b[pos] = n; /* Else link to rel chain, anchored at label. */
267 *pl = pos;
268 }
269 pos++;
270 if ((ins & 0x8000)) ofs += 8;
271 break;
272 case DASM_REL_A:
 /* Two slots: the regular argument followed by one more vararg. */
273 b[pos++] = n;
274 b[pos++] = va_arg(ap, int);
275 break;
276 case DASM_LABEL_LG:
277 pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
278 case DASM_LABEL_PC:
279 pl = D->pclabels + n; CKPL(pc, PC);
280 putlabel:
281 n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
282 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
283 }
284 *pl = -pos; /* Label exists now. */
285 b[pos++] = ofs; /* Store pass1 offset estimate. */
286 break;
287 case DASM_IMM:
 /* Bits 10..14 of ins: scale (low bits that must be zero and are dropped). */
288 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
289 n >>= ((ins>>10)&31);
290#ifdef DASM_CHECKS
 /* Bits 5..9 of ins: field width; bit 15 selects the signed range check. */
291 if ((ins & 0x8000))
292 CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
293 else
294 CK((n>>((ins>>5)&31)) == 0, RANGE_I);
295#endif
296 b[pos++] = n;
297 break;
298 case DASM_IMM6:
299 CK((n >> 6) == 0, RANGE_I);
300 b[pos++] = n;
301 break;
302 case DASM_IMM12:
303 CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
304 b[pos++] = n;
305 break;
306 case DASM_IMM13W:
 /* The same 32 bit value is used for both halves. */
307 CK(dasm_imm13(n, n) != -1, RANGE_I);
308 b[pos++] = n;
309 break;
310 case DASM_IMM13X: {
 /* Low half is the regular argument, high half is one more vararg. */
311 int m = va_arg(ap, int);
312 CK(dasm_imm13(n, m) != -1, RANGE_I);
313 b[pos++] = n;
314 b[pos++] = m;
315 break;
316 }
317 case DASM_IMML: {
318#ifdef DASM_CHECKS
 /* Accept either a scaled value below 4096 or a value in [-256, 255]. */
319 int scale = (ins & 3);
320 CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
321 (unsigned int)(n+256) < 512, RANGE_I);
322#endif
323 b[pos++] = n;
324 break;
325 }
326 case DASM_IMMV:
 /* The argument itself becomes a 4 byte output word. */
327 ofs += 4;
328 b[pos++] = n;
329 break;
330 case DASM_VREG:
331 CK(n < 32, RANGE_VREG);
332 b[pos++] = n;
333 break;
334 }
335 }
336 }
337stop:
338 va_end(ap);
 /* Commit the new buffer position and offset estimate to the section. */
339 sec->pos = pos;
340 sec->ofs = ofs;
341}
342#undef CK
343
344/* Pass 2: Link sections, shrink aligns, fix label offsets.
**
** Replays the recorded actions of every section in order, turning the pass 1
** offset estimates into final offsets. The total code size is stored in
** D->codesize and *szp. Returns DASM_S_OK; with DASM_CHECKS defined it may
** instead return a pending error status or DASM_S_UNDEF_PC|pc for a PC label
** that was referenced but never defined (*szp is 0 in that case).
*/
345int dasm_link(Dst_DECL, size_t *szp)
346{
347 dasm_State *D = Dst_REF;
348 int secnum;
349 int ofs = 0;
350
351#ifdef DASM_CHECKS
352 *szp = 0;
353 if (D->status != DASM_S_OK) return D->status;
354 {
355 int pc;
 /* A positive entry is a still-open chain of references: label undefined. */
356 for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
357 if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
358 }
359#endif
360
361 { /* Handle globals not defined in this translation unit. */
362 int idx;
 /* Global labels start at index 10 of the lglabels array. */
363 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
364 int n = D->lglabels[idx];
365 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
366 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
367 }
368 }
369
370 /* Combine all code sections. No support for data sections (yet). */
371 for (secnum = 0; secnum < D->maxsection; secnum++) {
372 dasm_Section *sec = D->sections + secnum;
373 int *b = sec->rbuf;
374 int pos = DASM_SEC2POS(secnum);
375 int lastpos = sec->pos;
376
377 while (pos != lastpos) {
 /* Each recorded put starts with its action list offset. */
378 dasm_ActList p = D->actionlist + b[pos++];
379 while (1) {
380 unsigned int ins = *p++;
381 unsigned int action = (ins >> 16);
382 switch (action) {
383 case DASM_STOP: case DASM_SECTION: goto stop;
384 case DASM_ESC: p++; break;
385 case DASM_REL_EXT: break;
 /* Pass 1 reserved the full alignment mask; take back the unneeded part. */
386 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
387 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 /* Turn the stored pass 1 estimate into the final label offset. */
388 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
389 case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
390 case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
 /* These two actions stored two buffer slots in pass 1. */
391 case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
392 }
393 }
394 stop: (void)0;
395 }
396 ofs += sec->ofs; /* Next section starts right after current section. */
397 }
398
399 D->codesize = ofs; /* Total size of all code sections */
400 *szp = ofs;
401 return DASM_S_OK;
402}
403
404#ifdef DASM_CHECKS
405#define CK(x, st) \
406 do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
407#else
408#define CK(x, st) ((void)0)
409#endif
410
411/* Pass 3: Encode sections.
**
** Replays the recorded actions of every section and writes the final 32 bit
** words to `buffer`, patching immediates and relative offsets into the
** previously emitted word (cp[-1]). Returns DASM_S_OK, or DASM_S_PHASE if
** the number of bytes written differs from D->codesize as computed by
** dasm_link(). With DASM_CHECKS defined, range errors are returned as
** DASM_S_* codes or'ed with the action list offset.
*/
412int dasm_encode(Dst_DECL, void *buffer)
413{
414 dasm_State *D = Dst_REF;
415 char *base = (char *)buffer;
416 unsigned int *cp = (unsigned int *)buffer;
417 int secnum;
418
419 /* Encode all code sections. No support for data sections (yet). */
420 for (secnum = 0; secnum < D->maxsection; secnum++) {
421 dasm_Section *sec = D->sections + secnum;
422 int *b = sec->buf;
423 int *endb = sec->rbuf + sec->pos;
424
425 while (b != endb) {
 /* Each recorded put starts with its action list offset. */
426 dasm_ActList p = D->actionlist + *b++;
427 while (1) {
428 unsigned int ins = *p++;
429 unsigned int action = (ins >> 16);
 /* Actions from DASM_ALIGN upwards have (at least) one recorded slot. */
430 int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
431 switch (action) {
432 case DASM_STOP: case DASM_SECTION: goto stop;
433 case DASM_ESC: *cp++ = *p++; break;
434 case DASM_REL_EXT:
435 n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
436 goto patchrel;
437 case DASM_ALIGN:
 /* Pad with 0xd503201f words (A64 NOP -- verify against the ISA manual). */
438 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
439 break;
440 case DASM_REL_LG:
 /* n < 0 is the -idx marker dasm_link() leaves for labels not defined here:
 ** resolve those through the caller-supplied globals array. */
441 if (n < 0) {
442 ptrdiff_t na = (ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4;
443 n = (int)na;
444 CK((ptrdiff_t)n == na, RANGE_REL);
445 goto patchrel;
446 }
447 /* fallthrough */
448 case DASM_REL_PC:
449 CK(n >= 0, UNDEF_PC);
 /* Label offset minus the offset of the instruction word just emitted. */
450 n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
451 patchrel:
 /* Bits 11..15 of ins select how the offset is merged into cp[-1]. */
452 if (!(ins & 0xf800)) { /* B, BL */
453 CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
454 cp[-1] |= ((n >> 2) & 0x03ffffff);
455 } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */
456 CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
457 cp[-1] |= ((n << 3) & 0x00ffffe0);
458 } else if ((ins & 0x3000) == 0x2000) { /* ADR */
459 CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
460 cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
461 } else if ((ins & 0x3000) == 0x3000) { /* ADRP */
462 cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
463 } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
464 CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
465 cp[-1] |= ((n << 3) & 0x0007ffe0);
466 } else if ((ins & 0x8000)) { /* absolute */
 /* Emit the 64 bit target address as two extra words (low word first). */
467 cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
468 cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
469 cp += 2;
470 }
471 break;
472 case DASM_REL_A: {
 /* Reassemble the 64 bit address from its two recorded halves. */
473 ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
474 if ((ins & 0x3000) == 0x3000) { /* ADRP */
 /* Clearing 0x1000 makes patchrel take the ADR branch for the page delta. */
475 ins &= ~0x1000;
476 na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
477 } else {
478 na = na - (ptrdiff_t)cp + 4;
479 }
480 n = (int)na;
481 CK((ptrdiff_t)n == na, RANGE_REL);
482 goto patchrel;
483 }
484 case DASM_LABEL_LG:
 /* Label numbers from 20 upwards are globals: publish their address. */
485 ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
486 break;
487 case DASM_LABEL_PC: break;
488 case DASM_IMM:
 /* Mask to the field width (ins bits 5..9) and shift into place (bits 0..4). */
489 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
490 break;
491 case DASM_IMM6:
 /* Low five bits go to bit 19 and up, bit 5 goes to bit 31. */
492 cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
493 break;
494 case DASM_IMM12:
495 cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
496 break;
497 case DASM_IMM13W:
498 cp[-1] |= (dasm_imm13(n, n) << 10);
499 break;
500 case DASM_IMM13X:
501 cp[-1] |= (dasm_imm13(n, *b++) << 10);
502 break;
503 case DASM_IMML: {
 /* Scaled 12 bit form (bit 24 set) if n fits it, else a 9 bit form at bit 12. */
504 int scale = (ins & 3);
505 cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
506 ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
507 break;
508 }
509 case DASM_IMMV:
510 *cp++ = n;
511 break;
512 case DASM_VREG:
513 cp[-1] |= (n & 0x1f) << (ins & 0x1f);
514 break;
 /* Anything that is not an action code is emitted verbatim. */
515 default: *cp++ = ins; break;
516 }
517 }
518 stop: (void)0;
519 }
520 }
521
522 if (base + D->codesize != (char *)cp) /* Check for phase errors. */
523 return DASM_S_PHASE;
524 return DASM_S_OK;
525}
526#undef CK
527
528/* Get PC label offset. */
529int dasm_getpclabel(Dst_DECL, unsigned int pc)
530{
531 dasm_State *D = Dst_REF;
532 if (pc*sizeof(int) < D->pcsize) {
533 int pos = D->pclabels[pc];
534 if (pos < 0) return *DASM_POS2PTR(D, -pos);
535 if (pos > 0) return -1; /* Undefined. */
536 }
537 return -2; /* Unused or out of range. */
538}
539
540#ifdef DASM_CHECKS
541/* Optional sanity checker to call between isolated encoding steps. */
542int dasm_checkstep(Dst_DECL, int secmatch)
543{
544 dasm_State *D = Dst_REF;
545 if (D->status == DASM_S_OK) {
546 int i;
547 for (i = 1; i <= 9; i++) {
548 if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
549 D->lglabels[i] = 0;
550 }
551 }
552 if (D->status == DASM_S_OK && secmatch >= 0 &&
553 D->section != &D->sections[secmatch])
554 D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
555 return D->status;
556}
557#endif
558
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
new file mode 100644
index 00000000..e69f8ef3
--- /dev/null
+++ b/dynasm/dasm_arm64.lua
@@ -0,0 +1,1226 @@
1------------------------------------------------------------------------------
2-- DynASM ARM64 module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7
8-- Module information:
9local _info = {
10 arch = "arm",
11 description = "DynASM ARM64 module",
12 version = "1.5.0",
13 vernum = 10500,
14 release = "2021-05-02",
15 author = "Mike Pall",
16 license = "MIT",
17}
18
19-- Exported glue functions for the arch-specific module.
20local _M = { _info = _info }
21
22-- Cache library functions.
23local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
24local assert, setmetatable, rawget = assert, setmetatable, rawget
25local _s = string
26local format, byte, char = _s.format, _s.byte, _s.char
27local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
28local concat, sort, insert = table.concat, table.sort, table.insert
29local bit = bit or require("bit")
30local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
31local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
32
33-- Inherited tables and callbacks.
34local g_opt, g_arch
35local wline, werror, wfatal, wwarn
36
37-- Action name list.
38-- CHECK: Keep this in sync with the C code!
39local action_names = {
40 "STOP", "SECTION", "ESC", "REL_EXT",
41 "ALIGN", "REL_LG", "LABEL_LG",
42 "REL_PC", "LABEL_PC", "REL_A",
43 "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV",
44 "VREG",
45}
46
47-- Maximum number of section buffer positions for dasm_put().
48-- CHECK: Keep this in sync with the C code!
49local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
50
51-- Action name -> action number.
52local map_action = {}
53for n,name in ipairs(action_names) do
54 map_action[name] = n-1
55end
56
57-- Action list buffer.
58local actlist = {}
59
60-- Argument list for next dasm_put(). Start with offset 0 into action list.
61local actargs = { 0 }
62
63-- Current number of section buffer positions for dasm_put().
64local secpos = 1
65
66------------------------------------------------------------------------------
67
-- Dump action names and numbers.
-- Writes one line per action to `out`: name, code in hex, code in decimal.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for idx = 1, #action_names do
    local name = action_names[idx]
    local code = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, code, code))
  end
  out:write("\n")
end
77
-- Write action list buffer as a huge static C array.
-- `out` is the output file handle, `name` the C identifier of the array.
-- An empty action list is emitted as a single STOP action, so that the
-- generated array always has at least one element.
local function writeactions(out, name)
  local list = actlist
  local nn = #list
  if nn == 0 then
    -- The placeholder has to sit at index 1, which is the slot written
    -- below (index 0 is never emitted and would leave list[1] == nil).
    list = { map_action.STOP }
    nn = 1
  end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(list[i]), ",\n"))
  end
  assert(out:write("0x", tohex(list[nn]), "\n};\n\n"))
end
88
89------------------------------------------------------------------------------
90
-- Add word to action list.
-- The word must be an integer in the unsigned 32 bit range.
local function wputxw(n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  actlist[#actlist+1] = n
end
96
-- Add action to list with optional arg. Advance buffer pos, too.
-- `action` is an action name, `val` its 16 bit payload (default 0), `a` the
-- C expression passed to dasm_put() and `num` the number of buffer
-- positions the action occupies (default 1 when `a` is given).
local function waction(action, val, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  wputxw(code * 0x10000 + (val or 0))
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
104
-- Flush action list (intervening C code or buffer pos overflow).
-- Emits the pending dasm_put() call, unless nothing was added since the last
-- flush. Pass a true `term` if the list is already terminated.
local function wflush(term)
  if #actlist ~= actargs[1] then
    if not term then waction("STOP") end -- Terminate action list.
    local args = concat(actargs, ", ")
    wline(format("dasm_put(Dst, %s);", args), true)
    -- The next dasm_put() starts here; its offset takes a buffer position.
    actargs = { #actlist }
    secpos = 1
  end
end
113
-- Put escaped word.
-- The encoding engine treats every word whose upper 16 bits are a valid
-- action number as an action. Such words must be preceded by ESC to be
-- emitted verbatim. The limit is derived from the number of actions, so it
-- also covers the highest action codes of this module.
local function wputw(n)
  if n < #action_names * 0x10000 then waction("ESC") end
  wputxw(n)
end
119
-- Reserve position for word.
-- Appends a placeholder to the action list and returns its index.
local function wpos()
  actlist[#actlist+1] = ""
  return #actlist
end
126
-- Store word to reserved position.
-- A word whose upper 16 bits are a valid action number would be mistaken
-- for an action by the encoding engine. In that case the reserved slot gets
-- an ESC action and the word itself is inserted right after it. The limit is
-- derived from the number of actions, so it also covers the highest action
-- codes of this module.
local function wputpos(pos, n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  if n < #action_names * 0x10000 then
    insert(actlist, pos+1, n)
    n = map_action.ESC * 0x10000
  end
  actlist[pos] = n
end
136
137------------------------------------------------------------------------------
138
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbering starts at 20 and may not exceed 2047.
local next_global = 20
local map_global
do
  local function assign_global(t, name)
    if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
    local num = next_global
    if num > 2047 then werror("too many global labels") end
    next_global = num + 1
    t[name] = num
    return num
  end
  map_global = setmetatable({}, { __index = assign_global })
end
149
-- Dump global labels.
-- Lists all global label names in numeric order.
local function dumpglobals(out, lvl)
  local names = {}
  for label, num in pairs(map_global) do names[num] = label end
  out:write("Global labels:\n")
  local num = 20
  while num < next_global do
    out:write(format(" %s\n", names[num]))
    num = num + 1
  end
  out:write("\n")
end
160
-- Write global label enum.
-- Emits a C enum with one `prefix`-ed entry per global label in numeric
-- order, followed by a closing `prefix`_MAX entry.
local function writeglobals(out, prefix)
  local names = {}
  for label, num in pairs(map_global) do names[num] = label end
  out:write("enum {\n")
  local num = 20
  while num < next_global do
    out:write(" ", prefix, names[num], ",\n")
    num = num + 1
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
171
-- Write global label names.
-- Emits a NULL-terminated C array of strings called `name`, holding the
-- global label names in numeric order.
local function writeglobalnames(out, name)
  local names = {}
  for label, num in pairs(map_global) do names[num] = label end
  out:write("static const char *const ", name, "[] = {\n")
  local num = 20
  while num < next_global do
    out:write(" \"", names[num], "\",\n")
    num = num + 1
  end
  out:write(" (const char *)0\n};\n")
end
182
183------------------------------------------------------------------------------
184
-- Extern label name -> extern label number. With auto assignment on 1st use.
-- map_extern_ holds the reverse mapping (number -> name).
local next_extern = 0
local map_extern_ = {}
local map_extern
do
  local function assign_extern(t, name)
    -- No restrictions on the name for now.
    local num = next_extern
    if num > 2047 then werror("too many extern labels") end
    next_extern = num + 1
    t[name] = num
    map_extern_[num] = name
    return num
  end
  map_extern = setmetatable({}, { __index = assign_extern })
end
197
-- Dump extern labels.
-- Lists all extern label names in order of assignment.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local num = 0
  while num < next_extern do
    out:write(format(" %s\n", map_extern_[num]))
    num = num + 1
  end
  out:write("\n")
end
206
-- Write extern label names.
-- Emits a NULL-terminated C array of strings called `name`, holding the
-- extern label names in order of assignment.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local num = 0
  while num < next_extern do
    out:write(" \"", map_extern_[num], "\",\n")
    num = num + 1
  end
  out:write(" (const char *)0\n};\n")
end
215
216------------------------------------------------------------------------------
217
218-- Arch-specific maps.
219
220-- Ext. register name -> int. name.
221local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
222
223-- Int. register name -> ext. name.
224local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
225
226local map_type = {} -- Type name -> { ctype, reg }
227local ctypenum = 0 -- Type number (for Dt... macros).
228
-- Reverse defines for registers.
-- Maps an internal register name back to its external name; any other
-- string is returned unchanged.
function _M.revdef(s)
  local ext = map_reg_rev[s]
  if ext then return ext end
  return s
end
233
234local map_shift = { lsl = 0, lsr = 1, asr = 2, }
235
236local map_extend = {
237 uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
238 sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
239}
240
241local map_cond = {
242 eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
243 hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
244 hs = 2, lo = 3,
245}
246
247------------------------------------------------------------------------------
248
249local parse_reg_type
250
-- Parse a register operand.
-- Accepted forms: a plain register name (type letter from `xwqdshb` plus a
-- number), a variable register `R<type>(expr)`, a type name that has a
-- default register, or `type:register` with an explicit override.
-- The first register parsed fixes parse_reg_type; later ones must match it.
-- Returns the register number shifted left by `shift`, plus the type table
-- if the operand named a type. For a variable register a VREG action is
-- emitted (unless `no_vreg` is set) and 0 is returned.
local function parse_reg(expr, shift, no_vreg)
  if not expr then werror("expected register name") end
  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
  if not tname then
    tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
  end
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
  if r then
    r = tonumber(r)
    -- Registers 0..30 are always fine. Number 31 is only accepted for w/x
    -- with the internal "@" prefix (as used for xzr/wzr), but unprefixed
    -- for the other register types. Anything above 31 is rejected: the
    -- inner parentheses keep the type test tied to r == 31.
    if r <= 30 or
       (r == 31 and (ok31 ~= "" or (rt ~= "w" and rt ~= "x"))) then
      if not parse_reg_type then
        parse_reg_type = rt
      elseif parse_reg_type ~= rt then
        werror("register size mismatch")
      end
      return shl(r, shift), tp
    end
  end
  local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
  if vreg then
    if not parse_reg_type then
      parse_reg_type = vrt
    elseif parse_reg_type ~= vrt then
      werror("register size mismatch")
    end
    if not no_vreg then waction("VREG", shift, vreg) end
    return 0
  end
  werror("bad register name `"..expr.."'")
end
289
-- Parse a base register for a memory operand.
-- "sp" maps to register field value 31 at bit 5 (0x3e0). Anything else must
-- parse as an x register. Resets parse_reg_type afterwards.
local function parse_reg_base(expr)
  if expr ~= "sp" then
    local base, tp = parse_reg(expr, 5)
    if parse_reg_type ~= "x" then werror("bad register type") end
    parse_reg_type = false
    return base, tp
  end
  return 0x3e0
end
297
-- Environment used for evaluating constant expressions.
local parse_ctx = {}

-- Compile a chunk of Lua source so that it runs with parse_ctx as its
-- environment. Uses loadstring+setfenv when setfenv exists, otherwise the
-- four-argument form of load.
local loadenv
if setfenv then
  loadenv = function(s)
    local chunk = loadstring(s, "")
    if chunk then setfenv(chunk, parse_ctx) end
    return chunk
  end
else
  loadenv = function(s)
    return load(s, "", nil, parse_ctx)
  end
end
307
-- Convert an operand string to a number, or return nil if it is not a
-- compile-time constant. Plain numerals are handled by tonumber; anything
-- else is evaluated as a Lua expression through loadenv, since some basic
-- ops are aliases that build simple arithmetic expressions.
local function parse_number(n)
  local value = tonumber(n)
  if not value then
    local chunk = loadenv("return "..n)
    if chunk then
      local ok, result = pcall(chunk)
      if ok and type(result) == "number" then value = result end
    end
  end
  return value
end
319
-- Parse a generic "#imm" operand.
--   bits:   width of the immediate field.
--   shift:  bit position of the field inside the opcode.
--   scale:  log2 of the required multiple; the value is shifted right by
--           this amount before it is stored.
--   signed: whether negative values are allowed.
-- Constants are range-checked and returned already shifted into place.
-- Non-constant expressions emit an IMM action carrying the same parameters
-- and return 0.
local function parse_imm(imm, bits, shift, scale, signed)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
  local m = sar(n, scale)
  -- The value must survive scaling down and back up unchanged.
  if shl(m, scale) == n then
    if signed then
      local top = sar(m, bits-1)
      if top == 0 then
        return shl(m, shift)
      elseif top == -1 then
        -- Negative value: wrap it into the field width.
        return shl(m + shl(1, bits), shift)
      end
    elseif sar(m, bits) == 0 then
      return shl(m, shift)
    end
  end
  werror("out of range immediate `"..imm.."'")
end
341
-- Parse a "#imm" operand for the 12 bit immediate form with an optional
-- left shift by 12.
-- Constants must be integers that either fit in 12 bits or are a multiple
-- of 0x1000 that fits in 24 bits; the second form sets bit 22 in the
-- result. Non-constant expressions emit an IMM12 action and return 0.
local function parse_imm12(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if n then
    -- The bit operations below are assumed to work on 32 bit integers:
    -- they would silently round fractions and wrap values outside that
    -- range, so validate the plain number before using them.
    if n % 1 == 0 and n >= 0 and n <= 0xffffff then
      if shr(n, 12) == 0 then
        return shl(n, 10)
      elseif band(n, 0xff000fff) == 0 then
        return shr(n, 2) + 0x00400000
      end
    end
    werror("out of range immediate `"..imm.."'")
  else
    waction("IMM12", 0, imm)
    return 0
  end
end
358
-- Parse a "#imm" operand for the 13 bit bitmask-immediate form.
-- Integer constants in 0..0xffffffff are converted to a string of bits
-- (least significant bit first) and matched against a repeating
-- zeros/ones pattern. Everything else is deferred to runtime via an
-- IMM13X action (x registers; low and high 32 bits are passed as two
-- arguments) or an IMM13W action (otherwise).
local function parse_imm13(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  local r64 = parse_reg_type == "x"
  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
    -- Normalize so that the bit string starts with a zero.
    local inverted = false
    if band(n, 1) == 1 then
      n = bit.bnot(n)
      inverted = true
    end
    local digits = {}
    for i = 1, 32 do
      digits[i] = band(n, 1)
      n = shr(n, 1)
    end
    local lower = table.concat(digits)
    -- Upper half: sign-like fill for x registers, replication otherwise.
    local upper
    if r64 then
      upper = (inverted and "1" or "0"):rep(32)
    else
      upper = lower
    end
    local b = lower..upper
    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
    if p0 then
      -- Element width: the whole register if there is no second run of
      -- ones, else the distance between the starts of the two runs.
      local w
      if p1a == "" then
        w = r64 and 64 or 32
      else
        w = #p1+#p0a
      end
      -- Width must be a power of two and the element must tile all 64 bits.
      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
        local s = band(-2*w, 0x3f) - 1
        if w == 64 then s = s + 0x1000 end
        if inverted then
          return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
        end
        return shl(w-#p0, 16) + shl(s+#p1, 10)
      end
    end
    werror("out of range immediate `"..imm.."'")
  elseif r64 then
    waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
    return 0
  else
    waction("IMM13W", 0, imm)
    return 0
  end
end
394
-- Parse a "#imm" operand holding a bit number in the range 0..63.
-- The low five bits are placed at bit 19 and bit 31 is set for values of
-- 32 and above. Non-constant expressions emit an IMM6 action and return 0.
local function parse_imm6(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if n then
    -- Reject fractions explicitly: band() would otherwise round them to
    -- some integer and silently test a different bit.
    if n % 1 == 0 and n >= 0 and n <= 63 then
      return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
    end
    werror("out of range immediate `"..imm.."'")
  else
    waction("IMM6", 0, imm)
    return 0
  end
end
409
-- Parse the offset of a load/store address (text after the "#").
--   scale: log2 of the access size.
-- Integer constants are encoded either as a scaled, unsigned 12 bit offset
-- or as an unscaled, signed 9 bit offset. Non-constant expressions emit an
-- IMML action and return 0.
local function parse_imm_load(imm, scale)
  local n = parse_number(imm)
  if n then
    -- Fractional offsets would be rounded by band() in the unscaled
    -- encoding below, so only integers are considered at all.
    if n % 1 == 0 then
      local m = sar(n, scale)
      if shl(m, scale) == n and m >= 0 and m < 0x1000 then
        return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
      elseif n >= -256 and n < 256 then
        return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
      end
    end
    werror("out of range immediate `"..imm.."'")
  else
    waction("IMML", scale, imm)
    return 0
  end
end
425
-- Parse a "#imm" floating-point constant for the 8 bit FP immediate form.
-- The value is split with math.frexp; it is accepted if the mantissa needs
-- only four fraction bits and the exponent fits a 3 bit signed field.
-- Non-constant expressions are not supported.
local function parse_fpimm(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    werror("NYI fpimm action")
    return
  end
  local mant, exp = math.frexp(n)
  local sign = 0
  if mant < 0 then
    mant = -mant
    sign = 0x00100000
  end
  local e3 = band(exp-2, 7)
  -- Map the mantissa range [0.5, 1) onto the integers 0..15.
  local frac = mant*32-16
  local frac_ok = frac % 1 == 0 and frac >= 0 and frac <= 15
  -- Sign-extending the 3 bit exponent must reproduce the original one.
  if frac_ok and sar(shl(e3, 29), 29)+2 == exp then
    return sign + shl(e3, 17) + shl(frac, 13)
  end
  werror("out of range immediate `"..imm.."'")
end
443
-- Parse a "<shift> #amount" operand (lsl, lsr or asr).
-- The amount is a 6 bit immediate at bit 10, the shift code goes to bit 22.
local function parse_shift(expr)
  local kind, amount = match(expr, "^(%S+)%s*(.*)$")
  local code = map_shift[kind]
  if not code then werror("expected shift operand") end
  local field = parse_imm(amount, 6, 10, 0, false)
  return field + shl(code, 22)
end
450
-- Parse a "lsl #n" operand where n is a literal multiple of 16.
-- For x registers the mask allows 0, 16, 32 and 48; otherwise only 0 and 16.
local function parse_lslx16(expr)
  local n = tonumber(match(expr, "^lsl%s*#(%d+)$"))
  if not n then werror("expected shift operand") end
  local forbidden
  if parse_reg_type == "x" then
    forbidden = 0xffffffcf
  else
    forbidden = 0xffffffef
  end
  if band(n, forbidden) ~= 0 then
    werror("bad shift amount")
  end
  return shl(n, 17)
end
460
-- Parse an "<extend> [#amount]" operand.
-- "lsl" is treated as extend code 3 for x registers and 2 otherwise; all
-- other names are looked up in map_extend. The optional amount is a 3 bit
-- immediate at bit 10, the extend code goes to bit 13.
local function parse_extend(expr)
  local kind, amount = match(expr, "^(%S+)%s*(.*)$")
  local code
  if kind == "lsl" then
    code = parse_reg_type == "x" and 3 or 2
  else
    code = map_extend[kind]
  end
  if not code then werror("expected extend operand") end
  local field = 0
  if amount ~= "" then
    field = parse_imm(amount, 3, 10, 0, false)
  end
  return field + shl(code, 13)
end
471
-- Parse a condition name and return its code at bit 12.
--   inv: value XORed into the condition code before shifting (callers in
--        this file pass 0 or 1).
local function parse_cond(expr, inv)
  local code = map_cond[expr]
  if not code then werror("expected condition operand") end
  local effective = bit.bxor(code, inv)
  return shl(effective, 12)
end
477
-- Parse the address operand(s) of a single-register load/store and merge
-- the result into the opcode.
--   params: operand list, n: index of the address operand, op: opcode so
--   far. The access scale is taken from the top two bits of the opcode.
-- Accepted forms, as implemented below:
--   type_or_typed_reg <field expr>   (offset computed from a .type)
--   [base], #imm                     (separate second operand)
--   [base, #imm]!
--   [base]
--   [base, #imm]
--   [base, index {, extend/shift {#amount}}]
-- Returns the updated opcode. Errors are reported via werror, which is
-- expected not to return.
local function parse_load(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local scale = shr(op, 30)
  local pn, p2 = params[n], params[n+1]
  -- Without this check a missing operand would hit match(nil, ...) and
  -- surface as a raw Lua error instead of an assembler diagnostic.
  if not pn then werror("expected address operand") end
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not p1 then
    -- Not a bracketed address: try "type field" / "type:reg field".
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMML", scale, format(tp.ctypefmt, tailr))
          return op + base
        end
      end
    end
    werror("expected address operand")
  end
  if p2 then
    -- [base], #imm
    if wb == "!" then werror("bad use of '!'") end
    op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
  elseif wb == "!" then
    -- [base, #imm]!
    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
    if not p1a then werror("bad use of '!'") end
    op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
  else
    local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
    op = op + parse_reg_base(p1a)
    if p2a ~= "" then
      local imm = match(p2a, "^,%s*#(.*)$")
      if imm then
        -- [base, #imm]
        op = op + parse_imm_load(imm, scale)
      else
        -- [base, index {, extend/shift {#amount}}]
        local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
        op = op + parse_reg(p2b, 16) + 0x00200800
        if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
          werror("bad index register type")
        end
        if p3b == "" then
          -- No extend/shift given: only valid with an x index register.
          if parse_reg_type ~= "x" then werror("bad index register type") end
          op = op + 0x6000
        else
          -- The amount must be absent, #0, or exactly the access scale.
          if p3s == "" or p3s == "#0" then
          elseif p3s == "#"..scale then
            op = op + 0x1000
          else
            werror("bad scale")
          end
          if parse_reg_type == "x" then
            if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
            elseif p3b == "sxtx" then op = op + 0xe000
            else
              werror("bad extend/shift specifier")
            end
          else
            if p3b == "uxtw" then op = op + 0x4000
            elseif p3b == "sxtw" then op = op + 0xc000
            else
              werror("bad extend/shift specifier")
            end
          end
        end
      end
    else
      -- [base]. A trailing "!" cannot reach this branch: it is consumed by
      -- the `wb == "!"` branch above.
      op = op + 0x01000000
    end
  end
  return op
end
548
-- Parse the address operand(s) of a load/store pair and merge the result
-- into the opcode.
--   params: operand list, n: index of the address operand, op: opcode so
--   far.
-- Accepted forms, as implemented below:
--   type_or_typed_reg <field expr>   (offset computed from a .type)
--   [base], #imm                     (separate second operand)
--   [base, #imm]!
--   [base] and [base, #imm]
-- The offset is a signed 7 bit immediate at bit 15, scaled by the size of
-- one transferred element. Returns the updated opcode.
local function parse_load_pair(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local pn, p2 = params[n], params[n+1]
  -- Without this check a missing operand would hit match(nil, ...) and
  -- surface as a raw Lua error instead of an assembler diagnostic.
  if not pn then werror("expected address operand") end
  -- Element scale: for the general-register templates (bit 26 clear) only
  -- bit 31 selects 8 byte elements, so ldpsw (0x68...) correctly gets
  -- scale 2. For the FP templates (bit 26 set) the top two bits count up
  -- from 4 byte elements.
  local scale
  if band(shr(op, 26), 1) == 1 then
    scale = 2 + shr(op, 30)
  else
    scale = 2 + shr(op, 31)
  end
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not p1 then
    -- Not a bracketed address: try "type field" / "type:reg field".
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
          return op + base + 0x01000000
        end
      end
    end
    werror("expected address operand")
  end
  if p2 then
    -- [base], #imm
    if wb == "!" then werror("bad use of '!'") end
    op = op + 0x00800000
  else
    -- [base {, #imm}]{!}; a missing offset defaults to #0.
    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
    op = op + (wb == "!" and 0x01800000 or 0x01000000)
  end
  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
end
577
-- Classify a label operand.
--   def: true when parsing a label definition, false for a reference.
-- Returns a mode string followed by mode-specific values:
--   "PC", 0, expr        for =>expr
--   "LG", index          for ->global and for local labels
--   "EXT", index         for "extern name"    (references only)
--   "A", 0, C expression for &expr            (references only)
-- Returns nothing if the label matches none of these forms.
local function parse_label(label, def)
  local head = label:sub(1, 2)
  if head == "=>" then
    -- =>label (pc label reference)
    return "PC", 0, label:sub(3)
  elseif head == "->" then
    -- ->name (global label reference)
    return "LG", map_global[label:sub(3)]
  end

  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
    return
  end

  -- [<>][1-9] (local label reference)
  local dir, lnum = match(label, "^([<>])([1-9])$")
  if dir then -- Fwd: 1-9, Bkwd: 11-19.
    local bias = dir == ">" and 0 or 10
    return "LG", lnum + bias
  end
  -- extern label (extern label reference)
  local extname = match(label, "^extern%s+(%S+)$")
  if extname then
    return "EXT", map_extern[extname]
  end
  -- &expr (pointer)
  if label:sub(1, 1) == "&" then
    return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
  end
end
610
-- Map an opcode to the relocation-type value that is added to the
-- REL_* action argument. Raises an assertion for opcodes that match none
-- of the known label-taking instruction classes.
local function branch_type(op)
  -- B, BL
  if band(op, 0x7c000000) == 0x14000000 then return 0 end
  -- B.cond, CBZ, CBNZ, LDR* literal
  if shr(op, 24) == 0x54 then return 0x800 end
  if band(op, 0x7e000000) == 0x34000000 then return 0x800 end
  if band(op, 0x3b000000) == 0x18000000 then return 0x800 end
  -- TBZ, TBNZ
  if band(op, 0x7e000000) == 0x36000000 then return 0x1000 end
  -- ADR / ADRP share a mask. band() of the constant alone is used in the
  -- ADRP comparison so both sides go through the same normalization.
  local adr_bits = band(op, 0x9f000000)
  if adr_bits == 0x10000000 then return 0x2000 end -- ADR
  if adr_bits == band(0x90000000) then return 0x3000 end -- ADRP
  assert(false, "unknown branch type")
end
623
624------------------------------------------------------------------------------
625
-- Forward declarations: both are assigned further down.
local map_op, op_template

-- Build an opcode handler that rewrites its operands and then assembles
-- them with the template of another opcode.
--   opname: key of the target opcode in map_op (e.g. "ubfm_4").
--   f:      function(params, nparams) that modifies params in place.
-- Called without params, the handler returns a description naming the
-- target opcode with its last two characters removed.
local function op_alias(opname, f)
  local function handler(params, nparams)
    if params then
      f(params, nparams)
      op_template(params, map_op[opname], nparams)
      return
    end
    return "-> "..opname:sub(1, -3)
  end
  return handler
end
635
-- Operand rewrite for the bitfield-extract aliases: replaces the fourth
-- operand by the expression (third)+(fourth)-1, built from the operand
-- texts with their leading character stripped.
local function alias_bfx(p)
  local third, fourth = p[3]:sub(2), p[4]:sub(2)
  p[4] = "#("..third..")+("..fourth..")-1"
end
639
-- Operand rewrite for the bitfield-insert aliases. The register width is
-- taken from the first operand (parsed without emitting a VREG action):
-- the third operand becomes (width-(third))%width and the fourth becomes
-- (fourth)-1, both as expression text.
local function alias_bfiz(p)
  parse_reg(p[1], 0, true)
  local width = parse_reg_type == "w" and "32" or "64"
  p[3] = "#("..width.."-("..p[3]:sub(2).."))%"..width
  p[4] = "#("..p[4]:sub(2)..")-1"
end
650
-- Handler for "lsl" with an immediate shift amount, assembled through the
-- ubfm_4 template. The register width is taken from the first operand
-- (parsed without emitting a VREG action); the shift text sh is rewritten
-- into the operands (width-(sh))%width and (width-1)-(sh).
local alias_lslimm = op_alias("ubfm_4", function(p)
  parse_reg(p[1], 0, true)
  local sh = p[3]:sub(2)
  local width, top = "64", "63"
  if parse_reg_type == "w" then
    width, top = "32", "31"
  end
  p[3] = "#("..width.."-("..sh.."))%"..width
  p[4] = "#"..top.."-("..sh..")"
end)
662
-- Template strings for ARM instructions.
-- Keys are "<mnemonic>_<operand count>" ("_*" for a variable count).
-- Each value is one or more alternatives separated by "|"; an alternative
-- is an 8 digit hex opcode followed by operand/format characters that are
-- interpreted by parse_template. Values may also be handler functions.
map_op = {
  -- Basic data processing instructions.
  add_3    = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
  add_4    = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
  adds_3   = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
  adds_4   = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
  cmn_2    = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
  cmn_3    = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",

  sub_3    = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
  sub_4    = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
  subs_3   = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
  subs_4   = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
  cmp_2    = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
  cmp_3    = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",

  neg_2    = "4b0003e0DMg",
  neg_3    = "4b0003e0DMSg",
  negs_2   = "6b0003e0DMg",
  negs_3   = "6b0003e0DMSg",

  adc_3    = "1a000000DNMg",
  adcs_3   = "3a000000DNMg",
  sbc_3    = "5a000000DNMg",
  sbcs_3   = "7a000000DNMg",
  ngc_2    = "5a0003e0DMg",
  ngcs_2   = "7a0003e0DMg",

  and_3    = "0a000000DNMg|12000000pDNig",
  and_4    = "0a000000DNMSg",
  orr_3    = "2a000000DNMg|32000000pDNig",
  orr_4    = "2a000000DNMSg",
  eor_3    = "4a000000DNMg|52000000pDNig",
  eor_4    = "4a000000DNMSg",
  ands_3   = "6a000000DNMg|72000000DNig",
  ands_4   = "6a000000DNMSg",
  tst_2    = "6a00001fNMg|7200001fNig",
  tst_3    = "6a00001fNMSg",

  bic_3    = "0a200000DNMg",
  bic_4    = "0a200000DNMSg",
  orn_3    = "2a200000DNMg",
  orn_4    = "2a200000DNMSg",
  eon_3    = "4a200000DNMg",
  eon_4    = "4a200000DNMSg",
  bics_3   = "6a200000DNMg",
  bics_4   = "6a200000DNMSg",

  movn_2   = "12800000DWg",
  movn_3   = "12800000DWRg",
  movz_2   = "52800000DWg",
  movz_3   = "52800000DWRg",
  movk_2   = "72800000DWg",
  movk_3   = "72800000DWRg",

  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
  mov_2    = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
  mov_3    = "2a0003e0DMSg",
  mvn_2    = "2a2003e0DMg",
  mvn_3    = "2a2003e0DMSg",

  adr_2    = "10000000DBx",
  adrp_2   = "90000000DBx",

  csel_4   = "1a800000DNMCg",
  csinc_4  = "1a800400DNMCg",
  csinv_4  = "5a800000DNMCg",
  csneg_4  = "5a800400DNMCg",
  cset_2   = "1a9f07e0Dcg",
  csetm_2  = "5a9f03e0Dcg",
  cinc_3   = "1a800400DNmcg",
  cinv_3   = "5a800000DNmcg",
  cneg_3   = "5a800400DNmcg",

  ccmn_4   = "3a400000NMVCg|3a400800N5VCg",
  ccmp_4   = "7a400000NMVCg|7a400800N5VCg",

  madd_4   = "1b000000DNMAg",
  msub_4   = "1b008000DNMAg",
  mul_3    = "1b007c00DNMg",
  mneg_3   = "1b00fc00DNMg",

  smaddl_4 = "9b200000DxNMwAx",
  smsubl_4 = "9b208000DxNMwAx",
  smull_3  = "9b207c00DxNMw",
  smnegl_3 = "9b20fc00DxNMw",
  smulh_3  = "9b407c00DNMx",
  umaddl_4 = "9ba00000DxNMwAx",
  umsubl_4 = "9ba08000DxNMwAx",
  umull_3  = "9ba07c00DxNMw",
  umnegl_3 = "9ba0fc00DxNMw",
  umulh_3  = "9bc07c00DNMx",

  udiv_3   = "1ac00800DNMg",
  sdiv_3   = "1ac00c00DNMg",

  -- Bit operations.
  sbfm_4   = "13000000DN12w|93400000DN12x",
  bfm_4    = "33000000DN12w|b3400000DN12x",
  ubfm_4   = "53000000DN12w|d3400000DN12x",
  extr_4   = "13800000DNM2w|93c00000DNM2x",

  sxtb_2   = "13001c00DNw|93401c00DNx",
  sxth_2   = "13003c00DNw|93403c00DNx",
  sxtw_2   = "93407c00DxNw",
  uxtb_2   = "53001c00DNw",
  uxth_2   = "53003c00DNw",

  -- Aliases that rewrite their operands and reuse the *bfm templates.
  sbfx_4   = op_alias("sbfm_4", alias_bfx),
  bfxil_4  = op_alias("bfm_4", alias_bfx),
  ubfx_4   = op_alias("ubfm_4", alias_bfx),
  sbfiz_4  = op_alias("sbfm_4", alias_bfiz),
  bfi_4    = op_alias("bfm_4", alias_bfiz),
  ubfiz_4  = op_alias("ubfm_4", alias_bfiz),

  lsl_3 = function(params, nparams)
    -- 35 is the byte value of "#": an immediate shift amount goes through
    -- the ubfm alias, everything else uses the register-shift template.
    local is_imm = params and params[3]:byte() == 35
    if is_imm then
      return alias_lslimm(params, nparams)
    end
    return op_template(params, "1ac02000DNMg", nparams)
  end,
  lsr_3    = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
  asr_3    = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
  ror_3    = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",

  clz_2    = "5ac01000DNg",
  cls_2    = "5ac01400DNg",
  rbit_2   = "5ac00000DNg",
  rev_2    = "5ac00800DNw|dac00c00DNx",
  rev16_2  = "5ac00400DNg",
  rev32_2  = "dac00800DNx",

  -- Loads and stores.
  ["strb_*"]  = "38000000DwL",
  ["ldrb_*"]  = "38400000DwL",
  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
  ["strh_*"]  = "78000000DwL",
  ["ldrh_*"]  = "78400000DwL",
  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
  ["str_*"]   = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
  ["ldr_*"]   = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
  -- NOTE: ldur etc. are handled by ldr et al.

  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
  ["ldpsw_*"] = "68400000DAxP",

  -- Branches.
  b_1      = "14000000B",
  bl_1     = "94000000B",
  blr_1    = "d63f0000Nx",
  br_1     = "d61f0000Nx",
  ret_0    = "d65f03c0",
  ret_1    = "d65f0000Nx",
  -- b.cond is added below.
  cbz_2    = "34000000DBg",
  cbnz_2   = "35000000DBg",
  tbz_3    = "36000000DTBw|36000000DTBx",
  tbnz_3   = "37000000DTBw|37000000DTBx",

  -- ARM64e: Pointer authentication codes (PAC).
  blraaz_1  = "d63f081fNx",
  braa_2    = "d71f0800NDx",
  braaz_1   = "d61f081fNx",
  pacibsp_0 = "d503237f",
  retab_0   = "d65f0fff",

  -- Miscellaneous instructions.
  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
  -- TODO: sys, sysl, ic, dc, at, tlbi
  -- TODO: hint, yield, wfe, wfi, sev, sevl
  -- TODO: clrex, dsb, dmb, isb
  nop_0    = "d503201f",
  brk_0    = "d4200000",
  brk_1    = "d4200000W",

  -- Floating point instructions.
  fmov_2   = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
  fabs_2   = "1e20c000DNf",
  fneg_2   = "1e214000DNf",
  fsqrt_2  = "1e21c000DNf",

  fcvt_2   = "1e22c000DdNs|1e624000DsNd",

  -- TODO: half-precision and fixed-point conversions.
  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",

  scvtf_2  = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
  ucvtf_2  = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",

  frintn_2 = "1e244000DNf",
  frintp_2 = "1e24c000DNf",
  frintm_2 = "1e254000DNf",
  frintz_2 = "1e25c000DNf",
  frinta_2 = "1e264000DNf",
  frintx_2 = "1e274000DNf",
  frinti_2 = "1e27c000DNf",

  fadd_3   = "1e202800DNMf",
  fsub_3   = "1e203800DNMf",
  fmul_3   = "1e200800DNMf",
  fnmul_3  = "1e208800DNMf",
  fdiv_3   = "1e201800DNMf",

  fmadd_4  = "1f000000DNMAf",
  fmsub_4  = "1f008000DNMAf",
  fnmadd_4 = "1f200000DNMAf",
  fnmsub_4 = "1f208000DNMAf",

  fmax_3   = "1e204800DNMf",
  fmaxnm_3 = "1e206800DNMf",
  fmin_3   = "1e205800DNMf",
  fminnm_3 = "1e207800DNMf",

  fcmp_2   = "1e202000NMf|1e202008NZf",
  fcmpe_2  = "1e202010NMf|1e202018NZf",

  fccmp_4  = "1e200400NMVCf",
  fccmpe_4 = "1e200410NMVCf",

  fcsel_4  = "1e200c00DNMCf",

  -- TODO: crc32*, aes*, sha*, pmull
  -- TODO: SIMD instructions.
}
900
-- Register one conditional branch opcode per condition name (beq, bne,
-- ...): base opcode 0x54000000 plus the condition code, followed by a
-- label operand.
for name, code in pairs(map_cond) do
  local opname = "b"..name.."_1"
  map_op[opname] = tohex(0x54000000 + code).."B"
end
904
905------------------------------------------------------------------------------
906
-- Handle opcodes defined with template strings.
-- Assembles one template alternative: the first eight characters are the
-- hex base opcode, each following character either consumes an operand
-- from params and adds its encoding to the opcode, or checks/finalizes the
-- register type collected in parse_reg_type. The finished opcode is
-- written at buffer position pos. Any mismatch raises an error (via
-- werror), which op_template catches in order to try the next alternative.
local function parse_template(params, template, nparams, pos)
  local op = tonumber(template:sub(1, 8), 16)
  local n = 1 -- Index of the next operand to consume.

  parse_reg_type = false

  -- Process each character.
  for p in gmatch(template:sub(9), ".") do
    local q = params[n]
    -- Register operands: the letter selects the field position.
    if p == "D" then
      op = op + parse_reg(q, 0); n = n + 1
    elseif p == "N" then
      op = op + parse_reg(q, 5); n = n + 1
    elseif p == "M" then
      op = op + parse_reg(q, 16); n = n + 1
    elseif p == "A" then
      op = op + parse_reg(q, 10); n = n + 1
    elseif p == "m" then
      -- Re-use the previous operand for the field at bit 16.
      op = op + parse_reg(params[n-1], 16)

    elseif p == "p" then
      -- Allow "sp" for the next register operand.
      if q == "sp" then params[n] = "@x31" end
    elseif p == "g" then
      -- General registers: x sets the top bit, w leaves it clear.
      if parse_reg_type == "x" then
        op = op + 0x80000000
      elseif parse_reg_type ~= "w" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif p == "f" then
      -- FP registers: d sets bit 22, s leaves it clear.
      if parse_reg_type == "d" then
        op = op + 0x00400000
      elseif parse_reg_type ~= "s" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif p == "x" or p == "w" or p == "d" or p == "s" then
      -- The registers parsed since the last check must have this type.
      if parse_reg_type ~= p then
        werror("register size mismatch")
      end
      parse_reg_type = false

    elseif p == "L" then
      op = parse_load(params, nparams, n, op)
    elseif p == "P" then
      op = parse_load_pair(params, nparams, n, op)

    elseif p == "B" then
      -- Report a missing operand as an assembler error instead of letting
      -- parse_label fail on a nil value.
      if not q then werror("expected label operand") end
      local mode, v, s = parse_label(q, false); n = n + 1
      if not mode then werror("bad label `"..q.."'") end
      local m = branch_type(op)
      if mode == "A" then
        -- Absolute addresses are passed as two 32 bit halves.
        waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s))
        actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
      else
        waction("REL_"..mode, v+m, s, 1)
      end

    -- Immediate operands.
    elseif p == "I" then
      op = op + parse_imm12(q); n = n + 1
    elseif p == "i" then
      op = op + parse_imm13(q); n = n + 1
    elseif p == "W" then
      op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
    elseif p == "T" then
      op = op + parse_imm6(q); n = n + 1
    elseif p == "1" then
      op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
    elseif p == "2" then
      op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
    elseif p == "5" then
      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
    elseif p == "V" then
      op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
    elseif p == "F" then
      op = op + parse_fpimm(q); n = n + 1
    elseif p == "Z" then
      if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
      n = n + 1

    -- Shift, extend and condition operands.
    elseif p == "S" then
      op = op + parse_shift(q); n = n + 1
    elseif p == "X" then
      op = op + parse_extend(q); n = n + 1
    elseif p == "R" then
      op = op + parse_lslx16(q); n = n + 1
    elseif p == "C" then
      op = op + parse_cond(q, 0); n = n + 1
    elseif p == "c" then
      op = op + parse_cond(q, 1); n = n + 1

    else
      -- A template in map_op uses a character this function doesn't know.
      assert(false, "unknown template character `"..p.."'")
    end
  end
  wputpos(pos, op)
end
1006
-- Assemble an instruction from a template with "|"-separated
-- alternatives. Each alternative is tried in order through parse_template;
-- the first one that succeeds wins. After a failed attempt the section
-- position and up to four action list/argument entries appended by that
-- attempt are rolled back. If every alternative fails, the error of the
-- last one is raised.
-- Called without params, returns the template with all 8 digit hex runs
-- removed (plus gsub's replacement count).
function op_template(params, template, nparams)
  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 4 positions.
  if secpos+4 > maxsecpos then wflush() end
  local pos = wpos()
  local saved_list, saved_args, saved_secpos = #actlist, #actargs, secpos

  local ok, err
  for alt in gmatch(template, "[^|]+") do
    ok, err = pcall(parse_template, params, alt, nparams, pos)
    if ok then return end
    -- Undo whatever the failed attempt emitted.
    secpos = saved_secpos
    for i = 1, 4 do
      actlist[saved_list+i] = nil
      actargs[saved_args+i] = nil
    end
  end
  error(err, 0)
end

map_op[".template__"] = op_template
1034
1035------------------------------------------------------------------------------
1036
-- The four pseudo-opcodes below all work the same way: they queue a
-- callback with wline that writes a generated table at this position of
-- the output. Their single operand is used verbatim (no syntax check).
-- Called without params, each returns the name of its operand.

-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1]
    wline(function(out) writeactions(out, cvar) end)
    return
  end
  return "cvar"
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if params then
    local enum_prefix = params[1]
    wline(function(out) writeglobals(out, enum_prefix) end)
    return
  end
  return "prefix"
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1]
    wline(function(out) writeglobalnames(out, cvar) end)
    return
  end
  return "cvar"
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1]
    wline(function(out) writeexternnames(out, cvar) end)
    return
  end
  return "cvar"
end
1064
1065------------------------------------------------------------------------------
1066
-- Label pseudo-opcode (converted from trailing colon form).
-- Accepts local ([1-9]), global (->name) and pc (=>expr) label
-- definitions and emits the matching LABEL_* action.
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  if secpos+1 > maxsecpos then wflush() end
  local mode, index, expr = parse_label(params[1], true)
  local valid = mode and mode ~= "EXT"
  if not valid then werror("bad label definition") end
  waction("LABEL_"..mode, index, expr, 1)
end
1075
1076------------------------------------------------------------------------------
1077
-- Pseudo-opcodes for data storage (.long, .quad, .addr).
-- Each operand is emitted as one 32 bit word for .long and as two words
-- (low word first) for the other opcodes. Constants that fit into 32 bits
-- are written directly; labels and other expressions are emitted through
-- REL_* or IMMV actions.
local function op_data(params)
  if not params then return "imm..." end
  local size = params.op == ".long" and 4 or 8
  for _, operand in ipairs(params) do
    local value = parse_number(operand)
    if value then
      local low = tobit(value)
      local fits = low == value or (low < 0 and low + 2^32 == value)
      if fits then
        -- Write the word as an unsigned value.
        if low < 0 then
          wputw(low + 2^32)
        else
          wputw(low)
        end
        if size == 8 then
          -- High word: all ones for negative constants, else zero.
          wputw(value < 0 and 0xffffffff or 0)
        end
      elseif size == 4 then
        werror("bad immediate `"..operand.."'")
      else
        -- Too wide for a direct word: treat it like an expression below.
        value = nil
      end
    end
    if not value then
      local mode, v, s = parse_label(operand, false)
      if size == 4 then
        if mode then werror("label does not fit into .long") end
        waction("IMMV", 0, operand)
      elseif mode and mode ~= "A" then
        waction("REL_"..mode, v+0x8000, s, 1)
      else
        local expr = operand
        if mode == "A" then expr = s end
        waction("IMMV", 0, format("(unsigned int)(%s)", expr))
        waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", expr))
      end
    end
    if secpos+2 > maxsecpos then wflush() end
  end
end
map_op[".long_*"] = op_data
map_op[".quad_*"] = op_data
map_op[".addr_*"] = op_data
1116
-- Alignment pseudo-opcode.
-- The operand must be a literal power of 2 in the range (2 ... 256).
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    -- Compare against 2, 4, ..., 256 in turn.
    local power = 2
    for _ = 1, 8 do
      if align == power then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
      power = power * 2
    end
  end
  werror("bad alignment")
end
1135
1136------------------------------------------------------------------------------
1137
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Operands: name, ctype and an optional default register. Registers the
-- type in map_type, adds a "#name" define that expands to sizeof(ctype)
-- and emits a DtN() helper macro used to compute field offsets.
local function op_type(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  local entry = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_3"] = op_type
map_op[".type_2"] = map_op[".type_3"]
1164
-- Dump type definitions: writes one line per entry of map_type (name,
-- C type and default register, if any), sorted by type name.
local function dumptypes(out, lvl)
  local names = {}
  for name in pairs(map_type) do
    names[#names+1] = name
  end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local name = names[i]
    local entry = map_type[name]
    out:write(format(" %-20s %-20s %s\n", name, entry.ctype, entry.reg or ""))
  end
  out:write("\n")
end
1178
1179------------------------------------------------------------------------------
1180
-- Set the current section: emits a SECTION action for section number num
-- and flushes immediately.
_M.section = function(num)
  waction("SECTION", num)
  -- SECTION is a terminal action.
  wflush(true)
end
1186
1187------------------------------------------------------------------------------
1188
-- Dump architecture description: a version banner followed by the list of
-- actions.
_M.dumparch = function(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpactions(out)
end

-- Dump all user defined elements: types, globals and externs, in that
-- order.
_M.dumpdef = function(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
1202
1203------------------------------------------------------------------------------
1204
-- Pass callbacks from/to the DynASM core: stores the line writer and the
-- error/fatal/warning reporters, and hands back this module's flush
-- function.
_M.passcb = function(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end

-- Setup the arch-specific module: remembers the architecture name and
-- option table.
_M.setup = function(arch, opt)
  g_arch = arch
  g_opt = opt
end

-- Merge the core maps and the arch-specific maps: lookups that miss in
-- map_op fall back to map_coreop, and lookups that miss in map_def fall
-- back to map_archdef.
_M.mergemaps = function(map_coreop, map_def)
  setmetatable(map_op, { __index = map_coreop })
  setmetatable(map_def, { __index = map_archdef })
  return map_op, map_def
end
1222
1223return _M
1224
1225------------------------------------------------------------------------------
1226
diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
index 7f3d6c35..7800e933 100644
--- a/dynasm/dasm_mips.h
+++ b/dynasm/dasm_mips.h
@@ -21,7 +21,7 @@ enum {
21 /* The following actions need a buffer position. */ 21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, 22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */ 23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, 24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
25 DASM__MAX 25 DASM__MAX
26}; 26};
27 27
@@ -69,7 +69,7 @@ struct dasm_State {
69 size_t lgsize; 69 size_t lgsize;
70 int *pclabels; /* PC label chains/pos ptrs. */ 70 int *pclabels; /* PC label chains/pos ptrs. */
71 size_t pcsize; 71 size_t pcsize;
72 void **globals; /* Array of globals (bias -10). */ 72 void **globals; /* Array of globals. */
73 dasm_Section *section; /* Pointer to active section. */ 73 dasm_Section *section; /* Pointer to active section. */
74 size_t codesize; /* Total size of all code sections. */ 74 size_t codesize; /* Total size of all code sections. */
75 int maxsection; /* 0 <= sectionidx < maxsection. */ 75 int maxsection; /* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
86{ 86{
87 dasm_State *D; 87 dasm_State *D;
88 size_t psz = 0; 88 size_t psz = 0;
89 int i;
90 Dst_REF = NULL; 89 Dst_REF = NULL;
91 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 90 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
92 D = Dst_REF; 91 D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
97 D->pcsize = 0; 96 D->pcsize = 0;
98 D->globals = NULL; 97 D->globals = NULL;
99 D->maxsection = maxsection; 98 D->maxsection = maxsection;
100 for (i = 0; i < maxsection; i++) { 99 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
101 D->sections[i].buf = NULL; /* Need this for pass3. */
102 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
103 D->sections[i].bsize = 0;
104 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
105 }
106} 100}
107 101
108/* Free DynASM state. */ 102/* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
122void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 116void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
123{ 117{
124 dasm_State *D = Dst_REF; 118 dasm_State *D = Dst_REF;
125 D->globals = gl - 10; /* Negative bias to compensate for locals. */ 119 D->globals = gl;
126 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 120 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
127} 121}
128 122
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
147 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 141 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
148 for (i = 0; i < D->maxsection; i++) { 142 for (i = 0; i < D->maxsection; i++) {
149 D->sections[i].pos = DASM_SEC2POS(i); 143 D->sections[i].pos = DASM_SEC2POS(i);
144 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
150 D->sections[i].ofs = 0; 145 D->sections[i].ofs = 0;
151 } 146 }
152} 147}
@@ -155,10 +150,10 @@ void dasm_setup(Dst_DECL, const void *actionlist)
155#ifdef DASM_CHECKS 150#ifdef DASM_CHECKS
156#define CK(x, st) \ 151#define CK(x, st) \
157 do { if (!(x)) { \ 152 do { if (!(x)) { \
158 D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) 153 D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
159#define CKPL(kind, st) \ 154#define CKPL(kind, st) \
160 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ 155 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
161 D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) 156 D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
162#else 157#else
163#define CK(x, st) ((void)0) 158#define CK(x, st) ((void)0)
164#define CKPL(kind, st) ((void)0) 159#define CKPL(kind, st) ((void)0)
@@ -231,7 +226,7 @@ void dasm_put(Dst_DECL, int start, ...)
231 *pl = -pos; /* Label exists now. */ 226 *pl = -pos; /* Label exists now. */
232 b[pos++] = ofs; /* Store pass1 offset estimate. */ 227 b[pos++] = ofs; /* Store pass1 offset estimate. */
233 break; 228 break;
234 case DASM_IMM: 229 case DASM_IMM: case DASM_IMMS:
235#ifdef DASM_CHECKS 230#ifdef DASM_CHECKS
236 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); 231 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
237#endif 232#endif
@@ -273,7 +268,7 @@ int dasm_link(Dst_DECL, size_t *szp)
273 268
274 { /* Handle globals not defined in this translation unit. */ 269 { /* Handle globals not defined in this translation unit. */
275 int idx; 270 int idx;
276 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 271 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
277 int n = D->lglabels[idx]; 272 int n = D->lglabels[idx];
278 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 273 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
279 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 274 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -299,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp)
299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; 294 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
300 case DASM_REL_LG: case DASM_REL_PC: pos++; break; 295 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; 296 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
302 case DASM_IMM: pos++; break; 297 case DASM_IMM: case DASM_IMMS: pos++; break;
303 } 298 }
304 } 299 }
305 stop: (void)0; 300 stop: (void)0;
@@ -314,7 +309,7 @@ int dasm_link(Dst_DECL, size_t *szp)
314 309
315#ifdef DASM_CHECKS 310#ifdef DASM_CHECKS
316#define CK(x, st) \ 311#define CK(x, st) \
317 do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) 312 do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
318#else 313#else
319#define CK(x, st) ((void)0) 314#define CK(x, st) ((void)0)
320#endif 315#endif
@@ -349,25 +344,32 @@ int dasm_encode(Dst_DECL, void *buffer)
349 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; 344 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
350 break; 345 break;
351 case DASM_REL_LG: 346 case DASM_REL_LG:
352 CK(n >= 0, UNDEF_LG); 347 if (n < 0) {
348 n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
349 goto patchrel;
350 }
353 /* fallthrough */ 351 /* fallthrough */
354 case DASM_REL_PC: 352 case DASM_REL_PC:
355 CK(n >= 0, UNDEF_PC); 353 CK(n >= 0, UNDEF_PC);
356 n = *DASM_POS2PTR(D, n); 354 n = *DASM_POS2PTR(D, n);
357 if (ins & 2048) 355 if (ins & 2048)
358 n = n - (int)((char *)cp - base); 356 n = (n + (int)(size_t)base) & 0x0fffffff;
359 else 357 else
360 n = (n + (int)base) & 0x0fffffff; 358 n = n - (int)((char *)cp - base);
361 patchrel: 359 patchrel: {
360 unsigned int e = 16 + ((ins >> 12) & 15);
362 CK((n & 3) == 0 && 361 CK((n & 3) == 0 &&
363 ((n + ((ins & 2048) ? 0x00020000 : 0)) >> 362 ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
364 ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); 363 cp[-1] |= ((n>>2) & ((1<<e)-1));
365 cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); 364 }
366 break; 365 break;
367 case DASM_LABEL_LG: 366 case DASM_LABEL_LG:
368 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 367 ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
369 break; 368 break;
370 case DASM_LABEL_PC: break; 369 case DASM_LABEL_PC: break;
370 case DASM_IMMS:
371 cp[-1] |= ((n>>3) & 4); n &= 0x1f;
372 /* fallthrough */
371 case DASM_IMM: 373 case DASM_IMM:
372 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); 374 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
373 break; 375 break;
@@ -410,7 +412,7 @@ int dasm_checkstep(Dst_DECL, int secmatch)
410 } 412 }
411 if (D->status == DASM_S_OK && secmatch >= 0 && 413 if (D->status == DASM_S_OK && secmatch >= 0 &&
412 D->section != &D->sections[secmatch]) 414 D->section != &D->sections[secmatch])
413 D->status = DASM_S_MATCH_SEC|(D->section-D->sections); 415 D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
414 return D->status; 416 return D->status;
415} 417}
416#endif 418#endif
diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
index e2ff17f0..1c605b68 100644
--- a/dynasm/dasm_mips.lua
+++ b/dynasm/dasm_mips.lua
@@ -1,17 +1,20 @@
1------------------------------------------------------------------------------ 1------------------------------------------------------------------------------
2-- DynASM MIPS module. 2-- DynASM MIPS32/MIPS64 module.
3-- 3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. 4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice. 5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------ 6------------------------------------------------------------------------------
7 7
8local mips64 = mips64
9local mipsr6 = _map_def.MIPSR6
10
8-- Module information: 11-- Module information:
9local _info = { 12local _info = {
10 arch = "mips", 13 arch = mips64 and "mips64" or "mips",
11 description = "DynASM MIPS module", 14 description = "DynASM MIPS32/MIPS64 module",
12 version = "1.3.0", 15 version = "1.5.0",
13 vernum = 10300, 16 vernum = 10500,
14 release = "2012-01-23", 17 release = "2021-05-02",
15 author = "Mike Pall", 18 author = "Mike Pall",
16 license = "MIT", 19 license = "MIT",
17} 20}
@@ -27,7 +30,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
27local match, gmatch = _s.match, _s.gmatch 30local match, gmatch = _s.match, _s.gmatch
28local concat, sort = table.concat, table.sort 31local concat, sort = table.concat, table.sort
29local bit = bit or require("bit") 32local bit = bit or require("bit")
30local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex 33local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
34local tohex = bit.tohex
31 35
32-- Inherited tables and callbacks. 36-- Inherited tables and callbacks.
33local g_opt, g_arch 37local g_opt, g_arch
@@ -38,7 +42,7 @@ local wline, werror, wfatal, wwarn
38local action_names = { 42local action_names = {
39 "STOP", "SECTION", "ESC", "REL_EXT", 43 "STOP", "SECTION", "ESC", "REL_EXT",
40 "ALIGN", "REL_LG", "LABEL_LG", 44 "ALIGN", "REL_LG", "LABEL_LG",
41 "REL_PC", "LABEL_PC", "IMM", 45 "REL_PC", "LABEL_PC", "IMM", "IMMS",
42} 46}
43 47
44-- Maximum number of section buffer positions for dasm_put(). 48-- Maximum number of section buffer positions for dasm_put().
@@ -235,7 +239,6 @@ local map_op = {
235 bne_3 = "14000000STB", 239 bne_3 = "14000000STB",
236 blez_2 = "18000000SB", 240 blez_2 = "18000000SB",
237 bgtz_2 = "1c000000SB", 241 bgtz_2 = "1c000000SB",
238 addi_3 = "20000000TSI",
239 li_2 = "24000000TI", 242 li_2 = "24000000TI",
240 addiu_3 = "24000000TSI", 243 addiu_3 = "24000000TSI",
241 slti_3 = "28000000TSI", 244 slti_3 = "28000000TSI",
@@ -245,70 +248,52 @@ local map_op = {
245 ori_3 = "34000000TSU", 248 ori_3 = "34000000TSU",
246 xori_3 = "38000000TSU", 249 xori_3 = "38000000TSU",
247 lui_2 = "3c000000TU", 250 lui_2 = "3c000000TU",
248 beqzl_2 = "50000000SB", 251 daddiu_3 = mips64 and "64000000TSI",
249 beql_3 = "50000000STB", 252 ldl_2 = mips64 and "68000000TO",
250 bnezl_2 = "54000000SB", 253 ldr_2 = mips64 and "6c000000TO",
251 bnel_3 = "54000000STB",
252 blezl_2 = "58000000SB",
253 bgtzl_2 = "5c000000SB",
254 lb_2 = "80000000TO", 254 lb_2 = "80000000TO",
255 lh_2 = "84000000TO", 255 lh_2 = "84000000TO",
256 lwl_2 = "88000000TO",
257 lw_2 = "8c000000TO", 256 lw_2 = "8c000000TO",
258 lbu_2 = "90000000TO", 257 lbu_2 = "90000000TO",
259 lhu_2 = "94000000TO", 258 lhu_2 = "94000000TO",
260 lwr_2 = "98000000TO", 259 lwu_2 = mips64 and "9c000000TO",
261 sb_2 = "a0000000TO", 260 sb_2 = "a0000000TO",
262 sh_2 = "a4000000TO", 261 sh_2 = "a4000000TO",
263 swl_2 = "a8000000TO",
264 sw_2 = "ac000000TO", 262 sw_2 = "ac000000TO",
265 swr_2 = "b8000000TO",
266 cache_2 = "bc000000NO",
267 ll_2 = "c0000000TO",
268 lwc1_2 = "c4000000HO", 263 lwc1_2 = "c4000000HO",
269 pref_2 = "cc000000NO",
270 ldc1_2 = "d4000000HO", 264 ldc1_2 = "d4000000HO",
271 sc_2 = "e0000000TO", 265 ld_2 = mips64 and "dc000000TO",
272 swc1_2 = "e4000000HO", 266 swc1_2 = "e4000000HO",
273 sdc1_2 = "f4000000HO", 267 sdc1_2 = "f4000000HO",
268 sd_2 = mips64 and "fc000000TO",
274 269
275 -- Opcode SPECIAL. 270 -- Opcode SPECIAL.
276 nop_0 = "00000000", 271 nop_0 = "00000000",
277 sll_3 = "00000000DTA", 272 sll_3 = "00000000DTA",
278 movf_2 = "00000001DS", 273 sextw_2 = "00000000DT",
279 movf_3 = "00000001DSC",
280 movt_2 = "00010001DS",
281 movt_3 = "00010001DSC",
282 srl_3 = "00000002DTA", 274 srl_3 = "00000002DTA",
283 rotr_3 = "00200002DTA", 275 rotr_3 = "00200002DTA",
284 sra_3 = "00000003DTA", 276 sra_3 = "00000003DTA",
285 sllv_3 = "00000004DTS", 277 sllv_3 = "00000004DTS",
286 srlv_3 = "00000006DTS", 278 srlv_3 = "00000006DTS",
287 rotrv_3 = "00000046DTS", 279 rotrv_3 = "00000046DTS",
280 drotrv_3 = mips64 and "00000056DTS",
288 srav_3 = "00000007DTS", 281 srav_3 = "00000007DTS",
289 jr_1 = "00000008S",
290 jalr_1 = "0000f809S", 282 jalr_1 = "0000f809S",
291 jalr_2 = "00000009DS", 283 jalr_2 = "00000009DS",
292 movz_3 = "0000000aDST",
293 movn_3 = "0000000bDST",
294 syscall_0 = "0000000c", 284 syscall_0 = "0000000c",
295 syscall_1 = "0000000cY", 285 syscall_1 = "0000000cY",
296 break_0 = "0000000d", 286 break_0 = "0000000d",
297 break_1 = "0000000dY", 287 break_1 = "0000000dY",
298 sync_0 = "0000000f", 288 sync_0 = "0000000f",
299 mfhi_1 = "00000010D", 289 dsllv_3 = mips64 and "00000014DTS",
300 mthi_1 = "00000011S", 290 dsrlv_3 = mips64 and "00000016DTS",
301 mflo_1 = "00000012D", 291 dsrav_3 = mips64 and "00000017DTS",
302 mtlo_1 = "00000013S",
303 mult_2 = "00000018ST",
304 multu_2 = "00000019ST",
305 div_2 = "0000001aST",
306 divu_2 = "0000001bST",
307 add_3 = "00000020DST", 292 add_3 = "00000020DST",
308 move_2 = "00000021DS", 293 move_2 = mips64 and "00000025DS" or "00000021DS",
309 addu_3 = "00000021DST", 294 addu_3 = "00000021DST",
310 sub_3 = "00000022DST", 295 sub_3 = "00000022DST",
311 negu_2 = "00000023DT", 296 negu_2 = mips64 and "0000002fDT" or "00000023DT",
312 subu_3 = "00000023DST", 297 subu_3 = "00000023DST",
313 and_3 = "00000024DST", 298 and_3 = "00000024DST",
314 or_3 = "00000025DST", 299 or_3 = "00000025DST",
@@ -317,6 +302,10 @@ local map_op = {
317 nor_3 = "00000027DST", 302 nor_3 = "00000027DST",
318 slt_3 = "0000002aDST", 303 slt_3 = "0000002aDST",
319 sltu_3 = "0000002bDST", 304 sltu_3 = "0000002bDST",
305 dadd_3 = mips64 and "0000002cDST",
306 daddu_3 = mips64 and "0000002dDST",
307 dsub_3 = mips64 and "0000002eDST",
308 dsubu_3 = mips64 and "0000002fDST",
320 tge_2 = "00000030ST", 309 tge_2 = "00000030ST",
321 tge_3 = "00000030STZ", 310 tge_3 = "00000030STZ",
322 tgeu_2 = "00000031ST", 311 tgeu_2 = "00000031ST",
@@ -329,40 +318,36 @@ local map_op = {
329 teq_3 = "00000034STZ", 318 teq_3 = "00000034STZ",
330 tne_2 = "00000036ST", 319 tne_2 = "00000036ST",
331 tne_3 = "00000036STZ", 320 tne_3 = "00000036STZ",
321 dsll_3 = mips64 and "00000038DTa",
322 dsrl_3 = mips64 and "0000003aDTa",
323 drotr_3 = mips64 and "0020003aDTa",
324 dsra_3 = mips64 and "0000003bDTa",
325 dsll32_3 = mips64 and "0000003cDTA",
326 dsrl32_3 = mips64 and "0000003eDTA",
327 drotr32_3 = mips64 and "0020003eDTA",
328 dsra32_3 = mips64 and "0000003fDTA",
332 329
333 -- Opcode REGIMM. 330 -- Opcode REGIMM.
334 bltz_2 = "04000000SB", 331 bltz_2 = "04000000SB",
335 bgez_2 = "04010000SB", 332 bgez_2 = "04010000SB",
336 bltzl_2 = "04020000SB", 333 bltzl_2 = "04020000SB",
337 bgezl_2 = "04030000SB", 334 bgezl_2 = "04030000SB",
338 tgei_2 = "04080000SI",
339 tgeiu_2 = "04090000SI",
340 tlti_2 = "040a0000SI",
341 tltiu_2 = "040b0000SI",
342 teqi_2 = "040c0000SI",
343 tnei_2 = "040e0000SI",
344 bltzal_2 = "04100000SB",
345 bal_1 = "04110000B", 335 bal_1 = "04110000B",
346 bgezal_2 = "04110000SB",
347 bltzall_2 = "04120000SB",
348 bgezall_2 = "04130000SB",
349 synci_1 = "041f0000O", 336 synci_1 = "041f0000O",
350 337
351 -- Opcode SPECIAL2.
352 madd_2 = "70000000ST",
353 maddu_2 = "70000001ST",
354 mul_3 = "70000002DST",
355 msub_2 = "70000004ST",
356 msubu_2 = "70000005ST",
357 clz_2 = "70000020DS=",
358 clo_2 = "70000021DS=",
359 sdbbp_0 = "7000003f",
360 sdbbp_1 = "7000003fY",
361
362 -- Opcode SPECIAL3. 338 -- Opcode SPECIAL3.
363 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 339 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
340 dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
341 dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
342 dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1
343 zextw_2 = mips64 and "7c00f803TS",
364 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 344 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1
345 dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33
346 dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
347 dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1
365 wsbh_2 = "7c0000a0DT", 348 wsbh_2 = "7c0000a0DT",
349 dsbh_2 = mips64 and "7c0000a4DT",
350 dshd_2 = mips64 and "7c000164DT",
366 seb_2 = "7c000420DT", 351 seb_2 = "7c000420DT",
367 seh_2 = "7c000620DT", 352 seh_2 = "7c000620DT",
368 rdhwr_2 = "7c00003bTD", 353 rdhwr_2 = "7c00003bTD",
@@ -370,8 +355,12 @@ local map_op = {
370 -- Opcode COP0. 355 -- Opcode COP0.
371 mfc0_2 = "40000000TD", 356 mfc0_2 = "40000000TD",
372 mfc0_3 = "40000000TDW", 357 mfc0_3 = "40000000TDW",
358 dmfc0_2 = mips64 and "40200000TD",
359 dmfc0_3 = mips64 and "40200000TDW",
373 mtc0_2 = "40800000TD", 360 mtc0_2 = "40800000TD",
374 mtc0_3 = "40800000TDW", 361 mtc0_3 = "40800000TDW",
362 dmtc0_2 = mips64 and "40a00000TD",
363 dmtc0_3 = mips64 and "40a00000TDW",
375 rdpgpr_2 = "41400000DT", 364 rdpgpr_2 = "41400000DT",
376 di_0 = "41606000", 365 di_0 = "41606000",
377 di_1 = "41606000T", 366 di_1 = "41606000T",
@@ -388,21 +377,14 @@ local map_op = {
388 377
389 -- Opcode COP1. 378 -- Opcode COP1.
390 mfc1_2 = "44000000TG", 379 mfc1_2 = "44000000TG",
380 dmfc1_2 = mips64 and "44200000TG",
391 cfc1_2 = "44400000TG", 381 cfc1_2 = "44400000TG",
392 mfhc1_2 = "44600000TG", 382 mfhc1_2 = "44600000TG",
393 mtc1_2 = "44800000TG", 383 mtc1_2 = "44800000TG",
384 dmtc1_2 = mips64 and "44a00000TG",
394 ctc1_2 = "44c00000TG", 385 ctc1_2 = "44c00000TG",
395 mthc1_2 = "44e00000TG", 386 mthc1_2 = "44e00000TG",
396 387
397 bc1f_1 = "45000000B",
398 bc1f_2 = "45000000CB",
399 bc1t_1 = "45010000B",
400 bc1t_2 = "45010000CB",
401 bc1fl_1 = "45020000B",
402 bc1fl_2 = "45020000CB",
403 bc1tl_1 = "45030000B",
404 bc1tl_2 = "45030000CB",
405
406 ["add.s_3"] = "46000000FGH", 388 ["add.s_3"] = "46000000FGH",
407 ["sub.s_3"] = "46000001FGH", 389 ["sub.s_3"] = "46000001FGH",
408 ["mul.s_3"] = "46000002FGH", 390 ["mul.s_3"] = "46000002FGH",
@@ -419,51 +401,11 @@ local map_op = {
419 ["trunc.w.s_2"] = "4600000dFG", 401 ["trunc.w.s_2"] = "4600000dFG",
420 ["ceil.w.s_2"] = "4600000eFG", 402 ["ceil.w.s_2"] = "4600000eFG",
421 ["floor.w.s_2"] = "4600000fFG", 403 ["floor.w.s_2"] = "4600000fFG",
422 ["movf.s_2"] = "46000011FG",
423 ["movf.s_3"] = "46000011FGC",
424 ["movt.s_2"] = "46010011FG",
425 ["movt.s_3"] = "46010011FGC",
426 ["movz.s_3"] = "46000012FGT",
427 ["movn.s_3"] = "46000013FGT",
428 ["recip.s_2"] = "46000015FG", 404 ["recip.s_2"] = "46000015FG",
429 ["rsqrt.s_2"] = "46000016FG", 405 ["rsqrt.s_2"] = "46000016FG",
430 ["cvt.d.s_2"] = "46000021FG", 406 ["cvt.d.s_2"] = "46000021FG",
431 ["cvt.w.s_2"] = "46000024FG", 407 ["cvt.w.s_2"] = "46000024FG",
432 ["cvt.l.s_2"] = "46000025FG", 408 ["cvt.l.s_2"] = "46000025FG",
433 ["cvt.ps.s_3"] = "46000026FGH",
434 ["c.f.s_2"] = "46000030GH",
435 ["c.f.s_3"] = "46000030VGH",
436 ["c.un.s_2"] = "46000031GH",
437 ["c.un.s_3"] = "46000031VGH",
438 ["c.eq.s_2"] = "46000032GH",
439 ["c.eq.s_3"] = "46000032VGH",
440 ["c.ueq.s_2"] = "46000033GH",
441 ["c.ueq.s_3"] = "46000033VGH",
442 ["c.olt.s_2"] = "46000034GH",
443 ["c.olt.s_3"] = "46000034VGH",
444 ["c.ult.s_2"] = "46000035GH",
445 ["c.ult.s_3"] = "46000035VGH",
446 ["c.ole.s_2"] = "46000036GH",
447 ["c.ole.s_3"] = "46000036VGH",
448 ["c.ule.s_2"] = "46000037GH",
449 ["c.ule.s_3"] = "46000037VGH",
450 ["c.sf.s_2"] = "46000038GH",
451 ["c.sf.s_3"] = "46000038VGH",
452 ["c.ngle.s_2"] = "46000039GH",
453 ["c.ngle.s_3"] = "46000039VGH",
454 ["c.seq.s_2"] = "4600003aGH",
455 ["c.seq.s_3"] = "4600003aVGH",
456 ["c.ngl.s_2"] = "4600003bGH",
457 ["c.ngl.s_3"] = "4600003bVGH",
458 ["c.lt.s_2"] = "4600003cGH",
459 ["c.lt.s_3"] = "4600003cVGH",
460 ["c.nge.s_2"] = "4600003dGH",
461 ["c.nge.s_3"] = "4600003dVGH",
462 ["c.le.s_2"] = "4600003eGH",
463 ["c.le.s_3"] = "4600003eVGH",
464 ["c.ngt.s_2"] = "4600003fGH",
465 ["c.ngt.s_3"] = "4600003fVGH",
466
467 ["add.d_3"] = "46200000FGH", 409 ["add.d_3"] = "46200000FGH",
468 ["sub.d_3"] = "46200001FGH", 410 ["sub.d_3"] = "46200001FGH",
469 ["mul.d_3"] = "46200002FGH", 411 ["mul.d_3"] = "46200002FGH",
@@ -480,130 +422,410 @@ local map_op = {
480 ["trunc.w.d_2"] = "4620000dFG", 422 ["trunc.w.d_2"] = "4620000dFG",
481 ["ceil.w.d_2"] = "4620000eFG", 423 ["ceil.w.d_2"] = "4620000eFG",
482 ["floor.w.d_2"] = "4620000fFG", 424 ["floor.w.d_2"] = "4620000fFG",
483 ["movf.d_2"] = "46200011FG",
484 ["movf.d_3"] = "46200011FGC",
485 ["movt.d_2"] = "46210011FG",
486 ["movt.d_3"] = "46210011FGC",
487 ["movz.d_3"] = "46200012FGT",
488 ["movn.d_3"] = "46200013FGT",
489 ["recip.d_2"] = "46200015FG", 425 ["recip.d_2"] = "46200015FG",
490 ["rsqrt.d_2"] = "46200016FG", 426 ["rsqrt.d_2"] = "46200016FG",
491 ["cvt.s.d_2"] = "46200020FG", 427 ["cvt.s.d_2"] = "46200020FG",
492 ["cvt.w.d_2"] = "46200024FG", 428 ["cvt.w.d_2"] = "46200024FG",
493 ["cvt.l.d_2"] = "46200025FG", 429 ["cvt.l.d_2"] = "46200025FG",
494 ["c.f.d_2"] = "46200030GH",
495 ["c.f.d_3"] = "46200030VGH",
496 ["c.un.d_2"] = "46200031GH",
497 ["c.un.d_3"] = "46200031VGH",
498 ["c.eq.d_2"] = "46200032GH",
499 ["c.eq.d_3"] = "46200032VGH",
500 ["c.ueq.d_2"] = "46200033GH",
501 ["c.ueq.d_3"] = "46200033VGH",
502 ["c.olt.d_2"] = "46200034GH",
503 ["c.olt.d_3"] = "46200034VGH",
504 ["c.ult.d_2"] = "46200035GH",
505 ["c.ult.d_3"] = "46200035VGH",
506 ["c.ole.d_2"] = "46200036GH",
507 ["c.ole.d_3"] = "46200036VGH",
508 ["c.ule.d_2"] = "46200037GH",
509 ["c.ule.d_3"] = "46200037VGH",
510 ["c.sf.d_2"] = "46200038GH",
511 ["c.sf.d_3"] = "46200038VGH",
512 ["c.ngle.d_2"] = "46200039GH",
513 ["c.ngle.d_3"] = "46200039VGH",
514 ["c.seq.d_2"] = "4620003aGH",
515 ["c.seq.d_3"] = "4620003aVGH",
516 ["c.ngl.d_2"] = "4620003bGH",
517 ["c.ngl.d_3"] = "4620003bVGH",
518 ["c.lt.d_2"] = "4620003cGH",
519 ["c.lt.d_3"] = "4620003cVGH",
520 ["c.nge.d_2"] = "4620003dGH",
521 ["c.nge.d_3"] = "4620003dVGH",
522 ["c.le.d_2"] = "4620003eGH",
523 ["c.le.d_3"] = "4620003eVGH",
524 ["c.ngt.d_2"] = "4620003fGH",
525 ["c.ngt.d_3"] = "4620003fVGH",
526
527 ["add.ps_3"] = "46c00000FGH",
528 ["sub.ps_3"] = "46c00001FGH",
529 ["mul.ps_3"] = "46c00002FGH",
530 ["abs.ps_2"] = "46c00005FG",
531 ["mov.ps_2"] = "46c00006FG",
532 ["neg.ps_2"] = "46c00007FG",
533 ["movf.ps_2"] = "46c00011FG",
534 ["movf.ps_3"] = "46c00011FGC",
535 ["movt.ps_2"] = "46c10011FG",
536 ["movt.ps_3"] = "46c10011FGC",
537 ["movz.ps_3"] = "46c00012FGT",
538 ["movn.ps_3"] = "46c00013FGT",
539 ["cvt.s.pu_2"] = "46c00020FG",
540 ["cvt.s.pl_2"] = "46c00028FG",
541 ["pll.ps_3"] = "46c0002cFGH",
542 ["plu.ps_3"] = "46c0002dFGH",
543 ["pul.ps_3"] = "46c0002eFGH",
544 ["puu.ps_3"] = "46c0002fFGH",
545 ["c.f.ps_2"] = "46c00030GH",
546 ["c.f.ps_3"] = "46c00030VGH",
547 ["c.un.ps_2"] = "46c00031GH",
548 ["c.un.ps_3"] = "46c00031VGH",
549 ["c.eq.ps_2"] = "46c00032GH",
550 ["c.eq.ps_3"] = "46c00032VGH",
551 ["c.ueq.ps_2"] = "46c00033GH",
552 ["c.ueq.ps_3"] = "46c00033VGH",
553 ["c.olt.ps_2"] = "46c00034GH",
554 ["c.olt.ps_3"] = "46c00034VGH",
555 ["c.ult.ps_2"] = "46c00035GH",
556 ["c.ult.ps_3"] = "46c00035VGH",
557 ["c.ole.ps_2"] = "46c00036GH",
558 ["c.ole.ps_3"] = "46c00036VGH",
559 ["c.ule.ps_2"] = "46c00037GH",
560 ["c.ule.ps_3"] = "46c00037VGH",
561 ["c.sf.ps_2"] = "46c00038GH",
562 ["c.sf.ps_3"] = "46c00038VGH",
563 ["c.ngle.ps_2"] = "46c00039GH",
564 ["c.ngle.ps_3"] = "46c00039VGH",
565 ["c.seq.ps_2"] = "46c0003aGH",
566 ["c.seq.ps_3"] = "46c0003aVGH",
567 ["c.ngl.ps_2"] = "46c0003bGH",
568 ["c.ngl.ps_3"] = "46c0003bVGH",
569 ["c.lt.ps_2"] = "46c0003cGH",
570 ["c.lt.ps_3"] = "46c0003cVGH",
571 ["c.nge.ps_2"] = "46c0003dGH",
572 ["c.nge.ps_3"] = "46c0003dVGH",
573 ["c.le.ps_2"] = "46c0003eGH",
574 ["c.le.ps_3"] = "46c0003eVGH",
575 ["c.ngt.ps_2"] = "46c0003fGH",
576 ["c.ngt.ps_3"] = "46c0003fVGH",
577
578 ["cvt.s.w_2"] = "46800020FG", 430 ["cvt.s.w_2"] = "46800020FG",
579 ["cvt.d.w_2"] = "46800021FG", 431 ["cvt.d.w_2"] = "46800021FG",
580
581 ["cvt.s.l_2"] = "46a00020FG", 432 ["cvt.s.l_2"] = "46a00020FG",
582 ["cvt.d.l_2"] = "46a00021FG", 433 ["cvt.d.l_2"] = "46a00021FG",
583
584 -- Opcode COP1X.
585 lwxc1_2 = "4c000000FX",
586 ldxc1_2 = "4c000001FX",
587 luxc1_2 = "4c000005FX",
588 swxc1_2 = "4c000008FX",
589 sdxc1_2 = "4c000009FX",
590 suxc1_2 = "4c00000dFX",
591 prefx_2 = "4c00000fMX",
592 ["alnv.ps_4"] = "4c00001eFGHS",
593 ["madd.s_4"] = "4c000020FRGH",
594 ["madd.d_4"] = "4c000021FRGH",
595 ["madd.ps_4"] = "4c000026FRGH",
596 ["msub.s_4"] = "4c000028FRGH",
597 ["msub.d_4"] = "4c000029FRGH",
598 ["msub.ps_4"] = "4c00002eFRGH",
599 ["nmadd.s_4"] = "4c000030FRGH",
600 ["nmadd.d_4"] = "4c000031FRGH",
601 ["nmadd.ps_4"] = "4c000036FRGH",
602 ["nmsub.s_4"] = "4c000038FRGH",
603 ["nmsub.d_4"] = "4c000039FRGH",
604 ["nmsub.ps_4"] = "4c00003eFRGH",
605} 434}
606 435
436if mipsr6 then -- Instructions added with MIPSR6.
437
438 for k,v in pairs({
439
440 -- Add immediate to upper bits.
441 aui_3 = "3c000000TSI",
442 daui_3 = mips64 and "74000000TSI",
443 dahi_2 = mips64 and "04060000SI",
444 dati_2 = mips64 and "041e0000SI",
445
446 -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc.
447
448 -- Compact branches.
449 blezalc_2 = "18000000TB", -- rt != 0.
450 bgezalc_2 = "18000000T=SB", -- rt != 0.
451 bgtzalc_2 = "1c000000TB", -- rt != 0.
452 bltzalc_2 = "1c000000T=SB", -- rt != 0.
453
454 blezc_2 = "58000000TB", -- rt != 0.
455 bgezc_2 = "58000000T=SB", -- rt != 0.
456 bgec_3 = "58000000STB", -- rs != rt.
457 blec_3 = "58000000TSB", -- rt != rs.
458
459 bgtzc_2 = "5c000000TB", -- rt != 0.
460 bltzc_2 = "5c000000T=SB", -- rt != 0.
461 bltc_3 = "5c000000STB", -- rs != rt.
462 bgtc_3 = "5c000000TSB", -- rt != rs.
463
464 bgeuc_3 = "18000000STB", -- rs != rt.
465 bleuc_3 = "18000000TSB", -- rt != rs.
466 bltuc_3 = "1c000000STB", -- rs != rt.
467 bgtuc_3 = "1c000000TSB", -- rt != rs.
468
469 beqzalc_2 = "20000000TB", -- rt != 0.
470 bnezalc_2 = "60000000TB", -- rt != 0.
471 beqc_3 = "20000000STB", -- rs < rt.
472 bnec_3 = "60000000STB", -- rs < rt.
473 bovc_3 = "20000000STB", -- rs >= rt.
474 bnvc_3 = "60000000STB", -- rs >= rt.
475
476 beqzc_2 = "d8000000SK", -- rs != 0.
477 bnezc_2 = "f8000000SK", -- rs != 0.
478 jic_2 = "d8000000TI",
479 jialc_2 = "f8000000TI",
480 bc_1 = "c8000000L",
481 balc_1 = "e8000000L",
482
483 -- Opcode SPECIAL.
484 jr_1 = "00000009S",
485 sdbbp_0 = "0000000e",
486 sdbbp_1 = "0000000eY",
487 lsa_4 = "00000005DSTA",
488 dlsa_4 = mips64 and "00000015DSTA",
489 seleqz_3 = "00000035DST",
490 selnez_3 = "00000037DST",
491 clz_2 = "00000050DS",
492 clo_2 = "00000051DS",
493 dclz_2 = mips64 and "00000052DS",
494 dclo_2 = mips64 and "00000053DS",
495 mul_3 = "00000098DST",
496 muh_3 = "000000d8DST",
497 mulu_3 = "00000099DST",
498 muhu_3 = "000000d9DST",
499 div_3 = "0000009aDST",
500 mod_3 = "000000daDST",
501 divu_3 = "0000009bDST",
502 modu_3 = "000000dbDST",
503 dmul_3 = mips64 and "0000009cDST",
504 dmuh_3 = mips64 and "000000dcDST",
505 dmulu_3 = mips64 and "0000009dDST",
506 dmuhu_3 = mips64 and "000000ddDST",
507 ddiv_3 = mips64 and "0000009eDST",
508 dmod_3 = mips64 and "000000deDST",
509 ddivu_3 = mips64 and "0000009fDST",
510 dmodu_3 = mips64 and "000000dfDST",
511
512 -- Opcode SPECIAL3.
513 align_4 = "7c000220DSTA",
514 dalign_4 = mips64 and "7c000224DSTA",
515 bitswap_2 = "7c000020DT",
516 dbitswap_2 = mips64 and "7c000024DT",
517
518 -- Opcode COP1.
519 bc1eqz_2 = "45200000HB",
520 bc1nez_2 = "45a00000HB",
521
522 ["sel.s_3"] = "46000010FGH",
523 ["seleqz.s_3"] = "46000014FGH",
524 ["selnez.s_3"] = "46000017FGH",
525 ["maddf.s_3"] = "46000018FGH",
526 ["msubf.s_3"] = "46000019FGH",
527 ["rint.s_2"] = "4600001aFG",
528 ["class.s_2"] = "4600001bFG",
529 ["min.s_3"] = "4600001cFGH",
530 ["mina.s_3"] = "4600001dFGH",
531 ["max.s_3"] = "4600001eFGH",
532 ["maxa.s_3"] = "4600001fFGH",
533 ["cmp.af.s_3"] = "46800000FGH",
534 ["cmp.un.s_3"] = "46800001FGH",
535 ["cmp.or.s_3"] = "46800011FGH",
536 ["cmp.eq.s_3"] = "46800002FGH",
537 ["cmp.une.s_3"] = "46800012FGH",
538 ["cmp.ueq.s_3"] = "46800003FGH",
539 ["cmp.ne.s_3"] = "46800013FGH",
540 ["cmp.lt.s_3"] = "46800004FGH",
541 ["cmp.ult.s_3"] = "46800005FGH",
542 ["cmp.le.s_3"] = "46800006FGH",
543 ["cmp.ule.s_3"] = "46800007FGH",
544 ["cmp.saf.s_3"] = "46800008FGH",
545 ["cmp.sun.s_3"] = "46800009FGH",
546 ["cmp.sor.s_3"] = "46800019FGH",
547 ["cmp.seq.s_3"] = "4680000aFGH",
548 ["cmp.sune.s_3"] = "4680001aFGH",
549 ["cmp.sueq.s_3"] = "4680000bFGH",
550 ["cmp.sne.s_3"] = "4680001bFGH",
551 ["cmp.slt.s_3"] = "4680000cFGH",
552 ["cmp.sult.s_3"] = "4680000dFGH",
553 ["cmp.sle.s_3"] = "4680000eFGH",
554 ["cmp.sule.s_3"] = "4680000fFGH",
555
556 ["sel.d_3"] = "46200010FGH",
557 ["seleqz.d_3"] = "46200014FGH",
558 ["selnez.d_3"] = "46200017FGH",
559 ["maddf.d_3"] = "46200018FGH",
560 ["msubf.d_3"] = "46200019FGH",
561 ["rint.d_2"] = "4620001aFG",
562 ["class.d_2"] = "4620001bFG",
563 ["min.d_3"] = "4620001cFGH",
564 ["mina.d_3"] = "4620001dFGH",
565 ["max.d_3"] = "4620001eFGH",
566 ["maxa.d_3"] = "4620001fFGH",
567 ["cmp.af.d_3"] = "46a00000FGH",
568 ["cmp.un.d_3"] = "46a00001FGH",
569 ["cmp.or.d_3"] = "46a00011FGH",
570 ["cmp.eq.d_3"] = "46a00002FGH",
571 ["cmp.une.d_3"] = "46a00012FGH",
572 ["cmp.ueq.d_3"] = "46a00003FGH",
573 ["cmp.ne.d_3"] = "46a00013FGH",
574 ["cmp.lt.d_3"] = "46a00004FGH",
575 ["cmp.ult.d_3"] = "46a00005FGH",
576 ["cmp.le.d_3"] = "46a00006FGH",
577 ["cmp.ule.d_3"] = "46a00007FGH",
578 ["cmp.saf.d_3"] = "46a00008FGH",
579 ["cmp.sun.d_3"] = "46a00009FGH",
580 ["cmp.sor.d_3"] = "46a00019FGH",
581 ["cmp.seq.d_3"] = "46a0000aFGH",
582 ["cmp.sune.d_3"] = "46a0001aFGH",
583 ["cmp.sueq.d_3"] = "46a0000bFGH",
584 ["cmp.sne.d_3"] = "46a0001bFGH",
585 ["cmp.slt.d_3"] = "46a0000cFGH",
586 ["cmp.sult.d_3"] = "46a0000dFGH",
587 ["cmp.sle.d_3"] = "46a0000eFGH",
588 ["cmp.sule.d_3"] = "46a0000fFGH",
589
590 }) do map_op[k] = v end
591
592else -- Instructions removed by MIPSR6.
593
594 for k,v in pairs({
595 -- Traps, don't use.
596 addi_3 = "20000000TSI",
597 daddi_3 = mips64 and "60000000TSI",
598
599 -- Branch on likely, don't use.
600 beqzl_2 = "50000000SB",
601 beql_3 = "50000000STB",
602 bnezl_2 = "54000000SB",
603 bnel_3 = "54000000STB",
604 blezl_2 = "58000000SB",
605 bgtzl_2 = "5c000000SB",
606
607 lwl_2 = "88000000TO",
608 lwr_2 = "98000000TO",
609 swl_2 = "a8000000TO",
610 sdl_2 = mips64 and "b0000000TO",
611 sdr_2 = mips64 and "b1000000TO",
612 swr_2 = "b8000000TO",
613 cache_2 = "bc000000NO",
614 ll_2 = "c0000000TO",
615 pref_2 = "cc000000NO",
616 sc_2 = "e0000000TO",
617 scd_2 = mips64 and "f0000000TO",
618
619 -- Opcode SPECIAL.
620 movf_2 = "00000001DS",
621 movf_3 = "00000001DSC",
622 movt_2 = "00010001DS",
623 movt_3 = "00010001DSC",
624 jr_1 = "00000008S",
625 movz_3 = "0000000aDST",
626 movn_3 = "0000000bDST",
627 mfhi_1 = "00000010D",
628 mthi_1 = "00000011S",
629 mflo_1 = "00000012D",
630 mtlo_1 = "00000013S",
631 mult_2 = "00000018ST",
632 multu_2 = "00000019ST",
633 div_3 = "0000001aST",
634 divu_3 = "0000001bST",
635 ddiv_3 = mips64 and "0000001eST",
636 ddivu_3 = mips64 and "0000001fST",
637 dmult_2 = mips64 and "0000001cST",
638 dmultu_2 = mips64 and "0000001dST",
639
640 -- Opcode REGIMM.
641 tgei_2 = "04080000SI",
642 tgeiu_2 = "04090000SI",
643 tlti_2 = "040a0000SI",
644 tltiu_2 = "040b0000SI",
645 teqi_2 = "040c0000SI",
646 tnei_2 = "040e0000SI",
647 bltzal_2 = "04100000SB",
648 bgezal_2 = "04110000SB",
649 bltzall_2 = "04120000SB",
650 bgezall_2 = "04130000SB",
651
652 -- Opcode SPECIAL2.
653 madd_2 = "70000000ST",
654 maddu_2 = "70000001ST",
655 mul_3 = "70000002DST",
656 msub_2 = "70000004ST",
657 msubu_2 = "70000005ST",
658 clz_2 = "70000020D=TS",
659 clo_2 = "70000021D=TS",
660 dclz_2 = mips64 and "70000024D=TS",
661 dclo_2 = mips64 and "70000025D=TS",
662 sdbbp_0 = "7000003f",
663 sdbbp_1 = "7000003fY",
664
665 -- Opcode COP1.
666 bc1f_1 = "45000000B",
667 bc1f_2 = "45000000CB",
668 bc1t_1 = "45010000B",
669 bc1t_2 = "45010000CB",
670 bc1fl_1 = "45020000B",
671 bc1fl_2 = "45020000CB",
672 bc1tl_1 = "45030000B",
673 bc1tl_2 = "45030000CB",
674
675 ["movf.s_2"] = "46000011FG",
676 ["movf.s_3"] = "46000011FGC",
677 ["movt.s_2"] = "46010011FG",
678 ["movt.s_3"] = "46010011FGC",
679 ["movz.s_3"] = "46000012FGT",
680 ["movn.s_3"] = "46000013FGT",
681 ["cvt.ps.s_3"] = "46000026FGH",
682 ["c.f.s_2"] = "46000030GH",
683 ["c.f.s_3"] = "46000030VGH",
684 ["c.un.s_2"] = "46000031GH",
685 ["c.un.s_3"] = "46000031VGH",
686 ["c.eq.s_2"] = "46000032GH",
687 ["c.eq.s_3"] = "46000032VGH",
688 ["c.ueq.s_2"] = "46000033GH",
689 ["c.ueq.s_3"] = "46000033VGH",
690 ["c.olt.s_2"] = "46000034GH",
691 ["c.olt.s_3"] = "46000034VGH",
692 ["c.ult.s_2"] = "46000035GH",
693 ["c.ult.s_3"] = "46000035VGH",
694 ["c.ole.s_2"] = "46000036GH",
695 ["c.ole.s_3"] = "46000036VGH",
696 ["c.ule.s_2"] = "46000037GH",
697 ["c.ule.s_3"] = "46000037VGH",
698 ["c.sf.s_2"] = "46000038GH",
699 ["c.sf.s_3"] = "46000038VGH",
700 ["c.ngle.s_2"] = "46000039GH",
701 ["c.ngle.s_3"] = "46000039VGH",
702 ["c.seq.s_2"] = "4600003aGH",
703 ["c.seq.s_3"] = "4600003aVGH",
704 ["c.ngl.s_2"] = "4600003bGH",
705 ["c.ngl.s_3"] = "4600003bVGH",
706 ["c.lt.s_2"] = "4600003cGH",
707 ["c.lt.s_3"] = "4600003cVGH",
708 ["c.nge.s_2"] = "4600003dGH",
709 ["c.nge.s_3"] = "4600003dVGH",
710 ["c.le.s_2"] = "4600003eGH",
711 ["c.le.s_3"] = "4600003eVGH",
712 ["c.ngt.s_2"] = "4600003fGH",
713 ["c.ngt.s_3"] = "4600003fVGH",
714 ["movf.d_2"] = "46200011FG",
715 ["movf.d_3"] = "46200011FGC",
716 ["movt.d_2"] = "46210011FG",
717 ["movt.d_3"] = "46210011FGC",
718 ["movz.d_3"] = "46200012FGT",
719 ["movn.d_3"] = "46200013FGT",
720 ["c.f.d_2"] = "46200030GH",
721 ["c.f.d_3"] = "46200030VGH",
722 ["c.un.d_2"] = "46200031GH",
723 ["c.un.d_3"] = "46200031VGH",
724 ["c.eq.d_2"] = "46200032GH",
725 ["c.eq.d_3"] = "46200032VGH",
726 ["c.ueq.d_2"] = "46200033GH",
727 ["c.ueq.d_3"] = "46200033VGH",
728 ["c.olt.d_2"] = "46200034GH",
729 ["c.olt.d_3"] = "46200034VGH",
730 ["c.ult.d_2"] = "46200035GH",
731 ["c.ult.d_3"] = "46200035VGH",
732 ["c.ole.d_2"] = "46200036GH",
733 ["c.ole.d_3"] = "46200036VGH",
734 ["c.ule.d_2"] = "46200037GH",
735 ["c.ule.d_3"] = "46200037VGH",
736 ["c.sf.d_2"] = "46200038GH",
737 ["c.sf.d_3"] = "46200038VGH",
738 ["c.ngle.d_2"] = "46200039GH",
739 ["c.ngle.d_3"] = "46200039VGH",
740 ["c.seq.d_2"] = "4620003aGH",
741 ["c.seq.d_3"] = "4620003aVGH",
742 ["c.ngl.d_2"] = "4620003bGH",
743 ["c.ngl.d_3"] = "4620003bVGH",
744 ["c.lt.d_2"] = "4620003cGH",
745 ["c.lt.d_3"] = "4620003cVGH",
746 ["c.nge.d_2"] = "4620003dGH",
747 ["c.nge.d_3"] = "4620003dVGH",
748 ["c.le.d_2"] = "4620003eGH",
749 ["c.le.d_3"] = "4620003eVGH",
750 ["c.ngt.d_2"] = "4620003fGH",
751 ["c.ngt.d_3"] = "4620003fVGH",
752 ["add.ps_3"] = "46c00000FGH",
753 ["sub.ps_3"] = "46c00001FGH",
754 ["mul.ps_3"] = "46c00002FGH",
755 ["abs.ps_2"] = "46c00005FG",
756 ["mov.ps_2"] = "46c00006FG",
757 ["neg.ps_2"] = "46c00007FG",
758 ["movf.ps_2"] = "46c00011FG",
759 ["movf.ps_3"] = "46c00011FGC",
760 ["movt.ps_2"] = "46c10011FG",
761 ["movt.ps_3"] = "46c10011FGC",
762 ["movz.ps_3"] = "46c00012FGT",
763 ["movn.ps_3"] = "46c00013FGT",
764 ["cvt.s.pu_2"] = "46c00020FG",
765 ["cvt.s.pl_2"] = "46c00028FG",
766 ["pll.ps_3"] = "46c0002cFGH",
767 ["plu.ps_3"] = "46c0002dFGH",
768 ["pul.ps_3"] = "46c0002eFGH",
769 ["puu.ps_3"] = "46c0002fFGH",
770 ["c.f.ps_2"] = "46c00030GH",
771 ["c.f.ps_3"] = "46c00030VGH",
772 ["c.un.ps_2"] = "46c00031GH",
773 ["c.un.ps_3"] = "46c00031VGH",
774 ["c.eq.ps_2"] = "46c00032GH",
775 ["c.eq.ps_3"] = "46c00032VGH",
776 ["c.ueq.ps_2"] = "46c00033GH",
777 ["c.ueq.ps_3"] = "46c00033VGH",
778 ["c.olt.ps_2"] = "46c00034GH",
779 ["c.olt.ps_3"] = "46c00034VGH",
780 ["c.ult.ps_2"] = "46c00035GH",
781 ["c.ult.ps_3"] = "46c00035VGH",
782 ["c.ole.ps_2"] = "46c00036GH",
783 ["c.ole.ps_3"] = "46c00036VGH",
784 ["c.ule.ps_2"] = "46c00037GH",
785 ["c.ule.ps_3"] = "46c00037VGH",
786 ["c.sf.ps_2"] = "46c00038GH",
787 ["c.sf.ps_3"] = "46c00038VGH",
788 ["c.ngle.ps_2"] = "46c00039GH",
789 ["c.ngle.ps_3"] = "46c00039VGH",
790 ["c.seq.ps_2"] = "46c0003aGH",
791 ["c.seq.ps_3"] = "46c0003aVGH",
792 ["c.ngl.ps_2"] = "46c0003bGH",
793 ["c.ngl.ps_3"] = "46c0003bVGH",
794 ["c.lt.ps_2"] = "46c0003cGH",
795 ["c.lt.ps_3"] = "46c0003cVGH",
796 ["c.nge.ps_2"] = "46c0003dGH",
797 ["c.nge.ps_3"] = "46c0003dVGH",
798 ["c.le.ps_2"] = "46c0003eGH",
799 ["c.le.ps_3"] = "46c0003eVGH",
800 ["c.ngt.ps_2"] = "46c0003fGH",
801 ["c.ngt.ps_3"] = "46c0003fVGH",
802
803 -- Opcode COP1X.
804 lwxc1_2 = "4c000000FX",
805 ldxc1_2 = "4c000001FX",
806 luxc1_2 = "4c000005FX",
807 swxc1_2 = "4c000008FX",
808 sdxc1_2 = "4c000009FX",
809 suxc1_2 = "4c00000dFX",
810 prefx_2 = "4c00000fMX",
811 ["alnv.ps_4"] = "4c00001eFGHS",
812 ["madd.s_4"] = "4c000020FRGH",
813 ["madd.d_4"] = "4c000021FRGH",
814 ["madd.ps_4"] = "4c000026FRGH",
815 ["msub.s_4"] = "4c000028FRGH",
816 ["msub.d_4"] = "4c000029FRGH",
817 ["msub.ps_4"] = "4c00002eFRGH",
818 ["nmadd.s_4"] = "4c000030FRGH",
819 ["nmadd.d_4"] = "4c000031FRGH",
820 ["nmadd.ps_4"] = "4c000036FRGH",
821 ["nmsub.s_4"] = "4c000038FRGH",
822 ["nmsub.d_4"] = "4c000039FRGH",
823 ["nmsub.ps_4"] = "4c00003eFRGH",
824
825 }) do map_op[k] = v end
826
827end
828
607------------------------------------------------------------------------------ 829------------------------------------------------------------------------------
608 830
609local function parse_gpr(expr) 831local function parse_gpr(expr)
@@ -633,7 +855,7 @@ local function parse_fpr(expr)
633 werror("bad register name `"..expr.."'") 855 werror("bad register name `"..expr.."'")
634end 856end
635 857
636local function parse_imm(imm, bits, shift, scale, signed) 858local function parse_imm(imm, bits, shift, scale, signed, action)
637 local n = tonumber(imm) 859 local n = tonumber(imm)
638 if n then 860 if n then
639 local m = sar(n, scale) 861 local m = sar(n, scale)
@@ -651,7 +873,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
651 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then 873 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
652 werror("expected immediate operand, got register") 874 werror("expected immediate operand, got register")
653 else 875 else
654 waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) 876 waction(action or "IMM",
877 (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
655 return 0 878 return 0
656 end 879 end
657end 880end
@@ -756,13 +979,18 @@ map_op[".template__"] = function(params, template, nparams)
756 op = op + parse_disp(params[n]); n = n + 1 979 op = op + parse_disp(params[n]); n = n + 1
757 elseif p == "X" then 980 elseif p == "X" then
758 op = op + parse_index(params[n]); n = n + 1 981 op = op + parse_index(params[n]); n = n + 1
759 elseif p == "B" or p == "J" then 982 elseif p == "B" or p == "J" or p == "K" or p == "L" then
760 local mode, m, s = parse_label(params[n], false) 983 local mode, m, s = parse_label(params[n], false)
761 if p == "B" then m = m + 2048 end 984 if p == "J" then m = m + 0xa800
985 elseif p == "K" then m = m + 0x5000
986 elseif p == "L" then m = m + 0xa000 end
762 waction("REL_"..mode, m, s, 1) 987 waction("REL_"..mode, m, s, 1)
763 n = n + 1 988 n = n + 1
764 elseif p == "A" then 989 elseif p == "A" then
765 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 990 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
991 elseif p == "a" then
992 local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
993 op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
766 elseif p == "M" then 994 elseif p == "M" then
767 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 995 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
768 elseif p == "N" then 996 elseif p == "N" then
@@ -778,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams)
778 elseif p == "Z" then 1006 elseif p == "Z" then
779 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 1007 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
780 elseif p == "=" then 1008 elseif p == "=" then
781 op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. 1009 n = n - 1 -- Re-use previous parameter for next template char.
782 else 1010 else
783 assert(false) 1011 assert(false)
784 end 1012 end
diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua
new file mode 100644
index 00000000..c97d666b
--- /dev/null
+++ b/dynasm/dasm_mips64.lua
@@ -0,0 +1,12 @@
1------------------------------------------------------------------------------
2-- DynASM MIPS64 module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
8-- All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11mips64 = true -- Using a global is an ugly, but effective solution.
12return require("dasm_mips")
diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
index e2d6f1fc..4c7d7289 100644
--- a/dynasm/dasm_ppc.h
+++ b/dynasm/dasm_ppc.h
@@ -1,5 +1,5 @@
1/* 1/*
2** DynASM PPC encoding engine. 2** DynASM PPC/PPC64 encoding engine.
3** Copyright (C) 2005-2023 Mike Pall. All rights reserved. 3** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
4** Released under the MIT license. See dynasm.lua for full copyright notice. 4** Released under the MIT license. See dynasm.lua for full copyright notice.
5*/ 5*/
@@ -21,7 +21,7 @@ enum {
21 /* The following actions need a buffer position. */ 21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, 22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */ 23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, 24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
25 DASM__MAX 25 DASM__MAX
26}; 26};
27 27
@@ -69,7 +69,7 @@ struct dasm_State {
69 size_t lgsize; 69 size_t lgsize;
70 int *pclabels; /* PC label chains/pos ptrs. */ 70 int *pclabels; /* PC label chains/pos ptrs. */
71 size_t pcsize; 71 size_t pcsize;
72 void **globals; /* Array of globals (bias -10). */ 72 void **globals; /* Array of globals. */
73 dasm_Section *section; /* Pointer to active section. */ 73 dasm_Section *section; /* Pointer to active section. */
74 size_t codesize; /* Total size of all code sections. */ 74 size_t codesize; /* Total size of all code sections. */
75 int maxsection; /* 0 <= sectionidx < maxsection. */ 75 int maxsection; /* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
86{ 86{
87 dasm_State *D; 87 dasm_State *D;
88 size_t psz = 0; 88 size_t psz = 0;
89 int i;
90 Dst_REF = NULL; 89 Dst_REF = NULL;
91 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 90 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
92 D = Dst_REF; 91 D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
97 D->pcsize = 0; 96 D->pcsize = 0;
98 D->globals = NULL; 97 D->globals = NULL;
99 D->maxsection = maxsection; 98 D->maxsection = maxsection;
100 for (i = 0; i < maxsection; i++) { 99 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
101 D->sections[i].buf = NULL; /* Need this for pass3. */
102 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
103 D->sections[i].bsize = 0;
104 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
105 }
106} 100}
107 101
108/* Free DynASM state. */ 102/* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
122void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 116void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
123{ 117{
124 dasm_State *D = Dst_REF; 118 dasm_State *D = Dst_REF;
125 D->globals = gl - 10; /* Negative bias to compensate for locals. */ 119 D->globals = gl;
126 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 120 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
127} 121}
128 122
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
147 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 141 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
148 for (i = 0; i < D->maxsection; i++) { 142 for (i = 0; i < D->maxsection; i++) {
149 D->sections[i].pos = DASM_SEC2POS(i); 143 D->sections[i].pos = DASM_SEC2POS(i);
144 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
150 D->sections[i].ofs = 0; 145 D->sections[i].ofs = 0;
151 } 146 }
152} 147}
@@ -244,6 +239,10 @@ void dasm_put(Dst_DECL, int start, ...)
244#endif 239#endif
245 b[pos++] = n; 240 b[pos++] = n;
246 break; 241 break;
242 case DASM_IMMSH:
243 CK((n >> 6) == 0, RANGE_I);
244 b[pos++] = n;
245 break;
247 } 246 }
248 } 247 }
249 } 248 }
@@ -273,7 +272,7 @@ int dasm_link(Dst_DECL, size_t *szp)
273 272
274 { /* Handle globals not defined in this translation unit. */ 273 { /* Handle globals not defined in this translation unit. */
275 int idx; 274 int idx;
276 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 275 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
277 int n = D->lglabels[idx]; 276 int n = D->lglabels[idx];
278 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 277 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
279 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 278 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -299,7 +298,7 @@ int dasm_link(Dst_DECL, size_t *szp)
299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; 298 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
300 case DASM_REL_LG: case DASM_REL_PC: pos++; break; 299 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; 300 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
302 case DASM_IMM: pos++; break; 301 case DASM_IMM: case DASM_IMMSH: pos++; break;
303 } 302 }
304 } 303 }
305 stop: (void)0; 304 stop: (void)0;
@@ -349,7 +348,10 @@ int dasm_encode(Dst_DECL, void *buffer)
349 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; 348 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
350 break; 349 break;
351 case DASM_REL_LG: 350 case DASM_REL_LG:
352 CK(n >= 0, UNDEF_LG); 351 if (n < 0) {
352 n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
353 goto patchrel;
354 }
353 /* fallthrough */ 355 /* fallthrough */
354 case DASM_REL_PC: 356 case DASM_REL_PC:
355 CK(n >= 0, UNDEF_PC); 357 CK(n >= 0, UNDEF_PC);
@@ -361,12 +363,15 @@ int dasm_encode(Dst_DECL, void *buffer)
361 cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); 363 cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
362 break; 364 break;
363 case DASM_LABEL_LG: 365 case DASM_LABEL_LG:
364 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 366 ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
365 break; 367 break;
366 case DASM_LABEL_PC: break; 368 case DASM_LABEL_PC: break;
367 case DASM_IMM: 369 case DASM_IMM:
368 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); 370 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
369 break; 371 break;
372 case DASM_IMMSH:
373 cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
374 break;
370 default: *cp++ = ins; break; 375 default: *cp++ = ins; break;
371 } 376 }
372 } 377 }
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index b4f5cea4..d66ae4a0 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -1,17 +1,19 @@
1------------------------------------------------------------------------------ 1------------------------------------------------------------------------------
2-- DynASM PPC module. 2-- DynASM PPC/PPC64 module.
3-- 3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. 4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice. 5-- See dynasm.lua for full copyright notice.
6--
7-- Support for various extensions contributed by Caio Souza Oliveira.
6------------------------------------------------------------------------------ 8------------------------------------------------------------------------------
7 9
8-- Module information: 10-- Module information:
9local _info = { 11local _info = {
10 arch = "ppc", 12 arch = "ppc",
11 description = "DynASM PPC module", 13 description = "DynASM PPC module",
12 version = "1.3.0", 14 version = "1.5.0",
13 vernum = 10300, 15 vernum = 10500,
14 release = "2011-05-05", 16 release = "2021-05-02",
15 author = "Mike Pall", 17 author = "Mike Pall",
16 license = "MIT", 18 license = "MIT",
17} 19}
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
39local action_names = { 41local action_names = {
40 "STOP", "SECTION", "ESC", "REL_EXT", 42 "STOP", "SECTION", "ESC", "REL_EXT",
41 "ALIGN", "REL_LG", "LABEL_LG", 43 "ALIGN", "REL_LG", "LABEL_LG",
42 "REL_PC", "LABEL_PC", "IMM", 44 "REL_PC", "LABEL_PC", "IMM", "IMMSH"
43} 45}
44 46
45-- Maximum number of section buffer positions for dasm_put(). 47-- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
228 230
229------------------------------------------------------------------------------ 231------------------------------------------------------------------------------
230 232
233local map_op, op_template
234
235local function op_alias(opname, f)
236 return function(params, nparams)
237 if not params then return "-> "..opname:sub(1, -3) end
238 f(params, nparams)
239 op_template(params, map_op[opname], nparams)
240 end
241end
242
231-- Template strings for PPC instructions. 243-- Template strings for PPC instructions.
232local map_op = { 244map_op = {
233 tdi_3 = "08000000ARI", 245 tdi_3 = "08000000ARI",
234 twi_3 = "0c000000ARI", 246 twi_3 = "0c000000ARI",
235 mulli_3 = "1c000000RRI", 247 mulli_3 = "1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
297 std_2 = "f8000000RD", 309 std_2 = "f8000000RD",
298 stdu_2 = "f8000001RD", 310 stdu_2 = "f8000001RD",
299 311
312 subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
313 subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
314 subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
315 ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
316
317 rotlwi_3 = op_alias("rlwinm_5", function(p)
318 p[4] = "0"; p[5] = "31"
319 end),
320 rotrwi_3 = op_alias("rlwinm_5", function(p)
321 p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
322 end),
323 rotlw_3 = op_alias("rlwnm_5", function(p)
324 p[4] = "0"; p[5] = "31"
325 end),
326 slwi_3 = op_alias("rlwinm_5", function(p)
327 p[5] = "31-("..p[3]..")"; p[4] = "0"
328 end),
329 srwi_3 = op_alias("rlwinm_5", function(p)
330 p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
331 end),
332 clrlwi_3 = op_alias("rlwinm_5", function(p)
333 p[4] = p[3]; p[3] = "0"; p[5] = "31"
334 end),
335 clrrwi_3 = op_alias("rlwinm_5", function(p)
336 p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
337 end),
338
339 -- Primary opcode 4:
340 mulhhwu_3 = "10000010RRR.",
341 machhwu_3 = "10000018RRR.",
342 mulhhw_3 = "10000050RRR.",
343 nmachhw_3 = "1000005cRRR.",
344 machhwsu_3 = "10000098RRR.",
345 machhws_3 = "100000d8RRR.",
346 nmachhws_3 = "100000dcRRR.",
347 mulchwu_3 = "10000110RRR.",
348 macchwu_3 = "10000118RRR.",
349 mulchw_3 = "10000150RRR.",
350 macchw_3 = "10000158RRR.",
351 nmacchw_3 = "1000015cRRR.",
352 macchwsu_3 = "10000198RRR.",
353 macchws_3 = "100001d8RRR.",
354 nmacchws_3 = "100001dcRRR.",
355 mullhw_3 = "10000350RRR.",
356 maclhw_3 = "10000358RRR.",
357 nmaclhw_3 = "1000035cRRR.",
358 maclhwsu_3 = "10000398RRR.",
359 maclhws_3 = "100003d8RRR.",
360 nmaclhws_3 = "100003dcRRR.",
361 machhwuo_3 = "10000418RRR.",
362 nmachhwo_3 = "1000045cRRR.",
363 machhwsuo_3 = "10000498RRR.",
364 machhwso_3 = "100004d8RRR.",
365 nmachhwso_3 = "100004dcRRR.",
366 macchwuo_3 = "10000518RRR.",
367 macchwo_3 = "10000558RRR.",
368 nmacchwo_3 = "1000055cRRR.",
369 macchwsuo_3 = "10000598RRR.",
370 macchwso_3 = "100005d8RRR.",
371 nmacchwso_3 = "100005dcRRR.",
372 maclhwo_3 = "10000758RRR.",
373 nmaclhwo_3 = "1000075cRRR.",
374 maclhwsuo_3 = "10000798RRR.",
375 maclhwso_3 = "100007d8RRR.",
376 nmaclhwso_3 = "100007dcRRR.",
377
378 vaddubm_3 = "10000000VVV",
379 vmaxub_3 = "10000002VVV",
380 vrlb_3 = "10000004VVV",
381 vcmpequb_3 = "10000006VVV",
382 vmuloub_3 = "10000008VVV",
383 vaddfp_3 = "1000000aVVV",
384 vmrghb_3 = "1000000cVVV",
385 vpkuhum_3 = "1000000eVVV",
386 vmhaddshs_4 = "10000020VVVV",
387 vmhraddshs_4 = "10000021VVVV",
388 vmladduhm_4 = "10000022VVVV",
389 vmsumubm_4 = "10000024VVVV",
390 vmsummbm_4 = "10000025VVVV",
391 vmsumuhm_4 = "10000026VVVV",
392 vmsumuhs_4 = "10000027VVVV",
393 vmsumshm_4 = "10000028VVVV",
394 vmsumshs_4 = "10000029VVVV",
395 vsel_4 = "1000002aVVVV",
396 vperm_4 = "1000002bVVVV",
397 vsldoi_4 = "1000002cVVVP",
398 vpermxor_4 = "1000002dVVVV",
399 vmaddfp_4 = "1000002eVVVV~",
400 vnmsubfp_4 = "1000002fVVVV~",
401 vaddeuqm_4 = "1000003cVVVV",
402 vaddecuq_4 = "1000003dVVVV",
403 vsubeuqm_4 = "1000003eVVVV",
404 vsubecuq_4 = "1000003fVVVV",
405 vadduhm_3 = "10000040VVV",
406 vmaxuh_3 = "10000042VVV",
407 vrlh_3 = "10000044VVV",
408 vcmpequh_3 = "10000046VVV",
409 vmulouh_3 = "10000048VVV",
410 vsubfp_3 = "1000004aVVV",
411 vmrghh_3 = "1000004cVVV",
412 vpkuwum_3 = "1000004eVVV",
413 vadduwm_3 = "10000080VVV",
414 vmaxuw_3 = "10000082VVV",
415 vrlw_3 = "10000084VVV",
416 vcmpequw_3 = "10000086VVV",
417 vmulouw_3 = "10000088VVV",
418 vmuluwm_3 = "10000089VVV",
419 vmrghw_3 = "1000008cVVV",
420 vpkuhus_3 = "1000008eVVV",
421 vaddudm_3 = "100000c0VVV",
422 vmaxud_3 = "100000c2VVV",
423 vrld_3 = "100000c4VVV",
424 vcmpeqfp_3 = "100000c6VVV",
425 vcmpequd_3 = "100000c7VVV",
426 vpkuwus_3 = "100000ceVVV",
427 vadduqm_3 = "10000100VVV",
428 vmaxsb_3 = "10000102VVV",
429 vslb_3 = "10000104VVV",
430 vmulosb_3 = "10000108VVV",
431 vrefp_2 = "1000010aV-V",
432 vmrglb_3 = "1000010cVVV",
433 vpkshus_3 = "1000010eVVV",
434 vaddcuq_3 = "10000140VVV",
435 vmaxsh_3 = "10000142VVV",
436 vslh_3 = "10000144VVV",
437 vmulosh_3 = "10000148VVV",
438 vrsqrtefp_2 = "1000014aV-V",
439 vmrglh_3 = "1000014cVVV",
440 vpkswus_3 = "1000014eVVV",
441 vaddcuw_3 = "10000180VVV",
442 vmaxsw_3 = "10000182VVV",
443 vslw_3 = "10000184VVV",
444 vmulosw_3 = "10000188VVV",
445 vexptefp_2 = "1000018aV-V",
446 vmrglw_3 = "1000018cVVV",
447 vpkshss_3 = "1000018eVVV",
448 vmaxsd_3 = "100001c2VVV",
449 vsl_3 = "100001c4VVV",
450 vcmpgefp_3 = "100001c6VVV",
451 vlogefp_2 = "100001caV-V",
452 vpkswss_3 = "100001ceVVV",
453 vadduhs_3 = "10000240VVV",
454 vminuh_3 = "10000242VVV",
455 vsrh_3 = "10000244VVV",
456 vcmpgtuh_3 = "10000246VVV",
457 vmuleuh_3 = "10000248VVV",
458 vrfiz_2 = "1000024aV-V",
459 vsplth_3 = "1000024cVV3",
460 vupkhsh_2 = "1000024eV-V",
461 vminuw_3 = "10000282VVV",
462 vminud_3 = "100002c2VVV",
463 vcmpgtud_3 = "100002c7VVV",
464 vrfim_2 = "100002caV-V",
465 vcmpgtsb_3 = "10000306VVV",
466 vcfux_3 = "1000030aVVA~",
467 vaddshs_3 = "10000340VVV",
468 vminsh_3 = "10000342VVV",
469 vsrah_3 = "10000344VVV",
470 vcmpgtsh_3 = "10000346VVV",
471 vmulesh_3 = "10000348VVV",
472 vcfsx_3 = "1000034aVVA~",
473 vspltish_2 = "1000034cVS",
474 vupkhpx_2 = "1000034eV-V",
475 vaddsws_3 = "10000380VVV",
476 vminsw_3 = "10000382VVV",
477 vsraw_3 = "10000384VVV",
478 vcmpgtsw_3 = "10000386VVV",
479 vmulesw_3 = "10000388VVV",
480 vctuxs_3 = "1000038aVVA~",
481 vspltisw_2 = "1000038cVS",
482 vminsd_3 = "100003c2VVV",
483 vsrad_3 = "100003c4VVV",
484 vcmpbfp_3 = "100003c6VVV",
485 vcmpgtsd_3 = "100003c7VVV",
486 vctsxs_3 = "100003caVVA~",
487 vupklpx_2 = "100003ceV-V",
488 vsububm_3 = "10000400VVV",
489 ["bcdadd._4"] = "10000401VVVy.",
490 vavgub_3 = "10000402VVV",
491 vand_3 = "10000404VVV",
492 ["vcmpequb._3"] = "10000406VVV",
493 vmaxfp_3 = "1000040aVVV",
494 vsubuhm_3 = "10000440VVV",
495 ["bcdsub._4"] = "10000441VVVy.",
496 vavguh_3 = "10000442VVV",
497 vandc_3 = "10000444VVV",
498 ["vcmpequh._3"] = "10000446VVV",
499 vminfp_3 = "1000044aVVV",
500 vpkudum_3 = "1000044eVVV",
501 vsubuwm_3 = "10000480VVV",
502 vavguw_3 = "10000482VVV",
503 vor_3 = "10000484VVV",
504 ["vcmpequw._3"] = "10000486VVV",
505 vpmsumw_3 = "10000488VVV",
506 ["vcmpeqfp._3"] = "100004c6VVV",
507 ["vcmpequd._3"] = "100004c7VVV",
508 vpkudus_3 = "100004ceVVV",
509 vavgsb_3 = "10000502VVV",
510 vavgsh_3 = "10000542VVV",
511 vorc_3 = "10000544VVV",
512 vbpermq_3 = "1000054cVVV",
513 vpksdus_3 = "1000054eVVV",
514 vavgsw_3 = "10000582VVV",
515 vsld_3 = "100005c4VVV",
516 ["vcmpgefp._3"] = "100005c6VVV",
517 vpksdss_3 = "100005ceVVV",
518 vsububs_3 = "10000600VVV",
519 mfvscr_1 = "10000604V--",
520 vsum4ubs_3 = "10000608VVV",
521 vsubuhs_3 = "10000640VVV",
522 mtvscr_1 = "10000644--V",
523 ["vcmpgtuh._3"] = "10000646VVV",
524 vsum4shs_3 = "10000648VVV",
525 vupkhsw_2 = "1000064eV-V",
526 vsubuws_3 = "10000680VVV",
527 vshasigmaw_4 = "10000682VVYp",
528 veqv_3 = "10000684VVV",
529 vsum2sws_3 = "10000688VVV",
530 vmrgow_3 = "1000068cVVV",
531 vshasigmad_4 = "100006c2VVYp",
532 vsrd_3 = "100006c4VVV",
533 ["vcmpgtud._3"] = "100006c7VVV",
534 vupklsw_2 = "100006ceV-V",
535 vupkslw_2 = "100006ceV-V",
536 vsubsbs_3 = "10000700VVV",
537 vclzb_2 = "10000702V-V",
538 vpopcntb_2 = "10000703V-V",
539 ["vcmpgtsb._3"] = "10000706VVV",
540 vsum4sbs_3 = "10000708VVV",
541 vsubshs_3 = "10000740VVV",
542 vclzh_2 = "10000742V-V",
543 vpopcnth_2 = "10000743V-V",
544 ["vcmpgtsh._3"] = "10000746VVV",
545 vsubsws_3 = "10000780VVV",
546 vclzw_2 = "10000782V-V",
547 vpopcntw_2 = "10000783V-V",
548 ["vcmpgtsw._3"] = "10000786VVV",
549 vsumsws_3 = "10000788VVV",
550 vmrgew_3 = "1000078cVVV",
551 vclzd_2 = "100007c2V-V",
552 vpopcntd_2 = "100007c3V-V",
553 ["vcmpbfp._3"] = "100007c6VVV",
554 ["vcmpgtsd._3"] = "100007c7VVV",
555
300 -- Primary opcode 19: 556 -- Primary opcode 19:
301 mcrf_2 = "4c000000XX", 557 mcrf_2 = "4c000000XX",
302 isync_0 = "4c00012c", 558 isync_0 = "4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
316 bclrl_2 = "4c000021AA", 572 bclrl_2 = "4c000021AA",
317 bcctr_2 = "4c000420AA", 573 bcctr_2 = "4c000420AA",
318 bcctrl_2 = "4c000421AA", 574 bcctrl_2 = "4c000421AA",
575 bctar_2 = "4c000460AA",
576 bctarl_2 = "4c000461AA",
319 blr_0 = "4e800020", 577 blr_0 = "4e800020",
320 blrl_0 = "4e800021", 578 blrl_0 = "4e800021",
321 bctr_0 = "4e800420", 579 bctr_0 = "4e800420",
@@ -327,6 +585,7 @@ local map_op = {
327 cmpd_3 = "7c200000XRR", 585 cmpd_3 = "7c200000XRR",
328 cmpd_2 = "7c200000-RR", 586 cmpd_2 = "7c200000-RR",
329 tw_3 = "7c000008ARR", 587 tw_3 = "7c000008ARR",
588 lvsl_3 = "7c00000cVRR",
330 subfc_3 = "7c000010RRR.", 589 subfc_3 = "7c000010RRR.",
331 subc_3 = "7c000010RRR~.", 590 subc_3 = "7c000010RRR~.",
332 mulhdu_3 = "7c000012RRR.", 591 mulhdu_3 = "7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
351 cmplw_2 = "7c000040-RR", 610 cmplw_2 = "7c000040-RR",
352 cmpld_3 = "7c200040XRR", 611 cmpld_3 = "7c200040XRR",
353 cmpld_2 = "7c200040-RR", 612 cmpld_2 = "7c200040-RR",
613 lvsr_3 = "7c00004cVRR",
354 subf_3 = "7c000050RRR.", 614 subf_3 = "7c000050RRR.",
355 sub_3 = "7c000050RRR~.", 615 sub_3 = "7c000050RRR~.",
616 lbarx_3 = "7c000068RR0R",
356 ldux_3 = "7c00006aRR0R", 617 ldux_3 = "7c00006aRR0R",
357 dcbst_2 = "7c00006c-RR", 618 dcbst_2 = "7c00006c-RR",
358 lwzux_3 = "7c00006eRR0R", 619 lwzux_3 = "7c00006eRR0R",
359 cntlzd_2 = "7c000074RR~", 620 cntlzd_2 = "7c000074RR~",
360 andc_3 = "7c000078RR~R.", 621 andc_3 = "7c000078RR~R.",
361 td_3 = "7c000088ARR", 622 td_3 = "7c000088ARR",
623 lvewx_3 = "7c00008eVRR",
362 mulhd_3 = "7c000092RRR.", 624 mulhd_3 = "7c000092RRR.",
625 addg6s_3 = "7c000094RRR",
363 mulhw_3 = "7c000096RRR.", 626 mulhw_3 = "7c000096RRR.",
627 dlmzb_3 = "7c00009cRR~R.",
364 ldarx_3 = "7c0000a8RR0R", 628 ldarx_3 = "7c0000a8RR0R",
365 dcbf_2 = "7c0000ac-RR", 629 dcbf_2 = "7c0000ac-RR",
366 lbzx_3 = "7c0000aeRR0R", 630 lbzx_3 = "7c0000aeRR0R",
631 lvx_3 = "7c0000ceVRR",
367 neg_2 = "7c0000d0RR.", 632 neg_2 = "7c0000d0RR.",
633 lharx_3 = "7c0000e8RR0R",
368 lbzux_3 = "7c0000eeRR0R", 634 lbzux_3 = "7c0000eeRR0R",
369 popcntb_2 = "7c0000f4RR~", 635 popcntb_2 = "7c0000f4RR~",
370 not_2 = "7c0000f8RR~%.", 636 not_2 = "7c0000f8RR~%.",
371 nor_3 = "7c0000f8RR~R.", 637 nor_3 = "7c0000f8RR~R.",
638 stvebx_3 = "7c00010eVRR",
372 subfe_3 = "7c000110RRR.", 639 subfe_3 = "7c000110RRR.",
373 sube_3 = "7c000110RRR~.", 640 sube_3 = "7c000110RRR~.",
374 adde_3 = "7c000114RRR.", 641 adde_3 = "7c000114RRR.",
375 stdx_3 = "7c00012aRR0R", 642 stdx_3 = "7c00012aRR0R",
376 stwcx_3 = "7c00012cRR0R.", 643 ["stwcx._3"] = "7c00012dRR0R.",
377 stwx_3 = "7c00012eRR0R", 644 stwx_3 = "7c00012eRR0R",
378 prtyw_2 = "7c000134RR~", 645 prtyw_2 = "7c000134RR~",
646 stvehx_3 = "7c00014eVRR",
379 stdux_3 = "7c00016aRR0R", 647 stdux_3 = "7c00016aRR0R",
648 ["stqcx._3"] = "7c00016dR:R0R.",
380 stwux_3 = "7c00016eRR0R", 649 stwux_3 = "7c00016eRR0R",
381 prtyd_2 = "7c000174RR~", 650 prtyd_2 = "7c000174RR~",
651 stvewx_3 = "7c00018eVRR",
382 subfze_2 = "7c000190RR.", 652 subfze_2 = "7c000190RR.",
383 addze_2 = "7c000194RR.", 653 addze_2 = "7c000194RR.",
384 stdcx_3 = "7c0001acRR0R.", 654 ["stdcx._3"] = "7c0001adRR0R.",
385 stbx_3 = "7c0001aeRR0R", 655 stbx_3 = "7c0001aeRR0R",
656 stvx_3 = "7c0001ceVRR",
386 subfme_2 = "7c0001d0RR.", 657 subfme_2 = "7c0001d0RR.",
387 mulld_3 = "7c0001d2RRR.", 658 mulld_3 = "7c0001d2RRR.",
388 addme_2 = "7c0001d4RR.", 659 addme_2 = "7c0001d4RR.",
389 mullw_3 = "7c0001d6RRR.", 660 mullw_3 = "7c0001d6RRR.",
390 dcbtst_2 = "7c0001ec-RR", 661 dcbtst_2 = "7c0001ec-RR",
391 stbux_3 = "7c0001eeRR0R", 662 stbux_3 = "7c0001eeRR0R",
663 bpermd_3 = "7c0001f8RR~R",
664 lvepxl_3 = "7c00020eVRR",
392 add_3 = "7c000214RRR.", 665 add_3 = "7c000214RRR.",
666 lqarx_3 = "7c000228R:R0R",
393 dcbt_2 = "7c00022c-RR", 667 dcbt_2 = "7c00022c-RR",
394 lhzx_3 = "7c00022eRR0R", 668 lhzx_3 = "7c00022eRR0R",
669 cdtbcd_2 = "7c000234RR~",
395 eqv_3 = "7c000238RR~R.", 670 eqv_3 = "7c000238RR~R.",
671 lvepx_3 = "7c00024eVRR",
396 eciwx_3 = "7c00026cRR0R", 672 eciwx_3 = "7c00026cRR0R",
397 lhzux_3 = "7c00026eRR0R", 673 lhzux_3 = "7c00026eRR0R",
674 cbcdtd_2 = "7c000274RR~",
398 xor_3 = "7c000278RR~R.", 675 xor_3 = "7c000278RR~R.",
399 mfspefscr_1 = "7c0082a6R", 676 mfspefscr_1 = "7c0082a6R",
400 mfxer_1 = "7c0102a6R", 677 mfxer_1 = "7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
404 lhax_3 = "7c0002aeRR0R", 681 lhax_3 = "7c0002aeRR0R",
405 mftb_1 = "7c0c42e6R", 682 mftb_1 = "7c0c42e6R",
406 mftbu_1 = "7c0d42e6R", 683 mftbu_1 = "7c0d42e6R",
684 lvxl_3 = "7c0002ceVRR",
407 lwaux_3 = "7c0002eaRR0R", 685 lwaux_3 = "7c0002eaRR0R",
408 lhaux_3 = "7c0002eeRR0R", 686 lhaux_3 = "7c0002eeRR0R",
687 popcntw_2 = "7c0002f4RR~",
688 divdeu_3 = "7c000312RRR.",
689 divweu_3 = "7c000316RRR.",
409 sthx_3 = "7c00032eRR0R", 690 sthx_3 = "7c00032eRR0R",
410 orc_3 = "7c000338RR~R.", 691 orc_3 = "7c000338RR~R.",
411 ecowx_3 = "7c00036cRR0R", 692 ecowx_3 = "7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
420 mtctr_1 = "7c0903a6R", 701 mtctr_1 = "7c0903a6R",
421 dcbi_2 = "7c0003ac-RR", 702 dcbi_2 = "7c0003ac-RR",
422 nand_3 = "7c0003b8RR~R.", 703 nand_3 = "7c0003b8RR~R.",
704 dsn_2 = "7c0003c6-RR",
705 stvxl_3 = "7c0003ceVRR",
423 divd_3 = "7c0003d2RRR.", 706 divd_3 = "7c0003d2RRR.",
424 divw_3 = "7c0003d6RRR.", 707 divw_3 = "7c0003d6RRR.",
708 popcntd_2 = "7c0003f4RR~",
425 cmpb_3 = "7c0003f8RR~R.", 709 cmpb_3 = "7c0003f8RR~R.",
426 mcrxr_1 = "7c000400X", 710 mcrxr_1 = "7c000400X",
711 lbdx_3 = "7c000406RRR",
427 subfco_3 = "7c000410RRR.", 712 subfco_3 = "7c000410RRR.",
428 subco_3 = "7c000410RRR~.", 713 subco_3 = "7c000410RRR~.",
429 addco_3 = "7c000414RRR.", 714 addco_3 = "7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
433 lfsx_3 = "7c00042eFR0R", 718 lfsx_3 = "7c00042eFR0R",
434 srw_3 = "7c000430RR~R.", 719 srw_3 = "7c000430RR~R.",
435 srd_3 = "7c000436RR~R.", 720 srd_3 = "7c000436RR~R.",
721 lhdx_3 = "7c000446RRR",
436 subfo_3 = "7c000450RRR.", 722 subfo_3 = "7c000450RRR.",
437 subo_3 = "7c000450RRR~.", 723 subo_3 = "7c000450RRR~.",
438 lfsux_3 = "7c00046eFR0R", 724 lfsux_3 = "7c00046eFR0R",
725 lwdx_3 = "7c000486RRR",
439 lswi_3 = "7c0004aaRR0A", 726 lswi_3 = "7c0004aaRR0A",
440 sync_0 = "7c0004ac", 727 sync_0 = "7c0004ac",
441 lwsync_0 = "7c2004ac", 728 lwsync_0 = "7c2004ac",
442 ptesync_0 = "7c4004ac", 729 ptesync_0 = "7c4004ac",
443 lfdx_3 = "7c0004aeFR0R", 730 lfdx_3 = "7c0004aeFR0R",
731 lddx_3 = "7c0004c6RRR",
444 nego_2 = "7c0004d0RR.", 732 nego_2 = "7c0004d0RR.",
445 lfdux_3 = "7c0004eeFR0R", 733 lfdux_3 = "7c0004eeFR0R",
734 stbdx_3 = "7c000506RRR",
446 subfeo_3 = "7c000510RRR.", 735 subfeo_3 = "7c000510RRR.",
447 subeo_3 = "7c000510RRR~.", 736 subeo_3 = "7c000510RRR~.",
448 addeo_3 = "7c000514RRR.", 737 addeo_3 = "7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
450 stswx_3 = "7c00052aRR0R", 739 stswx_3 = "7c00052aRR0R",
451 stwbrx_3 = "7c00052cRR0R", 740 stwbrx_3 = "7c00052cRR0R",
452 stfsx_3 = "7c00052eFR0R", 741 stfsx_3 = "7c00052eFR0R",
742 sthdx_3 = "7c000546RRR",
743 ["stbcx._3"] = "7c00056dRRR",
453 stfsux_3 = "7c00056eFR0R", 744 stfsux_3 = "7c00056eFR0R",
745 stwdx_3 = "7c000586RRR",
454 subfzeo_2 = "7c000590RR.", 746 subfzeo_2 = "7c000590RR.",
455 addzeo_2 = "7c000594RR.", 747 addzeo_2 = "7c000594RR.",
456 stswi_3 = "7c0005aaRR0A", 748 stswi_3 = "7c0005aaRR0A",
749 ["sthcx._3"] = "7c0005adRRR",
457 stfdx_3 = "7c0005aeFR0R", 750 stfdx_3 = "7c0005aeFR0R",
751 stddx_3 = "7c0005c6RRR",
458 subfmeo_2 = "7c0005d0RR.", 752 subfmeo_2 = "7c0005d0RR.",
459 mulldo_3 = "7c0005d2RRR.", 753 mulldo_3 = "7c0005d2RRR.",
460 addmeo_2 = "7c0005d4RR.", 754 addmeo_2 = "7c0005d4RR.",
461 mullwo_3 = "7c0005d6RRR.", 755 mullwo_3 = "7c0005d6RRR.",
462 dcba_2 = "7c0005ec-RR", 756 dcba_2 = "7c0005ec-RR",
463 stfdux_3 = "7c0005eeFR0R", 757 stfdux_3 = "7c0005eeFR0R",
758 stvepxl_3 = "7c00060eVRR",
464 addo_3 = "7c000614RRR.", 759 addo_3 = "7c000614RRR.",
465 lhbrx_3 = "7c00062cRR0R", 760 lhbrx_3 = "7c00062cRR0R",
761 lfdpx_3 = "7c00062eF:RR",
466 sraw_3 = "7c000630RR~R.", 762 sraw_3 = "7c000630RR~R.",
467 srad_3 = "7c000634RR~R.", 763 srad_3 = "7c000634RR~R.",
764 lfddx_3 = "7c000646FRR",
765 stvepx_3 = "7c00064eVRR",
468 srawi_3 = "7c000670RR~A.", 766 srawi_3 = "7c000670RR~A.",
469 sradi_3 = "7c000674RR~H.", 767 sradi_3 = "7c000674RR~H.",
470 eieio_0 = "7c0006ac", 768 eieio_0 = "7c0006ac",
471 lfiwax_3 = "7c0006aeFR0R", 769 lfiwax_3 = "7c0006aeFR0R",
770 divdeuo_3 = "7c000712RRR.",
771 divweuo_3 = "7c000716RRR.",
472 sthbrx_3 = "7c00072cRR0R", 772 sthbrx_3 = "7c00072cRR0R",
773 stfdpx_3 = "7c00072eF:RR",
473 extsh_2 = "7c000734RR~.", 774 extsh_2 = "7c000734RR~.",
775 stfddx_3 = "7c000746FRR",
776 divdeo_3 = "7c000752RRR.",
777 divweo_3 = "7c000756RRR.",
474 extsb_2 = "7c000774RR~.", 778 extsb_2 = "7c000774RR~.",
475 divduo_3 = "7c000792RRR.", 779 divduo_3 = "7c000792RRR.",
476 divwou_3 = "7c000796RRR.", 780 divwou_3 = "7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
481 divwo_3 = "7c0007d6RRR.", 785 divwo_3 = "7c0007d6RRR.",
482 dcbz_2 = "7c0007ec-RR", 786 dcbz_2 = "7c0007ec-RR",
483 787
788 ["tbegin._1"] = "7c00051d1",
789 ["tbegin._0"] = "7c00051d",
790 ["tend._1"] = "7c00055dY",
791 ["tend._0"] = "7c00055d",
792 ["tendall._0"] = "7e00055d",
793 tcheck_1 = "7c00059cX",
794 ["tsr._1"] = "7c0005dd1",
795 ["tsuspend._0"] = "7c0005dd",
796 ["tresume._0"] = "7c2005dd",
797 ["tabortwc._3"] = "7c00061dARR",
798 ["tabortdc._3"] = "7c00065dARR",
799 ["tabortwci._3"] = "7c00069dARS",
800 ["tabortdci._3"] = "7c0006ddARS",
801 ["tabort._1"] = "7c00071d-R-",
802 ["treclaim._1"] = "7c00075d-R",
803 ["trechkpt._0"] = "7c0007dd",
804
805 lxsiwzx_3 = "7c000018QRR",
806 lxsiwax_3 = "7c000098QRR",
807 mfvsrd_2 = "7c000066-Rq",
808 mfvsrwz_2 = "7c0000e6-Rq",
809 stxsiwx_3 = "7c000118QRR",
810 mtvsrd_2 = "7c000166QR",
811 mtvsrwa_2 = "7c0001a6QR",
812 lxvdsx_3 = "7c000298QRR",
813 lxsspx_3 = "7c000418QRR",
814 lxsdx_3 = "7c000498QRR",
815 stxsspx_3 = "7c000518QRR",
816 stxsdx_3 = "7c000598QRR",
817 lxvw4x_3 = "7c000618QRR",
818 lxvd2x_3 = "7c000698QRR",
819 stxvw4x_3 = "7c000718QRR",
820 stxvd2x_3 = "7c000798QRR",
821
484 -- Primary opcode 30: 822 -- Primary opcode 30:
485 rldicl_4 = "78000000RR~HM.", 823 rldicl_4 = "78000000RR~HM.",
486 rldicr_4 = "78000004RR~HM.", 824 rldicr_4 = "78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
489 rldcl_4 = "78000010RR~RM.", 827 rldcl_4 = "78000010RR~RM.",
490 rldcr_4 = "78000012RR~RM.", 828 rldcr_4 = "78000012RR~RM.",
491 829
830 rotldi_3 = op_alias("rldicl_4", function(p)
831 p[4] = "0"
832 end),
833 rotrdi_3 = op_alias("rldicl_4", function(p)
834 p[3] = "64-("..p[3]..")"; p[4] = "0"
835 end),
836 rotld_3 = op_alias("rldcl_4", function(p)
837 p[4] = "0"
838 end),
839 sldi_3 = op_alias("rldicr_4", function(p)
840 p[4] = "63-("..p[3]..")"
841 end),
842 srdi_3 = op_alias("rldicl_4", function(p)
843 p[4] = p[3]; p[3] = "64-("..p[3]..")"
844 end),
845 clrldi_3 = op_alias("rldicl_4", function(p)
846 p[4] = p[3]; p[3] = "0"
847 end),
848 clrrdi_3 = op_alias("rldicr_4", function(p)
849 p[4] = "63-("..p[3]..")"; p[3] = "0"
850 end),
851
852 -- Primary opcode 56:
853 lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8.
854
855 -- Primary opcode 57:
856 lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4.
857
492 -- Primary opcode 59: 858 -- Primary opcode 59:
493 fdivs_3 = "ec000024FFF.", 859 fdivs_3 = "ec000024FFF.",
494 fsubs_3 = "ec000028FFF.", 860 fsubs_3 = "ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
501 fmadds_4 = "ec00003aFFFF~.", 867 fmadds_4 = "ec00003aFFFF~.",
502 fnmsubs_4 = "ec00003cFFFF~.", 868 fnmsubs_4 = "ec00003cFFFF~.",
503 fnmadds_4 = "ec00003eFFFF~.", 869 fnmadds_4 = "ec00003eFFFF~.",
870 fcfids_2 = "ec00069cF-F.",
871 fcfidus_2 = "ec00079cF-F.",
872
873 dadd_3 = "ec000004FFF.",
874 dqua_4 = "ec000006FFFZ.",
875 dmul_3 = "ec000044FFF.",
876 drrnd_4 = "ec000046FFFZ.",
877 dscli_3 = "ec000084FF6.",
878 dquai_4 = "ec000086SF~FZ.",
879 dscri_3 = "ec0000c4FF6.",
880 drintx_4 = "ec0000c61F~FZ.",
881 dcmpo_3 = "ec000104XFF",
882 dtstex_3 = "ec000144XFF",
883 dtstdc_3 = "ec000184XF6",
884 dtstdg_3 = "ec0001c4XF6",
885 drintn_4 = "ec0001c61F~FZ.",
886 dctdp_2 = "ec000204F-F.",
887 dctfix_2 = "ec000244F-F.",
888 ddedpd_3 = "ec000284ZF~F.",
889 dxex_2 = "ec0002c4F-F.",
890 dsub_3 = "ec000404FFF.",
891 ddiv_3 = "ec000444FFF.",
892 dcmpu_3 = "ec000504XFF",
893 dtstsf_3 = "ec000544XFF",
894 drsp_2 = "ec000604F-F.",
895 dcffix_2 = "ec000644F-F.",
896 denbcd_3 = "ec000684YF~F.",
897 diex_3 = "ec0006c4FFF.",
898
899 -- Primary opcode 60:
900 xsaddsp_3 = "f0000000QQQ",
901 xsmaddasp_3 = "f0000008QQQ",
902 xxsldwi_4 = "f0000010QQQz",
903 xsrsqrtesp_2 = "f0000028Q-Q",
904 xssqrtsp_2 = "f000002cQ-Q",
905 xxsel_4 = "f0000030QQQQ",
906 xssubsp_3 = "f0000040QQQ",
907 xsmaddmsp_3 = "f0000048QQQ",
908 xxpermdi_4 = "f0000050QQQz",
909 xsresp_2 = "f0000068Q-Q",
910 xsmulsp_3 = "f0000080QQQ",
911 xsmsubasp_3 = "f0000088QQQ",
912 xxmrghw_3 = "f0000090QQQ",
913 xsdivsp_3 = "f00000c0QQQ",
914 xsmsubmsp_3 = "f00000c8QQQ",
915 xsadddp_3 = "f0000100QQQ",
916 xsmaddadp_3 = "f0000108QQQ",
917 xscmpudp_3 = "f0000118XQQ",
918 xscvdpuxws_2 = "f0000120Q-Q",
919 xsrdpi_2 = "f0000124Q-Q",
920 xsrsqrtedp_2 = "f0000128Q-Q",
921 xssqrtdp_2 = "f000012cQ-Q",
922 xssubdp_3 = "f0000140QQQ",
923 xsmaddmdp_3 = "f0000148QQQ",
924 xscmpodp_3 = "f0000158XQQ",
925 xscvdpsxws_2 = "f0000160Q-Q",
926 xsrdpiz_2 = "f0000164Q-Q",
927 xsredp_2 = "f0000168Q-Q",
928 xsmuldp_3 = "f0000180QQQ",
929 xsmsubadp_3 = "f0000188QQQ",
930 xxmrglw_3 = "f0000190QQQ",
931 xsrdpip_2 = "f00001a4Q-Q",
932 xstsqrtdp_2 = "f00001a8X-Q",
933 xsrdpic_2 = "f00001acQ-Q",
934 xsdivdp_3 = "f00001c0QQQ",
935 xsmsubmdp_3 = "f00001c8QQQ",
936 xsrdpim_2 = "f00001e4Q-Q",
937 xstdivdp_3 = "f00001e8XQQ",
938 xvaddsp_3 = "f0000200QQQ",
939 xvmaddasp_3 = "f0000208QQQ",
940 xvcmpeqsp_3 = "f0000218QQQ",
941 xvcvspuxws_2 = "f0000220Q-Q",
942 xvrspi_2 = "f0000224Q-Q",
943 xvrsqrtesp_2 = "f0000228Q-Q",
944 xvsqrtsp_2 = "f000022cQ-Q",
945 xvsubsp_3 = "f0000240QQQ",
946 xvmaddmsp_3 = "f0000248QQQ",
947 xvcmpgtsp_3 = "f0000258QQQ",
948 xvcvspsxws_2 = "f0000260Q-Q",
949 xvrspiz_2 = "f0000264Q-Q",
950 xvresp_2 = "f0000268Q-Q",
951 xvmulsp_3 = "f0000280QQQ",
952 xvmsubasp_3 = "f0000288QQQ",
953 xxspltw_3 = "f0000290QQg~",
954 xvcmpgesp_3 = "f0000298QQQ",
955 xvcvuxwsp_2 = "f00002a0Q-Q",
956 xvrspip_2 = "f00002a4Q-Q",
957 xvtsqrtsp_2 = "f00002a8X-Q",
958 xvrspic_2 = "f00002acQ-Q",
959 xvdivsp_3 = "f00002c0QQQ",
960 xvmsubmsp_3 = "f00002c8QQQ",
961 xvcvsxwsp_2 = "f00002e0Q-Q",
962 xvrspim_2 = "f00002e4Q-Q",
963 xvtdivsp_3 = "f00002e8XQQ",
964 xvadddp_3 = "f0000300QQQ",
965 xvmaddadp_3 = "f0000308QQQ",
966 xvcmpeqdp_3 = "f0000318QQQ",
967 xvcvdpuxws_2 = "f0000320Q-Q",
968 xvrdpi_2 = "f0000324Q-Q",
969 xvrsqrtedp_2 = "f0000328Q-Q",
970 xvsqrtdp_2 = "f000032cQ-Q",
971 xvsubdp_3 = "f0000340QQQ",
972 xvmaddmdp_3 = "f0000348QQQ",
973 xvcmpgtdp_3 = "f0000358QQQ",
974 xvcvdpsxws_2 = "f0000360Q-Q",
975 xvrdpiz_2 = "f0000364Q-Q",
976 xvredp_2 = "f0000368Q-Q",
977 xvmuldp_3 = "f0000380QQQ",
978 xvmsubadp_3 = "f0000388QQQ",
979 xvcmpgedp_3 = "f0000398QQQ",
980 xvcvuxwdp_2 = "f00003a0Q-Q",
981 xvrdpip_2 = "f00003a4Q-Q",
982 xvtsqrtdp_2 = "f00003a8X-Q",
983 xvrdpic_2 = "f00003acQ-Q",
984 xvdivdp_3 = "f00003c0QQQ",
985 xvmsubmdp_3 = "f00003c8QQQ",
986 xvcvsxwdp_2 = "f00003e0Q-Q",
987 xvrdpim_2 = "f00003e4Q-Q",
988 xvtdivdp_3 = "f00003e8XQQ",
989 xsnmaddasp_3 = "f0000408QQQ",
990 xxland_3 = "f0000410QQQ",
991 xscvdpsp_2 = "f0000424Q-Q",
992 xscvdpspn_2 = "f000042cQ-Q",
993 xsnmaddmsp_3 = "f0000448QQQ",
994 xxlandc_3 = "f0000450QQQ",
995 xsrsp_2 = "f0000464Q-Q",
996 xsnmsubasp_3 = "f0000488QQQ",
997 xxlor_3 = "f0000490QQQ",
998 xscvuxdsp_2 = "f00004a0Q-Q",
999 xsnmsubmsp_3 = "f00004c8QQQ",
1000 xxlxor_3 = "f00004d0QQQ",
1001 xscvsxdsp_2 = "f00004e0Q-Q",
1002 xsmaxdp_3 = "f0000500QQQ",
1003 xsnmaddadp_3 = "f0000508QQQ",
1004 xxlnor_3 = "f0000510QQQ",
1005 xscvdpuxds_2 = "f0000520Q-Q",
1006 xscvspdp_2 = "f0000524Q-Q",
1007 xscvspdpn_2 = "f000052cQ-Q",
1008 xsmindp_3 = "f0000540QQQ",
1009 xsnmaddmdp_3 = "f0000548QQQ",
1010 xxlorc_3 = "f0000550QQQ",
1011 xscvdpsxds_2 = "f0000560Q-Q",
1012 xsabsdp_2 = "f0000564Q-Q",
1013 xscpsgndp_3 = "f0000580QQQ",
1014 xsnmsubadp_3 = "f0000588QQQ",
1015 xxlnand_3 = "f0000590QQQ",
1016 xscvuxddp_2 = "f00005a0Q-Q",
1017 xsnabsdp_2 = "f00005a4Q-Q",
1018 xsnmsubmdp_3 = "f00005c8QQQ",
1019 xxleqv_3 = "f00005d0QQQ",
1020 xscvsxddp_2 = "f00005e0Q-Q",
1021 xsnegdp_2 = "f00005e4Q-Q",
1022 xvmaxsp_3 = "f0000600QQQ",
1023 xvnmaddasp_3 = "f0000608QQQ",
1024 ["xvcmpeqsp._3"] = "f0000618QQQ",
1025 xvcvspuxds_2 = "f0000620Q-Q",
1026 xvcvdpsp_2 = "f0000624Q-Q",
1027 xvminsp_3 = "f0000640QQQ",
1028 xvnmaddmsp_3 = "f0000648QQQ",
1029 ["xvcmpgtsp._3"] = "f0000658QQQ",
1030 xvcvspsxds_2 = "f0000660Q-Q",
1031 xvabssp_2 = "f0000664Q-Q",
1032 xvcpsgnsp_3 = "f0000680QQQ",
1033 xvnmsubasp_3 = "f0000688QQQ",
1034 ["xvcmpgesp._3"] = "f0000698QQQ",
1035 xvcvuxdsp_2 = "f00006a0Q-Q",
1036 xvnabssp_2 = "f00006a4Q-Q",
1037 xvnmsubmsp_3 = "f00006c8QQQ",
1038 xvcvsxdsp_2 = "f00006e0Q-Q",
1039 xvnegsp_2 = "f00006e4Q-Q",
1040 xvmaxdp_3 = "f0000700QQQ",
1041 xvnmaddadp_3 = "f0000708QQQ",
1042 ["xvcmpeqdp._3"] = "f0000718QQQ",
1043 xvcvdpuxds_2 = "f0000720Q-Q",
1044 xvcvspdp_2 = "f0000724Q-Q",
1045 xvmindp_3 = "f0000740QQQ",
1046 xvnmaddmdp_3 = "f0000748QQQ",
1047 ["xvcmpgtdp._3"] = "f0000758QQQ",
1048 xvcvdpsxds_2 = "f0000760Q-Q",
1049 xvabsdp_2 = "f0000764Q-Q",
1050 xvcpsgndp_3 = "f0000780QQQ",
1051 xvnmsubadp_3 = "f0000788QQQ",
1052 ["xvcmpgedp._3"] = "f0000798QQQ",
1053 xvcvuxddp_2 = "f00007a0Q-Q",
1054 xvnabsdp_2 = "f00007a4Q-Q",
1055 xvnmsubmdp_3 = "f00007c8QQQ",
1056 xvcvsxddp_2 = "f00007e0Q-Q",
1057 xvnegdp_2 = "f00007e4Q-Q",
1058
1059 -- Primary opcode 61:
1060 stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4.
1061
1062 -- Primary opcode 62:
1063 stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8.
504 1064
505 -- Primary opcode 63: 1065 -- Primary opcode 63:
506 fdiv_3 = "fc000024FFF.", 1066 fdiv_3 = "fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
526 frsp_2 = "fc000018F-F.", 1086 frsp_2 = "fc000018F-F.",
527 fctiw_2 = "fc00001cF-F.", 1087 fctiw_2 = "fc00001cF-F.",
528 fctiwz_2 = "fc00001eF-F.", 1088 fctiwz_2 = "fc00001eF-F.",
1089 ftdiv_2 = "fc000100X-F.",
1090 fctiwu_2 = "fc00011cF-F.",
1091 fctiwuz_2 = "fc00011eF-F.",
529 mtfsfi_2 = "fc00010cAA", -- NYI: upshift. 1092 mtfsfi_2 = "fc00010cAA", -- NYI: upshift.
530 fnabs_2 = "fc000110F-F.", 1093 fnabs_2 = "fc000110F-F.",
1094 ftsqrt_2 = "fc000140X-F.",
531 fabs_2 = "fc000210F-F.", 1095 fabs_2 = "fc000210F-F.",
532 frin_2 = "fc000310F-F.", 1096 frin_2 = "fc000310F-F.",
533 friz_2 = "fc000350F-F.", 1097 friz_2 = "fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
537 -- NYI: mtfsf, mtfsb0, mtfsb1. 1101 -- NYI: mtfsf, mtfsb0, mtfsb1.
538 fctid_2 = "fc00065cF-F.", 1102 fctid_2 = "fc00065cF-F.",
539 fctidz_2 = "fc00065eF-F.", 1103 fctidz_2 = "fc00065eF-F.",
1104 fmrgow_3 = "fc00068cFFF",
540 fcfid_2 = "fc00069cF-F.", 1105 fcfid_2 = "fc00069cF-F.",
1106 fctidu_2 = "fc00075cF-F.",
1107 fctiduz_2 = "fc00075eF-F.",
1108 fmrgew_3 = "fc00078cFFF",
1109 fcfidu_2 = "fc00079cF-F.",
1110
1111 daddq_3 = "fc000004F:F:F:.",
1112 dquaq_4 = "fc000006F:F:F:Z.",
1113 dmulq_3 = "fc000044F:F:F:.",
1114 drrndq_4 = "fc000046F:F:F:Z.",
1115 dscliq_3 = "fc000084F:F:6.",
1116 dquaiq_4 = "fc000086SF:~F:Z.",
1117 dscriq_3 = "fc0000c4F:F:6.",
1118 drintxq_4 = "fc0000c61F:~F:Z.",
1119 dcmpoq_3 = "fc000104XF:F:",
1120 dtstexq_3 = "fc000144XF:F:",
1121 dtstdcq_3 = "fc000184XF:6",
1122 dtstdgq_3 = "fc0001c4XF:6",
1123 drintnq_4 = "fc0001c61F:~F:Z.",
1124 dctqpq_2 = "fc000204F:-F:.",
1125 dctfixq_2 = "fc000244F:-F:.",
1126 ddedpdq_3 = "fc000284ZF:~F:.",
1127 dxexq_2 = "fc0002c4F:-F:.",
1128 dsubq_3 = "fc000404F:F:F:.",
1129 ddivq_3 = "fc000444F:F:F:.",
1130 dcmpuq_3 = "fc000504XF:F:",
1131 dtstsfq_3 = "fc000544XF:F:",
1132 drdpq_2 = "fc000604F:-F:.",
1133 dcffixq_2 = "fc000644F:-F:.",
1134 denbcdq_3 = "fc000684YF:~F:.",
1135 diexq_3 = "fc0006c4F:FF:.",
541 1136
542 -- Primary opcode 4, SPE APU extension: 1137 -- Primary opcode 4, SPE APU extension:
543 evaddw_3 = "10000200RRR", 1138 evaddw_3 = "10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
822do 1417do
823 local t = {} 1418 local t = {}
824 for k,v in pairs(map_op) do 1419 for k,v in pairs(map_op) do
825 if sub(v, -1) == "." then 1420 if type(v) == "string" and sub(v, -1) == "." then
826 local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) 1421 local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
827 t[sub(k, 1, -3).."."..sub(k, -2)] = v2 1422 t[sub(k, 1, -3).."."..sub(k, -2)] = v2
828 end 1423 end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
884 werror("bad register name `"..expr.."'") 1479 werror("bad register name `"..expr.."'")
885end 1480end
886 1481
-- Parse a vector register operand written as `vN` and return N as a number.
-- Only N in the range 0..31 is accepted; every other spelling is reported
-- through werror().
local function parse_vr(expr)
  local digits = match(expr, "^v([1-3]?[0-9])$")
  local regno = digits and tonumber(digits)
  -- The pattern admits up to v39, so the upper bound is checked separately.
  if regno and regno <= 31 then
    return regno
  end
  werror("bad register name `"..expr.."'")
end
1490
-- Parse a vector-scalar register operand written as `vsN` and return N as a
-- number. Only N in the range 0..63 is accepted; every other spelling is
-- reported through werror().
local function parse_vs(expr)
  local digits = match(expr, "^vs([1-6]?[0-9])$")
  local regno = digits and tonumber(digits)
  -- The pattern admits up to vs69, so the upper bound is checked separately.
  if regno and regno <= 63 then
    return regno
  end
  werror("bad register name `"..expr.."'")
end
1499
887local function parse_cr(expr) 1500local function parse_cr(expr)
888 local r = match(expr, "^cr([0-7])$") 1501 local r = match(expr, "^cr([0-7])$")
889 if r then return tonumber(r) end 1502 if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
900 werror("bad condition bit name `"..expr.."'") 1513 werror("bad condition bit name `"..expr.."'")
901end 1514end
902 1515
-- Environment for operand expressions evaluated at assembly time. It is an
-- empty table, so such expressions can only use literals and operators.
local parse_ctx = {}

-- Compile the chunk string `s` with parse_ctx as its environment.
-- Where setfenv exists (Lua 5.1 / LuaJIT) this uses loadstring() + setfenv();
-- otherwise it uses the four-argument form of load().
-- Returns the compiled function, or nil if `s` does not compile.
local loadenv = setfenv and function(s)
  local code = loadstring(s, "")
  if code then setfenv(code, parse_ctx) end
  return code
end or function(s)
  return load(s, "", nil, parse_ctx)
end

-- Try to parse simple arithmetic, too, since some basic ops are aliases.
-- Returns the numeric value of `n`, or nil if `n` is neither a plain number
-- nor a constant expression that evaluates to a number.
local function parse_number(n)
  local x = tonumber(n)
  if x then return x end
  local code = loadenv("return "..n)
  if code then
    local ok, y = pcall(code)
    -- Only a number is a usable result. Text such as 'a' or true is valid Lua
    -- that evaluates to a string/boolean; returning it would feed a non-number
    -- into the callers' bit operations instead of letting them treat the
    -- operand as a run-time expression.
    if ok and type(y) == "number" then return y end
  end
  return nil
end
1537
903local function parse_imm(imm, bits, shift, scale, signed) 1538local function parse_imm(imm, bits, shift, scale, signed)
904 local n = tonumber(imm) 1539 local n = parse_number(imm)
905 if n then 1540 if n then
906 local m = sar(n, scale) 1541 local m = sar(n, scale)
907 if shl(m, scale) == n then 1542 if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
914 end 1549 end
915 end 1550 end
916 werror("out of range immediate `"..imm.."'") 1551 werror("out of range immediate `"..imm.."'")
917 elseif match(imm, "^r([1-3]?[0-9])$") or 1552 elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
1553 match(imm, "^vs([1-6]?[0-9])$") or
918 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then 1554 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
919 werror("expected immediate operand, got register") 1555 werror("expected immediate operand, got register")
920 else 1556 else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
924end 1560end
925 1561
926local function parse_shiftmask(imm, isshift) 1562local function parse_shiftmask(imm, isshift)
927 local n = tonumber(imm) 1563 local n = parse_number(imm)
928 if n then 1564 if n then
929 if shr(n, 6) == 0 then 1565 if shr(n, 6) == 0 then
930 local lsb = band(imm, 31) 1566 local lsb = band(n, 31)
931 local msb = imm - lsb 1567 local msb = n - lsb
932 return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) 1568 return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
933 end 1569 end
934 werror("out of range immediate `"..imm.."'") 1570 werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
936 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then 1572 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
937 werror("expected immediate operand, got register") 1573 werror("expected immediate operand, got register")
938 else 1574 else
939 werror("NYI: parameterized 64 bit shift/mask") 1575 waction("IMMSH", isshift and 1 or 0, imm)
1576 return 0;
940 end 1577 end
941end 1578end
942 1579
@@ -1011,7 +1648,7 @@ end
1011------------------------------------------------------------------------------ 1648------------------------------------------------------------------------------
1012 1649
1013-- Handle opcodes defined with template strings. 1650-- Handle opcodes defined with template strings.
1014map_op[".template__"] = function(params, template, nparams) 1651op_template = function(params, template, nparams)
1015 if not params then return sub(template, 9) end 1652 if not params then return sub(template, 9) end
1016 local op = tonumber(sub(template, 1, 8), 16) 1653 local op = tonumber(sub(template, 1, 8), 16)
1017 local n, rs = 1, 26 1654 local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
1027 rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 1664 rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
1028 elseif p == "F" then 1665 elseif p == "F" then
1029 rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 1666 rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
1667 elseif p == "V" then
1668 rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
1669 elseif p == "Q" then
1670 local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
1671 local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
1672 op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
1673 elseif p == "q" then
1674 local vs = parse_vs(params[n]); n = n + 1
1675 op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
1030 elseif p == "A" then 1676 elseif p == "A" then
1031 rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 1677 rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
1032 elseif p == "S" then 1678 elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
1047 rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 1693 rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
1048 elseif p == "X" then 1694 elseif p == "X" then
1049 rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 1695 rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
1696 elseif p == "1" then
1697 rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
1698 elseif p == "g" then
1699 rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
1700 elseif p == "3" then
1701 rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
1702 elseif p == "P" then
1703 rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
1704 elseif p == "p" then
1705 op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
1706 elseif p == "6" then
1707 rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
1708 elseif p == "Y" then
1709 rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
1710 elseif p == "y" then
1711 rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
1712 elseif p == "Z" then
1713 rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
1714 elseif p == "z" then
1715 rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
1050 elseif p == "W" then 1716 elseif p == "W" then
1051 op = op + parse_cr(params[n]); n = n + 1 1717 op = op + parse_cr(params[n]); n = n + 1
1052 elseif p == "G" then 1718 elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
1071 local lo = band(op, mm) 1737 local lo = band(op, mm)
1072 local hi = band(op, shl(mm, 5)) 1738 local hi = band(op, shl(mm, 5))
1073 op = op - lo - hi + shl(lo, 5) + shr(hi, 5) 1739 op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
1740 elseif p == ":" then
1741 if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
1074 elseif p == "-" then 1742 elseif p == "-" then
1075 rs = rs - 5 1743 rs = rs - 5
1076 elseif p == "." then 1744 elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
1082 wputpos(pos, op) 1750 wputpos(pos, op)
1083end 1751end
1084 1752
1753map_op[".template__"] = op_template
1754
1085------------------------------------------------------------------------------ 1755------------------------------------------------------------------------------
1086 1756
1087-- Pseudo-opcode to mark the position where the action list is to be emitted. 1757-- Pseudo-opcode to mark the position where the action list is to be emitted.
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
index 73558c69..3f50f502 100644
--- a/dynasm/dasm_proto.h
+++ b/dynasm/dasm_proto.h
@@ -10,8 +10,8 @@
10#include <stddef.h> 10#include <stddef.h>
11#include <stdarg.h> 11#include <stdarg.h>
12 12
13#define DASM_IDENT "DynASM 1.3.0" 13#define DASM_IDENT "DynASM 1.5.0"
14#define DASM_VERSION 10300 /* 1.3.0 */ 14#define DASM_VERSION 10500 /* 1.5.0 */
15 15
16#ifndef Dst_DECL 16#ifndef Dst_DECL
17#define Dst_DECL dasm_State **Dst 17#define Dst_DECL dasm_State **Dst
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
index 7b031c72..aded9990 100644
--- a/dynasm/dasm_x86.h
+++ b/dynasm/dasm_x86.h
@@ -68,7 +68,7 @@ struct dasm_State {
68 size_t lgsize; 68 size_t lgsize;
69 int *pclabels; /* PC label chains/pos ptrs. */ 69 int *pclabels; /* PC label chains/pos ptrs. */
70 size_t pcsize; 70 size_t pcsize;
71 void **globals; /* Array of globals (bias -10). */ 71 void **globals; /* Array of globals. */
72 dasm_Section *section; /* Pointer to active section. */ 72 dasm_Section *section; /* Pointer to active section. */
73 size_t codesize; /* Total size of all code sections. */ 73 size_t codesize; /* Total size of all code sections. */
74 int maxsection; /* 0 <= sectionidx < maxsection. */ 74 int maxsection; /* 0 <= sectionidx < maxsection. */
@@ -85,7 +85,6 @@ void dasm_init(Dst_DECL, int maxsection)
85{ 85{
86 dasm_State *D; 86 dasm_State *D;
87 size_t psz = 0; 87 size_t psz = 0;
88 int i;
89 Dst_REF = NULL; 88 Dst_REF = NULL;
90 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 89 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
91 D = Dst_REF; 90 D = Dst_REF;
@@ -96,12 +95,7 @@ void dasm_init(Dst_DECL, int maxsection)
96 D->pcsize = 0; 95 D->pcsize = 0;
97 D->globals = NULL; 96 D->globals = NULL;
98 D->maxsection = maxsection; 97 D->maxsection = maxsection;
99 for (i = 0; i < maxsection; i++) { 98 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
100 D->sections[i].buf = NULL; /* Need this for pass3. */
101 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
102 D->sections[i].bsize = 0;
103 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
104 }
105} 99}
106 100
107/* Free DynASM state. */ 101/* Free DynASM state. */
@@ -121,7 +115,7 @@ void dasm_free(Dst_DECL)
121void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 115void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
122{ 116{
123 dasm_State *D = Dst_REF; 117 dasm_State *D = Dst_REF;
124 D->globals = gl - 10; /* Negative bias to compensate for locals. */ 118 D->globals = gl;
125 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 119 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
126} 120}
127 121
@@ -146,6 +140,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
146 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 140 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
147 for (i = 0; i < D->maxsection; i++) { 141 for (i = 0; i < D->maxsection; i++) {
148 D->sections[i].pos = DASM_SEC2POS(i); 142 D->sections[i].pos = DASM_SEC2POS(i);
143 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
149 D->sections[i].ofs = 0; 144 D->sections[i].ofs = 0;
150 } 145 }
151} 146}
@@ -170,7 +165,7 @@ void dasm_put(Dst_DECL, int start, ...)
170 dasm_State *D = Dst_REF; 165 dasm_State *D = Dst_REF;
171 dasm_ActList p = D->actionlist + start; 166 dasm_ActList p = D->actionlist + start;
172 dasm_Section *sec = D->section; 167 dasm_Section *sec = D->section;
173 int pos = sec->pos, ofs = sec->ofs, mrm = 4; 168 int pos = sec->pos, ofs = sec->ofs, mrm = -1;
174 int *b; 169 int *b;
175 170
176 if (pos >= sec->epos) { 171 if (pos >= sec->epos) {
@@ -193,7 +188,7 @@ void dasm_put(Dst_DECL, int start, ...)
193 b[pos++] = n; 188 b[pos++] = n;
194 switch (action) { 189 switch (action) {
195 case DASM_DISP: 190 case DASM_DISP:
196 if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } 191 if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
197 /* fallthrough */ 192 /* fallthrough */
198 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ 193 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
199 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ 194 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
@@ -204,11 +199,17 @@ void dasm_put(Dst_DECL, int start, ...)
204 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; 199 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
205 case DASM_SPACE: p++; ofs += n; break; 200 case DASM_SPACE: p++; ofs += n; break;
206 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ 201 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
207 case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); 202 case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
208 if (*p++ == 1 && *p == DASM_DISP) mrm = n; 203 if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
204 if (*p < 0x20 && (n&7) == 4) ofs++;
205 switch ((*p++ >> 3) & 3) {
206 case 3: n |= b[pos-3]; /* fallthrough */
207 case 2: n |= b[pos-2]; /* fallthrough */
208 case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
209 }
209 continue; 210 continue;
210 } 211 }
211 mrm = 4; 212 mrm = -1;
212 } else { 213 } else {
213 int *pl, n; 214 int *pl, n;
214 switch (action) { 215 switch (action) {
@@ -233,8 +234,11 @@ void dasm_put(Dst_DECL, int start, ...)
233 } 234 }
234 pos++; 235 pos++;
235 ofs += 4; /* Maximum offset needed. */ 236 ofs += 4; /* Maximum offset needed. */
236 if (action == DASM_REL_LG || action == DASM_REL_PC) 237 if (action == DASM_REL_LG || action == DASM_REL_PC) {
237 b[pos++] = ofs; /* Store pass1 offset estimate. */ 238 b[pos++] = ofs; /* Store pass1 offset estimate. */
239 } else if (sizeof(ptrdiff_t) == 8) {
240 ofs += 4;
241 }
238 break; 242 break;
239 case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; 243 case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
240 case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); 244 case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
@@ -359,10 +363,22 @@ int dasm_link(Dst_DECL, size_t *szp)
359 do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) 363 do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
360#define dasmd(x) \ 364#define dasmd(x) \
361 do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) 365 do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
366#define dasmq(x) \
367 do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
362#else 368#else
363#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) 369#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
364#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) 370#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
371#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0)
365#endif 372#endif
/* Store an address-sized value at cp and return the advanced pointer.
** Writes 8 bytes with dasmq() when ptrdiff_t is 8 bytes wide, otherwise
** 4 bytes with dasmd(). Both macros advance cp themselves.
*/
static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)
{
  if (sizeof(ptrdiff_t) != 8) {
    dasmd((unsigned int)x);
  } else {
    dasmq((unsigned long long)x);
  }
  return cp;
}
381#define dasma(x) (cp = dasma_(cp, (x)))
366 382
367/* Pass 3: Encode sections. */ 383/* Pass 3: Encode sections. */
368int dasm_encode(Dst_DECL, void *buffer) 384int dasm_encode(Dst_DECL, void *buffer)
@@ -402,9 +418,29 @@ int dasm_encode(Dst_DECL, void *buffer)
402 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; 418 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
403 /* fallthrough */ 419 /* fallthrough */
404 case DASM_IMM_W: dasmw(n); break; 420 case DASM_IMM_W: dasmw(n); break;
405 case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } 421 case DASM_VREG: {
422 int t = *p++;
423 unsigned char *ex = cp - (t&7);
424 if ((n & 8) && t < 0xa0) {
425 if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
426 n &= 7;
427 } else if (n & 0x10) {
428 if (*ex & 0x80) {
429 *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
430 }
431 while (++ex < cp) ex[-1] = *ex;
432 if (mark) mark--;
433 cp--;
434 n &= 7;
435 }
436 if (t >= 0xc0) n <<= 4;
437 else if (t >= 0x40) n <<= 3;
438 else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
439 cp[-1] ^= n;
440 break;
441 }
406 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; 442 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
407 b++; n = (int)(ptrdiff_t)D->globals[-n]; 443 b++; n = (int)(ptrdiff_t)D->globals[-n-10];
408 /* fallthrough */ 444 /* fallthrough */
409 case DASM_REL_A: rel_a: 445 case DASM_REL_A: rel_a:
410 n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ 446 n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
@@ -417,17 +453,18 @@ int dasm_encode(Dst_DECL, void *buffer)
417 goto wb; 453 goto wb;
418 } 454 }
419 case DASM_IMM_LG: 455 case DASM_IMM_LG:
420 p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } 456 p++;
457 if (n < 0) { dasma((ptrdiff_t)D->globals[-n-10]); break; }
421 /* fallthrough */ 458 /* fallthrough */
422 case DASM_IMM_PC: { 459 case DASM_IMM_PC: {
423 int *pb = DASM_POS2PTR(D, n); 460 int *pb = DASM_POS2PTR(D, n);
424 n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); 461 dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
425 goto wd; 462 break;
426 } 463 }
427 case DASM_LABEL_LG: { 464 case DASM_LABEL_LG: {
428 int idx = *p++; 465 int idx = *p++;
429 if (idx >= 10) 466 if (idx >= 10)
430 D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); 467 D->globals[idx-10] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
431 break; 468 break;
432 } 469 }
433 case DASM_LABEL_PC: case DASM_SETLABEL: break; 470 case DASM_LABEL_PC: case DASM_SETLABEL: break;
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index 8a4c93a2..787163c0 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -11,9 +11,9 @@ local x64 = x64
11local _info = { 11local _info = {
12 arch = x64 and "x64" or "x86", 12 arch = x64 and "x64" or "x86",
13 description = "DynASM x86/x64 module", 13 description = "DynASM x86/x64 module",
14 version = "1.3.0", 14 version = "1.5.0",
15 vernum = 10300, 15 vernum = 10500,
16 release = "2011-05-05", 16 release = "2021-05-02",
17 author = "Mike Pall", 17 author = "Mike Pall",
18 license = "MIT", 18 license = "MIT",
19} 19}
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
27local _s = string 27local _s = string
28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char 28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub 29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
30local concat, sort = table.concat, table.sort 30local concat, sort, remove = table.concat, table.sort, table.remove
31local bit = bit or require("bit") 31local bit = bit or require("bit")
32local band, shl, shr = bit.band, bit.lshift, bit.rshift 32local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
33 33
34-- Inherited tables and callbacks. 34-- Inherited tables and callbacks.
35local g_opt, g_arch 35local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
41 -- int arg, 1 buffer pos: 41 -- int arg, 1 buffer pos:
42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", 42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num): 43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
44 "VREG", "SPACE", -- !x64: VREG support NYI. 44 "VREG", "SPACE",
45 -- ptrdiff_t arg, 1 buffer pos (address): !x64 45 -- ptrdiff_t arg, 1 buffer pos (address): !x64
46 "SETLABEL", "REL_A", 46 "SETLABEL", "REL_A",
47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): 47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
83-- Current number of section buffer positions for dasm_put(). 83-- Current number of section buffer positions for dasm_put().
84local secpos = 1 84local secpos = 1
85 85
86-- VREG kind encodings, pre-shifted by 5 bits.
87local map_vreg = {
88 ["modrm.rm.m"] = 0x00,
89 ["modrm.rm.r"] = 0x20,
90 ["opcode"] = 0x20,
91 ["sib.base"] = 0x20,
92 ["sib.index"] = 0x40,
93 ["modrm.reg"] = 0x80,
94 ["vex.v"] = 0xa0,
95 ["imm.hi"] = 0xc0,
96}
97
98-- Current number of VREG actions contributing to REX/VEX shrinkage.
99local vreg_shrink_count = 0
100
86------------------------------------------------------------------------------ 101------------------------------------------------------------------------------
87 102
88-- Compute action numbers for action names. 103-- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
134 if a or num then secpos = secpos + (num or 1) end 149 if a or num then secpos = secpos + (num or 1) end
135end 150end
136 151
-- Optionally add a VREG action.
--   kind:  key into map_vreg selecting the pre-shifted kind code.
--   vreg:  argument passed to the VREG action; if nil/false nothing is emitted.
--   psz:   value added to the emitted kind byte (defaults to 0).
--   sk:    kinds whose code is below this value bump vreg_shrink_count
--          (defaults to 0, so no kind counts).
--   defer: if true, leave vreg_shrink_count pending instead of folding it
--          into this action's byte and resetting it.
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  waction("VREG", vreg)
  -- Name the offending kind in the diagnostic (it used to print vreg, which
  -- is not the value that failed the lookup). tostring() keeps the message
  -- construction safe when kind is nil.
  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    -- Fold the pending shrink count into the byte, scaled by 8.
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end
166
137-- Add call to embedded DynASM C code. 167-- Add call to embedded DynASM C code.
138local function wcall(func, args) 168local function wcall(func, args)
139 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) 169 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
299 local iname = format("@%s%x%s", sz, i, needrex and "R" or "") 329 local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
300 if needrex then map_reg_needrex[iname] = true end 330 if needrex then map_reg_needrex[iname] = true end
301 local name 331 local name
302 if sz == "o" then name = format("xmm%d", i) 332 if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
303 elseif sz == "f" then name = format("st%d", i) 333 elseif sz == "f" then name = format("st%d", i)
304 else name = format("r%d%s", i, sz == addrsize and "" or sz) end 334 else name = format("r%d%s", i, sz == addrsize and "" or sz) end
305 map_archdef[name] = iname 335 map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
326mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) 356mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
327map_reg_valid_index[map_archdef.esp] = false 357map_reg_valid_index[map_archdef.esp] = false
328if x64 then map_reg_valid_index[map_archdef.rsp] = false end 358if x64 then map_reg_valid_index[map_archdef.rsp] = false end
359if x64 then map_reg_needrex[map_archdef.Rb] = true end
329map_archdef["Ra"] = "@"..addrsize 360map_archdef["Ra"] = "@"..addrsize
330 361
331-- FP registers (internally tword sized, but use "f" as operand size). 362-- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
334-- SSE registers (oword sized, but qword and dword accessible). 365-- SSE registers (oword sized, but qword and dword accessible).
335mkrmap("o", "xmm") 366mkrmap("o", "xmm")
336 367
368-- AVX registers (yword sized, but oword, qword and dword accessible).
369mkrmap("y", "ymm")
370
337-- Operand size prefixes to codes. 371-- Operand size prefixes to codes.
338local map_opsize = { 372local map_opsize = {
339 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", 373 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
340 aword = addrsize, 374 tword = "t", aword = addrsize,
341} 375}
342 376
343-- Operand size code to number. 377-- Operand size code to number.
344local map_opsizenum = { 378local map_opsizenum = {
345 b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, 379 b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
346} 380}
347 381
348-- Operand size code to name. 382-- Operand size code to name.
349local map_opsizename = { 383local map_opsizename = {
350 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", 384 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
351 f = "fpword", 385 t = "tword", f = "fpword",
352} 386}
353 387
354-- Valid index register scale factors. 388-- Valid index register scale factors.
@@ -450,6 +484,22 @@ local function wputdarg(n)
450 end 484 end
451end 485end
452 486
487-- Put signed or unsigned qword or arg.
488local function wputqarg(n)
489 local tn = type(n)
490 if tn == "number" then -- This is only used for numbers from -2^31..2^32-1.
491 wputb(band(n, 255))
492 wputb(band(shr(n, 8), 255))
493 wputb(band(shr(n, 16), 255))
494 wputb(shr(n, 24))
495 local sign = n < 0 and 255 or 0
496 wputb(sign); wputb(sign); wputb(sign); wputb(sign)
497 else
498 waction("IMM_D", format("(unsigned int)(%s)", n))
499 waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
500 end
501end
502
453-- Put operand-size dependent number or arg (defaults to dword). 503-- Put operand-size dependent number or arg (defaults to dword).
454local function wputszarg(sz, n) 504local function wputszarg(sz, n)
455 if not sz or sz == "d" or sz == "q" then wputdarg(n) 505 if not sz or sz == "d" or sz == "q" then wputdarg(n)
@@ -460,9 +510,45 @@ local function wputszarg(sz, n)
460end 510end
461 511
462-- Put multi-byte opcode with operand-size dependent modifications. 512-- Put multi-byte opcode with operand-size dependent modifications.
463local function wputop(sz, op, rex) 513local function wputop(sz, op, rex, vex, vregr, vregxb)
514 local psz, sk = 0, nil
515 if vex then
516 local tail
517 if vex.m == 1 and band(rex, 11) == 0 then
518 if x64 and vregxb then
519 sk = map_vreg["modrm.reg"]
520 else
521 wputb(0xc5)
522 tail = shl(bxor(band(rex, 4), 4), 5)
523 psz = 3
524 end
525 end
526 if not tail then
527 wputb(0xc4)
528 wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
529 tail = shl(band(rex, 8), 4)
530 psz = 4
531 end
532 local reg, vreg = 0, nil
533 if vex.v then
534 reg = vex.v.reg
535 if not reg then werror("bad vex operand") end
536 if reg < 0 then reg = 0; vreg = vex.v.vreg end
537 end
538 if sz == "y" or vex.l then tail = tail + 4 end
539 wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
540 wvreg("vex.v", vreg)
541 rex = 0
542 if op >= 256 then werror("bad vex opcode") end
543 else
544 if rex ~= 0 then
545 if not x64 then werror("bad operand size") end
546 elseif (vregr or vregxb) and x64 then
547 rex = 0x10
548 sk = map_vreg["vex.v"]
549 end
550 end
464 local r 551 local r
465 if rex ~= 0 and not x64 then werror("bad operand size") end
466 if sz == "w" then wputb(102) end 552 if sz == "w" then wputb(102) end
467 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] 553 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
468 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end 554 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +557,20 @@ local function wputop(sz, op, rex)
471 if rex ~= 0 then 557 if rex ~= 0 then
472 local opc3 = band(op, 0xffff00) 558 local opc3 = band(op, 0xffff00)
473 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then 559 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
474 wputb(64 + band(rex, 15)); rex = 0 560 wputb(64 + band(rex, 15)); rex = 0; psz = 2
475 end 561 end
476 end 562 end
477 wputb(shr(op, 16)); op = band(op, 0xffff) 563 wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
478 end 564 end
479 if op >= 256 then 565 if op >= 256 then
480 local b = shr(op, 8) 566 local b = shr(op, 8)
481 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end 567 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
482 wputb(b) 568 wputb(b); op = band(op, 255); psz = psz + 1
483 op = band(op, 255)
484 end 569 end
485 if rex ~= 0 then wputb(64 + band(rex, 15)) end 570 if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
486 if sz == "b" then op = op - 1 end 571 if sz == "b" then op = op - 1 end
487 wputb(op) 572 wputb(op)
573 return psz, sk
488end 574end
489 575
490-- Put ModRM or SIB formatted byte. 576-- Put ModRM or SIB formatted byte.
@@ -494,7 +580,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
494end 580end
495 581
496-- Put ModRM/SIB plus optional displacement. 582-- Put ModRM/SIB plus optional displacement.
497local function wputmrmsib(t, imark, s, vsreg) 583local function wputmrmsib(t, imark, s, vsreg, psz, sk)
498 local vreg, vxreg 584 local vreg, vxreg
499 local reg, xreg = t.reg, t.xreg 585 local reg, xreg = t.reg, t.xreg
500 if reg and reg < 0 then reg = 0; vreg = t.vreg end 586 if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +590,8 @@ local function wputmrmsib(t, imark, s, vsreg)
504 -- Register mode. 590 -- Register mode.
505 if sub(t.mode, 1, 1) == "r" then 591 if sub(t.mode, 1, 1) == "r" then
506 wputmodrm(3, s, reg) 592 wputmodrm(3, s, reg)
507 if vsreg then waction("VREG", vsreg); wputxb(2) end 593 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
508 if vreg then waction("VREG", vreg); wputxb(0) end 594 wvreg("modrm.rm.r", vreg, psz+1, sk)
509 return 595 return
510 end 596 end
511 597
@@ -519,21 +605,22 @@ local function wputmrmsib(t, imark, s, vsreg)
519 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) 605 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
520 wputmodrm(0, s, 4) 606 wputmodrm(0, s, 4)
521 if imark == "I" then waction("MARK") end 607 if imark == "I" then waction("MARK") end
522 if vsreg then waction("VREG", vsreg); wputxb(2) end 608 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
523 wputmodrm(t.xsc, xreg, 5) 609 wputmodrm(t.xsc, xreg, 5)
524 if vxreg then waction("VREG", vxreg); wputxb(3) end 610 wvreg("sib.index", vxreg, psz+2, sk)
525 else 611 else
526 -- Pure 32 bit displacement. 612 -- Pure 32 bit displacement.
527 if x64 and tdisp ~= "table" then 613 if x64 and tdisp ~= "table" then
528 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) 614 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
615 wvreg("modrm.reg", vsreg, psz+1, sk)
529 if imark == "I" then waction("MARK") end 616 if imark == "I" then waction("MARK") end
530 wputmodrm(0, 4, 5) 617 wputmodrm(0, 4, 5)
531 else 618 else
532 riprel = x64 619 riprel = x64
533 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) 620 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
621 wvreg("modrm.reg", vsreg, psz+1, sk)
534 if imark == "I" then waction("MARK") end 622 if imark == "I" then waction("MARK") end
535 end 623 end
536 if vsreg then waction("VREG", vsreg); wputxb(2) end
537 end 624 end
538 if riprel then -- Emit rip-relative displacement. 625 if riprel then -- Emit rip-relative displacement.
539 if match("UWSiI", imark) then 626 if match("UWSiI", imark) then
@@ -561,16 +648,16 @@ local function wputmrmsib(t, imark, s, vsreg)
561 if xreg or band(reg, 7) == 4 then 648 if xreg or band(reg, 7) == 4 then
562 wputmodrm(m or 2, s, 4) -- ModRM. 649 wputmodrm(m or 2, s, 4) -- ModRM.
563 if m == nil or imark == "I" then waction("MARK") end 650 if m == nil or imark == "I" then waction("MARK") end
564 if vsreg then waction("VREG", vsreg); wputxb(2) end 651 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
565 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. 652 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
566 if vxreg then waction("VREG", vxreg); wputxb(3) end 653 wvreg("sib.index", vxreg, psz+2, sk, vreg)
567 if vreg then waction("VREG", vreg); wputxb(1) end 654 wvreg("sib.base", vreg, psz+2, sk)
568 else 655 else
569 wputmodrm(m or 2, s, reg) -- ModRM. 656 wputmodrm(m or 2, s, reg) -- ModRM.
570 if (imark == "I" and (m == 1 or m == 2)) or 657 if (imark == "I" and (m == 1 or m == 2)) or
571 (m == nil and (vsreg or vreg)) then waction("MARK") end 658 (m == nil and (vsreg or vreg)) then waction("MARK") end
572 if vsreg then waction("VREG", vsreg); wputxb(2) end 659 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
573 if vreg then waction("VREG", vreg); wputxb(1) end 660 wvreg("modrm.rm.m", vreg, psz+1, sk)
574 end 661 end
575 662
576 -- Put displacement. 663 -- Put displacement.
@@ -592,10 +679,16 @@ local function opmodestr(op, args)
592end 679end
593 680
594-- Convert number to valid integer or nil. 681-- Convert number to valid integer or nil.
595local function toint(expr) 682local function toint(expr, isqword)
596 local n = tonumber(expr) 683 local n = tonumber(expr)
597 if n then 684 if n then
598 if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then 685 if n % 1 ~= 0 then
686 werror("not an integer number `"..expr.."'")
687 elseif isqword then
688 if n < -2147483648 or n > 2147483647 then
689 n = nil -- Handle it as an expression to avoid precision loss.
690 end
691 elseif n < -2147483648 or n > 4294967295 then
599 werror("bad integer number `"..expr.."'") 692 werror("bad integer number `"..expr.."'")
600 end 693 end
601 return n 694 return n
@@ -678,7 +771,7 @@ local function rtexpr(expr)
678end 771end
679 772
680-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. 773-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
681local function parseoperand(param) 774local function parseoperand(param, isqword)
682 local t = {} 775 local t = {}
683 776
684 local expr = param 777 local expr = param
@@ -766,7 +859,7 @@ local function parseoperand(param)
766 t.disp = dispexpr(tailx) 859 t.disp = dispexpr(tailx)
767 else 860 else
768 -- imm or opsize*imm 861 -- imm or opsize*imm
769 local imm = toint(expr) 862 local imm = toint(expr, isqword)
770 if not imm and sub(expr, 1, 1) == "*" and t.opsize then 863 if not imm and sub(expr, 1, 1) == "*" and t.opsize then
771 imm = toint(sub(expr, 2)) 864 imm = toint(sub(expr, 2))
772 if imm then 865 if imm then
@@ -881,9 +974,16 @@ end
881-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. 974-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
882-- The spare 3 bits are either filled with the last hex digit or 975-- The spare 3 bits are either filled with the last hex digit or
883-- the result from a previous "r"/"R". The opcode is restored. 976-- the result from a previous "r"/"R". The opcode is restored.
977-- "u" Use VEX encoding, vvvv unused.
978-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
979-- removed from the list used by future characters).
980-- "w" Use VEX encoding, vvvv from 3rd operand.
981-- "L" Force VEX.L
884-- 982--
885-- All of the following characters force a flush of the opcode: 983-- All of the following characters force a flush of the opcode:
886-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. 984-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
985-- "s" stores a 4 bit immediate from the last register operand,
986-- followed by 4 zero bits.
887-- "S" stores a signed 8 bit immediate from the last operand. 987-- "S" stores a signed 8 bit immediate from the last operand.
888-- "U" stores an unsigned 8 bit immediate from the last operand. 988-- "U" stores an unsigned 8 bit immediate from the last operand.
889-- "W" stores an unsigned 16 bit immediate from the last operand. 989-- "W" stores an unsigned 16 bit immediate from the last operand.
@@ -1226,46 +1326,14 @@ local map_op = {
1226 movups_2 = "rmo:0F10rM|mro:0F11Rm", 1326 movups_2 = "rmo:0F10rM|mro:0F11Rm",
1227 orpd_2 = "rmo:660F56rM", 1327 orpd_2 = "rmo:660F56rM",
1228 orps_2 = "rmo:0F56rM", 1328 orps_2 = "rmo:0F56rM",
1229 packssdw_2 = "rmo:660F6BrM",
1230 packsswb_2 = "rmo:660F63rM",
1231 packuswb_2 = "rmo:660F67rM",
1232 paddb_2 = "rmo:660FFCrM",
1233 paddd_2 = "rmo:660FFErM",
1234 paddq_2 = "rmo:660FD4rM",
1235 paddsb_2 = "rmo:660FECrM",
1236 paddsw_2 = "rmo:660FEDrM",
1237 paddusb_2 = "rmo:660FDCrM",
1238 paddusw_2 = "rmo:660FDDrM",
1239 paddw_2 = "rmo:660FFDrM",
1240 pand_2 = "rmo:660FDBrM",
1241 pandn_2 = "rmo:660FDFrM",
1242 pause_0 = "F390", 1329 pause_0 = "F390",
1243 pavgb_2 = "rmo:660FE0rM",
1244 pavgw_2 = "rmo:660FE3rM",
1245 pcmpeqb_2 = "rmo:660F74rM",
1246 pcmpeqd_2 = "rmo:660F76rM",
1247 pcmpeqw_2 = "rmo:660F75rM",
1248 pcmpgtb_2 = "rmo:660F64rM",
1249 pcmpgtd_2 = "rmo:660F66rM",
1250 pcmpgtw_2 = "rmo:660F65rM",
1251 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. 1330 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1252 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", 1331 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
1253 pmaddwd_2 = "rmo:660FF5rM",
1254 pmaxsw_2 = "rmo:660FEErM",
1255 pmaxub_2 = "rmo:660FDErM",
1256 pminsw_2 = "rmo:660FEArM",
1257 pminub_2 = "rmo:660FDArM",
1258 pmovmskb_2 = "rr/do:660FD7rM", 1332 pmovmskb_2 = "rr/do:660FD7rM",
1259 pmulhuw_2 = "rmo:660FE4rM",
1260 pmulhw_2 = "rmo:660FE5rM",
1261 pmullw_2 = "rmo:660FD5rM",
1262 pmuludq_2 = "rmo:660FF4rM",
1263 por_2 = "rmo:660FEBrM",
1264 prefetchnta_1 = "xb:n0F180m", 1333 prefetchnta_1 = "xb:n0F180m",
1265 prefetcht0_1 = "xb:n0F181m", 1334 prefetcht0_1 = "xb:n0F181m",
1266 prefetcht1_1 = "xb:n0F182m", 1335 prefetcht1_1 = "xb:n0F182m",
1267 prefetcht2_1 = "xb:n0F183m", 1336 prefetcht2_1 = "xb:n0F183m",
1268 psadbw_2 = "rmo:660FF6rM",
1269 pshufd_3 = "rmio:660F70rMU", 1337 pshufd_3 = "rmio:660F70rMU",
1270 pshufhw_3 = "rmio:F30F70rMU", 1338 pshufhw_3 = "rmio:F30F70rMU",
1271 pshuflw_3 = "rmio:F20F70rMU", 1339 pshuflw_3 = "rmio:F20F70rMU",
@@ -1279,23 +1347,6 @@ local map_op = {
1279 psrldq_2 = "rio:660F733mU", 1347 psrldq_2 = "rio:660F733mU",
1280 psrlq_2 = "rmo:660FD3rM|rio:660F732mU", 1348 psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
1281 psrlw_2 = "rmo:660FD1rM|rio:660F712mU", 1349 psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
1282 psubb_2 = "rmo:660FF8rM",
1283 psubd_2 = "rmo:660FFArM",
1284 psubq_2 = "rmo:660FFBrM",
1285 psubsb_2 = "rmo:660FE8rM",
1286 psubsw_2 = "rmo:660FE9rM",
1287 psubusb_2 = "rmo:660FD8rM",
1288 psubusw_2 = "rmo:660FD9rM",
1289 psubw_2 = "rmo:660FF9rM",
1290 punpckhbw_2 = "rmo:660F68rM",
1291 punpckhdq_2 = "rmo:660F6ArM",
1292 punpckhqdq_2 = "rmo:660F6DrM",
1293 punpckhwd_2 = "rmo:660F69rM",
1294 punpcklbw_2 = "rmo:660F60rM",
1295 punpckldq_2 = "rmo:660F62rM",
1296 punpcklqdq_2 = "rmo:660F6CrM",
1297 punpcklwd_2 = "rmo:660F61rM",
1298 pxor_2 = "rmo:660FEFrM",
1299 rcpps_2 = "rmo:0F53rM", 1350 rcpps_2 = "rmo:0F53rM",
1300 rcpss_2 = "rro:F30F53rM|rx/od:", 1351 rcpss_2 = "rro:F30F53rM|rx/od:",
1301 rsqrtps_2 = "rmo:0F52rM", 1352 rsqrtps_2 = "rmo:0F52rM",
@@ -1413,6 +1464,327 @@ local map_op = {
1413 movntsd_2 = "xr/qo:nF20F2BRm", 1464 movntsd_2 = "xr/qo:nF20F2BRm",
1414 movntss_2 = "xr/do:F30F2BRm", 1465 movntss_2 = "xr/do:F30F2BRm",
1415 -- popcnt is also in SSE4.2 1466 -- popcnt is also in SSE4.2
1467
1468 -- AES-NI
1469 aesdec_2 = "rmo:660F38DErM",
1470 aesdeclast_2 = "rmo:660F38DFrM",
1471 aesenc_2 = "rmo:660F38DCrM",
1472 aesenclast_2 = "rmo:660F38DDrM",
1473 aesimc_2 = "rmo:660F38DBrM",
1474 aeskeygenassist_3 = "rmio:660F3ADFrMU",
1475 pclmulqdq_3 = "rmio:660F3A44rMU",
1476
1477 -- AVX FP ops
1478 vaddsubpd_3 = "rrmoy:660FVD0rM",
1479 vaddsubps_3 = "rrmoy:F20FVD0rM",
1480 vandpd_3 = "rrmoy:660FV54rM",
1481 vandps_3 = "rrmoy:0FV54rM",
1482 vandnpd_3 = "rrmoy:660FV55rM",
1483 vandnps_3 = "rrmoy:0FV55rM",
1484 vblendpd_4 = "rrmioy:660F3AV0DrMU",
1485 vblendps_4 = "rrmioy:660F3AV0CrMU",
1486 vblendvpd_4 = "rrmroy:660F3AV4BrMs",
1487 vblendvps_4 = "rrmroy:660F3AV4ArMs",
1488 vbroadcastf128_2 = "rx/yo:660F38u1ArM",
1489 vcmppd_4 = "rrmioy:660FVC2rMU",
1490 vcmpps_4 = "rrmioy:0FVC2rMU",
1491 vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
1492 vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
1493 vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
1494 vcomiss_2 = "rro:0Fu2FrM|rx/od:",
1495 vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
1496 vcvtdq2ps_2 = "rmoy:0Fu5BrM",
1497 vcvtpd2dq_2 = "rmoy:F20FuE6rM",
1498 vcvtpd2ps_2 = "rmoy:660Fu5ArM",
1499 vcvtps2dq_2 = "rmoy:660Fu5BrM",
1500 vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
1501 vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
1502 vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
1503 vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
1504 vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
1505 vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
1506 vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
1507 vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
1508 vcvttps2dq_2 = "rmoy:F30Fu5BrM",
1509 vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
1510 vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
1511 vdppd_4 = "rrmio:660F3AV41rMU",
1512 vdpps_4 = "rrmioy:660F3AV40rMU",
1513 vextractf128_3 = "mri/oy:660F3AuL19RmU",
1514 vextractps_3 = "mri/do:660F3Au17RmU",
1515 vhaddpd_3 = "rrmoy:660FV7CrM",
1516 vhaddps_3 = "rrmoy:F20FV7CrM",
1517 vhsubpd_3 = "rrmoy:660FV7DrM",
1518 vhsubps_3 = "rrmoy:F20FV7DrM",
1519 vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
1520 vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
1521 vldmxcsr_1 = "xd:0FuAE2m",
1522 vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
1523 vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
1524 vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
1525 vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
1526 vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
1527 vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
1528 vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
1529 vmovhlps_3 = "rrro:0FV12rM",
1530 vmovhpd_2 = "xr/qo:660Fu17Rm",
1531 vmovhpd_3 = "rrx/ooq:660FV16rM",
1532 vmovhps_2 = "xr/qo:0Fu17Rm",
1533 vmovhps_3 = "rrx/ooq:0FV16rM",
1534 vmovlhps_3 = "rrro:0FV16rM",
1535 vmovlpd_2 = "xr/qo:660Fu13Rm",
1536 vmovlpd_3 = "rrx/ooq:660FV12rM",
1537 vmovlps_2 = "xr/qo:0Fu13Rm",
1538 vmovlps_3 = "rrx/ooq:0FV12rM",
1539 vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
1540 vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
1541 vmovntpd_2 = "xroy:660Fu2BRm",
1542 vmovntps_2 = "xroy:0Fu2BRm",
1543 vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
1544 vmovsd_3 = "rrro:F20FV10rM",
1545 vmovshdup_2 = "rmoy:F30Fu16rM",
1546 vmovsldup_2 = "rmoy:F30Fu12rM",
1547 vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
1548 vmovss_3 = "rrro:F30FV10rM",
1549 vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
1550 vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
1551 vorpd_3 = "rrmoy:660FV56rM",
1552 vorps_3 = "rrmoy:0FV56rM",
1553 vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
1554 vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
1555 vperm2f128_4 = "rrmiy:660F3AV06rMU",
1556 vptestpd_2 = "rmoy:660F38u0FrM",
1557 vptestps_2 = "rmoy:660F38u0ErM",
1558 vrcpps_2 = "rmoy:0Fu53rM",
1559 vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
1560 vrsqrtps_2 = "rmoy:0Fu52rM",
1561 vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
1562 vroundpd_3 = "rmioy:660F3Au09rMU",
1563 vroundps_3 = "rmioy:660F3Au08rMU",
1564 vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
1565 vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
1566 vshufpd_4 = "rrmioy:660FVC6rMU",
1567 vshufps_4 = "rrmioy:0FVC6rMU",
1568 vsqrtps_2 = "rmoy:0Fu51rM",
1569 vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
1570 vsqrtpd_2 = "rmoy:660Fu51rM",
1571 vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
1572 vstmxcsr_1 = "xd:0FuAE3m",
1573 vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
1574 vucomiss_2 = "rro:0Fu2ErM|rx/od:",
1575 vunpckhpd_3 = "rrmoy:660FV15rM",
1576 vunpckhps_3 = "rrmoy:0FV15rM",
1577 vunpcklpd_3 = "rrmoy:660FV14rM",
1578 vunpcklps_3 = "rrmoy:0FV14rM",
1579 vxorpd_3 = "rrmoy:660FV57rM",
1580 vxorps_3 = "rrmoy:0FV57rM",
1581 vzeroall_0 = "0FuL77",
1582 vzeroupper_0 = "0Fu77",
1583
1584 -- AVX2 FP ops
1585 vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
1586 vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
1587 -- *vgather* (!vsib)
1588 vpermpd_3 = "rmiy:660F3AuX01rMU",
1589 vpermps_3 = "rrmy:660F38V16rM",
1590
1591 -- AVX, AVX2 integer ops
1592 -- In general, xmm requires AVX, ymm requires AVX2.
1593 vaesdec_3 = "rrmo:660F38VDErM",
1594 vaesdeclast_3 = "rrmo:660F38VDFrM",
1595 vaesenc_3 = "rrmo:660F38VDCrM",
1596 vaesenclast_3 = "rrmo:660F38VDDrM",
1597 vaesimc_2 = "rmo:660F38uDBrM",
1598 vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
1599 vlddqu_2 = "rxoy:F20FuF0rM",
1600 vmaskmovdqu_2 = "rro:660FuF7rM",
1601 vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
1602 vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
1603 vmovntdq_2 = "xroy:660FuE7Rm",
1604 vmovntdqa_2 = "rxoy:660F38u2ArM",
1605 vmpsadbw_4 = "rrmioy:660F3AV42rMU",
1606 vpabsb_2 = "rmoy:660F38u1CrM",
1607 vpabsd_2 = "rmoy:660F38u1ErM",
1608 vpabsw_2 = "rmoy:660F38u1DrM",
1609 vpackusdw_3 = "rrmoy:660F38V2BrM",
1610 vpalignr_4 = "rrmioy:660F3AV0FrMU",
1611 vpblendvb_4 = "rrmroy:660F3AV4CrMs",
1612 vpblendw_4 = "rrmioy:660F3AV0ErMU",
1613 vpclmulqdq_4 = "rrmio:660F3AV44rMU",
1614 vpcmpeqq_3 = "rrmoy:660F38V29rM",
1615 vpcmpestri_3 = "rmio:660F3Au61rMU",
1616 vpcmpestrm_3 = "rmio:660F3Au60rMU",
1617 vpcmpgtq_3 = "rrmoy:660F38V37rM",
1618 vpcmpistri_3 = "rmio:660F3Au63rMU",
1619 vpcmpistrm_3 = "rmio:660F3Au62rMU",
1620 vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
1621 vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
1622 vpextrd_3 = "mri/do:660F3Au16RmU",
1623 vpextrq_3 = "mri/qo:660F3Au16RmU",
1624 vphaddw_3 = "rrmoy:660F38V01rM",
1625 vphaddd_3 = "rrmoy:660F38V02rM",
1626 vphaddsw_3 = "rrmoy:660F38V03rM",
1627 vphminposuw_2 = "rmo:660F38u41rM",
1628 vphsubw_3 = "rrmoy:660F38V05rM",
1629 vphsubd_3 = "rrmoy:660F38V06rM",
1630 vphsubsw_3 = "rrmoy:660F38V07rM",
1631 vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
1632 vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
1633 vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
1634 vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
1635 vpmaddubsw_3 = "rrmoy:660F38V04rM",
1636 vpmaxsb_3 = "rrmoy:660F38V3CrM",
1637 vpmaxsd_3 = "rrmoy:660F38V3DrM",
1638 vpmaxuw_3 = "rrmoy:660F38V3ErM",
1639 vpmaxud_3 = "rrmoy:660F38V3FrM",
1640 vpminsb_3 = "rrmoy:660F38V38rM",
1641 vpminsd_3 = "rrmoy:660F38V39rM",
1642 vpminuw_3 = "rrmoy:660F38V3ArM",
1643 vpminud_3 = "rrmoy:660F38V3BrM",
1644 vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
1645 vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
1646 vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
1647 vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
1648 vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
1649 vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
1650 vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
1651 vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
1652 vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
1653 vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
1654 vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
1655 vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
1656 vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
1657 vpmuldq_3 = "rrmoy:660F38V28rM",
1658 vpmulhrsw_3 = "rrmoy:660F38V0BrM",
1659 vpmulld_3 = "rrmoy:660F38V40rM",
1660 vpshufb_3 = "rrmoy:660F38V00rM",
1661 vpshufd_3 = "rmioy:660Fu70rMU",
1662 vpshufhw_3 = "rmioy:F30Fu70rMU",
1663 vpshuflw_3 = "rmioy:F20Fu70rMU",
1664 vpsignb_3 = "rrmoy:660F38V08rM",
1665 vpsignw_3 = "rrmoy:660F38V09rM",
1666 vpsignd_3 = "rrmoy:660F38V0ArM",
1667 vpslldq_3 = "rrioy:660Fv737mU",
1668 vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
1669 vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
1670 vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
1671 vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
1672 vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
1673 vpsrldq_3 = "rrioy:660Fv733mU",
1674 vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
1675 vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
1676 vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
1677 vptest_2 = "rmoy:660F38u17rM",
1678
1679 -- AVX2 integer ops
1680 vbroadcasti128_2 = "rx/yo:660F38u5ArM",
1681 vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
1682 vextracti128_3 = "mri/oy:660F3AuL39RmU",
1683 vpblendd_4 = "rrmioy:660F3AV02rMU",
1684 vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
1685 vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
1686 vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
1687 vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
1688 vpermd_3 = "rrmy:660F38V36rM",
1689 vpermq_3 = "rmiy:660F3AuX00rMU",
1690 -- *vpgather* (!vsib)
1691 vperm2i128_4 = "rrmiy:660F3AV46rMU",
1692 vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
1693 vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
1694 vpsllvd_3 = "rrmoy:660F38V47rM",
1695 vpsllvq_3 = "rrmoy:660F38VX47rM",
1696 vpsravd_3 = "rrmoy:660F38V46rM",
1697 vpsrlvd_3 = "rrmoy:660F38V45rM",
1698 vpsrlvq_3 = "rrmoy:660F38VX45rM",
1699
1700 -- Intel ADX
1701 adcx_2 = "rmqd:660F38F6rM",
1702 adox_2 = "rmqd:F30F38F6rM",
1703
1704 -- BMI1
1705 andn_3 = "rrmqd:0F38VF2rM",
1706 bextr_3 = "rmrqd:0F38wF7rM",
1707 blsi_2 = "rmqd:0F38vF33m",
1708 blsmsk_2 = "rmqd:0F38vF32m",
1709 blsr_2 = "rmqd:0F38vF31m",
1710 tzcnt_2 = "rmqdw:F30FBCrM",
1711
1712 -- BMI2
1713 bzhi_3 = "rmrqd:0F38wF5rM",
1714 mulx_3 = "rrmqd:F20F38VF6rM",
1715 pdep_3 = "rrmqd:F20F38VF5rM",
1716 pext_3 = "rrmqd:F30F38VF5rM",
1717 rorx_3 = "rmSqd:F20F3AuF0rMS",
1718 sarx_3 = "rmrqd:F30F38wF7rM",
1719 shrx_3 = "rmrqd:F20F38wF7rM",
1720 shlx_3 = "rmrqd:660F38wF7rM",
1721
1722 -- FMA3
1723 vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
1724 vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
1725 vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
1726 vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
1727 vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
1728 vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
1729
1730 vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
1731 vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
1732 vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
1733 vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
1734 vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
1735 vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
1736
1737 vfmadd132pd_3 = "rrmoy:660F38VX98rM",
1738 vfmadd132ps_3 = "rrmoy:660F38V98rM",
1739 vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
1740 vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
1741 vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
1742 vfmadd213ps_3 = "rrmoy:660F38VA8rM",
1743 vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
1744 vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
1745 vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
1746 vfmadd231ps_3 = "rrmoy:660F38VB8rM",
1747 vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
1748 vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
1749
1750 vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
1751 vfmsub132ps_3 = "rrmoy:660F38V9ArM",
1752 vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
1753 vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
1754 vfmsub213pd_3 = "rrmoy:660F38VXAArM",
1755 vfmsub213ps_3 = "rrmoy:660F38VAArM",
1756 vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
1757 vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
1758 vfmsub231pd_3 = "rrmoy:660F38VXBArM",
1759 vfmsub231ps_3 = "rrmoy:660F38VBArM",
1760 vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
1761 vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
1762
1763 vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
1764 vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
1765 vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
1766 vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
1767 vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
1768 vfnmadd213ps_3 = "rrmoy:660F38VACrM",
1769 vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
1770 vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
1771 vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
1772 vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
1773 vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
1774 vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
1775
1776 vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
1777 vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
1778 vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
1779 vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
1780 vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
1781 vfnmsub213ps_3 = "rrmoy:660F38VAErM",
1782 vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
1783 vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
1784 vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
1785 vfnmsub231ps_3 = "rrmoy:660F38VBErM",
1786 vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
1787 vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
1416} 1788}
1417 1789
1418------------------------------------------------------------------------------ 1790------------------------------------------------------------------------------
@@ -1463,28 +1835,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
1463 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ 1835 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
1464end 1836end
1465 1837
1466-- SSE FP arithmetic ops. 1838-- SSE / AVX FP arithmetic ops.
1467for name,n in pairs{ sqrt = 1, add = 8, mul = 9, 1839for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
1468 sub = 12, min = 13, div = 14, max = 15 } do 1840 sub = 12, min = 13, div = 14, max = 15 } do
1469 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) 1841 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
1470 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) 1842 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
1471 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) 1843 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
1472 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) 1844 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
1845 if n ~= 1 then
1846 map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
1847 map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
1848 map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
1849 map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
1850 end
1851end
1852
1853-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
1854for name,n in pairs{
1855 paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
1856 paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
1857 packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
1858 paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
1859 pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
1860 pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
1861 pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
1862 pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
1863 pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
1864 pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
1865 psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
1866 psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
1867 punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
1868 punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
1869 punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
1870} do
1871 map_op[name.."_2"] = format("rmo:660F%02XrM", n)
1872 map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
1473end 1873end
1474 1874
1475------------------------------------------------------------------------------ 1875------------------------------------------------------------------------------
1476 1876
1877local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
1878
1477-- Process pattern string. 1879-- Process pattern string.
1478local function dopattern(pat, args, sz, op, needrex) 1880local function dopattern(pat, args, sz, op, needrex)
1479 local digit, addin 1881 local digit, addin, vex
1480 local opcode = 0 1882 local opcode = 0
1481 local szov = sz 1883 local szov = sz
1482 local narg = 1 1884 local narg = 1
1483 local rex = 0 1885 local rex = 0
1484 1886
1485 -- Limit number of section buffer positions used by a single dasm_put(). 1887 -- Limit number of section buffer positions used by a single dasm_put().
1486 -- A single opcode needs a maximum of 5 positions. 1888 -- A single opcode needs a maximum of 6 positions.
1487 if secpos+5 > maxsecpos then wflush() end 1889 if secpos+6 > maxsecpos then wflush() end
1488 1890
1489 -- Process each character. 1891 -- Process each character.
1490 for c in gmatch(pat.."|", ".") do 1892 for c in gmatch(pat.."|", ".") do
@@ -1498,6 +1900,8 @@ local function dopattern(pat, args, sz, op, needrex)
1498 szov = nil 1900 szov = nil
1499 elseif c == "X" then -- Force REX.W. 1901 elseif c == "X" then -- Force REX.W.
1500 rex = 8 1902 rex = 8
1903 elseif c == "L" then -- Force VEX.L.
1904 vex.l = true
1501 elseif c == "r" then -- Merge 1st operand regno. into opcode. 1905 elseif c == "r" then -- Merge 1st operand regno. into opcode.
1502 addin = args[1]; opcode = opcode + (addin.reg % 8) 1906 addin = args[1]; opcode = opcode + (addin.reg % 8)
1503 if narg < 2 then narg = 2 end 1907 if narg < 2 then narg = 2 end
@@ -1521,21 +1925,42 @@ local function dopattern(pat, args, sz, op, needrex)
1521 if t.xreg and t.xreg > 7 then rex = rex + 2 end 1925 if t.xreg and t.xreg > 7 then rex = rex + 2 end
1522 if s > 7 then rex = rex + 4 end 1926 if s > 7 then rex = rex + 4 end
1523 if needrex then rex = rex + 16 end 1927 if needrex then rex = rex + 16 end
1524 wputop(szov, opcode, rex); opcode = nil 1928 local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
1929 opcode = nil
1525 local imark = sub(pat, -1) -- Force a mark (ugly). 1930 local imark = sub(pat, -1) -- Force a mark (ugly).
1526 -- Put ModRM/SIB with regno/last digit as spare. 1931 -- Put ModRM/SIB with regno/last digit as spare.
1527 wputmrmsib(t, imark, s, addin and addin.vreg) 1932 wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1528 addin = nil 1933 addin = nil
1934 elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
1935 local b = band(opcode, 255); opcode = shr(opcode, 8)
1936 local m = 1
1937 if b == 0x38 then m = 2
1938 elseif b == 0x3a then m = 3 end
1939 if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
1940 if b ~= 0x0f then
1941 werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
1942 "' in pattern `"..pat.."' for `"..op.."'")
1943 end
1944 local v = map_vexarg[c]
1945 if v then v = remove(args, v) end
1946 b = band(opcode, 255)
1947 local p = 0
1948 if b == 0x66 then p = 1
1949 elseif b == 0xf3 then p = 2
1950 elseif b == 0xf2 then p = 3 end
1951 if p ~= 0 then opcode = shr(opcode, 8) end
1952 if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
1953 vex = { m = m, p = p, v = v }
1529 else 1954 else
1530 if opcode then -- Flush opcode. 1955 if opcode then -- Flush opcode.
1531 if szov == "q" and rex == 0 then rex = rex + 8 end 1956 if szov == "q" and rex == 0 then rex = rex + 8 end
1532 if needrex then rex = rex + 16 end 1957 if needrex then rex = rex + 16 end
1533 if addin and addin.reg == -1 then 1958 if addin and addin.reg == -1 then
1534 wputop(szov, opcode - 7, rex) 1959 local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1535 waction("VREG", addin.vreg); wputxb(0) 1960 wvreg("opcode", addin.vreg, psz, sk)
1536 else 1961 else
1537 if addin and addin.reg > 7 then rex = rex + 1 end 1962 if addin and addin.reg > 7 then rex = rex + 1 end
1538 wputop(szov, opcode, rex) 1963 wputop(szov, opcode, rex, vex)
1539 end 1964 end
1540 opcode = nil 1965 opcode = nil
1541 end 1966 end
@@ -1549,7 +1974,7 @@ local function dopattern(pat, args, sz, op, needrex)
1549 local a = args[narg] 1974 local a = args[narg]
1550 narg = narg + 1 1975 narg = narg + 1
1551 local mode, imm = a.mode, a.imm 1976 local mode, imm = a.mode, a.imm
1552 if mode == "iJ" and not match("iIJ", c) then 1977 if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
1553 werror("bad operand size for label") 1978 werror("bad operand size for label")
1554 end 1979 end
1555 if c == "S" then 1980 if c == "S" then
@@ -1572,6 +1997,14 @@ local function dopattern(pat, args, sz, op, needrex)
1572 else 1997 else
1573 wputlabel("REL_", imm, 2) 1998 wputlabel("REL_", imm, 2)
1574 end 1999 end
2000 elseif c == "s" then
2001 local reg = a.reg
2002 if reg < 0 then
2003 wputb(0)
2004 wvreg("imm.hi", a.vreg)
2005 else
2006 wputb(shl(reg, 4))
2007 end
1575 else 2008 else
1576 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") 2009 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
1577 end 2010 end
@@ -1648,11 +2081,14 @@ map_op[".template__"] = function(params, template, nparams)
1648 if pat == "" then pat = lastpat else lastpat = pat end 2081 if pat == "" then pat = lastpat else lastpat = pat end
1649 if matchtm(tm, args) then 2082 if matchtm(tm, args) then
1650 local prefix = sub(szm, 1, 1) 2083 local prefix = sub(szm, 1, 1)
1651 if prefix == "/" then -- Match both operand sizes. 2084 if prefix == "/" then -- Exactly match leading operand sizes.
1652 if args[1].opsize == sub(szm, 2, 2) and 2085 for i = #szm,1,-1 do
1653 args[2].opsize == sub(szm, 3, 3) then 2086 if i == 1 then
1654 dopattern(pat, args, sz, params.op, needrex) -- Process pattern. 2087 dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
1655 return 2088 return
2089 elseif args[i-1].opsize ~= sub(szm, i, i) then
2090 break
2091 end
1656 end 2092 end
1657 else -- Match common operand size. 2093 else -- Match common operand size.
1658 local szp = sz 2094 local szp = sz
@@ -1717,8 +2153,8 @@ if x64 then
1717 rex = a.reg > 7 and 9 or 8 2153 rex = a.reg > 7 and 9 or 8
1718 end 2154 end
1719 end 2155 end
1720 wputop(sz, opcode, rex) 2156 local psz, sk = wputop(sz, opcode, rex, nil, vreg)
1721 if vreg then waction("VREG", vreg); wputxb(0) end 2157 wvreg("opcode", vreg, psz, sk)
1722 waction("IMM_D", format("(unsigned int)(%s)", op64)) 2158 waction("IMM_D", format("(unsigned int)(%s)", op64))
1723 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) 2159 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
1724 end 2160 end
@@ -1730,14 +2166,16 @@ end
1730local function op_data(params) 2166local function op_data(params)
1731 if not params then return "imm..." end 2167 if not params then return "imm..." end
1732 local sz = sub(params.op, 2, 2) 2168 local sz = sub(params.op, 2, 2)
1733 if sz == "a" then sz = addrsize end 2169 if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
1734 for _,p in ipairs(params) do 2170 for _,p in ipairs(params) do
1735 local a = parseoperand(p) 2171 local a = parseoperand(p, sz == "q")
1736 if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then 2172 if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
1737 werror("bad mode or size in `"..p.."'") 2173 werror("bad mode or size in `"..p.."'")
1738 end 2174 end
1739 if a.mode == "iJ" then 2175 if a.mode == "iJ" then
1740 wputlabel("IMM_", a.imm, 1) 2176 wputlabel("IMM_", a.imm, 1)
2177 elseif sz == "q" then
2178 wputqarg(a.imm)
1741 else 2179 else
1742 wputszarg(sz, a.imm) 2180 wputszarg(sz, a.imm)
1743 end 2181 end
@@ -1749,7 +2187,11 @@ map_op[".byte_*"] = op_data
1749map_op[".sbyte_*"] = op_data 2187map_op[".sbyte_*"] = op_data
1750map_op[".word_*"] = op_data 2188map_op[".word_*"] = op_data
1751map_op[".dword_*"] = op_data 2189map_op[".dword_*"] = op_data
2190map_op[".qword_*"] = op_data
1752map_op[".aword_*"] = op_data 2191map_op[".aword_*"] = op_data
2192map_op[".long_*"] = op_data
2193map_op[".quad_*"] = op_data
2194map_op[".addr_*"] = op_data
1753 2195
1754------------------------------------------------------------------------------ 2196------------------------------------------------------------------------------
1755 2197
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
index 068efe2a..5be75f7f 100644
--- a/dynasm/dynasm.lua
+++ b/dynasm/dynasm.lua
@@ -10,9 +10,9 @@
10local _info = { 10local _info = {
11 name = "DynASM", 11 name = "DynASM",
12 description = "A dynamic assembler for code generation engines", 12 description = "A dynamic assembler for code generation engines",
13 version = "1.3.0", 13 version = "1.5.0",
14 vernum = 10300, 14 vernum = 10500,
15 release = "2011-05-05", 15 release = "2021-05-02",
16 author = "Mike Pall", 16 author = "Mike Pall",
17 url = "https://luajit.org/dynasm.html", 17 url = "https://luajit.org/dynasm.html",
18 license = "MIT", 18 license = "MIT",
@@ -630,6 +630,7 @@ end
630-- Load architecture-specific module. 630-- Load architecture-specific module.
631local function loadarch(arch) 631local function loadarch(arch)
632 if not match(arch, "^[%w_]+$") then return "bad arch name" end 632 if not match(arch, "^[%w_]+$") then return "bad arch name" end
633 _G._map_def = map_def
633 local ok, m_arch = pcall(require, "dasm_"..arch) 634 local ok, m_arch = pcall(require, "dasm_"..arch)
634 if not ok then return "cannot load module: "..m_arch end 635 if not ok then return "cannot load module: "..m_arch end
635 g_arch = m_arch 636 g_arch = m_arch