path: root/C/Bra.c
Diffstat (limited to 'C/Bra.c')
-rw-r--r--  C/Bra.c  325
1 file changed, 307 insertions(+), 18 deletions(-)
diff --git a/C/Bra.c b/C/Bra.c
index 22e0e47..e61edf8 100644
--- a/C/Bra.c
+++ b/C/Bra.c
@@ -1,11 +1,11 @@
 /* Bra.c -- Branch converters for RISC code
-2023-04-02 : Igor Pavlov : Public domain */
+2024-01-20 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
 #include "Bra.h"
-#include "CpuArch.h"
 #include "RotateDefs.h"
+#include "CpuArch.h"
 
 #if defined(MY_CPU_SIZEOF_POINTER) \
   && ( MY_CPU_SIZEOF_POINTER == 4 \
@@ -26,7 +26,7 @@
 #define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
 // #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
 
-#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name
+#define Z7_BRANCH_CONV(name) z7_ ## name
 
 #define Z7_BRANCH_FUNC_MAIN(name) \
 static \
@@ -42,11 +42,11 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
 
 #ifdef Z7_EXTRACT_ONLY
 #define Z7_BRANCH_FUNCS_IMP(name) \
-  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0)
+  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
 #else
 #define Z7_BRANCH_FUNCS_IMP(name) \
-  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \
-  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1)
+  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
+  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
 #endif
 
 #if defined(__clang__)
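
For reference, the macro rename above moves the "BranchConv_" prefix from the Z7_BRANCH_CONV macro to its call sites: the converter bodies below are now declared as Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64) and so on, so the generated function names stay the same. A minimal standalone sketch of the expansion follows (not part of the commit; the Z7_BRANCH_CONV_DEC_2 / Z7_BRANCH_CONV_ENC_2 helpers are assumed here to paste _Dec / _Enc suffixes, their real definitions live in Bra.h and are not shown in this diff):

/* illustration only: prints the symbol names the new macro scheme produces */
#include <stdio.h>

#define Z7_BRANCH_CONV(name)        z7_ ## name
#define Z7_BRANCH_CONV_DEC_2(name)  z7_ ## name ## _Dec  /* assumed, see Bra.h */
#define Z7_BRANCH_CONV_ENC_2(name)  z7_ ## name ## _Enc  /* assumed, see Bra.h */

#define STR_2(x) #x
#define STR(x)   STR_2(x)

int main(void)
{
  printf("%s\n", STR(Z7_BRANCH_CONV(BranchConv_ARM64)));        /* z7_BranchConv_ARM64 */
  printf("%s\n", STR(Z7_BRANCH_CONV_DEC_2(BranchConv_ARM64)));  /* z7_BranchConv_ARM64_Dec */
  printf("%s\n", STR(Z7_BRANCH_CONV_ENC_2(BranchConv_ARM64)));  /* z7_BranchConv_ARM64_Enc */
  return 0;
}
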
@@ -72,7 +72,7 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
 #endif
 
 
-Z7_BRANCH_FUNC_MAIN(ARM64)
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
 {
   // Byte *p = data;
   const Byte *lim;
@@ -121,10 +121,10 @@ Z7_BRANCH_FUNC_MAIN(ARM64)
     }
   }
 }
-Z7_BRANCH_FUNCS_IMP(ARM64)
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
 
 
-Z7_BRANCH_FUNC_MAIN(ARM)
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
 {
   // Byte *p = data;
   const Byte *lim;
@@ -152,10 +152,10 @@ Z7_BRANCH_FUNC_MAIN(ARM)
     }
   }
 }
-Z7_BRANCH_FUNCS_IMP(ARM)
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
 
 
-Z7_BRANCH_FUNC_MAIN(PPC)
+Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
 {
   // Byte *p = data;
   const Byte *lim;
@@ -192,14 +192,14 @@ Z7_BRANCH_FUNC_MAIN(PPC)
     }
   }
 }
-Z7_BRANCH_FUNCS_IMP(PPC)
+Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
 
 
 #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
 #define BR_SPARC_USE_ROTATE
 #endif
 
-Z7_BRANCH_FUNC_MAIN(SPARC)
+Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
 {
   // Byte *p = data;
   const Byte *lim;
@@ -254,10 +254,10 @@ Z7_BRANCH_FUNC_MAIN(SPARC)
     }
   }
 }
-Z7_BRANCH_FUNCS_IMP(SPARC)
+Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
 
 
-Z7_BRANCH_FUNC_MAIN(ARMT)
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
 {
   // Byte *p = data;
   Byte *lim;
@@ -335,12 +335,12 @@ Z7_BRANCH_FUNC_MAIN(ARMT)
   // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
   // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
 }
-Z7_BRANCH_FUNCS_IMP(ARMT)
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
 
 
 // #define BR_IA64_NO_INLINE
 
-Z7_BRANCH_FUNC_MAIN(IA64)
+Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
 {
   // Byte *p = data;
   const Byte *lim;
@@ -417,4 +417,293 @@ Z7_BRANCH_FUNC_MAIN(IA64)
     }
   }
 }
-Z7_BRANCH_FUNCS_IMP(IA64)
+Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
+
+
+#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET;
+#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET;
+
+#if 1 && defined(MY_CPU_LE_UNALIGN)
+  #define RISCV_USE_UNALIGNED_LOAD
+#endif
+
+#ifdef RISCV_USE_UNALIGNED_LOAD
+  #define RISCV_GET_UI32(p) GetUi32(p)
+  #define RISCV_SET_UI32(p, v) { SetUi32(p, v) }
+#else
+  #define RISCV_GET_UI32(p) \
+    ((UInt32)GetUi16a(p) + \
+    ((UInt32)GetUi16a((p) + 2) << 16))
+  #define RISCV_SET_UI32(p, v) { \
+    SetUi16a(p, (UInt16)(v)) \
+    SetUi16a((p) + 2, (UInt16)(v >> 16)) }
+#endif
+
+#if 1 && defined(MY_CPU_LE)
+  #define RISCV_USE_16BIT_LOAD
+#endif
+
+#ifdef RISCV_USE_16BIT_LOAD
+  #define RISCV_LOAD_VAL(p) GetUi16a(p)
+#else
+  #define RISCV_LOAD_VAL(p) (*(p))
+#endif
+
+#define RISCV_INSTR_SIZE 2
+#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE)
+#define RISCV_STEP_2 4
+#define RISCV_REG_VAL (2 << 7)
+#define RISCV_CMD_VAL 3
+#if 1
+  // for code size optimization:
+  #define RISCV_DELTA_7F 0x7f
+#else
+  #define RISCV_DELTA_7F 0
+#endif
+
+#define RISCV_CHECK_1(v, b) \
+  (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
+
+#if 1
+  #define RISCV_CHECK_2(v, r) \
+  ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
+      << 18) \
+      < ((r) & 0x1d))
+#else
+  // this branch gives larger code, because
+  // compilers generate larger code for big constants.
+  #define RISCV_CHECK_2(v, r) \
+  ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
+      & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
+      < ((r) & 0x1d))
+#endif
+
+
+#define RISCV_SCAN_LOOP \
+  Byte *lim; \
+  size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
+  if (size <= 6) return p; \
+  size -= 6; \
+  lim = p + size; \
+  BR_PC_INIT \
+  for (;;) \
+  { \
+    UInt32 a, v; \
+    /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
+    for (;;) \
+    { \
+      if Z7_UNLIKELY(p >= lim) { return p; } \
+      a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
+      if ((a & 0x77) == 0) break; \
+      a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
+      p += RISCV_INSTR_SIZE * 2; \
+      if ((a & 0x77) == 0) \
+      { \
+        p -= RISCV_INSTR_SIZE; \
+        if Z7_UNLIKELY(p >= lim) { return p; } \
+        break; \
+      } \
+    }
+// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL
+// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
+// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC
+// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC
+
+Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
+{
+  RISCV_SCAN_LOOP
+  v = a;
+  a = RISCV_GET_UI32(p);
+#ifndef RISCV_USE_16BIT_LOAD
+  v += (UInt32)p[1] << 8;
+#endif
+
+  if ((v & 8) == 0) // JAL
+  {
+    if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
+    {
+      p += RISCV_INSTR_SIZE;
+      continue;
+    }
+    {
+      v = ((a & 1u << 31) >> 11)
+        | ((a & 0x3ff << 21) >> 20)
+        | ((a & 1 << 20) >> 9)
+        | (a & 0xff << 12);
+      BR_CONVERT_VAL_ENC(v)
+      // ((v & 1) == 0)
+      // v: bits [1 : 20] contain offset bits
+#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
+      a &= 0xfff;
+      a |= ((UInt32)(v << 23))
+        | ((UInt32)(v << 7) & ((UInt32)0xff << 16))
+        | ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8));
+      RISCV_SET_UI32(p, a)
+#else // aligned
+#if 0
+      SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
+#else
+      p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
+#endif
+
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+      v <<= 15;
+      v = Z7_BSWAP32(v);
+      SetUi16a(p + 2, (UInt16)v)
+#else
+      p[2] = (Byte)(v >> 9);
+      p[3] = (Byte)(v >> 1);
+#endif
+#endif // aligned
+    }
+    p += 4;
+    continue;
+  } // JAL
+
+  {
+    // AUIPC
+    if (v & 0xe80) // (not x0) and (not x2)
+    {
+      const UInt32 b = RISCV_GET_UI32(p + 4);
+      if (RISCV_CHECK_1(v, b))
+      {
+        {
+          const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
+          RISCV_SET_UI32(p, temp)
+        }
+        a &= 0xfffff000;
+        {
+#if 1
+          const int t = -1 >> 1;
+          if (t != -1)
+            a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
+          else
+#endif
+            a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
+        }
+        BR_CONVERT_VAL_ENC(a)
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+        a = Z7_BSWAP32(a);
+        RISCV_SET_UI32(p + 4, a)
+#else
+        SetBe32(p + 4, a)
+#endif
+        p += 8;
+      }
+      else
+        p += RISCV_STEP_1;
+    }
+    else
+    {
+      UInt32 r = a >> 27;
+      if (RISCV_CHECK_2(v, r))
+      {
+        v = RISCV_GET_UI32(p + 4);
+        r = (r << 7) + 0x17 + (v & 0xfffff000);
+        a = (a >> 12) | (v << 20);
+        RISCV_SET_UI32(p, r)
+        RISCV_SET_UI32(p + 4, a)
+        p += 8;
+      }
+      else
+        p += RISCV_STEP_2;
+    }
+  }
+  } // for
+}
+
+
+Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
+{
+  RISCV_SCAN_LOOP
+#ifdef RISCV_USE_16BIT_LOAD
+  if ((a & 8) == 0)
+  {
+#else
+  v = a;
+  a += (UInt32)p[1] << 8;
+  if ((v & 8) == 0)
+  {
+#endif
+    // JAL
+    a -= 0x100 - RISCV_DELTA_7F;
+    if (a & 0xd80)
+    {
+      p += RISCV_INSTR_SIZE;
+      continue;
+    }
+    {
+      const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
+#if 0 // unaligned
+      a = GetUi32(p);
+      v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
+        | (UInt32)(a >> 7) & ((UInt32)0xff << 9)
+#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+      v = GetUi16a(p + 2);
+      v = Z7_BSWAP32(v) >> 15
+#else
+      v = (UInt32)p[3] << 1
+        | (UInt32)p[2] << 9
+#endif
+        | (UInt32)((a & 0xf000) << 5);
+      BR_CONVERT_VAL_DEC(v)
+      a = a_old
+        | (v << 11 & 1u << 31)
+        | (v << 20 & 0x3ff << 21)
+        | (v << 9 & 1 << 20)
+        | (v & 0xff << 12);
+      RISCV_SET_UI32(p, a)
+    }
+    p += 4;
+    continue;
+  } // JAL
+
+  {
+    // AUIPC
+    v = a;
+#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
+    a = GetUi32(p);
+#else
+    a |= (UInt32)GetUi16a(p + 2) << 16;
+#endif
+    if ((v & 0xe80) == 0) // x0/x2
+    {
+      const UInt32 r = a >> 27;
+      if (RISCV_CHECK_2(v, r))
+      {
+        UInt32 b;
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+        b = RISCV_GET_UI32(p + 4);
+        b = Z7_BSWAP32(b);
+#else
+        b = GetBe32(p + 4);
+#endif
+        v = a >> 12;
+        BR_CONVERT_VAL_DEC(b)
+        a = (r << 7) + 0x17;
+        a += (b + 0x800) & 0xfffff000;
+        v |= b << 20;
+        RISCV_SET_UI32(p, a)
+        RISCV_SET_UI32(p + 4, v)
+        p += 8;
+      }
+      else
+        p += RISCV_STEP_2;
+    }
+    else
+    {
+      const UInt32 b = RISCV_GET_UI32(p + 4);
+      if (!RISCV_CHECK_1(v, b))
+        p += RISCV_STEP_1;
+      else
+      {
+        v = (a & 0xfffff000) | (b >> 20);
+        a = (b << 12) | (0x17 + RISCV_REG_VAL);
+        RISCV_SET_UI32(p, a)
+        RISCV_SET_UI32(p + 4, v)
+        p += 8;
+      }
+    }
+  }
+  } // for
+}
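
The new RISC-V converter added above follows the same scheme as the other filters in this file: RISCV_SCAN_LOOP walks the buffer in 2-byte steps looking for JAL (0x6f / 0xef) and AUIPC (0x17 / 0x97) opcode bytes, the JAL branch converts the J-type offset (20 immediate bits, bit 0 always zero) between relative and absolute form via BR_CONVERT_VAL_ENC / _DEC, and the AUIPC branches do the same for AUIPC paired with a following instruction that supplies the low 12 bits of the offset. Both exported entry points use the usual Byte *f(Byte *data, SizeT size, UInt32 pc) signature. The standalone sketch below isolates just the J-type immediate shuffle used in the JAL branch; it is not the committed code, jal_get_offset / jal_set_offset are made-up helper names, and the AUIPC pairing and the RISCV_* byte-stream macros are left out.

/* Standalone illustration (not the committed code): the J-type immediate
   permutation performed in the JAL branch of the converter above. */
#include <stdint.h>
#include <stdio.h>

/* gather imm[20|10:1|11|19:12] from instruction bits 31|30:21|20|19:12
   into a plain byte offset (bit 0 is always 0) -- the encoder's shuffle */
static uint32_t jal_get_offset(uint32_t insn)
{
  return ((insn & (1u << 31)) >> 11)
       | ((insn & (0x3ffu << 21)) >> 20)
       | ((insn & (1u << 20)) >> 9)
       |  (insn & (0xffu << 12));
}

/* scatter the offset back into the J-type fields -- the decoder's shuffle */
static uint32_t jal_set_offset(uint32_t insn, uint32_t off)
{
  insn &= 0xfff;  /* keep opcode (0x6f) and rd */
  return insn
       | ((off << 11) & (1u << 31))
       | ((off << 20) & (0x3ffu << 21))
       | ((off << 9)  & (1u << 20))
       |  (off        & (0xffu << 12));
}

int main(void)
{
  const uint32_t pc   = 0x10000;      /* position of the instruction in the stream */
  const uint32_t insn = 0x008000efu;  /* "jal ra, +8" */
  uint32_t off    = jal_get_offset(insn);
  uint32_t target = off + pc;                 /* encode: relative -> absolute */
  uint32_t back   = (uint32_t)(target - pc);  /* decode: absolute -> relative */
  printf("off=%u target=0x%x round-trip ok: %s\n",
         (unsigned)off, (unsigned)target,
         jal_set_offset(insn, back) == insn ? "yes" : "no");
  return 0;
}
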