diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-05-14 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-05-15 23:55:04 +0500 |
commit | fc662341e6f85da78ada0e443f6116b978f79f22 (patch) | |
tree | 1be1cc402a7a9cbc18d4eeea6b141354c2d559e3 /C/Bra.c | |
parent | 5b39dc76f1bc82f941d5c800ab9f34407a06b53a (diff) | |
download | 7zip-fc662341e6f85da78ada0e443f6116b978f79f22.tar.gz 7zip-fc662341e6f85da78ada0e443f6116b978f79f22.tar.bz2 7zip-fc662341e6f85da78ada0e443f6116b978f79f22.zip |
24.0524.05
Diffstat (limited to 'C/Bra.c')
-rw-r--r-- | C/Bra.c | 325 |
1 files changed, 307 insertions, 18 deletions
@@ -1,11 +1,11 @@ | |||
1 | /* Bra.c -- Branch converters for RISC code | 1 | /* Bra.c -- Branch converters for RISC code |
2 | 2023-04-02 : Igor Pavlov : Public domain */ | 2 | 2024-01-20 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
6 | #include "Bra.h" | 6 | #include "Bra.h" |
7 | #include "CpuArch.h" | ||
8 | #include "RotateDefs.h" | 7 | #include "RotateDefs.h" |
8 | #include "CpuArch.h" | ||
9 | 9 | ||
10 | #if defined(MY_CPU_SIZEOF_POINTER) \ | 10 | #if defined(MY_CPU_SIZEOF_POINTER) \ |
11 | && ( MY_CPU_SIZEOF_POINTER == 4 \ | 11 | && ( MY_CPU_SIZEOF_POINTER == 4 \ |
@@ -26,7 +26,7 @@ | |||
26 | #define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; | 26 | #define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; |
27 | // #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; | 27 | // #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; |
28 | 28 | ||
29 | #define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name | 29 | #define Z7_BRANCH_CONV(name) z7_ ## name |
30 | 30 | ||
31 | #define Z7_BRANCH_FUNC_MAIN(name) \ | 31 | #define Z7_BRANCH_FUNC_MAIN(name) \ |
32 | static \ | 32 | static \ |
@@ -42,11 +42,11 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ | |||
42 | 42 | ||
43 | #ifdef Z7_EXTRACT_ONLY | 43 | #ifdef Z7_EXTRACT_ONLY |
44 | #define Z7_BRANCH_FUNCS_IMP(name) \ | 44 | #define Z7_BRANCH_FUNCS_IMP(name) \ |
45 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) | 45 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) |
46 | #else | 46 | #else |
47 | #define Z7_BRANCH_FUNCS_IMP(name) \ | 47 | #define Z7_BRANCH_FUNCS_IMP(name) \ |
48 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \ | 48 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \ |
49 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1) | 49 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1) |
50 | #endif | 50 | #endif |
51 | 51 | ||
52 | #if defined(__clang__) | 52 | #if defined(__clang__) |
@@ -72,7 +72,7 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ | |||
72 | #endif | 72 | #endif |
73 | 73 | ||
74 | 74 | ||
75 | Z7_BRANCH_FUNC_MAIN(ARM64) | 75 | Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64) |
76 | { | 76 | { |
77 | // Byte *p = data; | 77 | // Byte *p = data; |
78 | const Byte *lim; | 78 | const Byte *lim; |
@@ -121,10 +121,10 @@ Z7_BRANCH_FUNC_MAIN(ARM64) | |||
121 | } | 121 | } |
122 | } | 122 | } |
123 | } | 123 | } |
124 | Z7_BRANCH_FUNCS_IMP(ARM64) | 124 | Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64) |
125 | 125 | ||
126 | 126 | ||
127 | Z7_BRANCH_FUNC_MAIN(ARM) | 127 | Z7_BRANCH_FUNC_MAIN(BranchConv_ARM) |
128 | { | 128 | { |
129 | // Byte *p = data; | 129 | // Byte *p = data; |
130 | const Byte *lim; | 130 | const Byte *lim; |
@@ -152,10 +152,10 @@ Z7_BRANCH_FUNC_MAIN(ARM) | |||
152 | } | 152 | } |
153 | } | 153 | } |
154 | } | 154 | } |
155 | Z7_BRANCH_FUNCS_IMP(ARM) | 155 | Z7_BRANCH_FUNCS_IMP(BranchConv_ARM) |
156 | 156 | ||
157 | 157 | ||
158 | Z7_BRANCH_FUNC_MAIN(PPC) | 158 | Z7_BRANCH_FUNC_MAIN(BranchConv_PPC) |
159 | { | 159 | { |
160 | // Byte *p = data; | 160 | // Byte *p = data; |
161 | const Byte *lim; | 161 | const Byte *lim; |
@@ -192,14 +192,14 @@ Z7_BRANCH_FUNC_MAIN(PPC) | |||
192 | } | 192 | } |
193 | } | 193 | } |
194 | } | 194 | } |
195 | Z7_BRANCH_FUNCS_IMP(PPC) | 195 | Z7_BRANCH_FUNCS_IMP(BranchConv_PPC) |
196 | 196 | ||
197 | 197 | ||
198 | #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED | 198 | #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED |
199 | #define BR_SPARC_USE_ROTATE | 199 | #define BR_SPARC_USE_ROTATE |
200 | #endif | 200 | #endif |
201 | 201 | ||
202 | Z7_BRANCH_FUNC_MAIN(SPARC) | 202 | Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC) |
203 | { | 203 | { |
204 | // Byte *p = data; | 204 | // Byte *p = data; |
205 | const Byte *lim; | 205 | const Byte *lim; |
@@ -254,10 +254,10 @@ Z7_BRANCH_FUNC_MAIN(SPARC) | |||
254 | } | 254 | } |
255 | } | 255 | } |
256 | } | 256 | } |
257 | Z7_BRANCH_FUNCS_IMP(SPARC) | 257 | Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC) |
258 | 258 | ||
259 | 259 | ||
260 | Z7_BRANCH_FUNC_MAIN(ARMT) | 260 | Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT) |
261 | { | 261 | { |
262 | // Byte *p = data; | 262 | // Byte *p = data; |
263 | Byte *lim; | 263 | Byte *lim; |
@@ -335,12 +335,12 @@ Z7_BRANCH_FUNC_MAIN(ARMT) | |||
335 | // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); | 335 | // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); |
336 | // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); | 336 | // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); |
337 | } | 337 | } |
338 | Z7_BRANCH_FUNCS_IMP(ARMT) | 338 | Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT) |
339 | 339 | ||
340 | 340 | ||
341 | // #define BR_IA64_NO_INLINE | 341 | // #define BR_IA64_NO_INLINE |
342 | 342 | ||
343 | Z7_BRANCH_FUNC_MAIN(IA64) | 343 | Z7_BRANCH_FUNC_MAIN(BranchConv_IA64) |
344 | { | 344 | { |
345 | // Byte *p = data; | 345 | // Byte *p = data; |
346 | const Byte *lim; | 346 | const Byte *lim; |
@@ -417,4 +417,293 @@ Z7_BRANCH_FUNC_MAIN(IA64) | |||
417 | } | 417 | } |
418 | } | 418 | } |
419 | } | 419 | } |
420 | Z7_BRANCH_FUNCS_IMP(IA64) | 420 | Z7_BRANCH_FUNCS_IMP(BranchConv_IA64) |
421 | |||
422 | /* RISC-V filter helpers. BR_PC_GET is defined outside this view; the two macros below add it to v (encode) or subtract it from v (decode). */ |||
423 | #define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET; | ||
424 | #define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET; | ||
425 | /* Direct 32-bit accesses are used only when MY_CPU_LE_UNALIGN is defined (set outside this view; presumably a little-endian CPU that allows unaligned access - confirm). */ |||
426 | #if 1 && defined(MY_CPU_LE_UNALIGN) | ||
427 | #define RISCV_USE_UNALIGNED_LOAD | ||
428 | #endif | ||
429 | /* RISCV_GET_UI32 / RISCV_SET_UI32: read / write a 32-bit value at p. The fallback splits it into two 16-bit accesses: low half at p, high half at p + 2. */ |||
430 | #ifdef RISCV_USE_UNALIGNED_LOAD | ||
431 | #define RISCV_GET_UI32(p) GetUi32(p) | ||
432 | #define RISCV_SET_UI32(p, v) { SetUi32(p, v) } | ||
433 | #else | ||
434 | #define RISCV_GET_UI32(p) \ | ||
435 | ((UInt32)GetUi16a(p) + \ | ||
436 | ((UInt32)GetUi16a((p) + 2) << 16)) | ||
437 | #define RISCV_SET_UI32(p, v) { \ | ||
438 | SetUi16a(p, (UInt16)(v)) \ | ||
439 | SetUi16a((p) + 2, (UInt16)(v >> 16)) } | ||
440 | #endif | ||
441 | /* Scan granularity: when MY_CPU_LE is defined the scan loop loads 16 bits per step through GetUi16a, otherwise it loads a single byte. */ |||
442 | #if 1 && defined(MY_CPU_LE) | ||
443 | #define RISCV_USE_16BIT_LOAD | ||
444 | #endif | ||
445 | |||
446 | #ifdef RISCV_USE_16BIT_LOAD | ||
447 | #define RISCV_LOAD_VAL(p) GetUi16a(p) | ||
448 | #else | ||
449 | #define RISCV_LOAD_VAL(p) (*(p)) | ||
450 | #endif | ||
451 | /* Constants and match predicates shared by the RISC-V encoder and decoder. */ |||
452 | #define RISCV_INSTR_SIZE 2 /* scan step in bytes; size is rounded down to a multiple of it */ | ||
453 | #define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE) /* bytes skipped when RISCV_CHECK_1 fails */ | ||
454 | #define RISCV_STEP_2 4 /* bytes skipped when RISCV_CHECK_2 fails */ | ||
455 | #define RISCV_REG_VAL (2 << 7) /* register number 2 positioned at bit 7 (the field the "x0/x2" tests look at) */ | ||
456 | #define RISCV_CMD_VAL 3 /* value RISCV_CHECK_1 requires in the two low bits of the second word */ | ||
457 | #if 1 | ||
458 | // for code size optimization: | ||
459 | #define RISCV_DELTA_7F 0x7f /* in this view it is used only in the decoder's JAL constants, where it cancels out when a_old is rebuilt */ | ||
460 | #else | ||
461 | #define RISCV_DELTA_7F 0 | ||
462 | #endif | ||
463 | /* RISCV_CHECK_1(v, b): true when the two low bits of b equal RISCV_CMD_VAL and bits 15..19 of b equal bits 7..11 of v. */ |||
464 | #define RISCV_CHECK_1(v, b) \ | ||
465 | (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0) | ||
466 | /* RISCV_CHECK_2(v, r): with 32-bit operands the shifted difference is either 0 or at least (1 << 18), so this is true only when the low 14 bits of v equal 0x3108 and (r & 0x1d) is nonzero, i.e. r is neither 0 nor 2. */ |||
467 | #if 1 | ||
468 | #define RISCV_CHECK_2(v, r) \ | ||
469 | ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \ | ||
470 | << 18) \ | ||
471 | < ((r) & 0x1d)) | ||
472 | #else | ||
473 | // this branch gives larger code, because | ||
474 | // compilers generate larger code for big constants. | ||
475 | #define RISCV_CHECK_2(v, r) \ | ||
476 | ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ | ||
477 | & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ | ||
478 | < ((r) & 0x1d)) | ||
479 | #endif | ||
480 | |||
481 | /* RISCV_SCAN_LOOP: common prologue of the RISC-V encoder and decoder. It declares lim, trims size, opens the outer for(;;) (the caller closes it with its "} // for" brace) and scans forward until p points at a JAL / AUIPC candidate; it returns p from the enclosing function once p reaches lim. */ |||
482 | #define RISCV_SCAN_LOOP \ | ||
483 | Byte *lim; \ | ||
484 | size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); /* round size down to a multiple of 2 */ \ | ||
485 | if (size <= 6) return p; /* nothing is processed for 6 bytes or fewer */ \ | ||
486 | size -= 6; \ | ||
487 | lim = p + size; /* candidates must start before lim, so at least 8 bytes remain before the rounded end */ \ | ||
488 | BR_PC_INIT /* defined outside this view */ \ | ||
489 | for (;;) \ | ||
490 | { \ | ||
491 | UInt32 a, v; \ | ||
492 | /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \ | ||
493 | for (;;) /* inner scan: tests two 2-byte units per iteration */ \ | ||
494 | { \ | ||
495 | if Z7_UNLIKELY(p >= lim) { return p; } /* end of scan: return the stop position */ \ | ||
496 | a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; /* low bytes 6f, ef, 17, 97 (hex) are the only ones giving (a & 0x77) == 0 */ \ | ||
497 | if ((a & 0x77) == 0) break; \ | ||
498 | a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; /* same test on the next 2-byte unit */ \ | ||
499 | p += RISCV_INSTR_SIZE * 2; \ | ||
500 | if ((a & 0x77) == 0) \ | ||
501 | { \ | ||
502 | p -= RISCV_INSTR_SIZE; /* step back onto the unit that matched */ \ | ||
503 | if Z7_UNLIKELY(p >= lim) { return p; } \ | ||
504 | break; \ | ||
505 | } \ | ||
506 | } | ||
507 | // Scan transform on the low byte, all values hex: (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL | ||
508 | // (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL | ||
509 | // (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC | ||
510 | // (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC | ||
511 | /* RISC-V filter, encoding direction. Scans size bytes at p for JAL and AUIPC candidates, rewrites accepted ones in place with BR_PC_GET added to their offset, and returns the position where the scan stopped (the only exits are the returns inside RISCV_SCAN_LOOP). BR_PC_INIT / BR_PC_GET come from outside this view; presumably they derive the address of p from pc - confirm. */ |||
512 | Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc) | ||
513 | { | ||
514 | RISCV_SCAN_LOOP /* opens the outer for(;;); here p is at a candidate and a = (RISCV_LOAD_VAL(p) ^ 0x10) + 1 */ | ||
515 | v = a; /* v keeps the scan value */ | ||
516 | a = RISCV_GET_UI32(p); /* a becomes the full 32-bit word at p */ | ||
517 | #ifndef RISCV_USE_16BIT_LOAD | ||
518 | v += (UInt32)p[1] << 8; /* byte-load mode: add the second byte so v covers 16 bits */ | ||
519 | #endif | ||
520 | |||
521 | if ((v & 8) == 0) // JAL (bit 3 of the scan value is set only for the AUIPC opcode) | ||
522 | { | ||
523 | if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80) /* by the mask arithmetic, only bits 7..11 (rd) equal to 1 or 5 pass; anything else is skipped */ | ||
524 | { | ||
525 | p += RISCV_INSTR_SIZE; | ||
526 | continue; | ||
527 | } | ||
528 | { | ||
529 | v = ((a & 1u << 31) >> 11) /* gather the scattered immediate: bit 31 -> bit 20 */ | ||
530 | | ((a & 0x3ff << 21) >> 20) /* bits 21..30 -> bits 1..10 */ | ||
531 | | ((a & 1 << 20) >> 9) /* bit 20 -> bit 11 */ | ||
532 | | (a & 0xff << 12); /* bits 12..19 stay in place */ | ||
533 | BR_CONVERT_VAL_ENC(v) /* v += BR_PC_GET */ | ||
534 | // ((v & 1) == 0) | ||
535 | // v: bits [1 : 20] contain offset bits | ||
536 | #if 0 && defined(RISCV_USE_UNALIGNED_LOAD) /* disabled alternative: one 32-bit store */ | ||
537 | a &= 0xfff; | ||
538 | a |= ((UInt32)(v << 23)) | ||
539 | | ((UInt32)(v << 7) & ((UInt32)0xff << 16)) | ||
540 | | ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8)); | ||
541 | RISCV_SET_UI32(p, a) | ||
542 | #else // aligned | ||
543 | #if 0 | ||
544 | SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff))) | ||
545 | #else | ||
546 | p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf)); /* high nibble: v bits 17..20; low nibble: original bits 8..11; p[0] is left untouched */ | ||
547 | #endif | ||
548 | |||
549 | #if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) | ||
550 | v <<= 15; | ||
551 | v = Z7_BSWAP32(v); | ||
552 | SetUi16a(p + 2, (UInt16)v) | ||
553 | #else | ||
554 | p[2] = (Byte)(v >> 9); /* v bits 9..16 */ | ||
555 | p[3] = (Byte)(v >> 1); /* v bits 1..8 */ | ||
556 | #endif | ||
557 | #endif // aligned | ||
558 | } | ||
559 | p += 4; | ||
560 | continue; | ||
561 | } // JAL | ||
562 | |||
563 | { | ||
564 | // AUIPC | ||
565 | if (v & 0xe80) // (not x0) and (not x2) | ||
566 | { | ||
567 | const UInt32 b = RISCV_GET_UI32(p + 4); /* the 32-bit word that follows the AUIPC */ | ||
568 | if (RISCV_CHECK_1(v, b)) /* b has both low bits set and its bits 15..19 equal the AUIPC bits 7..11 */ | ||
569 | { | ||
570 | { | ||
571 | const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL); /* first output word: opcode 0x17 with register field 2, carrying the low 20 bits of b */ | ||
572 | RISCV_SET_UI32(p, temp) | ||
573 | } | ||
574 | a &= 0xfffff000; /* keep the upper 20 bits of the AUIPC word */ | ||
575 | { | ||
576 | #if 1 | ||
577 | const int t = -1 >> 1; /* probes whether >> on a negative int is arithmetic on this compiler */ | ||
578 | if (t != -1) | ||
579 | a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation | ||
580 | else | ||
581 | #endif | ||
582 | a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension). | ||
583 | } | ||
584 | BR_CONVERT_VAL_ENC(a) /* a += BR_PC_GET */ | ||
585 | #if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) | ||
586 | a = Z7_BSWAP32(a); | ||
587 | RISCV_SET_UI32(p + 4, a) | ||
588 | #else | ||
589 | SetBe32(p + 4, a) /* second output word; presumably stored most-significant byte first, matching the BSWAP path above - confirm */ | ||
590 | #endif | ||
591 | p += 8; | ||
592 | } | ||
593 | else | ||
594 | p += RISCV_STEP_1; /* no match: skip 6 bytes */ | ||
595 | } | ||
596 | else | ||
597 | { | ||
598 | UInt32 r = a >> 27; /* top 5 bits of the word at p */ | ||
599 | if (RISCV_CHECK_2(v, r)) /* input already has the shape produced by the branch above (low 14 bits 0x3117, r not 0 or 2) */ | ||
600 | { | ||
601 | v = RISCV_GET_UI32(p + 4); | ||
602 | r = (r << 7) + 0x17 + (v & 0xfffff000); /* opcode 0x17 with register field r and the upper 20 bits of the second word */ | ||
603 | a = (a >> 12) | (v << 20); /* upper 20 bits of the first word plus the low 12 bits of the second */ | ||
604 | RISCV_SET_UI32(p, r) | ||
605 | RISCV_SET_UI32(p + 4, a) /* the decoder's RISCV_CHECK_1 branch appears to reverse this swap */ | ||
606 | p += 8; | ||
607 | } | ||
608 | else | ||
609 | p += RISCV_STEP_2; /* no match: skip 4 bytes */ | ||
610 | } | ||
611 | } | ||
612 | } // for | ||
613 | } | ||
614 | |||
615 | /* RISC-V filter, decoding direction. Scans size bytes at p for JAL and AUIPC candidates, rewrites accepted ones in place with BR_PC_GET subtracted from the stored value, and returns the position where the scan stopped (the only exits are the returns inside RISCV_SCAN_LOOP). BR_PC_INIT / BR_PC_GET come from outside this view. */ |||
616 | Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc) | ||
617 | { | ||
618 | RISCV_SCAN_LOOP /* opens the outer for(;;); here p is at a candidate and a = (RISCV_LOAD_VAL(p) ^ 0x10) + 1 */ | ||
619 | #ifdef RISCV_USE_16BIT_LOAD | ||
620 | if ((a & 8) == 0) /* bit 3 clear: JAL candidate; a already covers 16 bits */ | ||
621 | { | ||
622 | #else | ||
623 | v = a; | ||
624 | a += (UInt32)p[1] << 8; /* byte-load mode: extend the scan value to 16 bits */ | ||
625 | if ((v & 8) == 0) | ||
626 | { | ||
627 | #endif | ||
628 | // JAL | ||
629 | a -= 0x100 - RISCV_DELTA_7F; | ||
630 | if (a & 0xd80) /* same register filter as the encoder: only bits 7..11 equal to 1 or 5 pass */ | ||
631 | { | ||
632 | p += RISCV_INSTR_SIZE; | ||
633 | continue; | ||
634 | } | ||
635 | { | ||
636 | const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff; /* undoes the scan transform and the subtraction above: low 12 bits of the stored instruction */ | ||
637 | #if 0 // unaligned | ||
638 | a = GetUi32(p); | ||
639 | v = (UInt32)(a >> 23) & ((UInt32)0xff << 1) | ||
640 | | (UInt32)(a >> 7) & ((UInt32)0xff << 9) | ||
641 | #elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) | ||
642 | v = GetUi16a(p + 2); | ||
643 | v = Z7_BSWAP32(v) >> 15 | ||
644 | #else | ||
645 | v = (UInt32)p[3] << 1 /* value bits 1..8 */ | ||
646 | | (UInt32)p[2] << 9 /* value bits 9..16 */ | ||
647 | #endif | ||
648 | | (UInt32)((a & 0xf000) << 5); /* value bits 17..20 from the high nibble of p[1]; this line completes whichever expression the #if selected */ | ||
649 | BR_CONVERT_VAL_DEC(v) /* v -= BR_PC_GET */ | ||
650 | a = a_old | ||
651 | | (v << 11 & 1u << 31) /* scatter back: bit 20 -> bit 31 */ | ||
652 | | (v << 20 & 0x3ff << 21) /* bits 1..10 -> bits 21..30 */ | ||
653 | | (v << 9 & 1 << 20) /* bit 11 -> bit 20 */ | ||
654 | | (v & 0xff << 12); /* bits 12..19 stay in place */ | ||
655 | RISCV_SET_UI32(p, a) | ||
656 | } | ||
657 | p += 4; | ||
658 | continue; | ||
659 | } // JAL | ||
660 | |||
661 | { | ||
662 | // AUIPC | ||
663 | v = a; /* v keeps the 16-bit scan value */ | ||
664 | #if 1 && defined(RISCV_USE_UNALIGNED_LOAD) | ||
665 | a = GetUi32(p); | ||
666 | #else | ||
667 | a |= (UInt32)GetUi16a(p + 2) << 16; /* low 16 bits of a remain the scan value; only bits 12 and above are used below */ | ||
668 | #endif | ||
669 | if ((v & 0xe80) == 0) // x0/x2 | ||
670 | { | ||
671 | const UInt32 r = a >> 27; /* top 5 bits of the word at p */ | ||
672 | if (RISCV_CHECK_2(v, r)) /* word has the shape the encoder writes for a converted pair */ | ||
673 | { | ||
674 | UInt32 b; | ||
675 | #if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) | ||
676 | b = RISCV_GET_UI32(p + 4); | ||
677 | b = Z7_BSWAP32(b); | ||
678 | #else | ||
679 | b = GetBe32(p + 4); /* presumably a most-significant-byte-first load, matching the BSWAP path above - confirm */ | ||
680 | #endif | ||
681 | v = a >> 12; /* low 20 bits of the second instruction, as packed by the encoder */ | ||
682 | BR_CONVERT_VAL_DEC(b) /* b -= BR_PC_GET */ | ||
683 | a = (r << 7) + 0x17; /* opcode 0x17 with register field r */ | ||
684 | a += (b + 0x800) & 0xfffff000; /* upper 20 bits, rounded so that adding the sign-extended low 12 bits gives b */ | ||
685 | v |= b << 20; /* low 12 bits of b go into bits 20..31 of the second word */ | ||
686 | RISCV_SET_UI32(p, a) | ||
687 | RISCV_SET_UI32(p + 4, v) | ||
688 | p += 8; | ||
689 | } | ||
690 | else | ||
691 | p += RISCV_STEP_2; /* no match: skip 4 bytes */ | ||
692 | } | ||
693 | else | ||
694 | { | ||
695 | const UInt32 b = RISCV_GET_UI32(p + 4); | ||
696 | if (!RISCV_CHECK_1(v, b)) | ||
697 | p += RISCV_STEP_1; /* no match: skip 6 bytes */ | ||
698 | else | ||
699 | { | ||
700 | v = (a & 0xfffff000) | (b >> 20); /* appears to reverse the swap done by the encoder's RISCV_CHECK_2 branch */ | ||
701 | a = (b << 12) | (0x17 + RISCV_REG_VAL); | ||
702 | RISCV_SET_UI32(p, a) | ||
703 | RISCV_SET_UI32(p + 4, v) | ||
704 | p += 8; | ||
705 | } | ||
706 | } | ||
707 | } | ||
708 | } // for | ||
709 | } | ||