diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-05-26 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-05-27 12:41:10 +0500 |
commit | 89a73b901229c8550c172c9556ff8442ae7ac4b8 (patch) | |
tree | 00bf950adf3b20f36efe4ffa1065676869b040a5 /C | |
parent | fc662341e6f85da78ada0e443f6116b978f79f22 (diff) | |
download | 7zip-89a73b901229c8550c172c9556ff8442ae7ac4b8.tar.gz 7zip-89a73b901229c8550c172c9556ff8442ae7ac4b8.tar.bz2 7zip-89a73b901229c8550c172c9556ff8442ae7ac4b8.zip |
24.06
Diffstat (limited to 'C')
-rw-r--r-- | C/7zVersion.h | 6 | ||||
-rw-r--r-- | C/Blake2s.c | 43 | ||||
-rw-r--r-- | C/CpuArch.c | 24 | ||||
-rw-r--r-- | C/CpuArch.h | 8 | ||||
-rw-r--r-- | C/ZstdDec.c | 5 |
5 files changed, 62 insertions, 24 deletions
diff --git a/C/7zVersion.h b/C/7zVersion.h index 72b915a..75052e9 100644 --- a/C/7zVersion.h +++ b/C/7zVersion.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #define MY_VER_MAJOR 24 | 1 | #define MY_VER_MAJOR 24 |
2 | #define MY_VER_MINOR 05 | 2 | #define MY_VER_MINOR 06 |
3 | #define MY_VER_BUILD 0 | 3 | #define MY_VER_BUILD 0 |
4 | #define MY_VERSION_NUMBERS "24.05" | 4 | #define MY_VERSION_NUMBERS "24.06" |
5 | #define MY_VERSION MY_VERSION_NUMBERS | 5 | #define MY_VERSION MY_VERSION_NUMBERS |
6 | 6 | ||
7 | #ifdef MY_CPU_NAME | 7 | #ifdef MY_CPU_NAME |
@@ -10,7 +10,7 @@ | |||
10 | #define MY_VERSION_CPU MY_VERSION | 10 | #define MY_VERSION_CPU MY_VERSION |
11 | #endif | 11 | #endif |
12 | 12 | ||
13 | #define MY_DATE "2024-05-14" | 13 | #define MY_DATE "2024-05-26" |
14 | #undef MY_COPYRIGHT | 14 | #undef MY_COPYRIGHT |
15 | #undef MY_VERSION_COPYRIGHT_DATE | 15 | #undef MY_VERSION_COPYRIGHT_DATE |
16 | #define MY_AUTHOR_NAME "Igor Pavlov" | 16 | #define MY_AUTHOR_NAME "Igor Pavlov" |
diff --git a/C/Blake2s.c b/C/Blake2s.c index 459e76b..abb907d 100644 --- a/C/Blake2s.c +++ b/C/Blake2s.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* Blake2s.c -- BLAKE2sp Hash | 1 | /* Blake2s.c -- BLAKE2sp Hash |
2 | 2024-01-29 : Igor Pavlov : Public domain | 2 | 2024-05-18 : Igor Pavlov : Public domain |
3 | 2015-2019 : Samuel Neves : original code : CC0 1.0 Universal (CC0 1.0). */ | 3 | 2015-2019 : Samuel Neves : original code : CC0 1.0 Universal (CC0 1.0). */ |
4 | 4 | ||
5 | #include "Precomp.h" | 5 | #include "Precomp.h" |
@@ -12,6 +12,17 @@ | |||
12 | #include "Compiler.h" | 12 | #include "Compiler.h" |
13 | #include "CpuArch.h" | 13 | #include "CpuArch.h" |
14 | 14 | ||
15 | /* | ||
16 | if defined(__AVX512F__) && defined(__AVX512VL__) | ||
17 | { | ||
18 | we define Z7_BLAKE2S_USE_AVX512_ALWAYS, | ||
19 | but the compiler can use avx512 for any code. | ||
20 | } | ||
21 | else if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) | ||
22 | { we use avx512 only for sse* and avx* branches of code. } | ||
23 | */ | ||
24 | // #define Z7_BLAKE2S_USE_AVX512_ALWAYS // for debug | ||
25 | |||
15 | #if defined(__SSE2__) | 26 | #if defined(__SSE2__) |
16 | #define Z7_BLAKE2S_USE_VECTORS | 27 | #define Z7_BLAKE2S_USE_VECTORS |
17 | #elif defined(MY_CPU_X86_OR_AMD64) | 28 | #elif defined(MY_CPU_X86_OR_AMD64) |
@@ -59,6 +70,9 @@ | |||
59 | #endif // SSSE3 | 70 | #endif // SSSE3 |
60 | 71 | ||
61 | #if defined(__GNUC__) || defined(__clang__) | 72 | #if defined(__GNUC__) || defined(__clang__) |
73 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__)) | ||
74 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("avx512vl,avx512f"))) | ||
75 | #else | ||
62 | #if defined(Z7_BLAKE2S_USE_SSE41) | 76 | #if defined(Z7_BLAKE2S_USE_SSE41) |
63 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse4.1"))) | 77 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse4.1"))) |
64 | #elif defined(Z7_BLAKE2S_USE_SSSE3) | 78 | #elif defined(Z7_BLAKE2S_USE_SSSE3) |
@@ -67,6 +81,7 @@ | |||
67 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse2"))) | 81 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse2"))) |
68 | #endif | 82 | #endif |
69 | #endif | 83 | #endif |
84 | #endif | ||
70 | 85 | ||
71 | 86 | ||
72 | #if defined(__AVX2__) | 87 | #if defined(__AVX2__) |
@@ -77,7 +92,11 @@ | |||
77 | || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) | 92 | || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) |
78 | #define Z7_BLAKE2S_USE_AVX2 | 93 | #define Z7_BLAKE2S_USE_AVX2 |
79 | #ifdef Z7_BLAKE2S_USE_AVX2 | 94 | #ifdef Z7_BLAKE2S_USE_AVX2 |
95 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__)) | ||
96 | #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx512vl,avx512f"))) | ||
97 | #else | ||
80 | #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx2"))) | 98 | #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx2"))) |
99 | #endif | ||
81 | #endif | 100 | #endif |
82 | #elif defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ | 101 | #elif defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ |
83 | || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) | 102 | || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) |
@@ -107,7 +126,9 @@ | |||
107 | 126 | ||
108 | #if defined(__AVX512F__) && defined(__AVX512VL__) | 127 | #if defined(__AVX512F__) && defined(__AVX512VL__) |
109 | // && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL > 1930) | 128 | // && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL > 1930) |
129 | #ifndef Z7_BLAKE2S_USE_AVX512_ALWAYS | ||
110 | #define Z7_BLAKE2S_USE_AVX512_ALWAYS | 130 | #define Z7_BLAKE2S_USE_AVX512_ALWAYS |
131 | #endif | ||
111 | // #pragma message ("=== Blake2s AVX512") | 132 | // #pragma message ("=== Blake2s AVX512") |
112 | #endif | 133 | #endif |
113 | 134 | ||
@@ -1164,7 +1185,9 @@ Blake2sp_Final_V128_Fast(UInt32 *states) | |||
1164 | #if 1 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) | 1185 | #if 1 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
1165 | #define MM256_ROR_EPI32 _mm256_ror_epi32 | 1186 | #define MM256_ROR_EPI32 _mm256_ror_epi32 |
1166 | #define Z7_MM256_ROR_EPI32_IS_SUPPORTED | 1187 | #define Z7_MM256_ROR_EPI32_IS_SUPPORTED |
1188 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY2 | ||
1167 | #define LOAD_ROTATE_CONSTS_256 | 1189 | #define LOAD_ROTATE_CONSTS_256 |
1190 | #endif | ||
1168 | #else | 1191 | #else |
1169 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY_SLOW | 1192 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY_SLOW |
1170 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY2 | 1193 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY2 |
@@ -2549,9 +2572,11 @@ void z7_Black2sp_Prepare(void) | |||
2549 | 2572 | ||
2550 | #if defined(MY_CPU_X86_OR_AMD64) | 2573 | #if defined(MY_CPU_X86_OR_AMD64) |
2551 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) | 2574 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
2575 | // optional check | ||
2576 | #if 0 || !(defined(__AVX512F__) && defined(__AVX512VL__)) | ||
2552 | if (CPU_IsSupported_AVX512F_AVX512VL()) | 2577 | if (CPU_IsSupported_AVX512F_AVX512VL()) |
2553 | #endif | 2578 | #endif |
2554 | #if defined(Z7_BLAKE2S_USE_SSE41) | 2579 | #elif defined(Z7_BLAKE2S_USE_SSE41) |
2555 | if (CPU_IsSupported_SSE41()) | 2580 | if (CPU_IsSupported_SSE41()) |
2556 | #elif defined(Z7_BLAKE2S_USE_SSSE3) | 2581 | #elif defined(Z7_BLAKE2S_USE_SSSE3) |
2557 | if (CPU_IsSupported_SSSE3()) | 2582 | if (CPU_IsSupported_SSSE3()) |
@@ -2584,12 +2609,14 @@ void z7_Black2sp_Prepare(void) | |||
2584 | 2609 | ||
2585 | #ifdef Z7_BLAKE2S_USE_AVX2 | 2610 | #ifdef Z7_BLAKE2S_USE_AVX2 |
2586 | #if defined(MY_CPU_X86_OR_AMD64) | 2611 | #if defined(MY_CPU_X86_OR_AMD64) |
2587 | if ( | 2612 | |
2588 | #if 0 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) | 2613 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
2589 | CPU_IsSupported_AVX512F_AVX512VL() && | 2614 | #if 0 |
2615 | if (CPU_IsSupported_AVX512F_AVX512VL()) | ||
2616 | #endif | ||
2617 | #else | ||
2618 | if (CPU_IsSupported_AVX2()) | ||
2590 | #endif | 2619 | #endif |
2591 | CPU_IsSupported_AVX2() | ||
2592 | ) | ||
2593 | #endif | 2620 | #endif |
2594 | { | 2621 | { |
2595 | // #pragma message ("=== Blake2s AVX2") | 2622 | // #pragma message ("=== Blake2s AVX2") |
diff --git a/C/CpuArch.c b/C/CpuArch.c index d51b38a..c131a68 100644 --- a/C/CpuArch.c +++ b/C/CpuArch.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* CpuArch.c -- CPU specific code | 1 | /* CpuArch.c -- CPU specific code |
2 | 2024-03-02 : Igor Pavlov : Public domain */ | 2 | 2024-05-18 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
@@ -638,7 +638,7 @@ BoolInt CPU_IsSupported_AVX(void) | |||
638 | 638 | ||
639 | { | 639 | { |
640 | const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); | 640 | const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); |
641 | // printf("\n=== XGetBV=%d\n", bm); | 641 | // printf("\n=== XGetBV=0x%x\n", bm); |
642 | return 1 | 642 | return 1 |
643 | & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring | 643 | & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring |
644 | & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring | 644 | & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring |
@@ -662,8 +662,7 @@ BoolInt CPU_IsSupported_AVX2(void) | |||
662 | } | 662 | } |
663 | } | 663 | } |
664 | 664 | ||
665 | /* | 665 | #if 0 |
666 | // fix it: | ||
667 | BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) | 666 | BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) |
668 | { | 667 | { |
669 | if (!CPU_IsSupported_AVX()) | 668 | if (!CPU_IsSupported_AVX()) |
@@ -672,14 +671,25 @@ BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) | |||
672 | return False; | 671 | return False; |
673 | { | 672 | { |
674 | UInt32 d[4]; | 673 | UInt32 d[4]; |
674 | BoolInt v; | ||
675 | z7_x86_cpuid(d, 7); | 675 | z7_x86_cpuid(d, 7); |
676 | // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); | 676 | // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); |
677 | v = 1 | ||
678 | & (BoolInt)(d[1] >> 16) // avx512f | ||
679 | & (BoolInt)(d[1] >> 31); // avx512vl | ||
680 | if (!v) | ||
681 | return False; | ||
682 | } | ||
683 | { | ||
684 | const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); | ||
685 | // printf("\n=== XGetBV=0x%x\n", bm); | ||
677 | return 1 | 686 | return 1 |
678 | & (BoolInt)(d[1] >> 16) // avx512-f | 687 | & (BoolInt)(bm >> 5) // OPMASK |
679 | & (BoolInt)(d[1] >> 31); // avx512-Vl | 688 | & (BoolInt)(bm >> 6) // ZMM upper 256-bit |
689 | & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31 | ||
680 | } | 690 | } |
681 | } | 691 | } |
682 | */ | 692 | #endif |
683 | 693 | ||
684 | BoolInt CPU_IsSupported_VAES_AVX2(void) | 694 | BoolInt CPU_IsSupported_VAES_AVX2(void) |
685 | { | 695 | { |
diff --git a/C/CpuArch.h b/C/CpuArch.h index dfc68f1..d632c2b 100644 --- a/C/CpuArch.h +++ b/C/CpuArch.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* CpuArch.h -- CPU specific code | 1 | /* CpuArch.h -- CPU specific code |
2 | 2024-05-13 : Igor Pavlov : Public domain */ | 2 | 2024-05-18 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #ifndef ZIP7_INC_CPU_ARCH_H | 4 | #ifndef ZIP7_INC_CPU_ARCH_H |
5 | #define ZIP7_INC_CPU_ARCH_H | 5 | #define ZIP7_INC_CPU_ARCH_H |
@@ -370,12 +370,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. | |||
370 | #define Z7_CPU_FAST_BSWAP_SUPPORTED | 370 | #define Z7_CPU_FAST_BSWAP_SUPPORTED |
371 | 371 | ||
372 | /* GCC can generate slow code that calls function for __builtin_bswap32() for: | 372 | /* GCC can generate slow code that calls function for __builtin_bswap32() for: |
373 | - GCC for RISCV, if Zbb extension is not used. | 373 | - GCC for RISCV, if Zbb/XTHeadBb extension is not used. |
374 | - GCC for SPARC. | 374 | - GCC for SPARC. |
375 | The code from CLANG for SPARC also is not fastest. | 375 | The code from CLANG for SPARC also is not fastest. |
376 | So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. | 376 | So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. |
377 | */ | 377 | */ |
378 | #elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb)) \ | 378 | #elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \ |
379 | && !defined(MY_CPU_SPARC) \ | 379 | && !defined(MY_CPU_SPARC) \ |
380 | && ( \ | 380 | && ( \ |
381 | (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ | 381 | (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ |
@@ -607,7 +607,7 @@ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); | |||
607 | BoolInt CPU_IsSupported_AES(void); | 607 | BoolInt CPU_IsSupported_AES(void); |
608 | BoolInt CPU_IsSupported_AVX(void); | 608 | BoolInt CPU_IsSupported_AVX(void); |
609 | BoolInt CPU_IsSupported_AVX2(void); | 609 | BoolInt CPU_IsSupported_AVX2(void); |
610 | // BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); | 610 | BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); |
611 | BoolInt CPU_IsSupported_VAES_AVX2(void); | 611 | BoolInt CPU_IsSupported_VAES_AVX2(void); |
612 | BoolInt CPU_IsSupported_CMOV(void); | 612 | BoolInt CPU_IsSupported_CMOV(void); |
613 | BoolInt CPU_IsSupported_SSE(void); | 613 | BoolInt CPU_IsSupported_SSE(void); |
diff --git a/C/ZstdDec.c b/C/ZstdDec.c index ecf6d22..ac159d6 100644 --- a/C/ZstdDec.c +++ b/C/ZstdDec.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* ZstdDec.c -- Zstd Decoder | 1 | /* ZstdDec.c -- Zstd Decoder |
2 | 2024-01-21 : the code was developed by Igor Pavlov, using Zstandard format | 2 | 2024-05-26 : the code was developed by Igor Pavlov, using Zstandard format |
3 | specification and original zstd decoder code as reference code. | 3 | specification and original zstd decoder code as reference code. |
4 | original zstd decoder code: Copyright (c) Facebook, Inc. All rights reserved. | 4 | original zstd decoder code: Copyright (c) Facebook, Inc. All rights reserved. |
5 | This source code is licensed under BSD 3-Clause License. | 5 | This source code is licensed under BSD 3-Clause License. |
@@ -2507,6 +2507,7 @@ SRes ZstdDec1_DecodeBlock(CZstdDec1 *p, | |||
2507 | if (vars.numSeqs == 0) | 2507 | if (vars.numSeqs == 0) |
2508 | { | 2508 | { |
2509 | p->winPos += numLits; | 2509 | p->winPos += numLits; |
2510 | UPDATE_TOTAL_OUT(p, numLits) | ||
2510 | return SZ_OK; | 2511 | return SZ_OK; |
2511 | } | 2512 | } |
2512 | } | 2513 | } |
@@ -3310,11 +3311,11 @@ static SRes ZstdDec_DecodeBlock(CZstdDec * const p, CZstdDecState * const ds, | |||
3310 | { | 3311 | { |
3311 | const SizeT xxh64_winPos = p->decoder.winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p); | 3312 | const SizeT xxh64_winPos = p->decoder.winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p); |
3312 | p->decoder.winPos += outCur; | 3313 | p->decoder.winPos += outCur; |
3314 | UPDATE_TOTAL_OUT(&p->decoder, outCur) | ||
3313 | p->contentProcessed += outCur; | 3315 | p->contentProcessed += outCur; |
3314 | ZstdDec_Update_XXH(p, xxh64_winPos); | 3316 | ZstdDec_Update_XXH(p, xxh64_winPos); |
3315 | } | 3317 | } |
3316 | // ds->winPos = p->decoder.winPos; // the caller does it instead. for debug: | 3318 | // ds->winPos = p->decoder.winPos; // the caller does it instead. for debug: |
3317 | UPDATE_TOTAL_OUT(&p->decoder, outCur) | ||
3318 | ds->outProcessed += outCur; | 3319 | ds->outProcessed += outCur; |
3319 | if (p->blockSize -= (UInt32)outCur) | 3320 | if (p->blockSize -= (UInt32)outCur) |
3320 | { | 3321 | { |