From 67cc20d0041a32bee12bd9eb20ae218f91b73f77 Mon Sep 17 00:00:00 2001 From: Mark Adler Date: Fri, 9 Sep 2011 23:32:36 -0700 Subject: zlib 1.2.4-pre1 --- contrib/README.contrib | 19 +- contrib/amd64/amd64-match.S | 101 ++- contrib/asm586/README.586 | 43 -- contrib/asm586/match.S | 364 ---------- contrib/asm686/README.686 | 17 + contrib/asm686/match.S | 24 +- contrib/delphi/ZLib.pas | 2 +- contrib/dotzlib/DotZLib/UnitTests.cs | 2 +- contrib/gcc_gvmat64/gvmat64.S | 574 +++++++++++++++ contrib/infback9/inftree9.c | 4 +- contrib/masm686/match.asm | 413 ----------- contrib/masmx64/gvmat64.asm | 49 +- contrib/masmx64/inffas8664.c | 2 +- contrib/masmx64/inffasx64.asm | 10 +- contrib/masmx86/bld_ml32.bat | 2 +- contrib/masmx86/gvmat32.asm | 972 -------------------------- contrib/masmx86/gvmat32c.c | 62 -- contrib/masmx86/match686.asm | 478 +++++++++++++ contrib/masmx86/match686.obj | Bin 0 -> 3507 bytes contrib/masmx86/mkasm.bat | 3 - contrib/masmx86/readme.txt | 4 +- contrib/pascal/zlibpas.pas | 2 +- contrib/vstudio/readme.txt | 40 +- contrib/vstudio/vc10/testzlib.vcxproj | 12 +- contrib/vstudio/vc10/testzlib.vcxproj.filters | 3 - contrib/vstudio/vc10/zlib.rc | 10 +- contrib/vstudio/vc10/zlibstat.vcxproj | 11 +- contrib/vstudio/vc10/zlibstat.vcxproj.filters | 6 - contrib/vstudio/vc10/zlibvc.def | 68 +- contrib/vstudio/vc10/zlibvc.sln | 16 +- contrib/vstudio/vc10/zlibvc.vcxproj | 16 +- contrib/vstudio/vc10/zlibvc.vcxproj.filters | 6 - contrib/vstudio/vc7/zlibvc.def | 114 --- contrib/vstudio/vc8/zlibvc.def | 114 --- contrib/vstudio/vc9/testzlib.vcproj | 274 +++----- contrib/vstudio/vc9/zlib.rc | 10 +- contrib/vstudio/vc9/zlibstat.vcproj | 170 ++--- contrib/vstudio/vc9/zlibvc.def | 68 +- contrib/vstudio/vc9/zlibvc.sln | 16 +- contrib/vstudio/vc9/zlibvc.vcproj | 247 +++---- 40 files changed, 1615 insertions(+), 2733 deletions(-) delete mode 100644 contrib/asm586/README.586 delete mode 100644 contrib/asm586/match.S create mode 100644 contrib/gcc_gvmat64/gvmat64.S delete mode 100644 contrib/masm686/match.asm delete mode 100644 contrib/masmx86/gvmat32.asm delete mode 100644 contrib/masmx86/gvmat32c.c create mode 100644 contrib/masmx86/match686.asm create mode 100644 contrib/masmx86/match686.obj delete mode 100755 contrib/masmx86/mkasm.bat delete mode 100644 contrib/vstudio/vc7/zlibvc.def delete mode 100644 contrib/vstudio/vc8/zlibvc.def (limited to 'contrib') diff --git a/contrib/README.contrib b/contrib/README.contrib index 17fc8f6..dd2285d 100644 --- a/contrib/README.contrib +++ b/contrib/README.contrib @@ -12,7 +12,6 @@ amd64/ by Mikhail Teterin asm code for AMD64 See patch at http://www.freebsd.org/cgi/query-pr.cgi?pr=bin/96393 -asm586/ asm686/ by Brian Raiter asm code for Pentium and PPro/PII, using the AT&T (GNU as) syntax See http://www.muppetlabs.com/~breadbox/software/assembly.html @@ -26,6 +25,10 @@ delphi/ by Cosmin Truta dotzlib/ by Henrik Ravn Support for Microsoft .Net and Visual C++ .Net +gcc_gvmat64/by Gilles Vollant + GCC Version of x86 64-bit (AMD64 and Intel EM64t) code for x64 + assembler to replace longest_match() and inflate_fast() + infback9/ by Mark Adler Unsupported diffs to infback to decode the deflate64 format @@ -42,21 +45,19 @@ iostream3/ by Ludwig Schwardt and Kevin Ruland Yet another C++ I/O streams interface -masm686/ by Dan Higdon - and Chuck Walbourn - asm code for Pentium Pro/PII, using the MASM syntax - masmx64/ by Gilles Vollant - x86 64-bit (AMD64 and Intel EM64t) code for x64 assembler to - replace longest_match() and inflate_fast() + x86 64-bit (AMD64 and Intel EM64t) code for x64 assembler to + replace longest_match() and inflate_fast(), also masm x86 + 64-bits translation of Chris Anderson inflate_fast() masmx86/ by Gilles Vollant x86 asm code to replace longest_match() and inflate_fast(), - for Visual C++ and MASM + for Visual C++ and MASM (32 bits). + Based on Brian Raiter (asm686) and Chris Anderson (inflate86) minizip/ by Gilles Vollant Mini zip and unzip based on zlib - Includes Zip64 support by Mathias Svensson + Includes Zip64 support by Mathias Svensson See http://www.winimage.com/zLibDll/unzip.html pascal/ by Bob Dellaca et al. diff --git a/contrib/amd64/amd64-match.S b/contrib/amd64/amd64-match.S index b3bf1ac..81d4a1c 100644 --- a/contrib/amd64/amd64-match.S +++ b/contrib/amd64/amd64-match.S @@ -52,14 +52,73 @@ #define save_r13 (64-LocalVarsSize)(%rsp) #define save_r15 (80-LocalVarsSize)(%rsp) + +.globl match_init, longest_match + /* * On AMD64 the first argument of a function (in our case -- the pointer to * deflate_state structure) is passed in %rdi, hence our offsets below are * all off of that. */ + +/* you can check the structure offset by running + +#include +#include +#include "deflate.h" + +void print_depl() +{ +deflate_state ds; +deflate_state *s=&ds; +printf("size pointer=%u\n",(int)sizeof(void*)); + +printf("#define dsWSize (%3u)(%%rdi)\n",(int)(((char*)&(s->w_size))-((char*)s))); +printf("#define dsWMask (%3u)(%%rdi)\n",(int)(((char*)&(s->w_mask))-((char*)s))); +printf("#define dsWindow (%3u)(%%rdi)\n",(int)(((char*)&(s->window))-((char*)s))); +printf("#define dsPrev (%3u)(%%rdi)\n",(int)(((char*)&(s->prev))-((char*)s))); +printf("#define dsMatchLen (%3u)(%%rdi)\n",(int)(((char*)&(s->match_length))-((char*)s))); +printf("#define dsPrevMatch (%3u)(%%rdi)\n",(int)(((char*)&(s->prev_match))-((char*)s))); +printf("#define dsStrStart (%3u)(%%rdi)\n",(int)(((char*)&(s->strstart))-((char*)s))); +printf("#define dsMatchStart (%3u)(%%rdi)\n",(int)(((char*)&(s->match_start))-((char*)s))); +printf("#define dsLookahead (%3u)(%%rdi)\n",(int)(((char*)&(s->lookahead))-((char*)s))); +printf("#define dsPrevLen (%3u)(%%rdi)\n",(int)(((char*)&(s->prev_length))-((char*)s))); +printf("#define dsMaxChainLen (%3u)(%%rdi)\n",(int)(((char*)&(s->max_chain_length))-((char*)s))); +printf("#define dsGoodMatch (%3u)(%%rdi)\n",(int)(((char*)&(s->good_match))-((char*)s))); +printf("#define dsNiceMatch (%3u)(%%rdi)\n",(int)(((char*)&(s->nice_match))-((char*)s))); +} + +*/ + + +/* + to compile for XCode 3.2 on MacOSX x86_64 + - run "gcc -g -c -DXCODE_MAC_X64_STRUCTURE amd64-match.S" + */ + + +#ifndef CURRENT_LINX_XCODE_MAC_X64_STRUCTURE +#define dsWSize ( 68)(%rdi) +#define dsWMask ( 76)(%rdi) +#define dsWindow ( 80)(%rdi) +#define dsPrev ( 96)(%rdi) +#define dsMatchLen (144)(%rdi) +#define dsPrevMatch (148)(%rdi) +#define dsStrStart (156)(%rdi) +#define dsMatchStart (160)(%rdi) +#define dsLookahead (164)(%rdi) +#define dsPrevLen (168)(%rdi) +#define dsMaxChainLen (172)(%rdi) +#define dsGoodMatch (188)(%rdi) +#define dsNiceMatch (192)(%rdi) + +#else + #ifndef STRUCT_OFFSET # define STRUCT_OFFSET (0) #endif + + #define dsWSize ( 56 + STRUCT_OFFSET)(%rdi) #define dsWMask ( 64 + STRUCT_OFFSET)(%rdi) #define dsWindow ( 72 + STRUCT_OFFSET)(%rdi) @@ -74,7 +133,10 @@ #define dsGoodMatch (180 + STRUCT_OFFSET)(%rdi) #define dsNiceMatch (184 + STRUCT_OFFSET)(%rdi) -.globl match_init, longest_match +#endif + + + .text @@ -222,7 +284,9 @@ LoopEntry: cmpw -1(%windowbestlen, %curmatch), %scanendw * straightforward "rep cmpsb" would not drastically degrade * performance -- unrolling it, for example, makes no difference. */ + #undef USE_SSE /* works, but is 6-7% slower, than non-SSE... */ + LoopCmps: #ifdef USE_SSE /* Preload the SSE registers */ @@ -244,29 +308,55 @@ LoopCmps: notw %ax bsfw %ax, %ax jnz LeaveLoopCmps - add $16, %rdx + + /* this is the only iteration of the loop with a possibility of having + incremented rdx by 0x108 (each loop iteration add 16*4 = 0x40 + and (0x40*4)+8=0x108 */ + add $8, %rdx + jz LenMaximum + add $8, %rdx + + pmovmskb %xmm3, %rax notw %ax bsfw %ax, %ax jnz LeaveLoopCmps + + add $16, %rdx + + pmovmskb %xmm5, %rax notw %ax bsfw %ax, %ax jnz LeaveLoopCmps + add $16, %rdx + + pmovmskb %xmm7, %rax notw %ax bsfw %ax, %ax jnz LeaveLoopCmps + add $16, %rdx + jmp LoopCmps LeaveLoopCmps: add %rax, %rdx #else mov (%windowbestlen, %rdx), %rax xor (%prev, %rdx), %rax jnz LeaveLoopCmps - add $8, %rdx + + mov 8(%windowbestlen, %rdx), %rax + xor 8(%prev, %rdx), %rax + jnz LeaveLoopCmps8 + + mov 16(%windowbestlen, %rdx), %rax + xor 16(%prev, %rdx), %rax + jnz LeaveLoopCmps16 + + add $24, %rdx jnz LoopCmps jmp LenMaximum # if 0 @@ -274,10 +364,15 @@ LeaveLoopCmps: add %rax, %rdx * This three-liner is tantalizingly simple, but bsf is a slow instruction, * and the complicated alternative down below is quite a bit faster. Sad... */ + LeaveLoopCmps: bsf %rax, %rax /* find the first non-zero bit */ shrl $3, %eax /* divide by 8 to get the byte */ add %rax, %rdx # else +LeaveLoopCmps16: + add $8, %rdx +LeaveLoopCmps8: + add $8, %rdx LeaveLoopCmps: testl $0xFFFFFFFF, %eax /* Check the first 4 bytes */ jnz Check16 add $4, %rdx diff --git a/contrib/asm586/README.586 b/contrib/asm586/README.586 deleted file mode 100644 index 6bb78f3..0000000 --- a/contrib/asm586/README.586 +++ /dev/null @@ -1,43 +0,0 @@ -This is a patched version of zlib modified to use -Pentium-optimized assembly code in the deflation algorithm. The files -changed/added by this patch are: - -README.586 -match.S - -The effectiveness of these modifications is a bit marginal, as the the -program's bottleneck seems to be mostly L1-cache contention, for which -there is no real way to work around without rewriting the basic -algorithm. The speedup on average is around 5-10% (which is generally -less than the amount of variance between subsequent executions). -However, when used at level 9 compression, the cache contention can -drop enough for the assembly version to achieve 10-20% speedup (and -sometimes more, depending on the amount of overall redundancy in the -files). Even here, though, cache contention can still be the limiting -factor, depending on the nature of the program using the zlib library. -This may also mean that better improvements will be seen on a Pentium -with MMX, which suffers much less from L1-cache contention, but I have -not yet verified this. - -Note that this code has been tailored for the Pentium in particular, -and will not perform well on the Pentium Pro (due to the use of a -partial register in the inner loop). - -If you are using an assembler other than GNU as, you will have to -translate match.S to use your assembler's syntax. (Have fun.) - -Brian Raiter -breadbox@muppetlabs.com -April, 1998 - - -Added for zlib 1.1.3: - -The patches come from -http://www.muppetlabs.com/~breadbox/software/assembly.html - -To compile zlib with this asm file, copy match.S to the zlib directory -then do: - -CFLAGS="-O3 -DASMV" ./configure -make OBJA=match.o diff --git a/contrib/asm586/match.S b/contrib/asm586/match.S deleted file mode 100644 index 0368b35..0000000 --- a/contrib/asm586/match.S +++ /dev/null @@ -1,364 +0,0 @@ -/* match.s -- Pentium-optimized version of longest_match() - * Written for zlib 1.1.2 - * Copyright (C) 1998 Brian Raiter - * - * This is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License. - */ - -#ifndef NO_UNDERLINE -#define match_init _match_init -#define longest_match _longest_match -#endif - -#define MAX_MATCH (258) -#define MIN_MATCH (3) -#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) -#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) - -/* stack frame offsets */ - -#define wmask 0 /* local copy of s->wmask */ -#define window 4 /* local copy of s->window */ -#define windowbestlen 8 /* s->window + bestlen */ -#define chainlenscanend 12 /* high word: current chain len */ - /* low word: last bytes sought */ -#define scanstart 16 /* first two bytes of string */ -#define scanalign 20 /* dword-misalignment of string */ -#define nicematch 24 /* a good enough match size */ -#define bestlen 28 /* size of best match so far */ -#define scan 32 /* ptr to string wanting match */ - -#define LocalVarsSize (36) -/* saved ebx 36 */ -/* saved edi 40 */ -/* saved esi 44 */ -/* saved ebp 48 */ -/* return address 52 */ -#define deflatestate 56 /* the function arguments */ -#define curmatch 60 - -/* Offsets for fields in the deflate_state structure. These numbers - * are calculated from the definition of deflate_state, with the - * assumption that the compiler will dword-align the fields. (Thus, - * changing the definition of deflate_state could easily cause this - * program to crash horribly, without so much as a warning at - * compile time. Sigh.) - */ - -/* All the +zlib1222add offsets are due to the addition of fields - * in zlib in the deflate_state structure since the asm code was first written - * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). - * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). - * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). - */ - -#define zlib1222add (8) - -#define dsWSize (36+zlib1222add) -#define dsWMask (44+zlib1222add) -#define dsWindow (48+zlib1222add) -#define dsPrev (56+zlib1222add) -#define dsMatchLen (88+zlib1222add) -#define dsPrevMatch (92+zlib1222add) -#define dsStrStart (100+zlib1222add) -#define dsMatchStart (104+zlib1222add) -#define dsLookahead (108+zlib1222add) -#define dsPrevLen (112+zlib1222add) -#define dsMaxChainLen (116+zlib1222add) -#define dsGoodMatch (132+zlib1222add) -#define dsNiceMatch (136+zlib1222add) - - -.file "match.S" - -.globl match_init, longest_match - -.text - -/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ - -longest_match: - -/* Save registers that the compiler may be using, and adjust %esp to */ -/* make room for our stack frame. */ - - pushl %ebp - pushl %edi - pushl %esi - pushl %ebx - subl $LocalVarsSize, %esp - -/* Retrieve the function arguments. %ecx will hold cur_match */ -/* throughout the entire function. %edx will hold the pointer to the */ -/* deflate_state structure during the function's setup (before */ -/* entering the main loop). */ - - movl deflatestate(%esp), %edx - movl curmatch(%esp), %ecx - -/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ - - movl dsNiceMatch(%edx), %eax - movl dsLookahead(%edx), %ebx - cmpl %eax, %ebx - jl LookaheadLess - movl %eax, %ebx -LookaheadLess: movl %ebx, nicematch(%esp) - -/* register Bytef *scan = s->window + s->strstart; */ - - movl dsWindow(%edx), %esi - movl %esi, window(%esp) - movl dsStrStart(%edx), %ebp - lea (%esi,%ebp), %edi - movl %edi, scan(%esp) - -/* Determine how many bytes the scan ptr is off from being */ -/* dword-aligned. */ - - movl %edi, %eax - negl %eax - andl $3, %eax - movl %eax, scanalign(%esp) - -/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ -/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ - - movl dsWSize(%edx), %eax - subl $MIN_LOOKAHEAD, %eax - subl %eax, %ebp - jg LimitPositive - xorl %ebp, %ebp -LimitPositive: - -/* unsigned chain_length = s->max_chain_length; */ -/* if (s->prev_length >= s->good_match) { */ -/* chain_length >>= 2; */ -/* } */ - - movl dsPrevLen(%edx), %eax - movl dsGoodMatch(%edx), %ebx - cmpl %ebx, %eax - movl dsMaxChainLen(%edx), %ebx - jl LastMatchGood - shrl $2, %ebx -LastMatchGood: - -/* chainlen is decremented once beforehand so that the function can */ -/* use the sign flag instead of the zero flag for the exit test. */ -/* It is then shifted into the high word, to make room for the scanend */ -/* scanend value, which it will always accompany. */ - - decl %ebx - shll $16, %ebx - -/* int best_len = s->prev_length; */ - - movl dsPrevLen(%edx), %eax - movl %eax, bestlen(%esp) - -/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ - - addl %eax, %esi - movl %esi, windowbestlen(%esp) - -/* register ush scan_start = *(ushf*)scan; */ -/* register ush scan_end = *(ushf*)(scan+best_len-1); */ - - movw (%edi), %bx - movw %bx, scanstart(%esp) - movw -1(%edi,%eax), %bx - movl %ebx, chainlenscanend(%esp) - -/* Posf *prev = s->prev; */ -/* uInt wmask = s->w_mask; */ - - movl dsPrev(%edx), %edi - movl dsWMask(%edx), %edx - mov %edx, wmask(%esp) - -/* Jump into the main loop. */ - - jmp LoopEntry - -.balign 16 - -/* do { - * match = s->window + cur_match; - * if (*(ushf*)(match+best_len-1) != scan_end || - * *(ushf*)match != scan_start) continue; - * [...] - * } while ((cur_match = prev[cur_match & wmask]) > limit - * && --chain_length != 0); - * - * Here is the inner loop of the function. The function will spend the - * majority of its time in this loop, and majority of that time will - * be spent in the first ten instructions. - * - * Within this loop: - * %ebx = chainlenscanend - i.e., ((chainlen << 16) | scanend) - * %ecx = curmatch - * %edx = curmatch & wmask - * %esi = windowbestlen - i.e., (window + bestlen) - * %edi = prev - * %ebp = limit - * - * Two optimization notes on the choice of instructions: - * - * The first instruction uses a 16-bit address, which costs an extra, - * unpairable cycle. This is cheaper than doing a 32-bit access and - * zeroing the high word, due to the 3-cycle misalignment penalty which - * would occur half the time. This also turns out to be cheaper than - * doing two separate 8-bit accesses, as the memory is so rarely in the - * L1 cache. - * - * The window buffer, however, apparently spends a lot of time in the - * cache, and so it is faster to retrieve the word at the end of the - * match string with two 8-bit loads. The instructions that test the - * word at the beginning of the match string, however, are executed - * much less frequently, and there it was cheaper to use 16-bit - * instructions, which avoided the necessity of saving off and - * subsequently reloading one of the other registers. - */ -LookupLoop: - /* 1 U & V */ - movw (%edi,%edx,2), %cx /* 2 U pipe */ - movl wmask(%esp), %edx /* 2 V pipe */ - cmpl %ebp, %ecx /* 3 U pipe */ - jbe LeaveNow /* 3 V pipe */ - subl $0x00010000, %ebx /* 4 U pipe */ - js LeaveNow /* 4 V pipe */ -LoopEntry: movb -1(%esi,%ecx), %al /* 5 U pipe */ - andl %ecx, %edx /* 5 V pipe */ - cmpb %bl, %al /* 6 U pipe */ - jnz LookupLoop /* 6 V pipe */ - movb (%esi,%ecx), %ah - cmpb %bh, %ah - jnz LookupLoop - movl window(%esp), %eax - movw (%eax,%ecx), %ax - cmpw scanstart(%esp), %ax - jnz LookupLoop - -/* Store the current value of chainlen. */ - - movl %ebx, chainlenscanend(%esp) - -/* Point %edi to the string under scrutiny, and %esi to the string we */ -/* are hoping to match it up with. In actuality, %esi and %edi are */ -/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ -/* initialized to -(MAX_MATCH_8 - scanalign). */ - - movl window(%esp), %esi - movl scan(%esp), %edi - addl %ecx, %esi - movl scanalign(%esp), %eax - movl $(-MAX_MATCH_8), %edx - lea MAX_MATCH_8(%edi,%eax), %edi - lea MAX_MATCH_8(%esi,%eax), %esi - -/* Test the strings for equality, 8 bytes at a time. At the end, - * adjust %edx so that it is offset to the exact byte that mismatched. - * - * We already know at this point that the first three bytes of the - * strings match each other, and they can be safely passed over before - * starting the compare loop. So what this code does is skip over 0-3 - * bytes, as much as necessary in order to dword-align the %edi - * pointer. (%esi will still be misaligned three times out of four.) - * - * It should be confessed that this loop usually does not represent - * much of the total running time. Replacing it with a more - * straightforward "rep cmpsb" would not drastically degrade - * performance. - */ -LoopCmps: - movl (%esi,%edx), %eax - movl (%edi,%edx), %ebx - xorl %ebx, %eax - jnz LeaveLoopCmps - movl 4(%esi,%edx), %eax - movl 4(%edi,%edx), %ebx - xorl %ebx, %eax - jnz LeaveLoopCmps4 - addl $8, %edx - jnz LoopCmps - jmp LenMaximum -LeaveLoopCmps4: addl $4, %edx -LeaveLoopCmps: testl $0x0000FFFF, %eax - jnz LenLower - addl $2, %edx - shrl $16, %eax -LenLower: subb $1, %al - adcl $0, %edx - -/* Calculate the length of the match. If it is longer than MAX_MATCH, */ -/* then automatically accept it as the best possible match and leave. */ - - lea (%edi,%edx), %eax - movl scan(%esp), %edi - subl %edi, %eax - cmpl $MAX_MATCH, %eax - jge LenMaximum - -/* If the length of the match is not longer than the best match we */ -/* have so far, then forget it and return to the lookup loop. */ - - movl deflatestate(%esp), %edx - movl bestlen(%esp), %ebx - cmpl %ebx, %eax - jg LongerMatch - movl chainlenscanend(%esp), %ebx - movl windowbestlen(%esp), %esi - movl dsPrev(%edx), %edi - movl wmask(%esp), %edx - andl %ecx, %edx - jmp LookupLoop - -/* s->match_start = cur_match; */ -/* best_len = len; */ -/* if (len >= nice_match) break; */ -/* scan_end = *(ushf*)(scan+best_len-1); */ - -LongerMatch: movl nicematch(%esp), %ebx - movl %eax, bestlen(%esp) - movl %ecx, dsMatchStart(%edx) - cmpl %ebx, %eax - jge LeaveNow - movl window(%esp), %esi - addl %eax, %esi - movl %esi, windowbestlen(%esp) - movl chainlenscanend(%esp), %ebx - movw -1(%edi,%eax), %bx - movl dsPrev(%edx), %edi - movl %ebx, chainlenscanend(%esp) - movl wmask(%esp), %edx - andl %ecx, %edx - jmp LookupLoop - -/* Accept the current string, with the maximum possible length. */ - -LenMaximum: movl deflatestate(%esp), %edx - movl $MAX_MATCH, bestlen(%esp) - movl %ecx, dsMatchStart(%edx) - -/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ -/* return s->lookahead; */ - -LeaveNow: - movl deflatestate(%esp), %edx - movl bestlen(%esp), %ebx - movl dsLookahead(%edx), %eax - cmpl %eax, %ebx - jg LookaheadRet - movl %ebx, %eax -LookaheadRet: - -/* Restore the stack and return from whence we came. */ - - addl $LocalVarsSize, %esp - popl %ebx - popl %esi - popl %edi - popl %ebp -match_init: ret diff --git a/contrib/asm686/README.686 b/contrib/asm686/README.686 index a593f23..a0bf3be 100644 --- a/contrib/asm686/README.686 +++ b/contrib/asm686/README.686 @@ -32,3 +32,20 @@ then do: CFLAGS="-O3 -DASMV" ./configure make OBJA=match.o + + +Update: + +I've been ignoring these assembly routines for years, believing that +gcc's generated code had caught up with it sometime around gcc 2.95 +and the major rearchitecting of the Pentium 4. However, I recently +learned that, despite what I believed, this code still has some life +in it. On the Pentium 4 and AMD64 chips, it continues to run about 8% +faster than the code produced by gcc 4.1. + +In acknowledgement of its continuing usefulness, I've altered the +license to match that of the rest of zlib. Share and Enjoy! + +Brian Raiter +breadbox@muppetlabs.com +April, 2007 diff --git a/contrib/asm686/match.S b/contrib/asm686/match.S index 5c3e9ee..06817e1 100644 --- a/contrib/asm686/match.S +++ b/contrib/asm686/match.S @@ -1,9 +1,23 @@ -/* match.s -- Pentium-Pro-optimized version of longest_match() - * Written for zlib 1.1.2 - * Copyright (C) 1998 Brian Raiter +/* match.S -- x86 assembly version of the zlib longest_match() function. + * Optimized for the Intel 686 chips (PPro and later). * - * This is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License. + * Copyright (C) 1998, 2007 Brian Raiter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the author be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. */ #ifndef NO_UNDERLINE diff --git a/contrib/delphi/ZLib.pas b/contrib/delphi/ZLib.pas index 3f2b8b4..179f9a9 100644 --- a/contrib/delphi/ZLib.pas +++ b/contrib/delphi/ZLib.pas @@ -152,7 +152,7 @@ procedure DecompressToUserBuf(const InBuf: Pointer; InBytes: Integer; const OutBuf: Pointer; BufSize: Integer); const - zlib_version = '1.2.3'; + zlib_version = '1.2.4'; type EZlibError = class(Exception); diff --git a/contrib/dotzlib/DotZLib/UnitTests.cs b/contrib/dotzlib/DotZLib/UnitTests.cs index eb751bb..42c4588 100644 --- a/contrib/dotzlib/DotZLib/UnitTests.cs +++ b/contrib/dotzlib/DotZLib/UnitTests.cs @@ -156,7 +156,7 @@ namespace DotZLibTests public void Info_Version() { Info info = new Info(); - Assert.AreEqual("1.2.3", Info.Version); + Assert.AreEqual("1.2.4", Info.Version); Assert.AreEqual(32, info.SizeOfUInt); Assert.AreEqual(32, info.SizeOfULong); Assert.AreEqual(32, info.SizeOfPointer); diff --git a/contrib/gcc_gvmat64/gvmat64.S b/contrib/gcc_gvmat64/gvmat64.S new file mode 100644 index 0000000..dd858dd --- /dev/null +++ b/contrib/gcc_gvmat64/gvmat64.S @@ -0,0 +1,574 @@ +/* +;uInt longest_match_x64( +; deflate_state *s, +; IPos cur_match); // current match + +; gvmat64.S -- Asm portion of the optimized longest_match for 32 bits x86_64 +; (AMD64 on Athlon 64, Opteron, Phenom +; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7) +; this file is translation from gvmat64.asm to GCC 4.x (for Linux, Mac XCode) +; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant. +; +; File written by Gilles Vollant, by converting to assembly the longest_match +; from Jean-loup Gailly in deflate.c of zLib and infoZip zip. +; and by taking inspiration on asm686 with masm, optimised assembly code +; from Brian Raiter, written 1998 +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software +; 3. This notice may not be removed or altered from any source distribution. +; +; http://www.zlib.net +; http://www.winimage.com/zLibDll +; http://www.muppetlabs.com/~breadbox/software/assembly.html +; +; to compile this file for zLib, I use option: +; gcc -c -arch x86_64 gvmat64.S + + +;uInt longest_match(s, cur_match) +; deflate_state *s; +; IPos cur_match; // current match / +; +; with XCode for Mac, I had strange error with some jump on intel syntax +; this is why BEFORE_JMP and AFTER_JMP are used + */ + + +#define BEFORE_JMP .att_syntax +#define AFTER_JMP .intel_syntax noprefix + +#ifndef NO_UNDERLINE +# define match_init _match_init +# define longest_match _longest_match +#endif + +.intel_syntax noprefix + +.globl match_init, longest_match +.text +longest_match: + + + +#define LocalVarsSize 96 +/* +; register used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12 +; free register : r14,r15 +; register can be saved : rsp +*/ + +#define chainlenwmask (rsp + 8 - LocalVarsSize) +#define nicematch (rsp + 16 - LocalVarsSize) + +#define save_rdi (rsp + 24 - LocalVarsSize) +#define save_rsi (rsp + 32 - LocalVarsSize) +#define save_rbx (rsp + 40 - LocalVarsSize) +#define save_rbp (rsp + 48 - LocalVarsSize) +#define save_r12 (rsp + 56 - LocalVarsSize) +#define save_r13 (rsp + 64 - LocalVarsSize) +#define save_r14 (rsp + 72 - LocalVarsSize) +#define save_r15 (rsp + 80 - LocalVarsSize) + + +/* +; all the +4 offsets are due to the addition of pending_buf_size (in zlib +; in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, remove the +4). +; Note : these value are good with a 8 bytes boundary pack structure +*/ + +#define MAX_MATCH 258 +#define MIN_MATCH 3 +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) + +/* +;;; Offsets for fields in the deflate_state structure. These numbers +;;; are calculated from the definition of deflate_state, with the +;;; assumption that the compiler will dword-align the fields. (Thus, +;;; changing the definition of deflate_state could easily cause this +;;; program to crash horribly, without so much as a warning at +;;; compile time. Sigh.) + +; all the +zlib1222add offsets are due to the addition of fields +; in zlib in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). +; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). +; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). +*/ + + + +/* you can check the structure offset by running + +#include +#include +#include "deflate.h" + +void print_depl() +{ +deflate_state ds; +deflate_state *s=&ds; +printf("size pointer=%u\n",(int)sizeof(void*)); + +printf("#define dsWSize %u\n",(int)(((char*)&(s->w_size))-((char*)s))); +printf("#define dsWMask %u\n",(int)(((char*)&(s->w_mask))-((char*)s))); +printf("#define dsWindow %u\n",(int)(((char*)&(s->window))-((char*)s))); +printf("#define dsPrev %u\n",(int)(((char*)&(s->prev))-((char*)s))); +printf("#define dsMatchLen %u\n",(int)(((char*)&(s->match_length))-((char*)s))); +printf("#define dsPrevMatch %u\n",(int)(((char*)&(s->prev_match))-((char*)s))); +printf("#define dsStrStart %u\n",(int)(((char*)&(s->strstart))-((char*)s))); +printf("#define dsMatchStart %u\n",(int)(((char*)&(s->match_start))-((char*)s))); +printf("#define dsLookahead %u\n",(int)(((char*)&(s->lookahead))-((char*)s))); +printf("#define dsPrevLen %u\n",(int)(((char*)&(s->prev_length))-((char*)s))); +printf("#define dsMaxChainLen %u\n",(int)(((char*)&(s->max_chain_length))-((char*)s))); +printf("#define dsGoodMatch %u\n",(int)(((char*)&(s->good_match))-((char*)s))); +printf("#define dsNiceMatch %u\n",(int)(((char*)&(s->nice_match))-((char*)s))); +} +*/ + +#define dsWSize 68 +#define dsWMask 76 +#define dsWindow 80 +#define dsPrev 96 +#define dsMatchLen 144 +#define dsPrevMatch 148 +#define dsStrStart 156 +#define dsMatchStart 160 +#define dsLookahead 164 +#define dsPrevLen 168 +#define dsMaxChainLen 172 +#define dsGoodMatch 188 +#define dsNiceMatch 192 + +#define window_size [ rcx + dsWSize] +#define WMask [ rcx + dsWMask] +#define window_ad [ rcx + dsWindow] +#define prev_ad [ rcx + dsPrev] +#define strstart [ rcx + dsStrStart] +#define match_start [ rcx + dsMatchStart] +#define Lookahead [ rcx + dsLookahead] //; 0ffffffffh on infozip +#define prev_length [ rcx + dsPrevLen] +#define max_chain_length [ rcx + dsMaxChainLen] +#define good_match [ rcx + dsGoodMatch] +#define nice_match [ rcx + dsNiceMatch] + +/* +; windows: +; parameter 1 in rcx(deflate state s), param 2 in rdx (cur match) + +; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and +; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp +; +; All registers must be preserved across the call, except for +; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch. + +; +; gcc on macosx-linux: +; see http://www.x86-64.org/documentation/abi-0.99.pdf +; param 1 in rdi, param 2 in rsi +; rbx, rsp, rbp, r12 to r15 must be preserved + +;;; Save registers that the compiler may be using, and adjust esp to +;;; make room for our stack frame. + + +;;; Retrieve the function arguments. r8d will hold cur_match +;;; throughout the entire function. edx will hold the pointer to the +;;; deflate_state structure during the function's setup (before +;;; entering the main loop. + +; ms: parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match) +; mac: param 1 in rdi, param 2 rsi +; this clear high 32 bits of r8, which can be garbage in both r8 and rdx +*/ + mov [save_rbx],rbx + mov [save_rbp],rbp + + + mov rcx,rdi + + mov r8d,esi + + + mov [save_r12],r12 + mov [save_r13],r13 + mov [save_r14],r14 + mov [save_r15],r15 + + +//;;; uInt wmask = s->w_mask; +//;;; unsigned chain_length = s->max_chain_length; +//;;; if (s->prev_length >= s->good_match) { +//;;; chain_length >>= 2; +//;;; } + + + mov edi, prev_length + mov esi, good_match + mov eax, WMask + mov ebx, max_chain_length + cmp edi, esi + jl LastMatchGood + shr ebx, 2 +LastMatchGood: + +//;;; chainlen is decremented once beforehand so that the function can +//;;; use the sign flag instead of the zero flag for the exit test. +//;;; It is then shifted into the high word, to make room for the wmask +//;;; value, which it will always accompany. + + dec ebx + shl ebx, 16 + or ebx, eax + +//;;; on zlib only +//;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + + + mov eax, nice_match + mov [chainlenwmask], ebx + mov r10d, Lookahead + cmp r10d, eax + cmovnl r10d, eax + mov [nicematch],r10d + + + +//;;; register Bytef *scan = s->window + s->strstart; + mov r10, window_ad + mov ebp, strstart + lea r13, [r10 + rbp] + +//;;; Determine how many bytes the scan ptr is off from being +//;;; dword-aligned. + + mov r9,r13 + neg r13 + and r13,3 + +//;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? +//;;; s->strstart - (IPos)MAX_DIST(s) : NIL; + + + mov eax, window_size + sub eax, MIN_LOOKAHEAD + + + xor edi,edi + sub ebp, eax + + mov r11d, prev_length + + cmovng ebp,edi + +//;;; int best_len = s->prev_length; + + +//;;; Store the sum of s->window + best_len in esi locally, and in esi. + + lea rsi,[r10+r11] + +//;;; register ush scan_start = *(ushf*)scan; +//;;; register ush scan_end = *(ushf*)(scan+best_len-1); +//;;; Posf *prev = s->prev; + + movzx r12d,word ptr [r9] + movzx ebx, word ptr [r9 + r11 - 1] + + mov rdi, prev_ad + +//;;; Jump into the main loop. + + mov edx, [chainlenwmask] + + cmp bx,word ptr [rsi + r8 - 1] + jz LookupLoopIsZero + + + +LookupLoop1: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + jbe LeaveNow + + + + sub edx, 0x00010000 + BEFORE_JMP + js LeaveNow + AFTER_JMP + +LoopEntry1: + cmp bx,word ptr [rsi + r8 - 1] + BEFORE_JMP + jz LookupLoopIsZero + AFTER_JMP + +LookupLoop2: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + BEFORE_JMP + jbe LeaveNow + AFTER_JMP + sub edx, 0x00010000 + BEFORE_JMP + js LeaveNow + AFTER_JMP + +LoopEntry2: + cmp bx,word ptr [rsi + r8 - 1] + BEFORE_JMP + jz LookupLoopIsZero + AFTER_JMP + +LookupLoop4: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + BEFORE_JMP + jbe LeaveNow + AFTER_JMP + sub edx, 0x00010000 + BEFORE_JMP + js LeaveNow + AFTER_JMP + +LoopEntry4: + + cmp bx,word ptr [rsi + r8 - 1] + BEFORE_JMP + jnz LookupLoop1 + jmp LookupLoopIsZero + AFTER_JMP +/* +;;; do { +;;; match = s->window + cur_match; +;;; if (*(ushf*)(match+best_len-1) != scan_end || +;;; *(ushf*)match != scan_start) continue; +;;; [...] +;;; } while ((cur_match = prev[cur_match & wmask]) > limit +;;; && --chain_length != 0); +;;; +;;; Here is the inner loop of the function. The function will spend the +;;; majority of its time in this loop, and majority of that time will +;;; be spent in the first ten instructions. +;;; +;;; Within this loop: +;;; ebx = scanend +;;; r8d = curmatch +;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) +;;; esi = windowbestlen - i.e., (window + bestlen) +;;; edi = prev +;;; ebp = limit +*/ +.balign 16 +LookupLoop: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + BEFORE_JMP + jbe LeaveNow + AFTER_JMP + sub edx, 0x00010000 + BEFORE_JMP + js LeaveNow + AFTER_JMP + +LoopEntry: + + cmp bx,word ptr [rsi + r8 - 1] + BEFORE_JMP + jnz LookupLoop1 + AFTER_JMP +LookupLoopIsZero: + cmp r12w, word ptr [r10 + r8] + BEFORE_JMP + jnz LookupLoop1 + AFTER_JMP + + +//;;; Store the current value of chainlen. + mov [chainlenwmask], edx +/* +;;; Point edi to the string under scrutiny, and esi to the string we +;;; are hoping to match it up with. In actuality, esi and edi are +;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is +;;; initialized to -(MAX_MATCH_8 - scanalign). +*/ + lea rsi,[r8+r10] + mov rdx, 0xfffffffffffffef8 //; -(MAX_MATCH_8) + lea rsi, [rsi + r13 + 0x0108] //;MAX_MATCH_8] + lea rdi, [r9 + r13 + 0x0108] //;MAX_MATCH_8] + + prefetcht1 [rsi+rdx] + prefetcht1 [rdi+rdx] + +/* +;;; Test the strings for equality, 8 bytes at a time. At the end, +;;; adjust rdx so that it is offset to the exact byte that mismatched. +;;; +;;; We already know at this point that the first three bytes of the +;;; strings match each other, and they can be safely passed over before +;;; starting the compare loop. So what this code does is skip over 0-3 +;;; bytes, as much as necessary in order to dword-align the edi +;;; pointer. (rsi will still be misaligned three times out of four.) +;;; +;;; It should be confessed that this loop usually does not represent +;;; much of the total running time. Replacing it with a more +;;; straightforward "rep cmpsb" would not drastically degrade +;;; performance. +*/ + +LoopCmps: + mov rax, [rsi + rdx] + xor rax, [rdi + rdx] + jnz LeaveLoopCmps + + mov rax, [rsi + rdx + 8] + xor rax, [rdi + rdx + 8] + jnz LeaveLoopCmps8 + + + mov rax, [rsi + rdx + 8+8] + xor rax, [rdi + rdx + 8+8] + jnz LeaveLoopCmps16 + + add rdx,8+8+8 + + BEFORE_JMP + jnz LoopCmps + jmp LenMaximum + AFTER_JMP + +LeaveLoopCmps16: add rdx,8 +LeaveLoopCmps8: add rdx,8 +LeaveLoopCmps: + + test eax, 0x0000FFFF + jnz LenLower + + test eax,0xffffffff + + jnz LenLower32 + + add rdx,4 + shr rax,32 + or ax,ax + BEFORE_JMP + jnz LenLower + AFTER_JMP + +LenLower32: + shr eax,16 + add rdx,2 + +LenLower: + sub al, 1 + adc rdx, 0 +//;;; Calculate the length of the match. If it is longer than MAX_MATCH, +//;;; then automatically accept it as the best possible match and leave. + + lea rax, [rdi + rdx] + sub rax, r9 + cmp eax, MAX_MATCH + BEFORE_JMP + jge LenMaximum + AFTER_JMP +/* +;;; If the length of the match is not longer than the best match we +;;; have so far, then forget it and return to the lookup loop. +;/////////////////////////////////// +*/ + cmp eax, r11d + jg LongerMatch + + lea rsi,[r10+r11] + + mov rdi, prev_ad + mov edx, [chainlenwmask] + BEFORE_JMP + jmp LookupLoop + AFTER_JMP +/* +;;; s->match_start = cur_match; +;;; best_len = len; +;;; if (len >= nice_match) break; +;;; scan_end = *(ushf*)(scan+best_len-1); +*/ +LongerMatch: + mov r11d, eax + mov match_start, r8d + cmp eax, [nicematch] + BEFORE_JMP + jge LeaveNow + AFTER_JMP + + lea rsi,[r10+rax] + + movzx ebx, word ptr [r9 + rax - 1] + mov rdi, prev_ad + mov edx, [chainlenwmask] + BEFORE_JMP + jmp LookupLoop + AFTER_JMP + +//;;; Accept the current string, with the maximum possible length. + +LenMaximum: + mov r11d,MAX_MATCH + mov match_start, r8d + +//;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; +//;;; return s->lookahead; + +LeaveNow: + mov eax, Lookahead + cmp r11d, eax + cmovng eax, r11d + + + +//;;; Restore the stack and return from whence we came. + + +// mov rsi,[save_rsi] +// mov rdi,[save_rdi] + mov rbx,[save_rbx] + mov rbp,[save_rbp] + mov r12,[save_r12] + mov r13,[save_r13] + mov r14,[save_r14] + mov r15,[save_r15] + + + ret 0 +//; please don't remove this string ! +//; Your can freely use gvmat64 in any free or commercial app +//; but it is far better don't remove the string in the binary! + // db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0 + + +match_init: + ret 0 + + diff --git a/contrib/infback9/inftree9.c b/contrib/infback9/inftree9.c index c2c8af9..8d15fdc 100644 --- a/contrib/infback9/inftree9.c +++ b/contrib/infback9/inftree9.c @@ -9,7 +9,7 @@ #define MAXBITS 15 const char inflate9_copyright[] = - " inflate9 1.2.3.9 Copyright 1995-2010 Mark Adler "; + " inflate9 1.2.4 Copyright 1995-2010 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -64,7 +64,7 @@ unsigned short FAR *work; static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132, - 133, 133, 133, 133, 144, 193, 201}; + 133, 133, 133, 133, 144, 66, 199}; static const unsigned short dbase[32] = { /* Distance codes 0..31 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, diff --git a/contrib/masm686/match.asm b/contrib/masm686/match.asm deleted file mode 100644 index 4b03a71..0000000 --- a/contrib/masm686/match.asm +++ /dev/null @@ -1,413 +0,0 @@ - -; match.asm -- Pentium-Pro optimized version of longest_match() -; -; Updated for zlib 1.1.3 and converted to MASM 6.1x -; Copyright (C) 2000 Dan Higdon -; and Chuck Walbourn -; Corrections by Cosmin Truta -; -; This is free software; you can redistribute it and/or modify it -; under the terms of the GNU General Public License. - -; Based on match.S -; Written for zlib 1.1.2 -; Copyright (C) 1998 Brian Raiter -; -; Modified by Gilles Vollant (2005) for add gzhead and gzindex - - .686P - .MODEL FLAT - -;=========================================================================== -; EQUATES -;=========================================================================== - -MAX_MATCH EQU 258 -MIN_MATCH EQU 3 -MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) -MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) - -;=========================================================================== -; STRUCTURES -;=========================================================================== - -; This STRUCT assumes a 4-byte alignment - -DEFLATE_STATE STRUCT -ds_strm dd ? -ds_status dd ? -ds_pending_buf dd ? -ds_pending_buf_size dd ? -ds_pending_out dd ? -ds_pending dd ? -ds_wrap dd ? -; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h) -ds_gzhead dd ? -ds_gzindex dd ? -ds_data_type db ? -ds_method db ? - db ? ; padding - db ? ; padding -ds_last_flush dd ? -ds_w_size dd ? ; used -ds_w_bits dd ? -ds_w_mask dd ? ; used -ds_window dd ? ; used -ds_window_size dd ? -ds_prev dd ? ; used -ds_head dd ? -ds_ins_h dd ? -ds_hash_size dd ? -ds_hash_bits dd ? -ds_hash_mask dd ? -ds_hash_shift dd ? -ds_block_start dd ? -ds_match_length dd ? ; used -ds_prev_match dd ? ; used -ds_match_available dd ? -ds_strstart dd ? ; used -ds_match_start dd ? ; used -ds_lookahead dd ? ; used -ds_prev_length dd ? ; used -ds_max_chain_length dd ? ; used -ds_max_laxy_match dd ? -ds_level dd ? -ds_strategy dd ? -ds_good_match dd ? ; used -ds_nice_match dd ? ; used - -; Don't need anymore of the struct for match -DEFLATE_STATE ENDS - -;=========================================================================== -; CODE -;=========================================================================== -_TEXT SEGMENT - -;--------------------------------------------------------------------------- -; match_init -;--------------------------------------------------------------------------- - ALIGN 4 -PUBLIC _match_init -_match_init PROC - ; no initialization needed - ret -_match_init ENDP - -;--------------------------------------------------------------------------- -; uInt longest_match(deflate_state *deflatestate, IPos curmatch) -;--------------------------------------------------------------------------- - ALIGN 4 - -PUBLIC _longest_match -_longest_match PROC - -; Since this code uses EBP for a scratch register, the stack frame must -; be manually constructed and referenced relative to the ESP register. - -; Stack image -; Variables -chainlenwmask = 0 ; high word: current chain len - ; low word: s->wmask -window = 4 ; local copy of s->window -windowbestlen = 8 ; s->window + bestlen -scanend = 12 ; last two bytes of string -scanstart = 16 ; first two bytes of string -scanalign = 20 ; dword-misalignment of string -nicematch = 24 ; a good enough match size -bestlen = 28 ; size of best match so far -scan = 32 ; ptr to string wanting match -varsize = 36 ; number of bytes (also offset to last saved register) - -; Saved Registers (actually pushed into place) -ebx_save = 36 -edi_save = 40 -esi_save = 44 -ebp_save = 48 - -; Parameters -retaddr = 52 -deflatestate = 56 -curmatch = 60 - -; Save registers that the compiler may be using - push ebp - push edi - push esi - push ebx - -; Allocate local variable space - sub esp,varsize - -; Retrieve the function arguments. ecx will hold cur_match -; throughout the entire function. edx will hold the pointer to the -; deflate_state structure during the function's setup (before -; entering the main loop). - - mov edx, [esp+deflatestate] -ASSUME edx:PTR DEFLATE_STATE - - mov ecx, [esp+curmatch] - -; uInt wmask = s->w_mask; -; unsigned chain_length = s->max_chain_length; -; if (s->prev_length >= s->good_match) { -; chain_length >>= 2; -; } - - mov eax, [edx].ds_prev_length - mov ebx, [edx].ds_good_match - cmp eax, ebx - mov eax, [edx].ds_w_mask - mov ebx, [edx].ds_max_chain_length - jl SHORT LastMatchGood - shr ebx, 2 -LastMatchGood: - -; chainlen is decremented once beforehand so that the function can -; use the sign flag instead of the zero flag for the exit test. -; It is then shifted into the high word, to make room for the wmask -; value, which it will always accompany. - - dec ebx - shl ebx, 16 - or ebx, eax - mov [esp+chainlenwmask], ebx - -; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - mov eax, [edx].ds_nice_match - mov ebx, [edx].ds_lookahead - cmp ebx, eax - jl SHORT LookaheadLess - mov ebx, eax -LookaheadLess: - mov [esp+nicematch], ebx - -;/* register Bytef *scan = s->window + s->strstart; */ - - mov esi, [edx].ds_window - mov [esp+window], esi - mov ebp, [edx].ds_strstart - lea edi, [esi+ebp] - mov [esp+scan],edi - -;/* Determine how many bytes the scan ptr is off from being */ -;/* dword-aligned. */ - - mov eax, edi - neg eax - and eax, 3 - mov [esp+scanalign], eax - -;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ -;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ - - mov eax, [edx].ds_w_size - sub eax, MIN_LOOKAHEAD - sub ebp, eax - jg SHORT LimitPositive - xor ebp, ebp -LimitPositive: - -;/* int best_len = s->prev_length; */ - - mov eax, [edx].ds_prev_length - mov [esp+bestlen], eax - -;/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ - - add esi, eax - mov [esp+windowbestlen], esi - -;/* register ush scan_start = *(ushf*)scan; */ -;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ -;/* Posf *prev = s->prev; */ - - movzx ebx, WORD PTR[edi] - mov [esp+scanstart], ebx - movzx ebx, WORD PTR[eax+edi-1] - mov [esp+scanend], ebx - mov edi, [edx].ds_prev - -;/* Jump into the main loop. */ - - mov edx, [esp+chainlenwmask] - jmp SHORT LoopEntry - -;/* do { -; * match = s->window + cur_match; -; * if (*(ushf*)(match+best_len-1) != scan_end || -; * *(ushf*)match != scan_start) continue; -; * [...] -; * } while ((cur_match = prev[cur_match & wmask]) > limit -; * && --chain_length != 0); -; * -; * Here is the inner loop of the function. The function will spend the -; * majority of its time in this loop, and majority of that time will -; * be spent in the first ten instructions. -; * -; * Within this loop: -; * %ebx = scanend -; * %ecx = curmatch -; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) -; * %esi = windowbestlen - i.e., (window + bestlen) -; * %edi = prev -; * %ebp = limit -; */ - - ALIGN 4 -LookupLoop: - and ecx, edx - movzx ecx, WORD PTR[edi+ecx*2] - cmp ecx, ebp - jbe LeaveNow - sub edx, 000010000H - js LeaveNow - -LoopEntry: - movzx eax, WORD PTR[esi+ecx-1] - cmp eax, ebx - jnz SHORT LookupLoop - - mov eax, [esp+window] - movzx eax, WORD PTR[eax+ecx] - cmp eax, [esp+scanstart] - jnz SHORT LookupLoop - -;/* Store the current value of chainlen. */ - - mov [esp+chainlenwmask], edx - -;/* Point %edi to the string under scrutiny, and %esi to the string we */ -;/* are hoping to match it up with. In actuality, %esi and %edi are */ -;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ -;/* initialized to -(MAX_MATCH_8 - scanalign). */ - - mov esi, [esp+window] - mov edi, [esp+scan] - add esi, ecx - mov eax, [esp+scanalign] - mov edx, -MAX_MATCH_8 - lea edi, [edi+eax+MAX_MATCH_8] - lea esi, [esi+eax+MAX_MATCH_8] - -;/* Test the strings for equality, 8 bytes at a time. At the end, -; * adjust %edx so that it is offset to the exact byte that mismatched. -; * -; * We already know at this point that the first three bytes of the -; * strings match each other, and they can be safely passed over before -; * starting the compare loop. So what this code does is skip over 0-3 -; * bytes, as much as necessary in order to dword-align the %edi -; * pointer. (%esi will still be misaligned three times out of four.) -; * -; * It should be confessed that this loop usually does not represent -; * much of the total running time. Replacing it with a more -; * straightforward "rep cmpsb" would not drastically degrade -; * performance. -; */ - -LoopCmps: - mov eax, DWORD PTR[esi+edx] - xor eax, DWORD PTR[edi+edx] - jnz SHORT LeaveLoopCmps - - mov eax, DWORD PTR[esi+edx+4] - xor eax, DWORD PTR[edi+edx+4] - jnz SHORT LeaveLoopCmps4 - - add edx, 8 - jnz SHORT LoopCmps - jmp LenMaximum - ALIGN 4 - -LeaveLoopCmps4: - add edx, 4 - -LeaveLoopCmps: - test eax, 00000FFFFH - jnz SHORT LenLower - - add edx, 2 - shr eax, 16 - -LenLower: - sub al, 1 - adc edx, 0 - -;/* Calculate the length of the match. If it is longer than MAX_MATCH, */ -;/* then automatically accept it as the best possible match and leave. */ - - lea eax, [edi+edx] - mov edi, [esp+scan] - sub eax, edi - cmp eax, MAX_MATCH - jge SHORT LenMaximum - -;/* If the length of the match is not longer than the best match we */ -;/* have so far, then forget it and return to the lookup loop. */ - - mov edx, [esp+deflatestate] - mov ebx, [esp+bestlen] - cmp eax, ebx - jg SHORT LongerMatch - mov esi, [esp+windowbestlen] - mov edi, [edx].ds_prev - mov ebx, [esp+scanend] - mov edx, [esp+chainlenwmask] - jmp LookupLoop - ALIGN 4 - -;/* s->match_start = cur_match; */ -;/* best_len = len; */ -;/* if (len >= nice_match) break; */ -;/* scan_end = *(ushf*)(scan+best_len-1); */ - -LongerMatch: - mov ebx, [esp+nicematch] - mov [esp+bestlen], eax - mov [edx].ds_match_start, ecx - cmp eax, ebx - jge SHORT LeaveNow - mov esi, [esp+window] - add esi, eax - mov [esp+windowbestlen], esi - movzx ebx, WORD PTR[edi+eax-1] - mov edi, [edx].ds_prev - mov [esp+scanend], ebx - mov edx, [esp+chainlenwmask] - jmp LookupLoop - ALIGN 4 - -;/* Accept the current string, with the maximum possible length. */ - -LenMaximum: - mov edx, [esp+deflatestate] - mov DWORD PTR[esp+bestlen], MAX_MATCH - mov [edx].ds_match_start, ecx - -;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ -;/* return s->lookahead; */ - -LeaveNow: - mov edx, [esp+deflatestate] - mov ebx, [esp+bestlen] - mov eax, [edx].ds_lookahead - cmp ebx, eax - jg SHORT LookaheadRet - mov eax, ebx -LookaheadRet: - -; Restore the stack and return from whence we came. - - add esp, varsize - pop ebx - pop esi - pop edi - pop ebp - ret - -_longest_match ENDP - -_TEXT ENDS -END diff --git a/contrib/masmx64/gvmat64.asm b/contrib/masmx64/gvmat64.asm index d2790cc..9879c28 100644 --- a/contrib/masmx64/gvmat64.asm +++ b/contrib/masmx64/gvmat64.asm @@ -2,8 +2,10 @@ ; deflate_state *s, ; IPos cur_match); /* current match */ -; gvmat64.asm -- Asm portion of the optimized longest_match for 32 bits x86 -; Copyright (C) 1995-2005 Jean-loup Gailly, Brian Raiter and Gilles Vollant. +; gvmat64.asm -- Asm portion of the optimized longest_match for 32 bits x86_64 +; (AMD64 on Athlon 64, Opteron, Phenom +; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7) +; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant. ; ; File written by Gilles Vollant, by converting to assembly the longest_match ; from Jean-loup Gailly in deflate.c of zLib and infoZip zip. @@ -11,6 +13,24 @@ ; and by taking inspiration on asm686 with masm, optimised assembly code ; from Brian Raiter, written 1998 ; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software +; 3. This notice may not be removed or altered from any source distribution. +; +; +; ; http://www.zlib.net ; http://www.winimage.com/zLibDll ; http://www.muppetlabs.com/~breadbox/software/assembly.html @@ -26,10 +46,10 @@ ; ; This file compile with Microsoft Macro Assembler (x64) for AMD64 ; -; ml64.exe is given with Visual Studio 2005 and Windows 2003 server DDK +; ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK ; -; (you can get Windows 2003 server DDK with ml64 and cl for AMD64 from -; http://www.microsoft.com/whdc/devtools/ddk/default.mspx for low price) +; (you can get Windows WDK with ml64 for AMD64 from +; http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price) ; @@ -71,6 +91,25 @@ save_r13 equ rsp + 64 - LocalVarsSize ;save_r15 equ rsp + 80 - LocalVarsSize +; summary of register usage +; scanend ebx +; scanendw bx +; chainlenwmask edx +; curmatch rsi +; curmatchd esi +; windowbestlen r8 +; scanalign r9 +; scanalignd r9d +; window r10 +; bestlen r11 +; bestlend r11d +; scanstart r12d +; scanstartw r12w +; scan r13 +; nicematch r14d +; limit r15 +; limitd r15d +; prev rcx ; all the +4 offsets are due to the addition of pending_buf_size (in zlib ; in the deflate_state structure since the asm code was first written diff --git a/contrib/masmx64/inffas8664.c b/contrib/masmx64/inffas8664.c index 2263d77..e8af06f 100644 --- a/contrib/masmx64/inffas8664.c +++ b/contrib/masmx64/inffas8664.c @@ -130,7 +130,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ ar.beg = ar.out - (start - strm->avail_out); ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT); ar.wsize = state->wsize; - ar.write = state->write; + ar.write = state->wnext; ar.window = state->window; ar.hold = state->hold; ar.bits = state->bits; diff --git a/contrib/masmx64/inffasx64.asm b/contrib/masmx64/inffasx64.asm index c2ba03f..60a8d89 100644 --- a/contrib/masmx64/inffasx64.asm +++ b/contrib/masmx64/inffasx64.asm @@ -9,12 +9,16 @@ ; ml64.exe /Flinffasx64 /c /Zi inffasx64.asm ; with Microsoft Macro Assembler (x64) for AMD64 ; -; ml64.exe is given with Visual Studio 2005, Windows 2003 server DDK + +; This file compile with Microsoft Macro Assembler (x64) for AMD64 +; +; ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK ; -; (you can get Windows 2003 server DDK with ml64 and cl.exe for AMD64 from -; http://www.microsoft.com/whdc/devtools/ddk/default.mspx for low price) +; (you can get Windows WDK with ml64 for AMD64 from +; http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price) ; + .code inffas8664fnc PROC diff --git a/contrib/masmx86/bld_ml32.bat b/contrib/masmx86/bld_ml32.bat index 36962e1..e1b86bf 100644 --- a/contrib/masmx86/bld_ml32.bat +++ b/contrib/masmx86/bld_ml32.bat @@ -1,2 +1,2 @@ -ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm +ml /coff /Zi /c /Flmatch686.lst match686.asm ml /coff /Zi /c /Flinffas32.lst inffas32.asm diff --git a/contrib/masmx86/gvmat32.asm b/contrib/masmx86/gvmat32.asm deleted file mode 100644 index 8111616..0000000 --- a/contrib/masmx86/gvmat32.asm +++ /dev/null @@ -1,972 +0,0 @@ -; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 -; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. -; File written by Gilles Vollant, by modifiying the longest_match -; from Jean-loup Gailly in deflate.c -; -; http://www.zlib.net -; http://www.winimage.com/zLibDll -; http://www.muppetlabs.com/~breadbox/software/assembly.html -; -; For Visual C++ 4.x and higher and ML 6.x and higher -; ml.exe is in directory \MASM611C of Win95 DDK -; ml.exe is also distributed in http://www.masm32.com/masmdl.htm -; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ -; -; this file contain two implementation of longest_match -; -; longest_match_7fff : written 1996 by Gilles Vollant optimized for -; first Pentium. Assume s->w_mask == 0x7fff -; longest_match_686 : written by Brian raiter (1998), optimized for Pentium Pro -; -; for using an seembly version of longest_match, you need define ASMV in project -; There is two way in using gvmat32.asm -; -; A) Suggested method -; if you want include both longest_match_7fff and longest_match_686 -; compile the asm file running -; ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm -; and include gvmat32c.c in your project -; if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff, -; longest_match_7fff will be used -; if you have a more modern CPU (Pentium Pro, II and higher) -; longest_match_686 will be used -; on old cpu with s->w_mask!=0x7fff, longest_match_686 will be used, -; but this is not a sitation you'll find often -; -; B) Alternative -; if you are not interresed in old cpu performance and want the smaller -; binaries possible -; -; compile the asm file running -; ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm -; and do not include gvmat32c.c in your project (ou define also -; NOOLDPENTIUMCODE) -; -; note : as I known, longest_match_686 is very faster than longest_match_7fff -; on pentium Pro/II/III, faster (but less) in P4, but it seem -; longest_match_7fff can be faster (very very litte) on AMD Athlon64/K8 -; -; see below : zlib1222add must be adjuster if you use a zlib version < 1.2.2.2 - -;uInt longest_match_7fff(s, cur_match) -; deflate_state *s; -; IPos cur_match; /* current match */ - - NbStack equ 76 - cur_match equ dword ptr[esp+NbStack-0] - str_s equ dword ptr[esp+NbStack-4] -; 5 dword on top (ret,ebp,esi,edi,ebx) - adrret equ dword ptr[esp+NbStack-8] - pushebp equ dword ptr[esp+NbStack-12] - pushedi equ dword ptr[esp+NbStack-16] - pushesi equ dword ptr[esp+NbStack-20] - pushebx equ dword ptr[esp+NbStack-24] - - chain_length equ dword ptr [esp+NbStack-28] - limit equ dword ptr [esp+NbStack-32] - best_len equ dword ptr [esp+NbStack-36] - window equ dword ptr [esp+NbStack-40] - prev equ dword ptr [esp+NbStack-44] - scan_start equ word ptr [esp+NbStack-48] - wmask equ dword ptr [esp+NbStack-52] - match_start_ptr equ dword ptr [esp+NbStack-56] - nice_match equ dword ptr [esp+NbStack-60] - scan equ dword ptr [esp+NbStack-64] - - windowlen equ dword ptr [esp+NbStack-68] - match_start equ dword ptr [esp+NbStack-72] - strend equ dword ptr [esp+NbStack-76] - NbStackAdd equ (NbStack-24) - - .386p - - name gvmatch - .MODEL FLAT - - - -; all the +zlib1222add offsets are due to the addition of fields -; in zlib in the deflate_state structure since the asm code was first written -; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). -; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). -; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). - - zlib1222add equ 8 - -; Note : these value are good with a 8 bytes boundary pack structure - dep_chain_length equ 74h+zlib1222add - dep_window equ 30h+zlib1222add - dep_strstart equ 64h+zlib1222add - dep_prev_length equ 70h+zlib1222add - dep_nice_match equ 88h+zlib1222add - dep_w_size equ 24h+zlib1222add - dep_prev equ 38h+zlib1222add - dep_w_mask equ 2ch+zlib1222add - dep_good_match equ 84h+zlib1222add - dep_match_start equ 68h+zlib1222add - dep_lookahead equ 6ch+zlib1222add - - -_TEXT segment - -IFDEF NOUNDERLINE - IFDEF NOOLDPENTIUMCODE - public longest_match - public match_init - ELSE - public longest_match_7fff - public cpudetect32 - public longest_match_686 - ENDIF -ELSE - IFDEF NOOLDPENTIUMCODE - public _longest_match - public _match_init - ELSE - public _longest_match_7fff - public _cpudetect32 - public _longest_match_686 - ENDIF -ENDIF - - MAX_MATCH equ 258 - MIN_MATCH equ 3 - MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) - - - -IFNDEF NOOLDPENTIUMCODE -IFDEF NOUNDERLINE -longest_match_7fff proc near -ELSE -_longest_match_7fff proc near -ENDIF - - mov edx,[esp+4] - - - - push ebp - push edi - push esi - push ebx - - sub esp,NbStackAdd - -; initialize or check the variables used in match.asm. - mov ebp,edx - -; chain_length = s->max_chain_length -; if (prev_length>=good_match) chain_length >>= 2 - mov edx,[ebp+dep_chain_length] - mov ebx,[ebp+dep_prev_length] - cmp [ebp+dep_good_match],ebx - ja noshr - shr edx,2 -noshr: -; we increment chain_length because in the asm, the --chain_lenght is in the beginning of the loop - inc edx - mov edi,[ebp+dep_nice_match] - mov chain_length,edx - mov eax,[ebp+dep_lookahead] - cmp eax,edi -; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - jae nolookaheadnicematch - mov edi,eax -nolookaheadnicematch: -; best_len = s->prev_length - mov best_len,ebx - -; window = s->window - mov esi,[ebp+dep_window] - mov ecx,[ebp+dep_strstart] - mov window,esi - - mov nice_match,edi -; scan = window + strstart - add esi,ecx - mov scan,esi -; dx = *window - mov dx,word ptr [esi] -; bx = *(window+best_len-1) - mov bx,word ptr [esi+ebx-1] - add esi,MAX_MATCH-1 -; scan_start = *scan - mov scan_start,dx -; strend = scan + MAX_MATCH-1 - mov strend,esi -; bx = scan_end = *(window+best_len-1) - -; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? -; s->strstart - (IPos)MAX_DIST(s) : NIL; - - mov esi,[ebp+dep_w_size] - sub esi,MIN_LOOKAHEAD -; here esi = MAX_DIST(s) - sub ecx,esi - ja nodist - xor ecx,ecx -nodist: - mov limit,ecx - -; prev = s->prev - mov edx,[ebp+dep_prev] - mov prev,edx - -; - mov edx,dword ptr [ebp+dep_match_start] - mov bp,scan_start - mov eax,cur_match - mov match_start,edx - - mov edx,window - mov edi,edx - add edi,best_len - mov esi,prev - dec edi -; windowlen = window + best_len -1 - mov windowlen,edi - - jmp beginloop2 - align 4 - -; here, in the loop -; eax = ax = cur_match -; ecx = limit -; bx = scan_end -; bp = scan_start -; edi = windowlen (window + best_len -1) -; esi = prev - - -;// here; chain_length <=16 -normalbeg0add16: - add chain_length,16 - jz exitloop -normalbeg0: - cmp word ptr[edi+eax],bx - je normalbeg2noroll -rcontlabnoroll: -; cur_match = prev[cur_match & wmask] - and eax,7fffh - mov ax,word ptr[esi+eax*2] -; if cur_match > limit, go to exitloop - cmp ecx,eax - jnb exitloop -; if --chain_length != 0, go to exitloop - dec chain_length - jnz normalbeg0 - jmp exitloop - -normalbeg2noroll: -; if (scan_start==*(cur_match+window)) goto normalbeg2 - cmp bp,word ptr[edx+eax] - jne rcontlabnoroll - jmp normalbeg2 - -contloop3: - mov edi,windowlen - -; cur_match = prev[cur_match & wmask] - and eax,7fffh - mov ax,word ptr[esi+eax*2] -; if cur_match > limit, go to exitloop - cmp ecx,eax -jnbexitloopshort1: - jnb exitloop -; if --chain_length != 0, go to exitloop - - -; begin the main loop -beginloop2: - sub chain_length,16+1 -; if chain_length <=16, don't use the unrolled loop - jna normalbeg0add16 - -do16: - cmp word ptr[edi+eax],bx - je normalbeg2dc0 - -maccn MACRO lab - and eax,7fffh - mov ax,word ptr[esi+eax*2] - cmp ecx,eax - jnb exitloop - cmp word ptr[edi+eax],bx - je lab - ENDM - -rcontloop0: - maccn normalbeg2dc1 - -rcontloop1: - maccn normalbeg2dc2 - -rcontloop2: - maccn normalbeg2dc3 - -rcontloop3: - maccn normalbeg2dc4 - -rcontloop4: - maccn normalbeg2dc5 - -rcontloop5: - maccn normalbeg2dc6 - -rcontloop6: - maccn normalbeg2dc7 - -rcontloop7: - maccn normalbeg2dc8 - -rcontloop8: - maccn normalbeg2dc9 - -rcontloop9: - maccn normalbeg2dc10 - -rcontloop10: - maccn short normalbeg2dc11 - -rcontloop11: - maccn short normalbeg2dc12 - -rcontloop12: - maccn short normalbeg2dc13 - -rcontloop13: - maccn short normalbeg2dc14 - -rcontloop14: - maccn short normalbeg2dc15 - -rcontloop15: - and eax,7fffh - mov ax,word ptr[esi+eax*2] - cmp ecx,eax - jnb exitloop - - sub chain_length,16 - ja do16 - jmp normalbeg0add16 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -normbeg MACRO rcontlab,valsub -; if we are here, we know that *(match+best_len-1) == scan_end - cmp bp,word ptr[edx+eax] -; if (match != scan_start) goto rcontlab - jne rcontlab -; calculate the good chain_length, and we'll compare scan and match string - add chain_length,16-valsub - jmp iseq - ENDM - - -normalbeg2dc11: - normbeg rcontloop11,11 - -normalbeg2dc12: - normbeg short rcontloop12,12 - -normalbeg2dc13: - normbeg short rcontloop13,13 - -normalbeg2dc14: - normbeg short rcontloop14,14 - -normalbeg2dc15: - normbeg short rcontloop15,15 - -normalbeg2dc10: - normbeg rcontloop10,10 - -normalbeg2dc9: - normbeg rcontloop9,9 - -normalbeg2dc8: - normbeg rcontloop8,8 - -normalbeg2dc7: - normbeg rcontloop7,7 - -normalbeg2dc6: - normbeg rcontloop6,6 - -normalbeg2dc5: - normbeg rcontloop5,5 - -normalbeg2dc4: - normbeg rcontloop4,4 - -normalbeg2dc3: - normbeg rcontloop3,3 - -normalbeg2dc2: - normbeg rcontloop2,2 - -normalbeg2dc1: - normbeg rcontloop1,1 - -normalbeg2dc0: - normbeg rcontloop0,0 - - -; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end - -normalbeg2: - mov edi,window - - cmp bp,word ptr[edi+eax] - jne contloop3 ; if *(ushf*)match != scan_start, continue - -iseq: -; if we are here, we know that *(match+best_len-1) == scan_end -; and (match == scan_start) - - mov edi,edx - mov esi,scan ; esi = scan - add edi,eax ; edi = window + cur_match = match - - mov edx,[esi+3] ; compare manually dword at match+3 - xor edx,[edi+3] ; and scan +3 - - jz begincompare ; if equal, go to long compare - -; we will determine the unmatch byte and calculate len (in esi) - or dl,dl - je eq1rr - mov esi,3 - jmp trfinval -eq1rr: - or dx,dx - je eq1 - - mov esi,4 - jmp trfinval -eq1: - and edx,0ffffffh - jz eq11 - mov esi,5 - jmp trfinval -eq11: - mov esi,6 - jmp trfinval - -begincompare: - ; here we now scan and match begin same - add edi,6 - add esi,6 - mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes - repe cmpsd ; loop until mismatch - - je trfin ; go to trfin if not unmatch -; we determine the unmatch byte - sub esi,4 - mov edx,[edi-4] - xor edx,[esi] - - or dl,dl - jnz trfin - inc esi - - or dx,dx - jnz trfin - inc esi - - and edx,0ffffffh - jnz trfin - inc esi - -trfin: - sub esi,scan ; esi = len -trfinval: -; here we have finised compare, and esi contain len of equal string - cmp esi,best_len ; if len > best_len, go newbestlen - ja short newbestlen -; now we restore edx, ecx and esi, for the big loop - mov esi,prev - mov ecx,limit - mov edx,window - jmp contloop3 - -newbestlen: - mov best_len,esi ; len become best_len - - mov match_start,eax ; save new position as match_start - cmp esi,nice_match ; if best_len >= nice_match, exit - jae exitloop - mov ecx,scan - mov edx,window ; restore edx=window - add ecx,esi - add esi,edx - - dec esi - mov windowlen,esi ; windowlen = window + best_len-1 - mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end - -; now we restore ecx and esi, for the big loop : - mov esi,prev - mov ecx,limit - jmp contloop3 - -exitloop: -; exit : s->match_start=match_start - mov ebx,match_start - mov ebp,str_s - mov ecx,best_len - mov dword ptr [ebp+dep_match_start],ebx - mov eax,dword ptr [ebp+dep_lookahead] - cmp ecx,eax - ja minexlo - mov eax,ecx -minexlo: -; return min(best_len,s->lookahead) - -; restore stack and register ebx,esi,edi,ebp - add esp,NbStackAdd - - pop ebx - pop esi - pop edi - pop ebp - ret -InfoAuthor: -; please don't remove this string ! -; Your are free use gvmat32 in any fre or commercial apps if you don't remove the string in the binary! - db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah - - - -IFDEF NOUNDERLINE -longest_match_7fff endp -ELSE -_longest_match_7fff endp -ENDIF - - -IFDEF NOUNDERLINE -cpudetect32 proc near -ELSE -_cpudetect32 proc near -ENDIF - - push ebx - - pushfd ; push original EFLAGS - pop eax ; get original EFLAGS - mov ecx, eax ; save original EFLAGS - xor eax, 40000h ; flip AC bit in EFLAGS - push eax ; save new EFLAGS value on stack - popfd ; replace current EFLAGS value - pushfd ; get new EFLAGS - pop eax ; store new EFLAGS in EAX - xor eax, ecx ; can’t toggle AC bit, processor=80386 - jz end_cpu_is_386 ; jump if 80386 processor - push ecx - popfd ; restore AC bit in EFLAGS first - - pushfd - pushfd - pop ecx - - mov eax, ecx ; get original EFLAGS - xor eax, 200000h ; flip ID bit in EFLAGS - push eax ; save new EFLAGS value on stack - popfd ; replace current EFLAGS value - pushfd ; get new EFLAGS - pop eax ; store new EFLAGS in EAX - popfd ; restore original EFLAGS - xor eax, ecx ; can’t toggle ID bit, - je is_old_486 ; processor=old - - mov eax,1 - db 0fh,0a2h ;CPUID - -exitcpudetect: - pop ebx - ret - -end_cpu_is_386: - mov eax,0300h - jmp exitcpudetect - -is_old_486: - mov eax,0400h - jmp exitcpudetect - -IFDEF NOUNDERLINE -cpudetect32 endp -ELSE -_cpudetect32 endp -ENDIF -ENDIF - -MAX_MATCH equ 258 -MIN_MATCH equ 3 -MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) -MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) - - -;;; stack frame offsets - -chainlenwmask equ esp + 0 ; high word: current chain len - ; low word: s->wmask -window equ esp + 4 ; local copy of s->window -windowbestlen equ esp + 8 ; s->window + bestlen -scanstart equ esp + 16 ; first two bytes of string -scanend equ esp + 12 ; last two bytes of string -scanalign equ esp + 20 ; dword-misalignment of string -nicematch equ esp + 24 ; a good enough match size -bestlen equ esp + 28 ; size of best match so far -scan equ esp + 32 ; ptr to string wanting match - -LocalVarsSize equ 36 -; saved ebx byte esp + 36 -; saved edi byte esp + 40 -; saved esi byte esp + 44 -; saved ebp byte esp + 48 -; return address byte esp + 52 -deflatestate equ esp + 56 ; the function arguments -curmatch equ esp + 60 - -;;; Offsets for fields in the deflate_state structure. These numbers -;;; are calculated from the definition of deflate_state, with the -;;; assumption that the compiler will dword-align the fields. (Thus, -;;; changing the definition of deflate_state could easily cause this -;;; program to crash horribly, without so much as a warning at -;;; compile time. Sigh.) - -dsWSize equ 36+zlib1222add -dsWMask equ 44+zlib1222add -dsWindow equ 48+zlib1222add -dsPrev equ 56+zlib1222add -dsMatchLen equ 88+zlib1222add -dsPrevMatch equ 92+zlib1222add -dsStrStart equ 100+zlib1222add -dsMatchStart equ 104+zlib1222add -dsLookahead equ 108+zlib1222add -dsPrevLen equ 112+zlib1222add -dsMaxChainLen equ 116+zlib1222add -dsGoodMatch equ 132+zlib1222add -dsNiceMatch equ 136+zlib1222add - - -;;; match.asm -- Pentium-Pro-optimized version of longest_match() -;;; Written for zlib 1.1.2 -;;; Copyright (C) 1998 Brian Raiter -;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html -;;; -;;; This is free software; you can redistribute it and/or modify it -;;; under the terms of the GNU General Public License. - -;GLOBAL _longest_match, _match_init - - -;SECTION .text - -;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) - -;_longest_match: -IFDEF NOOLDPENTIUMCODE - IFDEF NOUNDERLINE - longest_match proc near - ELSE - _longest_match proc near - ENDIF -ELSE - IFDEF NOUNDERLINE - longest_match_686 proc near - ELSE - _longest_match_686 proc near - ENDIF -ENDIF - -;;; Save registers that the compiler may be using, and adjust esp to -;;; make room for our stack frame. - - push ebp - push edi - push esi - push ebx - sub esp, LocalVarsSize - -;;; Retrieve the function arguments. ecx will hold cur_match -;;; throughout the entire function. edx will hold the pointer to the -;;; deflate_state structure during the function's setup (before -;;; entering the main loop. - - mov edx, [deflatestate] - mov ecx, [curmatch] - -;;; uInt wmask = s->w_mask; -;;; unsigned chain_length = s->max_chain_length; -;;; if (s->prev_length >= s->good_match) { -;;; chain_length >>= 2; -;;; } - - mov eax, [edx + dsPrevLen] - mov ebx, [edx + dsGoodMatch] - cmp eax, ebx - mov eax, [edx + dsWMask] - mov ebx, [edx + dsMaxChainLen] - jl LastMatchGood - shr ebx, 2 -LastMatchGood: - -;;; chainlen is decremented once beforehand so that the function can -;;; use the sign flag instead of the zero flag for the exit test. -;;; It is then shifted into the high word, to make room for the wmask -;;; value, which it will always accompany. - - dec ebx - shl ebx, 16 - or ebx, eax - mov [chainlenwmask], ebx - -;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - mov eax, [edx + dsNiceMatch] - mov ebx, [edx + dsLookahead] - cmp ebx, eax - jl LookaheadLess - mov ebx, eax -LookaheadLess: mov [nicematch], ebx - -;;; register Bytef *scan = s->window + s->strstart; - - mov esi, [edx + dsWindow] - mov [window], esi - mov ebp, [edx + dsStrStart] - lea edi, [esi + ebp] - mov [scan], edi - -;;; Determine how many bytes the scan ptr is off from being -;;; dword-aligned. - - mov eax, edi - neg eax - and eax, 3 - mov [scanalign], eax - -;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? -;;; s->strstart - (IPos)MAX_DIST(s) : NIL; - - mov eax, [edx + dsWSize] - sub eax, MIN_LOOKAHEAD - sub ebp, eax - jg LimitPositive - xor ebp, ebp -LimitPositive: - -;;; int best_len = s->prev_length; - - mov eax, [edx + dsPrevLen] - mov [bestlen], eax - -;;; Store the sum of s->window + best_len in esi locally, and in esi. - - add esi, eax - mov [windowbestlen], esi - -;;; register ush scan_start = *(ushf*)scan; -;;; register ush scan_end = *(ushf*)(scan+best_len-1); -;;; Posf *prev = s->prev; - - movzx ebx, word ptr [edi] - mov [scanstart], ebx - movzx ebx, word ptr [edi + eax - 1] - mov [scanend], ebx - mov edi, [edx + dsPrev] - -;;; Jump into the main loop. - - mov edx, [chainlenwmask] - jmp short LoopEntry - -align 4 - -;;; do { -;;; match = s->window + cur_match; -;;; if (*(ushf*)(match+best_len-1) != scan_end || -;;; *(ushf*)match != scan_start) continue; -;;; [...] -;;; } while ((cur_match = prev[cur_match & wmask]) > limit -;;; && --chain_length != 0); -;;; -;;; Here is the inner loop of the function. The function will spend the -;;; majority of its time in this loop, and majority of that time will -;;; be spent in the first ten instructions. -;;; -;;; Within this loop: -;;; ebx = scanend -;;; ecx = curmatch -;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) -;;; esi = windowbestlen - i.e., (window + bestlen) -;;; edi = prev -;;; ebp = limit - -LookupLoop: - and ecx, edx - movzx ecx, word ptr [edi + ecx*2] - cmp ecx, ebp - jbe LeaveNow - sub edx, 00010000h - js LeaveNow -LoopEntry: movzx eax, word ptr [esi + ecx - 1] - cmp eax, ebx - jnz LookupLoop - mov eax, [window] - movzx eax, word ptr [eax + ecx] - cmp eax, [scanstart] - jnz LookupLoop - -;;; Store the current value of chainlen. - - mov [chainlenwmask], edx - -;;; Point edi to the string under scrutiny, and esi to the string we -;;; are hoping to match it up with. In actuality, esi and edi are -;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is -;;; initialized to -(MAX_MATCH_8 - scanalign). - - mov esi, [window] - mov edi, [scan] - add esi, ecx - mov eax, [scanalign] - mov edx, 0fffffef8h; -(MAX_MATCH_8) - lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] - lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] - -;;; Test the strings for equality, 8 bytes at a time. At the end, -;;; adjust edx so that it is offset to the exact byte that mismatched. -;;; -;;; We already know at this point that the first three bytes of the -;;; strings match each other, and they can be safely passed over before -;;; starting the compare loop. So what this code does is skip over 0-3 -;;; bytes, as much as necessary in order to dword-align the edi -;;; pointer. (esi will still be misaligned three times out of four.) -;;; -;;; It should be confessed that this loop usually does not represent -;;; much of the total running time. Replacing it with a more -;;; straightforward "rep cmpsb" would not drastically degrade -;;; performance. - -LoopCmps: - mov eax, [esi + edx] - xor eax, [edi + edx] - jnz LeaveLoopCmps - mov eax, [esi + edx + 4] - xor eax, [edi + edx + 4] - jnz LeaveLoopCmps4 - add edx, 8 - jnz LoopCmps - jmp short LenMaximum -LeaveLoopCmps4: add edx, 4 -LeaveLoopCmps: test eax, 0000FFFFh - jnz LenLower - add edx, 2 - shr eax, 16 -LenLower: sub al, 1 - adc edx, 0 - -;;; Calculate the length of the match. If it is longer than MAX_MATCH, -;;; then automatically accept it as the best possible match and leave. - - lea eax, [edi + edx] - mov edi, [scan] - sub eax, edi - cmp eax, MAX_MATCH - jge LenMaximum - -;;; If the length of the match is not longer than the best match we -;;; have so far, then forget it and return to the lookup loop. - - mov edx, [deflatestate] - mov ebx, [bestlen] - cmp eax, ebx - jg LongerMatch - mov esi, [windowbestlen] - mov edi, [edx + dsPrev] - mov ebx, [scanend] - mov edx, [chainlenwmask] - jmp LookupLoop - -;;; s->match_start = cur_match; -;;; best_len = len; -;;; if (len >= nice_match) break; -;;; scan_end = *(ushf*)(scan+best_len-1); - -LongerMatch: mov ebx, [nicematch] - mov [bestlen], eax - mov [edx + dsMatchStart], ecx - cmp eax, ebx - jge LeaveNow - mov esi, [window] - add esi, eax - mov [windowbestlen], esi - movzx ebx, word ptr [edi + eax - 1] - mov edi, [edx + dsPrev] - mov [scanend], ebx - mov edx, [chainlenwmask] - jmp LookupLoop - -;;; Accept the current string, with the maximum possible length. - -LenMaximum: mov edx, [deflatestate] - mov dword ptr [bestlen], MAX_MATCH - mov [edx + dsMatchStart], ecx - -;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; -;;; return s->lookahead; - -LeaveNow: - mov edx, [deflatestate] - mov ebx, [bestlen] - mov eax, [edx + dsLookahead] - cmp ebx, eax - jg LookaheadRet - mov eax, ebx -LookaheadRet: - -;;; Restore the stack and return from whence we came. - - add esp, LocalVarsSize - pop ebx - pop esi - pop edi - pop ebp - - ret -; please don't remove this string ! -; Your can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary! - db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah - - -IFDEF NOOLDPENTIUMCODE - IFDEF NOUNDERLINE - longest_match endp - ELSE - _longest_match endp - ENDIF - - IFDEF NOUNDERLINE - match_init proc near - ret - match_init endp - ELSE - _match_init proc near - ret - _match_init endp - ENDIF -ELSE - IFDEF NOUNDERLINE - longest_match_686 endp - ELSE - _longest_match_686 endp - ENDIF -ENDIF - -_TEXT ends -end diff --git a/contrib/masmx86/gvmat32c.c b/contrib/masmx86/gvmat32c.c deleted file mode 100644 index 7ad2b27..0000000 --- a/contrib/masmx86/gvmat32c.c +++ /dev/null @@ -1,62 +0,0 @@ -/* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86 - * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. - * File written by Gilles Vollant, by modifiying the longest_match - * from Jean-loup Gailly in deflate.c - * it prepare all parameters and call the assembly longest_match_gvasm - * longest_match execute standard C code is wmask != 0x7fff - * (assembly code is faster with a fixed wmask) - * - * Read comment at beginning of gvmat32.asm for more information - */ - -#if defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) -#include "deflate.h" - -/* if your C compiler don't add underline before function name, - define ADD_UNDERLINE_ASMFUNC */ -#ifdef ADD_UNDERLINE_ASMFUNC -#define longest_match_7fff _longest_match_7fff -#define longest_match_686 _longest_match_686 -#define cpudetect32 _cpudetect32 -#endif - - -unsigned long cpudetect32(); - -uInt longest_match_c( - deflate_state *s, - IPos cur_match); /* current match */ - - -uInt longest_match_7fff( - deflate_state *s, - IPos cur_match); /* current match */ - -uInt longest_match_686( - deflate_state *s, - IPos cur_match); /* current match */ - - -static uInt iIsPPro=2; - -void match_init () -{ - iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 1 : 0; -} - -uInt longest_match( - deflate_state *s, - IPos cur_match) /* current match */ -{ - if (iIsPPro!=0) - return longest_match_686(s,cur_match); - - if (s->w_mask != 0x7fff) - return longest_match_686(s,cur_match); - - /* now ((s->w_mask == 0x7fff) && (iIsPPro==0)) */ - return longest_match_7fff(s,cur_match); -} - - -#endif /* defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) */ diff --git a/contrib/masmx86/match686.asm b/contrib/masmx86/match686.asm new file mode 100644 index 0000000..1eaf555 --- /dev/null +++ b/contrib/masmx86/match686.asm @@ -0,0 +1,478 @@ +; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86 +; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant. +; File written by Gilles Vollant, by converting match686.S from Brian Raiter +; for MASM. This is as assembly version of longest_match +; from Jean-loup Gailly in deflate.c +; +; http://www.zlib.net +; http://www.winimage.com/zLibDll +; http://www.muppetlabs.com/~breadbox/software/assembly.html +; +; For Visual C++ 4.x and higher and ML 6.x and higher +; ml.exe is distributed in +; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64 +; +; this file contain two implementation of longest_match +; +; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro +; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom) +; +; for using an assembly version of longest_match, you need define ASMV in project +; +; compile the asm file running +; ml /coff /Zi /c /Flmatch686.lst match686.asm +; and do not include match686.obj in your project +; +; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for +; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor +; with autoselect (with cpu detection code) +; if you want support the old pentium optimization, you can still use these version +; +; this file is not optimized for old pentium, but it compatible with all x86 32 bits +; processor (starting 80386) +; +; +; see below : zlib1222add must be adjuster if you use a zlib version < 1.2.2.2 + +;uInt longest_match(s, cur_match) +; deflate_state *s; +; IPos cur_match; /* current match */ + + NbStack equ 76 + cur_match equ dword ptr[esp+NbStack-0] + str_s equ dword ptr[esp+NbStack-4] +; 5 dword on top (ret,ebp,esi,edi,ebx) + adrret equ dword ptr[esp+NbStack-8] + pushebp equ dword ptr[esp+NbStack-12] + pushedi equ dword ptr[esp+NbStack-16] + pushesi equ dword ptr[esp+NbStack-20] + pushebx equ dword ptr[esp+NbStack-24] + + chain_length equ dword ptr [esp+NbStack-28] + limit equ dword ptr [esp+NbStack-32] + best_len equ dword ptr [esp+NbStack-36] + window equ dword ptr [esp+NbStack-40] + prev equ dword ptr [esp+NbStack-44] + scan_start equ word ptr [esp+NbStack-48] + wmask equ dword ptr [esp+NbStack-52] + match_start_ptr equ dword ptr [esp+NbStack-56] + nice_match equ dword ptr [esp+NbStack-60] + scan equ dword ptr [esp+NbStack-64] + + windowlen equ dword ptr [esp+NbStack-68] + match_start equ dword ptr [esp+NbStack-72] + strend equ dword ptr [esp+NbStack-76] + NbStackAdd equ (NbStack-24) + + .386p + + name gvmatch + .MODEL FLAT + + + +; all the +zlib1222add offsets are due to the addition of fields +; in zlib in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). +; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). +; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). + + zlib1222add equ 8 + +; Note : these value are good with a 8 bytes boundary pack structure + dep_chain_length equ 74h+zlib1222add + dep_window equ 30h+zlib1222add + dep_strstart equ 64h+zlib1222add + dep_prev_length equ 70h+zlib1222add + dep_nice_match equ 88h+zlib1222add + dep_w_size equ 24h+zlib1222add + dep_prev equ 38h+zlib1222add + dep_w_mask equ 2ch+zlib1222add + dep_good_match equ 84h+zlib1222add + dep_match_start equ 68h+zlib1222add + dep_lookahead equ 6ch+zlib1222add + + +_TEXT segment + +IFDEF NOUNDERLINE + public longest_match + public match_init +ELSE + public _longest_match + public _match_init +ENDIF + + MAX_MATCH equ 258 + MIN_MATCH equ 3 + MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) + + + +MAX_MATCH equ 258 +MIN_MATCH equ 3 +MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) +MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) + + +;;; stack frame offsets + +chainlenwmask equ esp + 0 ; high word: current chain len + ; low word: s->wmask +window equ esp + 4 ; local copy of s->window +windowbestlen equ esp + 8 ; s->window + bestlen +scanstart equ esp + 16 ; first two bytes of string +scanend equ esp + 12 ; last two bytes of string +scanalign equ esp + 20 ; dword-misalignment of string +nicematch equ esp + 24 ; a good enough match size +bestlen equ esp + 28 ; size of best match so far +scan equ esp + 32 ; ptr to string wanting match + +LocalVarsSize equ 36 +; saved ebx byte esp + 36 +; saved edi byte esp + 40 +; saved esi byte esp + 44 +; saved ebp byte esp + 48 +; return address byte esp + 52 +deflatestate equ esp + 56 ; the function arguments +curmatch equ esp + 60 + +;;; Offsets for fields in the deflate_state structure. These numbers +;;; are calculated from the definition of deflate_state, with the +;;; assumption that the compiler will dword-align the fields. (Thus, +;;; changing the definition of deflate_state could easily cause this +;;; program to crash horribly, without so much as a warning at +;;; compile time. Sigh.) + +dsWSize equ 36+zlib1222add +dsWMask equ 44+zlib1222add +dsWindow equ 48+zlib1222add +dsPrev equ 56+zlib1222add +dsMatchLen equ 88+zlib1222add +dsPrevMatch equ 92+zlib1222add +dsStrStart equ 100+zlib1222add +dsMatchStart equ 104+zlib1222add +dsLookahead equ 108+zlib1222add +dsPrevLen equ 112+zlib1222add +dsMaxChainLen equ 116+zlib1222add +dsGoodMatch equ 132+zlib1222add +dsNiceMatch equ 136+zlib1222add + + +;;; match686.asm -- Pentium-Pro-optimized version of longest_match() +;;; Written for zlib 1.1.2 +;;; Copyright (C) 1998 Brian Raiter +;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html +;;; +;; +;; This software is provided 'as-is', without any express or implied +;; warranty. In no event will the authors be held liable for any damages +;; arising from the use of this software. +;; +;; Permission is granted to anyone to use this software for any purpose, +;; including commercial applications, and to alter it and redistribute it +;; freely, subject to the following restrictions: +;; +;; 1. The origin of this software must not be misrepresented; you must not +;; claim that you wrote the original software. If you use this software +;; in a product, an acknowledgment in the product documentation would be +;; appreciated but is not required. +;; 2. Altered source versions must be plainly marked as such, and must not be +;; misrepresented as being the original software +;; 3. This notice may not be removed or altered from any source distribution. +;; + +;GLOBAL _longest_match, _match_init + + +;SECTION .text + +;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) + +;_longest_match: + IFDEF NOUNDERLINE + longest_match proc near + ELSE + _longest_match proc near + ENDIF + +;;; Save registers that the compiler may be using, and adjust esp to +;;; make room for our stack frame. + + push ebp + push edi + push esi + push ebx + sub esp, LocalVarsSize + +;;; Retrieve the function arguments. ecx will hold cur_match +;;; throughout the entire function. edx will hold the pointer to the +;;; deflate_state structure during the function's setup (before +;;; entering the main loop. + + mov edx, [deflatestate] + mov ecx, [curmatch] + +;;; uInt wmask = s->w_mask; +;;; unsigned chain_length = s->max_chain_length; +;;; if (s->prev_length >= s->good_match) { +;;; chain_length >>= 2; +;;; } + + mov eax, [edx + dsPrevLen] + mov ebx, [edx + dsGoodMatch] + cmp eax, ebx + mov eax, [edx + dsWMask] + mov ebx, [edx + dsMaxChainLen] + jl LastMatchGood + shr ebx, 2 +LastMatchGood: + +;;; chainlen is decremented once beforehand so that the function can +;;; use the sign flag instead of the zero flag for the exit test. +;;; It is then shifted into the high word, to make room for the wmask +;;; value, which it will always accompany. + + dec ebx + shl ebx, 16 + or ebx, eax + mov [chainlenwmask], ebx + +;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + mov eax, [edx + dsNiceMatch] + mov ebx, [edx + dsLookahead] + cmp ebx, eax + jl LookaheadLess + mov ebx, eax +LookaheadLess: mov [nicematch], ebx + +;;; register Bytef *scan = s->window + s->strstart; + + mov esi, [edx + dsWindow] + mov [window], esi + mov ebp, [edx + dsStrStart] + lea edi, [esi + ebp] + mov [scan], edi + +;;; Determine how many bytes the scan ptr is off from being +;;; dword-aligned. + + mov eax, edi + neg eax + and eax, 3 + mov [scanalign], eax + +;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? +;;; s->strstart - (IPos)MAX_DIST(s) : NIL; + + mov eax, [edx + dsWSize] + sub eax, MIN_LOOKAHEAD + sub ebp, eax + jg LimitPositive + xor ebp, ebp +LimitPositive: + +;;; int best_len = s->prev_length; + + mov eax, [edx + dsPrevLen] + mov [bestlen], eax + +;;; Store the sum of s->window + best_len in esi locally, and in esi. + + add esi, eax + mov [windowbestlen], esi + +;;; register ush scan_start = *(ushf*)scan; +;;; register ush scan_end = *(ushf*)(scan+best_len-1); +;;; Posf *prev = s->prev; + + movzx ebx, word ptr [edi] + mov [scanstart], ebx + movzx ebx, word ptr [edi + eax - 1] + mov [scanend], ebx + mov edi, [edx + dsPrev] + +;;; Jump into the main loop. + + mov edx, [chainlenwmask] + jmp short LoopEntry + +align 4 + +;;; do { +;;; match = s->window + cur_match; +;;; if (*(ushf*)(match+best_len-1) != scan_end || +;;; *(ushf*)match != scan_start) continue; +;;; [...] +;;; } while ((cur_match = prev[cur_match & wmask]) > limit +;;; && --chain_length != 0); +;;; +;;; Here is the inner loop of the function. The function will spend the +;;; majority of its time in this loop, and majority of that time will +;;; be spent in the first ten instructions. +;;; +;;; Within this loop: +;;; ebx = scanend +;;; ecx = curmatch +;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) +;;; esi = windowbestlen - i.e., (window + bestlen) +;;; edi = prev +;;; ebp = limit + +LookupLoop: + and ecx, edx + movzx ecx, word ptr [edi + ecx*2] + cmp ecx, ebp + jbe LeaveNow + sub edx, 00010000h + js LeaveNow +LoopEntry: movzx eax, word ptr [esi + ecx - 1] + cmp eax, ebx + jnz LookupLoop + mov eax, [window] + movzx eax, word ptr [eax + ecx] + cmp eax, [scanstart] + jnz LookupLoop + +;;; Store the current value of chainlen. + + mov [chainlenwmask], edx + +;;; Point edi to the string under scrutiny, and esi to the string we +;;; are hoping to match it up with. In actuality, esi and edi are +;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is +;;; initialized to -(MAX_MATCH_8 - scanalign). + + mov esi, [window] + mov edi, [scan] + add esi, ecx + mov eax, [scanalign] + mov edx, 0fffffef8h; -(MAX_MATCH_8) + lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] + lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] + +;;; Test the strings for equality, 8 bytes at a time. At the end, +;;; adjust edx so that it is offset to the exact byte that mismatched. +;;; +;;; We already know at this point that the first three bytes of the +;;; strings match each other, and they can be safely passed over before +;;; starting the compare loop. So what this code does is skip over 0-3 +;;; bytes, as much as necessary in order to dword-align the edi +;;; pointer. (esi will still be misaligned three times out of four.) +;;; +;;; It should be confessed that this loop usually does not represent +;;; much of the total running time. Replacing it with a more +;;; straightforward "rep cmpsb" would not drastically degrade +;;; performance. + +LoopCmps: + mov eax, [esi + edx] + xor eax, [edi + edx] + jnz LeaveLoopCmps + mov eax, [esi + edx + 4] + xor eax, [edi + edx + 4] + jnz LeaveLoopCmps4 + add edx, 8 + jnz LoopCmps + jmp short LenMaximum +LeaveLoopCmps4: add edx, 4 +LeaveLoopCmps: test eax, 0000FFFFh + jnz LenLower + add edx, 2 + shr eax, 16 +LenLower: sub al, 1 + adc edx, 0 + +;;; Calculate the length of the match. If it is longer than MAX_MATCH, +;;; then automatically accept it as the best possible match and leave. + + lea eax, [edi + edx] + mov edi, [scan] + sub eax, edi + cmp eax, MAX_MATCH + jge LenMaximum + +;;; If the length of the match is not longer than the best match we +;;; have so far, then forget it and return to the lookup loop. + + mov edx, [deflatestate] + mov ebx, [bestlen] + cmp eax, ebx + jg LongerMatch + mov esi, [windowbestlen] + mov edi, [edx + dsPrev] + mov ebx, [scanend] + mov edx, [chainlenwmask] + jmp LookupLoop + +;;; s->match_start = cur_match; +;;; best_len = len; +;;; if (len >= nice_match) break; +;;; scan_end = *(ushf*)(scan+best_len-1); + +LongerMatch: mov ebx, [nicematch] + mov [bestlen], eax + mov [edx + dsMatchStart], ecx + cmp eax, ebx + jge LeaveNow + mov esi, [window] + add esi, eax + mov [windowbestlen], esi + movzx ebx, word ptr [edi + eax - 1] + mov edi, [edx + dsPrev] + mov [scanend], ebx + mov edx, [chainlenwmask] + jmp LookupLoop + +;;; Accept the current string, with the maximum possible length. + +LenMaximum: mov edx, [deflatestate] + mov dword ptr [bestlen], MAX_MATCH + mov [edx + dsMatchStart], ecx + +;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; +;;; return s->lookahead; + +LeaveNow: + mov edx, [deflatestate] + mov ebx, [bestlen] + mov eax, [edx + dsLookahead] + cmp ebx, eax + jg LookaheadRet + mov eax, ebx +LookaheadRet: + +;;; Restore the stack and return from whence we came. + + add esp, LocalVarsSize + pop ebx + pop esi + pop edi + pop ebp + + ret +; please don't remove this string ! +; Your can freely use match686 in any free or commercial app if you don't remove the string in the binary! + db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah + + + IFDEF NOUNDERLINE + longest_match endp + ELSE + _longest_match endp + ENDIF + + IFDEF NOUNDERLINE + match_init proc near + ret + match_init endp + ELSE + _match_init proc near + ret + _match_init endp + ENDIF + + +_TEXT ends +end diff --git a/contrib/masmx86/match686.obj b/contrib/masmx86/match686.obj new file mode 100644 index 0000000..2e4631f Binary files /dev/null and b/contrib/masmx86/match686.obj differ diff --git a/contrib/masmx86/mkasm.bat b/contrib/masmx86/mkasm.bat deleted file mode 100755 index 70a51f8..0000000 --- a/contrib/masmx86/mkasm.bat +++ /dev/null @@ -1,3 +0,0 @@ -cl /DASMV /I..\.. /O2 /c gvmat32c.c -ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm -ml /coff /Zi /c /Flinffas32.lst inffas32.asm diff --git a/contrib/masmx86/readme.txt b/contrib/masmx86/readme.txt index 7b57167..413580e 100644 --- a/contrib/masmx86/readme.txt +++ b/contrib/masmx86/readme.txt @@ -14,8 +14,8 @@ appropriate makefile, as suggested below. Build instructions ------------------ * With Microsoft C and MASM: -nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="gvmat32c.obj gvmat32.obj inffas32.obj" +nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj" * With Borland C and TASM: -make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="gvmat32c.obj gvmat32.obj inffas32.obj" OBJPA="+gvmat32c.obj+gvmat32.obj+inffas32.obj" +make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj" OBJPA="+match686c.obj+match686.obj+inffas32.obj" diff --git a/contrib/pascal/zlibpas.pas b/contrib/pascal/zlibpas.pas index 836848c..dc7d37d 100644 --- a/contrib/pascal/zlibpas.pas +++ b/contrib/pascal/zlibpas.pas @@ -10,7 +10,7 @@ unit zlibpas; interface const - ZLIB_VERSION = '1.2.3'; + ZLIB_VERSION = '1.2.4'; type alloc_func = function(opaque: Pointer; items, size: Integer): Pointer; diff --git a/contrib/vstudio/readme.txt b/contrib/vstudio/readme.txt index a7b7247..904888b 100644 --- a/contrib/vstudio/readme.txt +++ b/contrib/vstudio/readme.txt @@ -1,8 +1,8 @@ -Building instructions for the DLL versions of Zlib 1.2.3 +Building instructions for the DLL versions of Zlib 1.2.4 ======================================================== This directory contains projects that build zlib and minizip using -Microsoft Visual C++ 7.0/7.1/8.0/9.0/10.0, and Visual C++ . +Microsoft Visual C++ 9.0/10.0, and Visual C++ . You don't need to build these projects yourself. You can download the binaries from: @@ -15,51 +15,17 @@ bld_ml64.bat in contrib\masmx64 bld_ml32.bat in contrib\masmx86 -Build instructions for Visual Studio 7.x (32 bits) --------------------------------------------------- -- Uncompress current zlib, including all contrib/* files -- Download the crtdll library from - http://www.winimage.com/zLibDll/crtdll.zip - Unzip crtdll.zip to extract crtdll.lib on contrib\vstudio\vc7. -- Open contrib\vstudio\vc7\zlibvc.sln with Microsoft Visual C++ 7.x - (Visual Studio .Net 2002 or 2003). - -Build instructions for Visual Studio 2005 (32 bits or 64 bits) --------------------------------------------------------------- -- Uncompress current zlib, including all contrib/* files -- For 32 bits only: download the crtdll library from - http://www.winimage.com/zLibDll/crtdll.zip - Unzip crtdll.zip to extract crtdll.lib on contrib\vstudio\vc8. -- Open contrib\vstudio\vc8\zlibvc.sln with Microsoft Visual C++ 2005 - -Build instructions for Visual Studio 2005 64 bits, PSDK compiler ----------------------------------------------------------------- -at the time of writing this text file, Visual Studio 2005 (and - Microsoft Visual C++ 8.0) is on the beta 2 stage. -Using you can get the free 64 bits compiler from Platform SDK, - which is NOT a beta, and compile using the Visual studio 2005 IDE -see http://www.winimage.com/misc/sdk64onvs2005/ for instruction - -- Uncompress current zlib, including all contrib/* files -- start Visual Studio 2005 from a platform SDK command prompt, using - the /useenv switch -- Open contrib\vstudio\vc8\zlibvc.sln with Microsoft Visual C++ 2005 Build instructions for Visual Studio 2008 (32 bits or 64 bits) -------------------------------------------------------------- - Uncompress current zlib, including all contrib/* files -- For 32 bits only: download the crtdll library from - http://www.winimage.com/zLibDll/crtdll.zip - Unzip crtdll.zip to extract crtdll.lib on contrib\vstudio\vc9. - Open contrib\vstudio\vc9\zlibvc.sln with Microsoft Visual C++ 2008.0 +- Or run: vcbuild /rebuild contrib\vstudio\vc9\zlibvc.sln "Release|Win32" Build instructions for Visual Studio 2010 (32 bits or 64 bits) -------------------------------------------------------------- - Uncompress current zlib, including all contrib/* files -- For 32 bits only: download the crtdll library from - http://www.winimage.com/zLibDll/crtdll.zip - Unzip crtdll.zip to extract crtdll.lib on contrib\vstudio\vc10. - Open contrib\vstudio\vc10\zlibvc.sln with Microsoft Visual C++ 2010.0 diff --git a/contrib/vstudio/vc10/testzlib.vcxproj b/contrib/vstudio/vc10/testzlib.vcxproj index 9810412..9088d17 100644 --- a/contrib/vstudio/vc10/testzlib.vcxproj +++ b/contrib/vstudio/vc10/testzlib.vcxproj @@ -194,7 +194,7 @@ EditAndContinue - ..\..\masmx86\gvmat32.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) + ..\..\masmx86\match686.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) $(OutDir)testzlib.exe true $(OutDir)testzlib.pdb @@ -254,7 +254,7 @@ ProgramDatabase - ..\..\masmx86\gvmat32.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) + ..\..\masmx86\match686.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) $(OutDir)testzlib.exe true Console @@ -397,14 +397,6 @@ - - true - true - true - true - true - true - true diff --git a/contrib/vstudio/vc10/testzlib.vcxproj.filters b/contrib/vstudio/vc10/testzlib.vcxproj.filters index a0d9b23..249daa8 100644 --- a/contrib/vstudio/vc10/testzlib.vcxproj.filters +++ b/contrib/vstudio/vc10/testzlib.vcxproj.filters @@ -27,9 +27,6 @@ Source Files - - Source Files - Source Files diff --git a/contrib/vstudio/vc10/zlib.rc b/contrib/vstudio/vc10/zlib.rc index 72cb8b4..23802d8 100644 --- a/contrib/vstudio/vc10/zlib.rc +++ b/contrib/vstudio/vc10/zlib.rc @@ -2,8 +2,8 @@ #define IDR_VERSION1 1 IDR_VERSION1 VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE - FILEVERSION 1,2,3,0 - PRODUCTVERSION 1,2,3,0 + FILEVERSION 1,2,4,0 + PRODUCTVERSION 1,2,4,0 FILEFLAGSMASK VS_FFI_FILEFLAGSMASK FILEFLAGS 0 FILEOS VOS_DOS_WINDOWS32 @@ -16,13 +16,13 @@ BEGIN //language ID = U.S. English, char set = Windows, Multilingual BEGIN - VALUE "FileDescription", "zlib data compression library\0" - VALUE "FileVersion", "1.2.3.0\0" + VALUE "FileDescription", "zlib data compression and ZIP file I/O library\0" + VALUE "FileVersion", "1.2.4.0\0" VALUE "InternalName", "zlib\0" VALUE "OriginalFilename", "zlib.dll\0" VALUE "ProductName", "ZLib.DLL\0" VALUE "Comments","DLL support by Alessandro Iacopetti & Gilles Vollant\0" - VALUE "LegalCopyright", "(C) 1995-2003 Jean-loup Gailly & Mark Adler\0" + VALUE "LegalCopyright", "(C) 1995-2010 Jean-loup Gailly & Mark Adler\0" END END BLOCK "VarFileInfo" diff --git a/contrib/vstudio/vc10/zlibstat.vcxproj b/contrib/vstudio/vc10/zlibstat.vcxproj index fbf6c1b..2682fca 100644 --- a/contrib/vstudio/vc10/zlibstat.vcxproj +++ b/contrib/vstudio/vc10/zlibstat.vcxproj @@ -206,7 +206,7 @@ /MACHINE:X86 /NODEFAULTLIB %(AdditionalOptions) - ..\..\masmx86\gvmat32.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) + ..\..\masmx86\match686.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) $(OutDir)zlibstat.lib true @@ -422,16 +422,7 @@ - - true - true - true - true - true - true - - diff --git a/contrib/vstudio/vc10/zlibstat.vcxproj.filters b/contrib/vstudio/vc10/zlibstat.vcxproj.filters index f676c2d..c8c7f7e 100644 --- a/contrib/vstudio/vc10/zlibstat.vcxproj.filters +++ b/contrib/vstudio/vc10/zlibstat.vcxproj.filters @@ -18,15 +18,9 @@ Source Files - - Source Files - Source Files - - Source Files - Source Files diff --git a/contrib/vstudio/vc10/zlibvc.def b/contrib/vstudio/vc10/zlibvc.def index 0b6a9e9..b3b7cab 100644 --- a/contrib/vstudio/vc10/zlibvc.def +++ b/contrib/vstudio/vc10/zlibvc.def @@ -1,7 +1,7 @@ +LIBRARY +; zlib data compression and ZIP file I/O library -VERSION 1.23 - -HEAPSIZE 1048576,8192 +VERSION 1.24 EXPORTS adler32 @1 @@ -90,25 +90,47 @@ EXPORTS unzGoToFilePos @101 fill_win32_filefunc @110 - fill_win32_filefunc64 @111 - fill_win32_filefunc64A @112 - fill_win32_filefunc64W @113 -; quick hack by hkuno@microhouse.co.jp - unzOpen64 @120 - unzOpen2_64 @121 - unzGetGlobalInfo64 @122 - unzGetCurrentFileInfo64 @124 - unzGetCurrentFileZStreamPos64 @125 - unztell64 @126 - unzGetFilePos64 @127 - unzGoToFilePos64 @128 +; zlibwapi v1.2.4 added: + fill_win32_filefunc64 @111 + fill_win32_filefunc64A @112 + fill_win32_filefunc64W @113 + + unzOpen64 @120 + unzOpen2_64 @121 + unzGetGlobalInfo64 @122 + unzGetCurrentFileInfo64 @124 + unzGetCurrentFileZStreamPos64 @125 + unztell64 @126 + unzGetFilePos64 @127 + unzGoToFilePos64 @128 + + zipOpen64 @130 + zipOpen2_64 @131 + zipOpenNewFileInZip64 @132 + zipOpenNewFileInZip2_64 @133 + zipOpenNewFileInZip3_64 @134 + zipOpenNewFileInZip4_64 @135 + zipCloseFileInZipRaw64 @136 - zipOpen64 @130 - zipOpen2_64 @131 - zipOpenNewFileInZip64 @132 - zipOpenNewFileInZip2_64 @133 - zipOpenNewFileInZip3_64 @134 - zipOpenNewFileInZip4_64 @135 - zipCloseFileInZipRaw64 @136 -; end hack +; zlib1 v1.2.4 added: + adler32_combine @140 + adler32_combine64 @141 + crc32_combine @142 + crc32_combine64 @143 + deflateSetHeader @144 + deflateTune @145 + gzbuffer @146 + gzclose_r @147 + gzclose_w @148 + gzdirect @149 + gzoffset @150 + gzoffset64 @151 + gzopen64 @152 + gzseek64 @153 + gztell64 @154 + inflateGetHeader @156 + inflateMark @157 + inflatePrime @158 + inflateReset2 @159 + inflateUndermine @160 diff --git a/contrib/vstudio/vc10/zlibvc.sln b/contrib/vstudio/vc10/zlibvc.sln index 6d2ef64..6f6ffd5 100644 --- a/contrib/vstudio/vc10/zlibvc.sln +++ b/contrib/vstudio/vc10/zlibvc.sln @@ -36,8 +36,8 @@ Global {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Itanium.Build.0 = Release|Itanium {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Win32.ActiveCfg = Release|Win32 {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Win32.Build.0 = Release|Win32 - {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|x64.ActiveCfg = ReleaseWithoutAsm|x64 - {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|x64.Build.0 = ReleaseWithoutAsm|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|x64.ActiveCfg = Release|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|x64.Build.0 = Release|x64 {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Itanium.ActiveCfg = ReleaseWithoutAsm|Itanium {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Itanium.Build.0 = ReleaseWithoutAsm|Itanium {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Win32.ActiveCfg = ReleaseWithoutAsm|Win32 @@ -94,8 +94,8 @@ Global {C52F9E7B-498A-42BE-8DB4-85A15694366A}.Release|x64.Build.0 = Release|x64 {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|Itanium.ActiveCfg = Release|Itanium {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|Itanium.Build.0 = Release|Itanium - {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Itanium - {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|x64.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694366A}.ReleaseWithoutAsm|x64.ActiveCfg = Release|x64 {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|Itanium.ActiveCfg = Debug|Itanium {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|Itanium.Build.0 = Debug|Itanium {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Debug|Win32.ActiveCfg = Debug|Win32 @@ -110,8 +110,8 @@ Global {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.Release|x64.Build.0 = Release|x64 {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Itanium.ActiveCfg = Release|Itanium {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Itanium.Build.0 = Release|Itanium - {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Itanium - {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|x64.ActiveCfg = Release|Itanium + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Win32 + {48CDD9DC-E09F-4135-9C0C-4FE50C3C654B}.ReleaseWithoutAsm|x64.ActiveCfg = Release|x64 {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Itanium.ActiveCfg = Debug|Itanium {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Itanium.Build.0 = Debug|Itanium {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Win32.ActiveCfg = Debug|Win32 @@ -126,8 +126,8 @@ Global {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|x64.Build.0 = Release|x64 {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Itanium.ActiveCfg = Release|Itanium {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Itanium.Build.0 = Release|Itanium - {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Itanium - {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|x64.ActiveCfg = Release|Itanium + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|x64.ActiveCfg = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/contrib/vstudio/vc10/zlibvc.vcxproj b/contrib/vstudio/vc10/zlibvc.vcxproj index e1067fa..9862398 100644 --- a/contrib/vstudio/vc10/zlibvc.vcxproj +++ b/contrib/vstudio/vc10/zlibvc.vcxproj @@ -213,7 +213,7 @@ /MACHINE:I386 %(AdditionalOptions) - ..\..\masmx86\gvmat32.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) + ..\..\masmx86\match686.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) $(OutDir)zlibwapi.dll true .\zlibvc.def @@ -291,7 +291,7 @@ true - MultiThreadedDLL + MultiThreaded false true $(IntDir)zlibvc.pch @@ -310,7 +310,7 @@ /MACHINE:I386 %(AdditionalOptions) - ..\..\masmx86\gvmat32.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) + ..\..\masmx86\match686.obj;..\..\masmx86\inffas32.obj;%(AdditionalDependencies) $(OutDir)zlibwapi.dll true false @@ -598,17 +598,7 @@ - - true - true - true - true - true - true - true - - diff --git a/contrib/vstudio/vc10/zlibvc.vcxproj.filters b/contrib/vstudio/vc10/zlibvc.vcxproj.filters index 7b595c4..180b71c 100644 --- a/contrib/vstudio/vc10/zlibvc.vcxproj.filters +++ b/contrib/vstudio/vc10/zlibvc.vcxproj.filters @@ -27,15 +27,9 @@ Source Files - - Source Files - Source Files - - Source Files - Source Files diff --git a/contrib/vstudio/vc7/zlibvc.def b/contrib/vstudio/vc7/zlibvc.def deleted file mode 100644 index 0b6a9e9..0000000 --- a/contrib/vstudio/vc7/zlibvc.def +++ /dev/null @@ -1,114 +0,0 @@ - -VERSION 1.23 - -HEAPSIZE 1048576,8192 - -EXPORTS - adler32 @1 - compress @2 - crc32 @3 - deflate @4 - deflateCopy @5 - deflateEnd @6 - deflateInit2_ @7 - deflateInit_ @8 - deflateParams @9 - deflateReset @10 - deflateSetDictionary @11 - gzclose @12 - gzdopen @13 - gzerror @14 - gzflush @15 - gzopen @16 - gzread @17 - gzwrite @18 - inflate @19 - inflateEnd @20 - inflateInit2_ @21 - inflateInit_ @22 - inflateReset @23 - inflateSetDictionary @24 - inflateSync @25 - uncompress @26 - zlibVersion @27 - gzprintf @28 - gzputc @29 - gzgetc @30 - gzseek @31 - gzrewind @32 - gztell @33 - gzeof @34 - gzsetparams @35 - zError @36 - inflateSyncPoint @37 - get_crc_table @38 - compress2 @39 - gzputs @40 - gzgets @41 - inflateCopy @42 - inflateBackInit_ @43 - inflateBack @44 - inflateBackEnd @45 - compressBound @46 - deflateBound @47 - gzclearerr @48 - gzungetc @49 - zlibCompileFlags @50 - deflatePrime @51 - - unzOpen @61 - unzClose @62 - unzGetGlobalInfo @63 - unzGetCurrentFileInfo @64 - unzGoToFirstFile @65 - unzGoToNextFile @66 - unzOpenCurrentFile @67 - unzReadCurrentFile @68 - unzOpenCurrentFile3 @69 - unztell @70 - unzeof @71 - unzCloseCurrentFile @72 - unzGetGlobalComment @73 - unzStringFileNameCompare @74 - unzLocateFile @75 - unzGetLocalExtrafield @76 - unzOpen2 @77 - unzOpenCurrentFile2 @78 - unzOpenCurrentFilePassword @79 - - zipOpen @80 - zipOpenNewFileInZip @81 - zipWriteInFileInZip @82 - zipCloseFileInZip @83 - zipClose @84 - zipOpenNewFileInZip2 @86 - zipCloseFileInZipRaw @87 - zipOpen2 @88 - zipOpenNewFileInZip3 @89 - - unzGetFilePos @100 - unzGoToFilePos @101 - - fill_win32_filefunc @110 - fill_win32_filefunc64 @111 - fill_win32_filefunc64A @112 - fill_win32_filefunc64W @113 - -; quick hack by hkuno@microhouse.co.jp - unzOpen64 @120 - unzOpen2_64 @121 - unzGetGlobalInfo64 @122 - unzGetCurrentFileInfo64 @124 - unzGetCurrentFileZStreamPos64 @125 - unztell64 @126 - unzGetFilePos64 @127 - unzGoToFilePos64 @128 - - zipOpen64 @130 - zipOpen2_64 @131 - zipOpenNewFileInZip64 @132 - zipOpenNewFileInZip2_64 @133 - zipOpenNewFileInZip3_64 @134 - zipOpenNewFileInZip4_64 @135 - zipCloseFileInZipRaw64 @136 -; end hack diff --git a/contrib/vstudio/vc8/zlibvc.def b/contrib/vstudio/vc8/zlibvc.def deleted file mode 100644 index 0b6a9e9..0000000 --- a/contrib/vstudio/vc8/zlibvc.def +++ /dev/null @@ -1,114 +0,0 @@ - -VERSION 1.23 - -HEAPSIZE 1048576,8192 - -EXPORTS - adler32 @1 - compress @2 - crc32 @3 - deflate @4 - deflateCopy @5 - deflateEnd @6 - deflateInit2_ @7 - deflateInit_ @8 - deflateParams @9 - deflateReset @10 - deflateSetDictionary @11 - gzclose @12 - gzdopen @13 - gzerror @14 - gzflush @15 - gzopen @16 - gzread @17 - gzwrite @18 - inflate @19 - inflateEnd @20 - inflateInit2_ @21 - inflateInit_ @22 - inflateReset @23 - inflateSetDictionary @24 - inflateSync @25 - uncompress @26 - zlibVersion @27 - gzprintf @28 - gzputc @29 - gzgetc @30 - gzseek @31 - gzrewind @32 - gztell @33 - gzeof @34 - gzsetparams @35 - zError @36 - inflateSyncPoint @37 - get_crc_table @38 - compress2 @39 - gzputs @40 - gzgets @41 - inflateCopy @42 - inflateBackInit_ @43 - inflateBack @44 - inflateBackEnd @45 - compressBound @46 - deflateBound @47 - gzclearerr @48 - gzungetc @49 - zlibCompileFlags @50 - deflatePrime @51 - - unzOpen @61 - unzClose @62 - unzGetGlobalInfo @63 - unzGetCurrentFileInfo @64 - unzGoToFirstFile @65 - unzGoToNextFile @66 - unzOpenCurrentFile @67 - unzReadCurrentFile @68 - unzOpenCurrentFile3 @69 - unztell @70 - unzeof @71 - unzCloseCurrentFile @72 - unzGetGlobalComment @73 - unzStringFileNameCompare @74 - unzLocateFile @75 - unzGetLocalExtrafield @76 - unzOpen2 @77 - unzOpenCurrentFile2 @78 - unzOpenCurrentFilePassword @79 - - zipOpen @80 - zipOpenNewFileInZip @81 - zipWriteInFileInZip @82 - zipCloseFileInZip @83 - zipClose @84 - zipOpenNewFileInZip2 @86 - zipCloseFileInZipRaw @87 - zipOpen2 @88 - zipOpenNewFileInZip3 @89 - - unzGetFilePos @100 - unzGoToFilePos @101 - - fill_win32_filefunc @110 - fill_win32_filefunc64 @111 - fill_win32_filefunc64A @112 - fill_win32_filefunc64W @113 - -; quick hack by hkuno@microhouse.co.jp - unzOpen64 @120 - unzOpen2_64 @121 - unzGetGlobalInfo64 @122 - unzGetCurrentFileInfo64 @124 - unzGetCurrentFileZStreamPos64 @125 - unztell64 @126 - unzGetFilePos64 @127 - unzGoToFilePos64 @128 - - zipOpen64 @130 - zipOpen2_64 @131 - zipOpenNewFileInZip64 @132 - zipOpenNewFileInZip2_64 @133 - zipOpenNewFileInZip3_64 @134 - zipOpenNewFileInZip4_64 @135 - zipCloseFileInZipRaw64 @136 -; end hack diff --git a/contrib/vstudio/vc9/testzlib.vcproj b/contrib/vstudio/vc9/testzlib.vcproj index 9ad07ae..9cb0bf8 100644 --- a/contrib/vstudio/vc9/testzlib.vcproj +++ b/contrib/vstudio/vc9/testzlib.vcproj @@ -1,7 +1,7 @@ - - - - @@ -614,7 +602,7 @@ AdditionalIncludeDirectories="..\..\.." PreprocessorDefinitions="ASMV;ASMINF;WIN32;ZLIB_WINAPI;NDEBUG;_CONSOLE;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS" BasicRuntimeChecks="0" - RuntimeLibrary="2" + RuntimeLibrary="0" BufferSecurityCheck="false" AssemblerListingLocation="$(IntDir)\" /> @@ -650,9 +638,6 @@ - @@ -738,9 +723,6 @@ - @@ -769,82 +751,6 @@ RelativePath="..\..\..\deflate.c" > - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -861,7 +767,7 @@ /> @@ -168,9 +168,9 @@ /> @@ -241,9 +242,9 @@ /> @@ -315,9 +316,9 @@ /> @@ -389,9 +391,9 @@ /> @@ -464,9 +465,9 @@ /> @@ -708,64 +708,12 @@ RelativePath="..\..\..\deflate.c" > - - - - - - - - - - - - - - - - - - - - @@ -80,7 +81,7 @@ - - - - @@ -810,9 +799,6 @@ - @@ -911,9 +897,6 @@ - @@ -942,72 +925,12 @@ RelativePath="..\..\..\deflate.c" > - - - - - - - - - - - - - - - - - - - - - - -