From: miettinen Date: Tue, 20 May 2014 09:21:21 +0000 (+0000) Subject: Updated the FMUSolution to produce x64 dll's (refs #4892). X-Git-Tag: 1.8.1~54 X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=commitdiff_plain;h=5277653c1ddc415ccef6e5acbda8a93029c0b3d7;p=simantics%2Fsysdyn.git Updated the FMUSolution to produce x64 dll's (refs #4892). git-svn-id: https://www.simantics.org/svn/simantics/sysdyn/trunk@29519 ac1ea38d-2e2b-0410-8846-a27921b304fc --- diff --git a/org.simantics.fmu/FMUSolution/FMUSimulator/FMUSimulator.vcxproj b/org.simantics.fmu/FMUSolution/FMUSimulator/FMUSimulator.vcxproj index a1c71e6f..774e3690 100644 --- a/org.simantics.fmu/FMUSolution/FMUSimulator/FMUSimulator.vcxproj +++ b/org.simantics.fmu/FMUSolution/FMUSimulator/FMUSimulator.vcxproj @@ -5,10 +5,18 @@ Debug Win32 + + Debug + x64 + Release Win32 + + Release + x64 + {9838038D-09A3-43A5-AB97-B5B5C763DF43} @@ -21,21 +29,38 @@ true NotSet + + DynamicLibrary + true + NotSet + DynamicLibrary false false NotSet + + DynamicLibrary + false + false + NotSet + + + + + + + false @@ -43,12 +68,24 @@ $(OutDir);$(ProjectDir)include;$(LibraryPath) $(SourcePath) + + false + $(SolutionDir)zlib-1.2.6\contrib\minizip;$(OutDir);$(ProjectDir)include;$(IncludePath) + $(OutDir);$(ProjectDir)include;$(LibraryPath) + $(SourcePath) + false $(SolutionDir)zlib-1.2.6\contrib\minizip;$(OutDir);$(ProjectDir)include;$(IncludePath) $(OutDir);$(ProjectDir)include;$(LibraryPath) $(SourcePath) + + false + $(SolutionDir)zlib-1.2.6\contrib\minizip;$(OutDir);$(ProjectDir)include;$(IncludePath) + $(OutDir);$(ProjectDir)include;$(LibraryPath) + $(SourcePath) + NotUsing @@ -63,6 +100,20 @@ $(OutDir)zlibwapi.lib;$(OutDir)miniunz.lib;%(AdditionalDependencies) + + + NotUsing + Level3 + Disabled + WIN32;_DEBUG;_WINDOWS;_USRDLL;FMUSIMULATOR_EXPORTS;%(PreprocessorDefinitions) + MultiThreadedDebug + + + Windows + true + $(OutDir)zlibwapi.lib;$(OutDir)miniunz.lib;%(AdditionalDependencies) + + Level3 @@ -84,6 +135,31 @@ $(OutDir)zlibwapi.lib;$(OutDir)miniunz.lib;%(AdditionalDependencies) + + + Level3 + NotUsing + MaxSpeed + + + false + WIN32;NDEBUG;_WINDOWS;_USRDLL;FMUSIMULATOR_EXPORTS;%(PreprocessorDefinitions) + MultiThreaded + + + + + Windows + true + true + true + $(OutDir)zlibwapi.lib;$(OutDir)miniunz.lib;%(AdditionalDependencies) + + + + + + @@ -99,6 +175,7 @@ + diff --git a/org.simantics.fmu/FMUSolution/FMUSimulator/FMUSimulator.vcxproj.filters b/org.simantics.fmu/FMUSolution/FMUSimulator/FMUSimulator.vcxproj.filters index a819ab7d..33d44c10 100644 --- a/org.simantics.fmu/FMUSolution/FMUSimulator/FMUSimulator.vcxproj.filters +++ b/org.simantics.fmu/FMUSolution/FMUSimulator/FMUSimulator.vcxproj.filters @@ -52,5 +52,8 @@ Resource Files + + Resource Files + \ No newline at end of file diff --git a/org.simantics.fmu/FMUSolution/FMUSimulator/include/expat.lib b/org.simantics.fmu/FMUSolution/FMUSimulator/include/expat.lib new file mode 100644 index 00000000..08ee7506 Binary files /dev/null and b/org.simantics.fmu/FMUSolution/FMUSimulator/include/expat.lib differ diff --git a/org.simantics.fmu/FMUSolution/FMUSolution.sln b/org.simantics.fmu/FMUSolution/FMUSolution.sln index f3f21f80..2360dc11 100644 --- a/org.simantics.fmu/FMUSolution/FMUSolution.sln +++ b/org.simantics.fmu/FMUSolution/FMUSolution.sln @@ -1,6 +1,6 @@  Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual C++ Express 2010 +# Visual Studio 2010 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FMUSimulator", "FMUSimulator\FMUSimulator.vcxproj", "{9838038D-09A3-43A5-AB97-B5B5C763DF43}" ProjectSection(ProjectDependencies) = postProject {C52F9E7B-498A-42BE-8DB4-85A15694382A} = {C52F9E7B-498A-42BE-8DB4-85A15694382A} @@ -17,28 +17,49 @@ EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 Release|Win32 = Release|Win32 + Release|x64 = Release|x64 ReleaseWithoutAsm|Win32 = ReleaseWithoutAsm|Win32 + ReleaseWithoutAsm|x64 = ReleaseWithoutAsm|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {9838038D-09A3-43A5-AB97-B5B5C763DF43}.Debug|Win32.ActiveCfg = Debug|Win32 {9838038D-09A3-43A5-AB97-B5B5C763DF43}.Debug|Win32.Build.0 = Debug|Win32 + {9838038D-09A3-43A5-AB97-B5B5C763DF43}.Debug|x64.ActiveCfg = Debug|x64 + {9838038D-09A3-43A5-AB97-B5B5C763DF43}.Debug|x64.Build.0 = Debug|x64 {9838038D-09A3-43A5-AB97-B5B5C763DF43}.Release|Win32.ActiveCfg = Release|Win32 {9838038D-09A3-43A5-AB97-B5B5C763DF43}.Release|Win32.Build.0 = Release|Win32 + {9838038D-09A3-43A5-AB97-B5B5C763DF43}.Release|x64.ActiveCfg = Release|x64 + {9838038D-09A3-43A5-AB97-B5B5C763DF43}.Release|x64.Build.0 = Release|x64 {9838038D-09A3-43A5-AB97-B5B5C763DF43}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Win32 {9838038D-09A3-43A5-AB97-B5B5C763DF43}.ReleaseWithoutAsm|Win32.Build.0 = Release|Win32 + {9838038D-09A3-43A5-AB97-B5B5C763DF43}.ReleaseWithoutAsm|x64.ActiveCfg = Release|x64 + {9838038D-09A3-43A5-AB97-B5B5C763DF43}.ReleaseWithoutAsm|x64.Build.0 = Release|x64 {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Win32.ActiveCfg = Debug|Win32 {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|Win32.Build.0 = Debug|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|x64.ActiveCfg = Debug|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Debug|x64.Build.0 = Debug|x64 {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|Win32.ActiveCfg = Release|Win32 {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|Win32.Build.0 = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|x64.ActiveCfg = Release|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.Release|x64.Build.0 = Release|x64 {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Win32.ActiveCfg = Release|Win32 {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|Win32.Build.0 = Release|Win32 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|x64.ActiveCfg = Release|x64 + {C52F9E7B-498A-42BE-8DB4-85A15694382A}.ReleaseWithoutAsm|x64.Build.0 = Release|x64 {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|Win32.ActiveCfg = Debug|Win32 {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|Win32.Build.0 = Debug|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|x64.ActiveCfg = Debug|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Debug|x64.Build.0 = Debug|x64 {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Win32.ActiveCfg = ReleaseWithoutAsm|Win32 {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|Win32.Build.0 = ReleaseWithoutAsm|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|x64.ActiveCfg = Release|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.Release|x64.Build.0 = Release|x64 {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Win32.ActiveCfg = ReleaseWithoutAsm|Win32 {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|Win32.Build.0 = ReleaseWithoutAsm|Win32 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|x64.ActiveCfg = ReleaseWithoutAsm|x64 + {8FD826F8-3739-44E6-8CC8-997122E53B8D}.ReleaseWithoutAsm|x64.Build.0 = ReleaseWithoutAsm|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/org.simantics.fmu/FMUSolution/x64/Release/libexpat-1.dll b/org.simantics.fmu/FMUSolution/x64/Release/libexpat-1.dll new file mode 100644 index 00000000..0f8b01be Binary files /dev/null and b/org.simantics.fmu/FMUSolution/x64/Release/libexpat-1.dll differ diff --git a/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/gvmat64.lst b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/gvmat64.lst new file mode 100644 index 00000000..aa5e476c --- /dev/null +++ b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/gvmat64.lst @@ -0,0 +1,666 @@ +Microsoft (R) Macro Assembler (x64) Version 10.00.40219.01 05/20/14 09:52:26 +gvmat64.asm Page 1 - 1 + + + ;uInt longest_match_x64( + ; deflate_state *s, + ; IPos cur_match); /* current match */ + + ; gvmat64.asm -- Asm portion of the optimized longest_match for 32 bits x86_64 + ; (AMD64 on Athlon 64, Opteron, Phenom + ; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7) + ; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant. + ; + ; File written by Gilles Vollant, by converting to assembly the longest_match + ; from Jean-loup Gailly in deflate.c of zLib and infoZip zip. + ; + ; and by taking inspiration on asm686 with masm, optimised assembly code + ; from Brian Raiter, written 1998 + ; + ; This software is provided 'as-is', without any express or implied + ; warranty. In no event will the authors be held liable for any damages + ; arising from the use of this software. + ; + ; Permission is granted to anyone to use this software for any purpose, + ; including commercial applications, and to alter it and redistribute it + ; freely, subject to the following restrictions: + ; + ; 1. The origin of this software must not be misrepresented; you must not + ; claim that you wrote the original software. If you use this software + ; in a product, an acknowledgment in the product documentation would be + ; appreciated but is not required. + ; 2. Altered source versions must be plainly marked as such, and must not be + ; misrepresented as being the original software + ; 3. This notice may not be removed or altered from any source distribution. + ; + ; + ; + ; http://www.zlib.net + ; http://www.winimage.com/zLibDll + ; http://www.muppetlabs.com/~breadbox/software/assembly.html + ; + ; to compile this file for infozip Zip, I use option: + ; ml64.exe /Flgvmat64 /c /Zi /DINFOZIP gvmat64.asm + ; + ; to compile this file for zLib, I use option: + ; ml64.exe /Flgvmat64 /c /Zi gvmat64.asm + ; Be carrefull to adapt zlib1222add below to your version of zLib + ; (if you use a version of zLib before 1.0.4 or after 1.2.2.2, change + ; value of zlib1222add later) + ; + ; This file compile with Microsoft Macro Assembler (x64) for AMD64 + ; + ; ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK + ; + ; (you can get Windows WDK with ml64 for AMD64 from + ; http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price) + ; + + + ;uInt longest_match(s, cur_match) + ; deflate_state *s; + ; IPos cur_match; /* current match */ + 00000000 .code + 00000000 longest_match PROC + + + ;LocalVarsSize equ 88 + = 00000048 LocalVarsSize equ 72 + + ; register used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12 + ; free register : r14,r15 + ; register can be saved : rsp + + = rsp + 8 - LocalVarsSize chainlenwmask equ rsp + 8 - LocalVarsSize ; high word: current chain len + ; low word: s->wmask + ;window equ rsp + xx - LocalVarsSize ; local copy of s->window ; stored in r10 + ;windowbestlen equ rsp + xx - LocalVarsSize ; s->window + bestlen , use r10+r11 + ;scanstart equ rsp + xx - LocalVarsSize ; first two bytes of string ; stored in r12w + ;scanend equ rsp + xx - LocalVarsSize ; last two bytes of string use ebx + ;scanalign equ rsp + xx - LocalVarsSize ; dword-misalignment of string r13 + ;bestlen equ rsp + xx - LocalVarsSize ; size of best match so far -> r11d + ;scan equ rsp + xx - LocalVarsSize ; ptr to string wanting match -> r9 + IFDEF INFOZIP + ELSE + = (rsp + 16 - LocalVarsSiz nicematch equ (rsp + 16 - LocalVarsSize) ; a good enough match size + e) + ENDIF + + = rsp + 24 - LocalVarsSize save_rdi equ rsp + 24 - LocalVarsSize + = rsp + 32 - LocalVarsSize save_rsi equ rsp + 32 - LocalVarsSize + = rsp + 40 - LocalVarsSize save_rbx equ rsp + 40 - LocalVarsSize + = rsp + 48 - LocalVarsSize save_rbp equ rsp + 48 - LocalVarsSize + = rsp + 56 - LocalVarsSize save_r12 equ rsp + 56 - LocalVarsSize + = rsp + 64 - LocalVarsSize save_r13 equ rsp + 64 - LocalVarsSize + ;save_r14 equ rsp + 72 - LocalVarsSize + ;save_r15 equ rsp + 80 - LocalVarsSize + + + ; summary of register usage + ; scanend ebx + ; scanendw bx + ; chainlenwmask edx + ; curmatch rsi + ; curmatchd esi + ; windowbestlen r8 + ; scanalign r9 + ; scanalignd r9d + ; window r10 + ; bestlen r11 + ; bestlend r11d + ; scanstart r12d + ; scanstartw r12w + ; scan r13 + ; nicematch r14d + ; limit r15 + ; limitd r15d + ; prev rcx + + ; all the +4 offsets are due to the addition of pending_buf_size (in zlib + ; in the deflate_state structure since the asm code was first written + ; (if you compile with zlib 1.0.4 or older, remove the +4). + ; Note : these value are good with a 8 bytes boundary pack structure + + + = 00000102 MAX_MATCH equ 258 + = 00000003 MIN_MATCH equ 3 + = 00000106 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) + + + ;;; Offsets for fields in the deflate_state structure. These numbers + ;;; are calculated from the definition of deflate_state, with the + ;;; assumption that the compiler will dword-align the fields. (Thus, + ;;; changing the definition of deflate_state could easily cause this + ;;; program to crash horribly, without so much as a warning at + ;;; compile time. Sigh.) + + ; all the +zlib1222add offsets are due to the addition of fields + ; in zlib in the deflate_state structure since the asm code was first written + ; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). + ; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). + ; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). + + + IFDEF INFOZIP + ELSE + + IFNDEF zlib1222add + = 00000008 zlib1222add equ 8 + ENDIF + = 00000044 dsWSize equ 56+zlib1222add+(zlib1222add/2) + = 0000004C dsWMask equ 64+zlib1222add+(zlib1222add/2) + = 00000050 dsWindow equ 72+zlib1222add + = 00000060 dsPrev equ 88+zlib1222add + = 00000088 dsMatchLen equ 128+zlib1222add + = 0000008C dsPrevMatch equ 132+zlib1222add + = 00000094 dsStrStart equ 140+zlib1222add + = 00000098 dsMatchStart equ 144+zlib1222add + = 0000009C dsLookahead equ 148+zlib1222add + = 000000A0 dsPrevLen equ 152+zlib1222add + = 000000A4 dsMaxChainLen equ 156+zlib1222add + = 000000B4 dsGoodMatch equ 172+zlib1222add + = 000000B8 dsNiceMatch equ 176+zlib1222add + + = [ rcx + dsWSize] window_size equ [ rcx + dsWSize] + = [ rcx + dsWMask] WMask equ [ rcx + dsWMask] + = [ rcx + dsWindow] window_ad equ [ rcx + dsWindow] + = [ rcx + dsPrev] prev_ad equ [ rcx + dsPrev] + = [ rcx + dsStrStart] strstart equ [ rcx + dsStrStart] + = [ rcx + dsMatchStart] match_start equ [ rcx + dsMatchStart] + = [ rcx + dsLookahead] Lookahead equ [ rcx + dsLookahead] ; 0ffffffffh on infozip + = [ rcx + dsPrevLen] prev_length equ [ rcx + dsPrevLen] + = [ rcx + dsMaxChainLen] max_chain_length equ [ rcx + dsMaxChainLen] + = [ rcx + dsGoodMatch] good_match equ [ rcx + dsGoodMatch] + = [ rcx + dsNiceMatch] nice_match equ [ rcx + dsNiceMatch] + ENDIF + + ; parameter 1 in r8(deflate state s), param 2 in rdx (cur match) + + ; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and + ; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp + ; + ; All registers must be preserved across the call, except for + ; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch. + + + + ;;; Save registers that the compiler may be using, and adjust esp to + ;;; make room for our stack frame. + + + ;;; Retrieve the function arguments. r8d will hold cur_match + ;;; throughout the entire function. edx will hold the pointer to the + ;;; deflate_state structure during the function's setup (before + ;;; entering the main loop. + + ; parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match) + + ; this clear high 32 bits of r8, which can be garbage in both r8 and rdx + + 00000000 48/ 89 7C 24 mov [save_rdi],rdi + D0 + 00000005 48/ 89 74 24 mov [save_rsi],rsi + D8 + 0000000A 48/ 89 5C 24 mov [save_rbx],rbx + E0 + 0000000F 48/ 89 6C 24 mov [save_rbp],rbp + E8 + IFDEF INFOZIP + ELSE + 00000014 44/ 8B C2 mov r8d,edx + ENDIF + 00000017 4C/ 89 64 24 mov [save_r12],r12 + F0 + 0000001C 4C/ 89 6C 24 mov [save_r13],r13 + F8 + ; mov [save_r14],r14 + ; mov [save_r15],r15 + + + ;;; uInt wmask = s->w_mask; + ;;; unsigned chain_length = s->max_chain_length; + ;;; if (s->prev_length >= s->good_match) { + ;;; chain_length >>= 2; + ;;; } + + 00000021 8B B9 000000A0 mov edi, prev_length + 00000027 8B B1 000000B4 mov esi, good_match + 0000002D 8B 41 4C mov eax, WMask + 00000030 8B 99 000000A4 mov ebx, max_chain_length + 00000036 3B FE cmp edi, esi + 00000038 7C 03 jl LastMatchGood + 0000003A C1 EB 02 shr ebx, 2 + 0000003D LastMatchGood: + + ;;; chainlen is decremented once beforehand so that the function can + ;;; use the sign flag instead of the zero flag for the exit test. + ;;; It is then shifted into the high word, to make room for the wmask + ;;; value, which it will always accompany. + + 0000003D FF CB dec ebx + 0000003F C1 E3 10 shl ebx, 16 + 00000042 0B D8 or ebx, eax + + ;;; on zlib only + ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + IFDEF INFOZIP + ELSE + 00000044 8B 81 000000B8 mov eax, nice_match + 0000004A 89 5C 24 C0 mov [chainlenwmask], ebx + 0000004E 44/ 8B 91 mov r10d, Lookahead + 0000009C + 00000055 44/ 3B D0 cmp r10d, eax + 00000058 44/ 0F 4D D0 cmovnl r10d, eax + 0000005C 44/ 89 54 24 mov [nicematch],r10d + C8 + ENDIF + + ;;; register Bytef *scan = s->window + s->strstart; + 00000061 4C/ 8B 51 50 mov r10, window_ad + 00000065 8B A9 00000094 mov ebp, strstart + 0000006B 4E/ 8D 6C 15 lea r13, [r10 + rbp] + 00 + + ;;; Determine how many bytes the scan ptr is off from being + ;;; dword-aligned. + + 00000070 4D/ 8B CD mov r9,r13 + 00000073 49/ F7 DD neg r13 + 00000076 49/ 83 E5 03 and r13,3 + + ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + ;;; s->strstart - (IPos)MAX_DIST(s) : NIL; + IFDEF INFOZIP + ELSE + 0000007A 8B 41 44 mov eax, window_size + 0000007D 2D 00000106 sub eax, MIN_LOOKAHEAD + ENDIF + 00000082 33 FF xor edi,edi + 00000084 2B E8 sub ebp, eax + + 00000086 44/ 8B 99 mov r11d, prev_length + 000000A0 + + 0000008D 0F 4E EF cmovng ebp,edi + + ;;; int best_len = s->prev_length; + + + ;;; Store the sum of s->window + best_len in esi locally, and in esi. + + 00000090 4B/ 8D 34 13 lea rsi,[r10+r11] + + ;;; register ush scan_start = *(ushf*)scan; + ;;; register ush scan_end = *(ushf*)(scan+best_len-1); + ;;; Posf *prev = s->prev; + + 00000094 45/ 0F B7 21 movzx r12d,word ptr [r9] + 00000098 43/ 0F B7 5C 0B movzx ebx, word ptr [r9 + r11 - 1] + FF + + 0000009E 48/ 8B 79 60 mov rdi, prev_ad + + ;;; Jump into the main loop. + + 000000A2 8B 54 24 C0 mov edx, [chainlenwmask] + + 000000A6 66| 41/ 3B 5C 30 cmp bx,word ptr [rsi + r8 - 1] + FF + 000000AC 0F 84 0000009A jz LookupLoopIsZero + + 000000B2 LookupLoop1: + 000000B2 44/ 23 C2 and r8d, edx + + 000000B5 46/ 0F B7 04 47 movzx r8d, word ptr [rdi + r8*2] + 000000BA 44/ 3B C5 cmp r8d, ebp + 000000BD 0F 86 00000170 jbe LeaveNow + 000000C3 81 EA 00010000 sub edx, 00010000h + 000000C9 0F 88 00000164 js LeaveNow + + 000000CF LoopEntry1: + 000000CF 66| 41/ 3B 5C 30 cmp bx,word ptr [rsi + r8 - 1] + FF + 000000D5 74 75 jz LookupLoopIsZero + + 000000D7 LookupLoop2: + 000000D7 44/ 23 C2 and r8d, edx + + 000000DA 46/ 0F B7 04 47 movzx r8d, word ptr [rdi + r8*2] + 000000DF 44/ 3B C5 cmp r8d, ebp + 000000E2 0F 86 0000014B jbe LeaveNow + 000000E8 81 EA 00010000 sub edx, 00010000h + 000000EE 0F 88 0000013F js LeaveNow + + 000000F4 LoopEntry2: + 000000F4 66| 41/ 3B 5C 30 cmp bx,word ptr [rsi + r8 - 1] + FF + 000000FA 74 50 jz LookupLoopIsZero + + 000000FC LookupLoop4: + 000000FC 44/ 23 C2 and r8d, edx + + 000000FF 46/ 0F B7 04 47 movzx r8d, word ptr [rdi + r8*2] + 00000104 44/ 3B C5 cmp r8d, ebp + 00000107 0F 86 00000126 jbe LeaveNow + 0000010D 81 EA 00010000 sub edx, 00010000h + 00000113 0F 88 0000011A js LeaveNow + + 00000119 LoopEntry4: + + 00000119 66| 41/ 3B 5C 30 cmp bx,word ptr [rsi + r8 - 1] + FF + 0000011F 75 91 jnz LookupLoop1 + 00000121 EB 29 jmp LookupLoopIsZero + + + ;;; do { + ;;; match = s->window + cur_match; + ;;; if (*(ushf*)(match+best_len-1) != scan_end || + ;;; *(ushf*)match != scan_start) continue; + ;;; [...] + ;;; } while ((cur_match = prev[cur_match & wmask]) > limit + ;;; && --chain_length != 0); + ;;; + ;;; Here is the inner loop of the function. The function will spend the + ;;; majority of its time in this loop, and majority of that time will + ;;; be spent in the first ten instructions. + ;;; + ;;; Within this loop: + ;;; ebx = scanend + ;;; r8d = curmatch + ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) + ;;; esi = windowbestlen - i.e., (window + bestlen) + ;;; edi = prev + ;;; ebp = limit + + 00000123 LookupLoop: + 00000123 44/ 23 C2 and r8d, edx + + 00000126 46/ 0F B7 04 47 movzx r8d, word ptr [rdi + r8*2] + 0000012B 44/ 3B C5 cmp r8d, ebp + 0000012E 0F 86 000000FF jbe LeaveNow + 00000134 81 EA 00010000 sub edx, 00010000h + 0000013A 0F 88 000000F3 js LeaveNow + + 00000140 LoopEntry: + + 00000140 66| 41/ 3B 5C 30 cmp bx,word ptr [rsi + r8 - 1] + FF + 00000146 0F 85 FFFFFF66 jnz LookupLoop1 + 0000014C LookupLoopIsZero: + 0000014C 66| 47/ 3B 24 10 cmp r12w, word ptr [r10 + r8] + 00000151 0F 85 FFFFFF5B jnz LookupLoop1 + + + ;;; Store the current value of chainlen. + 00000157 89 54 24 C0 mov [chainlenwmask], edx + + ;;; Point edi to the string under scrutiny, and esi to the string we + ;;; are hoping to match it up with. In actuality, esi and edi are + ;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is + ;;; initialized to -(MAX_MATCH_8 - scanalign). + + 0000015B 4B/ 8D 34 02 lea rsi,[r8+r10] + 0000015F 48/ BA mov rdx, 0fffffffffffffef8h; -(MAX_MATCH_8) + FFFFFFFFFFFFFEF8 + 00000169 49/ 8D B4 35 lea rsi, [rsi + r13 + 0108h] ;MAX_MATCH_8] + 00000108 + 00000171 4B/ 8D BC 0D lea rdi, [r9 + r13 + 0108h] ;MAX_MATCH_8] + 00000108 + + 00000179 0F 18 14 32 prefetcht1 [rsi+rdx] + 0000017D 0F 18 14 3A prefetcht1 [rdi+rdx] + + + ;;; Test the strings for equality, 8 bytes at a time. At the end, + ;;; adjust rdx so that it is offset to the exact byte that mismatched. + ;;; + ;;; We already know at this point that the first three bytes of the + ;;; strings match each other, and they can be safely passed over before + ;;; starting the compare loop. So what this code does is skip over 0-3 + ;;; bytes, as much as necessary in order to dword-align the edi + ;;; pointer. (rsi will still be misaligned three times out of four.) + ;;; + ;;; It should be confessed that this loop usually does not represent + ;;; much of the total running time. Replacing it with a more + ;;; straightforward "rep cmpsb" would not drastically degrade + ;;; performance. + + + 00000181 LoopCmps: + 00000181 48/ 8B 04 32 mov rax, [rsi + rdx] + 00000185 48/ 33 04 3A xor rax, [rdi + rdx] + 00000189 75 28 jnz LeaveLoopCmps + + 0000018B 48/ 8B 44 32 mov rax, [rsi + rdx + 8] + 08 + 00000190 48/ 33 44 3A xor rax, [rdi + rdx + 8] + 08 + 00000195 75 18 jnz LeaveLoopCmps8 + + + 00000197 48/ 8B 44 32 mov rax, [rsi + rdx + 8+8] + 10 + 0000019C 48/ 33 44 3A xor rax, [rdi + rdx + 8+8] + 10 + 000001A1 75 08 jnz LeaveLoopCmps16 + + 000001A3 48/ 83 C2 18 add rdx,8+8+8 + + 000001A7 75 D8 jnz short LoopCmps + 000001A9 EB 7B jmp short LenMaximum + 000001AB 48/ 83 C2 08 LeaveLoopCmps16: add rdx,8 + 000001AF 48/ 83 C2 08 LeaveLoopCmps8: add rdx,8 + 000001B3 LeaveLoopCmps: + + 000001B3 A9 0000FFFF test eax, 0000FFFFh + 000001B8 75 1B jnz LenLower + + 000001BA A9 FFFFFFFF test eax,0ffffffffh + + 000001BF 75 0D jnz LenLower32 + + 000001C1 48/ 83 C2 04 add rdx,4 + 000001C5 48/ C1 E8 20 shr rax,32 + 000001C9 66| 0B C0 or ax,ax + 000001CC 75 07 jnz LenLower + + 000001CE LenLower32: + 000001CE C1 E8 10 shr eax,16 + 000001D1 48/ 83 C2 02 add rdx,2 + 000001D5 2C 01 LenLower: sub al, 1 + 000001D7 48/ 83 D2 00 adc rdx, 0 + ;;; Calculate the length of the match. If it is longer than MAX_MATCH, + ;;; then automatically accept it as the best possible match and leave. + + 000001DB 48/ 8D 04 3A lea rax, [rdi + rdx] + 000001DF 49/ 2B C1 sub rax, r9 + 000001E2 3D 00000102 cmp eax, MAX_MATCH + 000001E7 7D 3D jge LenMaximum + + ;;; If the length of the match is not longer than the best match we + ;;; have so far, then forget it and return to the lookup loop. + ;/////////////////////////////////// + + 000001E9 41/ 3B C3 cmp eax, r11d + 000001EC 7F 11 jg LongerMatch + + 000001EE 4B/ 8D 34 13 lea rsi,[r10+r11] + + 000001F2 48/ 8B 79 60 mov rdi, prev_ad + 000001F6 8B 54 24 C0 mov edx, [chainlenwmask] + 000001FA E9 FFFFFF24 jmp LookupLoop + + ;;; s->match_start = cur_match; + ;;; best_len = len; + ;;; if (len >= nice_match) break; + ;;; scan_end = *(ushf*)(scan+best_len-1); + + 000001FF LongerMatch: + 000001FF 44/ 8B D8 mov r11d, eax + 00000202 44/ 89 81 mov match_start, r8d + 00000098 + 00000209 3B 44 24 C8 cmp eax, [nicematch] + 0000020D 7D 24 jge LeaveNow + + 0000020F 4A/ 8D 34 10 lea rsi,[r10+rax] + + 00000213 42/ 0F B7 5C 08 movzx ebx, word ptr [r9 + rax - 1] + FF + 00000219 48/ 8B 79 60 mov rdi, prev_ad + 0000021D 8B 54 24 C0 mov edx, [chainlenwmask] + 00000221 E9 FFFFFEFD jmp LookupLoop + + ;;; Accept the current string, with the maximum possible length. + + 00000226 LenMaximum: + 00000226 41/ BB mov r11d,MAX_MATCH + 00000102 + 0000022C 44/ 89 81 mov match_start, r8d + 00000098 + + ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + ;;; return s->lookahead; + + 00000233 LeaveNow: + IFDEF INFOZIP + ELSE + 00000233 8B 81 0000009C mov eax, Lookahead + 00000239 44/ 3B D8 cmp r11d, eax + 0000023C 41/ 0F 4E C3 cmovng eax, r11d + ENDIF + + ;;; Restore the stack and return from whence we came. + + + 00000240 48/ 8B 74 24 mov rsi,[save_rsi] + D8 + 00000245 48/ 8B 7C 24 mov rdi,[save_rdi] + D0 + 0000024A 48/ 8B 5C 24 mov rbx,[save_rbx] + E0 + 0000024F 48/ 8B 6C 24 mov rbp,[save_rbp] + E8 + 00000254 4C/ 8B 64 24 mov r12,[save_r12] + F0 + 00000259 4C/ 8B 6C 24 mov r13,[save_r13] + F8 + ; mov r14,[save_r14] + ; mov r15,[save_r15] + + + 0000025E C3 ret 0 + ; please don't remove this string ! + ; Your can freely use gvmat64 in any free or commercial app + ; but it is far better don't remove the string in the binary! + 0000025F 0D 0A 61 73 6D db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0 + 36 38 36 20 77 + 69 74 68 20 6D + 61 73 6D 2C 20 + 6F 70 74 69 6D + 69 73 65 64 20 + 61 73 73 65 6D + 62 6C 79 20 63 + 6F 64 65 20 66 + 72 6F 6D 20 42 + 72 69 61 6E 20 + 52 61 69 74 65 + 72 2C 20 77 72 + 69 74 74 65 6E + 20 31 39 39 38 + 2C 20 63 6F 6E + 76 65 72 74 65 + 64 20 74 6F 20 + 61 6D 64 20 36 + 34 20 62 79 20 + 47 69 6C 6C 65 + 73 20 56 6F 6C + 6C 61 6E 74 20 + 32 30 30 35 0D + 0A 00 + 000002D9 longest_match ENDP + + 000002D9 match_init PROC + 000002D9 C3 ret 0 + 000002DA match_init ENDP + + + END + Microsoft (R) Macro Assembler (x64) Version 10.00.40219.01 05/20/14 09:52:26 +gvmat64.asm Symbols 2 - 1 + + + + +Procedures, parameters, and locals: + + N a m e Type Value Attr + +longest_match . . . . . . . . . P 00000000 _TEXT Length= 000002D9 Public + LastMatchGood . . . . . . . . L 0000003D _TEXT + LookupLoop1 . . . . . . . . . L 000000B2 _TEXT + LoopEntry1 . . . . . . . . . . L 000000CF _TEXT + LookupLoop2 . . . . . . . . . L 000000D7 _TEXT + LoopEntry2 . . . . . . . . . . L 000000F4 _TEXT + LookupLoop4 . . . . . . . . . L 000000FC _TEXT + LoopEntry4 . . . . . . . . . . L 00000119 _TEXT + LookupLoop . . . . . . . . . . L 00000123 _TEXT + LoopEntry . . . . . . . . . . L 00000140 _TEXT + LookupLoopIsZero . . . . . . . L 0000014C _TEXT + LoopCmps . . . . . . . . . . . L 00000181 _TEXT + LeaveLoopCmps16 . . . . . . . L 000001AB _TEXT + LeaveLoopCmps8 . . . . . . . . L 000001AF _TEXT + LeaveLoopCmps . . . . . . . . L 000001B3 _TEXT + LenLower32 . . . . . . . . . . L 000001CE _TEXT + LenLower . . . . . . . . . . . L 000001D5 _TEXT + LongerMatch . . . . . . . . . L 000001FF _TEXT + LenMaximum . . . . . . . . . . L 00000226 _TEXT + LeaveNow . . . . . . . . . . . L 00000233 _TEXT +match_init . . . . . . . . . . . P 000002D9 _TEXT Length= 00000001 Public + + +Symbols: + + N a m e Type Value Attr + +LocalVarsSize . . . . . . . . . Number 00000048h +Lookahead . . . . . . . . . . . Text [ rcx + dsLookahead] +MAX_MATCH . . . . . . . . . . . Number 00000102h +MIN_LOOKAHEAD . . . . . . . . . Number 00000106h +MIN_MATCH . . . . . . . . . . . Number 00000003h +WMask . . . . . . . . . . . . . Text [ rcx + dsWMask] +chainlenwmask . . . . . . . . . Text rsp + 8 - LocalVarsSize +dsGoodMatch . . . . . . . . . . Number 000000B4h +dsLookahead . . . . . . . . . . Number 0000009Ch +dsMatchLen . . . . . . . . . . . Number 00000088h +dsMatchStart . . . . . . . . . . Number 00000098h +dsMaxChainLen . . . . . . . . . Number 000000A4h +dsNiceMatch . . . . . . . . . . Number 000000B8h +dsPrevLen . . . . . . . . . . . Number 000000A0h +dsPrevMatch . . . . . . . . . . Number 0000008Ch +dsPrev . . . . . . . . . . . . . Number 00000060h +dsStrStart . . . . . . . . . . . Number 00000094h +dsWMask . . . . . . . . . . . . Number 0000004Ch +dsWSize . . . . . . . . . . . . Number 00000044h +dsWindow . . . . . . . . . . . . Number 00000050h +good_match . . . . . . . . . . . Text [ rcx + dsGoodMatch] +match_start . . . . . . . . . . Text [ rcx + dsMatchStart] +max_chain_length . . . . . . . . Text [ rcx + dsMaxChainLen] +nice_match . . . . . . . . . . . Text [ rcx + dsNiceMatch] +nicematch . . . . . . . . . . . Text (rsp + 16 - LocalVarsSize) +prev_ad . . . . . . . . . . . . Text [ rcx + dsPrev] +prev_length . . . . . . . . . . Text [ rcx + dsPrevLen] +save_r12 . . . . . . . . . . . . Text rsp + 56 - LocalVarsSize +save_r13 . . . . . . . . . . . . Text rsp + 64 - LocalVarsSize +save_rbp . . . . . . . . . . . . Text rsp + 48 - LocalVarsSize +save_rbx . . . . . . . . . . . . Text rsp + 40 - LocalVarsSize +save_rdi . . . . . . . . . . . . Text rsp + 24 - LocalVarsSize +save_rsi . . . . . . . . . . . . Text rsp + 32 - LocalVarsSize +strstart . . . . . . . . . . . . Text [ rcx + dsStrStart] +window_ad . . . . . . . . . . . Text [ rcx + dsWindow] +window_size . . . . . . . . . . Text [ rcx + dsWSize] +zlib1222add . . . . . . . . . . Number 00000008h + + 0 Warnings + 0 Errors diff --git a/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/gvmat64.obj b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/gvmat64.obj new file mode 100644 index 00000000..dc25f151 Binary files /dev/null and b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/gvmat64.obj differ diff --git a/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/inffasx64.lst b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/inffasx64.lst new file mode 100644 index 00000000..53b89a64 --- /dev/null +++ b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/inffasx64.lst @@ -0,0 +1,488 @@ +Microsoft (R) Macro Assembler (x64) Version 10.00.40219.01 05/20/14 09:52:26 +inffasx64.asm Page 1 - 1 + + + ; inffasx64.asm is a hand tuned assembler version of inffast.c - fast decoding + ; version for AMD64 on Windows using Microsoft C compiler + ; + ; inffasx64.asm is automatically convert from AMD64 portion of inffas86.c + ; inffasx64.asm is called by inffas8664.c, which contain more info. + + + ; to compile this file, I use option + ; ml64.exe /Flinffasx64 /c /Zi inffasx64.asm + ; with Microsoft Macro Assembler (x64) for AMD64 + ; + + ; This file compile with Microsoft Macro Assembler (x64) for AMD64 + ; + ; ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK + ; + ; (you can get Windows WDK with ml64 for AMD64 from + ; http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price) + ; + + + 00000000 .code + 00000000 inffas8664fnc PROC + + ; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and + ; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp + ; + ; All registers must be preserved across the call, except for + ; rax, rcx, rdx, r8, r-9, r10, and r11, which are scratch. + + + 00000000 48/ 89 74 24 mov [rsp-8],rsi + F8 + 00000005 48/ 89 7C 24 mov [rsp-16],rdi + F0 + 0000000A 4C/ 89 64 24 mov [rsp-24],r12 + E8 + 0000000F 4C/ 89 6C 24 mov [rsp-32],r13 + E0 + 00000014 4C/ 89 74 24 mov [rsp-40],r14 + D8 + 00000019 4C/ 89 7C 24 mov [rsp-48],r15 + D0 + 0000001E 48/ 89 5C 24 mov [rsp-56],rbx + C8 + + 00000023 48/ 8B C1 mov rax,rcx + + 00000026 48/ 89 68 08 mov [rax+8], rbp ; /* save regs rbp and rsp */ + 0000002A 48/ 89 20 mov [rax], rsp + + 0000002D 48/ 8B E0 mov rsp, rax ; /* make rsp point to &ar */ + + 00000030 48/ 8B 74 24 mov rsi, [rsp+16] ; /* rsi = in */ + 10 + 00000035 48/ 8B 7C 24 mov rdi, [rsp+32] ; /* rdi = out */ + 20 + 0000003A 4C/ 8B 4C 24 mov r9, [rsp+24] ; /* r9 = last */ + 18 + 0000003F 4C/ 8B 54 24 mov r10, [rsp+48] ; /* r10 = end */ + 30 + 00000044 48/ 8B 6C 24 mov rbp, [rsp+64] ; /* rbp = lcode */ + 40 + 00000049 4C/ 8B 5C 24 mov r11, [rsp+72] ; /* r11 = dcode */ + 48 + 0000004E 48/ 8B 54 24 mov rdx, [rsp+80] ; /* rdx = hold */ + 50 + 00000053 8B 5C 24 58 mov ebx, [rsp+88] ; /* ebx = bits */ + 00000057 44/ 8B 64 24 mov r12d, [rsp+100] ; /* r12d = lmask */ + 64 + 0000005C 44/ 8B 6C 24 mov r13d, [rsp+104] ; /* r13d = dmask */ + 68 + ; /* r14d = len */ + ; /* r15d = dist */ + + + 00000061 FC cld + 00000062 4C/ 3B D7 cmp r10, rdi + 00000065 74 05 je L_one_time ; /* if only one decode left */ + 00000067 4C/ 3B CE cmp r9, rsi + + 0000006A 75 2A jne L_do_loop + + + 0000006C L_one_time: + 0000006C 4D/ 8B C4 mov r8, r12 ; /* r8 = lmask */ + 0000006F 80 FB 20 cmp bl, 32 + 00000072 77 50 ja L_get_length_code_one_time + + 00000074 AD lodsd ; /* eax = *(uint *)in++ */ + 00000075 8A CB mov cl, bl ; /* cl = bits, needs it for shifting */ + 00000077 80 C3 20 add bl, 32 ; /* bits += 32 */ + 0000007A 48/ D3 E0 shl rax, cl + 0000007D 48/ 0B D0 or rdx, rax ; /* hold |= *((uint *)in)++ << bits */ + 00000080 EB 42 jmp L_get_length_code_one_time + + ALIGN 4 + 00000084 L_while_test: + 00000084 4C/ 3B D7 cmp r10, rdi + 00000087 0F 86 0000023F jbe L_break_loop + 0000008D 4C/ 3B CE cmp r9, rsi + 00000090 0F 86 00000236 jbe L_break_loop + + 00000096 L_do_loop: + 00000096 4D/ 8B C4 mov r8, r12 ; /* r8 = lmask */ + 00000099 80 FB 20 cmp bl, 32 + 0000009C 77 0C ja L_get_length_code ; /* if (32 < bits) */ + + 0000009E AD lodsd ; /* eax = *(uint *)in++ */ + 0000009F 8A CB mov cl, bl ; /* cl = bits, needs it for shifting */ + 000000A1 80 C3 20 add bl, 32 ; /* bits += 32 */ + 000000A4 48/ D3 E0 shl rax, cl + 000000A7 48/ 0B D0 or rdx, rax ; /* hold |= *((uint *)in)++ << bits */ + + 000000AA L_get_length_code: + 000000AA 4C/ 23 C2 and r8, rdx ; /* r8 &= hold */ + 000000AD 42/ 8B 44 85 mov eax, [rbp+r8*4] ; /* eax = lcode[hold & lmask] */ + 00 + + 000000B2 8A CC mov cl, ah ; /* cl = this.bits */ + 000000B4 2A DC sub bl, ah ; /* bits -= this.bits */ + 000000B6 48/ D3 EA shr rdx, cl ; /* hold >>= this.bits */ + + 000000B9 84 C0 test al, al + 000000BB 75 23 jnz L_test_for_length_base ; /* if (op != 0) 45.7% */ + + 000000BD 4D/ 8B C4 mov r8, r12 ; /* r8 = lmask */ + 000000C0 C1 E8 10 shr eax, 16 ; /* output this.val char */ + 000000C3 AA stosb + + 000000C4 L_get_length_code_one_time: + 000000C4 4C/ 23 C2 and r8, rdx ; /* r8 &= hold */ + 000000C7 42/ 8B 44 85 mov eax, [rbp+r8*4] ; /* eax = lcode[hold & lmask] */ + 00 + + 000000CC L_dolen: + 000000CC 8A CC mov cl, ah ; /* cl = this.bits */ + 000000CE 2A DC sub bl, ah ; /* bits -= this.bits */ + 000000D0 48/ D3 EA shr rdx, cl ; /* hold >>= this.bits */ + + 000000D3 84 C0 test al, al + 000000D5 75 09 jnz L_test_for_length_base ; /* if (op != 0) 45.7% */ + + 000000D7 C1 E8 10 shr eax, 16 ; /* output this.val char */ + 000000DA AA stosb + 000000DB EB A7 jmp L_while_test + + ALIGN 4 + 000000E0 L_test_for_length_base: + 000000E0 44/ 8B F0 mov r14d, eax ; /* len = this */ + 000000E3 41/ C1 EE 10 shr r14d, 16 ; /* len = this.val */ + 000000E7 8A C8 mov cl, al + + 000000E9 A8 10 test al, 16 + 000000EB 0F 84 000000C7 jz L_test_for_second_level_length ; /* if ((op & 16) == 0) 8% */ + 000000F1 80 E1 0F and cl, 15 ; /* op &= 15 */ + 000000F4 74 12 jz L_decode_distance ; /* if (!op) */ + + 000000F6 L_add_bits_to_len: + 000000F6 2A D9 sub bl, cl + 000000F8 33 C0 xor eax, eax + 000000FA FF C0 inc eax + 000000FC D3 E0 shl eax, cl + 000000FE FF C8 dec eax + 00000100 23 C2 and eax, edx ; /* eax &= hold */ + 00000102 48/ D3 EA shr rdx, cl + 00000105 44/ 03 F0 add r14d, eax ; /* len += hold & mask[op] */ + + 00000108 L_decode_distance: + 00000108 4D/ 8B C5 mov r8, r13 ; /* r8 = dmask */ + 0000010B 80 FB 20 cmp bl, 32 + 0000010E 77 0C ja L_get_distance_code ; /* if (32 < bits) */ + + 00000110 AD lodsd ; /* eax = *(uint *)in++ */ + 00000111 8A CB mov cl, bl ; /* cl = bits, needs it for shifting */ + 00000113 80 C3 20 add bl, 32 ; /* bits += 32 */ + 00000116 48/ D3 E0 shl rax, cl + 00000119 48/ 0B D0 or rdx, rax ; /* hold |= *((uint *)in)++ << bits */ + + 0000011C L_get_distance_code: + 0000011C 4C/ 23 C2 and r8, rdx ; /* r8 &= hold */ + 0000011F 43/ 8B 04 83 mov eax, [r11+r8*4] ; /* eax = dcode[hold & dmask] */ + + 00000123 L_dodist: + 00000123 44/ 8B F8 mov r15d, eax ; /* dist = this */ + 00000126 41/ C1 EF 10 shr r15d, 16 ; /* dist = this.val */ + 0000012A 8A CC mov cl, ah + 0000012C 2A DC sub bl, ah ; /* bits -= this.bits */ + 0000012E 48/ D3 EA shr rdx, cl ; /* hold >>= this.bits */ + 00000131 8A C8 mov cl, al ; /* cl = this.op */ + + 00000133 A8 10 test al, 16 ; /* if ((op & 16) == 0) */ + 00000135 0F 84 0000009D jz L_test_for_second_level_dist + 0000013B 80 E1 0F and cl, 15 ; /* op &= 15 */ + 0000013E 74 50 jz L_check_dist_one + + 00000140 L_add_bits_to_dist: + 00000140 2A D9 sub bl, cl + 00000142 33 C0 xor eax, eax + 00000144 FF C0 inc eax + 00000146 D3 E0 shl eax, cl + 00000148 FF C8 dec eax ; /* (1 << op) - 1 */ + 0000014A 23 C2 and eax, edx ; /* eax &= hold */ + 0000014C 48/ D3 EA shr rdx, cl + 0000014F 44/ 03 F8 add r15d, eax ; /* dist += hold & ((1 << op) - 1) */ + + 00000152 L_check_window: + 00000152 4C/ 8B C6 mov r8, rsi ; /* save in so from can use it's reg */ + 00000155 48/ 8B C7 mov rax, rdi + 00000158 48/ 2B 44 24 sub rax, [rsp+40] ; /* nbytes = out - beg */ + 28 + + 0000015D 41/ 3B C7 cmp eax, r15d + 00000160 0F 82 00000092 jb L_clip_window ; /* if (dist > nbytes) 4.2% */ + + 00000166 41/ 8B CE mov ecx, r14d ; /* ecx = len */ + 00000169 48/ 8B F7 mov rsi, rdi + 0000016C 49/ 2B F7 sub rsi, r15 ; /* from = out - dist */ + + 0000016F D1 F9 sar ecx, 1 + 00000171 73 12 jnc L_copy_two ; /* if len % 2 == 0 */ + + 00000173 F3/ 66| A5 rep movsw + 00000176 8A 06 mov al, [rsi] + 00000178 88 07 mov [rdi], al + 0000017A 48/ FF C7 inc rdi + + 0000017D 49/ 8B F0 mov rsi, r8 ; /* move in back to %rsi, toss from */ + 00000180 E9 FFFFFEFF jmp L_while_test + + 00000185 L_copy_two: + 00000185 F3/ 66| A5 rep movsw + 00000188 49/ 8B F0 mov rsi, r8 ; /* move in back to %rsi, toss from */ + 0000018B E9 FFFFFEF4 jmp L_while_test + + ALIGN 4 + 00000190 L_check_dist_one: + 00000190 41/ 83 FF 01 cmp r15d, 1 ; /* if dist 1, is a memset */ + 00000194 75 BC jne L_check_window + 00000196 48/ 39 7C 24 cmp [rsp+40], rdi ; /* if out == beg, outside window */ + 28 + 0000019B 74 B5 je L_check_window + + 0000019D 41/ 8B CE mov ecx, r14d ; /* ecx = len */ + 000001A0 8A 47 FF mov al, [rdi-1] + 000001A3 8A E0 mov ah, al + + 000001A5 D1 F9 sar ecx, 1 + 000001A7 73 05 jnc L_set_two + 000001A9 88 07 mov [rdi], al + 000001AB 48/ FF C7 inc rdi + + 000001AE L_set_two: + 000001AE F3/ 66| AB rep stosw + 000001B1 E9 FFFFFECE jmp L_while_test + + ALIGN 4 + 000001B8 L_test_for_second_level_length: + 000001B8 A8 40 test al, 64 + 000001BA 0F 85 000000E0 jnz L_test_for_end_of_block ; /* if ((op & 64) != 0) */ + + 000001C0 33 C0 xor eax, eax + 000001C2 FF C0 inc eax + 000001C4 D3 E0 shl eax, cl + 000001C6 FF C8 dec eax + 000001C8 23 C2 and eax, edx ; /* eax &= hold */ + 000001CA 41/ 03 C6 add eax, r14d ; /* eax += len */ + 000001CD 8B 44 85 00 mov eax, [rbp+rax*4] ; /* eax = lcode[val+(hold&mask[op])]*/ + 000001D1 E9 FFFFFEF6 jmp L_dolen + + ALIGN 4 + 000001D8 L_test_for_second_level_dist: + 000001D8 A8 40 test al, 64 + 000001DA 0F 85 000000D8 jnz L_invalid_distance_code ; /* if ((op & 64) != 0) */ + + 000001E0 33 C0 xor eax, eax + 000001E2 FF C0 inc eax + 000001E4 D3 E0 shl eax, cl + 000001E6 FF C8 dec eax + 000001E8 23 C2 and eax, edx ; /* eax &= hold */ + 000001EA 41/ 03 C7 add eax, r15d ; /* eax += dist */ + 000001ED 41/ 8B 04 83 mov eax, [r11+rax*4] ; /* eax = dcode[val+(hold&mask[op])]*/ + 000001F1 E9 FFFFFF2D jmp L_dodist + + ALIGN 4 + 000001F8 L_clip_window: + 000001F8 8B C8 mov ecx, eax ; /* ecx = nbytes */ + 000001FA 8B 44 24 5C mov eax, [rsp+92] ; /* eax = wsize, prepare for dist cmp */ + 000001FE F7 D9 neg ecx ; /* nbytes = -nbytes */ + + 00000200 41/ 3B C7 cmp eax, r15d + 00000203 0F 82 000000B9 jb L_invalid_distance_too_far ; /* if (dist > wsize) */ + + 00000209 41/ 03 CF add ecx, r15d ; /* nbytes = dist - nbytes */ + 0000020C 83 7C 24 60 00 cmp dword ptr [rsp+96], 0 + 00000211 75 21 jne L_wrap_around_window ; /* if (write != 0) */ + + 00000213 48/ 8B 74 24 mov rsi, [rsp+56] ; /* from = window */ + 38 + 00000218 2B C1 sub eax, ecx ; /* eax -= nbytes */ + 0000021A 48/ 03 F0 add rsi, rax ; /* from += wsize - nbytes */ + + 0000021D 41/ 8B C6 mov eax, r14d ; /* eax = len */ + 00000220 44/ 3B F1 cmp r14d, ecx + 00000223 76 6F jbe L_do_copy ; /* if (nbytes >= len) */ + + 00000225 2B C1 sub eax, ecx ; /* eax -= nbytes */ + 00000227 F3/ A4 rep movsb + 00000229 48/ 8B F7 mov rsi, rdi + 0000022C 49/ 2B F7 sub rsi, r15 ; /* from = &out[ -dist ] */ + 0000022F EB 63 jmp L_do_copy + + ALIGN 4 + 00000234 L_wrap_around_window: + 00000234 8B 44 24 60 mov eax, [rsp+96] ; /* eax = write */ + 00000238 3B C8 cmp ecx, eax + 0000023A 76 38 jbe L_contiguous_in_window ; /* if (write >= nbytes) */ + + 0000023C 8B 74 24 5C mov esi, [rsp+92] ; /* from = wsize */ + 00000240 48/ 03 74 24 add rsi, [rsp+56] ; /* from += window */ + 38 + 00000245 48/ 03 F0 add rsi, rax ; /* from += write */ + 00000248 48/ 2B F1 sub rsi, rcx ; /* from -= nbytes */ + 0000024B 2B C8 sub ecx, eax ; /* nbytes -= write */ + + 0000024D 41/ 8B C6 mov eax, r14d ; /* eax = len */ + 00000250 3B C1 cmp eax, ecx + 00000252 76 40 jbe L_do_copy ; /* if (nbytes >= len) */ + + 00000254 2B C1 sub eax, ecx ; /* len -= nbytes */ + 00000256 F3/ A4 rep movsb + 00000258 48/ 8B 74 24 mov rsi, [rsp+56] ; /* from = window */ + 38 + 0000025D 8B 4C 24 60 mov ecx, [rsp+96] ; /* nbytes = write */ + 00000261 3B C1 cmp eax, ecx + 00000263 76 2F jbe L_do_copy ; /* if (nbytes >= len) */ + + 00000265 2B C1 sub eax, ecx ; /* len -= nbytes */ + 00000267 F3/ A4 rep movsb + 00000269 48/ 8B F7 mov rsi, rdi + 0000026C 49/ 2B F7 sub rsi, r15 ; /* from = out - dist */ + 0000026F EB 23 jmp L_do_copy + + ALIGN 4 + 00000274 L_contiguous_in_window: + 00000274 48/ 8B 74 24 mov rsi, [rsp+56] ; /* rsi = window */ + 38 + 00000279 48/ 03 F0 add rsi, rax + 0000027C 48/ 2B F1 sub rsi, rcx ; /* from += write - nbytes */ + + 0000027F 41/ 8B C6 mov eax, r14d ; /* eax = len */ + 00000282 3B C1 cmp eax, ecx + 00000284 76 0E jbe L_do_copy ; /* if (nbytes >= len) */ + + 00000286 2B C1 sub eax, ecx ; /* len -= nbytes */ + 00000288 F3/ A4 rep movsb + 0000028A 48/ 8B F7 mov rsi, rdi + 0000028D 49/ 2B F7 sub rsi, r15 ; /* from = out - dist */ + 00000290 EB 02 jmp L_do_copy ; /* if (nbytes >= len) */ + + ALIGN 4 + 00000294 L_do_copy: + 00000294 8B C8 mov ecx, eax ; /* ecx = len */ + 00000296 F3/ A4 rep movsb + + 00000298 49/ 8B F0 mov rsi, r8 ; /* move in back to %esi, toss from */ + 0000029B E9 FFFFFDE4 jmp L_while_test + + 000002A0 L_test_for_end_of_block: + 000002A0 A8 20 test al, 32 + 000002A2 74 0A jz L_invalid_literal_length_code + 000002A4 C7 44 24 74 mov dword ptr [rsp+116], 1 + 00000001 + 000002AC EB 26 jmp L_break_loop_with_status + + 000002AE L_invalid_literal_length_code: + 000002AE C7 44 24 74 mov dword ptr [rsp+116], 2 + 00000002 + 000002B6 EB 1C jmp L_break_loop_with_status + + 000002B8 L_invalid_distance_code: + 000002B8 C7 44 24 74 mov dword ptr [rsp+116], 3 + 00000003 + 000002C0 EB 12 jmp L_break_loop_with_status + + 000002C2 L_invalid_distance_too_far: + 000002C2 C7 44 24 74 mov dword ptr [rsp+116], 4 + 00000004 + 000002CA EB 08 jmp L_break_loop_with_status + + 000002CC L_break_loop: + 000002CC C7 44 24 74 mov dword ptr [rsp+116], 0 + 00000000 + + 000002D4 L_break_loop_with_status: + ; /* put in, out, bits, and hold back into ar and pop esp */ + 000002D4 48/ 89 74 24 mov [rsp+16], rsi ; /* in */ + 10 + 000002D9 48/ 89 7C 24 mov [rsp+32], rdi ; /* out */ + 20 + 000002DE 89 5C 24 58 mov [rsp+88], ebx ; /* bits */ + 000002E2 48/ 89 54 24 mov [rsp+80], rdx ; /* hold */ + 50 + + 000002E7 48/ 8B 04 24 mov rax, [rsp] ; /* restore rbp and rsp */ + 000002EB 48/ 8B 6C 24 mov rbp, [rsp+8] + 08 + 000002F0 48/ 8B E0 mov rsp, rax + + + + 000002F3 48/ 8B 74 24 mov rsi,[rsp-8] + F8 + 000002F8 48/ 8B 7C 24 mov rdi,[rsp-16] + F0 + 000002FD 4C/ 8B 64 24 mov r12,[rsp-24] + E8 + 00000302 4C/ 8B 6C 24 mov r13,[rsp-32] + E0 + 00000307 4C/ 8B 74 24 mov r14,[rsp-40] + D8 + 0000030C 4C/ 8B 7C 24 mov r15,[rsp-48] + D0 + 00000311 48/ 8B 5C 24 mov rbx,[rsp-56] + C8 + + 00000316 C3 ret 0 + ; : + ; : "m" (ar) + ; : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", + ; "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" + ; ); + + 00000317 inffas8664fnc ENDP + ;_TEXT ENDS + END + Microsoft (R) Macro Assembler (x64) Version 10.00.40219.01 05/20/14 09:52:26 +inffasx64.asm Symbols 2 - 1 + + + + +Procedures, parameters, and locals: + + N a m e Type Value Attr + +inffas8664fnc . . . . . . . . . P 00000000 _TEXT Length= 00000317 Public + L_one_time . . . . . . . . . . L 0000006C _TEXT + L_while_test . . . . . . . . . L 00000084 _TEXT + L_do_loop . . . . . . . . . . L 00000096 _TEXT + L_get_length_code . . . . . . L 000000AA _TEXT + L_get_length_code_one_time . . L 000000C4 _TEXT + L_dolen . . . . . . . . . . . L 000000CC _TEXT + L_test_for_length_base . . . . L 000000E0 _TEXT + L_add_bits_to_len . . . . . . L 000000F6 _TEXT + L_decode_distance . . . . . . L 00000108 _TEXT + L_get_distance_code . . . . . L 0000011C _TEXT + L_dodist . . . . . . . . . . . L 00000123 _TEXT + L_add_bits_to_dist . . . . . . L 00000140 _TEXT + L_check_window . . . . . . . . L 00000152 _TEXT + L_copy_two . . . . . . . . . . L 00000185 _TEXT + L_check_dist_one . . . . . . . L 00000190 _TEXT + L_set_two . . . . . . . . . . L 000001AE _TEXT + L_test_for_second_level_length . L 000001B8 _TEXT + L_test_for_second_level_dist . L 000001D8 _TEXT + L_clip_window . . . . . . . . L 000001F8 _TEXT + L_wrap_around_window . . . . . L 00000234 _TEXT + L_contiguous_in_window . . . . L 00000274 _TEXT + L_do_copy . . . . . . . . . . L 00000294 _TEXT + L_test_for_end_of_block . . . L 000002A0 _TEXT + L_invalid_literal_length_code L 000002AE _TEXT + L_invalid_distance_code . . . L 000002B8 _TEXT + L_invalid_distance_too_far . . L 000002C2 _TEXT + L_break_loop . . . . . . . . . L 000002CC _TEXT + L_break_loop_with_status . . . L 000002D4 _TEXT + + +Symbols: + + N a m e Type Value Attr + + + 0 Warnings + 0 Errors diff --git a/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/inffasx64.obj b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/inffasx64.obj new file mode 100644 index 00000000..7136ecc9 Binary files /dev/null and b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/masmx64/inffasx64.obj differ diff --git a/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/vstudio/vc10/miniunz.vcxproj b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/vstudio/vc10/miniunz.vcxproj index c34cd847..79732060 100644 --- a/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/vstudio/vc10/miniunz.vcxproj +++ b/org.simantics.fmu/FMUSolution/zlib-1.2.6/contrib/vstudio/vc10/miniunz.vcxproj @@ -48,7 +48,7 @@ MultiByte - Application + DynamicLibrary MultiByte @@ -95,7 +95,7 @@ ia64\MiniUnzip$(Configuration)\Tmp\ true false - x64\MiniUnzip$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ x64\MiniUnzip$(Configuration)\Tmp\ false false @@ -252,10 +252,10 @@ OnlyExplicitInline true ..\..\..;..\..\minizip;%(AdditionalIncludeDirectories) - _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE;ZLIB_WINAPI;NDEBUG;_CONSOLE;WIN64;%(PreprocessorDefinitions) + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE;ZLIB_WINAPI;NDEBUG;_CONSOLE;WIN64;EXTRACT_DLL;%(PreprocessorDefinitions) true Default - MultiThreadedDLL + MultiThreaded false true @@ -266,7 +266,7 @@ x64\ZlibDllRelease\zlibwapi.lib;%(AdditionalDependencies) - $(OutDir)miniunz.exe + $(OutDir)$(TargetName)$(TargetExt) true Console true