mirror of
https://github.com/VCMP-SqMod/SqMod.git
synced 2025-07-24 17:51:47 +02:00
bin
cmake
module
Base
Core
Entity
Library
Misc
Vendor
AES256
B64
CivetWeb
ConcurrentQueue
Fmt
Hash
MDBC
cmake
examples
include
libmariadb
mariadb_config
plugins
win
win-iconv
zlib
amiga
contrib
ada
amd64
asm686
blast
delphi
dotzlib
gcc_gvmat64
infback9
inflate86
iostream
iostream2
iostream3
masmx64
bld_ml64.bat
gvmat64.asm
inffas8664.c
inffasx64.asm
readme.txt
masmx86
minizip
pascal
puff
testzlib
untgz
vstudio
README.contrib
doc
examples
msdos
nintendods
old
os400
qnx
test
watcom
win32
CMakeLists.txt
ChangeLog
FAQ
INDEX
Makefile
Makefile.in
README
adler32.c
compress.c
configure
crc32.c
crc32.h
deflate.c
deflate.h
gzclose.c
gzguts.h
gzlib.c
gzread.c
gzwrite.c
infback.c
inffast.c
inffast.h
inffixed.h
inflate.c
inflate.h
inftrees.c
inftrees.h
make_vms.com
treebuild.xml
trees.c
trees.h
uncompr.c
zconf.h.cmakein
zconf.h.in
zconf.h.included
zlib.3
zlib.3.pdf
zlib.h
zlib.map
zlib.pc.cmakein
zlib.pc.in
zlib2ansi
zutil.c
zutil.h
CMakeLists.txt
COPYING.LIB
README
MaxmindDB
PUGIXML
SQLite
SimpleIni
SimpleSocket
TinyDir
Whirlpool
CMakeLists.txt
CMakeLists.txt
Core.cpp
Core.hpp
Logger.cpp
Logger.hpp
Main.cpp
Register.cpp
SqBase.hpp
sdk
.gitignore
.gitmodules
CMakeLists.txt
LICENSE
README.md
397 lines
9.9 KiB
NASM
397 lines
9.9 KiB
NASM
; inffasx64.asm is a hand tuned assembler version of inffast.c - fast decoding
|
|
; version for AMD64 on Windows using Microsoft C compiler
|
|
;
|
|
; inffasx64.asm is automatically convert from AMD64 portion of inffas86.c
|
|
; inffasx64.asm is called by inffas8664.c, which contain more info.
|
|
|
|
|
|
; to compile this file, I use option
|
|
; ml64.exe /Flinffasx64 /c /Zi inffasx64.asm
|
|
; with Microsoft Macro Assembler (x64) for AMD64
|
|
;
|
|
|
|
; This file compile with Microsoft Macro Assembler (x64) for AMD64
|
|
;
|
|
; ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK
|
|
;
|
|
; (you can get Windows WDK with ml64 for AMD64 from
|
|
; http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price)
|
|
;
|
|
|
|
|
|
.code
|
|
inffas8664fnc PROC
|
|
|
|
; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
|
|
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
|
|
;
|
|
; All registers must be preserved across the call, except for
|
|
; rax, rcx, rdx, r8, r-9, r10, and r11, which are scratch.
|
|
|
|
|
|
mov [rsp-8],rsi
|
|
mov [rsp-16],rdi
|
|
mov [rsp-24],r12
|
|
mov [rsp-32],r13
|
|
mov [rsp-40],r14
|
|
mov [rsp-48],r15
|
|
mov [rsp-56],rbx
|
|
|
|
mov rax,rcx
|
|
|
|
mov [rax+8], rbp ; /* save regs rbp and rsp */
|
|
mov [rax], rsp
|
|
|
|
mov rsp, rax ; /* make rsp point to &ar */
|
|
|
|
mov rsi, [rsp+16] ; /* rsi = in */
|
|
mov rdi, [rsp+32] ; /* rdi = out */
|
|
mov r9, [rsp+24] ; /* r9 = last */
|
|
mov r10, [rsp+48] ; /* r10 = end */
|
|
mov rbp, [rsp+64] ; /* rbp = lcode */
|
|
mov r11, [rsp+72] ; /* r11 = dcode */
|
|
mov rdx, [rsp+80] ; /* rdx = hold */
|
|
mov ebx, [rsp+88] ; /* ebx = bits */
|
|
mov r12d, [rsp+100] ; /* r12d = lmask */
|
|
mov r13d, [rsp+104] ; /* r13d = dmask */
|
|
; /* r14d = len */
|
|
; /* r15d = dist */
|
|
|
|
|
|
cld
|
|
cmp r10, rdi
|
|
je L_one_time ; /* if only one decode left */
|
|
cmp r9, rsi
|
|
|
|
jne L_do_loop
|
|
|
|
|
|
L_one_time:
|
|
mov r8, r12 ; /* r8 = lmask */
|
|
cmp bl, 32
|
|
ja L_get_length_code_one_time
|
|
|
|
lodsd ; /* eax = *(uint *)in++ */
|
|
mov cl, bl ; /* cl = bits, needs it for shifting */
|
|
add bl, 32 ; /* bits += 32 */
|
|
shl rax, cl
|
|
or rdx, rax ; /* hold |= *((uint *)in)++ << bits */
|
|
jmp L_get_length_code_one_time
|
|
|
|
ALIGN 4
|
|
L_while_test:
|
|
cmp r10, rdi
|
|
jbe L_break_loop
|
|
cmp r9, rsi
|
|
jbe L_break_loop
|
|
|
|
L_do_loop:
|
|
mov r8, r12 ; /* r8 = lmask */
|
|
cmp bl, 32
|
|
ja L_get_length_code ; /* if (32 < bits) */
|
|
|
|
lodsd ; /* eax = *(uint *)in++ */
|
|
mov cl, bl ; /* cl = bits, needs it for shifting */
|
|
add bl, 32 ; /* bits += 32 */
|
|
shl rax, cl
|
|
or rdx, rax ; /* hold |= *((uint *)in)++ << bits */
|
|
|
|
L_get_length_code:
|
|
and r8, rdx ; /* r8 &= hold */
|
|
mov eax, [rbp+r8*4] ; /* eax = lcode[hold & lmask] */
|
|
|
|
mov cl, ah ; /* cl = this.bits */
|
|
sub bl, ah ; /* bits -= this.bits */
|
|
shr rdx, cl ; /* hold >>= this.bits */
|
|
|
|
test al, al
|
|
jnz L_test_for_length_base ; /* if (op != 0) 45.7% */
|
|
|
|
mov r8, r12 ; /* r8 = lmask */
|
|
shr eax, 16 ; /* output this.val char */
|
|
stosb
|
|
|
|
L_get_length_code_one_time:
|
|
and r8, rdx ; /* r8 &= hold */
|
|
mov eax, [rbp+r8*4] ; /* eax = lcode[hold & lmask] */
|
|
|
|
L_dolen:
|
|
mov cl, ah ; /* cl = this.bits */
|
|
sub bl, ah ; /* bits -= this.bits */
|
|
shr rdx, cl ; /* hold >>= this.bits */
|
|
|
|
test al, al
|
|
jnz L_test_for_length_base ; /* if (op != 0) 45.7% */
|
|
|
|
shr eax, 16 ; /* output this.val char */
|
|
stosb
|
|
jmp L_while_test
|
|
|
|
ALIGN 4
|
|
L_test_for_length_base:
|
|
mov r14d, eax ; /* len = this */
|
|
shr r14d, 16 ; /* len = this.val */
|
|
mov cl, al
|
|
|
|
test al, 16
|
|
jz L_test_for_second_level_length ; /* if ((op & 16) == 0) 8% */
|
|
and cl, 15 ; /* op &= 15 */
|
|
jz L_decode_distance ; /* if (!op) */
|
|
|
|
L_add_bits_to_len:
|
|
sub bl, cl
|
|
xor eax, eax
|
|
inc eax
|
|
shl eax, cl
|
|
dec eax
|
|
and eax, edx ; /* eax &= hold */
|
|
shr rdx, cl
|
|
add r14d, eax ; /* len += hold & mask[op] */
|
|
|
|
L_decode_distance:
|
|
mov r8, r13 ; /* r8 = dmask */
|
|
cmp bl, 32
|
|
ja L_get_distance_code ; /* if (32 < bits) */
|
|
|
|
lodsd ; /* eax = *(uint *)in++ */
|
|
mov cl, bl ; /* cl = bits, needs it for shifting */
|
|
add bl, 32 ; /* bits += 32 */
|
|
shl rax, cl
|
|
or rdx, rax ; /* hold |= *((uint *)in)++ << bits */
|
|
|
|
L_get_distance_code:
|
|
and r8, rdx ; /* r8 &= hold */
|
|
mov eax, [r11+r8*4] ; /* eax = dcode[hold & dmask] */
|
|
|
|
L_dodist:
|
|
mov r15d, eax ; /* dist = this */
|
|
shr r15d, 16 ; /* dist = this.val */
|
|
mov cl, ah
|
|
sub bl, ah ; /* bits -= this.bits */
|
|
shr rdx, cl ; /* hold >>= this.bits */
|
|
mov cl, al ; /* cl = this.op */
|
|
|
|
test al, 16 ; /* if ((op & 16) == 0) */
|
|
jz L_test_for_second_level_dist
|
|
and cl, 15 ; /* op &= 15 */
|
|
jz L_check_dist_one
|
|
|
|
L_add_bits_to_dist:
|
|
sub bl, cl
|
|
xor eax, eax
|
|
inc eax
|
|
shl eax, cl
|
|
dec eax ; /* (1 << op) - 1 */
|
|
and eax, edx ; /* eax &= hold */
|
|
shr rdx, cl
|
|
add r15d, eax ; /* dist += hold & ((1 << op) - 1) */
|
|
|
|
L_check_window:
|
|
mov r8, rsi ; /* save in so from can use it's reg */
|
|
mov rax, rdi
|
|
sub rax, [rsp+40] ; /* nbytes = out - beg */
|
|
|
|
cmp eax, r15d
|
|
jb L_clip_window ; /* if (dist > nbytes) 4.2% */
|
|
|
|
mov ecx, r14d ; /* ecx = len */
|
|
mov rsi, rdi
|
|
sub rsi, r15 ; /* from = out - dist */
|
|
|
|
sar ecx, 1
|
|
jnc L_copy_two ; /* if len % 2 == 0 */
|
|
|
|
rep movsw
|
|
mov al, [rsi]
|
|
mov [rdi], al
|
|
inc rdi
|
|
|
|
mov rsi, r8 ; /* move in back to %rsi, toss from */
|
|
jmp L_while_test
|
|
|
|
L_copy_two:
|
|
rep movsw
|
|
mov rsi, r8 ; /* move in back to %rsi, toss from */
|
|
jmp L_while_test
|
|
|
|
ALIGN 4
|
|
L_check_dist_one:
|
|
cmp r15d, 1 ; /* if dist 1, is a memset */
|
|
jne L_check_window
|
|
cmp [rsp+40], rdi ; /* if out == beg, outside window */
|
|
je L_check_window
|
|
|
|
mov ecx, r14d ; /* ecx = len */
|
|
mov al, [rdi-1]
|
|
mov ah, al
|
|
|
|
sar ecx, 1
|
|
jnc L_set_two
|
|
mov [rdi], al
|
|
inc rdi
|
|
|
|
L_set_two:
|
|
rep stosw
|
|
jmp L_while_test
|
|
|
|
ALIGN 4
|
|
L_test_for_second_level_length:
|
|
test al, 64
|
|
jnz L_test_for_end_of_block ; /* if ((op & 64) != 0) */
|
|
|
|
xor eax, eax
|
|
inc eax
|
|
shl eax, cl
|
|
dec eax
|
|
and eax, edx ; /* eax &= hold */
|
|
add eax, r14d ; /* eax += len */
|
|
mov eax, [rbp+rax*4] ; /* eax = lcode[val+(hold&mask[op])]*/
|
|
jmp L_dolen
|
|
|
|
ALIGN 4
|
|
L_test_for_second_level_dist:
|
|
test al, 64
|
|
jnz L_invalid_distance_code ; /* if ((op & 64) != 0) */
|
|
|
|
xor eax, eax
|
|
inc eax
|
|
shl eax, cl
|
|
dec eax
|
|
and eax, edx ; /* eax &= hold */
|
|
add eax, r15d ; /* eax += dist */
|
|
mov eax, [r11+rax*4] ; /* eax = dcode[val+(hold&mask[op])]*/
|
|
jmp L_dodist
|
|
|
|
ALIGN 4
|
|
L_clip_window:
|
|
mov ecx, eax ; /* ecx = nbytes */
|
|
mov eax, [rsp+92] ; /* eax = wsize, prepare for dist cmp */
|
|
neg ecx ; /* nbytes = -nbytes */
|
|
|
|
cmp eax, r15d
|
|
jb L_invalid_distance_too_far ; /* if (dist > wsize) */
|
|
|
|
add ecx, r15d ; /* nbytes = dist - nbytes */
|
|
cmp dword ptr [rsp+96], 0
|
|
jne L_wrap_around_window ; /* if (write != 0) */
|
|
|
|
mov rsi, [rsp+56] ; /* from = window */
|
|
sub eax, ecx ; /* eax -= nbytes */
|
|
add rsi, rax ; /* from += wsize - nbytes */
|
|
|
|
mov eax, r14d ; /* eax = len */
|
|
cmp r14d, ecx
|
|
jbe L_do_copy ; /* if (nbytes >= len) */
|
|
|
|
sub eax, ecx ; /* eax -= nbytes */
|
|
rep movsb
|
|
mov rsi, rdi
|
|
sub rsi, r15 ; /* from = &out[ -dist ] */
|
|
jmp L_do_copy
|
|
|
|
ALIGN 4
|
|
L_wrap_around_window:
|
|
mov eax, [rsp+96] ; /* eax = write */
|
|
cmp ecx, eax
|
|
jbe L_contiguous_in_window ; /* if (write >= nbytes) */
|
|
|
|
mov esi, [rsp+92] ; /* from = wsize */
|
|
add rsi, [rsp+56] ; /* from += window */
|
|
add rsi, rax ; /* from += write */
|
|
sub rsi, rcx ; /* from -= nbytes */
|
|
sub ecx, eax ; /* nbytes -= write */
|
|
|
|
mov eax, r14d ; /* eax = len */
|
|
cmp eax, ecx
|
|
jbe L_do_copy ; /* if (nbytes >= len) */
|
|
|
|
sub eax, ecx ; /* len -= nbytes */
|
|
rep movsb
|
|
mov rsi, [rsp+56] ; /* from = window */
|
|
mov ecx, [rsp+96] ; /* nbytes = write */
|
|
cmp eax, ecx
|
|
jbe L_do_copy ; /* if (nbytes >= len) */
|
|
|
|
sub eax, ecx ; /* len -= nbytes */
|
|
rep movsb
|
|
mov rsi, rdi
|
|
sub rsi, r15 ; /* from = out - dist */
|
|
jmp L_do_copy
|
|
|
|
ALIGN 4
|
|
L_contiguous_in_window:
|
|
mov rsi, [rsp+56] ; /* rsi = window */
|
|
add rsi, rax
|
|
sub rsi, rcx ; /* from += write - nbytes */
|
|
|
|
mov eax, r14d ; /* eax = len */
|
|
cmp eax, ecx
|
|
jbe L_do_copy ; /* if (nbytes >= len) */
|
|
|
|
sub eax, ecx ; /* len -= nbytes */
|
|
rep movsb
|
|
mov rsi, rdi
|
|
sub rsi, r15 ; /* from = out - dist */
|
|
jmp L_do_copy ; /* if (nbytes >= len) */
|
|
|
|
ALIGN 4
|
|
L_do_copy:
|
|
mov ecx, eax ; /* ecx = len */
|
|
rep movsb
|
|
|
|
mov rsi, r8 ; /* move in back to %esi, toss from */
|
|
jmp L_while_test
|
|
|
|
L_test_for_end_of_block:
|
|
test al, 32
|
|
jz L_invalid_literal_length_code
|
|
mov dword ptr [rsp+116], 1
|
|
jmp L_break_loop_with_status
|
|
|
|
L_invalid_literal_length_code:
|
|
mov dword ptr [rsp+116], 2
|
|
jmp L_break_loop_with_status
|
|
|
|
L_invalid_distance_code:
|
|
mov dword ptr [rsp+116], 3
|
|
jmp L_break_loop_with_status
|
|
|
|
L_invalid_distance_too_far:
|
|
mov dword ptr [rsp+116], 4
|
|
jmp L_break_loop_with_status
|
|
|
|
L_break_loop:
|
|
mov dword ptr [rsp+116], 0
|
|
|
|
L_break_loop_with_status:
|
|
; /* put in, out, bits, and hold back into ar and pop esp */
|
|
mov [rsp+16], rsi ; /* in */
|
|
mov [rsp+32], rdi ; /* out */
|
|
mov [rsp+88], ebx ; /* bits */
|
|
mov [rsp+80], rdx ; /* hold */
|
|
|
|
mov rax, [rsp] ; /* restore rbp and rsp */
|
|
mov rbp, [rsp+8]
|
|
mov rsp, rax
|
|
|
|
|
|
|
|
mov rsi,[rsp-8]
|
|
mov rdi,[rsp-16]
|
|
mov r12,[rsp-24]
|
|
mov r13,[rsp-32]
|
|
mov r14,[rsp-40]
|
|
mov r15,[rsp-48]
|
|
mov rbx,[rsp-56]
|
|
|
|
ret 0
|
|
; :
|
|
; : "m" (ar)
|
|
; : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
|
|
; "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
|
|
; );
|
|
|
|
inffas8664fnc ENDP
|
|
;_TEXT ENDS
|
|
END
|