unpack_mo3_routine1.txt =
seg000:0040E470
seg000:0040E470 ; ¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦ S U B R O U T I N E ¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦
seg000:0040E470 ; read_ctrl_bit: return the next control bit in CF (most significant bit first), refilling dl from [esi] when it runs empty.
seg000:0040E470 ; In: dl = bit buffer whose lowest set bit is an end marker (0 = empty), esi = input pointer. Out: CF = bit; dl and possibly esi updated.
seg000:0040E470 read_ctrl_bit proc near ; CODE XREF: decode_ctrl_bits:carry_is_setp
seg000:0040E470 ; decode_ctrl_bits+8p ...
seg000:0040E470 add dl, dl ; shift left by one: CF = former top bit of dl
seg000:0040E472 jnz short not_zero ; bits remain below it, so CF is a real control bit (jnz does not modify flags)
seg000:0040E474 mov dl, [esi] ; dl is now 0: buffer was empty or only the end marker was shifted out, so fetch the next control byte
seg000:0040E476 sub esi, 0FFFFFFFFh ; esi += 1; subtracting -1 also borrows, leaving CF = 1 (unless esi was 0FFFFFFFFh)
seg000:0040E479 adc dl, dl ; CF = top bit of the new byte; dl = (dl << 1) | 1, the 1 being the new end marker
seg000:0040E47B
seg000:0040E47B not_zero: ; CODE XREF: read_ctrl_bit+2j
seg000:0040E47B retn ; result is in CF
seg000:0040E47B read_ctrl_bit endp
seg000:0040E47B
seg000:0040E47C
seg000:0040E47C ; ¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦ S U B R O U T I N E ¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦
seg000:0040E47C ; decode_ctrl_bits: read a variable-length number from the control stream into ecx.
seg000:0040E47C ; Bits come in pairs: the 1st is a data bit shifted into ecx, the 2nd is 1 to continue or 0 to stop. In/out: ecx; uses dl/esi via read_ctrl_bit.
seg000:0040E47C decode_ctrl_bits proc near ; CODE XREF: unpack+2Ap
seg000:0040E47C ; unpack+68p
seg000:0040E47C inc ecx ; both call sites in unpack arrive with ecx = 0, so this supplies an implicit leading 1 bit
seg000:0040E47D
seg000:0040E47D carry_is_set: ; CODE XREF: decode_ctrl_bits+Dj
seg000:0040E47D call read_ctrl_bit ; CF = data bit of the pair
seg000:0040E482 adc ecx, ecx ; ecx = (ecx << 1) | data bit
seg000:0040E484 call read_ctrl_bit ; CF = continuation bit of the pair
seg000:0040E489 jb short carry_is_set ; loop while the 2nd bit is 1 (pair 'n1'); a pair 'n0' ends the number. Used for the 10/01/11 pointer prefix and for encoded lengths
seg000:0040E48B retn ; ecx = decoded value
seg000:0040E48B decode_ctrl_bits endp
seg000:0040E48B
seg000:0040E48C ; ---------------------------------------------------------------------------
seg000:0040E48C ; START OF FUNCTION CHUNK FOR unpack
seg000:0040E48C ; Common exit of unpack: drop the saved LZ offset slot, return the input pointer in eax, restore the registers pushed in the prologue.
seg000:0040E48C unpack_end: ; CODE XREF: unpack+1Bj
seg000:0040E48C ; unpack+74j ...
seg000:0040E48C pop eax ; discard the dword created by 'push 0' in unpack (the last-LZ-offset slot)
seg000:0040E48D mov eax, esi ; return value = esi, the current position in the compressed input
seg000:0040E48F pop ebp ; restore registers in the reverse order of the prologue pushes
seg000:0040E490 pop ebx
seg000:0040E491 pop edi
seg000:0040E492 pop esi
seg000:0040E493 retn ; plain retn: the three stack arguments are not removed here
seg000:0040E493 ; END OF FUNCTION CHUNK FOR unpack
seg000:0040E494
seg000:0040E494 ; ¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦ S U B R O U T I N E ¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦
seg000:0040E494 ; unpack: LZ-style decompressor driven by a stream of control bits interleaved with data bytes.
seg000:0040E494 ; Stack args: compressed input pointer, output pointer, output size. Returns in eax the input pointer after the last byte consumed (set at unpack_end).
seg000:0040E494 unpack proc near ; CODE XREF: process+58p
seg000:0040E494 ; Registers inside the loop: esi = input, edi = output, ebx = output bytes left, dl = control-bit buffer, ecx = scratch/length, ebp = length bonus, eax = LZ offset.
seg000:0040E494 compr_ptr = dword ptr 28h ; 1st argument, loaded into esi
seg000:0040E494 uncompr_ptr = dword ptr 2Ch ; 2nd argument, loaded into edi
seg000:0040E494 uncompr_size = dword ptr 30h ; 3rd argument, loaded into ebx
seg000:0040E494 ; The offsets are used as [esp-14h+X]: 28h-14h = 14h, i.e. just above the 4 saved registers (10h bytes) and the return address.
seg000:0040E494 ; FUNCTION CHUNK AT seg000:0040E48C SIZE 00000008 BYTES
seg000:0040E494 ; Prologue: save the registers that unpack_end restores.
seg000:0040E494 push esi
seg000:0040E495 push edi
seg000:0040E496 push ebx
seg000:0040E497 push ebp
seg000:0040E498 mov esi, [esp-14h+compr_ptr] ; esi = compressed input pointer
seg000:0040E49C mov edi, [esp-14h+uncompr_ptr] ; edi = output pointer
seg000:0040E4A0 mov ebx, [esp-14h+uncompr_size] ; ebx = number of output bytes still to produce
seg000:0040E4A4 push 0 ; [esp+0] = last LZ offset, initially 0
seg000:0040E4A6 xor dl, dl ; empty bit buffer, so the first read_ctrl_bit call loads a control byte
seg000:0040E4A8 dec ebx ; account for the first byte, which is copied unconditionally below
seg000:0040E4A9 movsb ; copy the first byte raw: ds:[esi] -> es:[edi], then esi++, edi++ (assumes DF = 0)
seg000:0040E4AA xor ecx, ecx ; ecx = 0; every path back to unpack_loop also leaves ecx = 0
seg000:0040E4AC ; Main loop: one control bit selects literal (0) or LZ match (1).
seg000:0040E4AC unpack_loop: ; CODE XREF: unpack+26j
seg000:0040E4AC ; unpack+7Dj
seg000:0040E4AC cmp ebx, 0
seg000:0040E4AF jle short unpack_end ; nothing left to produce (signed compare): end of decompression
seg000:0040E4B1 call read_ctrl_bit ; CF = next control bit
seg000:0040E4B6 jb short compressed_data ; 1 = LZ match, 0 = literal
seg000:0040E4B8 movsb ; control bit was 0: copy one literal byte from the data stream to the output
seg000:0040E4B8 ; (a 1 bit branched to compressed_data above)
seg000:0040E4B9 dec ebx ; one more output byte produced
seg000:0040E4BA jmp short unpack_loop
seg000:0040E4BC ; ---------------------------------------------------------------------------
seg000:0040E4BC ; LZ match: first decode where to copy from, then how many bytes.
seg000:0040E4BC compressed_data: ; CODE XREF: unpack+22j
seg000:0040E4BC xor ebp, ebp ; length bonus; stays 0 when the previous offset is reused
seg000:0040E4BE call decode_ctrl_bits ; ecx = offset prefix: 2 for '00', 3 for '10', >= 4 for anything longer
seg000:0040E4C3 sub ecx, 3 ; borrows (CF = 1) only for the '00' prefix
seg000:0040E4C6 jnb short lz_ptr_in_ctrl_stream ; no borrow: a new offset follows
seg000:0040E4C8 mov eax, [esp+0] ; '00' = reuse the previous relative LZ offset saved at [esp+0] (still 0 if no offset has been decoded yet)
seg000:0040E4CB inc ecx ; ecx was -1 after the sub; back to 0 for the length bits
seg000:0040E4CC jmp short previous_lz_ptr ; go read the length
seg000:0040E4CE ; ---------------------------------------------------------------------------
seg000:0040E4CE ; New offset: high bits = prefix - 3 (from the control stream), low 8 bits = next data byte.
seg000:0040E4CE lz_ptr_in_ctrl_stream: ; CODE XREF: unpack+32j
seg000:0040E4CE mov eax, ecx ; eax = prefix - 3
seg000:0040E4D0 xor ecx, ecx ; ecx = 0 for the length bits
seg000:0040E4D2 shl eax, 8 ; make room for the low byte
seg000:0040E4D5 lodsb ; al = [esi], esi++: low 8 bits of the offset come from the data stream
seg000:0040E4D6 xor eax, 0FFFFFFFFh ; eax = not eax = -(value + 1), a negative offset relative to edi
seg000:0040E4D9 cmp eax, 0FFFFFB00h ; 0FFFFFB00h = -500h = -1280; CF = 1 if the offset is below that (unsigned compare)
seg000:0040E4DE adc ebp, 1 ; ebp += 1, plus 1 more if eax < -500h (hex, = -1280)
seg000:0040E4E1 cmp eax, 0FFFF8300h ; 0FFFF8300h = -7D00h = -32000 decimal
seg000:0040E4E6 adc ebp, 0 ; ebp += 1 if eax < -32000
seg000:0040E4E9 mov [esp+0], eax ; remember the offset so a later '00' prefix can reuse it
seg000:0040E4EC ; Length: two control bits give 1..3 directly; '00' means a longer, pair-encoded length follows.
seg000:0040E4EC previous_lz_ptr: ; CODE XREF: unpack+38j
seg000:0040E4EC call read_ctrl_bit ; first of the two length bits (ecx is 0 here)
seg000:0040E4F1 adc ecx, ecx
seg000:0040E4F3 call read_ctrl_bit ; second length bit
seg000:0040E4F8 adc ecx, ecx ; ecx = 2-bit value; ZF set if both bits were 0
seg000:0040E4FA jnz short len_in_2bits ; non-zero: ecx (1..3) is the base length
seg000:0040E4FC call decode_ctrl_bits ; both zero: base length - 2 is encoded as data/continue bit pairs, ending on a pair 'n0'
seg000:0040E501 add ecx, 2
seg000:0040E504 ; NOTE(review): nothing visible below checks that eax+edi stays inside the output buffer or that esi stays inside the input - confirm the caller guarantees this.
seg000:0040E504 len_in_2bits: ; CODE XREF: unpack+66j
seg000:0040E504 add ecx, ebp ; ecx = final match length = base length + offset-dependent bonus
seg000:0040E506 sub ebx, ecx ; decrease remaining bytes to decompress
seg000:0040E508 jb short unpack_end ; match is longer than the remaining output: stop without copying it
seg000:0040E50A push esi ; save the input pointer
seg000:0040E50B lea esi, [eax+edi] ; source = output position + negative offset, i.e. previously written output
seg000:0040E50E rep movsb ; copy ecx bytes one at a time from esi to edi (source and destination may overlap); ecx ends at 0
seg000:0040E510 pop esi ; restore the input pointer
seg000:0040E511 jmp short unpack_loop
seg000:0040E511 unpack endp
seg000:0040E511
unpack_mo3.txt =
(big thank you to Matt for the x86 code analysis)
The first byte is always uncompressed. After that, you've got two interleaved streams
of control bytes and data bytes. The control bytes are read, one bit at a time, by the read_ctrl_bit routine ("shift_dl").
In the unpack routine, the control bits are read most-significant first.
A zero bit indicates "uncompressed byte". A one bit indicates compressed data.
The next two control bits control which kind of compression
-- if they are '00' it's LZ with the same (relative) pointer as a previous LZ.
The next two bits of the control stream are the length, unless they are both zero.
If they are both zero, the true length minus 2 is encoded in the control stream as bit pairs (two control bits per data bit), after an implicit leading '1'.
The first bit in each pair is the actual data, the second bit is 1 if another pair follows and 0 on the last pair.
If the first control bits are '11', '10' or '01', then the LZ pointer is in the control stream.
The most significant bit of the pointer is a '1', then the next most significant bits
of the pointer are read from the control stream two bits at a time as described above
(including the initial 11 or 01 or 10). Then 3 is subtracted from that value
and it is shifted left by 8 bits, and the 8 least significant bits
of the pointer are taken from the data stream. The one's-complement of the result is taken.
The length adjustment (1, plus 1 if the pointer is below -0x500 = -1280, plus 1 more if it is below -32000) is saved and added back in later
(it's always at least one). Then it goes into the same LZ as before,
with the next two bits of the control stream being the length unless they are both zero, etc.
Example:
64 6d 08 69 61
64 = 01100100
0 = next byte is literal 0x6d
1 = compressed data
10 = LZ with MSB of pointer zero after subtracting 3
08 -- byte from data stream; pointer = ~0x08 = -9, i.e. 9 bytes back (points to the 'a' in Danny)
01 -- from control stream, a length of 1, plus the adjustment 1 from earlier = 2.
0 -- indicates a literal 69
0 -- indicates a literal 61.