Results 1 to 12 of 12

Thread: Help understanding Recompression

  1. #1
    Member
    Join Date
    Nov 2016
    Location
    US
    Posts
    6
    Thanks
    0
    Thanked 0 Times in 0 Posts

    Help understanding Recompression

    Hello everyone!

    Recently I've started a project working on decompressing and re-compressing MKD archives from a game.
    I managed to get an IDA dump of the decompression routine, and put together a test program in C.

    That was easy enough, but wow, the format makes no sense, and I have no idea where to start
    with recompression, it appears to be a hybrid LZW/RLE implementation.

    I'm attaching my code, and would love any suggestions...because I just can't seem to wrap my head around it

    #include <stdio.h>
    #include <stdlib.h>


    // Decompress one MKD stream (routine translated from the game's IDA dump).
    //
    //   input  - compressed stream. The bytes starting at offset 8 hold the
    //            uncompressed size as an unsigned in host byte order; the
    //            payload is read starting at offset 12.
    //   output - destination buffer; the caller must make it at least as large
    //            as that uncompressed size.
    //
    // Returns the number of bytes written to output. That equals the size from
    // the header unless the stream contains a back-reference pointing before
    // the start of output, in which case decoding stops early.
    //
    // Stream layout: a flag byte describes the next 8 tokens, least significant
    // bit first. A 0 bit is a single literal byte. A 1 bit is a two-byte token
    // (low, high) whose kind is selected by the low nibble of `low`:
    //   0     - back-reference, length = next input byte + 16
    //   1     - run of the byte `high`, length = (low>>4) + 3
    //   2     - literal run copied from input, length = (high<<4)+(low>>4) + 18
    //   3..15 - back-reference, length = the low nibble itself
    // For back-references the distance is (high<<4) + (low>>4).
    //
    // Every copy is clamped to the space left in output, so a final token that
    // overshoots the declared size can no longer write past the buffer.
    // NOTE: the length of input is not known here, so reads from input are not
    // bounds-checked; a truncated stream is still read past its end.
    // NOTE(review): a distance of 0 copies each byte onto itself, exactly as the
    // translated routine did; what the game's decoder intends there is unconfirmed.
    unsigned decompress(unsigned char *input,unsigned char *output)
    {
        unsigned size=0;
        unsigned char *size_bytes=(unsigned char*)&size;
        unsigned char flags_left=0,flags=0;
        unsigned input_pos=12,output_pos=0;

        // Copy the size byte by byte instead of reading through an unsigned*
        // cast: same value, but no assumption that input+8 is suitably aligned.
        for(unsigned i=0; i<sizeof size; i++)
        {
            size_bytes[i]=input[8+i];
        }

        for(; output_pos<size; flags_left--,flags>>=1)
        {
            if(flags_left==0) // current flag byte is used up, fetch the next one
            {
                flags_left=8;
                flags=input[input_pos++];
            }
            if(!(flags & 1)) // single literal byte
            {
                output[output_pos++]=input[input_pos++];
                continue;
            }

            unsigned char low=input[input_pos++];
            unsigned char high=input[input_pos++];
            unsigned field=((unsigned)high<<4) + (low>>4); // distance, or length for kind 2
            unsigned room=size-output_pos; // non-zero thanks to the loop condition
            unsigned count;
            int is_match=0;

            switch(low & 0x0F)
            {
            case 0: // long back-reference: one extra input byte carries the length
                count=input[input_pos++]+16u;
                is_match=1;
                break;
            case 1: // run of a single byte
                count=(low>>4)+3u;
                if(count>room)
                {
                    count=room;
                }
                for(unsigned i=0; i<count; i++)
                {
                    output[output_pos++]=high;
                }
                break;
            case 2: // long literal run
                count=field+18u;
                if(count>room)
                {
                    count=room;
                }
                for(unsigned i=0; i<count; i++)
                {
                    output[output_pos++]=input[input_pos++];
                }
                break;
            default: // short back-reference, length 3..15
                count=low & 0x0F;
                is_match=1;
                break;
            }

            if(is_match)
            {
                // A distance reaching before the first output byte would wrap
                // the unsigned index and read far outside the buffer.
                if(field>output_pos)
                {
                    return output_pos;
                }
                if(count>room)
                {
                    count=room;
                }
                // Byte-by-byte on purpose: source and destination may overlap
                // when the distance is smaller than the length.
                for(unsigned i=0; i<count; i++)
                {
                    output[output_pos]=output[output_pos-field];
                    output_pos++;
                }
            }
        }
        return output_pos;
    }




    // Load a whole file into a freshly allocated buffer.
    //
    //   filename - path of the file to read (opened in binary mode).
    //
    // Returns a malloc'ed buffer holding the file's bytes; the caller owns it
    // and should free() it. The length is not returned. On any failure (open,
    // seek, size query, allocation, short read) a message is printed and the
    // process exits with status 1.
    unsigned char* load_file(char *filename)
    {
        FILE *f=fopen(filename,"rb");
        if(f==NULL)
        {
            printf("Error opening input file!\n");
            exit(1);
        }

        // ftell() returns long and reports failure as -1, so keep the value in
        // a long and check it before it is used as an allocation size.
        long sz=-1;
        if(fseek(f , 0 , SEEK_END )==0)
        {
            sz=ftell(f);
        }
        if(sz<0 || fseek(f , 0 , SEEK_SET )!=0)
        {
            printf("Error reading input file!\n");
            exit(1);
        }

        // Ask for at least one byte so an empty file does not depend on what
        // malloc(0) returns.
        unsigned char *data=(unsigned char*)malloc(sz>0 ? (size_t)sz : 1);
        if(data==NULL)
        {
            printf("Error allocating memory!\n");
            exit(1);
        }

        if(fread(data,1,(size_t)sz,f)!=(size_t)sz)
        {
            printf("Error reading input file!\n");
            exit(1);
        }
        fclose(f);
        return data;
    }


    // Write a buffer to a file, creating it or replacing its contents.
    //
    //   data     - bytes to write.
    //   sz       - number of bytes to write; must not be negative.
    //   filename - path of the file to write (opened in binary mode).
    //
    // On any failure (negative size, open, short write, close) a message is
    // printed and the process exits with status 1.
    void save_file(unsigned char *data,int sz,char *filename)
    {
        // Reject a negative size before touching the file: converted to size_t
        // it would ask fwrite for an enormous number of bytes.
        if(sz<0)
        {
            printf("Error writing output file!\n");
            exit(1);
        }

        FILE *f=fopen(filename,"wb");
        if(f==NULL)
        {
            printf("Error opening output file!\n");
            exit(1);
        }

        if(fwrite(data,1,(size_t)sz,f)!=(size_t)sz)
        {
            printf("Error writing output file!\n");
            exit(1);
        }
        // fclose() flushes buffered data, so a failure here means the file on
        // disk may be incomplete.
        if(fclose(f)!=0)
        {
            printf("Error writing output file!\n");
            exit(1);
        }
    }


    // Command-line entry point: decompress <input file> <output file>.
    //
    // Loads the compressed file, reads the uncompressed size stored at offset 8
    // of its header, decompresses into a buffer of that size and writes the
    // buffer to the output file.
    // NOTE: load_file() does not report the file length, so this cannot check
    // that the input is long enough to contain the header.
    int main(int argc,char **argv)
    {
        if(argc!=3)
        {
            printf("Usage:\n\tdecompress <input file> <output file>\n");
            exit(0);
        }
        unsigned char *cdata=load_file(argv[1]); // read compressed data

        // Uncompressed size, copied byte by byte instead of through an int*
        // cast: same host-order value, no alignment assumption on cdata+8.
        int usize=0;
        for(unsigned i=0; i<sizeof usize; i++)
        {
            ((unsigned char*)&usize)[i]=cdata[8+i];
        }
        if(usize<0)
        {
            printf("Error: invalid uncompressed size in header!\n");
            exit(1);
        }

        // Zero-filled, and at least one byte so a size of 0 does not depend on
        // what a zero-sized allocation returns.
        unsigned char *udata=(unsigned char*)calloc(usize>0 ? (size_t)usize : 1,1);
        if(udata==NULL)
        {
            printf("Error allocating memory!\n");
            exit(1);
        }

        unsigned written=decompress(cdata,udata);
        if(written<(unsigned)usize)
        {
            printf("Warning: only %u of %d bytes were decompressed!\n",written,usize);
        }

        save_file(udata,usize,argv[2]); // save decompressed data to file
        free(udata);
        free(cdata);
        return 0;
    }



  2. #2
    Administrator Shelwien's Avatar
    Join Date
    May 2008
    Location
    Kharkov, Ukraine
    Posts
    3,134
    Thanks
    179
    Thanked 921 Times in 469 Posts
    Can you add a small sample of compressed file?

    And to clarify things, you want a compressor for this format?

    Also, it's not LZW when there are lengths and distances encoded - it could be LZSS or something similar.

  3. #3
    Member
    Join Date
    Nov 2016
    Location
    US
    Posts
    6
    Thanks
    0
    Thanked 0 Times in 0 Posts
    Hey,

    yes I do, I can't wrap my head around it,

    (forgot to attach the file)
    here is the attached format compressed.


    I was thinking maybe lz77, the 8 chunks seem to be similar
    Attached Files Attached Files

  4. #4
    Administrator Shelwien's Avatar
    Join Date
    May 2008
    Location
    Kharkov, Ukraine
    Posts
    3,134
    Thanks
    179
    Thanked 921 Times in 469 Posts
    Well, here's a very simple encoder: http://nishi.dreamhosters.com/u/mkd_v0.rar
    Implementing a proper LZ encoder with parsing optimization is kinda hard, so it uses deflate encoder (7z in test.bat),
    then extracts the deflate stream with reflate's rawdet and removes deflate's entropy coding with raw2dec.
    Then mkd_enc (modified dec2unp) encodes deflate's lz tokens using this format.
    It doesn't implement literal runs and RLE, so compression is worse than original (matches with dist=4k..32k getting unpacked to literals is probably also bad),
    but seems to work otherwise.

  5. #5
    Member
    Join Date
    Nov 2016
    Location
    US
    Posts
    6
    Thanks
    0
    Thanked 0 Times in 0 Posts
    Hey,

    Thanks a lot, I'll take a look!

  6. #6
    Administrator Shelwien's Avatar
    Join Date
    May 2008
    Location
    Kharkov, Ukraine
    Posts
    3,134
    Thanks
    179
    Thanked 921 Times in 469 Posts
    Btw, there's also a tricky case with zero distance in matches ( ((high<<4) + (low>>4))==0 ).
    I treated it as zero runs, but it can be anything really - it would be good to also have unpacked version of that sample file
    (and I mean unpacked with original decoder).

  7. #7
    Member
    Join Date
    Nov 2016
    Location
    US
    Posts
    6
    Thanks
    0
    Thanked 0 Times in 0 Posts
    hey, I ran test.bat and uncompressed the file,

    how do I test recompression?

    I'm sorry if that is a dumb question

    here is the uncompressed version of the unpacked file
    Attached Files Attached Files

  8. #8
    Administrator Shelwien's Avatar
    Join Date
    May 2008
    Location
    Kharkov, Ukraine
    Posts
    3,134
    Thanks
    179
    Thanked 921 Times in 469 Posts
    The test.bat supposedly also tests it - 00000000.mrg file there is compressed with mkd_enc.
    Then it's decoded with the same decoder and md5 hashes are computed (they seem to match).

    In other words, I test it by decoding original file and re-encoded file with the same decoder, and comparing outputs.

  9. #9
    Member
    Join Date
    Nov 2016
    Location
    US
    Posts
    6
    Thanks
    0
    Thanked 0 Times in 0 Posts
    Ahh I get you!

    I just uploaded the file uncompressed from the original

  10. #10
    Administrator Shelwien's Avatar
    Join Date
    May 2008
    Location
    Kharkov, Ukraine
    Posts
    3,134
    Thanks
    179
    Thanked 921 Times in 469 Posts
    Well, the unpacked files seem to match, so now we have to find the best deflate encoder which allows setting a 4k window size.

  11. #11
    Member
    Join Date
    Nov 2016
    Location
    US
    Posts
    6
    Thanks
    0
    Thanked 0 Times in 0 Posts
    Ahhh

    I see!, you answered the question for me

  12. #12
    Member
    Join Date
    Oct 2016
    Location
    mumbai
    Posts
    7
    Thanks
    0
    Thanked 0 Times in 0 Posts
    Thanks a lot....

    Quote Originally Posted by rexferal0009 View Post
    Hello everyone!

    Recently I've started a project working on decompressing and re-compressing MKD archives from a game.
    I managed to get an IDA dump of the decompression routine, and put together a test program in C.

    That was easy enough, but wow, the format makes no sense, and I have no idea where to start
    with recompression, it appears to be a hybrid LZW/RLE implementation.

    I'm attaching my code, and would love any suggestions...because I just can't seem to wrap my head around it

    #include <stdio.h>
    #include <stdlib.h>


    // Decompress one MKD stream (routine translated from the game's IDA dump).
    //
    //   input  - compressed stream. The bytes starting at offset 8 hold the
    //            uncompressed size as an unsigned in host byte order; the
    //            payload is read starting at offset 12.
    //   output - destination buffer; the caller must make it at least as large
    //            as that uncompressed size.
    //
    // Returns the number of bytes written to output. That equals the size from
    // the header unless the stream contains a back-reference pointing before
    // the start of output, in which case decoding stops early.
    //
    // Stream layout: a flag byte describes the next 8 tokens, least significant
    // bit first. A 0 bit is a single literal byte. A 1 bit is a two-byte token
    // (low, high) whose kind is selected by the low nibble of `low`:
    //   0     - back-reference, length = next input byte + 16
    //   1     - run of the byte `high`, length = (low>>4) + 3
    //   2     - literal run copied from input, length = (high<<4)+(low>>4) + 18
    //   3..15 - back-reference, length = the low nibble itself
    // For back-references the distance is (high<<4) + (low>>4).
    //
    // Every copy is clamped to the space left in output, so a final token that
    // overshoots the declared size can no longer write past the buffer.
    // NOTE: the length of input is not known here, so reads from input are not
    // bounds-checked; a truncated stream is still read past its end.
    // NOTE(review): a distance of 0 copies each byte onto itself, exactly as the
    // translated routine did; what the game's decoder intends there is unconfirmed.
    unsigned decompress(unsigned char *input,unsigned char *output)
    {
        unsigned size=0;
        unsigned char *size_bytes=(unsigned char*)&size;
        unsigned char flags_left=0,flags=0;
        unsigned input_pos=12,output_pos=0;

        // Copy the size byte by byte instead of reading through an unsigned*
        // cast: same value, but no assumption that input+8 is suitably aligned.
        for(unsigned i=0; i<sizeof size; i++)
        {
            size_bytes[i]=input[8+i];
        }

        for(; output_pos<size; flags_left--,flags>>=1)
        {
            if(flags_left==0) // current flag byte is used up, fetch the next one
            {
                flags_left=8;
                flags=input[input_pos++];
            }
            if(!(flags & 1)) // single literal byte
            {
                output[output_pos++]=input[input_pos++];
                continue;
            }

            unsigned char low=input[input_pos++];
            unsigned char high=input[input_pos++];
            unsigned field=((unsigned)high<<4) + (low>>4); // distance, or length for kind 2
            unsigned room=size-output_pos; // non-zero thanks to the loop condition
            unsigned count;
            int is_match=0;

            switch(low & 0x0F)
            {
            case 0: // long back-reference: one extra input byte carries the length
                count=input[input_pos++]+16u;
                is_match=1;
                break;
            case 1: // run of a single byte
                count=(low>>4)+3u;
                if(count>room)
                {
                    count=room;
                }
                for(unsigned i=0; i<count; i++)
                {
                    output[output_pos++]=high;
                }
                break;
            case 2: // long literal run
                count=field+18u;
                if(count>room)
                {
                    count=room;
                }
                for(unsigned i=0; i<count; i++)
                {
                    output[output_pos++]=input[input_pos++];
                }
                break;
            default: // short back-reference, length 3..15
                count=low & 0x0F;
                is_match=1;
                break;
            }

            if(is_match)
            {
                // A distance reaching before the first output byte would wrap
                // the unsigned index and read far outside the buffer.
                if(field>output_pos)
                {
                    return output_pos;
                }
                if(count>room)
                {
                    count=room;
                }
                // Byte-by-byte on purpose: source and destination may overlap
                // when the distance is smaller than the length.
                for(unsigned i=0; i<count; i++)
                {
                    output[output_pos]=output[output_pos-field];
                    output_pos++;
                }
            }
        }
        return output_pos;
    }




    // Load a whole file into a freshly allocated buffer.
    //
    //   filename - path of the file to read (opened in binary mode).
    //
    // Returns a malloc'ed buffer holding the file's bytes; the caller owns it
    // and should free() it. The length is not returned. On any failure (open,
    // seek, size query, allocation, short read) a message is printed and the
    // process exits with status 1.
    unsigned char* load_file(char *filename)
    {
        FILE *f=fopen(filename,"rb");
        if(f==NULL)
        {
            printf("Error opening input file!\n");
            exit(1);
        }

        // ftell() returns long and reports failure as -1, so keep the value in
        // a long and check it before it is used as an allocation size.
        long sz=-1;
        if(fseek(f , 0 , SEEK_END )==0)
        {
            sz=ftell(f);
        }
        if(sz<0 || fseek(f , 0 , SEEK_SET )!=0)
        {
            printf("Error reading input file!\n");
            exit(1);
        }

        // Ask for at least one byte so an empty file does not depend on what
        // malloc(0) returns.
        unsigned char *data=(unsigned char*)malloc(sz>0 ? (size_t)sz : 1);
        if(data==NULL)
        {
            printf("Error allocating memory!\n");
            exit(1);
        }

        if(fread(data,1,(size_t)sz,f)!=(size_t)sz)
        {
            printf("Error reading input file!\n");
            exit(1);
        }
        fclose(f);
        return data;
    }


    // Write a buffer to a file, creating it or replacing its contents.
    //
    //   data     - bytes to write.
    //   sz       - number of bytes to write; must not be negative.
    //   filename - path of the file to write (opened in binary mode).
    //
    // On any failure (negative size, open, short write, close) a message is
    // printed and the process exits with status 1.
    void save_file(unsigned char *data,int sz,char *filename)
    {
        // Reject a negative size before touching the file: converted to size_t
        // it would ask fwrite for an enormous number of bytes.
        if(sz<0)
        {
            printf("Error writing output file!\n");
            exit(1);
        }

        FILE *f=fopen(filename,"wb");
        if(f==NULL)
        {
            printf("Error opening output file!\n");
            exit(1);
        }

        if(fwrite(data,1,(size_t)sz,f)!=(size_t)sz)
        {
            printf("Error writing output file!\n");
            exit(1);
        }
        // fclose() flushes buffered data, so a failure here means the file on
        // disk may be incomplete.
        if(fclose(f)!=0)
        {
            printf("Error writing output file!\n");
            exit(1);
        }
    }


    // Command-line entry point: decompress <input file> <output file>.
    //
    // Loads the compressed file, reads the uncompressed size stored at offset 8
    // of its header, decompresses into a buffer of that size and writes the
    // buffer to the output file.
    // NOTE: load_file() does not report the file length, so this cannot check
    // that the input is long enough to contain the header.
    int main(int argc,char **argv)
    {
        if(argc!=3)
        {
            printf("Usage:\n\tdecompress <input file> <output file>\n");
            exit(0);
        }
        unsigned char *cdata=load_file(argv[1]); // read compressed data

        // Uncompressed size, copied byte by byte instead of through an int*
        // cast: same host-order value, no alignment assumption on cdata+8.
        int usize=0;
        for(unsigned i=0; i<sizeof usize; i++)
        {
            ((unsigned char*)&usize)[i]=cdata[8+i];
        }
        if(usize<0)
        {
            printf("Error: invalid uncompressed size in header!\n");
            exit(1);
        }

        // Zero-filled, and at least one byte so a size of 0 does not depend on
        // what a zero-sized allocation returns.
        unsigned char *udata=(unsigned char*)calloc(usize>0 ? (size_t)usize : 1,1);
        if(udata==NULL)
        {
            printf("Error allocating memory!\n");
            exit(1);
        }

        unsigned written=decompress(cdata,udata);
        if(written<(unsigned)usize)
        {
            printf("Warning: only %u of %d bytes were decompressed!\n",written,usize);
        }

        save_file(udata,usize,argv[2]); // save decompressed data to file
        free(udata);
        free(cdata);
        return 0;
    }



Similar Threads

  1. jpeg file recompression
    By suryakandau@yahoo.co.id in forum Data Compression
    Replies: 7
    Last Post: 9th June 2016, 17:05
  2. Video Recompression
    By Gonzalo in forum Data Compression
    Replies: 6
    Last Post: 21st June 2015, 20:35
  3. LZMA recompression
    By twisted89 in forum Data Compression
    Replies: 4
    Last Post: 4th December 2012, 18:31
  4. Format priority for recompression
    By Shelwien in forum Data Compression
    Replies: 22
    Last Post: 12th March 2011, 00:35
  5. filesharing with built-in recompression
    By Shelwien in forum Data Compression
    Replies: 8
    Last Post: 8th December 2009, 13:42

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •