It really only works well with files that contain many long runs of repeating bytes. A bitmap file, for example, has tons of repeating bytes.
With this I compressed a 144k bitmap image to 6k.
However, the worst-case scenario is a file with no repeating bytes, in which case the compression ratio would be 1:2 and the output file would be twice as large as the file you wanted to compress. For example, suppose a file has the data: 0xAA 0xBB 0xCC 0xDD 0xEE 0xFF; there are no repeating bytes. After RLE compression it would therefore be stored as 0x01AA 0x01BB 0x01CC 0x01DD 0x01EE 0x01FF, double the size of the original file.
Notice how the output file size in bytes is double the number of packets made, since each packet contains 2 bytes.
/* Proof of concept application for RLE Compression.
RLE compression works as follows:
A sequence of identical, consecutive bytes is called a "run." Each run is
replaced by its length followed by the repeated byte, typically 2 bytes in total.
For example: AAAABBBCC would be 4A 3B 2C
This code is PRETTY MUCH 100% standard compliant with C89, except SEEK_END may cause
unexpected behavior on some systems.
Written by Patrick Cland, 2014/11/23
*/
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "misc.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Entry point of the RLE tool.

   Command line:
     RLE.exe -c source.ext dest.rle   compress source into 2-byte packets
     RLE.exe -d source.rle dest.ext   expand packets back into raw bytes
     RLE.exe -help                    print the usage text

   Compressed format: a sequence of 2-byte packets, run length (1..255)
   first, then the byte that is repeated that many times.

   Returns 0 on success, after -help, and when the user declines the
   decompression prompt. Returns EXIT_FAILURE for a bad command line, a
   file that cannot be opened/measured/written, a source or output too
   large to address with an int offset, or an allocation failure.

   The mode is validated and the source opened BEFORE the destination is
   opened with "wb", so a bad command line or missing source no longer
   truncates an existing destination file. */
int main(int argc, char* argv[])
{
    FILE* fSource = NULL;        /* input stream, opened "rb" */
    FILE* fDest = NULL;          /* output stream, opened "wb" */
    PACKET* packet = NULL;       /* packet array; NULL until first allocation */
    PACKET* grown;               /* realloc result, checked before replacing packet */
    MODE mode;                   /* COMPRESS or DECOMPRESS */
    long fileSize = -1;          /* raw ftell() result */
    int offsetCounter;           /* current byte offset in the file being read/written */
    int nPackets = 0;            /* number of valid entries in packet[] */
    int i,j;                     /* generic counters */
    int fLength;                 /* source length in bytes */
    int status = EXIT_FAILURE;   /* pessimistic default; set to 0 only on success */
    char choice;                 /* answer to the Y/N prompt */
    byte length = 0;             /* run length of the packet being built */
    byte data;                   /* byte value of the packet being built */

    /* ---- command line: nothing is opened until the arguments are valid ---- */
    switch(argc)
    {
    case 1: /* no command */
        printf("\nNo parameter specified. For usage see -help\n");
        return EXIT_FAILURE;
    case 2:
        if(!strcmp(argv[1],"-help"))
        {
            printf("\nUsage:\nCompression: RLE.exe -c source.ext dest.rle\nDecompression: RLE.exe -d source.rle dest.ext\n");
            return 0;
        }
        printf("Parameter Error. Use -help for usage\n");
        return EXIT_FAILURE;
    case 4:
        break;
    default:
        printf("\nParameter Error. Use -help for usage\n");
        return EXIT_FAILURE;
    }

    if(!strcmp(argv[1],"-c"))
        mode = COMPRESS;
    else if(!strcmp(argv[1],"-d"))
        mode = DECOMPRESS;
    else
    {
        printf("Invalid code. See -help for usage.\n");
        return EXIT_FAILURE;
    }

    /* ---- open the files: source first, so a missing source leaves dest untouched ---- */
    fSource = fopen(argv[2],"rb");
    if(fSource == NULL)
    {
        printf("\nThere was a file error\n");
        return EXIT_FAILURE;
    }
    fDest = fopen(argv[3],"wb");
    if(fDest == NULL)
    {
        printf("\nThere was a file error\n");
        goto cleanup;
    }
    printf("\nOperating with source file [%s], destination file [%s] in mode %d\n",argv[2],argv[3],(int)mode);

    /* ---- measure the source; readData()/writeByte() take int offsets, so cap at INT_MAX ---- */
    if(fseek(fSource,0,SEEK_END) == 0)
        fileSize = ftell(fSource);
    if(fileSize < 0 || fileSize > INT_MAX || fseek(fSource,0,SEEK_SET) != 0)
    {
        printf("\nThere was a file error\n");
        goto cleanup;
    }
    fLength = (int)fileSize;

    switch(mode)
    {
    case COMPRESS:
        /* ###################### ASSEMBLE PACKETS ###################### */
        for(offsetCounter = 0; offsetCounter < fLength; offsetCounter += length)
        {
            /* grow by one packet; realloc(NULL, n) acts as malloc(n) on the first pass */
            grown = (PACKET*)realloc(packet,sizeof(PACKET) * ((size_t)nPackets + 1));
            if(grown == NULL)
            {
                printf("Error reallocating memory for new packets!\n");
                goto cleanup; /* packet still points at the old array and is freed there */
            }
            packet = grown;

            /* the byte at offsetCounter starts a run of at least 1; extend it while the
               following bytes match, up to the 255 that fits in one length byte */
            data = readData(fSource,offsetCounter);
            length = 1;
            for(j = offsetCounter + 1; j < fLength && length < 0xFF; j++)
            {
                if(readData(fSource,j) != data)
                    break;
                length++;
            }

            packet[nPackets].data = data;
            packet[nPackets].length = length;
            nPackets++;
        }
        printf("\n\nAssembled %d packets!\n",nPackets);
        for(i = 0; i < nPackets; i++)
            printf("Data for Packet[%d]:\tData: 0x%X \t Length:%d\n",i,(unsigned)packet[i].data,(int)packet[i].length);

        /* ###################### WRITE DESTINATION FILE ###################### */
        if(nPackets > INT_MAX / 2) /* output offsets must stay representable as int */
        {
            printf("Error: compressed output is too large\n");
            goto cleanup;
        }
        offsetCounter = 0;
        for(i = 0; i < nPackets; i++)
        {
            writeData(fDest,offsetCounter,packet[i]);
            offsetCounter += 2; /* one packet is 2 bytes on disk, the stride the decoder reads */
        }
        if(fflush(fDest) != 0 || ferror(fDest))
        {
            printf("\nThere was a file error\n");
            goto cleanup;
        }
        break;

    case DECOMPRESS:
        /* ###################### DECOMPRESSION ###################### */
        if(fLength % 2 != 0) /* RLE compressed files MUST be an even number of bytes long */
        {
            printf("Error: File must be an even amount of bytes: Your size: %d bytes", fLength);
            goto cleanup;
        }
        nPackets = fLength / 2;

        printf("Scanned [%d] packets in the file. Decompress? (Y/N):",nPackets);
        /* a failed read (e.g. closed stdin) is treated as "no" rather than reading garbage */
        if(scanf("%c",&choice) != 1 || !((choice == 'Y') || (choice == 'y')))
        {
            printf("\nDecompression cancelled.\n");
            status = 0;
            goto cleanup;
        }

        if(nPackets > 0) /* an empty source needs no array; malloc(0) may return NULL */
        {
            packet = (PACKET*)malloc(sizeof(PACKET) * (size_t)nPackets);
            if(packet == NULL)
            {
                printf("Error allocating memory for packets!\n");
                goto cleanup;
            }
        }

        offsetCounter = 0;
        for(i = 0; i < nPackets; i++)
        {
            packet[i].length = readData(fSource,offsetCounter);   /* first byte is the length */
            packet[i].data = readData(fSource,offsetCounter + 1); /* second byte is the data */
            offsetCounter += 2;
            printf("Assembled packet[%d]:\tLength:%d \t Data:0x%X\n",i,(int)packet[i].length,(unsigned)packet[i].data);
        }

        offsetCounter = 0;
        for(i = 0; i < nPackets; i++)
        {
            if(packet[i].length > INT_MAX - offsetCounter) /* would overflow the int offset */
            {
                printf("Error: decompressed output is too large\n");
                goto cleanup;
            }
            for(j = 0; j < packet[i].length; j++)
            {
                writeByte(fDest,offsetCounter,packet[i].data); /* emit the byte length times */
                offsetCounter++;
            }
        }
        if(fflush(fDest) != 0 || ferror(fDest))
        {
            printf("\nThere was a file error\n");
            goto cleanup;
        }
        printf("\nDecompression Success!\n");
        break;
    }
    status = 0;

cleanup:
    /* single exit path once a file is open: release everything that was acquired */
    free(packet);
    if(fSource != NULL)
        fclose(fSource);
    if(fDest != NULL && fclose(fDest) != 0 && status == 0)
    {
        printf("\nThere was a file error\n"); /* buffered data could not be written out */
        status = EXIT_FAILURE;
    }
    return status;
}
/* ---------- misc.h ---------- */
#ifndef MISC_H
#define MISC_H
#include
/* Alias for one raw octet; used for file bytes and for both packet fields. */
typedef unsigned char byte;

/* Operation selected on the command line. */
typedef enum
{
    COMPRESS = 0,  /* turn the source file into packets */
    DECOMPRESS = 1 /* expand packets back into the original bytes */
} MODE;

/* One encoded run: the byte `data` repeated `length` times. */
typedef struct
{
    byte length; /* number of repetitions */
    byte data;   /* the byte value being repeated */
} PACKET;
byte readData(FILE* f, int offset)
{
byte data = 0;
fseek(f,offset,SEEK_SET); /* move stream to offset */
fread(&data,sizeof(byte),1,f);
return data;
}
/* Write packet p to stream f at absolute position `offset`.
   The packet is serialised as exactly two bytes, run length first and data
   byte second, which is the layout the decompressor reads back. Writing the
   fields explicitly (instead of dumping the struct) keeps the file format
   independent of any padding the compiler may add to PACKET.
   No status is returned; callers can inspect ferror(f) afterwards. */
void writeData(FILE* f, int offset, PACKET p)
{
    byte raw[2];

    raw[0] = p.length;
    raw[1] = p.data;
    fseek(f,offset,SEEK_SET);
    fwrite(raw,sizeof(byte),sizeof(raw),f);
}
/* Store one byte, `data`, at absolute position `offset` of stream f.
   The stream is repositioned before the write. No status is returned. */
void writeByte(FILE* f, int offset, byte data)
{
    fseek(f,offset,SEEK_SET);
    fputc(data,f);
}
#endif
With this I compressed a 144k bitmap image to 6k.
However, the worst-case scenario is a file with no repeating bytes, in which case the compression ratio would be 1:2 and the output file would be twice as large as the file you wanted to compress. For example, suppose a file has the data: 0xAA 0xBB 0xCC 0xDD 0xEE 0xFF; there are no repeating bytes. After RLE compression it would therefore be stored as 0x01AA 0x01BB 0x01CC 0x01DD 0x01EE 0x01FF, double the size of the original file.
Notice how the output file size in bytes is double the number of packets made, since each packet contains 2 bytes.
/* Proof of concept application for RLE Compression.
RLE compression works as follows:
A sequence of identical, consecutive bytes is called a "run." Each run is
replaced by its length followed by the repeated byte, typically 2 bytes in total.
For example: AAAABBBCC would be 4A 3B 2C
This code is PRETTY MUCH 100% standard compliant with C89, except SEEK_END may cause
unexpected behavior on some systems.
Written by Patrick Cland, 2014/11/23
*/
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "misc.h"
#include
#include
#include
/* Entry point of the RLE tool.

   Command line:
     RLE.exe -c source.ext dest.rle   compress source into 2-byte packets
     RLE.exe -d source.rle dest.ext   expand packets back into raw bytes
     RLE.exe -help                    print the usage text

   Compressed format: a sequence of 2-byte packets, run length (1..255)
   first, then the byte that is repeated that many times.

   Returns 0 on success, after -help, and when the user declines the
   decompression prompt. Returns EXIT_FAILURE for a bad command line, a
   file that cannot be opened/measured/written, a source or output too
   large to address with an int offset, or an allocation failure.

   The mode is validated and the source opened BEFORE the destination is
   opened with "wb", so a bad command line or missing source no longer
   truncates an existing destination file. */
int main(int argc, char* argv[])
{
    FILE* fSource = NULL;        /* input stream, opened "rb" */
    FILE* fDest = NULL;          /* output stream, opened "wb" */
    PACKET* packet = NULL;       /* packet array; NULL until first allocation */
    PACKET* grown;               /* realloc result, checked before replacing packet */
    MODE mode;                   /* COMPRESS or DECOMPRESS */
    long fileSize = -1;          /* raw ftell() result */
    int offsetCounter;           /* current byte offset in the file being read/written */
    int nPackets = 0;            /* number of valid entries in packet[] */
    int i,j;                     /* generic counters */
    int fLength;                 /* source length in bytes */
    int status = EXIT_FAILURE;   /* pessimistic default; set to 0 only on success */
    char choice;                 /* answer to the Y/N prompt */
    byte length = 0;             /* run length of the packet being built */
    byte data;                   /* byte value of the packet being built */

    /* ---- command line: nothing is opened until the arguments are valid ---- */
    switch(argc)
    {
    case 1: /* no command */
        printf("\nNo parameter specified. For usage see -help\n");
        return EXIT_FAILURE;
    case 2:
        if(!strcmp(argv[1],"-help"))
        {
            printf("\nUsage:\nCompression: RLE.exe -c source.ext dest.rle\nDecompression: RLE.exe -d source.rle dest.ext\n");
            return 0;
        }
        printf("Parameter Error. Use -help for usage\n");
        return EXIT_FAILURE;
    case 4:
        break;
    default:
        printf("\nParameter Error. Use -help for usage\n");
        return EXIT_FAILURE;
    }

    if(!strcmp(argv[1],"-c"))
        mode = COMPRESS;
    else if(!strcmp(argv[1],"-d"))
        mode = DECOMPRESS;
    else
    {
        printf("Invalid code. See -help for usage.\n");
        return EXIT_FAILURE;
    }

    /* ---- open the files: source first, so a missing source leaves dest untouched ---- */
    fSource = fopen(argv[2],"rb");
    if(fSource == NULL)
    {
        printf("\nThere was a file error\n");
        return EXIT_FAILURE;
    }
    fDest = fopen(argv[3],"wb");
    if(fDest == NULL)
    {
        printf("\nThere was a file error\n");
        goto cleanup;
    }
    printf("\nOperating with source file [%s], destination file [%s] in mode %d\n",argv[2],argv[3],(int)mode);

    /* ---- measure the source; readData()/writeByte() take int offsets, so cap at INT_MAX ---- */
    if(fseek(fSource,0,SEEK_END) == 0)
        fileSize = ftell(fSource);
    if(fileSize < 0 || fileSize > INT_MAX || fseek(fSource,0,SEEK_SET) != 0)
    {
        printf("\nThere was a file error\n");
        goto cleanup;
    }
    fLength = (int)fileSize;

    switch(mode)
    {
    case COMPRESS:
        /* ###################### ASSEMBLE PACKETS ###################### */
        for(offsetCounter = 0; offsetCounter < fLength; offsetCounter += length)
        {
            /* grow by one packet; realloc(NULL, n) acts as malloc(n) on the first pass */
            grown = (PACKET*)realloc(packet,sizeof(PACKET) * ((size_t)nPackets + 1));
            if(grown == NULL)
            {
                printf("Error reallocating memory for new packets!\n");
                goto cleanup; /* packet still points at the old array and is freed there */
            }
            packet = grown;

            /* the byte at offsetCounter starts a run of at least 1; extend it while the
               following bytes match, up to the 255 that fits in one length byte */
            data = readData(fSource,offsetCounter);
            length = 1;
            for(j = offsetCounter + 1; j < fLength && length < 0xFF; j++)
            {
                if(readData(fSource,j) != data)
                    break;
                length++;
            }

            packet[nPackets].data = data;
            packet[nPackets].length = length;
            nPackets++;
        }
        printf("\n\nAssembled %d packets!\n",nPackets);
        for(i = 0; i < nPackets; i++)
            printf("Data for Packet[%d]:\tData: 0x%X \t Length:%d\n",i,(unsigned)packet[i].data,(int)packet[i].length);

        /* ###################### WRITE DESTINATION FILE ###################### */
        if(nPackets > INT_MAX / 2) /* output offsets must stay representable as int */
        {
            printf("Error: compressed output is too large\n");
            goto cleanup;
        }
        offsetCounter = 0;
        for(i = 0; i < nPackets; i++)
        {
            writeData(fDest,offsetCounter,packet[i]);
            offsetCounter += 2; /* one packet is 2 bytes on disk, the stride the decoder reads */
        }
        if(fflush(fDest) != 0 || ferror(fDest))
        {
            printf("\nThere was a file error\n");
            goto cleanup;
        }
        break;

    case DECOMPRESS:
        /* ###################### DECOMPRESSION ###################### */
        if(fLength % 2 != 0) /* RLE compressed files MUST be an even number of bytes long */
        {
            printf("Error: File must be an even amount of bytes: Your size: %d bytes", fLength);
            goto cleanup;
        }
        nPackets = fLength / 2;

        printf("Scanned [%d] packets in the file. Decompress? (Y/N):",nPackets);
        /* a failed read (e.g. closed stdin) is treated as "no" rather than reading garbage */
        if(scanf("%c",&choice) != 1 || !((choice == 'Y') || (choice == 'y')))
        {
            printf("\nDecompression cancelled.\n");
            status = 0;
            goto cleanup;
        }

        if(nPackets > 0) /* an empty source needs no array; malloc(0) may return NULL */
        {
            packet = (PACKET*)malloc(sizeof(PACKET) * (size_t)nPackets);
            if(packet == NULL)
            {
                printf("Error allocating memory for packets!\n");
                goto cleanup;
            }
        }

        offsetCounter = 0;
        for(i = 0; i < nPackets; i++)
        {
            packet[i].length = readData(fSource,offsetCounter);   /* first byte is the length */
            packet[i].data = readData(fSource,offsetCounter + 1); /* second byte is the data */
            offsetCounter += 2;
            printf("Assembled packet[%d]:\tLength:%d \t Data:0x%X\n",i,(int)packet[i].length,(unsigned)packet[i].data);
        }

        offsetCounter = 0;
        for(i = 0; i < nPackets; i++)
        {
            if(packet[i].length > INT_MAX - offsetCounter) /* would overflow the int offset */
            {
                printf("Error: decompressed output is too large\n");
                goto cleanup;
            }
            for(j = 0; j < packet[i].length; j++)
            {
                writeByte(fDest,offsetCounter,packet[i].data); /* emit the byte length times */
                offsetCounter++;
            }
        }
        if(fflush(fDest) != 0 || ferror(fDest))
        {
            printf("\nThere was a file error\n");
            goto cleanup;
        }
        printf("\nDecompression Success!\n");
        break;
    }
    status = 0;

cleanup:
    /* single exit path once a file is open: release everything that was acquired */
    free(packet);
    if(fSource != NULL)
        fclose(fSource);
    if(fDest != NULL && fclose(fDest) != 0 && status == 0)
    {
        printf("\nThere was a file error\n"); /* buffered data could not be written out */
        status = EXIT_FAILURE;
    }
    return status;
}
/* ---------- misc.h ---------- */
#ifndef MISC_H
#define MISC_H
#include
/* Alias for one raw octet; used for file bytes and for both packet fields. */
typedef unsigned char byte;

/* Operation selected on the command line. */
typedef enum
{
    COMPRESS = 0,  /* turn the source file into packets */
    DECOMPRESS = 1 /* expand packets back into the original bytes */
} MODE;

/* One encoded run: the byte `data` repeated `length` times. */
typedef struct
{
    byte length; /* number of repetitions */
    byte data;   /* the byte value being repeated */
} PACKET;
byte readData(FILE* f, int offset)
{
byte data = 0;
fseek(f,offset,SEEK_SET); /* move stream to offset */
fread(&data,sizeof(byte),1,f);
return data;
}
/* Write packet p to stream f at absolute position `offset`.
   The packet is serialised as exactly two bytes, run length first and data
   byte second, which is the layout the decompressor reads back. Writing the
   fields explicitly (instead of dumping the struct) keeps the file format
   independent of any padding the compiler may add to PACKET.
   No status is returned; callers can inspect ferror(f) afterwards. */
void writeData(FILE* f, int offset, PACKET p)
{
    byte raw[2];

    raw[0] = p.length;
    raw[1] = p.data;
    fseek(f,offset,SEEK_SET);
    fwrite(raw,sizeof(byte),sizeof(raw),f);
}
/* Store one byte, `data`, at absolute position `offset` of stream f.
   The stream is repositioned before the write. No status is returned. */
void writeByte(FILE* f, int offset, byte data)
{
    fseek(f,offset,SEEK_SET);
    fputc(data,f);
}
#endif