zlib、gzip、zip 等对比
zlib、gzip、zip 等对比
先来看一下 zlib 编译安装之后的目录结构:
[root@iZrj9hu97fjb3e1xlfktg8Z zlib]# tree
.
├── include
│   ├── zconf.h
│   └── zlib.h
├── lib
│   ├── libz.a
│   ├── libz.so -> libz.so.1.2.11
│   ├── libz.so.1 -> libz.so.1.2.11
│   ├── libz.so.1.2.11
│   └── pkgconfig
│       └── zlib.pc
└── share
    └── man
        └── man3
            └── zlib.3
6 directories, 8 files
仔细观察的话,发现
/*
 * Compress sourceLen bytes from source into dest in a single call, using the
 * given compression level.
 *
 * On entry *destLen holds the capacity of dest; on return it holds the number
 * of bytes produced (0 if deflateInit fails). Input and output are handed to
 * deflate() in pieces no larger than the maximum uInt value, because
 * avail_in / avail_out are uInt while the lengths here are uLong.
 *
 * Returns Z_OK when deflate() finishes with Z_STREAM_END; otherwise returns
 * the status reported by deflateInit() or deflate().
 */
int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
    int level;
{
    const uInt chunk_max = (uInt)-1;    /* largest value a uInt can hold */
    z_stream strm;
    uLong out_remaining = *destLen;     /* output capacity not yet handed out */
    int status;

    *destLen = 0;

    /* Null allocator hooks are passed through to deflateInit(). */
    strm.zalloc = (alloc_func)0;
    strm.zfree = (free_func)0;
    strm.opaque = (voidpf)0;

    status = deflateInit(&strm, level);
    if (status != Z_OK) {
        return status;
    }

    strm.next_out = dest;
    strm.avail_out = 0;
    strm.next_in = (z_const Bytef *)source;
    strm.avail_in = 0;

    do {
        /* Refill the output window once the previous piece is used up. */
        if (strm.avail_out == 0) {
            if (out_remaining > (uLong)chunk_max) {
                strm.avail_out = chunk_max;
            } else {
                strm.avail_out = (uInt)out_remaining;
            }
            out_remaining -= strm.avail_out;
        }

        /* Refill the input window once the previous piece is consumed. */
        if (strm.avail_in == 0) {
            if (sourceLen > (uLong)chunk_max) {
                strm.avail_in = chunk_max;
            } else {
                strm.avail_in = (uInt)sourceLen;
            }
            sourceLen -= strm.avail_in;
        }

        /* Ask for Z_FINISH only when no input remains outside the stream. */
        status = deflate(&strm, sourceLen != 0 ? Z_NO_FLUSH : Z_FINISH);
    } while (status == Z_OK);

    *destLen = strm.total_out;
    deflateEnd(&strm);

    if (status == Z_STREAM_END) {
        return Z_OK;
    }
    return status;
}
/*
 * Convenience wrapper: compress source into dest exactly as compress2() does,
 * with the level fixed to Z_DEFAULT_COMPRESSION. The return value is
 * whatever compress2() returns.
 */
int ZEXPORT compress (dest, destLen, source, sourceLen)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
{
    int status;

    status = compress2(dest, destLen, source, sourceLen,
                       Z_DEFAULT_COMPRESSION);
    return status;
}
通过参数,我们可以看到 compress 只是以默认压缩级别(Z_DEFAULT_COMPRESSION)调用 compress2。下面写一个调用 compress / uncompress 的示例程序:
#include <stdio.h>
#include <string.h>
#include <zlib.h>
#include <stdlib.h>
int my_write(char* fname,const char * buffer);
int main(int argc,char** argv){
if(argc == 1){
printf("Please Input A String You want to Compress\n");
return -1;
}
char * str = argv[1];
printf("The String You Want to Compress is: %s\n",str);
//计算需要压缩的字符的长度
uLong sLen = strlen(str);
//压缩之后字符的长度
uLong tLen;
//通过zlib API计算tlen的长度,以便接下来给压缩之后的数据分配空间
tLen = compressBound(sLen);
//分配压缩数据空间
char * cspace;
cspace = malloc(tLen);
//是否分配成功
if(cspace == NULL){
printf("Not enough memory!\n");
return -1;
}
//开始压缩
int result;
result = compress(cspace,&tLen,str,sLen);
if(result == Z_OK){
printf("Compress Sucess!\n");
//保存压缩内容到硬盘
int j = my_write("compressdata.bin",cspace);
if(j==0){
printf("\t-Sucess to write into disk!\n");
}else{
printf("\t-Failure to write into disk!\n");
}
}
//开始解压
result = uncompress(str,&sLen,cspace,tLen);
if(result == Z_OK){
printf("Original String is: %s\n",str);
}else{
printf("uncompress failure!\n");
}
free(cspace);
return 0;
}
int my_write(char* fname,const char * buffer)
{
size_t writesize;
FILE *pFile;
pFile = fopen(fname,"wb");
//fwrite返回区块数量
writesize = fwrite(buffer,strlen(buffer),1,pFile);
fclose(pFile);
if(strlen(buffer)>0 && 1 == writesize){
return 0;
}else{
return 1;
}
}
编译运行
[root@iZrj9hu97fjb3e1xlfktg8Z ~]# gcc zcompress.c -I /tmp/zlib/include/ -lz -L /tmp/zlib/lib/ -o z
[root@iZrj9hu97fjb3e1xlfktg8Z ~]# ./z "This is the string"
The String You Want to Compress is: This is the string
Compress Sucess!
-Sucess to write into disk!
Original String is: This is the string
到这里一切正常,那是不是
那么
The deflation algorithm used by zip and gzip is a variation of LZ77
(Lempel-Ziv 1977, see reference below). It finds duplicated strings in
the input data. The second occurrence of a string is replaced by a
pointer to the previous string, in the form of a pair (distance,
length). Distances are limited to 32K bytes, and lengths are limited
to 258 bytes. When a string does not occur anywhere in the previous
32K bytes, it is emitted as a sequence of literal bytes. (In this
description, ‘string’ must be taken as an arbitrary sequence of bytes,
and is not restricted to printable characters.)
… 2. gzip file format
The pkzip format imposes a lot of overhead in various headers, which
are useful for an archiver but not necessary when only one file is
compressed. gzip uses a much simpler structure. Numbers are in little
endian format, and bit 0 is the least significant bit.
A gzip file is a sequence of compressed members. Each member has the
following structure:
2 bytes magic header 0x1f, 0x8b (\037 \213)
1 byte compression method (0..7 reserved, 8 = deflate)
1 byte flags
bit 0 set: file probably ascii text
bit 1 set: continuation of multi-part gzip file
bit 2 set: extra field present
bit 3 set: original file name present
bit 4 set: file comment present
bit 5 set: file is encrypted
bit 6,7: reserved
4 bytes file modification time in Unix format
1 byte extra flags (depend on compression method)
1 byte operating system on which compression took place
2 bytes optional part number (second part=1)
2 bytes optional extra field length
? bytes optional extra field
? bytes optional original file name, zero terminated
? bytes optional file comment, zero terminated
12 bytes optional encryption header
? bytes compressed data
4 bytes crc32
4 bytes uncompressed input size modulo 2^32
也就是说
1,