我有一个LZ77压缩算法的代码。它可以很好地处理小文件。但是如果我想压缩100个kB和更大的文件,这需要很长时间。
我想,这都是因为这个部分:
do { // searchin for longest mathcing
j++;
i++;
if(!lookBuffer[i]) fread(&lookBuffer[i], 1, 1, from);
if (j == strlen(searchBuffer) && lookBuffer[addJ] == lookBuffer[i]) {
do {
i++;
j++;
if(!lookBuffer[i]) fread(&lookBuffer[i], 1, 1, from);
addJ++;
} while (lookBuffer[addJ] == lookBuffer[i] && i < lookLen);
}
} while (searchBuffer[j] == lookBuffer[i] && i < lookLen);这部分搜索搜索缓冲区和前瞻性缓冲区之间最长的匹配。
那么,有没有更快的方法来找到这种匹配呢?
用于LZ77压缩的完整代码:
void lz77(char *fileName, FILE *from, FILE *path, long long size) {
char *searchBuffer = (char*)malloc((searchLen) * sizeof(char)); // search array
memset(searchBuffer, 0x00, searchLen);
long i = 0, j = 0, length = 0, offset = 0, isMatching = 0, iForSearchBuff = 0, iForLookBuff = 0;
int isFirst = 1, n = 0;
unsigned char symbol;
while(!feof(from)) {
isMatching = 0;
length = 0;
iForSearchBuff = 0;
iForLookBuff = 0;
offset = strlen(searchBuffer);
if (isFirst) { // first symbol
fread(searchBuffer, 1, 1, from);
writeBit(0, 0, *searchBuffer, tmpFile); // writing(0, 0, symbol)
isFirst = 0;
} else {
char *lookBuffer = (char*)malloc((lookLen) * sizeof(char)); // look array
memset(lookBuffer, 0x00, lookLen);
fread(lookBuffer, 1, 1, from);
for (j = 0; j <= strlen(searchBuffer); j++) {
i = 0;
if (searchBuffer[j] == lookBuffer[i]) { // MATCHING
isMatching = 1;
long fixJ = j, fixI = i, addJ = 0;
do { // searchin for longest mathcing
j++;
i++;
if(!lookBuffer[i]) fread(&lookBuffer[i], 1, 1, from);
if (j == strlen(searchBuffer) && lookBuffer[addJ] == lookBuffer[i]) {
do {
i++;
j++;
if(!lookBuffer[i]) fread(&lookBuffer[i], 1, 1, from);
addJ++;
} while (lookBuffer[addJ] == lookBuffer[i] && i < lookLen);
}
} while (searchBuffer[j] == lookBuffer[i] && i < lookLen);
if (((strlen(searchBuffer) - fixJ) <= offset) && ((i - fixI) >= length)) {
length = i - fixI;
offset = strlen(searchBuffer) - fixJ;
iForSearchBuff = fixI;
iForLookBuff = i;
}
j = fixJ;
} else if (j >= (strlen(searchBuffer))) {
if (isMatching) {
if (lookBuffer[iForLookBuff] == '\0') { writeBit(offset, length, ' ', tmpFile);}
else {
writeBit(offset, length, lookBuffer[iForLookBuff], tmpFile);
searchBuffer = insertIntoBuffer(searchBuffer, length + 1, &lookBuffer[iForSearchBuff]);
}
isMatching = 0;
break;
} else {
writeBit(0, 0, lookBuffer[i], tmpFile);
searchBuffer = insertIntoBuffer(searchBuffer, 1, &lookBuffer[i]);
break;
}
}
}
}
}}
发布于 2020-05-30 19:07:45
zlib的放气器使用每个位置的前三个字节作为哈希函数的输入来保存哈希表。对于散列冲突,为每个散列值维护一个列表,其中列表的长度取决于压缩级别。为要压缩的当前位置的前三个字节计算哈希值。对于具有相同哈希值并在允许的距离内的每个前一个位置,将直接比较该位置的数据以找到匹配的长度。然后发出最长的匹配,如果没有三个或更多字节的匹配,则发出文本。
发布于 2020-05-30 15:51:25
我不是专家,但聪明的人设计了高效的搜索算法,可以在这里应用。例如,查看Knuth-Morris-Pratt算法https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm。
发布于 2020-05-30 16:03:51
保持一个由窗口中每个字符串的第一个'n‘字节索引的表--其中'n’是您想要的最小长度匹配(可能是2)。您可能有多个字符串,以相同的'n‘字节开头,因此该表中的条目是字符串列表和窗口中每个字符串的偏移量。您可能希望在窗口末尾偏移最小的情况下选择最长的匹配。特别是处理以同一字节的长时间运行开始的字符串可能是值得的。在向前移动窗口时,需要从表中删除字符串并添加新字符串。
https://stackoverflow.com/questions/62104581
复制相似问题