我正在使用Python的枕头库来读取图像文件。如何使用Huffman编码压缩和解压缩?以下是一项指示:
给您提供了一组示例图像,您的目标是在不丢失任何可感知信息的情况下尽可能压缩它们,-upon解压缩它们应该与原始图像相同。图像本质上存储为一系列颜色点,其中每个点被表示为红色、绿色和蓝色(rgb)的组合。rgb值的每个组件都在0-255之间,因此例如:(100,0,200)将代表紫色。使用固定长度编码,rgb值的每个组件都需要8位编码(28= 256),这意味着整个rgb值需要24位编码。您可以使用像Huffman编码这样的压缩算法来减少更多公共值所需的位数,从而减少编码图像所需的总比特数。
# For my current code I just read the image, get all the rgb and build the tree
from PIL import Image
import sys, string
import copy
codes = {}
def sortFreq(freqs):
letters = freqs.keys()
tuples = []
for let in letters:
tuples.append (freqs[let],let)
tuples.sort()
return tuples
def buildTree(tuples):
while len (tuples) > 1:
leastTwo = tuple (tuples[0:2]) # get the 2 to combine
theRest = tuples[2:] # all the others
combFreq = leastTwo[0][0] + leastTwo[1][0] # the branch points freq
tuples = theRest + [(combFreq, leastTwo)] # add branch point to the end
tuples.sort() # sort it into place
return tuples[0] # Return the single tree inside the list
def trimTree(tree):
# Trim the freq counters off, leaving just the letters
p = tree[1] # ignore freq count in [0]
if type (p) == type (""):
return p # if just a leaf, return it
else:
return (trimTree (p[0]), trimTree (p[1]) # trim left then right and recombine
def assignCodes(node, pat=''):
global codes
if type (node) == type (""):
codes[node] = pat # A leaf. Set its code
else:
assignCodes(node[0], pat+"0") # Branch point. Do the left branch
assignCodes(node[1], pat+"1") # then do the right branch.
dictionary = {}
table = {}
image = Image.open('fall.bmp')
#image.show()
width, height = image.size
px = image.load()
totalpixel = width*height
print ("Total pixel: "+ str(totalpixel))
for x in range (width):
for y in range (height):
# print (px[x, y])
for i in range (3):
if dictionary.get(str(px[x, y][i])) is None:
dictionary[str(px[x, y][i])] = 1
else:
dictionary[str(px[x, y][i])] = dictionary[str(px[x, y][i])] +1
table = copy.deepcopy(dictionary)
#combination = len(dictionary)
#for value in table:
# table[value] = table[value] / (totalpixel * combination) * 100
#print(table)
print(dictionary)
sortdic = sortFreq(dictionary)
tree = buildTree(sortdic)
trim = trimTree(tree)
print(trim)
assignCodes(trim)
print(codes)发布于 2019-11-16 01:20:12
类HuffmanCoding采用要压缩的文本文件的完整路径作为参数。(因为它的数据成员存储特定于输入文件的数据)。
函数返回输出压缩文件的路径。
函数decompress()需要解压缩文件的路径。(并从为压缩而创建的同一个对象中调用decompress(),以便从其数据成员获得代码映射)
import heapq
import os
class HeapNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
def __cmp__(self, other):
if(other == None):
return -1
if(not isinstance(other, HeapNode)):
return -1
return self.freq > other.freq
class HuffmanCoding:
def __init__(self, path):
self.path = path
self.heap = []
self.codes = {}
self.reverse_mapping = {}
# functions for compression:
def make_frequency_dict(self, text):
frequency = {}
for character in text:
if not character in frequency:
frequency[character] = 0
frequency[character] += 1
return frequency
def make_heap(self, frequency):
for key in frequency:
node = HeapNode(key, frequency[key])
heapq.heappush(self.heap, node)
def merge_nodes(self):
while(len(self.heap)>1):
node1 = heapq.heappop(self.heap)
node2 = heapq.heappop(self.heap)
merged = HeapNode(None, node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heapq.heappush(self.heap, merged)
def make_codes_helper(self, root, current_code):
if(root == None):
return
if(root.char != None):
self.codes[root.char] = current_code
self.reverse_mapping[current_code] = root.char
return
self.make_codes_helper(root.left, current_code + "0")
self.make_codes_helper(root.right, current_code + "1")
def make_codes(self):
root = heapq.heappop(self.heap)
current_code = ""
self.make_codes_helper(root, current_code)
def get_encoded_text(self, text):
encoded_text = ""
for character in text:
encoded_text += self.codes[character]
return encoded_text
def pad_encoded_text(self, encoded_text):
extra_padding = 8 - len(encoded_text) % 8
for i in range(extra_padding):
encoded_text += "0"
padded_info = "{0:08b}".format(extra_padding)
encoded_text = padded_info + encoded_text
return encoded_text
def get_byte_array(self, padded_encoded_text):
if(len(padded_encoded_text) % 8 != 0):
print("Encoded text not padded properly")
exit(0)
b = bytearray()
for i in range(0, len(padded_encoded_text), 8):
byte = padded_encoded_text[i:i+8]
b.append(int(byte, 2))
return b
def compress(self):
filename, file_extension = os.path.splitext(self.path)
output_path = filename + ".bin"
with open(self.path, 'r+') as file, open(output_path, 'wb') as output:
text = file.read()
text = text.rstrip()
frequency = self.make_frequency_dict(text)
self.make_heap(frequency)
self.merge_nodes()
self.make_codes()
encoded_text = self.get_encoded_text(text)
padded_encoded_text = self.pad_encoded_text(encoded_text)
b = self.get_byte_array(padded_encoded_text)
output.write(bytes(b))
print("Compressed")
return output_path
""" functions for decompression: """
def remove_padding(self, padded_encoded_text):
padded_info = padded_encoded_text[:8]
extra_padding = int(padded_info, 2)
padded_encoded_text = padded_encoded_text[8:]
encoded_text = padded_encoded_text[:-1*extra_padding]
return encoded_text
def decode_text(self, encoded_text):
current_code = ""
decoded_text = ""
for bit in encoded_text:
current_code += bit
if(current_code in self.reverse_mapping):
character = self.reverse_mapping[current_code]
decoded_text += character
current_code = ""
return decoded_text
def decompress(self, input_path):
filename, file_extension = os.path.splitext(self.path)
output_path = filename + "_decompressed" + ".txt"
with open(input_path, 'rb') as file, open(output_path, 'w') as output:
bit_string = ""
byte = file.read(1)
while(byte != ""):
byte = ord(byte)
bits = bin(byte)[2:].rjust(8, '0')
bit_string += bits
byte = file.read(1)
encoded_text = self.remove_padding(bit_string)
decompressed_text = self.decode_text(encoded_text)
output.write(decompressed_text)
print("Decompressed")
return output_path运行程序:将上述代码保存在huffman.py文件中。
创建一个示例文本文件。或者从sample.txt下载一个示例文件(右击,另存为)
将下面的代码保存在与上述代码相同的目录中,并运行以下python代码(在运行之前编辑下面的路径变量)。将其初始化为文本文件路径)
UseHuffman.py
from huffman import HuffmanCoding
#input file path
path = "/home/ubuntu/Downloads/sample.txt"
h = HuffmanCoding(path)
output_path = h.compress()
h.decompress(output_path)
The compressed .bin file and the decompressed file are both saved in the same directory as of the input file.在上面链接的示例文本文件上运行的结果:
初始大小: 715.3 kB压缩文件大小: 394.0 kB加号,解压缩后的文件与原始文件完全相同,没有任何数据丢失。
这就是霍夫曼编码实现的全部,包括压缩和解压缩。这是很有趣的代码。
上述程序要求使用创建压缩文件的相同对象运行解压缩函数(因为代码映射存储在其数据成员中)。我们还可以使压缩和解压缩功能独立运行,如果在压缩过程中我们也将映射信息存储在压缩文件中(在开始时)。然后,在解压缩过程中,我们将首先从文件中读取映射信息,然后使用映射信息来解压缩rest文件。
https://stackoverflow.com/questions/58873472
复制相似问题