首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >持久化hashlib状态

持久化hashlib状态
EN

Stack Overflow用户
提问于 2010-01-25 07:54:24
回答 5查看 3.2K关注 0票数 6

我想创建一个hashlib实例,update()它,然后以某种方式保持它的状态。稍后,我想使用这个状态数据重新创建对象,并继续使用update()。最后,我想得到数据的总累积运行的hexdigest()。状态持久化必须通过多次运行才能生存。

示例:

代码语言:javascript
复制
import hashlib
m = hashlib.sha1()
m.update('one')
m.update('two')
# somehow, persist the state of m here

#later, possibly in another process
# recreate m from the persisted state
m.update('three')
m.update('four')
print m.hexdigest()
# at this point, m.hexdigest() should be equal to hashlib.sha1().update('onetwothreefour').hextdigest()

编辑:

在2010年,我没有找到一个很好的方法来处理python,最后我用C语言编写了一个小助手应用程序来完成这个任务。然而,有一些伟大的答案,以下是我所不知道的,在当时。

EN

回答 5

Stack Overflow用户

回答已采纳

发布于 2017-06-10 21:11:43

您可以使用ctypes这样做,不需要C中的帮助程序:-

rehash.py

代码语言:javascript
复制
#! /usr/bin/env python

''' A resumable implementation of SHA-256 using ctypes with the OpenSSL crypto library

    Written by PM 2Ring 2014.11.13
'''

from ctypes import *

SHA_LBLOCK = 16
SHA256_DIGEST_LENGTH = 32

class SHA256_CTX(Structure):
    _fields_ = [
        ("h", c_long * 8),
        ("Nl", c_long),
        ("Nh", c_long),
        ("data", c_long * SHA_LBLOCK),
        ("num", c_uint),
        ("md_len", c_uint)
    ]

HashBuffType = c_ubyte * SHA256_DIGEST_LENGTH

#crypto = cdll.LoadLibrary("libcrypto.so")
crypto = cdll.LoadLibrary("libeay32.dll" if os.name == "nt" else "libssl.so")

class sha256(object):
    digest_size = SHA256_DIGEST_LENGTH

    def __init__(self, datastr=None):
        self.ctx = SHA256_CTX()
        crypto.SHA256_Init(byref(self.ctx))
        if datastr:
            self.update(datastr)

    def update(self, datastr):
        crypto.SHA256_Update(byref(self.ctx), datastr, c_int(len(datastr)))

    #Clone the current context
    def _copy_ctx(self):
        ctx = SHA256_CTX()
        pointer(ctx)[0] = self.ctx
        return ctx

    def copy(self):
        other = sha256()
        other.ctx = self._copy_ctx()
        return other

    def digest(self):
        #Preserve context in case we get called before hashing is
        # really finished, since SHA256_Final() clears the SHA256_CTX
        ctx = self._copy_ctx()
        hashbuff = HashBuffType()
        crypto.SHA256_Final(hashbuff, byref(self.ctx))
        self.ctx = ctx
        return str(bytearray(hashbuff))

    def hexdigest(self):
        return self.digest().encode('hex')

#Tests
def main():
    import cPickle
    import hashlib

    data = ("Nobody expects ", "the spammish ", "imposition!")

    print "rehash\n"

    shaA = sha256(''.join(data))
    print shaA.hexdigest()
    print repr(shaA.digest())
    print "digest size =", shaA.digest_size
    print

    shaB = sha256()
    shaB.update(data[0])
    print shaB.hexdigest()

    #Test pickling
    sha_pickle = cPickle.dumps(shaB, -1)
    print "Pickle length:", len(sha_pickle)
    shaC = cPickle.loads(sha_pickle)

    shaC.update(data[1])
    print shaC.hexdigest()

    #Test copying. Note that copy can be pickled
    shaD = shaC.copy()

    shaC.update(data[2])
    print shaC.hexdigest()


    #Verify against hashlib.sha256()
    print "\nhashlib\n"

    shaD = hashlib.sha256(''.join(data))
    print shaD.hexdigest()
    print repr(shaD.digest())
    print "digest size =", shaD.digest_size
    print

    shaE = hashlib.sha256(data[0])
    print shaE.hexdigest()

    shaE.update(data[1])
    print shaE.hexdigest()

    #Test copying. Note that hashlib copy can NOT be pickled
    shaF = shaE.copy()
    shaF.update(data[2])
    print shaF.hexdigest()


if __name__ == '__main__':
    main()

resumable_SHA-256.py

代码语言:javascript
复制
#! /usr/bin/env python

''' Resumable SHA-256 hash for large files using the OpenSSL crypto library

    The hashing process may be interrupted by Control-C (SIGINT) or SIGTERM.
    When a signal is received, hashing continues until the end of the
    current chunk, then the current file position, total file size, and
    the sha object is saved to a file. The name of this file is formed by
    appending '.hash' to the name of the file being hashed.

    Just re-run the program to resume hashing. The '.hash' file will be deleted
    once hashing is completed.

    Written by PM 2Ring 2014.11.14
'''

import cPickle as pickle
import os
import signal
import sys

import rehash

quit = False

blocksize = 1<<16   # 64kB
blocksperchunk = 1<<8

chunksize = blocksize * blocksperchunk

def handler(signum, frame):
    global quit
    print "\nGot signal %d, cleaning up." % signum
    quit = True


def do_hash(fname, filesize):
    hashname = fname + '.hash'
    if os.path.exists(hashname):
        with open(hashname, 'rb') as f:
            pos, fsize, sha = pickle.load(f)
        if fsize != filesize:
            print "Error: file size of '%s' doesn't match size recorded in '%s'" % (fname, hashname)
            print "%d != %d. Aborting" % (fsize, filesize)
            exit(1)
    else:
        pos, fsize, sha = 0, filesize, rehash.sha256()

    finished = False
    with open(fname, 'rb') as f:
        f.seek(pos)
        while not (quit or finished):
            for _ in xrange(blocksperchunk):
                block = f.read(blocksize)
                if block == '':
                    finished = True
                    break
                sha.update(block)

            pos += chunksize
            sys.stderr.write(" %6.2f%% of %d\r" % (100.0 * pos / fsize, fsize))
            if finished or quit:
                break

    if quit:
        with open(hashname, 'wb') as f:
            pickle.dump((pos, fsize, sha), f, -1)
    elif os.path.exists(hashname):
        os.remove(hashname)

    return (not quit), pos, sha.hexdigest()


def main():
    if len(sys.argv) != 2:
        print "Resumable SHA-256 hash of a file."
        print "Usage:\npython %s filename\n" % sys.argv[0]
        exit(1)

    fname = sys.argv[1]
    filesize = os.path.getsize(fname)

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    finished, pos, hexdigest = do_hash(fname, filesize)
    if finished:
        print "%s  %s" % (hexdigest, fname)
    else:
        print "sha-256 hash of '%s' incomplete" % fname
        print "%s" % hexdigest
        print "%d / %d bytes processed." % (pos, filesize)


if __name__ == '__main__':
    main()

演示

代码语言:javascript
复制
import rehash
import pickle
sha=rehash.sha256("Hello ")
s=pickle.dumps(sha.ctx)
sha=rehash.sha256()
sha.ctx=pickle.loads(s)
sha.update("World")
print sha.hexdigest()

输出

代码语言:javascript
复制
a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e

注意:我想感谢PM2Ring的精彩代码。

票数 4
EN

Stack Overflow用户

发布于 2010-01-25 08:03:13

hashlib.sha1是C库的包装器,所以您将无法对它进行腌制。

它需要实现__getstate____setstate__方法才能访问其内部状态。

如果纯Python实现足以满足您的需求,则可以使用它的sha1实现。

票数 1
EN

Stack Overflow用户

发布于 2017-07-12 18:38:26

我也面临着这个问题,并且没有找到现有的解决方案,所以我最终编写了一个库,它所做的事情与Devesh Saini描述的非常相似:https://github.com/kislyuk/rehash。示例:

代码语言:javascript
复制
import pickle, rehash
hasher = rehash.sha256(b"foo")
state = pickle.dumps(hasher)

hasher2 = pickle.loads(state)
hasher2.update(b"bar")

assert hasher2.hexdigest() == rehash.sha256(b"foobar").hexdigest()
票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/2130892

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档