我创建Berkeley DB文件的代码:
def create_bdb_object(filename):
bdb = bsddb3.db.DB()
bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
open_flags = bsddb3.db.DB_CREATE | bsddb3.db.DB_EXCL
if os.path.exists(filename) and is_create:
os.remove(filename)
bdb.open(filename, dbtype=bsddb3.db.DB_BTREE, flags=open_flags)
return bdb
之后,我将一些经 pickle 序列化的数据写入到这个文件中。文件的创建没有任何问题。
Update#1:写入文件的代码:
def write_to_the_file(filename, kv_pair_rdd):
bdb_filename = f'{filename}.new'
bdb = create_bdb_object(bdb_filename)
for url, record in kv_pair_rdd.toLocalIterator():
bdb.put(url.encode(), pickle.dumps(record, protocol=2))
bdb.close()
os.rename(bdb_filename, filename)
但是,当我尝试读取这个文件时,并不能读出全部数据。文件中应该有 9 条记录,但读取后我只得到 4 条。
当我执行 db_dump -p filename 时,可以看到 9 条记录。
从文件中读取数据的代码:
bdb = bsddb3.db.DB()
bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
bdb.open(filename)
bdb_cursor = bdb.cursor()
record = bdb_cursor.first()
while record:
print(record[0], pickle.loads(record[1]))
record = bdb_cursor.next()
bdb_cursor.close()
bdb.close()
有人能解释一下我做错了什么吗?
发布于 2019-06-03 09:26:22
请先检查一下您的文件中实际包含哪些数据。我使用了您的代码并创建了以下脚本:
import bsddb3
import os
import pickle
def create_bdb_object(filename):
    """Create a fresh Berkeley DB B-tree file and return its open handle.

    Any existing file at ``filename`` is deleted first, so the database
    always starts out empty.  The handle is configured with
    ``DB_DUP | DB_DUPSORT`` before it is opened.

    Args:
        filename: Path of the database file to (re)create.

    Returns:
        The opened ``bsddb3.db.DB`` handle.  The caller is responsible for
        closing it.
    """
    bdb = bsddb3.db.DB()
    bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
    open_flags = bsddb3.db.DB_CREATE | bsddb3.db.DB_EXCL

    # Delete and ignore "not there" rather than testing os.path.exists()
    # first: the check-then-delete form is race-prone, and a leftover file
    # would make the DB_EXCL open below fail.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass

    bdb.open(filename, dbtype=bsddb3.db.DB_BTREE, flags=open_flags)
    return bdb
def write_to_the_file(filename, data):
    """Write ``data`` into a new Berkeley DB file and move it to ``filename``.

    The records are first written to ``<filename>.new``.  That file is moved
    over ``filename`` only after every record has been stored and the
    database has been closed, so a failed write never replaces ``filename``.

    Args:
        filename: Final path of the database file.
        data: Mapping whose keys are ``str`` (stored UTF-8 encoded) and whose
            values are picklable objects (stored with pickle protocol 2).
    """
    bdb_filename = f'{filename}.new'
    bdb = create_bdb_object(bdb_filename)
    try:
        for url, record in data.items():
            bdb.put(url.encode(), pickle.dumps(record, protocol=2))
    finally:
        # Close even when a put()/dumps() fails so the handle is not leaked.
        bdb.close()
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename raises on Windows when the target already exists.
    os.replace(bdb_filename, filename)
def read_bdb(bdb_filename):
    """Print every record stored in the Berkeley DB file ``bdb_filename``.

    Walks the database with a cursor from the first record onwards and
    prints a running record number, the raw key and the unpickled value.

    Args:
        bdb_filename: Path of an existing database file.
    """
    bdb = bsddb3.db.DB()
    bdb.set_flags(bsddb3.db.DB_DUP | bsddb3.db.DB_DUPSORT)
    bdb.open(bdb_filename)
    try:
        bdb_cursor = bdb.cursor()
        try:
            record = bdb_cursor.first()
            counter = 1
            while record:
                key, raw_value = record
                # NOTE: pickle.loads can execute arbitrary code; only read
                # database files that come from a trusted source.
                value = pickle.loads(raw_value)
                print('Record num: %s, key: %s, value: %s' % (counter, key, value))
                record = bdb_cursor.next()
                counter += 1
        finally:
            # Release the cursor even if unpickling or printing fails.
            bdb_cursor.close()
    finally:
        bdb.close()
def main():
    """Write nine sample records to a Berkeley DB file, then print them back."""
    # Keys 'www.example1.com' .. 'www.example9.com' mapped to
    # 'lorem ipsum 1' .. 'lorem ipsum 9', in ascending order.
    sample_records = {
        f'www.example{i}.com': f'lorem ipsum {i}' for i in range(1, 10)
    }
    db_path = '/tmp/bsddb.bdb'
    write_to_the_file(db_path, sample_records)
    read_bdb(db_path)
main()
它运行正常,我没能复现您的问题,下面是输出:
Record num: 1, key: b'www.example1.com', value: lorem ipsum 1
Record num: 2, key: b'www.example2.com', value: lorem ipsum 2
Record num: 3, key: b'www.example3.com', value: lorem ipsum 3
Record num: 4, key: b'www.example4.com', value: lorem ipsum 4
Record num: 5, key: b'www.example5.com', value: lorem ipsum 5
Record num: 6, key: b'www.example6.com', value: lorem ipsum 6
Record num: 7, key: b'www.example7.com', value: lorem ipsum 7
Record num: 8, key: b'www.example8.com', value: lorem ipsum 8
Record num: 9, key: b'www.example9.com', value: lorem ipsum 9
也许,您还有其他代码修改了这些数据。
https://stackoverflow.com/questions/56418344
复制相似问题