考虑一个问题:
例如,如果我有一个大小为 4 GB 的文件,并且拆分大小设为 1 GB,那么结果应该是四个大小各为 1 GB 的文件。
我正在寻找基于 Rx*/Bacon 这类库的解决方案,或者任何语言中的类似库也可以。
发布于 2015-06-03 13:28:13
我用 CoffeeScript 和 Highland.js 实现的解决方案:
_ = require('underscore')
H = require('highland')
fs = require('fs')
debug = require('debug')
log = debug('main')
assert = require('assert')
# Source of the pipeline: the dump file is read as a Node stream, handed to
# Highland, and every chunk is boxed as { buffer: <chunk> } because the scan
# step further down destructures its input as {buffer}.
dumpInput = fs.createReadStream('walmart.dump')
readS = H(dumpInput).map (buffer) -> { buffer: buffer }
# Rollover threshold in bytes: the splitting step starts a new output file
# once the bytes counted for the current file exceed this value.
MAX_SIZE = 10 ** 7

# Number of output files opened so far; doubles as the index of the next file.
counter = 0

# Opens the next output file, result/data<counter>.txt, and bumps `counter`.
#
# Returns the file's `write` method wrapped with H.wrapCallback, i.e. a
# function that takes a chunk and returns a Highland stream for that write.
#
# NOTE(review): createWriteStream does not create the result/ directory, so
# it presumably has to exist before the script runs - confirm.
# NOTE(review): nothing in this script calls end() on the streams opened
# here, so every output file appears to stay open until the process exits.
nextStream = ->
  target = fs.createWriteStream("result/data#{counter}.txt")
  counter += 1
  H.wrapCallback(target.write.bind(target))
debug('profile')('start')

# Pair every chunk with the writer of the output file it belongs to.
#
# The scan state is { size, stream, buffer }:
#   size   - bytes counted so far for the current output file
#   stream - writer returned by nextStream() for the current output file
#   buffer - the chunk to write (absent on the seed state)
#
# A fresh state object is returned on every step instead of mutating and
# re-emitting one shared accumulator, so an item that is still queued
# downstream can never have its buffer/stream overwritten by a later step.
s = readS.scan({
  size: 0
  stream: nextStream()
}, (acc, {buffer}) ->
  debug('scan')(acc, buffer)
  size = acc.size + buffer.length
  stream = acc.stream
  if size > MAX_SIZE
    debug('notify')(counter - 1, size)
    # The chunk that crossed the threshold is written to the NEW file, so
    # the new file's count starts at this chunk's length. Resetting it to 0
    # let every later file overshoot MAX_SIZE by one chunk.
    size = buffer.length
    stream = nextStream()
  state = { size: size, stream: stream, buffer: buffer }
  # NOTE(review): the flattened source does not show whether this log call
  # sat inside the `if` above; it is assumed to run on every step.
  log(state)
  state
).filter((x) -> x.buffer?)  # drop the seed state, which carries no chunk

# The former `s.parallel 4` statement was removed: its return value was
# discarded and `s` is not a stream of streams, so it had no effect.

# Write the chunks one after another. The chain is intentionally left open:
# the `.done` call on the following line attaches to this expression.
s.flatMap((x) ->
  debug('flatMap')(x)
  x.stream(x.buffer)
)
.done -> debug('profile')('finish')

walmart.dump 是一个包含 6 GB 文本的文本文件。将其拆分为 649 个文件所需的时间如下:
profile start +0ms
profile finish +53s

https://stackoverflow.com/questions/30591188
复制相似问题