问Python文件查找+在文件中写入奇怪的"NUL“输出
EN

Stack Overflow用户

提问于 2015-05-31 10:16:08

回答 1查看 1.2K关注 0票数 1

我正在编写一个下载程序，它会将url拆分为各个部分并使用线程下载，可能我不会使用“联接”，因为join =无法流(如果所有线程都未完成，则无法写入文件)。

但是问题是f.seek和写输出的文件真的很奇怪，文件的内容总是有"NUL“字符(在Notepad++中)，而文件中的文本只占整个文件的1/3。

大家，谢谢大家的帮助，这是我的2.0版代码，感谢Padraic Cunningham的建议和感叹，我修改我的代码就像你建议的那样:所以请帮我检查代码，我想需要你帮我把它转换成http.server文件流处理方法：

import os, requests
import threading
import urllib3
import urllib.request, urllib.error, urllib.parse
import time
import re

pool = urllib3.PoolManager(maxsize=10)
URL = "https://raw.githubusercontent.com/langpavel/tampermonkey/master/src/emulation.js"
fileName = "1.js"
countsize = 0
#if os.path.exists(fileName):
 #   os.remove(fileName)

def defwrite(filename,data,offset):
  f = open(filename,'wb')
  f.seek(offset)
  f.write(data)
  f.close()

def buildRange(url, numsplits):
    global pool
    value = int(requests.head(url, headers={'Accept-Encoding': 'identity'}).headers.get('content-length', None))
    print("Fullsize: ", value)
    print("Try devide with 3 :", value / 3)
    lst = []
    for i in range(numsplits):
        if i == range(numsplits):
            lst.append('%s-%s' % (i * value//numsplits + 1, i * value//numsplits + 1 + (value - (i * value//numsplits + 1))))
        if i == 0:
            lst.append('%s-%s' % (0, value//numsplits))
        else:
            lst.append('%s-%s' % (i * value//numsplits + 1, (i + 1) * value//numsplits))
    return lst

def main(url=None, splitBy=3):
    global fileName, pool, countsize
    start_time = time.time()
    if not url:
        print("Please Enter some url to begin download.")
        return

    #fileName = "1.jpg"

    #print("%s bytes to download." % sizeInBytes)
   # if not sizeInBytes:
    #    print("Size cannot be determined.")
     #   return
    #sinzeInBytes = buildRange(url, 
    dataDict = {}
    f = open(fileName,'wb')

    # split total num bytes into ranges
    #ranges = buildRange(url,int(sizeInBytes), splitBy)
    ranges = buildRange(url, splitBy)
    print(ranges)
    def downloadChunk(idx, irange):
        print(idx)
        #time.sleep(1*idx)
        #req = urllib.request.Request(url)
        #req.headers['Range'] = 'bytes={}'.format(irange)
        headers = urllib3._collections.HTTPHeaderDict()
        headers.add('Range', 'bytes=' + str(irange))
        data = pool.urlopen('GET', URL, headers=headers).data
        #print(data)
        #print("finish: " + str(irange))
        offset = int(re.sub("(^.*?)-(.*?)$", "\\1", irange))
        print(offset)
       # print(irange)
        f.seek(offset, 0)
        #f.truncate(0)
        #print(f.tell())
        f.write(data)
        #f.read()
        #f.close()
        countsize = countsize + offset


        #defwrite("1.txt", req, re.sub("(^.*?)-", "\\1", str(irange)))

    # create one downloading thread per chunk
    downloaders = [
        threading.Thread(
            target=downloadChunk,
            args=(idx, irange),
        )
        for idx,irange in enumerate(ranges)
        ]


    # start threads, let run in parallel, wait for all to finish
    for th in downloaders:
        th.start()
        #th.isAlive()
    #for th in downloaders:
        #th.join()
        #print(th.join)
    print(countsize)
    #print('done: got {} chunks, total {} bytes'.format(
    #    len(dataDict), sum( (
    ##        len(chunk) for chunk in list(dataDict.values())
     #   ) )
    #))

    #print("--- %s seconds ---" % str(time.time() - start_time))

#    if os.path.exists(fileName):
 #       os.remove(fileName)
     #reassemble file in correct order
    #with open(fileName, 'wb') as fh:
    #    for _idx,chunk in sorted(dataDict.items()):
    #        fh.write(chunk)
    #stream_chunk = 16 * 1024
    #with open(fileName, 'wb') as fp:
    #  while True:
    #      for _idx,chunk in sorted(dataDict.items()):
            #fh.write(chunk)
     #       chunking = chunk.read(stream_chunk)
      #      if not chunk:
       #         break
        #    fp.write(chunking)


   # print("Finished Writing file %s" % fileName)
    #print('file size {} bytes'.format(os.path.getsize(fileName)))

if __name__ == '__main__':
   if os.path.exists(fileName):
     os.remove(fileName)
   main(URL, splitBy=16)

下面是我的代码，请帮助我修复它: 1.0版本，忽略它，2.0版本：

import os, requests
import threading
import urllib3
import urllib.request, urllib.error, urllib.parse
import time
import re

pool = urllib3.PoolManager(maxsize=10)
URL = "https://raw.githubusercontent.com/langpavel/tampermonkey/master/src/emulation.js"
fileName = "1.js"
#if os.path.exists(fileName):
 #   os.remove(fileName)

def defwrite(filename,data,offset):
  f = open(filename,'wb')
  f.seek(offset)
  f.write(data)
  f.close()

def buildRange(value, numsplits):
    lst = []
    for i in range(numsplits):
        if i == range(numsplits):
            lst.append('%s-%s' % (int(round(1 + i * value/(numsplits*1.0),0)), int(value - round(1 + i * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))
        if i == 0:
            lst.append('%s-%s' % (i, int(round(1 + i * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))
        else:
            lst.append('%s-%s' % (int(round(1 + i * value/(numsplits*1.0),0)), int(round(1 + i * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))
    return lst

def main(url=None, splitBy=3):
    global fileName, pool
    start_time = time.time()
    if not url:
        print("Please Enter some url to begin download.")
        return

    #fileName = "1.jpg"
    sizeInBytes = requests.head(url, headers={'Accept-Encoding': 'identity'}).headers.get('content-length', None)
    print("%s bytes to download." % sizeInBytes)
    if not sizeInBytes:
        print("Size cannot be determined.")
        return

    dataDict = {}

    # split total num bytes into ranges
    ranges = buildRange(int(sizeInBytes), splitBy)

    def downloadChunk(idx, irange):
        print(idx)
        #req = urllib.request.Request(url)
        #req.headers['Range'] = 'bytes={}'.format(irange)
        headers = urllib3._collections.HTTPHeaderDict()
        headers.add('Range', 'bytes=' + str(irange))
        data = pool.urlopen('GET', URL, headers=headers).data
        print(data)
        print("finish: " + str(irange))
        offset = int(re.sub("(^.*?)-(.*?)$", "\\1", irange))
        #print(offset)
       # print(irange)
        f = open(fileName,'wb')
        f.seek(offset)
        #f.truncate(0)
        #print(f.tell())
        f.write(data)
        #f.read()
        #f.close()



        #defwrite("1.txt", req, re.sub("(^.*?)-", "\\1", str(irange)))

    # create one downloading thread per chunk
    downloaders = [
        threading.Thread(
            target=downloadChunk,
            args=(idx, irange),
        )
        for idx,irange in enumerate(ranges)
        ]

    # start threads, let run in parallel, wait for all to finish
    for th in downloaders:
        th.start()
        #th.isAlive()
    #for th in downloaders:
        #th.join()
        #print(th.join)

    #print('done: got {} chunks, total {} bytes'.format(
    #    len(dataDict), sum( (
    ##        len(chunk) for chunk in list(dataDict.values())
     #   ) )
    #))

    #print("--- %s seconds ---" % str(time.time() - start_time))

#    if os.path.exists(fileName):
 #       os.remove(fileName)
     #reassemble file in correct order
    #with open(fileName, 'wb') as fh:
    #    for _idx,chunk in sorted(dataDict.items()):
    #        fh.write(chunk)
    #stream_chunk = 16 * 1024
    #with open(fileName, 'wb') as fp:
    #  while True:
    #      for _idx,chunk in sorted(dataDict.items()):
            #fh.write(chunk)
     #       chunking = chunk.read(stream_chunk)
      #      if not chunk:
       #         break
        #    fp.write(chunking)


   # print("Finished Writing file %s" % fileName)
    #print('file size {} bytes'.format(os.path.getsize(fileName)))

if __name__ == '__main__':
    main(URL, splitBy=3)

python

回答 1

Stack Overflow用户

回答已采纳

发布于 2015-05-31 10:23:50

在目标函数为downloadChunk的情况下，使用三个线程，使用wb打开文件三次，覆盖，从而获得1/3的内容。你也会无缘无故地呼吁寻求。如果您想附加到文件中，每次都要使用a打开，或者只在函数之外打开一次。您正在尝试使用空文件进行查找并写入，因此空字节就是从那里来的。

如果您想打开一个用于读写的文件，那么您可以使用行缓冲来查找：

 with open("whatever.file", "r+b",buffering=1) as f

然后使用该文件进行写入，不要一直在函数中打开和覆盖，该文件也必须存在。

票数 1

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/30556258

复制

相似问题

问Python文件查找+在文件中写入奇怪的"NUL“输出
EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问Python文件查找+在文件中写入奇怪的"NUL“输出EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问Python文件查找+在文件中写入奇怪的"NUL“输出
EN