首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >Python文件查找+在文件中写入奇怪的"NUL“输出

Python文件查找+在文件中写入奇怪的"NUL“输出
EN

Stack Overflow用户
提问于 2015-05-31 10:16:08
回答 1查看 1.2K关注 0票数 1

我正在编写一个下载程序,它会将url拆分为各个部分并使用线程下载,可能我不会使用“联接”,因为join =无法流(如果所有线程都未完成,则无法写入文件)。

但是问题是f.seek和写输出的文件真的很奇怪,文件的内容总是有"NUL“字符(在Notepad++中),而文件中的文本只占整个文件的1/3。

大家,谢谢大家的帮助,这是我的2.0版代码,感谢Padraic Cunningham的建议和感叹,我修改我的代码就像你建议的那样:所以请帮我检查代码,我想需要你帮我把它转换成http.server文件流处理方法:

代码语言:javascript
复制
import os, requests
import threading
import urllib3
import urllib.request, urllib.error, urllib.parse
import time
import re

pool = urllib3.PoolManager(maxsize=10)
URL = "https://raw.githubusercontent.com/langpavel/tampermonkey/master/src/emulation.js"
fileName = "1.js"
countsize = 0
#if os.path.exists(fileName):
 #   os.remove(fileName)

def defwrite(filename,data,offset):
  f = open(filename,'wb')
  f.seek(offset)
  f.write(data)
  f.close()

def buildRange(url, numsplits):
    global pool
    value = int(requests.head(url, headers={'Accept-Encoding': 'identity'}).headers.get('content-length', None))
    print("Fullsize: ", value)
    print("Try devide with 3 :", value / 3)
    lst = []
    for i in range(numsplits):
        if i == range(numsplits):
            lst.append('%s-%s' % (i * value//numsplits + 1, i * value//numsplits + 1 + (value - (i * value//numsplits + 1))))
        if i == 0:
            lst.append('%s-%s' % (0, value//numsplits))
        else:
            lst.append('%s-%s' % (i * value//numsplits + 1, (i + 1) * value//numsplits))
    return lst

def main(url=None, splitBy=3):
    global fileName, pool, countsize
    start_time = time.time()
    if not url:
        print("Please Enter some url to begin download.")
        return

    #fileName = "1.jpg"

    #print("%s bytes to download." % sizeInBytes)
   # if not sizeInBytes:
    #    print("Size cannot be determined.")
     #   return
    #sinzeInBytes = buildRange(url, 
    dataDict = {}
    f = open(fileName,'wb')

    # split total num bytes into ranges
    #ranges = buildRange(url,int(sizeInBytes), splitBy)
    ranges = buildRange(url, splitBy)
    print(ranges)
    def downloadChunk(idx, irange):
        print(idx)
        #time.sleep(1*idx)
        #req = urllib.request.Request(url)
        #req.headers['Range'] = 'bytes={}'.format(irange)
        headers = urllib3._collections.HTTPHeaderDict()
        headers.add('Range', 'bytes=' + str(irange))
        data = pool.urlopen('GET', URL, headers=headers).data
        #print(data)
        #print("finish: " + str(irange))
        offset = int(re.sub("(^.*?)-(.*?)$", "\\1", irange))
        print(offset)
       # print(irange)
        f.seek(offset, 0)
        #f.truncate(0)
        #print(f.tell())
        f.write(data)
        #f.read()
        #f.close()
        countsize = countsize + offset


        #defwrite("1.txt", req, re.sub("(^.*?)-", "\\1", str(irange)))

    # create one downloading thread per chunk
    downloaders = [
        threading.Thread(
            target=downloadChunk,
            args=(idx, irange),
        )
        for idx,irange in enumerate(ranges)
        ]


    # start threads, let run in parallel, wait for all to finish
    for th in downloaders:
        th.start()
        #th.isAlive()
    #for th in downloaders:
        #th.join()
        #print(th.join)
    print(countsize)
    #print('done: got {} chunks, total {} bytes'.format(
    #    len(dataDict), sum( (
    ##        len(chunk) for chunk in list(dataDict.values())
     #   ) )
    #))

    #print("--- %s seconds ---" % str(time.time() - start_time))

#    if os.path.exists(fileName):
 #       os.remove(fileName)
     #reassemble file in correct order
    #with open(fileName, 'wb') as fh:
    #    for _idx,chunk in sorted(dataDict.items()):
    #        fh.write(chunk)
    #stream_chunk = 16 * 1024
    #with open(fileName, 'wb') as fp:
    #  while True:
    #      for _idx,chunk in sorted(dataDict.items()):
            #fh.write(chunk)
     #       chunking = chunk.read(stream_chunk)
      #      if not chunk:
       #         break
        #    fp.write(chunking)


   # print("Finished Writing file %s" % fileName)
    #print('file size {} bytes'.format(os.path.getsize(fileName)))

if __name__ == '__main__':
   if os.path.exists(fileName):
     os.remove(fileName)
   main(URL, splitBy=16)

下面是我的代码,请帮助我修复它: 1.0版本,忽略它,2.0版本:

代码语言:javascript
复制
import os, requests
import threading
import urllib3
import urllib.request, urllib.error, urllib.parse
import time
import re

pool = urllib3.PoolManager(maxsize=10)
URL = "https://raw.githubusercontent.com/langpavel/tampermonkey/master/src/emulation.js"
fileName = "1.js"
#if os.path.exists(fileName):
 #   os.remove(fileName)

def defwrite(filename,data,offset):
  f = open(filename,'wb')
  f.seek(offset)
  f.write(data)
  f.close()

def buildRange(value, numsplits):
    lst = []
    for i in range(numsplits):
        if i == range(numsplits):
            lst.append('%s-%s' % (int(round(1 + i * value/(numsplits*1.0),0)), int(value - round(1 + i * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))
        if i == 0:
            lst.append('%s-%s' % (i, int(round(1 + i * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))
        else:
            lst.append('%s-%s' % (int(round(1 + i * value/(numsplits*1.0),0)), int(round(1 + i * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))
    return lst

def main(url=None, splitBy=3):
    global fileName, pool
    start_time = time.time()
    if not url:
        print("Please Enter some url to begin download.")
        return

    #fileName = "1.jpg"
    sizeInBytes = requests.head(url, headers={'Accept-Encoding': 'identity'}).headers.get('content-length', None)
    print("%s bytes to download." % sizeInBytes)
    if not sizeInBytes:
        print("Size cannot be determined.")
        return

    dataDict = {}

    # split total num bytes into ranges
    ranges = buildRange(int(sizeInBytes), splitBy)

    def downloadChunk(idx, irange):
        print(idx)
        #req = urllib.request.Request(url)
        #req.headers['Range'] = 'bytes={}'.format(irange)
        headers = urllib3._collections.HTTPHeaderDict()
        headers.add('Range', 'bytes=' + str(irange))
        data = pool.urlopen('GET', URL, headers=headers).data
        print(data)
        print("finish: " + str(irange))
        offset = int(re.sub("(^.*?)-(.*?)$", "\\1", irange))
        #print(offset)
       # print(irange)
        f = open(fileName,'wb')
        f.seek(offset)
        #f.truncate(0)
        #print(f.tell())
        f.write(data)
        #f.read()
        #f.close()



        #defwrite("1.txt", req, re.sub("(^.*?)-", "\\1", str(irange)))

    # create one downloading thread per chunk
    downloaders = [
        threading.Thread(
            target=downloadChunk,
            args=(idx, irange),
        )
        for idx,irange in enumerate(ranges)
        ]

    # start threads, let run in parallel, wait for all to finish
    for th in downloaders:
        th.start()
        #th.isAlive()
    #for th in downloaders:
        #th.join()
        #print(th.join)

    #print('done: got {} chunks, total {} bytes'.format(
    #    len(dataDict), sum( (
    ##        len(chunk) for chunk in list(dataDict.values())
     #   ) )
    #))

    #print("--- %s seconds ---" % str(time.time() - start_time))

#    if os.path.exists(fileName):
 #       os.remove(fileName)
     #reassemble file in correct order
    #with open(fileName, 'wb') as fh:
    #    for _idx,chunk in sorted(dataDict.items()):
    #        fh.write(chunk)
    #stream_chunk = 16 * 1024
    #with open(fileName, 'wb') as fp:
    #  while True:
    #      for _idx,chunk in sorted(dataDict.items()):
            #fh.write(chunk)
     #       chunking = chunk.read(stream_chunk)
      #      if not chunk:
       #         break
        #    fp.write(chunking)


   # print("Finished Writing file %s" % fileName)
    #print('file size {} bytes'.format(os.path.getsize(fileName)))

if __name__ == '__main__':
    main(URL, splitBy=3)
EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2015-05-31 10:23:50

在目标函数为downloadChunk的情况下,使用三个线程,使用wb打开文件三次,覆盖,从而获得1/3的内容。你也会无缘无故地呼吁寻求。如果您想附加到文件中,每次都要使用a打开,或者只在函数之外打开一次。您正在尝试使用空文件进行查找并写入,因此空字节就是从那里来的。

如果您想打开一个用于读写的文件,那么您可以使用行缓冲来查找:

代码语言:javascript
复制
 with open("whatever.file", "r+b",buffering=1) as f

然后使用该文件进行写入,不要一直在函数中打开和覆盖,该文件也必须存在。

票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/30556258

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档