首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >使用校验和创建Zsync文件存档

使用校验和创建Zsync文件存档
EN

Code Review用户
提问于 2018-10-02 07:04:02
回答 1查看 102关注 0票数 4

我正在通过实践来学习。我试图将Java的ZsyncMake实现移植到Golang,同时也在用goroutine和通道来练习Go的并发API。我有一些Java方面的经验,但从未使用过编译为本机代码的语言。一个立即出现的问题是,Golang中的int与int32并不一样(int的大小依赖于平台,而Java的int固定为4字节),因此我大部分时间都需要进行类型转换。

这是我的代码。在一些注释中,我写了[ASK],以表明我不确定这是否是Go中正确的实现方式。

代码语言:javascript
复制
package zsync

import (
    "bufio"
    "crypto/sha1"
    "encoding/binary"
    "encoding/hex"
    "goZsyncmake/md4"
    "goZsyncmake/zsyncOptions"
    "hash"
    "io"
    "log"
    "math"
    "os"
    "strconv"
    "time"
)

// ZSYNC_VERSION is the version string written on the "zsync: " line of the header.
var ZSYNC_VERSION = "0.6.2"
// BLOCK_SIZE_SMALL is the default block size chosen for inputs smaller than 100*1<<20 bytes.
var BLOCK_SIZE_SMALL = 2048
// BLOCK_SIZE_LARGE is the default block size chosen for all other inputs.
var BLOCK_SIZE_LARGE = 4096

// ZsyncMake generates the zsync control file for the file at path.
//
// writeToFile supplies the checksum bytes, the header text and the output
// file name; this function creates that output file and writes the header
// followed by the checksum bytes. Any failure terminates the process through
// log.Fatal, which is the error handling used throughout this file.
func ZsyncMake(path string, options zsyncOptions.Options) {
    checksum, headers, zsyncFilePath := writeToFile(path, options)

    zsyncFile, err := os.Create(zsyncFilePath)
    if err != nil {
        log.Fatal(err)
    }

    bfio := bufio.NewWriter(zsyncFile)
    if _, err := bfio.WriteString(headers); err != nil {
        log.Fatal(err)
    }
    if _, err := bfio.Write(checksum); err != nil {
        log.Fatal(err)
    }

    // Buffered data only reaches the file on Flush; ignoring its error would
    // leave a silently truncated control file.
    if err := bfio.Flush(); err != nil {
        log.Fatal(err)
    }

    // Closed explicitly instead of deferred: log.Fatal exits without running
    // deferred calls, and Close itself can report a delayed write error.
    if err := zsyncFile.Close(); err != nil {
        log.Fatal(err)
    }
}

// writeToFile gathers everything ZsyncMake needs to emit the control file for
// the file at path. Despite its name it does not write anything itself.
//
// It returns, in order: the checksum bytes produced by computeChecksum, the
// textual header (ending with an empty line), and the output file name, which
// is the input file name with ".zsync" appended. Failing to open or stat the
// input terminates the process through log.Fatal.
func writeToFile(path string, options zsyncOptions.Options) ([]byte, string, string) {
    file, err := os.Open(path)
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    outputFileName := file.Name() + ".zsync"

    fileInfo, err := file.Stat()
    if err != nil {
        log.Fatal(err)
    }

    // Fill in any option the caller left at its zero value.
    opts := calculateMissingValues(options, file)

    blockSize := opts.BlockSize
    fileLength := fileInfo.Size()

    // Compare against the resolved block size. options.BlockSize is still 0
    // when the caller relies on the default, which would select two sequence
    // matches for every non-empty file.
    sequenceMatches := 1
    if fileLength > int64(blockSize) {
        sequenceMatches = 2
    }

    // Locals are named weakLen/strongLen so they do not shadow the helper
    // functions of the same name as the original locals did.
    weakLen := weakChecksumLength(fileLength, blockSize, sequenceMatches)
    strongLen := strongChecksumLength(fileLength, blockSize, sequenceMatches)

    fileDigest := sha1.New()
    blockDigest := md4.New()

    checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakLen, strongLen, fileDigest, blockDigest)
    strFileChecksum := hex.EncodeToString(fileChecksum)

    // [ASK] I suspect I can improve performance here rather than appending string with +
    // Length is formatted from the int64 directly so it is not truncated on
    // platforms where int is 32 bits wide.
    strHeader := "zsync: " + ZSYNC_VERSION + "\n" +
        "Filename: " + fileInfo.Name() + "\n" +
        "MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "\n" +
        "Blocksize: " + strconv.Itoa(blockSize) + "\n" +
        "Length: " + strconv.FormatInt(fileLength, 10) + "\n" +
        "Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakLen) + "," + strconv.Itoa(strongLen) + "\n" +
        "URL: " + opts.Url + "\n" +
        "SHA-1: " + strFileChecksum + "\n\n"

    return checksum, strHeader, outputFileName
}

// sha1HashFile streams the file at path through SHA-1 and sends the resulting
// digest on fileChecksumChannel. Failing to open or read the file terminates
// the process through log.Fatal. The send blocks until a receiver is ready
// when the channel is unbuffered.
func sha1HashFile(path string, fileChecksumChannel chan []byte) {
    src, openErr := os.Open(path)
    if openErr != nil {
        log.Fatal(openErr)
    }
    defer src.Close()

    digest := sha1.New()
    _, copyErr := io.Copy(digest, src)
    if copyErr != nil {
        log.Fatal(copyErr)
    }

    sum := digest.Sum(nil)
    fileChecksumChannel <- sum
}

// computeChecksum reads f block by block and builds the checksum section of
// the control file.
//
// For every block of blocksize bytes (the last one zero-padded) it appends the
// trailing weakLen bytes of the big-endian rolling checksum, followed by the
// first strongLen bytes of blockDigest's sum. The SHA-1 of the whole file is
// computed concurrently by sha1HashFile, which reopens the file by name, and
// is appended after the last block.
//
// It returns the accumulated checksum bytes and the file's SHA-1. fileLength
// and fileDigest are accepted but not used. A read error terminates the
// process through log.Fatal.
func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) ([]byte, []byte) {
    var checksumBytes []byte
    block := make([]byte, blocksize)
    weakBytes := make([]byte, 4) // scratch buffer reused for every block

    fileChecksumChannel := make(chan []byte)
    go sha1HashFile(f.Name(), fileChecksumChannel)

    for {
        // io.ReadFull keeps reading until the block is full or the file ends.
        // A bare Read may return fewer bytes in the middle of the file, which
        // would zero-pad a partial block and shift every later boundary.
        read, err := io.ReadFull(f, block)
        if err == io.EOF {
            break
        }
        if err != nil && err != io.ErrUnexpectedEOF {
            log.Fatal(err)
        }

        // Zero-pad a short final block so bytes left over from the previous
        // iteration are not hashed.
        for i := read; i < blocksize; i++ {
            block[i] = 0
        }

        binary.BigEndian.PutUint32(weakBytes, uint32(computeRsum(block)))
        checksumBytes = append(checksumBytes, weakBytes[len(weakBytes)-weakLen:]...)

        blockDigest.Reset()
        blockDigest.Write(block)
        checksumBytes = append(checksumBytes, blockDigest.Sum(nil)[:strongLen]...)
    }

    fileChecksum := <-fileChecksumChannel
    checksumBytes = append(checksumBytes, fileChecksum...)

    return checksumBytes, fileChecksum
}

 // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// strongChecksumLength returns the number of strong-checksum bytes to store
// per block, derived from the file length, the block size and the number of
// sequence matches. The result never exceeds 16.
func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
    ln2 := math.Log(2)
    blocks := float64(1 + fileLength/int64(blocksize))

    // Estimate before the sequence-match reduction.
    estimate := (math.Log(float64(fileLength))+math.Log(blocks))/ln2 + 20

    // First candidate: reduced by the number of sequence matches, rounded up.
    reduced := math.Ceil(estimate / float64(sequenceMatches) / 8)

    // Second candidate: depends on the block count only and is not reduced.
    unreduced := (math.Log(blocks)/ln2 + 20 + 7.9) / 8

    // Take the larger candidate, cap it at 16 (MD4 max) and truncate.
    longest := math.Max(reduced, unreduced)
    return int(math.Min(16, longest))
}

// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go    
// weakChecksumLength returns the number of rolling-checksum bytes to store per
// block, derived from the file length, the block size and the number of
// sequence matches. The result is always between 2 and 4 inclusive.
func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
    // estimated size of the rolling checksum per formula in
    // Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
    // (the division by 8 below converts it to bytes)
    estimate := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6

    // reduced by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
    byteCount := math.Ceil(estimate / float64(sequenceMatches) / 8)

    // Enforce the bounds while still in floating point. For an empty file the
    // estimate is -Inf, and converting an infinite or NaN float to int gives
    // an implementation-dependent result, so only values already known to be
    // in range are converted.
    switch {
    case byteCount >= 4:
        return 4
    case byteCount >= 2:
        return int(byteCount)
    default:
        // Below the minimum, -Inf or NaN.
        return 2
    }
}

// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// computeRsum computes the rolling checksum of block.
//
// Two 16-bit accumulators are kept: a is the sum of all bytes, and b is the
// sum of each byte multiplied by its distance from the end of the block
// (len(block) for the first byte, 1 for the last). Both wrap around as int16
// values. The result places a in the bits above bit 15 and b in the low 16
// bits; callers that need the 32-bit checksum convert it with uint32(...).
func computeRsum(block []byte) int {
    var a, b int16
    weight := len(block)
    for _, v := range block {
        // A Go byte is already unsigned, so it can be widened directly.
        val := int(v)
        a += int16(val)
        b += int16(weight * val)
        weight--
    }
    return int(a)<<16 | int(b)&0xffff
}

// unsign returns b unchanged. byte is an alias for uint8 and can never be
// negative, so no masking is needed; the function is kept for existing callers.
func unsign(b byte) uint8 {
    return b
}

// calculateMissingValues returns a copy of opts in which every field left at
// its zero value is filled in: Url and Filename fall back to the name of f,
// and BlockSize falls back to calculateDefaultBlockSizeForInputFile(f).
// Fields that are already set are returned unchanged.
func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
    resolved := opts

    if resolved.Url == "" {
        resolved.Url = f.Name()
    }
    if resolved.Filename == "" {
        resolved.Filename = f.Name()
    }
    if resolved.BlockSize == 0 {
        resolved.BlockSize = calculateDefaultBlockSizeForInputFile(f)
    }

    return resolved
}

func calculateDefaultBlockSizeForInputFile(f *os.File) int {
    fileInfo, err := f.Stat()
    if err != nil {
        log.Fatal(err)
    }
    if fileInfo.Size() < 100*1<<20 {
        return BLOCK_SIZE_SMALL
    } else {
        return BLOCK_SIZE_LARGE
    }
}

此外,来自Java背景,我习惯于模块化所有的东西,包括这个选项结构到其他文件。我应该把它模块化吗?

代码语言:javascript
复制
package zsyncOptions

// Options carries the caller-supplied settings for generating a zsync file.
// Fields left at their zero value are filled in by calculateMissingValues.
type Options struct {
    // BlockSize is the block size in bytes; 0 means a default is chosen from the input file's size.
    BlockSize int
    // Filename defaults to the input file's name when empty.
    Filename  string
    // Url is written on the header's "URL: " line; it defaults to the input file's name when empty.
    Url       string
}
EN

回答 1

Code Review用户

回答已采纳

发布于 2018-12-29 00:52:50

总是返回错误

不要到处使用log.Fatal()!相反,应该返回带有上下文信息的错误,如下所示:

代码语言:javascript
复制
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
    return fmt.Errorf("fail to compute checksum: %v", err)
}

直接写入bufio.Writer

您是对的,通过拼接字符串来创建标头并不是最好的方法。相反,我们可以直接将标头内容写入bufio.Writer:

代码语言:javascript
复制
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('\n')

buf.WriteString("Filename: ")

……

还可以使用fmt.Fprintf()在单个调用中执行此操作。

代码语言:javascript
复制
    fmt.Fprintf(buf, `zsync: %s
Filename: %s
MTime: %s
Blocksize: %d
Length: %d
Hash-Lengths: %d,%d,%d
URL: %s
SHA-1: %s

%s`,
        version,
        fileInfo.Name(),
        fileInfo.ModTime().Format(time.RFC1123Z),
        options.BlockSize,
        fileLength,
        sequenceMatches, weakChecksumLength, strongChecksumLength,
        options.Url,
        hex.EncodeToString(fileChecksum),
        checksum,
    )

只读取一次文件

读取文件的开销很大,应该只读一次。可以用io/ioutil包将文件的内容读入一个字节切片。

代码语言:javascript
复制
fileByte, err := ioutil.ReadFile(options.Filename)

然后,您可以在一行中计算它的SHA1校验和:

代码语言:javascript
复制
fileChecksum := sha1.Sum(fileByte)

然后从它创建一个*Reader

代码语言:javascript
复制
reader := bytes.NewReader(fileByte)
for {
        read, err := reader.Read(block)
        ...
}

这还允许我们将文件长度作为int获取,如下所示:

代码语言:javascript
复制
fileLength := len(fileByte)

方法参数

有些参数是未使用或冗余的,例如path和options.Filename,应该删除其中多余的。您还可以把类型相同的参数合并声明,以获得更高的可读性:

代码语言:javascript
复制
func computeChecksum(fileByte []byte, blockSize int, weakLen int, strongLen int) ([]byte, []byte, error) {

可以写成这样:

代码语言:javascript
复制
func computeChecksum(fileByte []byte, blockSize, weakLen, strongLen int) ([]byte, []byte, error) {

Go还允许命名返回以获得更清晰的信息。当一个方法返回两个类型相同的值时,它是有用的,如这里所示。方法签名可以是:

代码语言:javascript
复制
func computeChecksum(fileByte []byte, blockSize, weakLen, strongLen int) (checksum []byte, fileChecksum []byte, err error) {

保持简单

在这段代码中不需要通道和goroutines,它只会使代码更难读。尽量保持代码的整洁和简单。如果您面临性能问题,请分析代码并优化热点。

可以将computeRsum方法简化为直接返回uint32:

代码语言:javascript
复制
// computeRsum computes the 32-bit rolling checksum of block.
//
// a is the sum of all bytes and b is the sum of each byte multiplied by its
// distance from the end of the block (len(block) for the first byte, 1 for the
// last). The low 16 bits of a form the upper half of the result and the low
// 16 bits of b form the lower half.
func computeRsum(block []byte) uint32 {
    var a, b uint32
    weight := uint32(len(block))

    // A byte is unsigned in Go, so no sign correction is needed before
    // widening; unsigned arithmetic wraps modulo 2^32, which leaves the low
    // 16 bits of both sums intact.
    for _, v := range block {
        a += uint32(v)
        b += weight * uint32(v)
        weight--
    }
    return a<<16 | b&0xffff
}

对常量使用const块

这块

代码语言:javascript
复制
// Declared with var, so these values can be reassigned at run time.
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096

应该这样改写:

代码语言:javascript
复制
// Immutable replacements for the package-level variables, named in MixedCaps.
const (
    version        = "0.6.2"
    blockSizeSmall = 2048
    blockSizeLarge = 4096
)

const是不可变的,这正是我们想要的。还请注意,在Go中命名常量时首选使用驼峰式命名(CamelCase)。

最终版本

代码语言:javascript
复制
package zsync

import (
    "bufio"
    "bytes"
    "crypto/md5"
    "crypto/sha1"
    "encoding/binary"
    "encoding/hex"
    "fmt"
    "io"
    "io/ioutil"
    "log"
    "math"
    "os"
    "strconv"
    "time"
)

// Options carries the settings for generating a zsync file. writeToFile
// stores the defaults it chooses back into the struct it is given.
type Options struct {
    // BlockSize is the block size in bytes; 0 means a default is chosen from the input length.
    BlockSize int
    // Filename is the path of the input file; the output is written to Filename + ".zsync".
    Filename  string
    // Url is written on the header's "URL: " line; it defaults to Filename when empty.
    Url       string
}

const (
    // version is the string written on the "zsync: " line of the header.
    version        = "0.6.2"
    // blockSizeSmall is the default block size for inputs shorter than 100*1<<20 bytes.
    blockSizeSmall = 2048
    // blockSizeLarge is the default block size for all other inputs.
    blockSizeLarge = 4096
)

// ZsyncMake generates the zsync control file described by options by calling
// writeToFile, and terminates the process through log.Fatal if that fails.
func ZsyncMake(options *Options) {
    if err := writeToFile(options); err != nil {
        log.Fatal(err)
    }
}

// writeToFile reads the file named by options.Filename, computes its
// checksums and writes the zsync control file to options.Filename + ".zsync".
//
// Defaults are stored back into options: Url falls back to Filename, and a
// BlockSize of 0 is replaced by blockSizeSmall or blockSizeLarge depending on
// the input length. The returned error wraps the underlying cause, so
// errors.Is and errors.As still see it.
func writeToFile(options *Options) error {
    // Only the metadata is needed here, so the file is stat'ed by name instead
    // of being opened; the previous os.Open handle was never closed.
    fileInfo, err := os.Stat(options.Filename)
    if err != nil {
        return fmt.Errorf("stat input file: %w", err)
    }

    fileByte, err := ioutil.ReadFile(options.Filename)
    if err != nil {
        return fmt.Errorf("read input file: %w", err)
    }
    fileLength := len(fileByte)

    if options.Url == "" {
        options.Url = options.Filename
    }

    if options.BlockSize == 0 {
        if fileLength < 100*1<<20 {
            options.BlockSize = blockSizeSmall
        } else {
            options.BlockSize = blockSizeLarge
        }
    }

    sequenceMatches := 1
    if fileLength > options.BlockSize {
        sequenceMatches = 2
    }

    // Named weakLen/strongLen so the locals do not shadow the helper functions.
    weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
    strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

    checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
    if err != nil {
        return fmt.Errorf("fail to compute checksum: %w", err)
    }

    zsyncFile, err := os.Create(options.Filename + ".zsync")
    if err != nil {
        return fmt.Errorf("create zsync file: %w", err)
    }
    // Safety net for the early-return path; on success the file is closed
    // explicitly below and this second Close is a harmless no-op error.
    defer zsyncFile.Close()

    // bufio.Writer remembers the first write error and reports it from Flush,
    // so the individual writes below are not checked one by one.
    buf := bufio.NewWriter(zsyncFile)
    buf.WriteString("zsync: ")
    buf.WriteString(version)
    buf.WriteByte('\n')

    buf.WriteString("Filename: ")
    buf.WriteString(fileInfo.Name())
    buf.WriteByte('\n')

    buf.WriteString("MTime: ")
    buf.WriteString(fileInfo.ModTime().Format(time.RFC1123Z))
    buf.WriteByte('\n')

    buf.WriteString("Blocksize: ")
    buf.WriteString(strconv.Itoa(options.BlockSize))
    buf.WriteByte('\n')

    buf.WriteString("Length: ")
    buf.WriteString(strconv.Itoa(fileLength))
    buf.WriteByte('\n')

    buf.WriteString("Hash-Lengths: ")
    buf.WriteString(strconv.Itoa(sequenceMatches))
    buf.WriteByte(',')
    buf.WriteString(strconv.Itoa(weakLen))
    buf.WriteByte(',')
    buf.WriteString(strconv.Itoa(strongLen))
    buf.WriteByte('\n')

    buf.WriteString("URL: ")
    buf.WriteString(options.Url)
    buf.WriteByte('\n')

    buf.WriteString("SHA-1: ")
    buf.WriteString(hex.EncodeToString(fileChecksum))
    buf.WriteByte('\n')
    buf.WriteByte('\n')

    buf.Write(checksum)

    if err := buf.Flush(); err != nil {
        return fmt.Errorf("write zsync file: %w", err)
    }
    // Close can surface a delayed write error, so it is reported as well.
    if err := zsyncFile.Close(); err != nil {
        return fmt.Errorf("close zsync file: %w", err)
    }
    return nil
}

// computeChecksum builds the checksum section of the control file for the
// in-memory file contents fileByte.
//
// The data is processed in blocks of blockSize bytes, the last one
// zero-padded. For every block the trailing weakLen bytes of the big-endian
// rolling checksum are written, followed by the first strongLen bytes of the
// block's strong hash. The SHA-1 of the whole input is appended at the end.
//
// It returns the accumulated checksum bytes and the SHA-1 of fileByte. An
// error is returned when blockSize is not positive, weakLen is outside 0..4,
// or strongLen is outside 0..md5.Size; those values previously caused an
// endless loop or a panic.
func computeChecksum(fileByte []byte, blockSize, weakLen, strongLen int) ([]byte, []byte, error) {
    unsignedWeakByte := make([]byte, 4)

    if blockSize <= 0 {
        return nil, nil, fmt.Errorf("invalid block size %d", blockSize)
    }
    if weakLen < 0 || weakLen > len(unsignedWeakByte) {
        return nil, nil, fmt.Errorf("invalid weak checksum length %d", weakLen)
    }
    if strongLen < 0 || strongLen > md5.Size {
        return nil, nil, fmt.Errorf("invalid strong checksum length %d", strongLen)
    }

    reader := bytes.NewReader(fileByte)

    // Upper bound on the output size, so the buffer never has to grow.
    maxBlocks := len(fileByte)/blockSize + 1
    checksumBytes := bytes.NewBuffer(make([]byte, 0, maxBlocks*(weakLen+strongLen)+sha1.Size))
    block := make([]byte, blockSize)

    for {
        read, err := reader.Read(block)
        if err == io.EOF {
            break
        }
        if err != nil {
            return nil, nil, fmt.Errorf("fail to read block: %w", err)
        }

        // Zero-pad a short final block so bytes left over from the previous
        // iteration are not hashed.
        for i := read; i < blockSize; i++ {
            block[i] = 0
        }

        binary.BigEndian.PutUint32(unsignedWeakByte, computeRsum(block))
        checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])

        // NOTE(review): the comment in strongChecksumLength refers to MD4, but
        // this hashes with MD5 - confirm which one the consumer expects. MD5
        // is kept here because MD4 is not part of the standard library.
        strongBytes := md5.Sum(block)
        checksumBytes.Write(strongBytes[:strongLen])
    }

    fileChecksum := sha1.Sum(fileByte)
    checksumBytes.Write(fileChecksum[:])

    return checksumBytes.Bytes(), fileChecksum[:], nil
}

// computeRsum computes the 32-bit rolling checksum of block.
//
// a is the sum of all bytes and b is the sum of each byte multiplied by its
// distance from the end of the block (len(block) for the first byte, 1 for the
// last). The low 16 bits of a form the upper half of the result and the low
// 16 bits of b form the lower half.
func computeRsum(block []byte) uint32 {
    var a, b uint32
    weight := uint32(len(block))

    // A byte is unsigned in Go, so no sign correction is needed before
    // widening; unsigned arithmetic wraps modulo 2^32, which leaves the low
    // 16 bits of both sums intact.
    for _, v := range block {
        a += uint32(v)
        b += weight * uint32(v)
        weight--
    }
    return a<<16 | b&0xffff
}

// strongChecksumLength returns the number of strong-checksum bytes to store
// per block, derived from the file length, the block size and the number of
// sequence matches. The result never exceeds 16.
func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
    ln2 := math.Log(2)
    blocks := float64(1 + fileLength/blocksize)

    // Estimate before the sequence-match reduction.
    estimate := (math.Log(float64(fileLength))+math.Log(blocks))/ln2 + 20

    // First candidate: reduced by the number of sequence matches, rounded up.
    reduced := math.Ceil(estimate / float64(sequenceMatches) / 8)

    // Second candidate: depends on the block count only and is not reduced.
    unreduced := (math.Log(blocks)/ln2 + 20 + 7.9) / 8

    // Take the larger candidate, cap it at 16 (MD4 max) and truncate.
    longest := math.Max(reduced, unreduced)
    return int(math.Min(16, longest))
}

// weakChecksumLength returns the number of rolling-checksum bytes to store per
// block, derived from the file length, the block size and the number of
// sequence matches. The result is always between 2 and 4 inclusive.
func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
    // estimated size of the rolling checksum per formula in
    // Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
    // (the division by 8 below converts it to bytes)
    estimate := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6

    // reduced by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
    byteCount := math.Ceil(estimate / float64(sequenceMatches) / 8)

    // Enforce the bounds while still in floating point. For an empty file the
    // estimate is -Inf, and converting an infinite or NaN float to int gives
    // an implementation-dependent result, so only values already known to be
    // in range are converted.
    switch {
    case byteCount >= 4:
        return 4
    case byteCount >= 2:
        return int(byteCount)
    default:
        // Below the minimum, -Inf or NaN.
        return 2
    }
}

性能

新代码稍微快一点:

代码语言:javascript
复制
goos: linux
goarch: amd64
BenchmarkWriteOld-4        10000        200830 ns/op       40815 B/op         43 allocs/op
BenchmarkWriteNew-4        10000        164985 ns/op       29272 B/op         27 allocs/op
票数 2
EN
页面原文内容由Code Review提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://codereview.stackexchange.com/questions/204752

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档