我正在通过做来学习。我试图将Java的ZsyncMake 实现移植到Golang。我还使用Go的并发API与goroutine和通道。我有一些Java方面的经验,但从未使用过本机语言。一个立即出现的问题是,Golang中的int与int32不一样(因为它依赖于平台,Java的int是4字节),因此我大部分时间都需要转换它。
这是我的代码。在部分注释中我写了[ASK],表示我不确定这是否是Go中正确的实现方式。
package zsync
import (
"bufio"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"goZsyncmake/md4"
"goZsyncmake/zsyncOptions"
"hash"
"io"
"log"
"math"
"os"
"strconv"
"time"
)
// Package-level settings used when generating a .zsync file.
var (
	// ZSYNC_VERSION is the value written on the "zsync:" header line.
	ZSYNC_VERSION = "0.6.2"
	// BLOCK_SIZE_SMALL is the default block size, in bytes, picked for
	// inputs smaller than 100 MiB.
	BLOCK_SIZE_SMALL = 2048
	// BLOCK_SIZE_LARGE is the default block size, in bytes, picked for all
	// other inputs.
	BLOCK_SIZE_LARGE = 4096
)
// ZsyncMake generates the zsync control file for the input at path.
//
// It asks writeToFile for the header text, the checksum bytes and the output
// path, then writes header followed by checksums to that path. Every failure,
// including a failed flush or close, terminates the process via log.Fatal.
func ZsyncMake(path string, options zsyncOptions.Options) {
	checksum, headers, zsyncFilePath := writeToFile(path, options)

	zsyncFile, err := os.Create(zsyncFilePath)
	if err != nil {
		log.Fatal(err)
	}
	// No deferred Close: log.Fatal exits without running defers, and the
	// Close error is checked explicitly at the end instead.
	out := bufio.NewWriter(zsyncFile)
	if _, err := out.WriteString(headers); err != nil {
		log.Fatal(err)
	}
	if _, err := out.Write(checksum); err != nil {
		log.Fatal(err)
	}
	// Buffered data only reaches the file on Flush, so its error matters
	// just as much as the errors of the writes above.
	if err := out.Flush(); err != nil {
		log.Fatal(err)
	}
	if err := zsyncFile.Close(); err != nil {
		log.Fatal(err)
	}
}
// writeToFile prepares everything ZsyncMake needs for the input at path.
// Despite its name it does not write anything itself.
//
// It returns, in order: the checksum bytes produced by computeChecksum, the
// textual header block (terminated by an empty line), and the output file
// name (the opened file's name plus ".zsync"). Missing option values are
// filled in by calculateMissingValues. Any I/O failure terminates the process
// via log.Fatal.
func writeToFile(path string, options zsyncOptions.Options) ([]byte, string, string) {
	file, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		log.Fatal(err)
	}

	opts := calculateMissingValues(options, file)
	blockSize := opts.BlockSize
	fileLength := fileInfo.Size()

	// Compare against the effective block size. options.BlockSize is still 0
	// when the caller relies on the default, which would select 2 sequence
	// matches for every non-empty file, even one smaller than a block.
	sequenceMatches := 1
	if fileLength > int64(blockSize) {
		sequenceMatches = 2
	}

	weakLen := weakChecksumLength(fileLength, blockSize, sequenceMatches)
	strongLen := strongChecksumLength(fileLength, blockSize, sequenceMatches)

	checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakLen, strongLen, sha1.New(), md4.New())

	// A single concatenation expression is built in one allocation, so no
	// builder is needed here. Length is formatted from the int64 directly so
	// it is not truncated on platforms where int is 32 bits wide.
	header := "zsync: " + ZSYNC_VERSION + "\n" +
		"Filename: " + fileInfo.Name() + "\n" +
		"MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "\n" +
		"Blocksize: " + strconv.Itoa(blockSize) + "\n" +
		"Length: " + strconv.FormatInt(fileLength, 10) + "\n" +
		"Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakLen) + "," + strconv.Itoa(strongLen) + "\n" +
		"URL: " + opts.Url + "\n" +
		"SHA-1: " + hex.EncodeToString(fileChecksum) + "\n\n"

	return checksum, header, file.Name() + ".zsync"
}
// sha1HashFile opens the file at path, streams its contents through SHA-1 and
// sends the resulting digest on fileChecksumChannel. A failure to open or read
// the file terminates the process via log.Fatal.
func sha1HashFile(path string, fileChecksumChannel chan []byte) {
	src, openErr := os.Open(path)
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer src.Close()

	digest := sha1.New()
	_, copyErr := io.Copy(digest, src)
	if copyErr != nil {
		log.Fatal(copyErr)
	}

	sum := digest.Sum(nil)
	fileChecksumChannel <- sum
}
// computeChecksum reads f block by block and builds the checksum section.
//
// For every block (the last one zero-padded to blocksize) it appends the
// trailing weakLen bytes of the big-endian rolling checksum from computeRsum,
// followed by the leading strongLen bytes of blockDigest over that block.
// The SHA-1 of the whole file is computed concurrently by sha1HashFile, which
// reopens the file by name; that digest is appended after the last block and
// also returned as the second value.
//
// fileLength and fileDigest are accepted but not used by this function.
// A read failure or a non-positive blocksize terminates the process via
// log.Fatal.
func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) ([]byte, []byte) {
	// A zero block size would make every read return 0 bytes without error
	// and loop forever; a negative one would panic in make.
	if blocksize <= 0 {
		log.Fatalf("invalid block size %d", blocksize)
	}

	var checksumBytes []byte
	block := make([]byte, blocksize)
	weakBytes := make([]byte, 4) // scratch buffer, reused for every block

	fileChecksumChannel := make(chan []byte)
	go sha1HashFile(f.Name(), fileChecksumChannel)

	for {
		// io.ReadFull keeps reading until the block is full. A bare Read is
		// allowed to return fewer bytes before EOF, which would zero-pad a
		// block in the middle of the file.
		read, err := io.ReadFull(f, block)
		if err == io.EOF {
			break
		}
		// ErrUnexpectedEOF only marks the final, partially filled block.
		if err != nil && err != io.ErrUnexpectedEOF {
			log.Fatal(err)
		}
		for i := read; i < blocksize; i++ {
			block[i] = 0
		}

		binary.BigEndian.PutUint32(weakBytes, uint32(computeRsum(block)))
		checksumBytes = append(checksumBytes, weakBytes[len(weakBytes)-weakLen:]...)

		blockDigest.Reset()
		blockDigest.Write(block)
		checksumBytes = append(checksumBytes, blockDigest.Sum(nil)[:strongLen]...)
	}

	fileChecksum := <-fileChecksumChannel
	checksumBytes = append(checksumBytes, fileChecksum...)
	return checksumBytes, fileChecksum
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// strongChecksumLength returns how many bytes of the strong (per-block)
// checksum to keep, capped at 16.
//
// Two estimates are computed and the larger one wins: the first is the bit
// estimate divided by sequenceMatches and rounded up to whole bytes, the
// second depends only on the block count and is not reduced by
// sequenceMatches.
func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
	ln2 := math.Log(2)
	blocksLog := math.Log(float64(1 + fileLength/int64(blocksize)))

	// Estimated number of bits needed for the strong checksum.
	bits := (math.Log(float64(fileLength))+blocksLog)/ln2 + 20

	reduced := math.Ceil(bits / float64(sequenceMatches) / 8)
	unreduced := (blocksLog/ln2 + 20 + 7.9) / 8

	longest := math.Max(reduced, unreduced)
	if longest > 16 {
		return 16
	}
	return int(longest)
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// weakChecksumLength returns how many bytes of the rolling checksum to store
// per block, always in the range [2, 4].
//
// The estimate follows the formula in the Weak Checksum section of
// http://zsync.moria.org.uk/paper/ch02s03.html and is reduced by
// sequenceMatches per http://zsync.moria.org.uk/paper/ch02s04.html.
//
// The clamp is applied while the value is still a float64: for an empty file
// the logarithm is -Inf, and converting a non-representable float to int is
// implementation-dependent in Go, so the conversion only happens once the
// value is known to be within range.
func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
	bits := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
	bytesNeeded := math.Ceil(bits / float64(sequenceMatches) / 8)

	switch {
	case bytesNeeded > 4:
		return 4
	case bytesNeeded >= 2:
		return int(bytesNeeded)
	default:
		// Covers values below 2 as well as -Inf and NaN.
		return 2
	}
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// computeRsum returns the rolling (weak) checksum of block.
//
// Two 16-bit accumulators are kept: a is the sum of all bytes, b is the sum
// of each byte multiplied by its distance from the end of the block (the
// first byte is weighted len(block), the last byte 1). Both wrap around at
// 16 bits. The result packs a into the upper half and b into the low 16
// bits; because a is signed the returned int may be negative, and callers
// convert it to uint32 to obtain the 32-bit pattern.
func computeRsum(block []byte) int {
	var a, b int16
	weight := len(block)
	// A Go byte is already unsigned, so it can be widened directly without
	// any sign masking.
	for _, v := range block {
		a += int16(v)
		b += int16(weight * int(v)) // deliberate truncation to 16 bits
		weight--
	}
	return int(a)<<16 | int(b)&0xffff
}
// unsign returns b unchanged.
//
// It mirrors a helper from the Java original, where bytes are signed. In Go,
// byte is an alias for uint8, so the value can never be negative and no
// masking is required; the previous "b < 0" branch was unreachable.
func unsign(b byte) uint8 {
	return b
}
// calculateMissingValues returns a copy of opts in which every zero-valued
// field has been given a default derived from f: Url and Filename fall back
// to the file's name, and BlockSize falls back to the size-dependent default
// from calculateDefaultBlockSizeForInputFile. The caller's value is not
// modified because opts is passed by value.
func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
	filled := opts

	if filled.Url == "" {
		filled.Url = f.Name()
	}
	if filled.Filename == "" {
		filled.Filename = f.Name()
	}
	if filled.BlockSize == 0 {
		filled.BlockSize = calculateDefaultBlockSizeForInputFile(f)
	}

	return filled
}
func calculateDefaultBlockSizeForInputFile(f *os.File) int {
fileInfo, err := f.Stat()
if err != nil {
log.Fatal(err)
}
if fileInfo.Size() < 100*1<<20 {
return BLOCK_SIZE_SMALL
} else {
return BLOCK_SIZE_LARGE
}
}

此外,由于我有Java背景,我习惯把所有东西都模块化,包括把这个Options结构体放到单独的文件(包)里。在Go中我也应该这样把它拆分出去吗?
package zsyncOptions
type Options struct {
BlockSize int
Filename string
Url string
}

发布于 2018-12-29 00:52:50
不要到处使用log.Fatal()!相反,返回带有以下上下文的错误:
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}

bufio.Writer

您说得对,通过拼接字符串来创建头部并不是最好的方法。我们可以直接把头部内容写入bufio.Writer:
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('\n')
buf.WriteString("Filename: ")..。
还可以使用fmt.Fprintf()在单个调用中执行此操作。
fmt.Fprintf(buf, `zsync: %s
Filename: %s
MTime: %s
Blocksize: %d
Length: %d
Hash-Lengths: %d,%d,%d
URL: %s
SHA-1: %s
%s`,
version,
fileInfo.Name(),
fileInfo.ModTime().Format(time.RFC1123Z),
options.BlockSize,
fileLength,
sequenceMatches, weakChecksumLength, strongChecksumLength,
options.Url,
hex.EncodeToString(fileChecksum),
checksum,
)

文件
读取文件是昂贵的,应该只读一次。可以用io/ioutil包在字节片中读取文件的内容。
fileByte, err := ioutil.ReadFile(options.Filename)然后,您可以在一行中计算它的SHA1校验和:
fileChecksum := sha1.Sum(fileByte)然后从它创建一个*Reader:
reader := bytes.NewReader(fileByte)
for {
read, err := reader.Read(block)
...
}这还允许我们将文件长度作为int获取,如下所示:
fileLength := len(fileByte)有些params是未使用或冗余的,例如path和options.Filename,它们应该被删除。您可以对具有相同类型的参数进行分组,以获得更高的可读性:
func computeChecksum(fileByte []byte, blockSize int, weakLen int, strongLen int) ([]byte, []byte, error) {可以写成这样:
func computeChecksum(fileByte []byte, blockSize, weakLen, strongLen int) ([]byte, []byte, error) {Go还允许命名返回以获得更清晰的信息。当一个方法返回两个类型相同的值时,它是有用的,如这里所示。方法签名可以是:
func computeChecksum(fileByte []byte, blockSize, weakLen, strongLen int) (checksum []byte, fileChecksum []byte, err error) {在这段代码中不需要通道和goroutines,它只会使代码更难读。尽量保持代码的整洁和简单。如果您面临性能问题,请分析代码并优化热点。
可以将computeRsum函数简化为直接返回uint32:
func computeRsum(block []byte) uint32 {
a, b, l := 0, 0, len(block)
for _, v := range block {
if v < 0 {
v = v & 0xFF
}
a += int(v)
b += l * int(v)
l--
}
return uint32(a<<16) | uint32(b&0xffff)
}使用const块
这块
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096应该这样改写:
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)

常量(const)是不可变的,这正是我们想要的。还请注意,在Go中命名常量时首选驼峰式命名(MixedCaps),而不是全大写加下划线。
package zsync
import (
"bufio"
"bytes"
"crypto/md5"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"os"
"strconv"
"time"
)
// Options holds the settings for generating a .zsync file.
type Options struct {
	// BlockSize is the block length in bytes. When it is 0, writeToFile
	// replaces it with a default chosen from the input file's size.
	BlockSize int
	// Filename is the path of the input file. Url is the value written on
	// the "URL:" header line; when empty, writeToFile sets it to Filename.
	Filename, Url string
}
// version is the value written on the "zsync:" header line.
const version = "0.6.2"

// Default block sizes in bytes: writeToFile picks the small one for inputs
// under 100 MiB and the large one otherwise.
const (
	blockSizeSmall = 1 << 11
	blockSizeLarge = 1 << 12
)
// ZsyncMake generates the .zsync file described by options and terminates
// the process via log.Fatal if writeToFile reports an error.
func ZsyncMake(options *Options) {
	if err := writeToFile(options); err != nil {
		log.Fatal(err)
	}
}
// writeToFile reads options.Filename, computes its checksums and writes the
// resulting control file to options.Filename + ".zsync".
//
// It modifies *options in place: an empty Url is set to Filename, and a zero
// BlockSize is replaced by blockSizeSmall for inputs under 100 MiB or
// blockSizeLarge otherwise.
//
// It returns the first error encountered while opening, reading, computing
// checksums, creating, flushing or closing.
func writeToFile(options *Options) error {
	file, err := os.Open(options.Filename)
	if err != nil {
		return err
	}
	// The handle is only needed for Stat and Name; release it on every path
	// instead of leaking it.
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		return err
	}

	fileByte, err := ioutil.ReadFile(options.Filename)
	if err != nil {
		return err
	}
	fileLength := len(fileByte)

	if options.Url == "" {
		options.Url = options.Filename
	}
	if options.BlockSize == 0 {
		if fileLength < 100*1<<20 {
			options.BlockSize = blockSizeSmall
		} else {
			options.BlockSize = blockSizeLarge
		}
	}

	sequenceMatches := 1
	if fileLength > options.BlockSize {
		sequenceMatches = 2
	}

	weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
	strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

	checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
	if err != nil {
		return fmt.Errorf("fail to compute checksum: %w", err)
	}

	zsyncFile, err := os.Create(file.Name() + ".zsync")
	if err != nil {
		return err
	}

	// bufio.Writer remembers the first write error and returns it again from
	// Flush, so the individual write results need no separate checks.
	buf := bufio.NewWriter(zsyncFile)
	buf.WriteString("zsync: " + version + "\n")
	buf.WriteString("Filename: " + fileInfo.Name() + "\n")
	buf.WriteString("MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "\n")
	buf.WriteString("Blocksize: " + strconv.Itoa(options.BlockSize) + "\n")
	buf.WriteString("Length: " + strconv.Itoa(fileLength) + "\n")
	buf.WriteString("Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakLen) + "," + strconv.Itoa(strongLen) + "\n")
	buf.WriteString("URL: " + options.Url + "\n")
	buf.WriteString("SHA-1: " + hex.EncodeToString(fileChecksum) + "\n")
	buf.WriteByte('\n')
	buf.Write(checksum)

	if err := buf.Flush(); err != nil {
		zsyncFile.Close()
		return err
	}
	// A failed Close can mean the data never reached the disk, so report it.
	return zsyncFile.Close()
}
// computeChecksum builds the checksum section for the file contents in
// fileByte.
//
// The data is processed in blocks of blockSize bytes, with the last block
// zero-padded. For each block it writes the trailing weakLen bytes of the
// big-endian rolling checksum from computeRsum, followed by the leading
// strongLen bytes of the block's digest. The SHA-1 of the whole input is
// appended after the last block and also returned as the second value.
//
// An error is returned when blockSize is not positive, when weakLen is
// outside [0, 4], or when strongLen is outside [0, md5.Size]. Without these
// checks a zero block size loops forever and the other cases panic.
//
// NOTE(review): zsync's strong checksum is normally MD4 (strongChecksumLength
// refers to "MD4 max"), while this uses md5.Sum. Confirm this substitution is
// intended, since the digests differ.
func computeChecksum(fileByte []byte, blockSize, weakLen, strongLen int) ([]byte, []byte, error) {
	if blockSize <= 0 {
		return nil, nil, fmt.Errorf("invalid block size %d", blockSize)
	}
	if weakLen < 0 || weakLen > 4 {
		return nil, nil, fmt.Errorf("invalid weak checksum length %d", weakLen)
	}
	if strongLen < 0 || strongLen > md5.Size {
		return nil, nil, fmt.Errorf("invalid strong checksum length %d", strongLen)
	}

	reader := bytes.NewReader(fileByte)
	checksumBytes := bytes.NewBuffer(nil)
	block := make([]byte, blockSize)
	weakBytes := make([]byte, 4)

	for {
		read, err := reader.Read(block)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, nil, fmt.Errorf("fail to read block: %w", err)
		}
		// Zero the tail left over from the previous block when the final
		// read is short.
		for i := read; i < blockSize; i++ {
			block[i] = 0
		}

		binary.BigEndian.PutUint32(weakBytes, computeRsum(block))
		checksumBytes.Write(weakBytes[len(weakBytes)-weakLen:])

		strongBytes := md5.Sum(block)
		checksumBytes.Write(strongBytes[:strongLen])
	}

	fileChecksum := sha1.Sum(fileByte)
	checksumBytes.Write(fileChecksum[:])
	return checksumBytes.Bytes(), fileChecksum[:], nil
}
// computeRsum returns the 32-bit rolling (weak) checksum of block.
//
// a is the sum of all bytes; b is the sum of each byte multiplied by its
// distance from the end of the block (the first byte is weighted len(block),
// the last byte 1). The result holds the low 16 bits of a in its upper half
// and the low 16 bits of b in its lower half.
//
// The accumulators are uint32, so wrap-around is well defined and the result
// does not depend on the platform's int width. No sign masking is needed
// because a Go byte is never negative.
func computeRsum(block []byte) uint32 {
	var a, b uint32
	weight := uint32(len(block))
	for _, v := range block {
		a += uint32(v)
		b += weight * uint32(v)
		weight--
	}
	return a<<16 | b&0xffff
}
// strongChecksumLength returns how many bytes of the strong (per-block)
// checksum to keep, capped at 16.
//
// Two estimates are computed and the larger one wins: the first is the bit
// estimate divided by sequenceMatches and rounded up to whole bytes, the
// second depends only on the block count and is not reduced by
// sequenceMatches.
func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
	ln2 := math.Log(2)
	blocksLog := math.Log(float64(1 + fileLength/blocksize))

	// Estimated number of bits needed for the strong checksum.
	bits := (math.Log(float64(fileLength))+blocksLog)/ln2 + 20

	reduced := math.Ceil(bits / float64(sequenceMatches) / 8)
	unreduced := (blocksLog/ln2 + 20 + 7.9) / 8

	longest := math.Max(reduced, unreduced)
	if longest > 16 {
		return 16
	}
	return int(longest)
}
func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
// estimated number of bytes to allocate for the rolling checksum per formula in
// Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
// reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
rdc := d / float64(sequenceMatches) / 8
lrdc := int(math.Ceil(rdc))
// enforce max and min values
if lrdc > 4 {
return 4
}
if lrdc < 2 {
return 2
}
return lrdc
}

新代码稍微快一点:
goos: linux
goarch: amd64
BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op

https://codereview.stackexchange.com/questions/204752
复制相似问题