Making a Zsync file archive with checksums
I'm learning Go by doing. I tried to port Java's ZsyncMake implementation to Go.
I also use Go's concurrency primitives (goroutines and channels).
I have some experience in Java, but have never worked with a natively compiled language. One problem I ran into immediately is that int
in Go isn't the same as int32
(its size depends on the platform, whereas Java's int
is always 4 bytes), so I need to convert between integer types most of the time.
Here's my code. In some comments I wrote [ASK]
to mark places where I'm not sure whether this is the proper way to implement it in Go.
package zsync
import (
"bufio"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"goZsyncmake/md4"
"goZsyncmake/zsyncOptions"
"hash"
"io"
"log"
"math"
"os"
"strconv"
"time"
)
// Fixed settings for the generated .zsync file.
//
// None of these values change at run time, so they are constants rather than
// variables. The original identifiers are kept so existing references still
// compile.
const (
	// ZSYNC_VERSION is the version string written to the "zsync:" header line.
	ZSYNC_VERSION = "0.6.2"
	// BLOCK_SIZE_SMALL is the default block size in bytes for inputs below 100 MiB.
	BLOCK_SIZE_SMALL = 2048
	// BLOCK_SIZE_LARGE is the default block size in bytes for inputs of 100 MiB or more.
	BLOCK_SIZE_LARGE = 4096
)
// ZsyncMake generates the zsync control file for the file at path and writes
// it to the location returned by writeToFile (the input name plus ".zsync").
//
// The header text and the checksum bytes are produced by writeToFile; this
// function only persists them. Any failure terminates the process through
// log.Fatal, matching the error handling used by the rest of this file.
func ZsyncMake(path string, options zsyncOptions.Options) {
	checksum, headers, zsyncFilePath := writeToFile(path, options)

	zsyncFile, err := os.Create(zsyncFilePath)
	if err != nil {
		log.Fatal(err)
	}

	// bufio.Writer remembers the first write error and reports it from
	// Flush, so a single check after Flush covers both writes.
	out := bufio.NewWriter(zsyncFile)
	out.WriteString(headers)
	out.Write(checksum)
	writeErr := out.Flush()

	// Close explicitly instead of deferring: log.Fatal exits the process
	// without running deferred calls, and a failed Close can mean lost data.
	closeErr := zsyncFile.Close()

	if writeErr != nil {
		log.Fatal(writeErr)
	}
	if closeErr != nil {
		log.Fatal(closeErr)
	}
}
// writeToFile computes everything that goes into the .zsync file for the input
// at path. Despite its name it does not write anything to disk; ZsyncMake does.
//
// It returns, in order:
//   - the binary checksum section produced by computeChecksum,
//   - the textual header, terminated by an empty line,
//   - the path of the .zsync file to create (the input name plus ".zsync").
//
// Missing option values are filled in by calculateMissingValues. Failure to
// open or stat the input terminates the process through log.Fatal.
func writeToFile(path string, options zsyncOptions.Options) ([]byte, string, string) {
	file, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		log.Fatal(err)
	}

	opts := calculateMissingValues(options, file)
	blockSize := opts.BlockSize
	fileLength := fileInfo.Size()

	// Compare against the effective block size, not the caller's raw value:
	// options.BlockSize is 0 when the default is requested, which would make
	// every non-empty file look larger than one block.
	sequenceMatches := 1
	if fileLength > int64(blockSize) {
		sequenceMatches = 2
	}

	weakLen := weakChecksumLength(fileLength, blockSize, sequenceMatches)
	strongLen := strongChecksumLength(fileLength, blockSize, sequenceMatches)

	checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakLen, strongLen, sha1.New(), md4.New())

	// A fixed, small number of pieces joined in one expression is built with
	// a single allocation, so no builder is needed here.
	// Length is formatted as int64 so large files are not truncated on
	// platforms where int is 32 bits wide.
	header := "zsync: " + ZSYNC_VERSION + "\n" +
		"Filename: " + fileInfo.Name() + "\n" +
		"MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "\n" +
		"Blocksize: " + strconv.Itoa(blockSize) + "\n" +
		"Length: " + strconv.FormatInt(fileLength, 10) + "\n" +
		"Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakLen) + "," + strconv.Itoa(strongLen) + "\n" +
		"URL: " + opts.Url + "\n" +
		"SHA-1: " + hex.EncodeToString(fileChecksum) + "\n\n"

	return checksum, header, file.Name() + ".zsync"
}
// sha1HashFile computes the SHA-1 digest of the file at path and sends it on
// fileChecksumChannel.
//
// The file is opened independently of any other handle and streamed through
// the hash with io.Copy. The send blocks until a receiver is ready unless the
// channel is buffered. Failure to open or read the file terminates the
// process through log.Fatal.
func sha1HashFile(path string, fileChecksumChannel chan []byte) {
	file, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	digest := sha1.New()
	if _, err := io.Copy(digest, file); err != nil {
		log.Fatal(err)
	}
	fileChecksumChannel <- digest.Sum(nil)
}
func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) (byte, byte) {
checksumBytes := make(byte, 0)
block := make(byte, blocksize)
fileChecksumChannel := make(chan byte)
go sha1HashFile(f.Name(), fileChecksumChannel)
for {
read, err := f.Read(block)
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
if read < blocksize {
blockSlice := block[read:blocksize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
unsignedWeakByte := make(byte, 4)
binary.BigEndian.PutUint32(unsignedWeakByte, uint32(rsum))
tempUnsignedWeakByte := unsignedWeakByte[len(unsignedWeakByte)-weakLen:]
checksumBytes = append(checksumBytes, tempUnsignedWeakByte...)
blockDigest.Reset()
blockDigest.Write(block)
strongBytes := blockDigest.Sum(nil)
tempUnsignedStrongByte := strongBytes[:strongLen]
checksumBytes = append(checksumBytes, tempUnsignedStrongByte...)
}
fileChecksum := <- fileChecksumChannel
checksumBytes = append(checksumBytes, fileChecksum...)
return checksumBytes, fileChecksum
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// strongChecksumLength returns the number of strong-checksum bytes to store
// per block, capped at 16.
//
// Two estimates are computed and the larger one wins: the first is divided by
// sequenceMatches and rounded up, the second depends only on the block count
// (1 + fileLength/blocksize, using integer division) and is truncated by the
// final conversion to int.
func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
	ln2 := math.Log(2)
	lnBlocks := math.Log(float64(1 + fileLength/int64(blocksize)))

	// Estimated number of bits needed, before accounting for sequence matches.
	bits := (math.Log(float64(fileLength))+lnBlocks)/ln2 + 20

	// First estimate: bits spread over the sequence matches, in whole bytes.
	reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

	// Second estimate: not reduced by sequence matches.
	unreduced := (lnBlocks/ln2 + 20 + 7.9) / 8

	// Take the larger estimate, but never more than 16 bytes.
	return int(math.Min(16, math.Max(reduced, unreduced)))
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// weakChecksumLength returns the number of rolling-checksum bytes to store
// per block, always between 2 and 4 inclusive.
//
// The estimate follows the formula referenced in the Weak Checksum section of
// http://zsync.moria.org.uk/paper/ch02s03.html and is then divided by
// sequenceMatches as described in http://zsync.moria.org.uk/paper/ch02s04.html.
func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
	// Estimated number of bits for the rolling checksum.
	bits := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6

	// Spread over the sequence matches and rounded up to whole bytes.
	n := int(math.Ceil(bits / float64(sequenceMatches) / 8))

	// Clamp to the supported range.
	switch {
	case n > 4:
		return 4
	case n < 2:
		return 2
	default:
		return n
	}
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// computeRsum returns the rolling (weak) checksum of block.
//
// Two 16-bit sums are kept: a is the sum of all bytes, b is the sum of each
// byte weighted by its distance from the end of the block (the first byte is
// weighted by len(block), the last by 1). Both wrap modulo 2^16. The result
// holds a in the upper half and b in the lower 16 bits.
//
// The upper half is sign-extended exactly as before, so the returned int can
// be negative; callers in this file convert it to uint32 before use.
func computeRsum(block []byte) int {
	var a, b uint16
	for i, v := range block {
		// A Go byte is already unsigned, so no masking is required.
		a += uint16(v)
		b += uint16((len(block) - i) * int(v))
	}
	return int(int16(a))<<16 | int(b)
}
// unsign returns b unchanged.
//
// It is a leftover from the Java port, where bytes are signed and must be
// masked before use. A Go byte is an alias for uint8, so it can never be
// negative and the old "b < 0" branch was unreachable.
func unsign(b byte) uint8 {
	return b
}
// calculateMissingValues returns a copy of opts with every unset field
// replaced by a default derived from f.
//
// A zero BlockSize is replaced by calculateDefaultBlockSizeForInputFile(f);
// an empty Filename or Url is replaced by f.Name(). Because opts is passed
// by value, the caller's struct is left untouched.
func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
	if opts.BlockSize == 0 {
		opts.BlockSize = calculateDefaultBlockSizeForInputFile(f)
	}

	// Filename and Url share the same fallback: the name of the input file.
	for _, field := range []*string{&opts.Filename, &opts.Url} {
		if *field == "" {
			*field = f.Name()
		}
	}

	return opts
}
func calculateDefaultBlockSizeForInputFile(f *os.File) int {
fileInfo, err := f.Stat()
if err != nil {
log.Fatal(err)
}
if fileInfo.Size() < 100*1<<20 {
return BLOCK_SIZE_SMALL
} else {
return BLOCK_SIZE_LARGE
}
}
Also, coming from a Java background, I'm used to modularizing everything, including putting this Options struct into a separate file. Am I supposed to modularize it like this?
package zsyncOptions
// Options holds the caller-supplied settings for generating a .zsync file.
// Fields left at their zero value are given defaults by
// calculateMissingValues in package zsync.
type Options struct {
// BlockSize is the block size in bytes; 0 selects a default based on the
// size of the input file.
BlockSize int
// Filename defaults to the name of the input file when empty.
Filename string
// Url is written to the "URL:" header line; it defaults to the name of the
// input file when empty.
Url string
}
file go casting checksum
add a comment |
I'm learning go by doing it. I tried to port the Java's ZsyncMake implementation into Golang.
I also employ the Go's concurrency API with goroutine and channel.
I have some experience in Java, but never work with native language. One immediately coming problem is int
in Golang isn't the same as int32
(since it depends on the platform; Java's int
is 4 byte), thus I need to cast it most of the time.
Here's my code. In some comments I wrote [ASK]
to indicate that I'm not sure if it's a proper way of implementation in Go
package zsync
import (
"bufio"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"goZsyncmake/md4"
"goZsyncmake/zsyncOptions"
"hash"
"io"
"log"
"math"
"os"
"strconv"
"time"
)
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096
func ZsyncMake(path string, options zsyncOptions.Options) {
checksum, headers, zsyncFilePath := writeToFile(path, options)
zsyncFile, err := os.Create(zsyncFilePath)
if err != nil {
log.Fatal(err)
}
defer zsyncFile.Close()
bfio := bufio.NewWriter(zsyncFile)
_, err = bfio.WriteString(headers)
if err != nil {
log.Fatal(err)
}
_, err = bfio.Write(checksum)
if err != nil {
log.Fatal(err)
}
bfio.Flush()
}
func writeToFile(path string, options zsyncOptions.Options) (byte, string, string) {
file, err := os.Open(path)
if err != nil {
log.Fatal(err)
}
defer file.Close()
outputFileName := file.Name() + ".zsync"
fileInfo, err := file.Stat()
if err != nil {
log.Fatal(err)
}
opts := calculateMissingValues(options, file)
blockSize := opts.BlockSize
fileLength := fileInfo.Size()
sequenceMatches := 0
if fileLength > int64(options.BlockSize) {
sequenceMatches = 2
} else {
sequenceMatches = 1
}
weakChecksumLength := weakChecksumLength(fileLength, blockSize, sequenceMatches)
strongChecksumLength := strongChecksumLength(fileLength, blockSize, sequenceMatches)
fileDigest := sha1.New()
blockDigest := md4.New()
checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakChecksumLength, strongChecksumLength, fileDigest, blockDigest)
strFileChecksum := hex.EncodeToString(fileChecksum)
// [ASK] I suspect I can improve performance here rather than appending string with +
strHeader := "zsync: " + ZSYNC_VERSION + "n" +
"Filename: " + fileInfo.Name() + "n" +
"MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "n" +
"Blocksize: " + strconv.Itoa(blockSize) + "n" +
"Length: " + strconv.Itoa(int(fileLength)) + "n" +
"Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakChecksumLength) + "," + strconv.Itoa(strongChecksumLength) + "n" +
"URL: " + opts.Url + "n" +
"SHA-1: " + strFileChecksum + "nn"
return checksum, strHeader, outputFileName
}
func sha1HashFile(path string, fileChecksumChannel chan byte) {
file, err := os.Open(path)
if err != nil {
log.Fatal(err)
}
defer file.Close()
hasher := sha1.New()
if _, err := io.Copy(hasher, file); err != nil {
log.Fatal(err)
}
fileChecksumChannel <- hasher.Sum(nil)
}
func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) (byte, byte) {
checksumBytes := make(byte, 0)
block := make(byte, blocksize)
fileChecksumChannel := make(chan byte)
go sha1HashFile(f.Name(), fileChecksumChannel)
for {
read, err := f.Read(block)
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
if read < blocksize {
blockSlice := block[read:blocksize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
unsignedWeakByte := make(byte, 4)
binary.BigEndian.PutUint32(unsignedWeakByte, uint32(rsum))
tempUnsignedWeakByte := unsignedWeakByte[len(unsignedWeakByte)-weakLen:]
checksumBytes = append(checksumBytes, tempUnsignedWeakByte...)
blockDigest.Reset()
blockDigest.Write(block)
strongBytes := blockDigest.Sum(nil)
tempUnsignedStrongByte := strongBytes[:strongLen]
checksumBytes = append(checksumBytes, tempUnsignedStrongByte...)
}
fileChecksum := <- fileChecksumChannel
checksumBytes = append(checksumBytes, fileChecksum...)
return checksumBytes, fileChecksum
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
// estimated number of bytes to allocate for strong checksum
d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/int64(blocksize))))/math.Log(2) + 20
// reduced number of bits by sequence matches
lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))
// second checksum - not reduced by sequence matches
lSecond := float64((math.Log(float64(1+fileLength/int64(blocksize)))/math.Log(2) + 20 + 7.9) / 8)
// return max of two: return no more than 16 bytes (MD4 max)
return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
// estimated number of bytes to allocate for the rolling checksum per formula in
// Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
// reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
rdc := d / float64(sequenceMatches) / 8
lrdc := int(math.Ceil(rdc))
// enforce max and min values
if lrdc > 4 {
return 4
} else {
if lrdc < 2 {
return 2
} else {
return lrdc
}
}
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func computeRsum(block byte) int {
var a int16
var b int16
l := len(block)
for i := 0; i < len(block); i++ {
val := int(unsign(block[i]))
a += int16(val)
b += int16(l * val)
l--
}
x := int(a) << 16
y := int(b) & 0xffff
return int(x) | int(y)
}
func unsign(b byte) uint8 {
if b < 0 {
return b & 0xFF
} else {
return b
}
}
func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
if opts.BlockSize == 0 {
opts.BlockSize = calculateDefaultBlockSizeForInputFile(f)
}
if opts.Filename == "" {
opts.Filename = f.Name()
}
if opts.Url == "" {
opts.Url = f.Name()
}
return opts
}
func calculateDefaultBlockSizeForInputFile(f *os.File) int {
fileInfo, err := f.Stat()
if err != nil {
log.Fatal(err)
}
if fileInfo.Size() < 100*1<<20 {
return BLOCK_SIZE_SMALL
} else {
return BLOCK_SIZE_LARGE
}
}
Also, coming from Java background, I get use to modularize everything, including this Options struct onto other file. Am I suppose to modularize it?
package zsyncOptions
type Options struct {
BlockSize int
Filename string
Url string
}
file go casting checksum
add a comment |
I'm learning go by doing it. I tried to port the Java's ZsyncMake implementation into Golang.
I also employ the Go's concurrency API with goroutine and channel.
I have some experience in Java, but never work with native language. One immediately coming problem is int
in Golang isn't the same as int32
(since it depends on the platform; Java's int
is 4 byte), thus I need to cast it most of the time.
Here's my code. In some comments I wrote [ASK]
to indicate that I'm not sure if it's a proper way of implementation in Go
package zsync
import (
"bufio"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"goZsyncmake/md4"
"goZsyncmake/zsyncOptions"
"hash"
"io"
"log"
"math"
"os"
"strconv"
"time"
)
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096
func ZsyncMake(path string, options zsyncOptions.Options) {
checksum, headers, zsyncFilePath := writeToFile(path, options)
zsyncFile, err := os.Create(zsyncFilePath)
if err != nil {
log.Fatal(err)
}
defer zsyncFile.Close()
bfio := bufio.NewWriter(zsyncFile)
_, err = bfio.WriteString(headers)
if err != nil {
log.Fatal(err)
}
_, err = bfio.Write(checksum)
if err != nil {
log.Fatal(err)
}
bfio.Flush()
}
func writeToFile(path string, options zsyncOptions.Options) (byte, string, string) {
file, err := os.Open(path)
if err != nil {
log.Fatal(err)
}
defer file.Close()
outputFileName := file.Name() + ".zsync"
fileInfo, err := file.Stat()
if err != nil {
log.Fatal(err)
}
opts := calculateMissingValues(options, file)
blockSize := opts.BlockSize
fileLength := fileInfo.Size()
sequenceMatches := 0
if fileLength > int64(options.BlockSize) {
sequenceMatches = 2
} else {
sequenceMatches = 1
}
weakChecksumLength := weakChecksumLength(fileLength, blockSize, sequenceMatches)
strongChecksumLength := strongChecksumLength(fileLength, blockSize, sequenceMatches)
fileDigest := sha1.New()
blockDigest := md4.New()
checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakChecksumLength, strongChecksumLength, fileDigest, blockDigest)
strFileChecksum := hex.EncodeToString(fileChecksum)
// [ASK] I suspect I can improve performance here rather than appending string with +
strHeader := "zsync: " + ZSYNC_VERSION + "n" +
"Filename: " + fileInfo.Name() + "n" +
"MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "n" +
"Blocksize: " + strconv.Itoa(blockSize) + "n" +
"Length: " + strconv.Itoa(int(fileLength)) + "n" +
"Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakChecksumLength) + "," + strconv.Itoa(strongChecksumLength) + "n" +
"URL: " + opts.Url + "n" +
"SHA-1: " + strFileChecksum + "nn"
return checksum, strHeader, outputFileName
}
func sha1HashFile(path string, fileChecksumChannel chan byte) {
file, err := os.Open(path)
if err != nil {
log.Fatal(err)
}
defer file.Close()
hasher := sha1.New()
if _, err := io.Copy(hasher, file); err != nil {
log.Fatal(err)
}
fileChecksumChannel <- hasher.Sum(nil)
}
func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) (byte, byte) {
checksumBytes := make(byte, 0)
block := make(byte, blocksize)
fileChecksumChannel := make(chan byte)
go sha1HashFile(f.Name(), fileChecksumChannel)
for {
read, err := f.Read(block)
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
if read < blocksize {
blockSlice := block[read:blocksize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
unsignedWeakByte := make(byte, 4)
binary.BigEndian.PutUint32(unsignedWeakByte, uint32(rsum))
tempUnsignedWeakByte := unsignedWeakByte[len(unsignedWeakByte)-weakLen:]
checksumBytes = append(checksumBytes, tempUnsignedWeakByte...)
blockDigest.Reset()
blockDigest.Write(block)
strongBytes := blockDigest.Sum(nil)
tempUnsignedStrongByte := strongBytes[:strongLen]
checksumBytes = append(checksumBytes, tempUnsignedStrongByte...)
}
fileChecksum := <- fileChecksumChannel
checksumBytes = append(checksumBytes, fileChecksum...)
return checksumBytes, fileChecksum
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
// estimated number of bytes to allocate for strong checksum
d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/int64(blocksize))))/math.Log(2) + 20
// reduced number of bits by sequence matches
lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))
// second checksum - not reduced by sequence matches
lSecond := float64((math.Log(float64(1+fileLength/int64(blocksize)))/math.Log(2) + 20 + 7.9) / 8)
// return max of two: return no more than 16 bytes (MD4 max)
return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
// estimated number of bytes to allocate for the rolling checksum per formula in
// Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
// reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
rdc := d / float64(sequenceMatches) / 8
lrdc := int(math.Ceil(rdc))
// enforce max and min values
if lrdc > 4 {
return 4
} else {
if lrdc < 2 {
return 2
} else {
return lrdc
}
}
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func computeRsum(block byte) int {
var a int16
var b int16
l := len(block)
for i := 0; i < len(block); i++ {
val := int(unsign(block[i]))
a += int16(val)
b += int16(l * val)
l--
}
x := int(a) << 16
y := int(b) & 0xffff
return int(x) | int(y)
}
func unsign(b byte) uint8 {
if b < 0 {
return b & 0xFF
} else {
return b
}
}
func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
if opts.BlockSize == 0 {
opts.BlockSize = calculateDefaultBlockSizeForInputFile(f)
}
if opts.Filename == "" {
opts.Filename = f.Name()
}
if opts.Url == "" {
opts.Url = f.Name()
}
return opts
}
func calculateDefaultBlockSizeForInputFile(f *os.File) int {
fileInfo, err := f.Stat()
if err != nil {
log.Fatal(err)
}
if fileInfo.Size() < 100*1<<20 {
return BLOCK_SIZE_SMALL
} else {
return BLOCK_SIZE_LARGE
}
}
Also, coming from Java background, I get use to modularize everything, including this Options struct onto other file. Am I suppose to modularize it?
package zsyncOptions
type Options struct {
BlockSize int
Filename string
Url string
}
file go casting checksum
I'm learning go by doing it. I tried to port the Java's ZsyncMake implementation into Golang.
I also employ the Go's concurrency API with goroutine and channel.
I have some experience in Java, but never work with native language. One immediately coming problem is int
in Golang isn't the same as int32
(since it depends on the platform; Java's int
is 4 byte), thus I need to cast it most of the time.
Here's my code. In some comments I wrote [ASK]
to indicate that I'm not sure if it's a proper way of implementation in Go
package zsync
import (
"bufio"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"goZsyncmake/md4"
"goZsyncmake/zsyncOptions"
"hash"
"io"
"log"
"math"
"os"
"strconv"
"time"
)
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096
func ZsyncMake(path string, options zsyncOptions.Options) {
checksum, headers, zsyncFilePath := writeToFile(path, options)
zsyncFile, err := os.Create(zsyncFilePath)
if err != nil {
log.Fatal(err)
}
defer zsyncFile.Close()
bfio := bufio.NewWriter(zsyncFile)
_, err = bfio.WriteString(headers)
if err != nil {
log.Fatal(err)
}
_, err = bfio.Write(checksum)
if err != nil {
log.Fatal(err)
}
bfio.Flush()
}
func writeToFile(path string, options zsyncOptions.Options) (byte, string, string) {
file, err := os.Open(path)
if err != nil {
log.Fatal(err)
}
defer file.Close()
outputFileName := file.Name() + ".zsync"
fileInfo, err := file.Stat()
if err != nil {
log.Fatal(err)
}
opts := calculateMissingValues(options, file)
blockSize := opts.BlockSize
fileLength := fileInfo.Size()
sequenceMatches := 0
if fileLength > int64(options.BlockSize) {
sequenceMatches = 2
} else {
sequenceMatches = 1
}
weakChecksumLength := weakChecksumLength(fileLength, blockSize, sequenceMatches)
strongChecksumLength := strongChecksumLength(fileLength, blockSize, sequenceMatches)
fileDigest := sha1.New()
blockDigest := md4.New()
checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakChecksumLength, strongChecksumLength, fileDigest, blockDigest)
strFileChecksum := hex.EncodeToString(fileChecksum)
// [ASK] I suspect I can improve performance here rather than appending string with +
strHeader := "zsync: " + ZSYNC_VERSION + "n" +
"Filename: " + fileInfo.Name() + "n" +
"MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "n" +
"Blocksize: " + strconv.Itoa(blockSize) + "n" +
"Length: " + strconv.Itoa(int(fileLength)) + "n" +
"Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakChecksumLength) + "," + strconv.Itoa(strongChecksumLength) + "n" +
"URL: " + opts.Url + "n" +
"SHA-1: " + strFileChecksum + "nn"
return checksum, strHeader, outputFileName
}
func sha1HashFile(path string, fileChecksumChannel chan byte) {
file, err := os.Open(path)
if err != nil {
log.Fatal(err)
}
defer file.Close()
hasher := sha1.New()
if _, err := io.Copy(hasher, file); err != nil {
log.Fatal(err)
}
fileChecksumChannel <- hasher.Sum(nil)
}
func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) (byte, byte) {
checksumBytes := make(byte, 0)
block := make(byte, blocksize)
fileChecksumChannel := make(chan byte)
go sha1HashFile(f.Name(), fileChecksumChannel)
for {
read, err := f.Read(block)
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
if read < blocksize {
blockSlice := block[read:blocksize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
unsignedWeakByte := make(byte, 4)
binary.BigEndian.PutUint32(unsignedWeakByte, uint32(rsum))
tempUnsignedWeakByte := unsignedWeakByte[len(unsignedWeakByte)-weakLen:]
checksumBytes = append(checksumBytes, tempUnsignedWeakByte...)
blockDigest.Reset()
blockDigest.Write(block)
strongBytes := blockDigest.Sum(nil)
tempUnsignedStrongByte := strongBytes[:strongLen]
checksumBytes = append(checksumBytes, tempUnsignedStrongByte...)
}
fileChecksum := <- fileChecksumChannel
checksumBytes = append(checksumBytes, fileChecksum...)
return checksumBytes, fileChecksum
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
// estimated number of bytes to allocate for strong checksum
d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/int64(blocksize))))/math.Log(2) + 20
// reduced number of bits by sequence matches
lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))
// second checksum - not reduced by sequence matches
lSecond := float64((math.Log(float64(1+fileLength/int64(blocksize)))/math.Log(2) + 20 + 7.9) / 8)
// return max of two: return no more than 16 bytes (MD4 max)
return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
// estimated number of bytes to allocate for the rolling checksum per formula in
// Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
// reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
rdc := d / float64(sequenceMatches) / 8
lrdc := int(math.Ceil(rdc))
// enforce max and min values
if lrdc > 4 {
return 4
} else {
if lrdc < 2 {
return 2
} else {
return lrdc
}
}
}
// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
func computeRsum(block byte) int {
var a int16
var b int16
l := len(block)
for i := 0; i < len(block); i++ {
val := int(unsign(block[i]))
a += int16(val)
b += int16(l * val)
l--
}
x := int(a) << 16
y := int(b) & 0xffff
return int(x) | int(y)
}
func unsign(b byte) uint8 {
if b < 0 {
return b & 0xFF
} else {
return b
}
}
func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
if opts.BlockSize == 0 {
opts.BlockSize = calculateDefaultBlockSizeForInputFile(f)
}
if opts.Filename == "" {
opts.Filename = f.Name()
}
if opts.Url == "" {
opts.Url = f.Name()
}
return opts
}
func calculateDefaultBlockSizeForInputFile(f *os.File) int {
fileInfo, err := f.Stat()
if err != nil {
log.Fatal(err)
}
if fileInfo.Size() < 100*1<<20 {
return BLOCK_SIZE_SMALL
} else {
return BLOCK_SIZE_LARGE
}
}
Also, coming from Java background, I get use to modularize everything, including this Options struct onto other file. Am I suppose to modularize it?
package zsyncOptions
type Options struct {
BlockSize int
Filename string
Url string
}
file go casting checksum
file go casting checksum
edited Oct 2 at 13:57
200_success
128k15150412
128k15150412
asked Oct 2 at 7:04
imeluntuk
4114
4114
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
Always return errors
Don't use log.Fatal()
everywhere ! Instead, return the error with some context:
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}
Write to the bufio.Writer
directly
You are right, creating the header by appending strings is not the best way. We could instead write
the header content directly to the bufio.Writer
:
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('n')
buf.WriteString("Filename: ")
...
Read the file once
Reading a file is expensive, it should be done only once. It is possible to read the content of a file
in a slice of bytes with the io/ioutil
package
fileByte, err := ioutil.ReadFile(options.Filename)
You can then compute its SHA-1 checksum in one line:
fileChecksum := sha1.Sum(fileByte)
and then create a *Reader
from it:
reader := bytes.NewReader(fileByte)
for {
read, err := reader.Read(block)
...
}
This also allows us to get the file length as an int, like this:
fileLength := len(fileByte)
Method parameters
Some params are unused or redundant, for example path
and options.Filename
, they should be removed.
You can group params with same type for more readability:
func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {
can be written like this:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
Go also allows named return values for more clarity. They are useful when a method returns two values of the same type, as it does here. The method
signature could be:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {
Keep things simple
There is no need for channels and goroutines in this code; they just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspots.
The computeRsum
method could be simplified a bit to return a uint32
directly:
// computeRsum returns the rolling (weak) checksum of block.
//
// a is the sum of all bytes and b is the sum of each byte weighted by its
// distance from the end of the block (the first byte is weighted by
// len(block), the last by 1). Both are kept modulo 2^16; a fills the upper
// half of the result and b the lower half.
func computeRsum(block []byte) uint32 {
	var a, b uint16
	for i, v := range block {
		// A Go byte is unsigned, so there is no sign to mask away.
		a += uint16(v)
		b += uint16((len(block) - i) * int(v))
	}
	return uint32(a)<<16 | uint32(b)
}
Use const block for constant
this block
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096
should be rewritten like this:
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)
Constants are immutable, and that's what we want here. Also note that MixedCaps (camelCase) is preferred over ALL_CAPS for naming constants in Go.
final version
package zsync
import (
"bufio"
"bytes"
"crypto/md5"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"os"
"strconv"
"time"
)
// Options holds the settings for generating a .zsync file. writeToFile fills
// in unset BlockSize and Url values on the struct it is given.
type Options struct {
// BlockSize is the block size in bytes; 0 selects blockSizeSmall or
// blockSizeLarge depending on the length of the input.
BlockSize int
// Filename is the path of the input file; the output is written to
// Filename + ".zsync".
Filename string
// Url is written to the "URL: " header line; it defaults to Filename when
// empty.
Url string
}
// Fixed values used when generating a .zsync file.
const (
// version is written to the "zsync: " line of the generated header.
version = "0.6.2"
// blockSizeSmall is the default block size in bytes for inputs below 100 MiB.
blockSizeSmall = 2048
// blockSizeLarge is the default block size in bytes for inputs of 100 MiB or more.
blockSizeLarge = 4096
)
// ZsyncMake generates the .zsync file described by options and terminates
// the process through log.Fatal if writeToFile reports an error.
func ZsyncMake(options *Options) {
	if err := writeToFile(options); err != nil {
		log.Fatal(err)
	}
}
// writeToFile builds the zsync control file for options.Filename and writes
// it to options.Filename + ".zsync".
//
// Unset options are filled in on the caller's struct: Url defaults to
// Filename, and BlockSize defaults to blockSizeSmall for inputs below
// 100 MiB and to blockSizeLarge otherwise. The whole input is read into
// memory once and used for both the per-block checksums and the SHA-1 of the
// file.
//
// It returns the first error encountered while reading the input, computing
// the checksums, or writing and closing the output file.
func writeToFile(options *Options) error {
	// Only metadata is needed here, so stat by path instead of opening a
	// file handle that would then have to be closed.
	fileInfo, err := os.Stat(options.Filename)
	if err != nil {
		return err
	}

	fileByte, err := ioutil.ReadFile(options.Filename)
	if err != nil {
		return err
	}
	fileLength := len(fileByte)

	if options.Url == "" {
		options.Url = options.Filename
	}
	if options.BlockSize == 0 {
		options.BlockSize = blockSizeLarge
		if fileLength < 100<<20 {
			options.BlockSize = blockSizeSmall
		}
	}

	sequenceMatches := 1
	if fileLength > options.BlockSize {
		sequenceMatches = 2
	}

	weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
	strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

	checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
	if err != nil {
		return fmt.Errorf("fail to compute checksum: %v", err)
	}

	zsyncFile, err := os.Create(options.Filename + ".zsync")
	if err != nil {
		return err
	}

	header := []struct{ key, value string }{
		{"zsync", version},
		{"Filename", fileInfo.Name()},
		{"MTime", fileInfo.ModTime().Format(time.RFC1123Z)},
		{"Blocksize", strconv.Itoa(options.BlockSize)},
		{"Length", strconv.Itoa(fileLength)},
		{"Hash-Lengths", strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakLen) + "," + strconv.Itoa(strongLen)},
		{"URL", options.Url},
		{"SHA-1", hex.EncodeToString(fileChecksum)},
	}

	// bufio.Writer remembers the first write error and returns it from
	// Flush, so the individual writes below need no separate checks.
	buf := bufio.NewWriter(zsyncFile)
	for _, h := range header {
		buf.WriteString(h.key)
		buf.WriteString(": ")
		buf.WriteString(h.value)
		buf.WriteByte('\n')
	}
	buf.WriteByte('\n') // an empty line separates the header from the checksums
	buf.Write(checksum)

	if err := buf.Flush(); err != nil {
		zsyncFile.Close()
		return err
	}
	// Report a failing Close as well: the data may not have reached the disk.
	return zsyncFile.Close()
}
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
reader := bytes.NewReader(fileByte)
checksumBytes := bytes.NewBuffer(nil)
block := make(byte, blockSize)
unsignedWeakByte := make(byte, 4)
for {
read, err := reader.Read(block)
if err != nil {
if err == io.EOF {
break
}
return nil, nil, fmt.Errorf("fail to read block: %v", err)
}
if read < blockSize {
blockSlice := block[read:blockSize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
binary.BigEndian.PutUint32(unsignedWeakByte, rsum)
checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])
strongBytes := md5.Sum(block)
checksumBytes.Write(strongBytes[:strongLen])
}
fileChecksum := sha1.Sum(fileByte)
checksumBytes.Write(fileChecksum[:])
return checksumBytes.Bytes(), fileChecksum[:], nil
}
// computeRsum returns the weak (rolling) checksum of block.
//
// Two sums are accumulated: a, the plain sum of the bytes, and b, the sum of
// the bytes each weighted by its distance from the end of the block (the
// first byte is weighted by len(block), the last by 1). The result carries
// the low 16 bits of a in its upper half and the low 16 bits of b in its
// lower half.
func computeRsum(block []byte) uint32 {
	a, b, l := 0, 0, len(block)
	for _, v := range block {
		// v is an unsigned byte, so no sign masking is needed before widening.
		a += int(v)
		b += l * int(v)
		l--
	}
	return uint32(a<<16) | uint32(b&0xffff)
}
// strongChecksumLength estimates how many bytes of the strong checksum to
// store per block.
//
// Two candidates are computed from fileLength and the block count
// (1 + fileLength/blocksize): one whose bit count is divided by
// sequenceMatches and rounded up to whole bytes, and one that is not reduced
// by sequenceMatches. The larger candidate is returned, capped at 16 bytes.
func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
	const maxStrongBytes = 16 // upper bound on the returned length (MD4 max)

	ln2 := math.Log(2)
	blockCount := float64(1 + fileLength/blocksize)

	// Estimated number of bits needed for the strong checksum.
	bits := (math.Log(float64(fileLength))+math.Log(blockCount))/ln2 + 20

	// First candidate: bits reduced by sequence matches, in whole bytes.
	reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

	// Second candidate: not reduced by sequence matches.
	unreduced := (math.Log(blockCount)/ln2 + 20 + 7.9) / 8

	longest := math.Max(reduced, unreduced)
	return int(math.Min(maxStrongBytes, longest))
}
// weakChecksumLength estimates how many bytes of the rolling checksum to store
// per block, using the formula from the Weak Checksum section of
// http://zsync.moria.org.uk/paper/ch02s03.html and dividing the bit count by
// sequenceMatches per http://zsync.moria.org.uk/paper/ch02s04.html.
//
// The result is always within [2, 4].
func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
	const (
		minBytes = 2
		maxBytes = 4
	)
	d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
	rdc := math.Ceil(d / float64(sequenceMatches) / 8)
	// Clamp while still in floating point: for an empty file math.Log(0) is
	// -Inf, and converting an infinite or NaN float to int yields an
	// implementation-dependent value.
	if math.IsNaN(rdc) || rdc < minBytes {
		return minBytes
	}
	if rdc > maxBytes {
		return maxBytes
	}
	return int(rdc)
}
performance
New code is slightly faster:
goos: linux
goarch: amd64
BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f204752%2fmaking-a-zsync-file-archive-with-checksums%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
Always return errors
Don't use log.Fatal()
everywhere! Instead, return the error with some context:
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}
Write to the bufio.Writer
directly
You are right, creating the header by appending strings is not the best way. We could instead write
the header content directly to the bufio.writer
:
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('n')
buf.WriteString("Filename: ")
...
Read the file once
Reading a file is expensive, so it should be done only once. It is possible to read the content of a file
in a slice of bytes with the io/ioutil
package
fileByte, err := ioutil.ReadFile(options.Filename)
You can then compute its SHA-1 checksum in one line:
fileChecksum := sha1.Sum(fileByte)
and then create a *Reader
from it:
reader := bytes.NewReader(fileByte)
for {
read, err := reader.Read(block)
...
}
This also allows us to get the file length as an int like this:
fileLength := len(fileByte)
Method parameters
Some params are unused or redundant, for example path
and options.Filename
, they should be removed.
You can group params with same type for more readability:
func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {
can be written like this:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
Go also allows named return values for more clarity. This is useful when a method returns two values of the same type, as it does here. The method
signature could be:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {
Keep things simple
There is no need for channels and goroutines in this code; they just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspots.
The computeRsum
method could be simplified a bit to return a uint32
directly:
// computeRsum returns the weak (rolling) checksum of block.
//
// Two sums are accumulated: a, the plain sum of the bytes, and b, the sum of
// the bytes each weighted by its distance from the end of the block (the
// first byte is weighted by len(block), the last by 1). The result carries
// the low 16 bits of a in its upper half and the low 16 bits of b in its
// lower half.
func computeRsum(block []byte) uint32 {
	a, b, l := 0, 0, len(block)
	for _, v := range block {
		// v is an unsigned byte, so no sign masking is needed before widening.
		a += int(v)
		b += l * int(v)
		l--
	}
	return uint32(a<<16) | uint32(b&0xffff)
}
Use const block for constant
this block
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096
should be rewritten like this:
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)
Constants are immutable, and that's what we want here. Also note that MixedCaps (camelCase) rather than ALL_CAPS is preferred for naming constants in Go.
final version
package zsync
import (
"bufio"
"bytes"
"crypto/md5"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"os"
"strconv"
"time"
)
// Options holds the settings writeToFile uses to generate a .zsync file.
type Options struct {
// BlockSize is the block size in bytes. When it is 0, writeToFile replaces it
// with blockSizeSmall or blockSizeLarge depending on the input length.
BlockSize int
// Filename is the path of the input file; the output is written to
// Filename + ".zsync".
Filename string
// Url is written to the "URL" header line. When it is empty, writeToFile
// replaces it with Filename.
Url string
}
// Values used when generating a .zsync file.
const (
// version is written to the "zsync" header line.
version = "0.6.2"
// blockSizeSmall is the default block size, in bytes, for inputs shorter
// than 100 MiB.
blockSizeSmall = 2048
// blockSizeLarge is the default block size, in bytes, for all other inputs.
blockSizeLarge = 4096
)
// ZsyncMake generates the .zsync file described by options. If generation
// fails, the error is reported through log.Fatal, which ends the program.
func ZsyncMake(options *Options) {
	if err := writeToFile(options); err != nil {
		log.Fatal(err)
	}
}
// writeToFile reads options.Filename, computes its zsync checksums and writes
// the header followed by the checksum table to options.Filename + ".zsync".
//
// Empty option fields are filled in on the caller's struct: Url defaults to
// Filename, and BlockSize defaults to blockSizeSmall for inputs shorter than
// 100 MiB and to blockSizeLarge otherwise.
//
// It returns the first error met while inspecting or reading the input,
// computing the checksums, or creating, writing and closing the output file.
func writeToFile(options *Options) (err error) {
	// Only metadata (base name, modification time) is needed from the input
	// besides its content, so no file handle is kept open.
	fileInfo, err := os.Stat(options.Filename)
	if err != nil {
		return err
	}
	fileByte, err := ioutil.ReadFile(options.Filename)
	if err != nil {
		return err
	}
	fileLength := len(fileByte)

	if options.Url == "" {
		options.Url = options.Filename
	}
	if options.BlockSize == 0 {
		if fileLength < 100<<20 {
			options.BlockSize = blockSizeSmall
		} else {
			options.BlockSize = blockSizeLarge
		}
	}
	sequenceMatches := 1
	if fileLength > options.BlockSize {
		sequenceMatches = 2
	}
	weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
	strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

	checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
	if err != nil {
		return fmt.Errorf("fail to compute checksum: %w", err)
	}

	zsyncFile, err := os.Create(options.Filename + ".zsync")
	if err != nil {
		return err
	}
	// A failed Close can mean the data never reached the disk, so report it
	// unless an earlier error is already being returned.
	defer func() {
		if cerr := zsyncFile.Close(); err == nil {
			err = cerr
		}
	}()

	buf := bufio.NewWriter(zsyncFile)
	// bufio.Writer remembers the first write error and returns it from Flush,
	// so the individual writes below are not checked one by one.
	writeHeader := func(key, value string) {
		buf.WriteString(key)
		buf.WriteString(": ")
		buf.WriteString(value)
		buf.WriteByte('\n')
	}
	writeHeader("zsync", version)
	writeHeader("Filename", fileInfo.Name())
	writeHeader("MTime", fileInfo.ModTime().Format(time.RFC1123Z))
	writeHeader("Blocksize", strconv.Itoa(options.BlockSize))
	writeHeader("Length", strconv.Itoa(fileLength))
	writeHeader("Hash-Lengths", strconv.Itoa(sequenceMatches)+","+strconv.Itoa(weakLen)+","+strconv.Itoa(strongLen))
	writeHeader("URL", options.Url)
	writeHeader("SHA-1", hex.EncodeToString(fileChecksum))
	// An empty line separates the header from the binary checksum table.
	buf.WriteByte('\n')
	buf.Write(checksum)
	return buf.Flush()
}
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
reader := bytes.NewReader(fileByte)
checksumBytes := bytes.NewBuffer(nil)
block := make(byte, blockSize)
unsignedWeakByte := make(byte, 4)
for {
read, err := reader.Read(block)
if err != nil {
if err == io.EOF {
break
}
return nil, nil, fmt.Errorf("fail to read block: %v", err)
}
if read < blockSize {
blockSlice := block[read:blockSize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
binary.BigEndian.PutUint32(unsignedWeakByte, rsum)
checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])
strongBytes := md5.Sum(block)
checksumBytes.Write(strongBytes[:strongLen])
}
fileChecksum := sha1.Sum(fileByte)
checksumBytes.Write(fileChecksum[:])
return checksumBytes.Bytes(), fileChecksum[:], nil
}
// computeRsum returns the weak (rolling) checksum of block.
//
// Two sums are accumulated: a, the plain sum of the bytes, and b, the sum of
// the bytes each weighted by its distance from the end of the block (the
// first byte is weighted by len(block), the last by 1). The result carries
// the low 16 bits of a in its upper half and the low 16 bits of b in its
// lower half.
func computeRsum(block []byte) uint32 {
	a, b, l := 0, 0, len(block)
	for _, v := range block {
		// v is an unsigned byte, so no sign masking is needed before widening.
		a += int(v)
		b += l * int(v)
		l--
	}
	return uint32(a<<16) | uint32(b&0xffff)
}
// strongChecksumLength estimates how many bytes of the strong checksum to
// store per block.
//
// Two candidates are computed from fileLength and the block count
// (1 + fileLength/blocksize): one whose bit count is divided by
// sequenceMatches and rounded up to whole bytes, and one that is not reduced
// by sequenceMatches. The larger candidate is returned, capped at 16 bytes.
func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
	const maxStrongBytes = 16 // upper bound on the returned length (MD4 max)

	ln2 := math.Log(2)
	blockCount := float64(1 + fileLength/blocksize)

	// Estimated number of bits needed for the strong checksum.
	bits := (math.Log(float64(fileLength))+math.Log(blockCount))/ln2 + 20

	// First candidate: bits reduced by sequence matches, in whole bytes.
	reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

	// Second candidate: not reduced by sequence matches.
	unreduced := (math.Log(blockCount)/ln2 + 20 + 7.9) / 8

	longest := math.Max(reduced, unreduced)
	return int(math.Min(maxStrongBytes, longest))
}
// weakChecksumLength estimates how many bytes of the rolling checksum to store
// per block, using the formula from the Weak Checksum section of
// http://zsync.moria.org.uk/paper/ch02s03.html and dividing the bit count by
// sequenceMatches per http://zsync.moria.org.uk/paper/ch02s04.html.
//
// The result is always within [2, 4].
func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
	const (
		minBytes = 2
		maxBytes = 4
	)
	d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
	rdc := math.Ceil(d / float64(sequenceMatches) / 8)
	// Clamp while still in floating point: for an empty file math.Log(0) is
	// -Inf, and converting an infinite or NaN float to int yields an
	// implementation-dependent value.
	if math.IsNaN(rdc) || rdc < minBytes {
		return minBytes
	}
	if rdc > maxBytes {
		return maxBytes
	}
	return int(rdc)
}
performance
New code is slightly faster:
goos: linux
goarch: amd64
BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op
add a comment |
Always return errors
Don't use log.Fatal()
everywhere ! Instead, return the error with some context:
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}
Write to the bufio.Writer
directly
You are right, creating the header by appending strings is not the best way. We could instead write
the header content directly to the bufio.writer
:
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('n')
buf.WriteString("Filename: ")
...
Read the file once
Reading a file is expensive, it should be done only once. It is possible to read the content of a file
in a slice of bytes with the io/ioutil
package
fileByte, err := ioutil.ReadFile(options.Filename)
you can then compute it's SHA1 checksum in one line:
fileChecksum := sha1.Sum(fileByte)
and then create a *Reader
from it:
reader := bytes.NewReader(fileByte)
for {
read, err := reader.Read(block)
...
}
This also allow us to get the file length as an int like this:
fileLength := len(fileByte)
Method parameters
Some params are unused or redundant, for example path
and options.Filename
, they should be removed.
You can group params with same type for more readability:
func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {
can be written like this:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
Go also allows named return for more clarity. It's usefull when a method returns two values of the same type like here. Method
signature could be:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {
Keep things simple
There is no need for channels and goroutines in this code, it just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspot.
The computeRsum
method could be simplified a bit to return an int32
directly:
func computeRsum(block byte) uint32 {
a, b, l := 0, 0, len(block)
for _, v := range block {
if v < 0 {
v = v & 0xFF
}
a += int(v)
b += l * int(v)
l--
}
return uint32(a<<16) | uint32(b&0xffff)
}
Use const block for constant
this block
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096
should be rewritten like this:
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)
const are immutable, and that's what we want here. Also note that CamelCase is preferred for naming constant in go
final version
package zsync
import (
"bufio"
"bytes"
"crypto/md5"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"os"
"strconv"
"time"
)
type Options struct {
BlockSize int
Filename string
Url string
}
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)
func ZsyncMake(options *Options) {
err := writeToFile(options)
if err != nil {
log.Fatal(err)
}
}
func writeToFile(options *Options) error {
file, err := os.Open(options.Filename)
if err != nil {
return err
}
fileInfo, err := file.Stat()
if err != nil {
return err
}
fileByte, err := ioutil.ReadFile(options.Filename)
if err != nil {
return err
}
fileLength := len(fileByte)
if options.Url == "" {
options.Url = options.Filename
}
if options.BlockSize == 0 {
if fileLength < 100*1<<20 {
options.BlockSize = blockSizeSmall
} else {
options.BlockSize = blockSizeLarge
}
}
sequenceMatches := 1
if fileLength > options.BlockSize {
sequenceMatches = 2
}
weakChecksumLength := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
strongChecksumLength := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}
zsyncFile, err := os.Create(file.Name() + ".zsync")
if err != nil {
return err
}
defer zsyncFile.Close()
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('n')
buf.WriteString("Filename: ")
buf.WriteString(fileInfo.Name())
buf.WriteByte('n')
buf.WriteString("MTime: ")
buf.WriteString(fileInfo.ModTime().Format(time.RFC1123Z))
buf.WriteByte('n')
buf.WriteString("Blocksize: ")
buf.WriteString(strconv.Itoa(options.BlockSize))
buf.WriteByte('n')
buf.WriteString("Length: ")
buf.WriteString(strconv.Itoa(int(fileLength)))
buf.WriteByte('n')
buf.WriteString("Hash-Lengths: ")
buf.WriteString(strconv.Itoa(sequenceMatches))
buf.WriteByte(',')
buf.WriteString(strconv.Itoa(weakChecksumLength))
buf.WriteByte(',')
buf.WriteString(strconv.Itoa(strongChecksumLength))
buf.WriteByte('n')
buf.WriteString("URL: ")
buf.WriteString(options.Url)
buf.WriteByte('n')
buf.WriteString("SHA-1: ")
buf.WriteString(hex.EncodeToString(fileChecksum))
buf.WriteByte('n')
buf.WriteByte('n')
buf.Write(checksum)
return buf.Flush()
}
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
reader := bytes.NewReader(fileByte)
checksumBytes := bytes.NewBuffer(nil)
block := make(byte, blockSize)
unsignedWeakByte := make(byte, 4)
for {
read, err := reader.Read(block)
if err != nil {
if err == io.EOF {
break
}
return nil, nil, fmt.Errorf("fail to read block: %v", err)
}
if read < blockSize {
blockSlice := block[read:blockSize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
binary.BigEndian.PutUint32(unsignedWeakByte, rsum)
checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])
strongBytes := md5.Sum(block)
checksumBytes.Write(strongBytes[:strongLen])
}
fileChecksum := sha1.Sum(fileByte)
checksumBytes.Write(fileChecksum[:])
return checksumBytes.Bytes(), fileChecksum[:], nil
}
func computeRsum(block byte) uint32 {
a, b, l := 0, 0, len(block)
for _, v := range block {
if v < 0 {
v = v & 0xFF
}
a += int(v)
b += l * int(v)
l--
}
return uint32(a<<16) | uint32(b&0xffff)
}
func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
// estimated number of bytes to allocate for strong checksum
d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/blocksize)))/math.Log(2) + 20
// reduced number of bits by sequence matches
lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))
// second checksum - not reduced by sequence matches
lSecond := float64((math.Log(float64(1+fileLength/blocksize))/math.Log(2) + 20 + 7.9) / 8)
// return max of two: return no more than 16 bytes (MD4 max)
return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
}
func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
// estimated number of bytes to allocate for the rolling checksum per formula in
// Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
// reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
rdc := d / float64(sequenceMatches) / 8
lrdc := int(math.Ceil(rdc))
// enforce max and min values
if lrdc > 4 {
return 4
}
if lrdc < 2 {
return 2
}
return lrdc
}
performance
New code is slightly faster:
goos: linux
goarch: amd64
BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op
add a comment |
Always return errors
Don't use log.Fatal()
everywhere ! Instead, return the error with some context:
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}
Write to the bufio.Writer
directly
You are right, creating the header by appending strings is not the best way. We could instead write
the header content directly to the bufio.writer
:
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('n')
buf.WriteString("Filename: ")
...
Read the file once
Reading a file is expensive, it should be done only once. It is possible to read the content of a file
in a slice of bytes with the io/ioutil
package
fileByte, err := ioutil.ReadFile(options.Filename)
you can then compute it's SHA1 checksum in one line:
fileChecksum := sha1.Sum(fileByte)
and then create a *Reader
from it:
reader := bytes.NewReader(fileByte)
for {
read, err := reader.Read(block)
...
}
This also allow us to get the file length as an int like this:
fileLength := len(fileByte)
Method parameters
Some params are unused or redundant, for example path
and options.Filename
, they should be removed.
You can group params with same type for more readability:
func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {
can be written like this:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
Go also allows named return for more clarity. It's usefull when a method returns two values of the same type like here. Method
signature could be:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {
Keep things simple
There is no need for channels and goroutines in this code, it just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspot.
The computeRsum
method could be simplified a bit to return an int32
directly:
func computeRsum(block byte) uint32 {
a, b, l := 0, 0, len(block)
for _, v := range block {
if v < 0 {
v = v & 0xFF
}
a += int(v)
b += l * int(v)
l--
}
return uint32(a<<16) | uint32(b&0xffff)
}
Use const block for constant
this block
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096
should be rewritten like this:
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)
const are immutable, and that's what we want here. Also note that CamelCase is preferred for naming constant in go
final version
package zsync
import (
"bufio"
"bytes"
"crypto/md5"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"os"
"strconv"
"time"
)
type Options struct {
BlockSize int
Filename string
Url string
}
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)
func ZsyncMake(options *Options) {
err := writeToFile(options)
if err != nil {
log.Fatal(err)
}
}
func writeToFile(options *Options) error {
file, err := os.Open(options.Filename)
if err != nil {
return err
}
fileInfo, err := file.Stat()
if err != nil {
return err
}
fileByte, err := ioutil.ReadFile(options.Filename)
if err != nil {
return err
}
fileLength := len(fileByte)
if options.Url == "" {
options.Url = options.Filename
}
if options.BlockSize == 0 {
if fileLength < 100*1<<20 {
options.BlockSize = blockSizeSmall
} else {
options.BlockSize = blockSizeLarge
}
}
sequenceMatches := 1
if fileLength > options.BlockSize {
sequenceMatches = 2
}
weakChecksumLength := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
strongChecksumLength := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}
zsyncFile, err := os.Create(file.Name() + ".zsync")
if err != nil {
return err
}
defer zsyncFile.Close()
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('n')
buf.WriteString("Filename: ")
buf.WriteString(fileInfo.Name())
buf.WriteByte('n')
buf.WriteString("MTime: ")
buf.WriteString(fileInfo.ModTime().Format(time.RFC1123Z))
buf.WriteByte('n')
buf.WriteString("Blocksize: ")
buf.WriteString(strconv.Itoa(options.BlockSize))
buf.WriteByte('n')
buf.WriteString("Length: ")
buf.WriteString(strconv.Itoa(int(fileLength)))
buf.WriteByte('n')
buf.WriteString("Hash-Lengths: ")
buf.WriteString(strconv.Itoa(sequenceMatches))
buf.WriteByte(',')
buf.WriteString(strconv.Itoa(weakChecksumLength))
buf.WriteByte(',')
buf.WriteString(strconv.Itoa(strongChecksumLength))
buf.WriteByte('n')
buf.WriteString("URL: ")
buf.WriteString(options.Url)
buf.WriteByte('n')
buf.WriteString("SHA-1: ")
buf.WriteString(hex.EncodeToString(fileChecksum))
buf.WriteByte('n')
buf.WriteByte('n')
buf.Write(checksum)
return buf.Flush()
}
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
reader := bytes.NewReader(fileByte)
checksumBytes := bytes.NewBuffer(nil)
block := make(byte, blockSize)
unsignedWeakByte := make(byte, 4)
for {
read, err := reader.Read(block)
if err != nil {
if err == io.EOF {
break
}
return nil, nil, fmt.Errorf("fail to read block: %v", err)
}
if read < blockSize {
blockSlice := block[read:blockSize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
binary.BigEndian.PutUint32(unsignedWeakByte, rsum)
checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])
strongBytes := md5.Sum(block)
checksumBytes.Write(strongBytes[:strongLen])
}
fileChecksum := sha1.Sum(fileByte)
checksumBytes.Write(fileChecksum[:])
return checksumBytes.Bytes(), fileChecksum[:], nil
}
func computeRsum(block byte) uint32 {
a, b, l := 0, 0, len(block)
for _, v := range block {
if v < 0 {
v = v & 0xFF
}
a += int(v)
b += l * int(v)
l--
}
return uint32(a<<16) | uint32(b&0xffff)
}
func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
// estimated number of bytes to allocate for strong checksum
d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/blocksize)))/math.Log(2) + 20
// reduced number of bits by sequence matches
lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))
// second checksum - not reduced by sequence matches
lSecond := float64((math.Log(float64(1+fileLength/blocksize))/math.Log(2) + 20 + 7.9) / 8)
// return max of two: return no more than 16 bytes (MD4 max)
return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
}
func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
// estimated number of bytes to allocate for the rolling checksum per formula in
// Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
// reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
rdc := d / float64(sequenceMatches) / 8
lrdc := int(math.Ceil(rdc))
// enforce max and min values
if lrdc > 4 {
return 4
}
if lrdc < 2 {
return 2
}
return lrdc
}
performance
New code is slightly faster:
goos: linux
goarch: amd64
BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op
Always return errors
Don't use log.Fatal()
everywhere ! Instead, return the error with some context:
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}
Write to the bufio.Writer
directly
You are right, creating the header by appending strings is not the best way. We could instead write
the header content directly to the bufio.writer
:
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('n')
buf.WriteString("Filename: ")
...
Read the file once
Reading a file is expensive, it should be done only once. It is possible to read the content of a file
in a slice of bytes with the io/ioutil
package
fileByte, err := ioutil.ReadFile(options.Filename)
you can then compute it's SHA1 checksum in one line:
fileChecksum := sha1.Sum(fileByte)
and then create a *Reader
from it:
reader := bytes.NewReader(fileByte)
for {
read, err := reader.Read(block)
...
}
This also allow us to get the file length as an int like this:
fileLength := len(fileByte)
Method parameters
Some params are unused or redundant, for example path
and options.Filename
, they should be removed.
You can group params with same type for more readability:
func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {
can be written like this:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
Go also allows named return for more clarity. It's usefull when a method returns two values of the same type like here. Method
signature could be:
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {
Keep things simple
There is no need for channels and goroutines in this code, it just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspot.
The computeRsum
method could be simplified a bit to return an int32
directly:
func computeRsum(block byte) uint32 {
a, b, l := 0, 0, len(block)
for _, v := range block {
if v < 0 {
v = v & 0xFF
}
a += int(v)
b += l * int(v)
l--
}
return uint32(a<<16) | uint32(b&0xffff)
}
Use const block for constant
this block
var ZSYNC_VERSION = "0.6.2"
var BLOCK_SIZE_SMALL = 2048
var BLOCK_SIZE_LARGE = 4096
should be rewritten like this:
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)
const are immutable, and that's what we want here. Also note that CamelCase is preferred for naming constant in go
final version
package zsync
import (
"bufio"
"bytes"
"crypto/md5"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"os"
"strconv"
"time"
)
type Options struct {
BlockSize int
Filename string
Url string
}
const (
version = "0.6.2"
blockSizeSmall = 2048
blockSizeLarge = 4096
)
func ZsyncMake(options *Options) {
err := writeToFile(options)
if err != nil {
log.Fatal(err)
}
}
func writeToFile(options *Options) error {
file, err := os.Open(options.Filename)
if err != nil {
return err
}
fileInfo, err := file.Stat()
if err != nil {
return err
}
fileByte, err := ioutil.ReadFile(options.Filename)
if err != nil {
return err
}
fileLength := len(fileByte)
if options.Url == "" {
options.Url = options.Filename
}
if options.BlockSize == 0 {
if fileLength < 100*1<<20 {
options.BlockSize = blockSizeSmall
} else {
options.BlockSize = blockSizeLarge
}
}
sequenceMatches := 1
if fileLength > options.BlockSize {
sequenceMatches = 2
}
weakChecksumLength := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
strongChecksumLength := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)
checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
if err != nil {
return fmt.Errorf("fail to compute checksum: %v", err)
}
zsyncFile, err := os.Create(file.Name() + ".zsync")
if err != nil {
return err
}
defer zsyncFile.Close()
buf := bufio.NewWriter(zsyncFile)
buf.WriteString("zsync: ")
buf.WriteString(version)
buf.WriteByte('n')
buf.WriteString("Filename: ")
buf.WriteString(fileInfo.Name())
buf.WriteByte('n')
buf.WriteString("MTime: ")
buf.WriteString(fileInfo.ModTime().Format(time.RFC1123Z))
buf.WriteByte('n')
buf.WriteString("Blocksize: ")
buf.WriteString(strconv.Itoa(options.BlockSize))
buf.WriteByte('n')
buf.WriteString("Length: ")
buf.WriteString(strconv.Itoa(int(fileLength)))
buf.WriteByte('n')
buf.WriteString("Hash-Lengths: ")
buf.WriteString(strconv.Itoa(sequenceMatches))
buf.WriteByte(',')
buf.WriteString(strconv.Itoa(weakChecksumLength))
buf.WriteByte(',')
buf.WriteString(strconv.Itoa(strongChecksumLength))
buf.WriteByte('n')
buf.WriteString("URL: ")
buf.WriteString(options.Url)
buf.WriteByte('n')
buf.WriteString("SHA-1: ")
buf.WriteString(hex.EncodeToString(fileChecksum))
buf.WriteByte('n')
buf.WriteByte('n')
buf.Write(checksum)
return buf.Flush()
}
func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {
reader := bytes.NewReader(fileByte)
checksumBytes := bytes.NewBuffer(nil)
block := make(byte, blockSize)
unsignedWeakByte := make(byte, 4)
for {
read, err := reader.Read(block)
if err != nil {
if err == io.EOF {
break
}
return nil, nil, fmt.Errorf("fail to read block: %v", err)
}
if read < blockSize {
blockSlice := block[read:blockSize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}
}
rsum := computeRsum(block)
binary.BigEndian.PutUint32(unsignedWeakByte, rsum)
checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])
strongBytes := md5.Sum(block)
checksumBytes.Write(strongBytes[:strongLen])
}
fileChecksum := sha1.Sum(fileByte)
checksumBytes.Write(fileChecksum[:])
return checksumBytes.Bytes(), fileChecksum[:], nil
}
// computeRsum returns the 32-bit weak checksum of block.
//
// The upper 16 bits hold the plain sum of all bytes. The lower 16 bits hold a
// position-weighted sum in which each byte is multiplied by the number of
// bytes from its position to the end of the block (len(block) for the first
// byte, 1 for the last). Both sums wrap modulo 2^16. An empty or nil block
// yields 0.
func computeRsum(block []byte) uint32 {
	// Fixed-width unsigned arithmetic wraps modulo 2^32, which leaves the
	// 16 bits kept from each sum independent of the platform's int size.
	var a, b uint32
	l := uint32(len(block))
	for _, v := range block {
		a += uint32(v)
		b += l * uint32(v)
		l--
	}
	return a<<16 | b&0xffff
}
// strongChecksumLength estimates how many bytes of the strong checksum to
// store per block.
//
// Two candidates are computed from fileLength and the block count
// (1 + fileLength/blocksize, integer division): one whose bit count is divided
// by sequenceMatches and rounded up to whole bytes, and one that is not
// reduced by sequenceMatches. The larger candidate wins, capped at 16 bytes,
// and the result is truncated to an int.
func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
	ln2 := math.Log(2)
	lnBlocks := math.Log(float64(1 + fileLength/blocksize))

	// Total estimated bits before any reduction.
	bits := (math.Log(float64(fileLength))+lnBlocks)/ln2 + 20

	// Candidate reduced by the number of sequence matches, in whole bytes.
	reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

	// Candidate for the second checksum, which sequence matches do not reduce.
	unreduced := (lnBlocks/ln2 + 20 + 7.9) / 8

	// Take the larger candidate but never more than 16 bytes.
	longest := math.Max(reduced, unreduced)
	return int(math.Min(16, longest))
}
// weakChecksumLength estimates how many bytes of the rolling checksum to
// store per block, following the formula in the Weak Checksum section of
// http://zsync.moria.org.uk/paper/ch02s03.html with the reduction by
// sequence matches from http://zsync.moria.org.uk/paper/ch02s04.html.
//
// The result is always between 2 and 4 inclusive. An empty file
// (fileLength == 0) makes the estimate -Inf and yields the minimum of 2.
func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
	// Estimated number of bits needed for the rolling checksum.
	d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
	// Reduce by the number of sequence matches and round up to whole bytes.
	n := math.Ceil(d / float64(sequenceMatches) / 8)

	// Clamp while still in floating point: converting an infinite or NaN
	// value to int gives an implementation-dependent result in Go.
	switch {
	case n > 4:
		return 4
	case n < 2 || math.IsNaN(n):
		return 2
	}
	return int(n)
}
Performance
The new code is slightly faster and allocates less memory:
goos: linux
goarch: amd64
BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op
answered 1 hour ago
felix
71839
71839
add a comment |
add a comment |
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f204752%2fmaking-a-zsync-file-archive-with-checksums%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown