Making a Zsync file archive with checksums












3














I'm learning Go by doing. I tried to port the Java ZsyncMake implementation to Go.
I also use Go's concurrency features: goroutines and channels.
I have some experience in Java, but I have never worked with a natively compiled language. One problem I ran into immediately is that int in Go isn't the same as int32 (its size depends on the platform, whereas Java's int is always 4 bytes), so I need to cast most of the time.



Here's my code. In some comments I wrote [ASK] to indicate that I'm not sure whether that is the proper way to implement something in Go.



package zsync

import (
"bufio"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"goZsyncmake/md4"
"goZsyncmake/zsyncOptions"
"hash"
"io"
"log"
"math"
"os"
"strconv"
"time"
)

// Package-level constants. They are never reassigned, so they are declared
// as constants rather than mutable variables. The existing names are kept
// because other code refers to them.
const (
	// ZSYNC_VERSION is the version string written to the "zsync:" header line.
	ZSYNC_VERSION = "0.6.2"
	// BLOCK_SIZE_SMALL is the default block size for files under 100 MiB.
	BLOCK_SIZE_SMALL = 2048
	// BLOCK_SIZE_LARGE is the default block size for files of 100 MiB or more.
	BLOCK_SIZE_LARGE = 4096
)

// ZsyncMake generates the ".zsync" control file for the file at path.
//
// The header text, the checksum bytes and the output path all come from
// writeToFile. The header is written first, followed by the raw checksum
// bytes. Any failure terminates the process through log.Fatal.
func ZsyncMake(path string, options zsyncOptions.Options) {
	checksum, headers, zsyncFilePath := writeToFile(path, options)

	zsyncFile, err := os.Create(zsyncFilePath)
	if err != nil {
		log.Fatal(err)
	}

	out := bufio.NewWriter(zsyncFile)
	if _, err := out.WriteString(headers); err != nil {
		log.Fatal(err)
	}
	if _, err := out.Write(checksum); err != nil {
		log.Fatal(err)
	}

	// Buffered data only reaches the file on Flush, and some write errors
	// only surface on Close, so both results must be checked; otherwise a
	// truncated control file could be produced silently.
	if err := out.Flush(); err != nil {
		log.Fatal(err)
	}
	if err := zsyncFile.Close(); err != nil {
		log.Fatal(err)
	}
}

// writeToFile builds everything needed for the control file of the input at
// path. Despite its name it does not write anything itself.
//
// It returns, in order: the checksum bytes produced by computeChecksum, the
// textual header (terminated by an empty line), and the output file name
// (the input name with ".zsync" appended). Any failure terminates the
// process through log.Fatal.
func writeToFile(path string, options zsyncOptions.Options) ([]byte, string, string) {
	file, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	outputFileName := file.Name() + ".zsync"

	fileInfo, err := file.Stat()
	if err != nil {
		log.Fatal(err)
	}

	opts := calculateMissingValues(options, file)

	blockSize := opts.BlockSize
	fileLength := fileInfo.Size()

	// Compare against the effective block size (after defaults were filled
	// in). The caller-supplied options may still carry a zero BlockSize,
	// which would make every non-empty file look larger than one block.
	sequenceMatches := 1
	if fileLength > int64(blockSize) {
		sequenceMatches = 2
	}
	weakLen := weakChecksumLength(fileLength, blockSize, sequenceMatches)
	strongLen := strongChecksumLength(fileLength, blockSize, sequenceMatches)

	checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakLen, strongLen, sha1.New(), md4.New())

	// A single concatenation expression over a fixed number of pieces is
	// assembled in one allocation, so no builder is needed here.
	// FormatInt is used for the length because int may be 32 bits wide.
	strHeader := "zsync: " + ZSYNC_VERSION + "\n" +
		"Filename: " + fileInfo.Name() + "\n" +
		"MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "\n" +
		"Blocksize: " + strconv.Itoa(blockSize) + "\n" +
		"Length: " + strconv.FormatInt(fileLength, 10) + "\n" +
		"Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakLen) + "," + strconv.Itoa(strongLen) + "\n" +
		"URL: " + opts.Url + "\n" +
		"SHA-1: " + hex.EncodeToString(fileChecksum) + "\n\n"

	return checksum, strHeader, outputFileName
}

// sha1HashFile opens the file at path, streams its whole content through a
// SHA-1 hasher and sends the resulting digest on fileChecksumChannel.
//
// The send blocks until a receiver is ready unless the channel is buffered.
// Open or read failures terminate the process through log.Fatal.
func sha1HashFile(path string, fileChecksumChannel chan []byte) {
	file, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	hasher := sha1.New()
	if _, err := io.Copy(hasher, file); err != nil {
		log.Fatal(err)
	}

	fileChecksumChannel <- hasher.Sum(nil)
}

func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) (byte, byte) {

checksumBytes := make(byte, 0)
block := make(byte, blocksize)

fileChecksumChannel := make(chan byte)
go sha1HashFile(f.Name(), fileChecksumChannel)

for {
read, err := f.Read(block)
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}

if read < blocksize {

blockSlice := block[read:blocksize]
for i := range blockSlice {
blockSlice[i] = byte(0)
}

}

rsum := computeRsum(block)

unsignedWeakByte := make(byte, 4)
binary.BigEndian.PutUint32(unsignedWeakByte, uint32(rsum))

tempUnsignedWeakByte := unsignedWeakByte[len(unsignedWeakByte)-weakLen:]
checksumBytes = append(checksumBytes, tempUnsignedWeakByte...)

blockDigest.Reset()
blockDigest.Write(block)
strongBytes := blockDigest.Sum(nil)

tempUnsignedStrongByte := strongBytes[:strongLen]
checksumBytes = append(checksumBytes, tempUnsignedStrongByte...)

}

fileChecksum := <- fileChecksumChannel

checksumBytes = append(checksumBytes, fileChecksum...)

return checksumBytes, fileChecksum

}

// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// strongChecksumLength returns how many bytes of the strong (MD4) block hash
// to store per block, for a file of fileLength bytes split into blocks of
// blocksize bytes, when sequenceMatches consecutive block matches are
// required. The result never exceeds 16, the size of an MD4 digest.
func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
	ln2 := math.Log(2)
	lnLength := math.Log(float64(fileLength))
	lnBlocks := math.Log(float64(1 + fileLength/int64(blocksize)))

	// Estimated number of bits required for the strong checksum.
	bits := (lnLength+lnBlocks)/ln2 + 20

	// First candidate: byte count after dividing the bits between the
	// sequence matches, rounded up.
	reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

	// Second candidate: not reduced by sequence matches.
	unreduced := (lnBlocks/ln2 + 20 + 7.9) / 8

	// Take the larger candidate, capped at the 16-byte MD4 digest size.
	longest := math.Max(reduced, unreduced)
	return int(math.Min(16, longest))
}

// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// weakChecksumLength returns how many bytes of the rolling (weak) checksum
// to store per block, for a file of fileLength bytes split into blocks of
// blocksize bytes, when sequenceMatches consecutive block matches are
// required. The result is always between 2 and 4 inclusive.
//
// The estimate follows the formula in the Weak Checksum section of
// http://zsync.moria.org.uk/paper/ch02s03.html, reduced by sequence matches
// per http://zsync.moria.org.uk/paper/ch02s04.html.
func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
	const (
		minBytes = 2
		maxBytes = 4
	)

	// Estimated number of bits required for the rolling checksum.
	bits := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6

	// Divide between the sequence matches and round up to whole bytes.
	n := int(math.Ceil(bits / float64(sequenceMatches) / 8))

	switch {
	case n > maxBytes:
		return maxBytes
	case n < minBytes:
		return minBytes
	default:
		return n
	}
}

// [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
// computeRsum returns the rolling (weak) checksum of block.
//
// Two 16-bit sums are kept: a is the plain sum of the bytes, and b is the sum
// of each byte multiplied by its distance from the end of the block (the
// first byte is weighted by len(block), the last by 1). Both wrap around at
// 16 bits. The result carries a in the bits above bit 15 and b in the low 16
// bits. Because a is signed the returned int may be negative; callers
// convert it to uint32 to obtain the 32-bit checksum.
func computeRsum(block []byte) int {
	var a, b int16
	l := len(block)
	// A Go byte is already unsigned, so each value is used directly. The
	// narrowing conversions to int16 provide the intended 16-bit wraparound.
	for _, v := range block {
		val := int(v)
		a += int16(val)
		b += int16(l * val)
		l--
	}
	return int(a)<<16 | int(b)&0xffff
}

// unsign returns b unchanged.
//
// It is kept for callers ported from Java, where bytes are signed. In Go,
// byte is an alias for uint8 and can never be negative, so no masking is
// needed.
func unsign(b byte) uint8 {
	return b
}

// calculateMissingValues returns a copy of opts with every unset field
// filled in: a zero BlockSize is replaced by the default block size for f,
// and an empty Filename or Url is replaced by the name of f.
func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
	// useFileName substitutes the name of f for an empty string field.
	useFileName := func(field *string) {
		if *field == "" {
			*field = f.Name()
		}
	}

	if opts.BlockSize == 0 {
		opts.BlockSize = calculateDefaultBlockSizeForInputFile(f)
	}
	useFileName(&opts.Filename)
	useFileName(&opts.Url)

	return opts
}

func calculateDefaultBlockSizeForInputFile(f *os.File) int {
fileInfo, err := f.Stat()
if err != nil {
log.Fatal(err)
}
if fileInfo.Size() < 100*1<<20 {
return BLOCK_SIZE_SMALL
} else {
return BLOCK_SIZE_LARGE
}
}


Also, coming from a Java background, I am used to modularizing everything, including putting this Options struct in a separate file. Am I supposed to modularize it like this?



package zsyncOptions

// Options carries the caller-supplied settings for generating a zsync
// control file. Zero values mean "not set"; the zsync package fills them in
// before use.
type Options struct {
	// BlockSize is the block size in bytes. When zero, a default is chosen
	// from the size of the input file.
	BlockSize int
	// Filename defaults to the input file's name when empty.
	// NOTE(review): the header shown in the zsync package takes its
	// "Filename:" value from the file info, not from this field — confirm
	// whether this field is meant to be used.
	Filename string
	// Url is written to the "URL:" header line; it defaults to the input
	// file's name when empty.
	Url string
}









share|improve this question





























    3














    I'm learning go by doing it. I tried to port the Java's ZsyncMake implementation into Golang.
    I also employ the Go's concurrency API with goroutine and channel.
    I have some experience in Java, but never work with native language. One immediately coming problem is int in Golang isn't the same as int32 (since it depends on the platform; Java's int is 4 byte), thus I need to cast it most of the time.



    Here's my code. In some comments I wrote [ASK] to indicate that I'm not sure if it's a proper way of implementation in Go



    package zsync

    import (
    "bufio"
    "crypto/sha1"
    "encoding/binary"
    "encoding/hex"
    "goZsyncmake/md4"
    "goZsyncmake/zsyncOptions"
    "hash"
    "io"
    "log"
    "math"
    "os"
    "strconv"
    "time"
    )

    var ZSYNC_VERSION = "0.6.2"
    var BLOCK_SIZE_SMALL = 2048
    var BLOCK_SIZE_LARGE = 4096

    func ZsyncMake(path string, options zsyncOptions.Options) {
    checksum, headers, zsyncFilePath := writeToFile(path, options)
    zsyncFile, err := os.Create(zsyncFilePath)
    if err != nil {
    log.Fatal(err)
    }
    defer zsyncFile.Close()

    bfio := bufio.NewWriter(zsyncFile)
    _, err = bfio.WriteString(headers)
    if err != nil {
    log.Fatal(err)
    }

    _, err = bfio.Write(checksum)
    if err != nil {
    log.Fatal(err)
    }

    bfio.Flush()
    }

    func writeToFile(path string, options zsyncOptions.Options) (byte, string, string) {
    file, err := os.Open(path)
    if err != nil {
    log.Fatal(err)
    }
    defer file.Close()

    outputFileName := file.Name() + ".zsync"

    fileInfo, err := file.Stat()
    if err != nil {
    log.Fatal(err)
    }

    opts := calculateMissingValues(options, file)

    blockSize := opts.BlockSize
    fileLength := fileInfo.Size()
    sequenceMatches := 0
    if fileLength > int64(options.BlockSize) {
    sequenceMatches = 2
    } else {
    sequenceMatches = 1
    }
    weakChecksumLength := weakChecksumLength(fileLength, blockSize, sequenceMatches)
    strongChecksumLength := strongChecksumLength(fileLength, blockSize, sequenceMatches)

    fileDigest := sha1.New()
    blockDigest := md4.New()

    checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakChecksumLength, strongChecksumLength, fileDigest, blockDigest)
    strFileChecksum := hex.EncodeToString(fileChecksum)

    // [ASK] I suspect I can improve performance here rather than appending string with +
    strHeader := "zsync: " + ZSYNC_VERSION + "n" +
    "Filename: " + fileInfo.Name() + "n" +
    "MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "n" +
    "Blocksize: " + strconv.Itoa(blockSize) + "n" +
    "Length: " + strconv.Itoa(int(fileLength)) + "n" +
    "Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakChecksumLength) + "," + strconv.Itoa(strongChecksumLength) + "n" +
    "URL: " + opts.Url + "n" +
    "SHA-1: " + strFileChecksum + "nn"

    return checksum, strHeader, outputFileName

    }

    func sha1HashFile(path string, fileChecksumChannel chan byte) {
    file, err := os.Open(path)
    if err != nil {
    log.Fatal(err)
    }
    defer file.Close()

    hasher := sha1.New()
    if _, err := io.Copy(hasher, file); err != nil {
    log.Fatal(err)
    }

    fileChecksumChannel <- hasher.Sum(nil)
    }

    func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) (byte, byte) {

    checksumBytes := make(byte, 0)
    block := make(byte, blocksize)

    fileChecksumChannel := make(chan byte)
    go sha1HashFile(f.Name(), fileChecksumChannel)

    for {
    read, err := f.Read(block)
    if err != nil {
    if err == io.EOF {
    break
    }
    log.Fatal(err)
    }

    if read < blocksize {

    blockSlice := block[read:blocksize]
    for i := range blockSlice {
    blockSlice[i] = byte(0)
    }

    }

    rsum := computeRsum(block)

    unsignedWeakByte := make(byte, 4)
    binary.BigEndian.PutUint32(unsignedWeakByte, uint32(rsum))

    tempUnsignedWeakByte := unsignedWeakByte[len(unsignedWeakByte)-weakLen:]
    checksumBytes = append(checksumBytes, tempUnsignedWeakByte...)

    blockDigest.Reset()
    blockDigest.Write(block)
    strongBytes := blockDigest.Sum(nil)

    tempUnsignedStrongByte := strongBytes[:strongLen]
    checksumBytes = append(checksumBytes, tempUnsignedStrongByte...)

    }

    fileChecksum := <- fileChecksumChannel

    checksumBytes = append(checksumBytes, fileChecksum...)

    return checksumBytes, fileChecksum

    }

    // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
    func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
    // estimated number of bytes to allocate for strong checksum
    d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/int64(blocksize))))/math.Log(2) + 20

    // reduced number of bits by sequence matches
    lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))

    // second checksum - not reduced by sequence matches
    lSecond := float64((math.Log(float64(1+fileLength/int64(blocksize)))/math.Log(2) + 20 + 7.9) / 8)

    // return max of two: return no more than 16 bytes (MD4 max)
    return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
    }

    // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
    func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
    // estimated number of bytes to allocate for the rolling checksum per formula in
    // Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
    d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6

    // reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
    rdc := d / float64(sequenceMatches) / 8
    lrdc := int(math.Ceil(rdc))

    // enforce max and min values
    if lrdc > 4 {
    return 4
    } else {
    if lrdc < 2 {
    return 2
    } else {
    return lrdc
    }
    }
    }

    // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
    func computeRsum(block byte) int {
    var a int16
    var b int16
    l := len(block)
    for i := 0; i < len(block); i++ {
    val := int(unsign(block[i]))
    a += int16(val)
    b += int16(l * val)
    l--
    }
    x := int(a) << 16
    y := int(b) & 0xffff
    return int(x) | int(y)
    }

    func unsign(b byte) uint8 {
    if b < 0 {
    return b & 0xFF
    } else {
    return b
    }
    }

    func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
    if opts.BlockSize == 0 {
    opts.BlockSize = calculateDefaultBlockSizeForInputFile(f)
    }
    if opts.Filename == "" {
    opts.Filename = f.Name()
    }
    if opts.Url == "" {
    opts.Url = f.Name()
    }
    return opts
    }

    func calculateDefaultBlockSizeForInputFile(f *os.File) int {
    fileInfo, err := f.Stat()
    if err != nil {
    log.Fatal(err)
    }
    if fileInfo.Size() < 100*1<<20 {
    return BLOCK_SIZE_SMALL
    } else {
    return BLOCK_SIZE_LARGE
    }
    }


    Also, coming from Java background, I get use to modularize everything, including this Options struct onto other file. Am I suppose to modularize it?



    package zsyncOptions

    type Options struct {
    BlockSize int
    Filename string
    Url string
    }









    share|improve this question



























      3












      3








      3







      I'm learning go by doing it. I tried to port the Java's ZsyncMake implementation into Golang.
      I also employ the Go's concurrency API with goroutine and channel.
      I have some experience in Java, but never work with native language. One immediately coming problem is int in Golang isn't the same as int32 (since it depends on the platform; Java's int is 4 byte), thus I need to cast it most of the time.



      Here's my code. In some comments I wrote [ASK] to indicate that I'm not sure if it's a proper way of implementation in Go



      package zsync

      import (
      "bufio"
      "crypto/sha1"
      "encoding/binary"
      "encoding/hex"
      "goZsyncmake/md4"
      "goZsyncmake/zsyncOptions"
      "hash"
      "io"
      "log"
      "math"
      "os"
      "strconv"
      "time"
      )

      var ZSYNC_VERSION = "0.6.2"
      var BLOCK_SIZE_SMALL = 2048
      var BLOCK_SIZE_LARGE = 4096

      func ZsyncMake(path string, options zsyncOptions.Options) {
      checksum, headers, zsyncFilePath := writeToFile(path, options)
      zsyncFile, err := os.Create(zsyncFilePath)
      if err != nil {
      log.Fatal(err)
      }
      defer zsyncFile.Close()

      bfio := bufio.NewWriter(zsyncFile)
      _, err = bfio.WriteString(headers)
      if err != nil {
      log.Fatal(err)
      }

      _, err = bfio.Write(checksum)
      if err != nil {
      log.Fatal(err)
      }

      bfio.Flush()
      }

      func writeToFile(path string, options zsyncOptions.Options) (byte, string, string) {
      file, err := os.Open(path)
      if err != nil {
      log.Fatal(err)
      }
      defer file.Close()

      outputFileName := file.Name() + ".zsync"

      fileInfo, err := file.Stat()
      if err != nil {
      log.Fatal(err)
      }

      opts := calculateMissingValues(options, file)

      blockSize := opts.BlockSize
      fileLength := fileInfo.Size()
      sequenceMatches := 0
      if fileLength > int64(options.BlockSize) {
      sequenceMatches = 2
      } else {
      sequenceMatches = 1
      }
      weakChecksumLength := weakChecksumLength(fileLength, blockSize, sequenceMatches)
      strongChecksumLength := strongChecksumLength(fileLength, blockSize, sequenceMatches)

      fileDigest := sha1.New()
      blockDigest := md4.New()

      checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakChecksumLength, strongChecksumLength, fileDigest, blockDigest)
      strFileChecksum := hex.EncodeToString(fileChecksum)

      // [ASK] I suspect I can improve performance here rather than appending string with +
      strHeader := "zsync: " + ZSYNC_VERSION + "n" +
      "Filename: " + fileInfo.Name() + "n" +
      "MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "n" +
      "Blocksize: " + strconv.Itoa(blockSize) + "n" +
      "Length: " + strconv.Itoa(int(fileLength)) + "n" +
      "Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakChecksumLength) + "," + strconv.Itoa(strongChecksumLength) + "n" +
      "URL: " + opts.Url + "n" +
      "SHA-1: " + strFileChecksum + "nn"

      return checksum, strHeader, outputFileName

      }

      func sha1HashFile(path string, fileChecksumChannel chan byte) {
      file, err := os.Open(path)
      if err != nil {
      log.Fatal(err)
      }
      defer file.Close()

      hasher := sha1.New()
      if _, err := io.Copy(hasher, file); err != nil {
      log.Fatal(err)
      }

      fileChecksumChannel <- hasher.Sum(nil)
      }

      func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) (byte, byte) {

      checksumBytes := make(byte, 0)
      block := make(byte, blocksize)

      fileChecksumChannel := make(chan byte)
      go sha1HashFile(f.Name(), fileChecksumChannel)

      for {
      read, err := f.Read(block)
      if err != nil {
      if err == io.EOF {
      break
      }
      log.Fatal(err)
      }

      if read < blocksize {

      blockSlice := block[read:blocksize]
      for i := range blockSlice {
      blockSlice[i] = byte(0)
      }

      }

      rsum := computeRsum(block)

      unsignedWeakByte := make(byte, 4)
      binary.BigEndian.PutUint32(unsignedWeakByte, uint32(rsum))

      tempUnsignedWeakByte := unsignedWeakByte[len(unsignedWeakByte)-weakLen:]
      checksumBytes = append(checksumBytes, tempUnsignedWeakByte...)

      blockDigest.Reset()
      blockDigest.Write(block)
      strongBytes := blockDigest.Sum(nil)

      tempUnsignedStrongByte := strongBytes[:strongLen]
      checksumBytes = append(checksumBytes, tempUnsignedStrongByte...)

      }

      fileChecksum := <- fileChecksumChannel

      checksumBytes = append(checksumBytes, fileChecksum...)

      return checksumBytes, fileChecksum

      }

      // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
      func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
      // estimated number of bytes to allocate for strong checksum
      d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/int64(blocksize))))/math.Log(2) + 20

      // reduced number of bits by sequence matches
      lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))

      // second checksum - not reduced by sequence matches
      lSecond := float64((math.Log(float64(1+fileLength/int64(blocksize)))/math.Log(2) + 20 + 7.9) / 8)

      // return max of two: return no more than 16 bytes (MD4 max)
      return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
      }

      // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
      func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
      // estimated number of bytes to allocate for the rolling checksum per formula in
      // Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
      d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6

      // reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
      rdc := d / float64(sequenceMatches) / 8
      lrdc := int(math.Ceil(rdc))

      // enforce max and min values
      if lrdc > 4 {
      return 4
      } else {
      if lrdc < 2 {
      return 2
      } else {
      return lrdc
      }
      }
      }

      // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
      func computeRsum(block byte) int {
      var a int16
      var b int16
      l := len(block)
      for i := 0; i < len(block); i++ {
      val := int(unsign(block[i]))
      a += int16(val)
      b += int16(l * val)
      l--
      }
      x := int(a) << 16
      y := int(b) & 0xffff
      return int(x) | int(y)
      }

      func unsign(b byte) uint8 {
      if b < 0 {
      return b & 0xFF
      } else {
      return b
      }
      }

      func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
      if opts.BlockSize == 0 {
      opts.BlockSize = calculateDefaultBlockSizeForInputFile(f)
      }
      if opts.Filename == "" {
      opts.Filename = f.Name()
      }
      if opts.Url == "" {
      opts.Url = f.Name()
      }
      return opts
      }

      func calculateDefaultBlockSizeForInputFile(f *os.File) int {
      fileInfo, err := f.Stat()
      if err != nil {
      log.Fatal(err)
      }
      if fileInfo.Size() < 100*1<<20 {
      return BLOCK_SIZE_SMALL
      } else {
      return BLOCK_SIZE_LARGE
      }
      }


      Also, coming from Java background, I get use to modularize everything, including this Options struct onto other file. Am I suppose to modularize it?



      package zsyncOptions

      type Options struct {
      BlockSize int
      Filename string
      Url string
      }









      share|improve this question















      I'm learning go by doing it. I tried to port the Java's ZsyncMake implementation into Golang.
      I also employ the Go's concurrency API with goroutine and channel.
      I have some experience in Java, but never work with native language. One immediately coming problem is int in Golang isn't the same as int32 (since it depends on the platform; Java's int is 4 byte), thus I need to cast it most of the time.



      Here's my code. In some comments I wrote [ASK] to indicate that I'm not sure if it's a proper way of implementation in Go



      package zsync

      import (
      "bufio"
      "crypto/sha1"
      "encoding/binary"
      "encoding/hex"
      "goZsyncmake/md4"
      "goZsyncmake/zsyncOptions"
      "hash"
      "io"
      "log"
      "math"
      "os"
      "strconv"
      "time"
      )

      var ZSYNC_VERSION = "0.6.2"
      var BLOCK_SIZE_SMALL = 2048
      var BLOCK_SIZE_LARGE = 4096

      func ZsyncMake(path string, options zsyncOptions.Options) {
      checksum, headers, zsyncFilePath := writeToFile(path, options)
      zsyncFile, err := os.Create(zsyncFilePath)
      if err != nil {
      log.Fatal(err)
      }
      defer zsyncFile.Close()

      bfio := bufio.NewWriter(zsyncFile)
      _, err = bfio.WriteString(headers)
      if err != nil {
      log.Fatal(err)
      }

      _, err = bfio.Write(checksum)
      if err != nil {
      log.Fatal(err)
      }

      bfio.Flush()
      }

      func writeToFile(path string, options zsyncOptions.Options) (byte, string, string) {
      file, err := os.Open(path)
      if err != nil {
      log.Fatal(err)
      }
      defer file.Close()

      outputFileName := file.Name() + ".zsync"

      fileInfo, err := file.Stat()
      if err != nil {
      log.Fatal(err)
      }

      opts := calculateMissingValues(options, file)

      blockSize := opts.BlockSize
      fileLength := fileInfo.Size()
      sequenceMatches := 0
      if fileLength > int64(options.BlockSize) {
      sequenceMatches = 2
      } else {
      sequenceMatches = 1
      }
      weakChecksumLength := weakChecksumLength(fileLength, blockSize, sequenceMatches)
      strongChecksumLength := strongChecksumLength(fileLength, blockSize, sequenceMatches)

      fileDigest := sha1.New()
      blockDigest := md4.New()

      checksum, fileChecksum := computeChecksum(file, blockSize, fileLength, weakChecksumLength, strongChecksumLength, fileDigest, blockDigest)
      strFileChecksum := hex.EncodeToString(fileChecksum)

      // [ASK] I suspect I can improve performance here rather than appending string with +
      strHeader := "zsync: " + ZSYNC_VERSION + "n" +
      "Filename: " + fileInfo.Name() + "n" +
      "MTime: " + fileInfo.ModTime().Format(time.RFC1123Z) + "n" +
      "Blocksize: " + strconv.Itoa(blockSize) + "n" +
      "Length: " + strconv.Itoa(int(fileLength)) + "n" +
      "Hash-Lengths: " + strconv.Itoa(sequenceMatches) + "," + strconv.Itoa(weakChecksumLength) + "," + strconv.Itoa(strongChecksumLength) + "n" +
      "URL: " + opts.Url + "n" +
      "SHA-1: " + strFileChecksum + "nn"

      return checksum, strHeader, outputFileName

      }

      func sha1HashFile(path string, fileChecksumChannel chan byte) {
      file, err := os.Open(path)
      if err != nil {
      log.Fatal(err)
      }
      defer file.Close()

      hasher := sha1.New()
      if _, err := io.Copy(hasher, file); err != nil {
      log.Fatal(err)
      }

      fileChecksumChannel <- hasher.Sum(nil)
      }

      func computeChecksum(f *os.File, blocksize int, fileLength int64, weakLen int, strongLen int, fileDigest hash.Hash, blockDigest hash.Hash) (byte, byte) {

      checksumBytes := make(byte, 0)
      block := make(byte, blocksize)

      fileChecksumChannel := make(chan byte)
      go sha1HashFile(f.Name(), fileChecksumChannel)

      for {
      read, err := f.Read(block)
      if err != nil {
      if err == io.EOF {
      break
      }
      log.Fatal(err)
      }

      if read < blocksize {

      blockSlice := block[read:blocksize]
      for i := range blockSlice {
      blockSlice[i] = byte(0)
      }

      }

      rsum := computeRsum(block)

      unsignedWeakByte := make(byte, 4)
      binary.BigEndian.PutUint32(unsignedWeakByte, uint32(rsum))

      tempUnsignedWeakByte := unsignedWeakByte[len(unsignedWeakByte)-weakLen:]
      checksumBytes = append(checksumBytes, tempUnsignedWeakByte...)

      blockDigest.Reset()
      blockDigest.Write(block)
      strongBytes := blockDigest.Sum(nil)

      tempUnsignedStrongByte := strongBytes[:strongLen]
      checksumBytes = append(checksumBytes, tempUnsignedStrongByte...)

      }

      fileChecksum := <- fileChecksumChannel

      checksumBytes = append(checksumBytes, fileChecksum...)

      return checksumBytes, fileChecksum

      }

      // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
      func strongChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
      // estimated number of bytes to allocate for strong checksum
      d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/int64(blocksize))))/math.Log(2) + 20

      // reduced number of bits by sequence matches
      lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))

      // second checksum - not reduced by sequence matches
      lSecond := float64((math.Log(float64(1+fileLength/int64(blocksize)))/math.Log(2) + 20 + 7.9) / 8)

      // return max of two: return no more than 16 bytes (MD4 max)
      return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
      }

      // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
      func weakChecksumLength(fileLength int64, blocksize int, sequenceMatches int) int {
      // estimated number of bytes to allocate for the rolling checksum per formula in
      // Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
      d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6

      // reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
      rdc := d / float64(sequenceMatches) / 8
      lrdc := int(math.Ceil(rdc))

      // enforce max and min values
      if lrdc > 4 {
      return 4
      } else {
      if lrdc < 2 {
      return 2
      } else {
      return lrdc
      }
      }
      }

      // [ASK] A lot of type casting happen here, not sure if it's a good practice in Go
      func computeRsum(block byte) int {
      var a int16
      var b int16
      l := len(block)
      for i := 0; i < len(block); i++ {
      val := int(unsign(block[i]))
      a += int16(val)
      b += int16(l * val)
      l--
      }
      x := int(a) << 16
      y := int(b) & 0xffff
      return int(x) | int(y)
      }

      func unsign(b byte) uint8 {
      if b < 0 {
      return b & 0xFF
      } else {
      return b
      }
      }

      func calculateMissingValues(opts zsyncOptions.Options, f *os.File) zsyncOptions.Options {
      if opts.BlockSize == 0 {
      opts.BlockSize = calculateDefaultBlockSizeForInputFile(f)
      }
      if opts.Filename == "" {
      opts.Filename = f.Name()
      }
      if opts.Url == "" {
      opts.Url = f.Name()
      }
      return opts
      }

      func calculateDefaultBlockSizeForInputFile(f *os.File) int {
      fileInfo, err := f.Stat()
      if err != nil {
      log.Fatal(err)
      }
      if fileInfo.Size() < 100*1<<20 {
      return BLOCK_SIZE_SMALL
      } else {
      return BLOCK_SIZE_LARGE
      }
      }


      Also, coming from Java background, I get use to modularize everything, including this Options struct onto other file. Am I suppose to modularize it?



      package zsyncOptions

      type Options struct {
      BlockSize int
      Filename string
      Url string
      }






      file go casting checksum






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited Oct 2 at 13:57









      200_success

      128k15150412




      128k15150412










      asked Oct 2 at 7:04









      imeluntuk

      4114




      4114






















          1 Answer
          1






          active

          oldest

          votes


















          0














          Always return errors



          Don't use log.Fatal() everywhere! Instead, return the error with some context:



          checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
          if err != nil {
          return fmt.Errorf("fail to compute checksum: %v", err)
          }


          Write to the bufio.Writer directly



          You are right, creating the header by appending strings is not the best way. We could instead write
          the header content directly to the bufio.Writer:



          buf := bufio.NewWriter(zsyncFile)
          buf.WriteString("zsync: ")
          buf.WriteString(version)
          buf.WriteByte('\n')

          buf.WriteString("Filename: ")


          ...



          Read the file once



          Reading a file is expensive, so it should be done only once. The content of a file can be read
          into a slice of bytes with the io/ioutil package:



          fileByte, err := ioutil.ReadFile(options.Filename)


          You can then compute its SHA-1 checksum in one line:



          fileChecksum := sha1.Sum(fileByte)


          and then create a *Reader from it:



          reader := bytes.NewReader(fileByte)
          for {
          read, err := reader.Read(block)
          ...
          }


          This also allows us to get the file length as an int like this:



          fileLength := len(fileByte)


          Method parameters



          Some params are unused or redundant, for example path and options.Filename, they should be removed.
          You can group params with same type for more readability:



          func computeChecksum(fileByte []byte, blockSize int, weakLen int, strongLen int) ([]byte, []byte, error) {


          can be written like this:



          func computeChecksum(fileByte []byte, blockSize, weakLen, strongLen int) ([]byte, []byte, error) {


          Go also allows named return values for more clarity. They are useful when a function returns two values of the same type, as it does here. The
          signature could be:



          func computeChecksum(fileByte []byte, blockSize, weakLen, strongLen int) (checksum []byte, fileChecksum []byte, err error) {


          Keep things simple



          There is no need for channels and goroutines in this code; they just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspot.



          The computeRsum function could be simplified a bit to return a uint32 directly:



          // computeRsum returns the weak rolling checksum of block packed into 32 bits.
          //
          // Two running sums are kept: a is the plain sum of the bytes, and b weights
          // each byte by its distance from the end of the block (the first byte is
          // multiplied by len(block), the last by 1). The low 16 bits of a form the
          // upper half of the result and the low 16 bits of b form the lower half.
          func computeRsum(block []byte) uint32 {
              a, b, l := 0, 0, len(block)

              for _, v := range block {
                  // v is an unsigned byte, so it needs no sign masking before widening.
                  a += int(v)
                  b += l * int(v)
                  l--
              }
              return uint32(a<<16) | uint32(b&0xffff)
          }


          Use const block for constant



          this block



          var ZSYNC_VERSION = "0.6.2"
          var BLOCK_SIZE_SMALL = 2048
          var BLOCK_SIZE_LARGE = 4096


          should be rewritten like this:



          const (
          version = "0.6.2"
          blockSizeSmall = 2048
          blockSizeLarge = 4096
          )


          Constants are immutable, and that's what we want here. Also note that MixedCaps (camelCase) is preferred over ALL_CAPS for naming constants in Go.



          final version



          package zsync

          import (
          "bufio"
          "bytes"
          "crypto/md5"
          "crypto/sha1"
          "encoding/binary"
          "encoding/hex"
          "fmt"
          "io"
          "io/ioutil"
          "log"
          "math"
          "os"
          "strconv"
          "time"
          )

          // Options configures the generation of a .zsync file.
          type Options struct {
          // BlockSize is the size of each checksummed block. When it is 0,
          // writeToFile replaces it with blockSizeSmall or blockSizeLarge
          // depending on the length of the input file.
          BlockSize int
          // Filename is the path of the input file; the output is written to
          // the same path with ".zsync" appended.
          Filename string
          // Url is written to the "URL" header. When empty, writeToFile sets it
          // to Filename.
          Url string
          }

          const (
          // version is the value written to the "zsync" header line.
          version = "0.6.2"
          // blockSizeSmall is the default block size for inputs shorter than 100 MiB.
          blockSizeSmall = 2048
          // blockSizeLarge is the default block size for all other inputs.
          blockSizeLarge = 4096
          )

          // ZsyncMake generates the .zsync file described by options. If writeToFile
          // reports an error, it is logged and the program exits via log.Fatal.
          func ZsyncMake(options *Options) {
              if err := writeToFile(options); err != nil {
                  log.Fatal(err)
              }
          }

          // writeToFile reads options.Filename, computes its block checksums and
          // writes the header followed by the checksum data to
          // options.Filename + ".zsync".
          //
          // Unset options are filled in on the caller's struct: an empty Url becomes
          // Filename, and a zero BlockSize becomes blockSizeSmall for inputs shorter
          // than 100 MiB and blockSizeLarge otherwise.
          //
          // It returns the first error met while stat-ing or reading the input,
          // computing the checksums, or creating, writing and closing the output.
          func writeToFile(options *Options) error {
              // Only metadata is needed here, so use os.Stat instead of opening a
              // handle that would have to be closed on every return path.
              fileInfo, err := os.Stat(options.Filename)
              if err != nil {
                  return err
              }

              fileByte, err := ioutil.ReadFile(options.Filename)
              if err != nil {
                  return err
              }
              fileLength := len(fileByte)

              if options.Url == "" {
                  options.Url = options.Filename
              }

              if options.BlockSize == 0 {
                  if fileLength < 100<<20 {
                      options.BlockSize = blockSizeSmall
                  } else {
                      options.BlockSize = blockSizeLarge
                  }
              }

              sequenceMatches := 1
              if fileLength > options.BlockSize {
                  sequenceMatches = 2
              }
              weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
              strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

              checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
              if err != nil {
                  return fmt.Errorf("fail to compute checksum: %v", err)
              }

              zsyncFile, err := os.Create(options.Filename + ".zsync")
              if err != nil {
                  return err
              }
              // Safety net for early returns; the explicit Close at the end is the
              // one whose error is reported.
              defer zsyncFile.Close()

              buf := bufio.NewWriter(zsyncFile)

              // A bufio.Writer keeps its first write error and returns it from Flush,
              // so the individual writes below are intentionally left unchecked.
              header := func(key, value string) {
                  buf.WriteString(key)
                  buf.WriteString(": ")
                  buf.WriteString(value)
                  buf.WriteByte('\n')
              }
              header("zsync", version)
              header("Filename", fileInfo.Name())
              header("MTime", fileInfo.ModTime().Format(time.RFC1123Z))
              header("Blocksize", strconv.Itoa(options.BlockSize))
              header("Length", strconv.Itoa(fileLength))
              header("Hash-Lengths", strconv.Itoa(sequenceMatches)+","+strconv.Itoa(weakLen)+","+strconv.Itoa(strongLen))
              header("URL", options.Url)
              header("SHA-1", hex.EncodeToString(fileChecksum))
              // An empty line separates the textual header from the binary checksums.
              buf.WriteByte('\n')

              buf.Write(checksum)

              if err := buf.Flush(); err != nil {
                  return err
              }
              return zsyncFile.Close()
          }

          func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {

          reader := bytes.NewReader(fileByte)

          checksumBytes := bytes.NewBuffer(nil)
          block := make(byte, blockSize)
          unsignedWeakByte := make(byte, 4)

          for {
          read, err := reader.Read(block)
          if err != nil {
          if err == io.EOF {
          break
          }
          return nil, nil, fmt.Errorf("fail to read block: %v", err)
          }

          if read < blockSize {

          blockSlice := block[read:blockSize]
          for i := range blockSlice {
          blockSlice[i] = byte(0)
          }
          }

          rsum := computeRsum(block)
          binary.BigEndian.PutUint32(unsignedWeakByte, rsum)

          checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])

          strongBytes := md5.Sum(block)
          checksumBytes.Write(strongBytes[:strongLen])
          }

          fileChecksum := sha1.Sum(fileByte)
          checksumBytes.Write(fileChecksum[:])

          return checksumBytes.Bytes(), fileChecksum[:], nil
          }

          // computeRsum returns the weak rolling checksum of block packed into 32 bits.
          //
          // Two running sums are kept: a is the plain sum of the bytes, and b weights
          // each byte by its distance from the end of the block (the first byte is
          // multiplied by len(block), the last by 1). The low 16 bits of a form the
          // upper half of the result and the low 16 bits of b form the lower half.
          func computeRsum(block []byte) uint32 {
              a, b, l := 0, 0, len(block)

              for _, v := range block {
                  // v is an unsigned byte, so it needs no sign masking before widening.
                  a += int(v)
                  b += l * int(v)
                  l--
              }
              return uint32(a<<16) | uint32(b&0xffff)
          }

          // strongChecksumLength returns how many bytes of the strong per-block
          // checksum are kept, capped at 16 (noted in the original as the MD4 maximum).
          //
          // Two estimates are computed and the larger one wins: the first is divided
          // by sequenceMatches and rounded up to whole bytes, the second is not
          // reduced by sequenceMatches. The winner is truncated to an int.
          func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
              const maxLen = 16

              ln2 := math.Log(2)
              logBlocks := math.Log(float64(1 + fileLength/blocksize))

              // Estimated number of bits needed for the strong checksum.
              bits := (math.Log(float64(fileLength))+logBlocks)/ln2 + 20

              // First estimate: bits spread over the sequence matches, in whole bytes.
              reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

              // Second estimate: independent of the sequence matches.
              unreduced := (logBlocks/ln2 + 20 + 7.9) / 8

              return int(math.Min(maxLen, math.Max(reduced, unreduced)))
          }

          // weakChecksumLength returns the number of rolling-checksum bytes stored per
          // block; the result is always between 2 and 4.
          //
          // The estimate follows the formula in the Weak Checksum section of
          // http://zsync.moria.org.uk/paper/ch02s03.html and is then reduced by the
          // number of sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html.
          func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
              const minLen, maxLen = 2, 4

              // Bits needed: log2(fileLength) + log2(blocksize) - 8.6.
              logSum := math.Log(float64(fileLength)) + math.Log(float64(blocksize))
              bits := logSum/math.Log(2) - 8.6

              // Spread the bits over the sequence matches and round up to whole bytes.
              n := int(math.Ceil(bits / float64(sequenceMatches) / 8))

              switch {
              case n > maxLen:
                  return maxLen
              case n < minLen:
                  return minLen
              default:
                  return n
              }
          }


          performance



          New code is slightly faster:



          goos: linux
          goarch: amd64
          BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
          BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op





          share|improve this answer





















            Your Answer





            StackExchange.ifUsing("editor", function () {
            return StackExchange.using("mathjaxEditing", function () {
            StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
            });
            });
            }, "mathjax-editing");

            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "196"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });














            draft saved

            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f204752%2fmaking-a-zsync-file-archive-with-checksums%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            1 Answer
            1






            active

            oldest

            votes








            1 Answer
            1






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes









            0














            Always return errors



            Don't use log.Fatal() everywhere ! Instead, return the error with some context:



            checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
            if err != nil {
            return fmt.Errorf("fail to compute checksum: %v", err)
            }


            Write to the bufio.Writer directly



            You are right, creating the header by appending strings is not the best way. We could instead write
            the header content directly to the bufio.writer:



            buf := bufio.NewWriter(zsyncFile)
            buf.WriteString("zsync: ")
            buf.WriteString(version)
            buf.WriteByte('n')

            buf.WriteString("Filename: ")


            ...



            Read the file once



            Reading a file is expensive, it should be done only once. It is possible to read the content of a file
            in a slice of bytes with the io/ioutil package



            fileByte, err := ioutil.ReadFile(options.Filename)


            you can then compute it's SHA1 checksum in one line:



            fileChecksum := sha1.Sum(fileByte)


            and then create a *Reader from it:



            reader := bytes.NewReader(fileByte)
            for {
            read, err := reader.Read(block)
            ...
            }


            This also allow us to get the file length as an int like this:



            fileLength := len(fileByte)


            Method parameters



            Some params are unused or redundant, for example path and options.Filename, they should be removed.
            You can group params with same type for more readability:



            func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {


            can be written like this:



            func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {


            Go also allows named return for more clarity. It's usefull when a method returns two values of the same type like here. Method
            signature could be:



            func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {


            Keep things simple



            There is no need for channels and goroutines in this code, it just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspot.



            The computeRsum method could be simplified a bit to return an int32 directly:



            func computeRsum(block byte) uint32 {

            a, b, l := 0, 0, len(block)

            for _, v := range block {
            if v < 0 {
            v = v & 0xFF
            }
            a += int(v)
            b += l * int(v)
            l--
            }
            return uint32(a<<16) | uint32(b&0xffff)
            }


            Use const block for constant



            this block



            var ZSYNC_VERSION = "0.6.2"
            var BLOCK_SIZE_SMALL = 2048
            var BLOCK_SIZE_LARGE = 4096


            should be rewritten like this:



            const (
            version = "0.6.2"
            blockSizeSmall = 2048
            blockSizeLarge = 4096
            )


            const are immutable, and that's what we want here. Also note that CamelCase is preferred for naming constant in go



            final version



            package zsync

            import (
            "bufio"
            "bytes"
            "crypto/md5"
            "crypto/sha1"
            "encoding/binary"
            "encoding/hex"
            "fmt"
            "io"
            "io/ioutil"
            "log"
            "math"
            "os"
            "strconv"
            "time"
            )

            type Options struct {
            BlockSize int
            Filename string
            Url string
            }

            const (
            version = "0.6.2"
            blockSizeSmall = 2048
            blockSizeLarge = 4096
            )

            func ZsyncMake(options *Options) {

            err := writeToFile(options)
            if err != nil {
            log.Fatal(err)
            }
            }

            // writeToFile reads options.Filename, computes its block checksums and
            // writes the header followed by the checksum data to
            // options.Filename + ".zsync".
            //
            // Unset options are filled in on the caller's struct: an empty Url becomes
            // Filename, and a zero BlockSize becomes blockSizeSmall for inputs shorter
            // than 100 MiB and blockSizeLarge otherwise.
            //
            // It returns the first error met while stat-ing or reading the input,
            // computing the checksums, or creating, writing and closing the output.
            func writeToFile(options *Options) error {
                // Only metadata is needed here, so use os.Stat instead of opening a
                // handle that would have to be closed on every return path.
                fileInfo, err := os.Stat(options.Filename)
                if err != nil {
                    return err
                }

                fileByte, err := ioutil.ReadFile(options.Filename)
                if err != nil {
                    return err
                }
                fileLength := len(fileByte)

                if options.Url == "" {
                    options.Url = options.Filename
                }

                if options.BlockSize == 0 {
                    if fileLength < 100<<20 {
                        options.BlockSize = blockSizeSmall
                    } else {
                        options.BlockSize = blockSizeLarge
                    }
                }

                sequenceMatches := 1
                if fileLength > options.BlockSize {
                    sequenceMatches = 2
                }
                weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
                strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

                checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
                if err != nil {
                    return fmt.Errorf("fail to compute checksum: %v", err)
                }

                zsyncFile, err := os.Create(options.Filename + ".zsync")
                if err != nil {
                    return err
                }
                // Safety net for early returns; the explicit Close at the end is the
                // one whose error is reported.
                defer zsyncFile.Close()

                buf := bufio.NewWriter(zsyncFile)

                // A bufio.Writer keeps its first write error and returns it from Flush,
                // so the individual writes below are intentionally left unchecked.
                header := func(key, value string) {
                    buf.WriteString(key)
                    buf.WriteString(": ")
                    buf.WriteString(value)
                    buf.WriteByte('\n')
                }
                header("zsync", version)
                header("Filename", fileInfo.Name())
                header("MTime", fileInfo.ModTime().Format(time.RFC1123Z))
                header("Blocksize", strconv.Itoa(options.BlockSize))
                header("Length", strconv.Itoa(fileLength))
                header("Hash-Lengths", strconv.Itoa(sequenceMatches)+","+strconv.Itoa(weakLen)+","+strconv.Itoa(strongLen))
                header("URL", options.Url)
                header("SHA-1", hex.EncodeToString(fileChecksum))
                // An empty line separates the textual header from the binary checksums.
                buf.WriteByte('\n')

                buf.Write(checksum)

                if err := buf.Flush(); err != nil {
                    return err
                }
                return zsyncFile.Close()
            }

            func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {

            reader := bytes.NewReader(fileByte)

            checksumBytes := bytes.NewBuffer(nil)
            block := make(byte, blockSize)
            unsignedWeakByte := make(byte, 4)

            for {
            read, err := reader.Read(block)
            if err != nil {
            if err == io.EOF {
            break
            }
            return nil, nil, fmt.Errorf("fail to read block: %v", err)
            }

            if read < blockSize {

            blockSlice := block[read:blockSize]
            for i := range blockSlice {
            blockSlice[i] = byte(0)
            }
            }

            rsum := computeRsum(block)
            binary.BigEndian.PutUint32(unsignedWeakByte, rsum)

            checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])

            strongBytes := md5.Sum(block)
            checksumBytes.Write(strongBytes[:strongLen])
            }

            fileChecksum := sha1.Sum(fileByte)
            checksumBytes.Write(fileChecksum[:])

            return checksumBytes.Bytes(), fileChecksum[:], nil
            }

            // computeRsum returns the weak rolling checksum of block packed into 32 bits.
            //
            // Two running sums are kept: a is the plain sum of the bytes, and b weights
            // each byte by its distance from the end of the block (the first byte is
            // multiplied by len(block), the last by 1). The low 16 bits of a form the
            // upper half of the result and the low 16 bits of b form the lower half.
            func computeRsum(block []byte) uint32 {
                a, b, l := 0, 0, len(block)

                for _, v := range block {
                    // v is an unsigned byte, so it needs no sign masking before widening.
                    a += int(v)
                    b += l * int(v)
                    l--
                }
                return uint32(a<<16) | uint32(b&0xffff)
            }

            func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
            // estimated number of bytes to allocate for strong checksum
            d := (math.Log(float64(fileLength))+math.Log(float64(1+fileLength/blocksize)))/math.Log(2) + 20

            // reduced number of bits by sequence matches
            lFirst := float64(math.Ceil(d / float64(sequenceMatches) / 8))

            // second checksum - not reduced by sequence matches
            lSecond := float64((math.Log(float64(1+fileLength/blocksize))/math.Log(2) + 20 + 7.9) / 8)

            // return max of two: return no more than 16 bytes (MD4 max)
            return int(math.Min(float64(16), math.Max(lFirst, lSecond)))
            }

            func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
            // estimated number of bytes to allocate for the rolling checksum per formula in
            // Weak Checksum section of http://zsync.moria.org.uk/paper/ch02s03.html
            d := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6

            // reduced number of bits by sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html
            rdc := d / float64(sequenceMatches) / 8
            lrdc := int(math.Ceil(rdc))

            // enforce max and min values
            if lrdc > 4 {
            return 4
            }
            if lrdc < 2 {
            return 2
            }
            return lrdc
            }


            performance



            New code is slightly faster:



            goos: linux
            goarch: amd64
            BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
            BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op





            share|improve this answer


























              0














              Always return errors



              Don't use log.Fatal() everywhere ! Instead, return the error with some context:



              checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
              if err != nil {
              return fmt.Errorf("fail to compute checksum: %v", err)
              }


              Write to the bufio.Writer directly



              You are right, creating the header by appending strings is not the best way. We could instead write
              the header content directly to the bufio.writer:



              buf := bufio.NewWriter(zsyncFile)
              buf.WriteString("zsync: ")
              buf.WriteString(version)
              buf.WriteByte('n')

              buf.WriteString("Filename: ")


              ...



              Read the file once



              Reading a file is expensive, it should be done only once. It is possible to read the content of a file
              in a slice of bytes with the io/ioutil package



              fileByte, err := ioutil.ReadFile(options.Filename)


              you can then compute it's SHA1 checksum in one line:



              fileChecksum := sha1.Sum(fileByte)


              and then create a *Reader from it:



              reader := bytes.NewReader(fileByte)
              for {
              read, err := reader.Read(block)
              ...
              }


              This also allow us to get the file length as an int like this:



              fileLength := len(fileByte)


              Method parameters



              Some params are unused or redundant, for example path and options.Filename, they should be removed.
              You can group params with same type for more readability:



              func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {


              can be written like this:



              func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {


              Go also allows named return for more clarity. It's usefull when a method returns two values of the same type like here. Method
              signature could be:



              func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {


              Keep things simple



              There is no need for channels and goroutines in this code, it just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspot.



              The computeRsum method could be simplified a bit to return an int32 directly:



              func computeRsum(block byte) uint32 {

              a, b, l := 0, 0, len(block)

              for _, v := range block {
              if v < 0 {
              v = v & 0xFF
              }
              a += int(v)
              b += l * int(v)
              l--
              }
              return uint32(a<<16) | uint32(b&0xffff)
              }


              Use const block for constant



              this block



              var ZSYNC_VERSION = "0.6.2"
              var BLOCK_SIZE_SMALL = 2048
              var BLOCK_SIZE_LARGE = 4096


              should be rewritten like this:



              const (
              version = "0.6.2"
              blockSizeSmall = 2048
              blockSizeLarge = 4096
              )


              const are immutable, and that's what we want here. Also note that CamelCase is preferred for naming constant in go



              final version



              package zsync

              import (
              "bufio"
              "bytes"
              "crypto/md5"
              "crypto/sha1"
              "encoding/binary"
              "encoding/hex"
              "fmt"
              "io"
              "io/ioutil"
              "log"
              "math"
              "os"
              "strconv"
              "time"
              )

              type Options struct {
              BlockSize int
              Filename string
              Url string
              }

              const (
              version = "0.6.2"
              blockSizeSmall = 2048
              blockSizeLarge = 4096
              )

              func ZsyncMake(options *Options) {

              err := writeToFile(options)
              if err != nil {
              log.Fatal(err)
              }
              }

              // writeToFile reads options.Filename, computes its block checksums and
              // writes the header followed by the checksum data to
              // options.Filename + ".zsync".
              //
              // Unset options are filled in on the caller's struct: an empty Url becomes
              // Filename, and a zero BlockSize becomes blockSizeSmall for inputs shorter
              // than 100 MiB and blockSizeLarge otherwise.
              //
              // It returns the first error met while stat-ing or reading the input,
              // computing the checksums, or creating, writing and closing the output.
              func writeToFile(options *Options) error {
                  // Only metadata is needed here, so use os.Stat instead of opening a
                  // handle that would have to be closed on every return path.
                  fileInfo, err := os.Stat(options.Filename)
                  if err != nil {
                      return err
                  }

                  fileByte, err := ioutil.ReadFile(options.Filename)
                  if err != nil {
                      return err
                  }
                  fileLength := len(fileByte)

                  if options.Url == "" {
                      options.Url = options.Filename
                  }

                  if options.BlockSize == 0 {
                      if fileLength < 100<<20 {
                          options.BlockSize = blockSizeSmall
                      } else {
                          options.BlockSize = blockSizeLarge
                      }
                  }

                  sequenceMatches := 1
                  if fileLength > options.BlockSize {
                      sequenceMatches = 2
                  }
                  weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
                  strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

                  checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
                  if err != nil {
                      return fmt.Errorf("fail to compute checksum: %v", err)
                  }

                  zsyncFile, err := os.Create(options.Filename + ".zsync")
                  if err != nil {
                      return err
                  }
                  // Safety net for early returns; the explicit Close at the end is the
                  // one whose error is reported.
                  defer zsyncFile.Close()

                  buf := bufio.NewWriter(zsyncFile)

                  // A bufio.Writer keeps its first write error and returns it from Flush,
                  // so the individual writes below are intentionally left unchecked.
                  header := func(key, value string) {
                      buf.WriteString(key)
                      buf.WriteString(": ")
                      buf.WriteString(value)
                      buf.WriteByte('\n')
                  }
                  header("zsync", version)
                  header("Filename", fileInfo.Name())
                  header("MTime", fileInfo.ModTime().Format(time.RFC1123Z))
                  header("Blocksize", strconv.Itoa(options.BlockSize))
                  header("Length", strconv.Itoa(fileLength))
                  header("Hash-Lengths", strconv.Itoa(sequenceMatches)+","+strconv.Itoa(weakLen)+","+strconv.Itoa(strongLen))
                  header("URL", options.Url)
                  header("SHA-1", hex.EncodeToString(fileChecksum))
                  // An empty line separates the textual header from the binary checksums.
                  buf.WriteByte('\n')

                  buf.Write(checksum)

                  if err := buf.Flush(); err != nil {
                      return err
                  }
                  return zsyncFile.Close()
              }

              func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {

              reader := bytes.NewReader(fileByte)

              checksumBytes := bytes.NewBuffer(nil)
              block := make(byte, blockSize)
              unsignedWeakByte := make(byte, 4)

              for {
              read, err := reader.Read(block)
              if err != nil {
              if err == io.EOF {
              break
              }
              return nil, nil, fmt.Errorf("fail to read block: %v", err)
              }

              if read < blockSize {

              blockSlice := block[read:blockSize]
              for i := range blockSlice {
              blockSlice[i] = byte(0)
              }
              }

              rsum := computeRsum(block)
              binary.BigEndian.PutUint32(unsignedWeakByte, rsum)

              checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])

              strongBytes := md5.Sum(block)
              checksumBytes.Write(strongBytes[:strongLen])
              }

              fileChecksum := sha1.Sum(fileByte)
              checksumBytes.Write(fileChecksum[:])

              return checksumBytes.Bytes(), fileChecksum[:], nil
              }

              // computeRsum returns the weak rolling checksum of block packed into 32 bits.
              //
              // Two running sums are kept: a is the plain sum of the bytes, and b weights
              // each byte by its distance from the end of the block (the first byte is
              // multiplied by len(block), the last by 1). The low 16 bits of a form the
              // upper half of the result and the low 16 bits of b form the lower half.
              func computeRsum(block []byte) uint32 {
                  a, b, l := 0, 0, len(block)

                  for _, v := range block {
                      // v is an unsigned byte, so it needs no sign masking before widening.
                      a += int(v)
                      b += l * int(v)
                      l--
                  }
                  return uint32(a<<16) | uint32(b&0xffff)
              }

              // strongChecksumLength returns how many bytes of the strong checksum are kept
              // per block for the given file length, block size and number of sequence
              // matches. The result never exceeds 16.
              func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
                  const maxBytes = 16 // no more than 16 bytes (MD4 max)

                  logBlocks := math.Log(float64(1 + fileLength/blocksize))

                  // Estimated number of bits, turned into bytes after being reduced by the
                  // sequence matches.
                  bits := (math.Log(float64(fileLength))+logBlocks)/math.Log(2) + 20
                  reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

                  // Second estimate, which is not reduced by sequence matches.
                  unreduced := (logBlocks/math.Log(2) + 20 + 7.9) / 8

                  // Larger of the two estimates, capped at maxBytes.
                  return int(math.Min(maxBytes, math.Max(reduced, unreduced)))
              }

              // weakChecksumLength returns how many bytes of the rolling checksum are stored
              // per block; the result is always within [2, 4].
              //
              // The estimate follows the formula in the Weak Checksum section of
              // http://zsync.moria.org.uk/paper/ch02s03.html and is reduced by the number
              // of sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html.
              //
              // The bounds are applied while the value is still a float64: for an empty
              // file the estimate is -Inf (and degenerate arguments can yield NaN or +Inf),
              // and converting such a value to int has an implementation-dependent result.
              func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
                  const minLen, maxLen = 2, 4

                  bits := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
                  length := math.Ceil(bits / float64(sequenceMatches) / 8)

                  switch {
                  case length > maxLen:
                      return maxLen
                  case length >= minLen:
                      return int(length)
                  default:
                      // Below the minimum, -Inf, or NaN.
                      return minLen
                  }
              }


              performance



              New code is slightly faster:



              goos: linux
              goarch: amd64
              BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
              BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op





              share|improve this answer
























                0












                0








                0






                Always return errors



                Don't use log.Fatal() everywhere! Instead, return the error with some context:



                checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
                if err != nil {
                return fmt.Errorf("fail to compute checksum: %v", err)
                }


                Write to the bufio.Writer directly



                You are right: creating the header by appending strings is not the best way. We could instead write
                the header content directly to the bufio.Writer:



                buf := bufio.NewWriter(zsyncFile)
                buf.WriteString("zsync: ")
                buf.WriteString(version)
                buf.WriteByte('n')

                buf.WriteString("Filename: ")


                ...



                Read the file once



                Reading a file is expensive, it should be done only once. It is possible to read the content of a file
                in a slice of bytes with the io/ioutil package



                fileByte, err := ioutil.ReadFile(options.Filename)


                You can then compute its SHA-1 checksum in one line:



                fileChecksum := sha1.Sum(fileByte)


                and then create a *Reader from it:



                reader := bytes.NewReader(fileByte)
                for {
                read, err := reader.Read(block)
                ...
                }


                This also allows us to get the file length as an int like this:



                fileLength := len(fileByte)


                Method parameters



                Some params are unused or redundant, for example path and options.Filename, they should be removed.
                You can group params with same type for more readability:



                func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {


                can be written like this:



                func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {


                Go also allows named return values for more clarity. They are useful when a function returns two values of the same type, as here. The function
                signature could be:



                func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {


                Keep things simple



                There is no need for channels and goroutines in this code; they just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspot.



                The computeRsum function could be simplified a bit to return a uint32 directly:



                // computeRsum returns the weak (rolling) checksum of block.
                //
                // Two sums are accumulated: a, the plain sum of the bytes, and b, the sum of
                // each byte weighted by its distance from the end of the block (len(block)
                // for the first byte down to 1 for the last). The result carries a in the
                // upper 16 bits and the low 16 bits of b in the lower 16 bits.
                //
                // The sums are kept in uint32 so the result does not depend on the width of
                // int; only the low 16 bits of each sum reach the result, so wrap-around is
                // harmless.
                func computeRsum(block []byte) uint32 {
                    var a, b uint32
                    weight := uint32(len(block))

                    for _, v := range block {
                        a += uint32(v)
                        b += weight * uint32(v)
                        weight--
                    }
                    return a<<16 | b&0xffff
                }


                Use const block for constant



                this block



                var ZSYNC_VERSION = "0.6.2"
                var BLOCK_SIZE_SMALL = 2048
                var BLOCK_SIZE_LARGE = 4096


                should be rewritten like this:



                const (
                version = "0.6.2"
                blockSizeSmall = 2048
                blockSizeLarge = 4096
                )


                Constants are immutable, and that's what we want here. Also note that MixedCaps (camelCase), rather than ALL_CAPS, is preferred for naming constants in Go.



                final version



                package zsync

                import (
                "bufio"
                "bytes"
                "crypto/md5"
                "crypto/sha1"
                "encoding/binary"
                "encoding/hex"
                "fmt"
                "io"
                "io/ioutil"
                "log"
                "math"
                "os"
                "strconv"
                "time"
                )

                // Options configures the generation of a .zsync file.
                type Options struct {
                // BlockSize is the checksum block size in bytes; when it is 0,
                // writeToFile picks a default based on the length of the input file.
                BlockSize int
                // Filename is the path of the input file; the output file name is this
                // name with ".zsync" appended.
                Filename string
                // Url is written to the "URL: " header line; when it is empty,
                // writeToFile sets it to Filename.
                Url string
                }

                // Fixed values used while generating a .zsync file.
                const (
                // version is written to the "zsync: " header line.
                version = "0.6.2"
                // blockSizeSmall is the default block size for inputs shorter than 100 MiB.
                blockSizeSmall = 2048
                // blockSizeLarge is the default block size for all other inputs.
                blockSizeLarge = 4096
                )

                // ZsyncMake generates the .zsync file described by options. If writeToFile
                // reports an error, it is passed to log.Fatal.
                func ZsyncMake(options *Options) {
                    if err := writeToFile(options); err != nil {
                        log.Fatal(err)
                    }
                }

                // writeToFile generates the .zsync control file for options.Filename.
                //
                // It reads the whole input file into memory, fills in missing options (Url
                // defaults to Filename; BlockSize defaults to blockSizeSmall for inputs under
                // 100 MiB and to blockSizeLarge otherwise), computes the block checksums and
                // writes the header followed by the checksum bytes to the input name with
                // ".zsync" appended. The defaults are stored back into *options.
                //
                // It returns the first error met while opening or reading the input,
                // computing the checksums, or creating, writing and closing the output file.
                func writeToFile(options *Options) error {
                    file, err := os.Open(options.Filename)
                    if err != nil {
                        return err
                    }
                    // The handle is only used for Stat and Name; release it on every return path.
                    defer file.Close()

                    fileInfo, err := file.Stat()
                    if err != nil {
                        return err
                    }

                    fileByte, err := ioutil.ReadFile(options.Filename)
                    if err != nil {
                        return err
                    }
                    fileLength := len(fileByte)

                    if options.Url == "" {
                        options.Url = options.Filename
                    }

                    if options.BlockSize == 0 {
                        if fileLength < 100<<20 {
                            options.BlockSize = blockSizeSmall
                        } else {
                            options.BlockSize = blockSizeLarge
                        }
                    }

                    sequenceMatches := 1
                    if fileLength > options.BlockSize {
                        sequenceMatches = 2
                    }
                    weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
                    strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

                    checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
                    if err != nil {
                        return fmt.Errorf("fail to compute checksum: %v", err)
                    }

                    zsyncFile, err := os.Create(file.Name() + ".zsync")
                    if err != nil {
                        return err
                    }

                    // bufio.Writer remembers the first write error and returns it from every
                    // later write and from Flush, so the individual writes are not checked.
                    buf := bufio.NewWriter(zsyncFile)
                    writeLine := func(parts ...string) {
                        for _, p := range parts {
                            buf.WriteString(p)
                        }
                        buf.WriteByte('\n')
                    }

                    writeLine("zsync: ", version)
                    writeLine("Filename: ", fileInfo.Name())
                    writeLine("MTime: ", fileInfo.ModTime().Format(time.RFC1123Z))
                    writeLine("Blocksize: ", strconv.Itoa(options.BlockSize))
                    writeLine("Length: ", strconv.Itoa(fileLength))
                    writeLine("Hash-Lengths: ", strconv.Itoa(sequenceMatches), ",", strconv.Itoa(weakLen), ",", strconv.Itoa(strongLen))
                    writeLine("URL: ", options.Url)
                    writeLine("SHA-1: ", hex.EncodeToString(fileChecksum))
                    // An empty line separates the header from the checksum data.
                    writeLine()

                    buf.Write(checksum)

                    if err := buf.Flush(); err != nil {
                        zsyncFile.Close()
                        return err
                    }
                    // A failed Close can mean the data never reached the disk, so report it.
                    return zsyncFile.Close()
                }

                func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {

                reader := bytes.NewReader(fileByte)

                checksumBytes := bytes.NewBuffer(nil)
                block := make(byte, blockSize)
                unsignedWeakByte := make(byte, 4)

                for {
                read, err := reader.Read(block)
                if err != nil {
                if err == io.EOF {
                break
                }
                return nil, nil, fmt.Errorf("fail to read block: %v", err)
                }

                if read < blockSize {

                blockSlice := block[read:blockSize]
                for i := range blockSlice {
                blockSlice[i] = byte(0)
                }
                }

                rsum := computeRsum(block)
                binary.BigEndian.PutUint32(unsignedWeakByte, rsum)

                checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])

                strongBytes := md5.Sum(block)
                checksumBytes.Write(strongBytes[:strongLen])
                }

                fileChecksum := sha1.Sum(fileByte)
                checksumBytes.Write(fileChecksum[:])

                return checksumBytes.Bytes(), fileChecksum[:], nil
                }

                // computeRsum returns the weak (rolling) checksum of block.
                //
                // Two sums are accumulated: a, the plain sum of the bytes, and b, the sum of
                // each byte weighted by its distance from the end of the block (len(block)
                // for the first byte down to 1 for the last). The result carries a in the
                // upper 16 bits and the low 16 bits of b in the lower 16 bits.
                //
                // The sums are kept in uint32 so the result does not depend on the width of
                // int; only the low 16 bits of each sum reach the result, so wrap-around is
                // harmless.
                func computeRsum(block []byte) uint32 {
                    var a, b uint32
                    weight := uint32(len(block))

                    for _, v := range block {
                        a += uint32(v)
                        b += weight * uint32(v)
                        weight--
                    }
                    return a<<16 | b&0xffff
                }

                // strongChecksumLength returns how many bytes of the strong checksum are kept
                // per block for the given file length, block size and number of sequence
                // matches. The result never exceeds 16.
                func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
                    const maxBytes = 16 // no more than 16 bytes (MD4 max)

                    logBlocks := math.Log(float64(1 + fileLength/blocksize))

                    // Estimated number of bits, turned into bytes after being reduced by the
                    // sequence matches.
                    bits := (math.Log(float64(fileLength))+logBlocks)/math.Log(2) + 20
                    reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

                    // Second estimate, which is not reduced by sequence matches.
                    unreduced := (logBlocks/math.Log(2) + 20 + 7.9) / 8

                    // Larger of the two estimates, capped at maxBytes.
                    return int(math.Min(maxBytes, math.Max(reduced, unreduced)))
                }

                // weakChecksumLength returns how many bytes of the rolling checksum are stored
                // per block; the result is always within [2, 4].
                //
                // The estimate follows the formula in the Weak Checksum section of
                // http://zsync.moria.org.uk/paper/ch02s03.html and is reduced by the number
                // of sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html.
                //
                // The bounds are applied while the value is still a float64: for an empty
                // file the estimate is -Inf (and degenerate arguments can yield NaN or +Inf),
                // and converting such a value to int has an implementation-dependent result.
                func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
                    const minLen, maxLen = 2, 4

                    bits := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
                    length := math.Ceil(bits / float64(sequenceMatches) / 8)

                    switch {
                    case length > maxLen:
                        return maxLen
                    case length >= minLen:
                        return int(length)
                    default:
                        // Below the minimum, -Inf, or NaN.
                        return minLen
                    }
                }


                performance



                New code is slightly faster:



                goos: linux
                goarch: amd64
                BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
                BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op





                share|improve this answer












                Always return errors



                Don't use log.Fatal() everywhere! Instead, return the error with some context:



                checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakChecksumLength, strongChecksumLength)
                if err != nil {
                return fmt.Errorf("fail to compute checksum: %v", err)
                }


                Write to the bufio.Writer directly



                You are right: creating the header by appending strings is not the best way. We could instead write
                the header content directly to the bufio.Writer:



                buf := bufio.NewWriter(zsyncFile)
                buf.WriteString("zsync: ")
                buf.WriteString(version)
                buf.WriteByte('n')

                buf.WriteString("Filename: ")


                ...



                Read the file once



                Reading a file is expensive, it should be done only once. It is possible to read the content of a file
                in a slice of bytes with the io/ioutil package



                fileByte, err := ioutil.ReadFile(options.Filename)


                You can then compute its SHA-1 checksum in one line:



                fileChecksum := sha1.Sum(fileByte)


                and then create a *Reader from it:



                reader := bytes.NewReader(fileByte)
                for {
                read, err := reader.Read(block)
                ...
                }


                This also allows us to get the file length as an int like this:



                fileLength := len(fileByte)


                Method parameters



                Some params are unused or redundant, for example path and options.Filename, they should be removed.
                You can group params with same type for more readability:



                func computeChecksum(fileByte byte, blockSize int, weakLen int, strongLen int) (byte, byte, error) {


                can be written like this:



                func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {


                Go also allows named return values for more clarity. They are useful when a function returns two values of the same type, as here. The function
                signature could be:



                func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (checksum byte, fileChecksum byte, err error) {


                Keep things simple



                There is no need for channels and goroutines in this code; they just make the code harder to read. Try to keep the code clean and simple. If you face performance issues, profile the code and optimize the hotspot.



                The computeRsum function could be simplified a bit to return a uint32 directly:



                // computeRsum returns the weak (rolling) checksum of block.
                //
                // Two sums are accumulated: a, the plain sum of the bytes, and b, the sum of
                // each byte weighted by its distance from the end of the block (len(block)
                // for the first byte down to 1 for the last). The result carries a in the
                // upper 16 bits and the low 16 bits of b in the lower 16 bits.
                //
                // The sums are kept in uint32 so the result does not depend on the width of
                // int; only the low 16 bits of each sum reach the result, so wrap-around is
                // harmless.
                func computeRsum(block []byte) uint32 {
                    var a, b uint32
                    weight := uint32(len(block))

                    for _, v := range block {
                        a += uint32(v)
                        b += weight * uint32(v)
                        weight--
                    }
                    return a<<16 | b&0xffff
                }


                Use const block for constant



                this block



                var ZSYNC_VERSION = "0.6.2"
                var BLOCK_SIZE_SMALL = 2048
                var BLOCK_SIZE_LARGE = 4096


                should be rewritten like this:



                const (
                version = "0.6.2"
                blockSizeSmall = 2048
                blockSizeLarge = 4096
                )


                Constants are immutable, and that's what we want here. Also note that MixedCaps (camelCase), rather than ALL_CAPS, is preferred for naming constants in Go.



                final version



                package zsync

                import (
                "bufio"
                "bytes"
                "crypto/md5"
                "crypto/sha1"
                "encoding/binary"
                "encoding/hex"
                "fmt"
                "io"
                "io/ioutil"
                "log"
                "math"
                "os"
                "strconv"
                "time"
                )

                // Options configures the generation of a .zsync file.
                type Options struct {
                // BlockSize is the checksum block size in bytes; when it is 0,
                // writeToFile picks a default based on the length of the input file.
                BlockSize int
                // Filename is the path of the input file; the output file name is this
                // name with ".zsync" appended.
                Filename string
                // Url is written to the "URL: " header line; when it is empty,
                // writeToFile sets it to Filename.
                Url string
                }

                // Fixed values used while generating a .zsync file.
                const (
                // version is written to the "zsync: " header line.
                version = "0.6.2"
                // blockSizeSmall is the default block size for inputs shorter than 100 MiB.
                blockSizeSmall = 2048
                // blockSizeLarge is the default block size for all other inputs.
                blockSizeLarge = 4096
                )

                // ZsyncMake generates the .zsync file described by options. If writeToFile
                // reports an error, it is passed to log.Fatal.
                func ZsyncMake(options *Options) {
                    if err := writeToFile(options); err != nil {
                        log.Fatal(err)
                    }
                }

                // writeToFile generates the .zsync control file for options.Filename.
                //
                // It reads the whole input file into memory, fills in missing options (Url
                // defaults to Filename; BlockSize defaults to blockSizeSmall for inputs under
                // 100 MiB and to blockSizeLarge otherwise), computes the block checksums and
                // writes the header followed by the checksum bytes to the input name with
                // ".zsync" appended. The defaults are stored back into *options.
                //
                // It returns the first error met while opening or reading the input,
                // computing the checksums, or creating, writing and closing the output file.
                func writeToFile(options *Options) error {
                    file, err := os.Open(options.Filename)
                    if err != nil {
                        return err
                    }
                    // The handle is only used for Stat and Name; release it on every return path.
                    defer file.Close()

                    fileInfo, err := file.Stat()
                    if err != nil {
                        return err
                    }

                    fileByte, err := ioutil.ReadFile(options.Filename)
                    if err != nil {
                        return err
                    }
                    fileLength := len(fileByte)

                    if options.Url == "" {
                        options.Url = options.Filename
                    }

                    if options.BlockSize == 0 {
                        if fileLength < 100<<20 {
                            options.BlockSize = blockSizeSmall
                        } else {
                            options.BlockSize = blockSizeLarge
                        }
                    }

                    sequenceMatches := 1
                    if fileLength > options.BlockSize {
                        sequenceMatches = 2
                    }
                    weakLen := weakChecksumLength(fileLength, options.BlockSize, sequenceMatches)
                    strongLen := strongChecksumLength(fileLength, options.BlockSize, sequenceMatches)

                    checksum, fileChecksum, err := computeChecksum(fileByte, options.BlockSize, weakLen, strongLen)
                    if err != nil {
                        return fmt.Errorf("fail to compute checksum: %v", err)
                    }

                    zsyncFile, err := os.Create(file.Name() + ".zsync")
                    if err != nil {
                        return err
                    }

                    // bufio.Writer remembers the first write error and returns it from every
                    // later write and from Flush, so the individual writes are not checked.
                    buf := bufio.NewWriter(zsyncFile)
                    writeLine := func(parts ...string) {
                        for _, p := range parts {
                            buf.WriteString(p)
                        }
                        buf.WriteByte('\n')
                    }

                    writeLine("zsync: ", version)
                    writeLine("Filename: ", fileInfo.Name())
                    writeLine("MTime: ", fileInfo.ModTime().Format(time.RFC1123Z))
                    writeLine("Blocksize: ", strconv.Itoa(options.BlockSize))
                    writeLine("Length: ", strconv.Itoa(fileLength))
                    writeLine("Hash-Lengths: ", strconv.Itoa(sequenceMatches), ",", strconv.Itoa(weakLen), ",", strconv.Itoa(strongLen))
                    writeLine("URL: ", options.Url)
                    writeLine("SHA-1: ", hex.EncodeToString(fileChecksum))
                    // An empty line separates the header from the checksum data.
                    writeLine()

                    buf.Write(checksum)

                    if err := buf.Flush(); err != nil {
                        zsyncFile.Close()
                        return err
                    }
                    // A failed Close can mean the data never reached the disk, so report it.
                    return zsyncFile.Close()
                }

                func computeChecksum(fileByte byte, blockSize, weakLen, strongLen int) (byte, byte, error) {

                reader := bytes.NewReader(fileByte)

                checksumBytes := bytes.NewBuffer(nil)
                block := make(byte, blockSize)
                unsignedWeakByte := make(byte, 4)

                for {
                read, err := reader.Read(block)
                if err != nil {
                if err == io.EOF {
                break
                }
                return nil, nil, fmt.Errorf("fail to read block: %v", err)
                }

                if read < blockSize {

                blockSlice := block[read:blockSize]
                for i := range blockSlice {
                blockSlice[i] = byte(0)
                }
                }

                rsum := computeRsum(block)
                binary.BigEndian.PutUint32(unsignedWeakByte, rsum)

                checksumBytes.Write(unsignedWeakByte[len(unsignedWeakByte)-weakLen:])

                strongBytes := md5.Sum(block)
                checksumBytes.Write(strongBytes[:strongLen])
                }

                fileChecksum := sha1.Sum(fileByte)
                checksumBytes.Write(fileChecksum[:])

                return checksumBytes.Bytes(), fileChecksum[:], nil
                }

                // computeRsum returns the weak (rolling) checksum of block.
                //
                // Two sums are accumulated: a, the plain sum of the bytes, and b, the sum of
                // each byte weighted by its distance from the end of the block (len(block)
                // for the first byte down to 1 for the last). The result carries a in the
                // upper 16 bits and the low 16 bits of b in the lower 16 bits.
                //
                // The sums are kept in uint32 so the result does not depend on the width of
                // int; only the low 16 bits of each sum reach the result, so wrap-around is
                // harmless.
                func computeRsum(block []byte) uint32 {
                    var a, b uint32
                    weight := uint32(len(block))

                    for _, v := range block {
                        a += uint32(v)
                        b += weight * uint32(v)
                        weight--
                    }
                    return a<<16 | b&0xffff
                }

                // strongChecksumLength returns how many bytes of the strong checksum are kept
                // per block for the given file length, block size and number of sequence
                // matches. The result never exceeds 16.
                func strongChecksumLength(fileLength, blocksize, sequenceMatches int) int {
                    const maxBytes = 16 // no more than 16 bytes (MD4 max)

                    logBlocks := math.Log(float64(1 + fileLength/blocksize))

                    // Estimated number of bits, turned into bytes after being reduced by the
                    // sequence matches.
                    bits := (math.Log(float64(fileLength))+logBlocks)/math.Log(2) + 20
                    reduced := math.Ceil(bits / float64(sequenceMatches) / 8)

                    // Second estimate, which is not reduced by sequence matches.
                    unreduced := (logBlocks/math.Log(2) + 20 + 7.9) / 8

                    // Larger of the two estimates, capped at maxBytes.
                    return int(math.Min(maxBytes, math.Max(reduced, unreduced)))
                }

                // weakChecksumLength returns how many bytes of the rolling checksum are stored
                // per block; the result is always within [2, 4].
                //
                // The estimate follows the formula in the Weak Checksum section of
                // http://zsync.moria.org.uk/paper/ch02s03.html and is reduced by the number
                // of sequence matches per http://zsync.moria.org.uk/paper/ch02s04.html.
                //
                // The bounds are applied while the value is still a float64: for an empty
                // file the estimate is -Inf (and degenerate arguments can yield NaN or +Inf),
                // and converting such a value to int has an implementation-dependent result.
                func weakChecksumLength(fileLength, blocksize, sequenceMatches int) int {
                    const minLen, maxLen = 2, 4

                    bits := (math.Log(float64(fileLength))+math.Log(float64(blocksize)))/math.Log(2) - 8.6
                    length := math.Ceil(bits / float64(sequenceMatches) / 8)

                    switch {
                    case length > maxLen:
                        return maxLen
                    case length >= minLen:
                        return int(length)
                    default:
                        // Below the minimum, -Inf, or NaN.
                        return minLen
                    }
                }


                performance



                New code is slightly faster:



                goos: linux
                goarch: amd64
                BenchmarkWriteOld-4 10000 200830 ns/op 40815 B/op 43 allocs/op
                BenchmarkWriteNew-4 10000 164985 ns/op 29272 B/op 27 allocs/op






                share|improve this answer












                share|improve this answer



                share|improve this answer










                answered 1 hour ago









                felix

                71839




                71839






























                    draft saved

                    draft discarded




















































                    Thanks for contributing an answer to Code Review Stack Exchange!


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    Use MathJax to format equations. MathJax reference.


                    To learn more, see our tips on writing great answers.





                    Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


                    Please pay close attention to the following guidance:


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    To learn more, see our tips on writing great answers.




                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f204752%2fmaking-a-zsync-file-archive-with-checksums%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    Morgemoulin

                    Scott Moir

                    Souastre