Go grep command clone
I am trying to make a simple clone of the command line utility grep. I'm working on it just for fun and to practice Go. The thing is, I am trying to make it in a way that if several files are given at the command line, a different goroutine is generated to speed up the process. Even so, standard grep still gets results faster by an amount of around 60% less processing time.
Here it is the code for it. Can you please provide some opinions about it?
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
func init() {
var rexError error
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
flag.Parse()
if strRex == "" {
fmt.Fprintf(os.Stderr, "The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
}
regRex, rexError = regexp.Compile(strRex)
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
rexfileFilter, rexError = regexp.Compile(fileFilter)
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", rexfileFilter, rexError.Error())
os.Exit(3)
}
if recursive != "" {
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
fmt.Fprintf(os.Stderr, "There is an error reading from stdin : %s", err)
os.Exit(3)
}
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wg.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wg, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wg.Wait()
close(ch)
}(&wg, chResults)
for res := range chResults {
if verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this :
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup, ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this :
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: lineno,
Error: nil,
}
}
lineno++
}
wg.Done()
}
func formatRes(r *result, format int) {
if format == 1 {
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
fmt.Fprintf(os.Stderr, "%s - %s n", r.Filename, r.Error)
}
}
if format == 2 {
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
fmt.Fprintf(os.Stderr, "%s - %s n", r.Filename, r.Error)
}
}
if format == 3 {
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
fmt.Fprintf(os.Stderr, "%sn", r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil // Unreachable code
})
if err != nil {
return nil, err
}
return list, nil
}
regex go concurrency search
New contributor
add a comment |
I am trying to make a simple clone of the command line utility grep. I'm working on it just for fun and to practice Go. The thing is, I am trying to make it in a way that if several files are given at the command line, a different goroutine is generated to speed up the process. Even so, standard grep still gets results faster by an amount of around 60% less processing time.
Here it is the code for it. Can you please provide some opinions about it?
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
func init() {
var rexError error
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
flag.Parse()
if strRex == "" {
fmt.Fprintf(os.Stderr, "The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
}
regRex, rexError = regexp.Compile(strRex)
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
rexfileFilter, rexError = regexp.Compile(fileFilter)
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", rexfileFilter, rexError.Error())
os.Exit(3)
}
if recursive != "" {
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
fmt.Fprintf(os.Stderr, "There is an error reading from stdin : %s", err)
os.Exit(3)
}
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wg.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wg, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wg.Wait()
close(ch)
}(&wg, chResults)
for res := range chResults {
if verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this :
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup, ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this :
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: lineno,
Error: nil,
}
}
lineno++
}
wg.Done()
}
func formatRes(r *result, format int) {
if format == 1 {
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
fmt.Fprintf(os.Stderr, "%s - %s n", r.Filename, r.Error)
}
}
if format == 2 {
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
fmt.Fprintf(os.Stderr, "%s - %s n", r.Filename, r.Error)
}
}
if format == 3 {
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
fmt.Fprintf(os.Stderr, "%sn", r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil // Unreachable code
})
if err != nil {
return nil, err
}
return list, nil
}
regex go concurrency search
New contributor
3
Code must be correct. To be sure that code is correct, code must be readable. I find your double-spaced code to be unreadable.
– peterSO
7 hours ago
add a comment |
I am trying to make a simple clone of the command line utility grep. I'm working on it just for fun and to practice Go. The thing is, I am trying to make it in a way that if several files are given at the command line, a different goroutine is generated to speed up the process. Even so, standard grep still gets results faster by an amount of around 60% less processing time.
Here it is the code for it. Can you please provide some opinions about it?
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
func init() {
var rexError error
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
flag.Parse()
if strRex == "" {
fmt.Fprintf(os.Stderr, "The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
}
regRex, rexError = regexp.Compile(strRex)
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
rexfileFilter, rexError = regexp.Compile(fileFilter)
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", rexfileFilter, rexError.Error())
os.Exit(3)
}
if recursive != "" {
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
fmt.Fprintf(os.Stderr, "There is an error reading from stdin : %s", err)
os.Exit(3)
}
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wg.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wg, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wg.Wait()
close(ch)
}(&wg, chResults)
for res := range chResults {
if verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this :
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup, ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this :
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: lineno,
Error: nil,
}
}
lineno++
}
wg.Done()
}
func formatRes(r *result, format int) {
if format == 1 {
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
fmt.Fprintf(os.Stderr, "%s - %s n", r.Filename, r.Error)
}
}
if format == 2 {
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
fmt.Fprintf(os.Stderr, "%s - %s n", r.Filename, r.Error)
}
}
if format == 3 {
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
fmt.Fprintf(os.Stderr, "%sn", r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil // Unreachable code
})
if err != nil {
return nil, err
}
return list, nil
}
regex go concurrency search
New contributor
I am trying to make a simple clone of the command line utility grep. I'm working on it just for fun and to practice Go. The thing is, I am trying to make it in a way that if several files are given at the command line, a different goroutine is generated to speed up the process. Even so, standard grep still gets results faster by an amount of around 60% less processing time.
Here it is the code for it. Can you please provide some opinions about it?
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
func init() {
var rexError error
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
flag.Parse()
if strRex == "" {
fmt.Fprintf(os.Stderr, "The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
}
regRex, rexError = regexp.Compile(strRex)
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
rexfileFilter, rexError = regexp.Compile(fileFilter)
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", rexfileFilter, rexError.Error())
os.Exit(3)
}
if recursive != "" {
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
fmt.Fprintf(os.Stderr, "There is an error reading from stdin : %s", err)
os.Exit(3)
}
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wg.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wg, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wg.Wait()
close(ch)
}(&wg, chResults)
for res := range chResults {
if verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this :
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup, ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this :
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: lineno,
Error: nil,
}
}
lineno++
}
wg.Done()
}
func formatRes(r *result, format int) {
if format == 1 {
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
fmt.Fprintf(os.Stderr, "%s - %s n", r.Filename, r.Error)
}
}
if format == 2 {
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
fmt.Fprintf(os.Stderr, "%s - %s n", r.Filename, r.Error)
}
}
if format == 3 {
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
fmt.Fprintf(os.Stderr, "%sn", r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil // Unreachable code
})
if err != nil {
return nil, err
}
return list, nil
}
regex go concurrency search
regex go concurrency search
New contributor
New contributor
edited 10 mins ago
200_success
128k15150412
128k15150412
New contributor
asked 13 hours ago
Matias Barrios
1184
1184
New contributor
New contributor
3
Code must be correct. To be sure that code is correct, code must be readable. I find your double-spaced code to be unreadable.
– peterSO
7 hours ago
add a comment |
3
Code must be correct. To be sure that code is correct, code must be readable. I find your double-spaced code to be unreadable.
– peterSO
7 hours ago
3
3
Code must be correct. To be sure that code is correct, code must be readable. I find your double-spaced code to be unreadable.
– peterSO
7 hours ago
Code must be correct. To be sure that code is correct, code must be readable. I find your double-spaced code to be unreadable.
– peterSO
7 hours ago
add a comment |
2 Answers
2
active
oldest
votes
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
As in other C-like languages, golang evaluates binary logical operators conditionally left-to-right. As written, the program is often going to evaluate the reg.Match() twice, because it appears twice and in each subexpression it is tested before inverseSearch
. As this is the program's most expensive operation, that's significant for performance.
if line := bf.Text(); (!inverseSearch && reg.Match(byte(line))) || (inverseSearch && !reg.Match(byte(line))) {
should avoid the double evaluation. Or write an xor helper function.
Other things:
Don't test for stdin being usable if you aren't reading from it. It's common for programs executing a program in a subprocess to close filehandles that the program shouldn't need to use. In other words /bin/grep foo filename <&-
works, your ./grep -r foo filename <&-
does not and should.
Rewrite/replace grepStdin() so that it reuses grep(). Nobody likes having the algorithm implemented twice in the same program. You can use /dev/stdin as a filename for stdin on *nix; or, keep the file opening part separate and have a common function for grepping over the opened file handle used by both codepaths.
The format
parameter to formatRes uses magic constant values that one has to read its implementation to understand. It would be better to replace it with verbose bool
so the verbose flag can be passed directly and its meaning is then obvious. The stdin case would be better not to treat specially here also.
New contributor
Thanks so much for your comments. I'll review all of this!
– Matias Barrios
4 hours ago
add a comment |
The thing is, even so, standard grep still gets results faster by an amount of 60 % less processing time
For more information on what makes GNU grep fast, read this by the original author.
Here is some feedback on writing idiomatic Go.
Lines
Your code is oddly formatted.
It is very uncommon to have empty lines everywhere. In your code, empty lines more than double the length of your code. In most cases, I only add extra lines to separate functions, structs, if statements, for loops, etc.
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Becomes:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Line length
Keep lines to a length of 80 characters. It makes code easier to read on smaller monitors or if you split your screen to view multiple things.
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
Becomes:
flag.StringVar(&strRex, "r", "",
"Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&inverseSearch, "i", false,
"It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "",
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fileFilter, "FF", "",
"Filter to be applied to the filenames when used recursevily")
Use a C-style for loop
In grepStdin()
you initialize lineno
, increment it and only use it in the for loop. That's a standard for loop. Once inside the for loop, we can rename it to l
because it's purpose is clear from it's usage.
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
Becomes:
for l := 1; bf.Scan(); l++ {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
Combine multiple var
declarations
You can combine multiple variables declared with var
as such:
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
Becomes:
var (
strRex string
filenames string
regRex *regexp.Regexp
wg sync.WaitGroup
allResults result
verbose = false
recursive string
recursiveFileList string
fileFilter string
rexfileFilter *regexp.Regexp
inverseSearch bool
)
This is far more readable.
Use log
to write to standard error
You can utilize the log
package to print (possibly fatal) errors to standard error.
fmt.Fprintf(os.Stderr,
"The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
Becomes:
if strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
And
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
Becomes:
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", strRex,
rexError)
}
Notice that you don't need to call Error()
to get the string from it.
By doing this, you won't get custom return values. I don't think they're very useful in lieu of a good error message.
Combine if and assignment
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
Becomes:
var err error
if filenames, err = walkDir(recursive); err != nil {
log.Println(err)
}
Move things out of the global scope
Your wg
wait group is in the global scope. It doesn't need to be. You already pass wait
to grep()
, so use it.
allResults
is never used. recursiveFileList
is never used.
We can also move all of your flag variables to a common struct
. This is just a preference, but it tells readers what is a flag and what isn't.
Use a switch statement
In formatRes
you can use a switch statement instead of multiple if statements.
You can also clean up how you print things, like using Println
instead of Printf
.
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Println(r.Error)
}
}
Move your condition to a separate function
Your condition
(reg.Match(byte(line)) && !fl.inverseSearch) || (!reg.Match(byte(line)) && fl.inverseSearch)
Is long, and as Colin points out, you can take advantage of short circuiting.
Go does not provide XOR, but it doesn't need to. I'll leave that for you to implement.
We can define a function as such:
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
Use grepStdin()
in grep()
As Colin says, they contain duplicate code. I'll leave that for you to implement.
Conclusion
There are many other places to clean up the code, but I think you'll get the gist. Here is the final code I ended up with:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
type flags struct {
strRex string
recursive string
fileFilter string
verbose bool
inverseSearch bool
}
var (
fl flags
filenames string
regRex *regexp.Regexp
rexfileFilter *regexp.Regexp
)
func init() {
dfl := flags{
strRex: "",
verbose: false,
inverseSearch: false,
recursive: "",
fileFilter: "",
}
var rexError error
flag.StringVar(&fl.strRex, "r", dfl.strRex,
"Regular expresion to match against the input files")
flag.StringVar(&fl.recursive, "R", dfl.recursive,
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fl.fileFilter, "FF", dfl.fileFilter,
"Filter to be applied to the filenames when used recursevily")
flag.BoolVar(&fl.verbose, "v", dfl.verbose,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&fl.inverseSearch, "i", dfl.inverseSearch,
"It does what you might expect.. reverse the search")
flag.Parse()
if fl.strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
regRex, rexError = regexp.Compile(fl.strRex)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", fl.strRex,
rexError)
}
rexfileFilter, rexError = regexp.Compile(fl.fileFilter)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %s", rexfileFilter,
rexError)
}
if fl.recursive != "" {
var err error
if filenames, err = walkDir(fl.recursive); err != nil {
log.Println(err)
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
log.Fatalf("There is an error reading from stdin: %s", err)
}
var wait sync.WaitGroup
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wait.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wait, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wait.Wait()
close(ch)
}(&wait, chResults)
for res := range chResults {
if fl.verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup,
ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: l,
Error: nil,
}
}
}
wait.Done()
}
func formatRes(r *result, format int) {
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Println(r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fl.fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil
})
if err != nil {
return nil, err
}
return list, nil
}
In my opinion, it's far more readable and to-the-point.
Hope this helps!
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Matias Barrios is a new contributor. Be nice, and check out our Code of Conduct.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f210591%2fgo-grep-command-clone%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
As in other C-like languages, golang evaluates binary logical operators conditionally left-to-right. As written, the program is often going to evaluate the reg.Match() twice, because it appears twice and in each subexpression it is tested before inverseSearch
. As this is the program's most expensive operation, that's significant for performance.
if line := bf.Text(); (!inverseSearch && reg.Match(byte(line))) || (inverseSearch && !reg.Match(byte(line))) {
should avoid the double evaluation. Or write an xor helper function.
Other things:
Don't test for stdin being usable if you aren't reading from it. It's common for programs executing a program in a subprocess to close filehandles that the program shouldn't need to use. In other words /bin/grep foo filename <&-
works, your ./grep -r foo filename <&-
does not and should.
Rewrite/replace grepStdin() so that it reuses grep(). Nobody likes having the algorithm implemented twice in the same program. You can use /dev/stdin as a filename for stdin on *nix; or, keep the file opening part separate and have a common function for grepping over the opened file handle used by both codepaths.
The format
parameter to formatRes uses magic constant values that one has to read its implementation to understand. It would be better to replace it with verbose bool
so the verbose flag can be passed directly and its meaning is then obvious. The stdin case would be better not to treat specially here also.
New contributor
Thanks so much for your comments. I'll review all of this!
– Matias Barrios
4 hours ago
add a comment |
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
As in other C-like languages, golang evaluates binary logical operators conditionally left-to-right. As written, the program is often going to evaluate the reg.Match() twice, because it appears twice and in each subexpression it is tested before inverseSearch
. As this is the program's most expensive operation, that's significant for performance.
if line := bf.Text(); (!inverseSearch && reg.Match(byte(line))) || (inverseSearch && !reg.Match(byte(line))) {
should avoid the double evaluation. Or write an xor helper function.
Other things:
Don't test for stdin being usable if you aren't reading from it. It's common for programs executing a program in a subprocess to close filehandles that the program shouldn't need to use. In other words /bin/grep foo filename <&-
works, your ./grep -r foo filename <&-
does not and should.
Rewrite/replace grepStdin() so that it reuses grep(). Nobody likes having the algorithm implemented twice in the same program. You can use /dev/stdin as a filename for stdin on *nix; or, keep the file opening part separate and have a common function for grepping over the opened file handle used by both codepaths.
The format
parameter to formatRes uses magic constant values that one has to read its implementation to understand. It would be better to replace it with verbose bool
so the verbose flag can be passed directly and its meaning is then obvious. The stdin case would be better not to treat specially here also.
New contributor
Thanks so much for your comments. I'll review all of this!
– Matias Barrios
4 hours ago
add a comment |
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
As in other C-like languages, golang evaluates binary logical operators conditionally left-to-right. As written, the program is often going to evaluate the reg.Match() twice, because it appears twice and in each subexpression it is tested before inverseSearch
. As this is the program's most expensive operation, that's significant for performance.
if line := bf.Text(); (!inverseSearch && reg.Match(byte(line))) || (inverseSearch && !reg.Match(byte(line))) {
should avoid the double evaluation. Or write an xor helper function.
Other things:
Don't test for stdin being usable if you aren't reading from it. It's common for programs executing a program in a subprocess to close filehandles that the program shouldn't need to use. In other words /bin/grep foo filename <&-
works, your ./grep -r foo filename <&-
does not and should.
Rewrite/replace grepStdin() so that it reuses grep(). Nobody likes having the algorithm implemented twice in the same program. You can use /dev/stdin as a filename for stdin on *nix; or, keep the file opening part separate and have a common function for grepping over the opened file handle used by both codepaths.
The format
parameter to formatRes uses magic constant values that one has to read its implementation to understand. It would be better to replace it with verbose bool
so the verbose flag can be passed directly and its meaning is then obvious. The stdin case would be better not to treat specially here also.
New contributor
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
As in other C-like languages, golang evaluates binary logical operators conditionally left-to-right. As written, the program is often going to evaluate the reg.Match() twice, because it appears twice and in each subexpression it is tested before inverseSearch
. As this is the program's most expensive operation, that's significant for performance.
if line := bf.Text(); (!inverseSearch && reg.Match(byte(line))) || (inverseSearch && !reg.Match(byte(line))) {
should avoid the double evaluation. Or write an xor helper function.
Other things:
Don't test for stdin being usable if you aren't reading from it. It's common for programs executing a program in a subprocess to close filehandles that the program shouldn't need to use. In other words /bin/grep foo filename <&-
works, your ./grep -r foo filename <&-
does not and should.
Rewrite/replace grepStdin() so that it reuses grep(). Nobody likes having the algorithm implemented twice in the same program. You can use /dev/stdin as a filename for stdin on *nix; or, keep the file opening part separate and have a common function for grepping over the opened file handle used by both codepaths.
The format
parameter to formatRes uses magic constant values that one has to read its implementation to understand. It would be better to replace it with verbose bool
so the verbose flag can be passed directly and its meaning is then obvious. The stdin case would be better not to treat specially here also.
New contributor
New contributor
answered 4 hours ago
Colin Phipps
1563
1563
New contributor
New contributor
Thanks so much for your comments. I'll review all of this!
– Matias Barrios
4 hours ago
add a comment |
Thanks so much for your comments. I'll review all of this!
– Matias Barrios
4 hours ago
Thanks so much for your comments. I'll review all of this!
– Matias Barrios
4 hours ago
Thanks so much for your comments. I'll review all of this!
– Matias Barrios
4 hours ago
add a comment |
The thing is, even so, standard grep still gets results faster by an amount of 60 % less processing time
For more information on what makes GNU grep fast, read this by the original author.
Here is some feedback on writing idiomatic Go.
Lines
Your code is oddly formatted.
It is very uncommon to have empty lines everywhere. In your code, empty lines more than double the length of your code. In most cases, I only add extra lines to separate functions, structs, if statements, for loops, etc.
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Becomes:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Line length
Keep lines to a length of 80 characters. It makes code easier to read on smaller monitors or if you split your screen to view multiple things.
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
Becomes:
flag.StringVar(&strRex, "r", "",
"Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&inverseSearch, "i", false,
"It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "",
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fileFilter, "FF", "",
"Filter to be applied to the filenames when used recursevily")
Use a C-style for loop
In grepStdin()
you initialize lineno
, increment it and only use it in the for loop. That's a standard for loop. Once inside the for loop, we can rename it to l
because it's purpose is clear from it's usage.
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
Becomes:
for l := 1; bf.Scan(); l++ {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
Combine multiple var
declarations
You can combine multiple variables declared with var
as such:
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
Becomes:
var (
strRex string
filenames string
regRex *regexp.Regexp
wg sync.WaitGroup
allResults result
verbose = false
recursive string
recursiveFileList string
fileFilter string
rexfileFilter *regexp.Regexp
inverseSearch bool
)
This is far more readable.
Use log
to write to standard error
You can utilize the log
package to print (possibly fatal) errors to standard error.
fmt.Fprintf(os.Stderr,
"The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
Becomes:
if strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
And
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
Becomes:
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", strRex,
rexError)
}
Notice that you don't need to call Error()
to get the string from it.
By doing this, you won't get custom return values. I don't think they're very useful in lieu of a good error message.
Combine if and assignment
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
Becomes:
var err error
if filenames, err = walkDir(recursive); err != nil {
log.Println(err)
}
Move things out of the global scope
Your wg
wait group is in the global scope. It doesn't need to be. You already pass wait
to grep()
, so use it.
allResults
is never used. recursiveFileList
is never used.
We can also move all of your flag variables to a common struct
. This is just a preference, but it tells readers what is a flag and what isn't.
Use a switch statement
In formatRes
you can use a switch statement instead of multiple if statements.
You can also clean up how you print things, like using Println
instead of Printf
.
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Println(r.Error)
}
}
Move your condition to a separate function
Your condition
(reg.Match(byte(line)) && !fl.inverseSearch) || (!reg.Match(byte(line)) && fl.inverseSearch)
Is long, and as Colin points out, you can take advantage of short circuiting.
Go does not provide XOR, but it doesn't need to. I'll leave that for you to implement.
We can define a function as such:
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
Use grepStdin()
in grep()
As Colin says, they contain duplicate code. I'll leave that for you to implement.
Conclusion
There are many other places to clean up the code, but I think you'll get the gist. Here is the final code I ended up with:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
type flags struct {
strRex string
recursive string
fileFilter string
verbose bool
inverseSearch bool
}
var (
fl flags
filenames string
regRex *regexp.Regexp
rexfileFilter *regexp.Regexp
)
func init() {
dfl := flags{
strRex: "",
verbose: false,
inverseSearch: false,
recursive: "",
fileFilter: "",
}
var rexError error
flag.StringVar(&fl.strRex, "r", dfl.strRex,
"Regular expresion to match against the input files")
flag.StringVar(&fl.recursive, "R", dfl.recursive,
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fl.fileFilter, "FF", dfl.fileFilter,
"Filter to be applied to the filenames when used recursevily")
flag.BoolVar(&fl.verbose, "v", dfl.verbose,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&fl.inverseSearch, "i", dfl.inverseSearch,
"It does what you might expect.. reverse the search")
flag.Parse()
if fl.strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
regRex, rexError = regexp.Compile(fl.strRex)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", fl.strRex,
rexError)
}
rexfileFilter, rexError = regexp.Compile(fl.fileFilter)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %s", rexfileFilter,
rexError)
}
if fl.recursive != "" {
var err error
if filenames, err = walkDir(fl.recursive); err != nil {
log.Println(err)
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
log.Fatalf("There is an error reading from stdin: %s", err)
}
var wait sync.WaitGroup
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wait.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wait, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wait.Wait()
close(ch)
}(&wait, chResults)
for res := range chResults {
if fl.verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup,
ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: l,
Error: nil,
}
}
}
wait.Done()
}
func formatRes(r *result, format int) {
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Println(r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fl.fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil
})
if err != nil {
return nil, err
}
return list, nil
}
In my opinion, it's far more readable and to-the-point.
Hope this helps!
add a comment |
The thing is, even so, standard grep still gets results faster by an amount of 60 % less processing time
For more information on what makes GNU grep fast, read this by the original author.
Here is some feedback on writing idiomatic Go.
Lines
Your code is oddly formatted.
It is very uncommon to have empty lines everywhere. In your code, empty lines more than double the length of your code. In most cases, I only add extra lines to separate functions, structs, if statements, for loops, etc.
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Becomes:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Line length
Keep lines to a length of 80 characters. It makes code easier to read on smaller monitors or if you split your screen to view multiple things.
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
Becomes:
flag.StringVar(&strRex, "r", "",
"Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&inverseSearch, "i", false,
"It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "",
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fileFilter, "FF", "",
"Filter to be applied to the filenames when used recursevily")
Use a C-style for loop
In grepStdin()
you initialize lineno
, increment it and only use it in the for loop. That's a standard for loop. Once inside the for loop, we can rename it to l
because it's purpose is clear from it's usage.
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
Becomes:
for l := 1; bf.Scan(); l++ {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
Combine multiple var
declarations
You can combine multiple variables declared with var
as such:
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
Becomes:
var (
strRex string
filenames string
regRex *regexp.Regexp
wg sync.WaitGroup
allResults result
verbose = false
recursive string
recursiveFileList string
fileFilter string
rexfileFilter *regexp.Regexp
inverseSearch bool
)
This is far more readable.
Use log
to write to standard error
You can utilize the log
package to print (possibly fatal) errors to standard error.
fmt.Fprintf(os.Stderr,
"The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
Becomes:
if strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
And
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
Becomes:
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", strRex,
rexError)
}
Notice that you don't need to call Error()
to get the string from it.
By doing this, you won't get custom return values. I don't think they're very useful in lieu of a good error message.
Combine if and assignment
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
Becomes:
var err error
if filenames, err = walkDir(recursive); err != nil {
log.Println(err)
}
Move things out of the global scope
Your wg
wait group is in the global scope. It doesn't need to be. You already pass wait
to grep()
, so use it.
allResults
is never used. recursiveFileList
is never used.
We can also move all of your flag variables to a common struct
. This is just a preference, but it tells readers what is a flag and what isn't.
Use a switch statement
In formatRes
you can use a switch statement instead of multiple if statements.
You can also clean up how you print things, like using Println
instead of Printf
.
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Println(r.Error)
}
}
Move your condition to a separate function
Your condition
(reg.Match(byte(line)) && !fl.inverseSearch) || (!reg.Match(byte(line)) && fl.inverseSearch)
Is long, and as Colin points out, you can take advantage of short circuiting.
Go does not provide XOR, but it doesn't need to. I'll leave that for you to implement.
We can define a function as such:
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
Use grepStdin()
in grep()
As Colin says, they contain duplicate code. I'll leave that for you to implement.
Conclusion
There are many other places to clean up the code, but I think you'll get the gist. Here is the final code I ended up with:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
type flags struct {
strRex string
recursive string
fileFilter string
verbose bool
inverseSearch bool
}
var (
fl flags
filenames string
regRex *regexp.Regexp
rexfileFilter *regexp.Regexp
)
func init() {
dfl := flags{
strRex: "",
verbose: false,
inverseSearch: false,
recursive: "",
fileFilter: "",
}
var rexError error
flag.StringVar(&fl.strRex, "r", dfl.strRex,
"Regular expresion to match against the input files")
flag.StringVar(&fl.recursive, "R", dfl.recursive,
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fl.fileFilter, "FF", dfl.fileFilter,
"Filter to be applied to the filenames when used recursevily")
flag.BoolVar(&fl.verbose, "v", dfl.verbose,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&fl.inverseSearch, "i", dfl.inverseSearch,
"It does what you might expect.. reverse the search")
flag.Parse()
if fl.strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
regRex, rexError = regexp.Compile(fl.strRex)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", fl.strRex,
rexError)
}
rexfileFilter, rexError = regexp.Compile(fl.fileFilter)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %s", rexfileFilter,
rexError)
}
if fl.recursive != "" {
var err error
if filenames, err = walkDir(fl.recursive); err != nil {
log.Println(err)
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
log.Fatalf("There is an error reading from stdin: %s", err)
}
var wait sync.WaitGroup
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wait.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wait, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wait.Wait()
close(ch)
}(&wait, chResults)
for res := range chResults {
if fl.verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup,
ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: l,
Error: nil,
}
}
}
wait.Done()
}
func formatRes(r *result, format int) {
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Println(r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fl.fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil
})
if err != nil {
return nil, err
}
return list, nil
}
In my opinion, it's far more readable and to-the-point.
Hope this helps!
add a comment |
The thing is, even so, standard grep still gets results faster by an amount of 60 % less processing time
For more information on what makes GNU grep fast, read this by the original author.
Here is some feedback on writing idiomatic Go.
Lines
Your code is oddly formatted.
It is very uncommon to have empty lines everywhere. In your code, empty lines more than double the length of your code. In most cases, I only add extra lines to separate functions, structs, if statements, for loops, etc.
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Becomes:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Line length
Keep lines to a length of 80 characters. It makes code easier to read on smaller monitors or if you split your screen to view multiple things.
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
Becomes:
flag.StringVar(&strRex, "r", "",
"Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&inverseSearch, "i", false,
"It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "",
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fileFilter, "FF", "",
"Filter to be applied to the filenames when used recursevily")
Use a C-style for loop
In grepStdin()
you initialize lineno
, increment it and only use it in the for loop. That's a standard for loop. Once inside the for loop, we can rename it to l
because it's purpose is clear from it's usage.
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
Becomes:
for l := 1; bf.Scan(); l++ {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
Combine multiple var
declarations
You can combine multiple variables declared with var
as such:
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
Becomes:
var (
strRex string
filenames string
regRex *regexp.Regexp
wg sync.WaitGroup
allResults result
verbose = false
recursive string
recursiveFileList string
fileFilter string
rexfileFilter *regexp.Regexp
inverseSearch bool
)
This is far more readable.
Use log
to write to standard error
You can utilize the log
package to print (possibly fatal) errors to standard error.
fmt.Fprintf(os.Stderr,
"The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
Becomes:
if strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
And
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
Becomes:
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", strRex,
rexError)
}
Notice that you don't need to call Error()
to get the string from it.
By doing this, you won't get custom return values. I don't think they're very useful in lieu of a good error message.
Combine if and assignment
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
Becomes:
var err error
if filenames, err = walkDir(recursive); err != nil {
log.Println(err)
}
Move things out of the global scope
Your wg
wait group is in the global scope. It doesn't need to be. You already pass wait
to grep()
, so use it.
allResults
is never used. recursiveFileList
is never used.
We can also move all of your flag variables to a common struct
. This is just a preference, but it tells readers what is a flag and what isn't.
Use a switch statement
In formatRes
you can use a switch statement instead of multiple if statements.
You can also clean up how you print things, like using Println
instead of Printf
.
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Println(r.Error)
}
}
Move your condition to a separate function
Your condition
(reg.Match(byte(line)) && !fl.inverseSearch) || (!reg.Match(byte(line)) && fl.inverseSearch)
Is long, and as Colin points out, you can take advantage of short circuiting.
Go does not provide XOR, but it doesn't need to. I'll leave that for you to implement.
We can define a function as such:
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
Use grepStdin()
in grep()
As Colin says, they contain duplicate code. I'll leave that for you to implement.
Conclusion
There are many other places to clean up the code, but I think you'll get the gist. Here is the final code I ended up with:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
type flags struct {
strRex string
recursive string
fileFilter string
verbose bool
inverseSearch bool
}
var (
fl flags
filenames string
regRex *regexp.Regexp
rexfileFilter *regexp.Regexp
)
func init() {
dfl := flags{
strRex: "",
verbose: false,
inverseSearch: false,
recursive: "",
fileFilter: "",
}
var rexError error
flag.StringVar(&fl.strRex, "r", dfl.strRex,
"Regular expresion to match against the input files")
flag.StringVar(&fl.recursive, "R", dfl.recursive,
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fl.fileFilter, "FF", dfl.fileFilter,
"Filter to be applied to the filenames when used recursevily")
flag.BoolVar(&fl.verbose, "v", dfl.verbose,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&fl.inverseSearch, "i", dfl.inverseSearch,
"It does what you might expect.. reverse the search")
flag.Parse()
if fl.strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
regRex, rexError = regexp.Compile(fl.strRex)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", fl.strRex,
rexError)
}
rexfileFilter, rexError = regexp.Compile(fl.fileFilter)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %s", rexfileFilter,
rexError)
}
if fl.recursive != "" {
var err error
if filenames, err = walkDir(fl.recursive); err != nil {
log.Println(err)
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
log.Fatalf("There is an error reading from stdin: %s", err)
}
var wait sync.WaitGroup
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wait.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wait, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wait.Wait()
close(ch)
}(&wait, chResults)
for res := range chResults {
if fl.verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup,
ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: l,
Error: nil,
}
}
}
wait.Done()
}
func formatRes(r *result, format int) {
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Println(r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fl.fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil
})
if err != nil {
return nil, err
}
return list, nil
}
In my opinion, it's far more readable and to-the-point.
Hope this helps!
The thing is, even so, standard grep still gets results faster by an amount of 60 % less processing time
For more information on what makes GNU grep fast, read this by the original author.
Here is some feedback on writing idiomatic Go.
Lines
Your code is oddly formatted.
It is very uncommon to have empty lines everywhere. In your code, empty lines more than double the length of your code. In most cases, I only add extra lines to separate functions, structs, if statements, for loops, etc.
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Becomes:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
Line length
Keep lines to a length of 80 characters. It makes code easier to read on smaller monitors or if you split your screen to view multiple things.
flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
Becomes:
flag.StringVar(&strRex, "r", "",
"Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&inverseSearch, "i", false,
"It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "",
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fileFilter, "FF", "",
"Filter to be applied to the filenames when used recursevily")
Use a C-style for loop
In grepStdin()
you initialize lineno
, increment it and only use it in the for loop. That's a standard for loop. Once inside the for loop, we can rename it to l
because it's purpose is clear from it's usage.
var lineno = 1
for bf.Scan() {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: lineno,
Error: nil,
}, 3)
}
lineno++
}
Becomes:
for l := 1; bf.Scan(); l++ {
// There is no XOR in Golang, so you ahve to do this:
if line := bf.Text(); (reg.Match(byte(line)) && !inverseSearch) || (!reg.Match(byte(line)) && inverseSearch) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
Combine multiple var
declarations
You can combine multiple variables declared with var
as such:
var strRex string
var filenames string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults result
var verbose = false
var recursive string
var recursiveFileList string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
Becomes:
var (
strRex string
filenames string
regRex *regexp.Regexp
wg sync.WaitGroup
allResults result
verbose = false
recursive string
recursiveFileList string
fileFilter string
rexfileFilter *regexp.Regexp
inverseSearch bool
)
This is far more readable.
Use log
to write to standard error
You can utilize the log
package to print (possibly fatal) errors to standard error.
fmt.Fprintf(os.Stderr,
"The '-r' (regular expression flag is mandatory)n")
os.Exit(1)
Becomes:
if strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
And
if rexError != nil {
fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
os.Exit(2)
}
Becomes:
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", strRex,
rexError)
}
Notice that you don't need to call Error()
to get the string from it.
By doing this, you won't get custom return values. I don't think they're very useful in lieu of a good error message.
Combine if and assignment
var err error
filenames, err = walkDir(recursive)
if err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
}
Becomes:
var err error
if filenames, err = walkDir(recursive); err != nil {
log.Println(err)
}
Move things out of the global scope
Your wg
wait group is in the global scope. It doesn't need to be. You already pass wait
to grep()
, so use it.
allResults
is never used. recursiveFileList
is never used.
We can also move all of your flag variables to a common struct
. This is just a preference, but it tells readers what is a flag and what isn't.
Use a switch statement
In formatRes
you can use a switch statement instead of multiple if statements.
You can also clean up how you print things, like using Println
instead of Printf
.
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Printf("%sn", r.Line)
} else {
log.Println(r.Error)
}
}
Move your condition to a separate function
Your condition
(reg.Match(byte(line)) && !fl.inverseSearch) || (!reg.Match(byte(line)) && fl.inverseSearch)
Is long, and as Colin points out, you can take advantage of short circuiting.
Go does not provide XOR, but it doesn't need to. I'll leave that for you to implement.
We can define a function as such:
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
Use grepStdin()
in grep()
As Colin says, they contain duplicate code. I'll leave that for you to implement.
Conclusion
There are many other places to clean up the code, but I think you'll get the gist. Here is the final code I ended up with:
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"regexp"
"sync"
)
type result struct {
Filename string
Line string
LineNumber int
Error error
}
type flags struct {
strRex string
recursive string
fileFilter string
verbose bool
inverseSearch bool
}
var (
fl flags
filenames string
regRex *regexp.Regexp
rexfileFilter *regexp.Regexp
)
func init() {
dfl := flags{
strRex: "",
verbose: false,
inverseSearch: false,
recursive: "",
fileFilter: "",
}
var rexError error
flag.StringVar(&fl.strRex, "r", dfl.strRex,
"Regular expresion to match against the input files")
flag.StringVar(&fl.recursive, "R", dfl.recursive,
"Recursively find all files starting from the current folder and "+
"apply the given search to them")
flag.StringVar(&fl.fileFilter, "FF", dfl.fileFilter,
"Filter to be applied to the filenames when used recursevily")
flag.BoolVar(&fl.verbose, "v", dfl.verbose,
"It sets verbose output (Basically showing filename and line number "+
"for each match)")
flag.BoolVar(&fl.inverseSearch, "i", dfl.inverseSearch,
"It does what you might expect.. reverse the search")
flag.Parse()
if fl.strRex == "" {
log.Fatalln("The regular expression flag '-r' is mandatory")
}
regRex, rexError = regexp.Compile(fl.strRex)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %sn", fl.strRex,
rexError)
}
rexfileFilter, rexError = regexp.Compile(fl.fileFilter)
if rexError != nil {
log.Fatalf("Your regex '%s' cant compile. Error : %s", rexfileFilter,
rexError)
}
if fl.recursive != "" {
var err error
if filenames, err = walkDir(fl.recursive); err != nil {
log.Println(err)
}
} else {
filenames = flag.Args()
}
}
func main() {
stat, err := os.Stdin.Stat()
if err != nil {
log.Fatalf("There is an error reading from stdin: %s", err)
}
var wait sync.WaitGroup
if (stat.Mode() & os.ModeNamedPipe) != 0 {
grepStdin(os.Stdin, regRex)
} else {
chResults := make(chan *result, 4)
wait.Add(len(filenames))
for _, fn := range filenames {
go grep(fn, regRex, &wait, chResults)
}
go func(wait *sync.WaitGroup, ch chan<- *result) {
wait.Wait()
close(ch)
}(&wait, chResults)
for res := range chResults {
if fl.verbose {
formatRes(res, 1)
} else {
formatRes(res, 2)
}
}
}
}
func match(reg *regexp.Regexp, line string) bool {
return !fl.inverseSearch && reg.Match(byte(line)) || (fl.inverseSearch && !reg.Match(byte(line)))
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
bf := bufio.NewScanner(ptr)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
formatRes(&result{
Line: line,
LineNumber: l,
Error: nil,
}, 3)
}
}
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup,
ch chan<- *result) {
fd, err := os.Open(file)
if err != nil {
ch <- &result{
Filename: file,
Error: err,
}
}
bf := bufio.NewScanner(fd)
for l := 1; bf.Scan(); l++ {
if line := bf.Text(); match(reg, line) {
ch <- &result{
Filename: file,
Line: line,
LineNumber: l,
Error: nil,
}
}
}
wait.Done()
}
func formatRes(r *result, format int) {
switch format {
case 1:
if r.Error == nil {
fmt.Printf("%d - %s - %sn", r.LineNumber, r.Filename, r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 2:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Printf("%s - %sn", r.Filename, r.Error)
}
break
case 3:
if r.Error == nil {
fmt.Println(r.Line)
} else {
log.Println(r.Error)
}
}
}
func walkDir(path string) (string, error) {
list := make(string, 0, 50)
err := filepath.Walk(".",
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if fl.fileFilter != "" {
if rexfileFilter.Match(byte(filepath.Base(path))) {
list = append(list, path)
}
} else {
list = append(list, path)
}
return nil
})
if err != nil {
return nil, err
}
return list, nil
}
In my opinion, it's far more readable and to-the-point.
Hope this helps!
edited 2 hours ago
answered 2 hours ago
esote
1,8471933
1,8471933
add a comment |
add a comment |
Matias Barrios is a new contributor. Be nice, and check out our Code of Conduct.
Matias Barrios is a new contributor. Be nice, and check out our Code of Conduct.
Matias Barrios is a new contributor. Be nice, and check out our Code of Conduct.
Matias Barrios is a new contributor. Be nice, and check out our Code of Conduct.
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f210591%2fgo-grep-command-clone%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
3
Code must be correct. To be sure that code is correct, code must be readable. I find your double-spaced code to be unreadable.
– peterSO
7 hours ago