Identify duplicates

Alan A. A. Donovan and Brian W. Kernighan, The Go Programming Language. Addison-Wesley, 2016, Chapter 1, Tutorial

Exercise 1.4

dup “is partly inspired by the Unix uniq command, which looks for adjacent duplicate lines.”

“Modify dup2 to print the names of all files in which each duplicated line occurs.”

My solution

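I modified countLines() to return true if it found a duplicate line, that is, if incrementing the count for any line in counts produced a value greater than 1. Because counts is shared across all input files, a line also counts as a duplicate when it first appeared in an earlier file. I then modified main() to use the boolean return value of countLines() to print the file name whenever a duplicate line was found in that file.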

package main

import (
	"bufio"
	"fmt"
	"os"
)


// countLines reads f line by line, incrementing counts[line] for every line
// read. It reports whether any line read from f raised its count above 1,
// meaning the line had already been recorded in counts, either earlier in f
// or by whatever populated counts before this call.
//
// If scanning stops because of an error rather than end of input (for
// example a line longer than the scanner's token limit), the error is
// written to standard error and the result reflects the lines read so far.
func countLines(f *os.File, counts map[string]int) bool {
	foundDup := false
	input := bufio.NewScanner(f)
	for input.Scan() {
		line := input.Text()
		counts[line]++
		// The increment always makes the count larger than it was, so
		// comparing against 1 is sufficient to detect a repeated line.
		if counts[line] > 1 {
			foundDup = true
		}
	}
	// Scan returns false on both EOF and failure; Err distinguishes them.
	if err := input.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "dup2: %v\n", err)
	}
	return foundDup
}


// main counts the lines of every file named on the command line, or of
// standard input when no files are given. For each named file in which
// countLines reports a duplicate it prints the file name, and at the end it
// prints every line whose total count is greater than 1, preceded by that
// count. Files that cannot be opened are reported on standard error and
// skipped.
func main() {
	counts := make(map[string]int)
	files := os.Args[1:]
	if len(files) == 0 {
		// Standard input has no file name to report, so the result is unused.
		countLines(os.Stdin, counts)
	} else {
		for _, arg := range files {
			f, err := os.Open(arg)
			if err != nil {
				// Fprintf rather than Fprint: the message contains a
				// format verb that must be expanded with err.
				fmt.Fprintf(os.Stderr, "dup2: %v\n", err)
				continue
			}
			foundDup := countLines(f, counts)
			// Close right away instead of deferring so descriptors are not
			// held open for the rest of the loop.
			f.Close()
			if foundDup {
				fmt.Printf("File contains duplicate line: %s\n", f.Name())
			}
		}
	}
	// Map iteration order is unspecified, so the order of these lines may
	// differ from run to run.
	for line, n := range counts {
		if n > 1 {
			fmt.Printf("%d\t%s\n", n, line)
		}
	}
}

Output

data1.txt:

foo
bar
baz

data2.txt:

foo
bar
bar
baz
qux

data3.txt:

foo
foo
bar
bar

$ go run solution.go data{1,2,3}.txt
File contains duplicate line: data2.txt
File contains duplicate line: data3.txt
2	baz
4	foo
5	bar