Identify duplicates
Alan A. A. Donovan and Brian W. Kernighan, The Go Programming Language. Addison-Wesley, 2016, Chapter 1, Tutorial
See also Go: getting oriented
Exercise 1.4
dup
“is partly inspired by the Unix uniq
command, which looks for
adjacent duplicate lines.”
“Modify dup2
to print the names of all files in which each duplicated
line occurs.”
My solution
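I modified countLines()
to return true
if it found a duplicate line,
that is, if incrementing the count for any line in counts
produced a value greater than 1.
Then I modified main()
to use the boolean return value
of countLines()
to print the file name if a duplicate line was found
in it. Note that a file is reported only when a line read from it had already been counted, so the first file to contain a line that is repeated only in later files is not listed.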
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
package main
import (
"bufio"
"fmt"
"os"
)
// countLines reads f line by line, adding one to counts for every line read,
// and reports whether any line read from f had already been counted, either
// earlier in f or by a previous call that shared the same counts map.
//
// If the scanner stops because of an error (for example a read failure or a
// line too long for the scanner's buffer), the error is written to standard
// error and the lines counted up to that point are kept.
func countLines(f *os.File, counts map[string]int) bool {
	foundDup := false
	input := bufio.NewScanner(f)
	for input.Scan() {
		line := input.Text()
		counts[line]++
		// The count was just incremented, so any value above 1 means this
		// line has been seen before.
		if counts[line] > 1 {
			foundDup = true
		}
	}
	// Scan returns false both at end of input and on failure; Err tells the
	// two apart so a truncated read does not pass silently.
	if err := input.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "dup2: %v\n", err)
	}
	return foundDup
}
// main counts lines across the files named on the command line, or across
// standard input when no files are given. For each named file in which
// countLines reports a repeated line it prints the file's name, and at the
// end it prints every line whose total count exceeds one, preceded by that
// count.
func main() {
	counts := make(map[string]int)
	files := os.Args[1:]
	if len(files) == 0 {
		// Standard input has no file name to report, so the result is unused.
		countLines(os.Stdin, counts)
	}
	for _, arg := range files {
		f, err := os.Open(arg)
		if err != nil {
			// Fprintf rather than Fprint: the message contains a %v verb
			// that must be expanded with the error.
			fmt.Fprintf(os.Stderr, "dup2: %v\n", err)
			continue
		}
		if countLines(f, counts) {
			fmt.Println("File contains duplicate line:", f.Name())
		}
		// Closed explicitly instead of deferred so each file is released
		// before the next one is opened.
		f.Close()
	}
	// Map iteration order is unspecified, so the order of the report may
	// differ between runs.
	for line, n := range counts {
		if n > 1 {
			fmt.Printf("%d\t%s\n", n, line)
		}
	}
}
Output
data1.txt:
1
2
3
foo
bar
baz
data2.txt:
1
2
3
4
5
foo
bar
bar
baz
qux
data3.txt:
1
2
3
4
foo
foo
bar
bar
1
2
3
4
5
6
$ go run solution.go data{1,2,3}.txt
File contains duplicate line: data2.txt
File contains duplicate line: data3.txt
2 baz
4 foo
5 bar