1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
#!/bin/sh
# input:
# key (tab) string (tab) page numbers
# command command 123
# command, data command, [data] 11
# command, display command, [display] 11, 54, 63, 75
# command, model command, [model] 11
# command, quit command, [quit] 5, 16
# output:
# key (tab) string (tab) page numbers
# key command 123
# key [data] 11
# key [display] ...
# key [model] ...
# key [quit] ...
awk '
# Fields are tab-separated on input and on output.
BEGIN { FS = OFS = "\t" }

# Remember every record: y[] holds the key (field 1) and x[] holds the
# entry string and page numbers (fields 2 and 3) rejoined with a tab.
{
	y[NR] = $1
	x[NR] = $2 "\t" $3
}

# Find each sequence of records whose keys have the same prefix;
# dump the prefix, then each instance with spaces instead of the prefix.
# A record that forms a run by itself is printed unchanged.
END {
	for (i = 1; i <= NR; i = j + 1) {
		j = findrun(i)		# index of the last element of the run starting at i
		if (j > i)
			printrun(i, j)
		else
			print y[i], x[i]
	}
}
# findrun(s): return the index of the last record in the run that starts
# at record s.  The run is y[s], y[s+1], ... for as long as each key either
# equals the prefix of y[s] (see prefix()) or begins with that prefix
# followed immediately by a space or a comma.  Scanning stops at the first
# key that fails the check or at NR, so the result is always >= s.
# Parameters after s are locals: j scan index, p prefix, np its length,
# c the character that follows the prefix in the key being examined.
function findrun(s,    j, p, np, c) {
	p = prefix(y[s])
	np = length(p)
	for (j = s + 1; j <= NR; j++) {
		if (y[j] == p)			# same, so include
			continue
		if (index(y[j], p) != 1)	# does not start with the prefix
			break
		c = substr(y[j], np + 1, 1)
		if (c != " " && c != ",")	# has to be a whole-word prefix
			break
	}
	return j - 1				# j is the first index NOT in the run
}
# prefix(s): return the first word of s with every comma removed first,
# i.e. everything before the first space of the comma-free string, or the
# whole comma-free string when it contains no space.
# cut is a local: the position of the first space, 0 if there is none.
function prefix(s,    cut) {
	gsub(/,/, "", s)
	cut = index(s, " ")
	return (cut > 0) ? substr(s, 1, cut - 1) : s
}
# printrun(s, e): print records s..e (inclusive), a run found by findrun(),
# regrouped by the shape of the entry text in x[]:
#   1. entries containing "{see"            printed first, one per line
#   2. entries with no [...] at all          (ix, iy)
#   3. entries with [...] not at the start   (ex, ey)
#   4. entries whose text starts with "["    (px, py)
# Groups 2-4 are handed to printgroup() when they hold more than one entry
# and are printed unchanged when they hold exactly one.
# Parameters after e are locals: i loop index; s1, i1, e1, p1 the group
# sizes; sx/sy, ix/iy, ex/ey, px/py the text and key of each grouped entry,
# indexed from 0.
function printrun(s, e,    i, s1, i1, e1, p1, sx, sy, ix, iy, ex, ey, px, py) {
	s1 = i1 = e1 = p1 = 0
	for (i = s; i <= e; i++) {
		# "{" is kept in a bracket expression: a bare "{" at the start
		# of an ERE is undefined by POSIX.
		if (x[i] ~ /[{]see/) {			# see, see also
			sx[s1] = x[i]
			sy[s1] = y[i]
			s1++
		} else if (x[i] ~ /^\[/) {		# prefix word is [...]
			px[p1] = x[i]
			py[p1] = y[i]
			p1++
		} else if (x[i] ~ /\[.*\]/) {		# [...] somewhere else
			ex[e1] = x[i]
			ey[e1] = y[i]
			e1++
		} else {				# none of the above
			ix[i1] = x[i]
			iy[i1] = y[i]
			i1++
		}
	}

	# Sanity check: every entry of the run must be in exactly one group.
	if (e - s + 1 != s1 + p1 + i1 + e1)
		print ("printrun: internal error: entries " s "-" e " were not all classified") > "/dev/stderr"

	for (i = 0; i < s1; i++)			# "see", one/line
		print sy[i], sx[i]

	if (i1 > 1)
		printgroup(ix, iy, 0, i1)		# non [...] items
	else if (i1 == 1)
		print iy[0], ix[0]

	if (e1 > 1)
		printgroup(ex, ey, 0, e1)		# ... [...] items
	else if (e1 == 1)
		print ey[0], ex[0]

	if (p1 > 1)
		printgroup(px, py, 0, p1)		# [prefix] ... items
	else if (p1 == 1)
		print py[0], px[0]
}
# printgroup(x, y, s, e): print entries x[s] .. x[e-1] (e is exclusive) as
# an optional main-entry line followed by one line per entry in which the
# leading word has been replaced by a space.
#   x[]  entry text and page numbers joined by a tab; these parameters
#        shadow the global x and y, and x[] is modified in place when a
#        two-word prefix is joined (see below)
#   y[]  keys; y[s] is printed as the key of every output line
# If the first entry has at least two words and every other entry starts
# with those same two words followed by a space or comma, the first space
# of each entry is temporarily replaced by a marker so that the two words
# are treated as one leading word; the marker is turned back into a space
# before printing.
# Parameters after e are locals: i, j loop indexes; c character after the
# prefix; n substitution count; pfx the two-word prefix; f23 the text and
# page fields of one entry; temp the words of the first entry; mark the
# placeholder character.
function printgroup(x, y, s, e,    i, j, c, n, pfx, f23, temp, mark) {
	# A control character is used as the placeholder so that a literal
	# "@" in the entry text is never mistaken for it.
	mark = "\001"

	split(x[s], f23)			# FS is tab: f23[1] text, f23[2] pages
	if (split(f23[1], temp, " ") > 1) {
		pfx = temp[1] " " temp[2]	# 2-word prefix
		for (i = s + 1; i < e; i++) {
			if (index(x[i], pfx) != 1)
				break
			c = substr(x[i], length(pfx) + 1, 1)
			if (c != " " && c != ",")	# has to be whole word prefix
				break
		}
		if (i == e) {			# every entry shares the 2-word prefix
			sub(/ /, mark, f23[1])
			for (i = s; i < e; i++)
				sub(/ /, mark, x[i])	# marker is taken out below
		}
	}

	n = sub(/,?[ ~]+.*/, "", f23[1])	# zap rest of line
	sub(/,$/, "", f23[1])
	if (n > 0) {				# some change, so not a single word
		sub(mark, " ", f23[1])
		print y[s], f23[1]		# print main entry
	}

	for (j = s; j < e; j++) {
		split(x[j], f23)
		sub(/^[^, ]+[, ]+/, " ", f23[1])	# leading word -> one space
		sub(mark, " ", f23[1])
		print y[s], f23[1], f23[2]
	}
}
' "$@"	# quoted so each file operand reaches awk as one word, unsplit and unglobbed
|