aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/index/hierarchy
blob: 16cda592a81335f002739aaae607437fbc33f07f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/bin/sh

# input:
#	key (tab) string (tab) page numbers
#		command	command	123
#		command, data	command, [data]	11
#		command, display	command, [display]	11, 54, 63, 75
#		command, model	command, [model]	11
#		command, quit	command, [quit]	5, 16
# output:
#	key (tab) string (tab) page numbers
#		key	command  123
#		key	   [data]  11
#		key	   [display] ...
#		key	   [model] ...
#		key	   [quit] ...

awk '
# Fields are tab-separated on both input and output.
BEGIN	{ FS = OFS = "\t" }

# Remember every record for the END pass, indexed by record number:
#	y[NR]	sort key (field 1)
#	x[NR]	display string and page numbers (fields 2 and 3), tab-joined
# The raw input line is not kept; nothing in the program reads it.
{	x[NR] = $2 "\t" $3; y[NR] = $1 }

# find each run of consecutive entries whose keys share the same first word;
# print the shared prefix once, then each entry with blanks in place of the prefix
# Walk the saved records one run at a time.  A run of several records is
# handed to printrun; a record that stands alone is printed unchanged.
END {
	first = 1
	while (first <= NR) {
		last = findrun(first)	# index of the last record in this run
		if (last <= first)
			print y[first], x[first]
		else
			printrun(first, last)
		first = last + 1
	}
}

# findrun: return the index of the last record in the run that starts at s.
# The run is record s plus the consecutive following records whose key y[j]
# either equals prefix(y[s]) or starts with it followed by a space or comma.
# Always returns at least s.  Reads the globals y[] and NR.
# Names after the extra tab in the parameter list (j, p, np, c) are locals.
function findrun(s,	j, p, np, c) {
	p = prefix(y[s])
	np = length(p)
	for (j = s+1; j <= NR; j++) {
		if (y[j] == p)			# key is exactly the prefix: include
			continue
		if (index(y[j], p) != 1)	# key does not start with prefix: run ends
			break
		c = substr(y[j], np+1, 1)
		if (c != " " && c != ",")	# prefix must end on a word boundary
			break
	}
	return j-1
}

# prefix: return the first word of key s after deleting every comma from it.
# A word ends at the first space; a key with no space is returned whole
# (still minus its commas).
function prefix(s,	sp) {
	gsub(/,/, "", s)
	sp = index(s, " ")
	return (sp == 0) ? s : substr(s, 1, sp-1)
}

# printrun: print records s..e (one run) regrouped by the shape of x[i].
# Each record is put in the first class whose test matches:
#	contains {see			cross-reference; printed first, one per line
#	starts with [			printed last
#	has [...] somewhere else	printed third
#	none of the above		printed second
# A class with more than one member is printed through printgroup; a class
# with exactly one member is printed as is.
# Reads the globals x[] and y[].  Everything after s and e in the parameter
# list is local, so the counters and scratch arrays start empty on each call.
function printrun(s, e,		i, s1, e1, p1, i1, sx, sy, px, py, ex, ey, ix, iy) {
	s1 = 0; e1 = 0; p1 = 0; i1 = 0
	for (i = s; i <= e; i++) {
		if (x[i] ~ /[{]see/) {		# see, see also
			sx[s1] = x[i]
			sy[s1] = y[i]
			s1++
		} else if (x[i] ~ /^\[/) {	# string begins with [...]
			px[p1] = x[i]
			py[p1] = y[i]
			p1++
		} else if (x[i] ~ /\[.*\]/) {	# [...] somewhere else
			ex[e1] = x[i]
			ey[e1] = y[i]
			e1++
		} else {			# none of the above
			ix[i1] = x[i]
			iy[i1] = y[i]
			i1++
		}
	}
	# sanity check: every record must have landed in exactly one class
	if (e-s+1 != s1 + p1 + i1 + e1)
		print ("hierarchy: internal error: entry count mismatch for records " s "-" e) >"/dev/stderr"

	for (i = 0; i < s1; i++)	# "see", one/line
		print sy[i], sx[i]
	if (i1 > 1)
		printgroup(ix,iy,0,i1)	# non [...] items
	else if (i1 == 1)
		print iy[0], ix[0]
	if (e1 > 1)
		printgroup(ex,ey,0,e1)	# items with [...] after the first character
	else if (e1 == 1)
		print ey[0], ex[0]
	if (p1 > 1)
		printgroup(px,py,0,p1)	# [prefix] ... items
	else if (p1 == 1)
		print py[0], px[0]
}

# printgroup: print entries x[s] .. x[e-1] as one main entry plus indented
# subentries.  Each x[i] is "string (tab) page numbers"; the split calls on
# x[] use the current FS, which the BEGIN rule sets to a tab.
# Every output line carries the key of the first entry, y[s].
#
# If the first string has at least two words and every later entry starts
# with those same two words (followed by a space or comma), the two words
# are treated as a single prefix: the space between them is temporarily
# turned into @ so the first-word patterns below see one word.  That
# substitution is made in the caller-supplied x[] itself.
#
# Names after the extra tabs in the parameter list are locals.
function printgroup(x, y, s, e,		i, j, f23, temp, pfx, c, n) {
	split(x[s], f23)
	if (split(f23[1], temp, " ") > 1) {
		pfx = temp[1] " " temp[2]	# 2-word prefix
		for (i = s+1; i < e; i++) {
			if (index(x[i], pfx) != 1)
				break
			c = substr(x[i], length(pfx)+1, 1)
			if (c != " " && c != ",")	# has to be whole word prefix
				break
		}
		if (i == e) {	# loop ran to the end: all entries share pfx
			sub(/ /, "@", f23[1])
			for (i = s; i < e; i++)
				sub(/ /, "@", x[i])	# take @ out later
		}
	}
	# cut the first string down to its leading word (or @-joined pair);
	# n records whether anything was cut off
	n = sub(/,?[ ~]+.*/, "", f23[1])

	sub(/,$/, "", f23[1])
	if (n > 0) {	# some change, so not a single word
		sub(/@/, " ", f23[1])
		print y[s], f23[1]	# print main entry
	}
	for (j = s; j < e; j++) {
		split(x[j], f23)
		# replace the leading word and its separators with three blanks
		sub(/^[^, ]+[, ]+/, "   ", f23[1])
		sub(/@/, " ", f23[1])	# restore a protected space, if still present
		print y[s], f23[1], f23[2]
	}
}

# end of awk program; the shell passes each file operand through unsplit
' "$@"