diff options
Diffstat (limited to 'src/cmd/index/hierarchy')
-rw-r--r-- | src/cmd/index/hierarchy | 133 |
1 files changed, 133 insertions, 0 deletions
#!/bin/sh

# hierarchy: fold runs of consecutive index entries that share the same
# leading word into one main entry followed by its sub-entries.
#
# usage: hierarchy [file ...]      (awk reads standard input if no file is named)
#
# input (fields separated by tabs):
#     key (tab) string (tab) page numbers
#         command              command              123
#         command, data        command, [data]      11
#         command, display     command, [display]   11, 54, 63, 75
#         command, model       command, [model]     11
#         command, quit        command, [quit]      5, 16
# output (fields separated by tabs):
#     key (tab) string (tab) page numbers
#         key    command      123
#         key    [data]       11
#         key    [display]    ...
#         key    [model]      ...
#         key    [quit]       ...
#
# Only consecutive records are grouped, so records that belong together
# must already be adjacent in the input.

awk '
BEGIN { FS = OFS = "\t" }

# Buffer every record: y[n] is the key, x[n] is "string<TAB>page numbers".
{ x[NR] = $2 "\t" $3; y[NR] = $1 }

# Find each run of records whose keys share the same first word.
# A run of one record is printed unchanged; a longer run is handed to
# printrun, which prints the shared prefix once and then each instance
# with the prefix replaced by blank space.
END {
    for (i = 1; i <= NR; i = j+1) {
        j = findrun(i)          # index of the last record in the run
        if (j > i)
            printrun(i, j)
        else
            print y[i], x[i]
    }
}

# findrun(s): return the index of the last record in the run that starts
# at record s.  A following record stays in the run when its key equals
# the first word of y[s], or begins with that word followed by a space
# or a comma.
# Locals: j (scan index), p (prefix word), np (its length), c (next char).
function findrun(s,    j, p, np, c) {
    p = prefix(y[s])
    np = length(p)
    for (j = s+1; j <= NR; j++) {
        if (y[j] == p)                  # same key, so include
            continue
        if (index(y[j], p) != 1)        # does not start with the prefix
            break
        c = substr(y[j], np+1, 1)
        if (c != " " && c != ",")       # has to be a whole-word prefix
            break
    }
    return j-1
}

# prefix(s): return the first word of s, i.e. s with every comma removed
# and then cut at the first space.  s is a scalar, so the caller copy is
# not modified by the gsub.
function prefix(s,    n) {
    gsub(/,/, "", s)
    n = index(s, " ")
    if (n > 0)
        return substr(s, 1, n-1)
    else
        return s
}

# printrun(s, e): print records s..e (inclusive) as one hierarchy.
# The records are sorted into four buckets and emitted in this order:
#   1. "see" / "see also" entries, one per line
#   2. entries containing no [...]
#   3. entries with [...] somewhere after the first word
#   4. entries whose string starts with [...]
# Locals: i (loop index); s1/i1/e1/p1 (bucket sizes); sx,sy / ix,iy /
# ex,ey / px,py (bucket contents: x holds string+pages, y holds the key).
function printrun(s, e,    i, s1, e1, p1, i1, sx, sy, px, py, ex, ey, ix, iy) {
    s1 = 0; e1 = 0; p1 = 0; i1 = 0
    for (i = s; i <= e; i++) {
        # "[{]" matches a literal left brace; a bare "{" at the start of
        # an ERE has undefined meaning under POSIX.
        if (x[i] ~ /[{]see/) {              # see, see also
            sx[s1] = x[i]
            sy[s1] = y[i]
            s1++
        } else if (x[i] ~ /^\[/) {          # prefix word is [...]
            px[p1] = x[i]
            py[p1] = y[i]
            p1++
        } else if (x[i] ~ /\[.*\]/) {       # [...] somewhere else
            ex[e1] = x[i]
            ey[e1] = y[i]
            e1++
        } else {                            # none of the above
            ix[i1] = x[i]
            iy[i1] = y[i]
            i1++
        }
    }
    # Internal consistency check: every record must land in exactly one bucket.
    if (e-s+1 != s1 + p1 + i1 + e1) print "oh shit" >"/dev/stderr"

    for (i = 0; i < s1; i++)        # "see" entries, one per line
        print sy[i], sx[i]
    if (i1 > 1)
        printgroup(ix,iy,0,i1)      # entries without [...]
    else if (i1 == 1)
        print iy[0], ix[0]
    if (e1 > 1)
        printgroup(ex,ey,0,e1)      # prefix [...] entries
    else if (e1 == 1)
        print ey[0], ex[0]
    if (p1 > 1)
        printgroup(px,py,0,p1)      # [prefix] ... entries
    else if (p1 == 1)
        print py[0], px[0]
}

# printgroup(x, y, s, e): print entries x[s] .. x[e-1] as one main entry
# followed by its sub-entries.  Every output line carries the key y[s].
#
# If the first entry starts with two words and every entry in the group
# begins with that same two-word prefix, the space between the two words
# is temporarily replaced by "@" so the pair is treated as a single word;
# the "@" is turned back into a space just before printing.
#
# Note: x is an array and is therefore modified in place by the "@"
# substitution.
# Locals: i, j (loop indices); c (char after the prefix); pfx (two-word
# prefix); temp (words of the first entry); f23 (string and pages of one
# entry); n (substitution count).
function printgroup(x, y, s, e,    i, j, c, pfx, temp, f23, n) {
    split(x[s], f23)                        # f23[1] = string, f23[2] = pages
    if (split(f23[1], temp, " ") > 1) {
        pfx = temp[1] " " temp[2]           # 2-word prefix
        for (i = s+1; i < e; i++) {
            if (index(x[i], pfx) != 1)
                break
            c = substr(x[i], length(pfx)+1, 1)
            if (c != " " && c != ",")       # has to be a whole-word prefix
                break
        }
        if (i == e) {                       # every entry shares the 2-word prefix
            sub(/ /, "@", f23[1])
            for (i = s; i < e; i++)
                sub(/ /, "@", x[i])         # the @ is taken out again below
        }
    }
    n = sub(/,?[ ~]+.*/, "", f23[1])        # zap everything after the first word

    sub(/,$/, "", f23[1])
    if (n > 0) {                            # something was cut, so not a single word
        sub(/@/, " ", f23[1])
        print y[s], f23[1]                  # print the main entry
    }
    for (j = s; j < e; j++) {
        split(x[j], f23)
        # NOTE(review): the replacement below is the single space visible in
        # this copy of the file; whitespace may have been collapsed in
        # transit, so confirm the intended indent width against upstream.
        sub(/^[^, ]+[, ]+/, " ", f23[1])    # blank out the leading prefix word
        sub(/@/, " ", f23[1])
        print y[s], f23[1], f23[2]
    }
}

' "$@"