diff options
Diffstat (limited to 'src/cmd/index/gen.key')
-rw-r--r-- | src/cmd/index/gen.key | 57 |
1 files changed, 57 insertions, 0 deletions
awk '    # gen.key
#   Purpose: attach a sort key to every index entry.
#   Input:  each input line has one of the following two forms:
#               string (tab) numlist
#               string " %key " sort.key (tab) numlist
#   Output: each output line has the form:
#               sort.key (tab) string (tab) numlist

BEGIN { FS = OFS = "\t" }

/ %key / {    # use sort.key if it is provided
    i = index($1, " %key ")
    # the marker " %key " is 6 characters long: the key starts at i+6,
    # the display string is everything before the marker
    print substr($1, i+6), substr($1, 1, i-1), $2
    next
}

{   # generate sort.key (in $2, by modifying string) if it is not provided
    $3 = $2    # move numlist out of the way
    $2 = $1    # start the sort key as a copy of the string

    # Modify sort.key
    # Remove some troff commands (font changes and point-size changes)
    gsub(/\\f\(..|\\f.|\\s[+-][0-9]|\\s[0-9][0-9]?/, "", $2)

    # underscore -> 0, so "foo_gorp" sorts before "food"
    gsub(/_/, "0", $2)

    # quote character is %, space character is ~
    quoted = 0
    if ($2 ~ /%/) {    # hide quoted literals in Q placeholders
        quoted = 1
        gsub(/%%/,  "QQ0QQ", $2)
        gsub(/%\[/, "QQ1QQ", $2)
        gsub(/%\]/, "QQ2QQ", $2)
        gsub(/%\{/, "QQ3QQ", $2)
        gsub(/%\}/, "QQ4QQ", $2)
        gsub(/%~/,  "QQ5QQ", $2)
    }
    gsub(/%e/, "\\", $2)            # implement troff escape
    gsub(/~/, " ", $2)              # remove tildes
    gsub(/[%\[\]\{\}]/, "", $2)     # remove % and font-changing []{}
    if (quoted) {    # restore literals but without escape character
        gsub(/QQ0QQ/, "%", $2)
        gsub(/QQ1QQ/, "[", $2)
        gsub(/QQ2QQ/, "]", $2)
        gsub(/QQ3QQ/, "{", $2)
        gsub(/QQ4QQ/, "}", $2)
        gsub(/QQ5QQ/, "~", $2)
    }

    # Ordering prefixes.  The two prefixes must differ: two blanks compare
    # lower than one blank followed by a digit, so the first group really
    # does precede the second in a plain byte-order sort.
    if ($2 ~ /^[^a-zA-Z]+$/)    # purely nonalphabetic lines go first
        $2 = "  " $2
    else if ($2 ~ /^[0-9]/)     # lines with leading digits come next
        $2 = " " $2
                                # otherwise whatever final.sort does
}

{ print $2, $1, $3 }
' "$@"