aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/index/gen.key
blob: a79227638e80f976ac2ac8c7145bb9bdd3e0292e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
awk ' # gen.key
#   Input: Each input line has one of the following two forms:
#	string                   (tab) numlist
#	string " %key " sort.key (tab) numlist
#   Output: Each output line has the form:
#	sort.key (tab) string (tab) numlist

BEGIN {	FS = OFS = "\t" }

/ %key / { # use sort.key if it is provided
	   i = index($1, " %key ")
	   print substr($1, i+6), substr($1, 1, i-1), $2
	   next
	 }

	{ # generate sort.key (in $2, by modifying string) if it is not provided
	$3 = $2
	$2 = $1

	#Modify sort.key
	# Remove some troff commands
	gsub(/\\f\(..|\\f.|\\s[+-][0-9]|\\s[0-9][0-9]?/, "", $2)

	# underscore -> 0, so "foo_gorp" sorts before "food"
	gsub(/_/, "0", $2)

	# quote character is %, space character is ~
	quoted = 0
	if ($2 ~ /%/) {  # hide quoted literals in Q
		quoted = 1
		gsub(/%%/,  "QQ0QQ", $2)
		gsub(/%\[/, "QQ1QQ", $2)
		gsub(/%\]/, "QQ2QQ", $2)
		gsub(/%\{/, "QQ3QQ", $2)
		gsub(/%\}/, "QQ4QQ", $2)
		gsub(/%~/,  "QQ5QQ", $2)
	}
	gsub(/%e/, "\\", $2)		# implement troff escape
	gsub(/~/, " ", $2)		# remove tildes
	gsub(/[%\[\]\{\}]/, "", $2)	# remove % and font-changing []{}
	if (quoted) {  # restore literals but without escape charcter
		gsub(/QQ0QQ/, "%", $2)
		gsub(/QQ1QQ/, "[", $2)
		gsub(/QQ2QQ/, "]", $2)
		gsub(/QQ3QQ/, "{", $2)
		gsub(/QQ4QQ/, "}", $2)
		gsub(/QQ5QQ/, "~", $2)
	}
	if ($2 ~ /^[^a-zA-Z]+$/)	# purely nonalphabetic lines go first
		$2 = "  " $2
	else if ($2 ~ /^[0-9]/)		# lines with eading digits come next
		$2 = " " $2
					# otherwise whatever final.sort does
}

	{ print $2, $1, $3 } 
' $*