diff options
author | Fazlul Shahriar <fshahriar@gmail.com> | 2019-10-29 10:04:06 -0400 |
---|---|---|
committer | Dan Cross <crossd@gmail.com> | 2019-10-29 10:04:06 -0400 |
commit | 1309450668aa571dee97f4373f9555b4fddcf1aa (patch) | |
tree | 43bca68e1a2a0a4d5061bfae4daf0b2f4fea8a23 /src/cmd/awk | |
parent | 715807d706cd13bc583588477a84090fbf02e057 (diff) | |
download | plan9port-1309450668aa571dee97f4373f9555b4fddcf1aa.tar.gz plan9port-1309450668aa571dee97f4373f9555b4fddcf1aa.tar.bz2 plan9port-1309450668aa571dee97f4373f9555b4fddcf1aa.zip |
awk: split record into runes for empty FS (#292)
When FS is empty, awk was splitting records into bytes instead of runes.
For example, this printed only the first byte of the UTF-8 encoding
of é:
echo é | awk 'BEGIN{FS=""}{print $1}'
The change just copies how the `split` function handles runes.
Originally reported by kris on twitter:
https://twitter.com/p9luv/status/1180436083433201665
Diffstat (limited to 'src/cmd/awk')
-rw-r--r-- | src/cmd/awk/lib.c | 13 |
1 file changed, 9 insertions, 4 deletions
diff --git a/src/cmd/awk/lib.c b/src/cmd/awk/lib.c index 6a6849c5..3eb30687 100644 --- a/src/cmd/awk/lib.c +++ b/src/cmd/awk/lib.c @@ -29,6 +29,7 @@ THIS SOFTWARE. #include <errno.h> #include <stdlib.h> #include <stdarg.h> +#include <utf.h> #include "awk.h" #include "y.tab.h" @@ -293,15 +294,19 @@ void fldbld(void) /* create fields from current record */ } *fr = 0; } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */ - for (i = 0; *r != 0; r++) { - char buf[2]; + int nb; + for (i = 0; *r != 0; r += nb) { + Rune rr; + char buf[UTFmax+1]; + i++; if (i > nfields) growfldtab(i); if (freeable(fldtab[i])) xfree(fldtab[i]->sval); - buf[0] = *r; - buf[1] = 0; + nb = chartorune(&rr, r); + memmove(buf, r, nb); + buf[nb] = '\0'; fldtab[i]->sval = tostring(buf); fldtab[i]->tval = FLD | STR; } |