|
|
utf8pad: fix byte-seek issue with negative width codepoints in the range >= 127 - stagit-gopher - static git page generator for gopher |
|
|
 |
git clone git://git.codemadness.org/stagit-gopher (git://git.codemadness.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
 |
commit 05a08e8ab50a8da5b2896c3f5887801d059f48dd |
|
|
 |
parent a9c90b585f158f98dd0997d1509e83f85dd87498 |
|
|
 |
Author: Hiltjo Posthuma <hiltjo@codemadness.org> (mailto://) |
application/vnd.lotus-organizer |
|
|
Date: Sat, 9 Jan 2021 16:19:18 +0100 |
|
|
|
|
|
|
|
utf8pad: fix byte-seek issue with negative width codepoints in the range >= 127 |
|
|
|
|
|
|
|
For example: for "\xef\xbf\xb7" (codepoint 0xfff7), wcwidth(wc) returns -1. |
|
|
|
The next byte was incorrectly seeked, but the codepoint itself was valid |
|
|
|
(mbtowc). |
|
|
|
|
|
|
|
Diffstat: |
|
|
|
M stagit-gopher-index.c | 7 +++---- |
|
|
|
M stagit-gopher.c | 7 +++---- |
|
|
|
|
|
|
|
2 files changed, 6 insertions(+), 8 deletions(-) |
|
|
|
--- |
|
|
 |
diff --git a/stagit-gopher-index.c b/stagit-gopher-index.c |
|
|
|
@@ -38,19 +38,18 @@ utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad) |
|
|
|
|
|
|
|
slen = strlen(s); |
|
|
|
for (i = 0; i < slen; i += inc) { |
|
|
|
- inc = 1; |
|
|
|
+ inc = 1; /* next byte */ |
|
|
|
if ((unsigned char)s[i] < 32) |
|
|
|
continue; |
|
|
|
|
|
|
|
rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4); |
|
|
|
+ inc = rl; |
|
|
|
if (rl < 0) { |
|
|
|
mbtowc(NULL, NULL, 0); /* reset state */ |
|
|
|
- inc = 1; /* next byte */ |
|
|
|
+ inc = 1; /* invalid, seek next byte */ |
|
|
|
w = 1; /* replacement char is one width */ |
|
|
|
} else if ((w = wcwidth(wc)) == -1) { |
|
|
|
continue; |
|
|
|
- } else { |
|
|
|
- inc = rl; |
|
|
|
} |
|
|
|
|
|
|
|
if (col + w > len || (col + w == len && s[i + inc])) { |
|
|
 |
diff --git a/stagit-gopher.c b/stagit-gopher.c |
|
|
|
@@ -100,19 +100,18 @@ utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad) |
|
|
|
|
|
|
|
slen = strlen(s); |
|
|
|
for (i = 0; i < slen; i += inc) { |
|
|
|
- inc = 1; |
|
|
|
+ inc = 1; /* next byte */ |
|
|
|
if ((unsigned char)s[i] < 32) |
|
|
|
continue; |
|
|
|
|
|
|
|
rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4); |
|
|
|
+ inc = rl; |
|
|
|
if (rl < 0) { |
|
|
|
mbtowc(NULL, NULL, 0); /* reset state */ |
|
|
|
- inc = 1; /* next byte */ |
|
|
|
+ inc = 1; /* invalid, seek next byte */ |
|
|
|
w = 1; /* replacement char is one width */ |
|
|
|
} else if ((w = wcwidth(wc)) == -1) { |
|
|
|
continue; |
|
|
|
- } else { |
|
|
|
- inc = rl; |
|
|
|
} |
|
|
|
|
|
|
|
if (col + w > len || (col + w == len && s[i + inc])) { |
|