|
|
utf8pad: improve padded printing and printing invalid unicode characters - stagit-gopher - static git page generator for gopher |
|
|
 |
git clone git://git.codemadness.org/stagit-gopher (git://git.codemadness.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
 |
commit 554a9fe2e9d12defd9d6253871d8261d3f3ef3c6 |
|
|
 |
parent 7b93d02cd8f26ab9a25d967c72c359a22c91eb74 |
|
|
 |
Author: Hiltjo Posthuma <hiltjo@codemadness.org> (mailto:hiltjo@codemadness.org) |
|
|
Date: Sat, 9 Jan 2021 14:56:51 +0100 |
|
|
|
|
|
|
|
utf8pad: improve padded printing and printing invalid unicode characters |
|
|
|
|
|
|
|
- Use unicode replacement character (codepoint 0xfffd) when a codepoint is |
|
|
|
invalid and proceed printing the rest of the characters. |
|
|
|
|
|
|
|
- When a codepoint is invalid reset the internal state of mbtowc(3), from the |
|
|
|
OpenBSD man page: |
|
|
|
|
|
|
|
" If a call to mbtowc() resulted in an undefined internal state, mbtowc() |
|
|
|
must be called with s set to NULL to reset the internal state before it |
|
|
|
can safely be used again." |
|
|
|
|
|
|
|
- Make the function return 0 when `len` is 0 (this should not be an error). |
|
|
|
|
|
|
|
Diffstat: |
|
|
|
M stagit-gopher-index.c | 59 ++++++++++++++++++++++--------- |
|
|
|
M stagit-gopher.c | 58 ++++++++++++++++++++++--------- |
|
|
|
|
|
|
|
2 files changed, 83 insertions(+), 34 deletions(-) |
|
|
|
--- |
|
|
 |
diff --git a/stagit-gopher-index.c b/stagit-gopher-index.c |
|
|
|
@@ -10,6 +10,9 @@ |
|
|
|
|
|
|
|
#include <git2.h> |
|
|
|
|
|
|
|
+#define PAD_TRUNCATE_SYMBOL "\xe2\x80\xa6" /* symbol: "ellipsis" */ |
|
|
|
+#define UTF_INVALID_SYMBOL "\xef\xbf\xbd" /* symbol: "replacement" */ |
|
|
|
+ |
|
|
|
static git_repository *repo; |
|
|
|
|
|
|
|
static const char *relpath = ""; |
|
|
|
@@ -17,40 +20,62 @@ static const char *relpath = ""; |
|
|
|
static char description[255] = "Repositories"; |
|
|
|
static char *name = ""; |
|
|
|
|
|
|
|
-/* format `len' columns of characters. If string is shorter pad the rest |
|
|
|
+/* Format `len' columns of characters. If string is shorter pad the rest |
|
|
|
* with characters `pad`. */ |
|
|
|
int |
|
|
|
utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad) |
|
|
|
{ |
|
|
|
wchar_t wc; |
|
|
|
size_t col = 0, i, slen, siz = 0; |
|
|
|
- int rl, w; |
|
|
|
+ int inc, rl, w; |
|
|
|
|
|
|
|
- if (!len) |
|
|
|
+ if (!bufsiz) |
|
|
|
return -1; |
|
|
|
+ if (!len) { |
|
|
|
+ buf[0] = '\0'; |
|
|
|
+ return 0; |
|
|
|
+ } |
|
|
|
|
|
|
|
slen = strlen(s); |
|
|
|
- for (i = 0; i < slen; i += rl) { |
|
|
|
- if ((rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4)) <= 0) |
|
|
|
- break; |
|
|
|
- if ((w = wcwidth(wc)) == -1) |
|
|
|
+ for (i = 0; i < slen; i += inc) { |
|
|
|
+ inc = 1; |
|
|
|
+ if ((unsigned char)s[i] < 32) |
|
|
|
continue; |
|
|
|
- if (col + w > len || (col + w == len && s[i + rl])) { |
|
|
|
+ |
|
|
|
+ rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4); |
|
|
|
+ if (rl < 0) { |
|
|
|
+ mbtowc(NULL, NULL, 0); /* reset state */ |
|
|
|
+ inc = 1; /* next byte */ |
|
|
|
+ w = 1; /* replacement char is one width */ |
|
|
|
+ } else if ((w = wcwidth(wc)) == -1) { |
|
|
|
+ continue; |
|
|
|
+ } else { |
|
|
|
+ inc = rl; |
|
|
|
+ } |
|
|
|
+ |
|
|
|
+ if (col + w > len || (col + w == len && s[i + inc])) { |
|
|
|
if (siz + 4 >= bufsiz) |
|
|
|
return -1; |
|
|
|
- memcpy(&buf[siz], "\xe2\x80\xa6", 3); |
|
|
|
- siz += 3; |
|
|
|
- if (col + w == len && w > 1) |
|
|
|
- buf[siz++] = pad; |
|
|
|
+ memcpy(&buf[siz], PAD_TRUNCATE_SYMBOL, sizeof(PAD_TRUNCATE_SYMBOL) - 1); |
|
|
|
+ siz += sizeof(PAD_TRUNCATE_SYMBOL) - 1; |
|
|
|
buf[siz] = '\0'; |
|
|
|
- return 0; |
|
|
|
+ col++; |
|
|
|
+ break; |
|
|
|
+ } else if (rl < 0) { |
|
|
|
+ if (siz + 4 >= bufsiz) |
|
|
|
+ return -1; |
|
|
|
+ memcpy(&buf[siz], UTF_INVALID_SYMBOL, sizeof(UTF_INVALID_SYMBOL) - 1); |
|
|
|
+ siz += sizeof(UTF_INVALID_SYMBOL) - 1; |
|
|
|
+ buf[siz] = '\0'; |
|
|
|
+ col++; |
|
|
|
+ continue; |
|
|
|
} |
|
|
|
- if (siz + rl + 1 >= bufsiz) |
|
|
|
+ if (siz + inc + 1 >= bufsiz) |
|
|
|
return -1; |
|
|
|
- memcpy(&buf[siz], &s[i], rl); |
|
|
|
- col += w; |
|
|
|
- siz += rl; |
|
|
|
+ memcpy(&buf[siz], &s[i], inc); |
|
|
|
+ siz += inc; |
|
|
|
buf[siz] = '\0'; |
|
|
|
+ col += w; |
|
|
|
} |
|
|
|
|
|
|
|
len -= col; |
|
|
 |
diff --git a/stagit-gopher.c b/stagit-gopher.c |
|
|
|
@@ -19,6 +19,8 @@ |
|
|
|
#include "compat.h" |
|
|
|
|
|
|
|
#define LEN(s) (sizeof(s)/sizeof(*s)) |
|
|
|
+#define PAD_TRUNCATE_SYMBOL "\xe2\x80\xa6" /* symbol: "ellipsis" */ |
|
|
|
+#define UTF_INVALID_SYMBOL "\xef\xbf\xbd" /* symbol: "replacement" */ |
|
|
|
|
|
|
|
struct deltainfo { |
|
|
|
git_patch *patch; |
|
|
|
@@ -80,40 +82,62 @@ static char lastoidstr[GIT_OID_HEXSZ + 2]; /* id + newline + NUL byte */ |
|
|
|
static FILE *rcachefp, *wcachefp; |
|
|
|
static const char *cachefile; |
|
|
|
|
|
|
|
-/* format `len' columns of characters. If string is shorter pad the rest |
|
|
|
+/* Format `len' columns of characters. If string is shorter pad the rest |
|
|
|
* with characters `pad`. */ |
|
|
|
int |
|
|
|
utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad) |
|
|
|
{ |
|
|
|
wchar_t wc; |
|
|
|
size_t col = 0, i, slen, siz = 0; |
|
|
|
- int rl, w; |
|
|
|
+ int inc, rl, w; |
|
|
|
|
|
|
|
- if (!len) |
|
|
|
+ if (!bufsiz) |
|
|
|
return -1; |
|
|
|
+ if (!len) { |
|
|
|
+ buf[0] = '\0'; |
|
|
|
+ return 0; |
|
|
|
+ } |
|
|
|
|
|
|
|
slen = strlen(s); |
|
|
|
- for (i = 0; i < slen; i += rl) { |
|
|
|
- if ((rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4)) <= 0) |
|
|
|
- break; |
|
|
|
- if ((w = wcwidth(wc)) == -1) |
|
|
|
+ for (i = 0; i < slen; i += inc) { |
|
|
|
+ inc = 1; |
|
|
|
+ if ((unsigned char)s[i] < 32) |
|
|
|
+ continue; |
|
|
|
+ |
|
|
|
+ rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4); |
|
|
|
+ if (rl < 0) { |
|
|
|
+ mbtowc(NULL, NULL, 0); /* reset state */ |
|
|
|
+ inc = 1; /* next byte */ |
|
|
|
+ w = 1; /* replacement char is one width */ |
|
|
|
+ } else if ((w = wcwidth(wc)) == -1) { |
|
|
|
continue; |
|
|
|
- if (col + w > len || (col + w == len && s[i + rl])) { |
|
|
|
+ } else { |
|
|
|
+ inc = rl; |
|
|
|
+ } |
|
|
|
+ |
|
|
|
+ if (col + w > len || (col + w == len && s[i + inc])) { |
|
|
|
if (siz + 4 >= bufsiz) |
|
|
|
return -1; |
|
|
|
- memcpy(&buf[siz], "\xe2\x80\xa6", 3); |
|
|
|
- siz += 3; |
|
|
|
- if (col + w == len && w > 1) |
|
|
|
- buf[siz++] = pad; |
|
|
|
+ memcpy(&buf[siz], PAD_TRUNCATE_SYMBOL, sizeof(PAD_TRUNCATE_SYMBOL) - 1); |
|
|
|
+ siz += sizeof(PAD_TRUNCATE_SYMBOL) - 1; |
|
|
|
buf[siz] = '\0'; |
|
|
|
- return 0; |
|
|
|
+ col++; |
|
|
|
+ break; |
|
|
|
+ } else if (rl < 0) { |
|
|
|
+ if (siz + 4 >= bufsiz) |
|
|
|
+ return -1; |
|
|
|
+ memcpy(&buf[siz], UTF_INVALID_SYMBOL, sizeof(UTF_INVALID_SYMBOL) - 1); |
|
|
|
+ siz += sizeof(UTF_INVALID_SYMBOL) - 1; |
|
|
|
+ buf[siz] = '\0'; |
|
|
|
+ col++; |
|
|
|
+ continue; |
|
|
|
} |
|
|
|
- if (siz + rl + 1 >= bufsiz) |
|
|
|
+ if (siz + inc + 1 >= bufsiz) |
|
|
|
return -1; |
|
|
|
- memcpy(&buf[siz], &s[i], rl); |
|
|
|
- col += w; |
|
|
|
- siz += rl; |
|
|
|
+ memcpy(&buf[siz], &s[i], inc); |
|
|
|
+ siz += inc; |
|
|
|
buf[siz] = '\0'; |
|
|
|
+ col += w; |
|
|
|
} |
|
|
|
|
|
|
|
len -= col; |
|