|
|
utf8.c - iomenu - interactive terminal-based selection menu |
|
|
 |
git clone git://bitreich.org/iomenu git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/iomenu (git://bitreich.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
Tags |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
|
utf8.c (3174B) |
|
|
|
--- |
|
|
|
1 #include "utf8.h" |
|
|
|
2 #include <stddef.h> |
|
|
|
3 #include <stdint.h> |
|
|
|
4 |
|
|
|
5 /* |
|
|
|
6 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
|
|
|
7 * |
|
|
|
8 * Permission is hereby granted, free of charge, to any person obtaining a copy |
|
|
|
9 * of this software and associated documentation files (the "Software"), to |
|
|
|
10 * deal in the Software without restriction, including without limitation the |
|
|
|
11 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
|
|
|
12 * sell copies of the Software, and to permit persons to whom the Software is |
|
|
|
13 * furnished to do so, subject to the following conditions: |
|
|
|
14 * |
|
|
|
15 * The above copyright notice and this permission notice shall be included in |
|
|
|
16 * all copies or substantial portions of the Software. |
|
|
|
17 * |
|
|
|
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
|
|
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
|
|
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
|
|
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
|
|
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
|
|
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
|
|
|
24 * IN THE SOFTWARE. |
|
|
|
25 */ |
|
|
|
26 |
|
|
|
/*
 * Encode the code point `u` as UTF-8.
 *
 * dest: output buffer, or NULL to only compute the encoded length.  When not
 *       NULL it must have room for the returned number of bytes (at most 4).
 *       No terminating NUL byte is written.
 * u:    code point to encode.
 *
 * Returns the length of the encoding in bytes (1 to 4), or 0 if `u` cannot
 * be encoded: surrogates (0xd800..0xdfff) and values above 0x10ffff are
 * rejected, so that the output is always accepted by utf8_decode().
 * Nothing is written to `dest` when 0 is returned.
 */
size_t
utf8_encode(char *dest, uint32_t u)
{
	size_t len, i;

	if (u <= 0x7f)
		len = 1;
	else if (u <= 0x7ff)
		len = 2;
	else if (u <= 0xffff)
		len = (u >= 0xd800 && u <= 0xdfff) ? 0 : 3;
	else if (u <= 0x10ffff)
		len = 4;
	else
		len = 0;

	if (dest == NULL || len == 0)
		return len;

	if (len == 1) {
		*dest = (char)u;
		return 1;
	}

	/*
	 * Fill the continuation bytes from the last one to the first one:
	 * each takes the 6 lowest remaining bits, so the most significant
	 * bits end up right after the lead byte, as UTF-8 requires.
	 */
	for (i = len - 1; i > 0; --i) {
		dest[i] = (char)(unsigned char)(0x80 | (u & 0x3f));
		u >>= 6;
	}

	/* lead byte: `len` high bits set, a zero bit, then the remaining bits */
	dest[0] = (char)(unsigned char)(((0xff << (8 - len)) & 0xff) | u);
	return len;
}
|
|
|
53 |
|
|
|
54 /* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> * |
|
|
|
55 * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */ |
|
|
|
56 |
|
|
|
/*
 * Lookup table driving utf8_decode().
 *
 * The first 256 entries map an input byte to a character class: they are
 * read as utf8d[byte].  The remaining entries form the state transition
 * table, 16 entries per state: they are read as
 * utf8d[256 + state * 16 + class] and give the next state.
 */
static const uint8_t utf8d[] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00..1f */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 20..3f */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40..5f */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 60..7f */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, /* 80..9f */
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* a0..bf */
	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* c0..df */
	0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, /* e0..ef */
	0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, /* f0..ff */
	0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, /* s0..s0 */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, /* s1..s2 */
	1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, /* s3..s4 */
	1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, /* s5..s6 */
	1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* s7..s8 */
};
|
|
|
73 |
|
|
|
74 uint32_t |
|
|
|
75 utf8_decode(uint32_t *state, uint32_t *codep, uint32_t byte) |
|
|
|
76 { |
|
|
|
77 uint32_t type = utf8d[byte]; |
|
|
|
78 |
|
|
|
79 *codep = (*state != UTF8_ACCEPT) |
|
|
|
80 ? (byte & 0x3fu) | (*codep << 6) |
|
|
|
81 : (0xff >> type) & (byte); |
|
|
|
82 *state = utf8d[256 + *state*16 + type]; |
|
|
|
83 return *state; |
|
|
|
84 } |
|