|
|
utf8.c - iomenu - interactive terminal-based selection menu |
|
|
 |
git clone git://bitreich.org/iomenu git://hg6vgqziawt5s4dj.onion/iomenu (git://bitreich.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
Tags |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
|
utf8.c (3175B) |
|
|
|
--- |
|
|
|
1 #include "utf8.h" |
|
|
|
2 |
|
|
|
3 #include <stddef.h> |
|
|
|
4 #include <stdint.h> |
|
|
|
5 |
|
|
|
6 /* |
|
|
|
7 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
|
|
|
8 * |
|
|
|
9 * Permission is hereby granted, free of charge, to any person obtaining a copy |
|
|
|
10 * of this software and associated documentation files (the "Software"), to |
|
|
|
11 * deal in the Software without restriction, including without limitation the |
|
|
|
12 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
|
|
|
13 * sell copies of the Software, and to permit persons to whom the Software is |
|
|
|
14 * furnished to do so, subject to the following conditions: |
|
|
|
15 * |
|
|
|
16 * The above copyright notice and this permission notice shall be included in |
|
|
|
17 * all copies or substantial portions of the Software. |
|
|
|
18 * |
|
|
|
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
|
|
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
|
|
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
|
|
22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
|
|
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
|
|
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
|
|
|
25 * IN THE SOFTWARE. |
|
|
|
26 */ |
|
|
|
27 |
|
|
|
/*
 * Encode the code point `u` as UTF-8.
 *
 * If `dest` is not NULL, the encoded bytes are stored there; the caller must
 * provide room for up to 4 bytes.  No terminating NUL byte is written.  If
 * `dest` is NULL nothing is stored and only the length is computed.
 *
 * Returns the number of bytes of the encoding (1 to 4), or 0 if `u` cannot
 * be encoded: values above U+10FFFF and the surrogate range U+D800..U+DFFF
 * are not valid in UTF-8.
 */
size_t
utf8_encode(char *dest, uint32_t u)
{
	size_t n, i;
	unsigned char lead;

	if (u <= 0x7f) {
		if (dest != NULL)
			*dest = (char)u;
		return 1;
	}

	if (u > 0x10ffff || (u >= 0xd800 && u <= 0xdfff))
		return 0; /* cannot be encoded */

	/* n is the number of continuation bytes following the lead byte */
	if (u <= 0x7ff)
		n = 1;
	else if (u <= 0xffff)
		n = 2;
	else
		n = 3;

	if (dest == NULL)
		return 1 + n;

	/* lead byte: n + 1 high bits set, a zero bit, then the top payload bits */
	lead = (unsigned char)(((0xffu << (7 - n)) & 0xffu) | (u >> (n * 6)));
	*dest++ = (char)lead;

	/* continuation bytes carry 6 bits each, most significant group first */
	for (i = n; i > 0; --i)
		*dest++ = (char)(0x80u | ((u >> ((i - 1) * 6)) & 0x3fu));

	return 1 + n;
}
|
|
|
54 |
|
|
|
55 /* Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> * |
|
|
|
56 * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */ |
|
|
|
57 |
|
|
|
/*
 * Lookup table driving utf8_decode().
 *
 * Entries 0..255 are read as utf8d[byte] and give the class ("type") of
 * each possible byte value.  The entries from index 256 onward form the
 * transition table: utf8_decode() fetches the next state from
 * utf8d[256 + state*16 + type], i.e. 16 entries per state.
 */
static const uint8_t utf8d[] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00..1f */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 20..3f */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40..5f */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 60..7f */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, /* 80..9f */
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* a0..bf */
	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* c0..df */
	0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, /* e0..ef */
	0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, /* f0..ff */
	0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, /* transitions from state 0 */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, /* states 1..2 */
	1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, /* states 3..4 */
	1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, /* states 5..6 */
	1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* states 7..8 */
};
|
|
|
74 |
|
|
|
75 uint32_t |
|
|
|
76 utf8_decode(uint32_t *state, uint32_t *codep, uint32_t byte) |
|
|
|
77 { |
|
|
|
78 uint32_t type = utf8d[byte]; |
|
|
|
79 |
|
|
|
80 *codep = (*state != UTF8_ACCEPT) |
|
|
|
81 ? (byte & 0x3fu) | (*codep << 6) |
|
|
|
82 : (0xff >> type) & (byte); |
|
|
|
83 *state = utf8d[256 + *state*16 + type]; |
|
|
|
84 return *state; |
|
|
|
85 } |
|