|
|
utf8expr.c - utf8expr - expr(1) for UTF-8 |
|
|
 |
git clone git://bitreich.org/utf8expr/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/utf8expr/ (git://bitreich.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
Tags |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
|
utf8expr.c (2190B) |
|
|
|
--- |
|
|
|
1 /* |
|
|
|
2 * Copy me if you can. |
|
|
|
3 * by 20h |
|
|
|
4 */ |
|
|
|
5 |
|
|
|
6 #include <unistd.h> |
|
|
|
7 #include <string.h> |
|
|
|
8 #include <stdlib.h> |
|
|
|
9 #include <stdio.h> |
|
|
|
10 #include <libgen.h> |
|
|
|
11 |
|
|
|
12 #include "arg.h" |
|
|
|
13 |
|
|
|
14 char *argv0; |
|
|
|
15 |
|
|
|
/*
 * Return the number of characters in the NUL-terminated string s.
 * Every byte that is not a UTF-8 continuation byte (10xxxxxx) is
 * counted as the start of one character.
 *
 * Idea taken from:
 * 	http://canonical.org/~kragen/strlen-utf8.html
 */
size_t
utf8strlen(char *s)
{
	size_t count = 0;
	char *p = s;

	while (*p != '\0') {
		/* Continuation bytes extend the previous character. */
		if ((*p & 0xc0) != 0x80)
			count++;
		p++;
	}

	return count;
}
|
|
|
33 |
|
|
|
/*
 * Find the first occurrence of the single UTF-8 character c in the
 * NUL-terminated string s.
 *
 * c is a NUL-terminated string holding exactly one character; the
 * comparison is done on whole characters of s (a lead byte plus its
 * continuation bytes), never on partial byte sequences.
 *
 * Returns a pointer to the first byte of the matching character in s,
 * or NULL if c is empty, if there is no match, or if a sequence with
 * more than six continuation bytes is met before a match.
 *
 * Only bytes of s itself are inspected; nothing before s[0] is read.
 */
char *
utf8strchr(char *s, char *c)
{
	size_t cl, n;

	cl = strlen(c);
	if (cl == 0)
		return NULL;

	/*
	 * Continuation bytes at the very start belong to no character:
	 * skip them, but give up on an overlong run like everywhere else.
	 */
	for (n = 0; (s[n] & 0xc0) == 0x80; n++) {
		if (n >= 6)
			return NULL;
	}
	s += n;

	for (; s[0] != '\0'; s += n) {
		/* n = lead byte plus all continuation bytes following it. */
		for (n = 1; (s[n] & 0xc0) == 0x80; n++) {
			if (n >= 7)
				return NULL;
		}

		if (n == cl && !memcmp(s, c, cl))
			return s;
	}

	return NULL;
}
|
|
|
62 |
|
|
|
/*
 * Locate a substring of the NUL-terminated UTF-8 string s.
 *
 * pos is the 1-based index of the first character wanted and *length
 * is the number of characters wanted. On success a pointer into s (to
 * the first byte of character pos) is returned and *length is replaced
 * by the size of the substring in bytes. If the string ends early the
 * substring simply runs to the end of s.
 *
 * pos == 0 selects nothing: s is returned with *length set to 0. The
 * function never forms a pointer in front of s.
 *
 * Returns NULL if *length is 0 on entry, if s has fewer than pos
 * characters (then *length is set to 0), or if a sequence with more
 * than six continuation bytes is met while scanning.
 */
char *
utf8substr(char *s, size_t pos, size_t *length)
{
	size_t i, n, rl, want;
	char *ret;

	if (*length < 1)
		return NULL;

	if (pos == 0) {
		*length = 0;
		return s;
	}

	/*
	 * Continuation bytes at the very start belong to no character:
	 * skip them, but give up on an overlong run like everywhere else.
	 */
	for (n = 0; (s[n] & 0xc0) == 0x80; n++) {
		if (n >= 6)
			return NULL;
	}
	s += n;

	ret = NULL;
	rl = 0;
	want = *length;
	for (i = 1; s[0] != '\0' && want > 0; i++, s += n) {
		/* n = lead byte plus all continuation bytes following it. */
		for (n = 1; (s[n] & 0xc0) == 0x80; n++) {
			if (n >= 7)
				return NULL;
		}

		if (i < pos)
			continue;

		if (ret == NULL)
			ret = s;
		rl += n;
		want--;
	}

	*length = rl;
	return ret;
}
|
|
|
101 |
|
|
|
/*
 * Return the 1-based index of the first character of the NUL-terminated
 * UTF-8 string s that also occurs in chars, or 0 if there is none.
 *
 * Each character of s is copied into a small NUL-terminated buffer and
 * looked up in chars with utf8strchr(). 0 is also returned if a
 * sequence with more than six continuation bytes is met first.
 *
 * Only bytes of s itself are inspected; nothing before s[0] is read.
 */
size_t
utf8index(char *s, char *chars)
{
	size_t i, n;
	/* Up to 7 bytes per character plus the terminating NUL. */
	char c[8];

	/*
	 * Continuation bytes at the very start belong to no character:
	 * skip them, but give up on an overlong run like everywhere else.
	 */
	for (n = 0; (s[n] & 0xc0) == 0x80; n++) {
		if (n >= 6)
			return 0;
	}
	s += n;

	for (i = 1; s[0] != '\0'; i++, s += n) {
		/* n = lead byte plus all continuation bytes following it. */
		for (n = 1; (s[n] & 0xc0) == 0x80; n++) {
			if (n >= 7)
				return 0;
		}

		memcpy(c, s, n);
		c[n] = '\0';
		if (utf8strchr(chars, c))
			return i;
	}

	return 0;
}
|
|
|
129 |
|
|
|
130 void |
|
|
|
131 usage(void) |
|
|
|
132 { |
|
|
|
133 fprintf(stderr, "usage: %s [substr|index|length] str [args ...]\n", |
|
|
|
134 basename(argv0)); |
|
|
|
135 exit(1); |
|
|
|
136 } |
|
|
|
137 |
|
|
|
138 int |
|
|
|
139 main(int argc, char *argv[]) |
|
|
|
140 { |
|
|
|
141 char *s; |
|
|
|
142 size_t len; |
|
|
|
143 |
|
|
|
144 argv0 = argv[0]; |
|
|
|
145 |
|
|
|
146 if (argc < 3) |
|
|
|
147 usage(); |
|
|
|
148 |
|
|
|
149 switch(argv[1][0]) { |
|
|
|
150 case 'i': |
|
|
|
151 if (argc < 4) |
|
|
|
152 usage(); |
|
|
|
153 printf("%ld\n", utf8index(argv[2], argv[3])); |
|
|
|
154 break; |
|
|
|
155 case 'l': |
|
|
|
156 printf("%ld\n", utf8strlen(argv[2])); |
|
|
|
157 break; |
|
|
|
158 case 's': |
|
|
|
159 if (argc < 5) |
|
|
|
160 usage(); |
|
|
|
161 len = atoi(argv[4]); |
|
|
|
162 s = utf8substr(argv[2], atoi(argv[3]), &len); |
|
|
|
163 if (s == NULL) |
|
|
|
164 return -1; |
|
|
|
165 printf("%.*s\n", (int)len, s); |
|
|
|
166 break; |
|
|
|
167 default: |
|
|
|
168 usage(); |
|
|
|
169 }; |
|
|
|
170 |
|
|
|
171 return 0; |
|
|
|
172 } |
|
|
|
173 |
|