|
|
xml2tsv.c - xml2tsv - a simple xml-to-tsv converter, based on xmlparser |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
Tags |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
|
xml2tsv.c (4522B) |
|
|
|
--- |
|
|
|
1 /* |
|
|
|
2 * (c) 2020 Vincenzo "KatolaZ" Nicosia <katolaz@freaknet.org> |
|
|
|
3 * |
|
|
|
4 * A simple xml-to-tsv converter, based on xmlparser by Hiltjo Posthuma |
|
|
|
5 * http://codemadness.org/git/xmlparser/ |
|
|
|
6 * |
|
|
|
7 * You can use, distribute, modify, and/or redistribute this program under |
|
|
|
8 * the terms of the ISC LICENSE. See LICENSE for details. |
|
|
|
9 * |
|
|
|
10 */ |
|
|
|
11 |
|
|
|
12 |
|
|
|
13 #include <sys/types.h> |
|
|
|
14 |
|
|
|
15 #include <stdio.h> |
|
|
|
16 #include <string.h> |
|
|
|
17 #include <ctype.h> |
|
|
|
18 |
|
|
|
19 #include "xml.h" |
|
|
|
20 #include "config.h" |
|
|
|
21 |
|
|
|
22 |
|
|
|
/* Integer truth values; in this file they are assigned to the emitsep flag. */
#define FALSE 0
#define TRUE 1
|
|
|
25 |
|
|
|
26 /* tag stack */ |
|
|
|
27 |
|
|
|
/* One entry of the tag stack: a tag name plus a counter of tags pushed directly above it. */
typedef struct {
	char s[STR_MAX]; /* tag name, NUL-terminated; stack_push() truncates it to STR_MAX - 1 chars */
	int ref;         /* reset to 0 on push; incremented each time a tag is pushed right above this entry */
} taginfo_t;
|
|
|
32 |
|
|
|
33 |
|
|
|
34 |
|
|
|
/* Fixed-capacity stack of tag entries. */
typedef struct {
	int top;                 /* index of the topmost entry in st; negative when the stack is empty */
	taginfo_t st[DEPTH_MAX]; /* entries, st[0] being the first tag pushed */
} tstack_t;
|
|
|
39 |
|
|
|
40 int stack_empty(tstack_t *t){ |
|
|
|
41 return (t->top < 0); |
|
|
|
42 } |
|
|
|
43 |
|
|
|
44 int stack_push(tstack_t *t, const char *c){ |
|
|
|
45 if (t->top < DEPTH_MAX){ |
|
|
|
46 t->top ++; |
|
|
|
47 strncpy(t->st[t->top].s, c, STR_MAX - 1); |
|
|
|
48 t->st[t->top].s[STR_MAX - 1] = '\0'; |
|
|
|
49 t->st[t->top].ref = 0; |
|
|
|
50 if (t->top){ |
|
|
|
51 /* Increment the ref counter of the parent tag*/ |
|
|
|
52 t->st[t->top-1].ref += 1; |
|
|
|
53 } |
|
|
|
54 return 0; |
|
|
|
55 } |
|
|
|
56 return -1; |
|
|
|
57 } |
|
|
|
58 |
|
|
|
59 taginfo_t* stack_pop(tstack_t *t){ |
|
|
|
60 if (!stack_empty(t)) |
|
|
|
61 return t->st + t->top--; |
|
|
|
62 return NULL; |
|
|
|
63 } |
|
|
|
64 |
|
|
|
65 taginfo_t* stack_peek(tstack_t *t){ |
|
|
|
66 if (!stack_empty(t)) |
|
|
|
67 return t->st + t->top; |
|
|
|
68 return NULL; |
|
|
|
69 } |
|
|
|
70 |
|
|
|
71 void stack_init(tstack_t *t){ |
|
|
|
72 t->top = -1; |
|
|
|
73 } |
|
|
|
74 |
|
|
|
75 |
|
|
|
76 /* utility functions */ |
|
|
|
77 |
|
|
|
/*
 * quote_print: write s to stdout, escaping the characters that would break
 * a TSV record.
 *
 * The string is scanned in runs delimited by backslash, newline and tab:
 *  - inside a run, every character for which iscntrl() is true (this
 *    includes carriage return) is dropped, everything else is copied;
 *  - a tab is written as the two characters backslash + 't';
 *  - a backslash is written as two backslashes;
 *  - a newline is written as backslash + 'n', but only when the run scanned
 *    just before it was non-empty: a newline at the very start of s, or one
 *    directly following another tab/newline/backslash, is dropped.
 */
void quote_print(const char *s){
	const char *p = s;

	while (*p != '\0'){
		/* size_t index: the run length comes from strcspn() and is unsigned */
		size_t run = strcspn(p, "\\\n\t");
		size_t i;

		for (i = 0; i < run; i++, p++){
			if (!iscntrl((unsigned char)*p)){
				putchar(*p);
			}
		}
		/* p now points at a delimiter or at the terminating NUL */
		switch (*p){
		case '\n':
			if (run > 0){
				fputs("\\n", stdout);
			}
			p++;
			break;
		case '\t':
			fputs("\\t", stdout);
			p++;
			break;
		case '\\':
			fputs("\\\\", stdout);
			p++;
			break;
		default:
			/* terminating NUL: the loop condition ends the scan */
			break;
		}
	}
}
|
|
|
112 |
|
|
|
113 void print_cur_str(tstack_t *t){ |
|
|
|
114 int i; |
|
|
|
115 for (i=0; i<=t->top; i++){ |
|
|
|
116 putchar('/'); |
|
|
|
117 fputs(t->st[i].s, stdout); |
|
|
|
118 } |
|
|
|
119 } |
|
|
|
120 |
|
|
|
121 void print_cur_str_fp(FILE *f, tstack_t *t){ |
|
|
|
122 int i; |
|
|
|
123 for (i=0; i<=t->top; i++){ |
|
|
|
124 fputc('/', f); |
|
|
|
125 fputs(t->st[i].s, f); |
|
|
|
126 } |
|
|
|
127 } |
|
|
|
128 |
|
|
|
129 /* global variables */ |
|
|
|
130 |
|
|
|
tstack_t st;  /* stack of the tags that are currently open; shared by the XML callbacks */
char emitsep; /* TRUE between xmldatastart() and the moment xmldata() writes SEP (or xmldataend() runs) */
|
|
|
133 |
|
|
|
134 /* XML callbacks */ |
|
|
|
135 |
|
|
|
136 void |
|
|
|
137 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al, |
|
|
|
138 const char *v, size_t vl) |
|
|
|
139 { |
|
|
|
140 fputs(v, stdout); |
|
|
|
141 } |
|
|
|
142 |
|
|
|
143 void |
|
|
|
144 xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al, |
|
|
|
145 const char *v, size_t vl) |
|
|
|
146 { |
|
|
|
147 char buf[16]; |
|
|
|
148 int n; |
|
|
|
149 |
|
|
|
150 if ((n = xml_entitytostr(v, buf, sizeof(buf))) > 0) |
|
|
|
151 xmlattr(x, t, tl, a, al, buf, (size_t)n); |
|
|
|
152 else |
|
|
|
153 xmlattr(x, t, tl, a, al, v, vl); |
|
|
|
154 } |
|
|
|
155 |
|
|
|
156 void |
|
|
|
157 xmlattrstart(XMLParser *x, const char *t, size_t tl, const char *a, size_t al) |
|
|
|
158 { |
|
|
|
159 putchar(SEP); |
|
|
|
160 fputs(a, stdout); |
|
|
|
161 putchar(SATTR); |
|
|
|
162 } |
|
|
|
163 |
|
|
|
164 void |
|
|
|
165 xmlcdatastart(XMLParser *x) |
|
|
|
166 { |
|
|
|
167 putchar(SEP); |
|
|
|
168 } |
|
|
|
169 |
|
|
|
170 void |
|
|
|
171 xmlcdata(XMLParser *x, const char *d, size_t dl) |
|
|
|
172 { |
|
|
|
173 quote_print(d); |
|
|
|
174 } |
|
|
|
175 |
|
|
|
176 void |
|
|
|
177 xmldata(XMLParser *x, const char *d, size_t dl) |
|
|
|
178 { |
|
|
|
179 if ((strcspn(d, " \t\n") || (strspn(d, " \t\n")<strlen(d) && !stack_peek(&st)->ref)) && emitsep){ |
|
|
|
180 putchar(SEP); |
|
|
|
181 emitsep = FALSE; |
|
|
|
182 } |
|
|
|
183 quote_print(d); |
|
|
|
184 } |
|
|
|
185 |
|
|
|
186 void |
|
|
|
187 xmldataend(XMLParser *x) |
|
|
|
188 { |
|
|
|
189 emitsep = FALSE; |
|
|
|
190 } |
|
|
|
191 |
|
|
|
192 void |
|
|
|
193 xmldataentity(XMLParser *x, const char *d, size_t dl) |
|
|
|
194 { |
|
|
|
195 char buf[16]; |
|
|
|
196 int n; |
|
|
|
197 |
|
|
|
198 if ((n = xml_entitytostr(d, buf, sizeof(buf))) > 0) |
|
|
|
199 xmldata(x, buf, (size_t)n); |
|
|
|
200 else |
|
|
|
201 xmldata(x, d, dl); |
|
|
|
202 } |
|
|
|
203 |
|
|
|
204 void |
|
|
|
205 xmldatastart(XMLParser *x) |
|
|
|
206 { |
|
|
|
207 emitsep = TRUE; |
|
|
|
208 } |
|
|
|
209 |
|
|
|
210 void |
|
|
|
211 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort) |
|
|
|
212 { |
|
|
|
213 char *tag; |
|
|
|
214 if (stack_empty(&st)){ |
|
|
|
215 fprintf(stderr, "Error: tag-end '%s' before any open tag\n", t); |
|
|
|
216 } |
|
|
|
217 tag = stack_pop(&st)->s; |
|
|
|
218 if (strcmp(t, tag)){ |
|
|
|
219 fprintf(stderr, "Error: tag-end '%s' closes tag '%s'\n", t, tag); |
|
|
|
220 } |
|
|
|
221 } |
|
|
|
222 |
|
|
|
223 void |
|
|
|
224 xmltagstart(XMLParser *x, const char *t, size_t tl) |
|
|
|
225 { |
|
|
|
226 if (stack_push(&st, t)){ |
|
|
|
227 fprintf(stderr, "Error: stack full. Ignoring tag '%s' (parent tag: '%s')\n", t, stack_peek(&st)->s); |
|
|
|
228 return; |
|
|
|
229 } |
|
|
|
230 putchar('\n'); |
|
|
|
231 print_cur_str(&st); |
|
|
|
232 } |
|
|
|
233 |
|
|
|
234 int |
|
|
|
235 main(void) |
|
|
|
236 { |
|
|
|
237 stack_init(&st); |
|
|
|
238 emitsep = FALSE; |
|
|
|
239 XMLParser x = { 0 }; |
|
|
|
240 |
|
|
|
241 x.xmlattr = xmlattr; |
|
|
|
242 x.xmlattrstart = xmlattrstart; |
|
|
|
243 x.xmlattrentity = xmlattrentity; |
|
|
|
244 x.xmlcdatastart = xmlcdatastart; |
|
|
|
245 x.xmlcdata = xmlcdata; |
|
|
|
246 x.xmldata = xmldata; |
|
|
|
247 x.xmldataend = xmldataend; |
|
|
|
248 x.xmldataentity = xmldataentity; |
|
|
|
249 x.xmldatastart = xmldatastart; |
|
|
|
250 x.xmltagend = xmltagend; |
|
|
|
251 x.xmltagstart = xmltagstart; |
|
|
|
252 |
|
|
|
253 xml_parse(&x); |
|
|
|
254 putchar('\n'); |
|
|
|
255 if (! stack_empty(&st)) { |
|
|
|
256 fprintf(stderr, "Error: tags still open at EOF: "); |
|
|
|
257 print_cur_str_fp(stderr, &st); |
|
|
|
258 fprintf(stderr, "\n"); |
|
|
|
259 } |
|
|
|
260 return 0; |
|
|
|
261 } |
|