Wireshark  4.3.0
The Wireshark network protocol analyzer
tvbparse.h
Go to the documentation of this file.
1 
/*
 The intention behind this is to ease the writing of dissectors that have to
 parse text without the need of writing into buffers.

 It was originally written to avoid using lex and yacc for the xml dissector.

 The parser is able to look for wanted elements; these can be:

 simple tokens:
 - a char out of a string of needles
 - a char not belonging to a string of needles
 - a sequence of chars that belong to a set of chars
 - a sequence of chars that do not belong to a set of chars
 - a string
 - a caseless string
 - all the characters up to a certain wanted element (included or excluded)

 composed elements:
 - one of a given group of wanted elements
 - a sequence of wanted elements
 - some (at least one) instances of a wanted element

 Once a wanted element is successfully extracted, by either tvbparse_get or
 tvbparse_find, the parser will invoke one given callback before, and another
 one after, the callbacks of each of its component subelements are called.

 If tvbparse_get or tvbparse_find fails to extract the wanted element, the
 subelements' callbacks are not going to be invoked.

 The wanted elements are instantiated once by the proto_register_xxx function.

 The parser is instantiated for every packet and it maintains its state.

 The element's data is destroyed before the next packet is dissected.
 */
50 
51 #ifndef _TVB_PARSE_H_
52 #define _TVB_PARSE_H_
53 
54 #include <epan/tvbuff.h>
55 #include <glib.h>
56 #include "ws_symbol_export.h"
57 
/*
 * Forward declarations of the three core types of the parser:
 *
 * tvbparse_elem_t:   a matching token returned by tvbparse_get/tvbparse_find
 * tvbparse_wanted_t: the description of an element the parser can look for
 * tvbparse_t:        an instance of a per packet parser
 */
typedef struct _tvbparse_elem_t tvbparse_elem_t;
typedef struct _tvbparse_wanted_t tvbparse_wanted_t;
typedef struct _tvbparse_t tvbparse_t;
61 
62 
/*
 * A callback function to be called before or after an element has been
 * successfully extracted.
 *
 * Note that if the token belongs to a composed token the callbacks of the
 * components won't be called unless the composed token is successfully
 * extracted.
 *
 * tvbparse_data: the private data of the parser
 * wanted_data: the private data of the wanted element
 * elem: the extracted element
 */
typedef void (*tvbparse_action_t)(void* tvbparse_data,
                                  const void* wanted_data,
                                  struct _tvbparse_elem_t* elem);
76 
77 typedef int (*tvbparse_condition_t)
78 (tvbparse_t*, const int,
79  const tvbparse_wanted_t*,
80  tvbparse_elem_t**);
81 
82 
/*
 * How an "until" element treats the terminating (last) element it stops at.
 */
typedef enum {
    /* last elem is included, its span is spent by the parser */
    TP_UNTIL_INCLUDE,
    /* last elem is not included, but its span is spent by the parser */
    TP_UNTIL_SPEND,
    /* last elem is not included, neither its span is spent by the parser */
    TP_UNTIL_LEAVE
} until_mode_t;
88 
89 
91  int id;
92  tvbparse_condition_t condition;
93 
94  union {
95  const gchar* str;
96  struct _tvbparse_wanted_t** handle;
97  struct {
98  union {
99  gint64 i;
100  guint64 u;
101  gdouble f;
102  } value;
103  } number;
104  enum ftenum ftenum;
105  struct {
106  until_mode_t mode;
107  const tvbparse_wanted_t* subelem;
108  } until;
109  struct {
110  wmem_map_t* table;
111  struct _tvbparse_wanted_t* key;
112  struct _tvbparse_wanted_t* other;
113  } hash;
114  GPtrArray* elems;
115  const tvbparse_wanted_t* subelem;
116  void* p;
117  } control;
118 
119  int len;
120 
121  guint min;
122  guint max;
123 
124  const void* data;
125 
126  tvbparse_action_t before;
127  tvbparse_action_t after;
128 };
129 
130 /* an instance of a per packet parser */
131 struct _tvbparse_t {
132  wmem_allocator_t* scope;
133  tvbuff_t* tvb;
134  int offset;
135  int end_offset;
136  void* data;
137  const tvbparse_wanted_t* ignore;
138  int recursion_depth;
139 };
140 
141 
142 /* a matching token returned by either tvbparser_get or tvb_parser_find */
144  int id;
145 
146  tvbparse_t* parser;
147  tvbuff_t* tvb;
148  int offset;
149  int len;
150 
151  void* data;
152 
153  struct _tvbparse_elem_t* sub;
154 
155  struct _tvbparse_elem_t* next;
156  struct _tvbparse_elem_t* last;
157 
158  const tvbparse_wanted_t* wanted;
159 };
160 
161 
/*
 * definition of wanted token types
 *
 * the following functions define the tokens we will be able to look for in a
 * tvb; common parameters are:
 *
 * id: an arbitrary id that will be copied to the eventual token (don't use 0)
 * private_data: persistent data to be passed to the callback action (wanted_data)
 * before_cb: a callback function to be called before those of the subelements
 * after_cb: a callback function to be called after those of the subelements
 */
173 
174 
175 /*
176  * a char element.
177  *
178  * When looked for it returns a simple element one character long if the char
179  * at the current offset matches one of the needles.
180  */
181 WS_DLL_PUBLIC
182 tvbparse_wanted_t* tvbparse_char(const int id,
183  const gchar* needles,
184  const void* private_data,
185  tvbparse_action_t before_cb,
186  tvbparse_action_t after_cb);
187 
188 /*
189  * a not_char element.
190  *
191  * When looked for it returns a simple element one character long if the char
192  * at the current offset does not match one of the needles.
193  */
194 WS_DLL_PUBLIC
195 tvbparse_wanted_t* tvbparse_not_char(const int id,
196  const gchar* needle,
197  const void* private_data,
198  tvbparse_action_t before_cb,
199  tvbparse_action_t after_cb);
200 
201 /*
202  * a chars element
203  *
204  * When looked for it returns a simple element one or more characters long if
205  * one or more char(s) starting from the current offset match one of the needles.
206  * An element will be returned if at least min_len chars are given (1 if it's 0)
207  * It will get at most max_len chars or as much as it can if max_len is 0.
208  */
209 WS_DLL_PUBLIC
210 tvbparse_wanted_t* tvbparse_chars(const int id,
211  const guint min_len,
212  const guint max_len,
213  const gchar* needles,
214  const void* private_data,
215  tvbparse_action_t before_cb,
216  tvbparse_action_t after_cb);
217 
218 /*
219  * a not_chars element
220  *
221  * When looked for it returns a simple element one or more characters long if
222  * one or more char(s) starting from the current offset do not match one of the
223  * needles.
224  * An element will be returned if at least min_len chars are given (1 if it's 0)
225  * It will get at most max_len chars or as much as it can if max_len is 0.
226  */
227 WS_DLL_PUBLIC
228 tvbparse_wanted_t* tvbparse_not_chars(const int id,
229  const guint min_len,
230  const guint max_len,
231  const gchar* needles,
232  const void* private_data,
233  tvbparse_action_t before_cb,
234  tvbparse_action_t after_cb);
235 
236 /*
237  * a string element
238  *
239  * When looked for it returns a simple element if we have the given string at
240  * the current offset
241  */
242 WS_DLL_PUBLIC
243 tvbparse_wanted_t* tvbparse_string(const int id,
244  const gchar* string,
245  const void* private_data,
246  tvbparse_action_t before_cb,
247  tvbparse_action_t after_cb);
248 
249 /*
250  * casestring
251  *
252  * When looked for it returns a simple element if we have a matching string at
253  * the current offset
254  */
255 WS_DLL_PUBLIC
256 tvbparse_wanted_t* tvbparse_casestring(const int id,
257  const gchar* str,
258  const void* data,
259  tvbparse_action_t before_cb,
260  tvbparse_action_t after_cb);
261 
262 /*
263  * until
264  *
265  * When looked for it returns a simple element containing all the characters
266  * found until the first match of the ending element if the ending element is
267  * found.
268  *
269  * When looking for until elements it calls tvbparse_find so it can be very slow.
270  *
271  * It won't have a subelement, the ending's callbacks won't get called.
272  */
273 
274 /*
275  * op_mode values determine how the terminating element and the current offset
276  * of the parser are handled
277  */
278 WS_DLL_PUBLIC
279 tvbparse_wanted_t* tvbparse_until(const int id,
280  const void* private_data,
281  tvbparse_action_t before_cb,
282  tvbparse_action_t after_cb,
283  const tvbparse_wanted_t* ending,
284  until_mode_t until_mode);
285 
286 /*
287  * one_of
288  *
289  * When looked for it will try to match to the given candidates and return a
290  * composed element whose subelement is the first match.
291  *
292  * The list of candidates is terminated with a NULL
293  *
294  */
295 WS_DLL_PUBLIC
296 tvbparse_wanted_t* tvbparse_set_oneof(const int id,
297  const void* private_data,
298  tvbparse_action_t before_cb,
299  tvbparse_action_t after_cb,
300  ...);
301 
302 /*
303  * hashed
304  */
305 WS_DLL_PUBLIC
306 tvbparse_wanted_t* tvbparse_hashed(const int id,
307  const void* data,
308  tvbparse_action_t before_cb,
309  tvbparse_action_t after_cb,
310  tvbparse_wanted_t* key,
311  tvbparse_wanted_t* other,
312  ...);
313 
314 WS_DLL_PUBLIC
315 void tvbparse_hashed_add(tvbparse_wanted_t* w, ...);
316 
317 /*
318  * sequence
319  *
320  * When looked for it will try to match in order all the given candidates. If
321  * every candidate is found in the given order it will return a composed
322  * element whose subelements are the matcheed elemets.
323  *
324  * The list of candidates is terminated with a NULL.
325  *
326  */
327 WS_DLL_PUBLIC
328 tvbparse_wanted_t* tvbparse_set_seq(const int id,
329  const void* private_data,
330  tvbparse_action_t before_cb,
331  tvbparse_action_t after_cb,
332  ...);
333 
334 /*
335  * some
336  *
337  * When looked for it will try to match the given candidate at least min times
338  * and at most max times. If the given candidate is matched at least min times
339  * a composed element is returned.
340  *
341  */
342 WS_DLL_PUBLIC
343 tvbparse_wanted_t* tvbparse_some(const int id,
344  const guint min,
345  const guint max,
346  const void* private_data,
347  tvbparse_action_t before_cb,
348  tvbparse_action_t after_cb,
349  const tvbparse_wanted_t* wanted);
350 
/*
 * one_or_more
 *
 * Shorthand for tvbparse_some with min set to 1 and max set to G_MAXINT;
 * the remaining arguments are passed through unchanged.
 */
#define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted)\
    tvbparse_some(id, 1, G_MAXINT, private_data, before_cb, after_cb, wanted)
353 
354 
355 /*
356  * handle
357  *
358  * this is a pointer to a pointer to a wanted element (that might have not
359  * been initialized yet) so that recursive structures
360  */
361 WS_DLL_PUBLIC
362 tvbparse_wanted_t* tvbparse_handle(tvbparse_wanted_t** handle);
363 
364 /* quoted
365  * this is a composed candidate, that will try to match a quoted string
366  * (included the quotes) including into it every escaped quote.
367  *
368  * C strings are matched with tvbparse_quoted(-1,NULL,NULL,NULL,"\"","\\")
369  */
370 WS_DLL_PUBLIC
371 tvbparse_wanted_t* tvbparse_quoted(const int id,
372  const void* data,
373  tvbparse_action_t before_cb,
374  tvbparse_action_t after_cb,
375  const char quote,
376  const char escape);
377 
378 /*
379  * a helper callback for quoted strings that will shrink the token to contain
380  * only the string andnot the quotes
381  */
382 WS_DLL_PUBLIC
383 void tvbparse_shrink_token_cb(void* tvbparse_data,
384  const void* wanted_data,
385  tvbparse_elem_t* tok);
386 
387 
388 
389 
390 /* initialize the parser (at every packet)
391  * scope: memory scope/pool
392  * tvb: what are we parsing?
393  * offset: from where
394  * len: for how many bytes
395  * private_data: will be passed to the action callbacks
396  * ignore: a wanted token type to be ignored (the associated cb WILL be called when it matches)
397  */
398 WS_DLL_PUBLIC
399 tvbparse_t* tvbparse_init(wmem_allocator_t *scope,
400  tvbuff_t* tvb,
401  const int offset,
402  int len,
403  void* private_data,
404  const tvbparse_wanted_t* ignore);
405 
406 /* reset the parser */
407 WS_DLL_PUBLIC
408 gboolean tvbparse_reset(tvbparse_t* tt, const int offset, int len);
409 
410 WS_DLL_PUBLIC
411 guint tvbparse_curr_offset(tvbparse_t* tt);
412 guint tvbparse_len_left(tvbparse_t* tt);
413 
414 
415 
416 /*
417  * This will look for the wanted token at the current offset or after any given
418  * number of ignored tokens returning FALSE if there's no match or TRUE if there
419  * is a match.
420  * The parser will be left in its original state and no callbacks will be called.
421  */
422 WS_DLL_PUBLIC
423 gboolean tvbparse_peek(tvbparse_t* tt,
424  const tvbparse_wanted_t* wanted);
425 
426 /*
427  * This will look for the wanted token at the current offset or after any given
428  * number of ignored tokens returning NULL if there's no match.
429  * if there is a match it will set the offset of the current parser after
430  * the end of the token
431  */
432 WS_DLL_PUBLIC
433 tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
434  const tvbparse_wanted_t* wanted);
435 
436 /*
437  * Like tvbparse_get but this will look for a wanted token even beyond the
438  * current offset.
439  * This function is slow.
440  */
441 WS_DLL_PUBLIC
442 tvbparse_elem_t* tvbparse_find(tvbparse_t* tt,
443  const tvbparse_wanted_t* wanted);
444 
445 
446 WS_DLL_PUBLIC
447 void tvbparse_tree_add_elem(proto_tree* tree, tvbparse_elem_t* curr);
448 
449 #endif
Definition: proto.h:904
Definition: tvbparse.h:143
Definition: tvbparse.h:131
Definition: tvbparse.h:90
Definition: wmem_allocator.h:27
Definition: wmem_map.c:44
Definition: tvbuff-int.h:35