Wireshark  4.3.0
The Wireshark network protocol analyzer
unicode-utils.h
Go to the documentation of this file.
1 /* unicode-utils.h
2  * Unicode utility definitions
3  *
4  * Wireshark - Network traffic analyzer
5  * By Gerald Combs <gerald@wireshark.org>
6  * Copyright 2006 Gerald Combs
7  *
8  * SPDX-License-Identifier: GPL-2.0-or-later
9  */
10 
11 #ifndef __UNICODEUTIL_H__
12 #define __UNICODEUTIL_H__
13 
14 #include <wireshark.h>
15 
16 #ifdef _WIN32
17 #include <windows.h>
18 #include <tchar.h>
19 #include <wchar.h>
20 #endif
21 
27 #ifdef __cplusplus
28 extern "C" {
29 #endif
30 
/*
 * Compile-time switch consumed by the UTF-8 checking macros in this header:
 * true when the build defines WS_DEBUG_UTF_8, false otherwise.
 */
#ifndef WS_DEBUG_UTF_8
#define DEBUG_UTF_8_ENABLED false
#else
#define DEBUG_UTF_8_ENABLED true
#endif
36 
/*
 * Debug helper: when DEBUG_UTF_8_ENABLED is true and `str` is not NULL,
 * pass `str`/`len` to g_utf8_validate() and, if validation fails, report
 * the string through ws_log_utf8() together with the end pointer that
 * g_utf8_validate() stored.
 *
 * `str` and `len` are expanded more than once, so callers should not pass
 * expressions with side effects.
 *
 * NOTE(review): `level` is accepted but never used by this expansion, so
 * every wrapper behaves the same regardless of the level it passes --
 * confirm whether that is intentional.
 */
#define _CHECK_UTF_8(level, str, len) \
    do { \
        if (DEBUG_UTF_8_ENABLED && (str) != NULL) { \
            const char *ws_utf8_check_endptr_; \
            if (!g_utf8_validate(str, len, &ws_utf8_check_endptr_)) { \
                ws_log_utf8(str, len, ws_utf8_check_endptr_); \
            } \
        } \
    } while (0)
45 
/*
 * Run the UTF-8 debug check on `str`/`len`, passing LOG_LEVEL_DEBUG as the
 * level argument of _CHECK_UTF_8().
 */
#define WS_UTF_8_CHECK(str, len) _CHECK_UTF_8(LOG_LEVEL_DEBUG, str, len)
48 
/*
 * Run the UTF-8 debug check on `str`/`len`, passing LOG_LEVEL_ECHO as the
 * level argument of _CHECK_UTF_8().
 */
#define WS_UTF_8_DEBUG_HERE(str, len) _CHECK_UTF_8(LOG_LEVEL_ECHO, str, len)
51 
52 WSUTIL_EXPORT
53 const int ws_utf8_seqlen[256];
54 
/*
 * Look up the ws_utf8_seqlen[] entry for the byte `ch`.
 *
 * The index is converted to unsigned char so that a plain (possibly signed)
 * `char` holding a byte >= 0x80 selects entries 128..255 instead of
 * producing a negative, out-of-bounds index. Arguments already in the range
 * 0..255 behave exactly as before.
 */
#define ws_utf8_char_len(ch) (ws_utf8_seqlen[(unsigned char)(ch)])
61 
62 /*
63  * Given a wmem scope, a pointer, and a length, treat the string of bytes
64  * referred to by the pointer and length as a UTF-8 string, and return a
65  * pointer to a UTF-8 string, allocated using the wmem scope, with all
66  * ill-formed sequences replaced with the Unicode REPLACEMENT CHARACTER
67  * according to the recommended "best practices" given in the Unicode
68  * Standard and specified by W3C/WHATWG.
69  */
70 WS_DLL_PUBLIC uint8_t *
71 ws_utf8_make_valid(wmem_allocator_t *scope, const uint8_t *ptr, ssize_t length);
72 
73 /*
74  * Same as ws_utf8_make_valid() but returns a wmem_strbuf_t.
75  */
76 WS_DLL_PUBLIC wmem_strbuf_t *
77 ws_utf8_make_valid_strbuf(wmem_allocator_t *scope, const uint8_t *ptr, ssize_t length);
78 
79 #ifdef _WIN32
80 
88 WS_DLL_PUBLIC
89 const wchar_t * utf_8to16(const char *utf8str);
90 
97 WS_DLL_PUBLIC
98 void utf_8to16_snprintf(TCHAR *utf16buf, int utf16buf_len, const char* fmt, ...)
99 G_GNUC_PRINTF(3, 4);
100 
108 WS_DLL_PUBLIC
109 char * utf_16to8(const wchar_t *utf16str);
110 
118 WS_DLL_PUBLIC
119 char **arg_list_utf_16to8(int argc, wchar_t *wc_argv[]);
120 
121 #endif /* _WIN32 */
122 
123 /*
124  * defines for helping with UTF-16 surrogate pairs
125  */
126 
/*
 * True when `uchar2` is a UTF-16 lead (high) surrogate, i.e. lies in
 * 0xd800..0xdbff.
 *
 * Clearing the low 10 bits and comparing against 0xd800 is equivalent to
 * the range test for every integer value, but expands `uchar2` only once,
 * so arguments with side effects (e.g. `*p++` or a fetch call) are safe.
 */
#define IS_LEAD_SURROGATE(uchar2) \
    ((((uchar2)) & ~0x3ff) == 0xd800)
/*
 * True when `uchar2` is a UTF-16 trail (low) surrogate, i.e. lies in
 * 0xdc00..0xdfff.
 *
 * Clearing the low 10 bits and comparing against 0xdc00 is equivalent to
 * the range test for every integer value, but expands `uchar2` only once,
 * so arguments with side effects (e.g. `*p++` or a fetch call) are safe.
 */
#define IS_TRAIL_SURROGATE(uchar2) \
    ((((uchar2)) & ~0x3ff) == 0xdc00)
/*
 * Combine a lead/trail surrogate pair into one value: the offset of `lead`
 * from 0xd800 supplies the upper 10 bits, the offset of `trail` from 0xdc00
 * the lower 10 bits, and 0x10000 is added to the result.
 *
 * Performs no validation; callers are expected to have checked the pair
 * with IS_LEAD_SURROGATE() / IS_TRAIL_SURROGATE() first.
 */
#define SURROGATE_VALUE(lead, trail) \
    (0x10000 + (((trail) - 0xdc00) | (((lead) - 0xd800) << 10)))
133 
134 #ifdef __cplusplus
135 }
136 #endif
137 
138 #endif /* __UNICODEUTIL_H__ */
Definition: wmem_allocator.h:27
Definition: wmem_strbuf.h:42