Wireshark  4.3.0
The Wireshark network protocol analyzer
charsets.h
Go to the documentation of this file.
1 
10 #ifndef __CHARSETS_H__
11 #define __CHARSETS_H__
12 
13 #include "ws_symbol_export.h"
14 
15 #ifdef __cplusplus
16 extern "C" {
17 #endif /* __cplusplus */
18 
19 /*
20  * Translation tables that map the upper 128 code points in single-byte
21  * "extended ASCII" character encodings to Unicode code points in the
22  * Basic Multilingual Plane.
23  */
24 
25 /* Table for windows-1250 */
26 extern const gunichar2 charset_table_cp1250[0x80];
27 /* Table for windows-1251 */
28 extern const gunichar2 charset_table_cp1251[0x80];
29 /* Table for windows-1252 */
30 extern const gunichar2 charset_table_cp1252[0x80];
31 
32 /* Tables for ISO-8859-X */
33 extern const gunichar2 charset_table_iso_8859_2[0x80];
34 extern const gunichar2 charset_table_iso_8859_3[0x80];
35 extern const gunichar2 charset_table_iso_8859_4[0x80];
36 extern const gunichar2 charset_table_iso_8859_5[0x80];
37 extern const gunichar2 charset_table_iso_8859_6[0x80];
38 extern const gunichar2 charset_table_iso_8859_7[0x80];
39 extern const gunichar2 charset_table_iso_8859_8[0x80];
40 extern const gunichar2 charset_table_iso_8859_9[0x80];
41 extern const gunichar2 charset_table_iso_8859_10[0x80];
42 extern const gunichar2 charset_table_iso_8859_11[0x80];
43 extern const gunichar2 charset_table_iso_8859_13[0x80];
44 extern const gunichar2 charset_table_iso_8859_14[0x80];
45 extern const gunichar2 charset_table_iso_8859_15[0x80];
46 extern const gunichar2 charset_table_iso_8859_16[0x80];
47 
48 /* Tables for Mac character sets */
49 extern const gunichar2 charset_table_mac_roman[0x80];
50 
51 /* Tables for DOS code pages */
52 extern const gunichar2 charset_table_cp437[0x80];
53 extern const gunichar2 charset_table_cp855[0x80];
54 extern const gunichar2 charset_table_cp866[0x80];
55 
56 /*
57  * Translation tables that map the lower 128 code points in single-byte
58  * ISO 646-based character encodings to Unicode code points in the
59  * Basic Multilingual Plane.
60  */
61 extern const gunichar2 charset_table_iso_646_basic[0x80];
62 
63 /* Tables for EBCDIC code pages */
64 extern const gunichar2 charset_table_ebcdic[256];
65 extern const gunichar2 charset_table_ebcdic_cp037[256];
66 extern const gunichar2 charset_table_ebcdic_cp500[256];
67 
68 /*
69  * Given a wmem scope, a pointer, and a length, treat the string of bytes
70  * referred to by the pointer and length as an ASCII string, with all bytes
71  * with the high-order bit set being invalid, and return a pointer to a
72  * UTF-8 string, allocated using the wmem scope.
73  *
74  * Octets with the highest bit set will be converted to the Unicode
75  * REPLACEMENT CHARACTER.
76  */
77 WS_DLL_PUBLIC guint8 *
78 get_ascii_string(wmem_allocator_t *scope, const guint8 *ptr, gint length);
79 
80 /*
81  * Given a wmem scope, a pointer, and a length, treat the string of bytes
82  * referred to by the pointer and length as a UTF-8 string, and return a
83  * pointer to a UTF-8 string, allocated using the wmem scope, with all
84  * ill-formed sequences replaced with the Unicode REPLACEMENT CHARACTER
85  * according to the recommended "best practices" given in the Unicode
86  * Standard and specified by W3C/WHATWG.
87  */
88 WS_DLL_PUBLIC guint8 *
89 get_utf_8_string(wmem_allocator_t *scope, const guint8 *ptr, gint length);
90 
91 /*
92  * Given a wmem scope, a pointer, a length, and a translation table,
93  * treat the string of bytes referred to by the pointer and length as a
94  * string encoded using one octet per character, with octets with the
95  * high-order bit clear being mapped by the translation table to 2-byte
96  * Unicode Basic Multilingual Plane characters (including REPLACEMENT
97  * CHARACTER) and octets with the high-order bit set being mapped to
98  * REPLACEMENT CHARACTER, and return a pointer to a UTF-8 string,
99  * allocated using the wmem scope.
100  */
101 WS_DLL_PUBLIC guint8 *
102 get_iso_646_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[0x80]);
103 
104 /*
105  * Given a wmem scope, a pointer, and a length, treat the string of bytes
106  * referred to by the pointer and length as an ISO 8859/1 string, and
107  * return a pointer to a UTF-8 string, allocated using the wmem scope.
108  */
109 WS_DLL_PUBLIC guint8 *
110 get_8859_1_string(wmem_allocator_t *scope, const guint8 *ptr, gint length);
111 
112 /*
113  * Given a wmem scope, a pointer, a length, and a translation table with
114  * 128 entries, treat the string of bytes referred to by the pointer and
115  * length as a string encoded using one octet per character, with octets
116  * with the high-order bit clear being ASCII and octets with the high-order
117  * bit set being mapped by the translation table to 2-byte Unicode Basic
118  * Multilingual Plane characters (including REPLACEMENT CHARACTER), and
119  * return a pointer to a UTF-8 string, allocated using the wmem scope.
120  */
121 WS_DLL_PUBLIC guint8 *
122 get_unichar2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[0x80]);
123 
124 /*
125  * Given a wmem scope, a pointer, and a length, treat the string of bytes
126  * referred to by the pointer and length as a UCS-2 encoded string
127  * containing characters from the Basic Multilingual Plane (plane 0) of
128  * Unicode, and return a pointer to a UTF-8 string, allocated with the
129  * wmem scope.
130  *
131  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN,
132  * possibly ORed with ENC_BOM.
133  *
134  * Specify length in bytes.
135  */
136 WS_DLL_PUBLIC guint8 *
137 get_ucs_2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, guint encoding);
138 
139 /*
140  * Given a wmem scope, a pointer, and a length, treat the string of bytes
141  * referred to by the pointer and length as a UTF-16 encoded string, and
142  * return a pointer to a UTF-8 string, allocated with the wmem scope.
143  *
144  * See RFC 2781 section 2.2.
145  *
146  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN,
147  * possibly ORed with ENC_BOM.
148  *
149  * Specify length in bytes.
150  */
151 WS_DLL_PUBLIC guint8 *
152 get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, guint encoding);
153 
154 /*
155  * Given a wmem scope, a pointer, and a length, treat the string of bytes
156  * referred to by the pointer and length as a UCS-4 encoded string, and
157  * return a pointer to a UTF-8 string, allocated with the wmem scope.
158  *
159  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN,
160  * possibly ORed with ENC_BOM.
161  *
162  * Specify length in bytes.
163  */
164 WS_DLL_PUBLIC guint8 *
165 get_ucs_4_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, guint encoding);
166 
/*
 * Takes a wmem scope, a pointer to bytes, a bit offset, and a count
 * named no_of_chars, and returns a guint8 pointer.
 *
 * NOTE(review): no description is given at this declaration.  The name
 * suggests the packed 7-bit encoding of 3GPP TS 23.038, with no_of_chars
 * presumably being the number of characters to unpack and the result
 * presumably a UTF-8 string allocated using the wmem scope, as for the
 * documented get_*_string() routines in this header -- confirm against
 * the implementation.
 */
167 WS_DLL_PUBLIC guint8 *
168 get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, const guint8 *ptr,
169  const gint bit_offset, gint no_of_chars);
170 
/*
 * Takes a wmem scope, a pointer to bytes, and a length, and returns a
 * guint8 pointer.
 *
 * NOTE(review): no description is given at this declaration.  The name
 * suggests 3GPP TS 23.038 7-bit characters stored one per octet
 * (unpacked), with the result presumably a UTF-8 string allocated using
 * the wmem scope -- confirm against the implementation.
 */
171 WS_DLL_PUBLIC guint8 *
172 get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, const guint8 *ptr,
173  gint length);
174 
/*
 * Takes a wmem scope, a pointer to bytes, and a length, and returns a
 * guint8 pointer.
 *
 * NOTE(review): no description is given at this declaration.  The name
 * suggests the text coding described in Annex A of ETSI TS 102 221, with
 * the result presumably a UTF-8 string allocated using the wmem scope --
 * confirm against the implementation.
 */
175 WS_DLL_PUBLIC guint8 *
176 get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, const guint8 *ptr,
177  gint length);
178 
/*
 * Takes a wmem scope, a pointer to bytes, a bit offset, and a count
 * named no_of_chars, and returns a guint8 pointer.
 *
 * NOTE(review): no description is given at this declaration.  The name
 * and the bit_offset/no_of_chars parameters suggest ASCII characters
 * packed 7 bits each, with the result presumably a UTF-8 string allocated
 * using the wmem scope -- confirm against the implementation.
 */
179 WS_DLL_PUBLIC guint8 *
180 get_ascii_7bits_string(wmem_allocator_t *scope, const guint8 *ptr,
181  const gint bit_offset, gint no_of_chars);
182 
183 /*
184  * Given a wmem scope, a pointer, a length, and a translation table with
185  * 256 entries, treat the string of bytes referred to by the pointer and
186  * length as a string encoded using one octet per character, with octets
187  * being mapped by the translation table to 2-byte Unicode Basic Multilingual
188  * Plane characters (including REPLACEMENT CHARACTER), and return a
189  * pointer to a UTF-8 string, allocated using the wmem scope.
190  */
191 WS_DLL_PUBLIC guint8 *
192 get_nonascii_unichar2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[256]);
193 
194 /*
195  * Given a wmem scope, a pointer, and a length, treat the bytes referred to
196  * by the pointer and length as a GB18030 encoded string, and return a pointer
197  * to a UTF-8 string, allocated using the wmem scope, with REPLACEMENT
198  * CHARACTER substituted according to the Unicode Standard
199  * 5.22 U+FFFD Substitution for Conversion.
200  * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf )
201  *
202  * As expected, this will also decode GBK and GB2312 strings.
203  */
204 WS_DLL_PUBLIC guint8 *
205 get_gb18030_string(wmem_allocator_t *scope, const guint8 *ptr, gint length);
206 
207 /*
208  * Given a wmem scope, a pointer, and a length, treat the bytes referred to
209  * by the pointer and length as an EUC-KR encoded string, and return a pointer
210  * to a UTF-8 string, allocated using the wmem scope, with REPLACEMENT
211  * CHARACTER substituted according to the Unicode Standard
212  * 5.22 U+FFFD Substitution for Conversion.
213  * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf )
214  */
215 WS_DLL_PUBLIC guint8 *
216 get_euc_kr_string(wmem_allocator_t *scope, const guint8 *ptr, gint length);
217 
/*
 * Takes a wmem scope, a pointer to bytes, and a length, and returns a
 * guint8 pointer.
 *
 * NOTE(review): no description is given at this declaration.  The name
 * suggests the T.61 character encoding, with the result presumably a
 * UTF-8 string allocated using the wmem scope -- confirm against the
 * implementation.
 */
218 WS_DLL_PUBLIC guint8 *
219 get_t61_string(wmem_allocator_t *scope, const guint8 *ptr, gint length);
220 
/*
 * Takes a wmem scope, a pointer to bytes, and a length, and returns a
 * guint8 pointer.
 *
 * NOTE(review): no description is given at this declaration.  The name
 * suggests the DECT standard 8-bit character set, with the result
 * presumably a UTF-8 string allocated using the wmem scope -- confirm
 * against the implementation.
 */
221 WS_DLL_PUBLIC guint8 *
222 get_dect_standard_8bits_string(wmem_allocator_t *scope, const guint8 *ptr, gint length);
223 #ifdef __cplusplus
224 }
225 #endif /* __cplusplus */
226 
227 #endif /* __CHARSETS_H__ */
228 
229 /*
230  * Editor modelines - https://www.wireshark.org/tools/modelines.html
231  *
232  * Local variables:
233  * c-basic-offset: 4
234  * tab-width: 8
235  * indent-tabs-mode: nil
236  * End:
237  *
238  * vi: set shiftwidth=4 tabstop=8 expandtab:
239  * :indentSize=4:tabSize=8:noTabs=true:
240  */
Definition: wmem_allocator.h:27