1 | /* GRegex -- regular expression API wrapper around PCRE. |
2 | * |
3 | * Copyright (C) 1999, 2000 Scott Wimer |
4 | * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com> |
5 | * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org> |
6 | * |
7 | * SPDX-License-Identifier: LGPL-2.1-or-later |
8 | * |
9 | * This library is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public |
11 | * License as published by the Free Software Foundation; either |
12 | * version 2.1 of the License, or (at your option) any later version. |
13 | * |
14 | * This library is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Lesser General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public License |
20 | * along with this library; if not, see <http://www.gnu.org/licenses/>. |
21 | */ |
22 | |
23 | #ifndef __G_REGEX_H__ |
24 | #define __G_REGEX_H__ |
25 | |
26 | #if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION) |
27 | #error "Only <glib.h> can be included directly." |
28 | #endif |
29 | |
30 | #include <glib/gerror.h> |
31 | #include <glib/gstring.h> |
32 | |
33 | G_BEGIN_DECLS |
34 | |
35 | /** |
36 | * GRegexError: |
37 | * @G_REGEX_ERROR_COMPILE: Compilation of the regular expression failed. |
38 | * @G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression failed. |
39 | * @G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement |
40 | * string. |
41 | * @G_REGEX_ERROR_MATCH: The match process failed. |
42 | * @G_REGEX_ERROR_INTERNAL: Internal error of the regular expression engine. |
43 | * Since 2.16 |
44 | * @G_REGEX_ERROR_STRAY_BACKSLASH: "\\" at end of pattern. Since 2.16 |
45 | * @G_REGEX_ERROR_MISSING_CONTROL_CHAR: "\\c" at end of pattern. Since 2.16 |
46 | * @G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: Unrecognized character follows "\\". |
47 | * Since 2.16 |
48 | * @G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: Numbers out of order in "{}" |
49 | * quantifier. Since 2.16 |
50 | * @G_REGEX_ERROR_QUANTIFIER_TOO_BIG: Number too big in "{}" quantifier. |
51 | * Since 2.16 |
52 | * @G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: Missing terminating "]" for |
53 | * character class. Since 2.16 |
54 | * @G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: Invalid escape sequence |
55 | * in character class. Since 2.16 |
56 | * @G_REGEX_ERROR_RANGE_OUT_OF_ORDER: Range out of order in character class. |
57 | * Since 2.16 |
58 | * @G_REGEX_ERROR_NOTHING_TO_REPEAT: Nothing to repeat. Since 2.16 |
59 | * @G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: Unrecognized character after "(?", |
60 | * "(?<" or "(?P". Since 2.16 |
61 | * @G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: POSIX named classes are |
62 | * supported only within a class. Since 2.16 |
63 | * @G_REGEX_ERROR_UNMATCHED_PARENTHESIS: Missing terminating ")" or ")" |
64 | * without opening "(". Since 2.16 |
65 | * @G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: Reference to non-existent |
66 | * subpattern. Since 2.16 |
67 | * @G_REGEX_ERROR_UNTERMINATED_COMMENT: Missing terminating ")" after comment. |
68 | * Since 2.16 |
69 | * @G_REGEX_ERROR_EXPRESSION_TOO_LARGE: Regular expression too large. |
70 | * Since 2.16 |
71 | * @G_REGEX_ERROR_MEMORY_ERROR: Failed to get memory. Since 2.16 |
72 | * @G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: Lookbehind assertion is not |
73 | * fixed length. Since 2.16 |
74 | * @G_REGEX_ERROR_MALFORMED_CONDITION: Malformed number or name after "(?(". |
75 | * Since 2.16 |
76 | * @G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: Conditional group contains |
77 | * more than two branches. Since 2.16 |
78 | * @G_REGEX_ERROR_ASSERTION_EXPECTED: Assertion expected after "(?(". |
79 | * Since 2.16 |
80 | * @G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: Unknown POSIX class name. |
81 | * Since 2.16 |
82 | * @G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: POSIX collating |
83 | * elements are not supported. Since 2.16 |
84 | * @G_REGEX_ERROR_HEX_CODE_TOO_LARGE: Character value in "\\x{...}" sequence |
85 | * is too large. Since 2.16 |
86 | * @G_REGEX_ERROR_INVALID_CONDITION: Invalid condition "(?(0)". Since 2.16 |
87 | * @G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: \\C not allowed in |
88 | * lookbehind assertion. Since 2.16 |
89 | * @G_REGEX_ERROR_INFINITE_LOOP: Recursive call could loop indefinitely. |
90 | * Since 2.16 |
91 | * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: Missing terminator |
92 | * in subpattern name. Since 2.16 |
93 | * @G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: Two named subpatterns have |
94 | * the same name. Since 2.16 |
95 | * @G_REGEX_ERROR_MALFORMED_PROPERTY: Malformed "\\P" or "\\p" sequence. |
96 | * Since 2.16 |
97 | * @G_REGEX_ERROR_UNKNOWN_PROPERTY: Unknown property name after "\\P" or |
98 | * "\\p". Since 2.16 |
99 | * @G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: Subpattern name is too long |
100 | * (maximum 32 characters). Since 2.16 |
101 | * @G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: Too many named subpatterns (maximum |
102 | * 10,000). Since 2.16 |
103 | * @G_REGEX_ERROR_INVALID_OCTAL_VALUE: Octal value is greater than "\\377". |
104 | * Since 2.16 |
105 | * @G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: "DEFINE" group contains more |
106 | * than one branch. Since 2.16 |
107 | * @G_REGEX_ERROR_DEFINE_REPETION: Repeating a "DEFINE" group is not allowed. |
108 | * This error is never raised. Since: 2.16 Deprecated: 2.34 |
109 | * @G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options. |
110 | * Since 2.16 |
111 | * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced, |
112 | * angle-bracketed, or quoted name or number, or by a plain number. Since: 2.16 |
113 | * @G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: relative reference must not be zero. Since: 2.34 |
 * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: the backtracking
 *     control verb used does not allow an argument. Since: 2.34
 * @G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: unknown backtracking
 *     control verb. Since: 2.34
118 | * @G_REGEX_ERROR_NUMBER_TOO_BIG: number is too big in escape sequence. Since: 2.34 |
119 | * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. Since: 2.34 |
120 | * @G_REGEX_ERROR_MISSING_DIGIT: Missing digit. Since 2.34 |
121 | * @G_REGEX_ERROR_INVALID_DATA_CHARACTER: In JavaScript compatibility mode, |
122 | * "[" is an invalid data character. Since: 2.34 |
123 | * @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: different names for subpatterns of the |
124 | * same number are not allowed. Since: 2.34 |
 * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: the backtracking control
 *     verb requires an argument. Since: 2.34
127 | * @G_REGEX_ERROR_INVALID_CONTROL_CHAR: "\\c" must be followed by an ASCII |
128 | * character. Since: 2.34 |
129 | * @G_REGEX_ERROR_MISSING_NAME: "\\k" is not followed by a braced, angle-bracketed, or |
130 | * quoted name. Since: 2.34 |
131 | * @G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: "\\N" is not supported in a class. Since: 2.34 |
132 | * @G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: too many forward references. Since: 2.34 |
133 | * @G_REGEX_ERROR_NAME_TOO_LONG: the name is too long in "(*MARK)", "(*PRUNE)", |
134 | * "(*SKIP)", or "(*THEN)". Since: 2.34 |
135 | * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: the character value in the \\u sequence is |
136 | * too large. Since: 2.34 |
137 | * |
138 | * Error codes returned by regular expressions functions. |
139 | * |
140 | * Since: 2.14 |
141 | */ |
/* Numbering contract: the first five codes are GRegex's own and count up
 * from 0.  Every later code is the corresponding PCRE error code plus 100;
 * the gaps in the sequence are codes this enumeration does not expose.
 * The values are part of the public interface: never renumber or reuse one.
 *
 * Every enumerator listed in the GRegexError documentation comment must
 * have a named entry here.
 */
typedef enum
{
  /* GRegex-level failures (implicit values 0..4). */
  G_REGEX_ERROR_COMPILE,
  G_REGEX_ERROR_OPTIMIZE,
  G_REGEX_ERROR_REPLACE,
  G_REGEX_ERROR_MATCH,
  G_REGEX_ERROR_INTERNAL,

  /* These are the error codes from PCRE + 100 */
  G_REGEX_ERROR_STRAY_BACKSLASH = 101,
  G_REGEX_ERROR_MISSING_CONTROL_CHAR = 102,
  G_REGEX_ERROR_UNRECOGNIZED_ESCAPE = 103,
  G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER = 104,
  G_REGEX_ERROR_QUANTIFIER_TOO_BIG = 105,
  G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS = 106,
  G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS = 107,
  G_REGEX_ERROR_RANGE_OUT_OF_ORDER = 108,
  G_REGEX_ERROR_NOTHING_TO_REPEAT = 109,
  G_REGEX_ERROR_UNRECOGNIZED_CHARACTER = 112,
  G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS = 113,
  G_REGEX_ERROR_UNMATCHED_PARENTHESIS = 114,
  G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE = 115,
  G_REGEX_ERROR_UNTERMINATED_COMMENT = 118,
  G_REGEX_ERROR_EXPRESSION_TOO_LARGE = 120,
  G_REGEX_ERROR_MEMORY_ERROR = 121,
  G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND = 125,
  G_REGEX_ERROR_MALFORMED_CONDITION = 126,
  G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES = 127,
  G_REGEX_ERROR_ASSERTION_EXPECTED = 128,
  G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME = 130,
  G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED = 131,
  G_REGEX_ERROR_HEX_CODE_TOO_LARGE = 134,
  G_REGEX_ERROR_INVALID_CONDITION = 135,
  G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND = 136,
  G_REGEX_ERROR_INFINITE_LOOP = 140,
  G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR = 142,
  G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME = 143,
  G_REGEX_ERROR_MALFORMED_PROPERTY = 146,
  G_REGEX_ERROR_UNKNOWN_PROPERTY = 147,
  G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG = 148,
  G_REGEX_ERROR_TOO_MANY_SUBPATTERNS = 149,
  G_REGEX_ERROR_INVALID_OCTAL_VALUE = 151,
  G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154,
  /* The "REPETION" spelling is part of the public name and must stay.
   * Documented as never raised and deprecated since 2.34. */
  G_REGEX_ERROR_DEFINE_REPETION = 155,
  G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156,
  G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157,
  G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE = 158,
  G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159,
  G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB = 160,
  G_REGEX_ERROR_NUMBER_TOO_BIG = 161,
  G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162,
  G_REGEX_ERROR_MISSING_DIGIT = 163,
  G_REGEX_ERROR_INVALID_DATA_CHARACTER = 164,
  G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165,
  G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166,
  G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168,
  G_REGEX_ERROR_MISSING_NAME = 169,
  G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
  G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
  G_REGEX_ERROR_NAME_TOO_LONG = 175,
  G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176
} GRegexError;
204 | |
205 | /** |
206 | * G_REGEX_ERROR: |
207 | * |
208 | * Error domain for regular expressions. Errors in this domain will be |
209 | * from the #GRegexError enumeration. See #GError for information on |
210 | * error domains. |
211 | * |
212 | * Since: 2.14 |
213 | */ |
#define G_REGEX_ERROR g_regex_error_quark ()

/* Accessor that the G_REGEX_ERROR macro expands to; it yields the #GQuark
 * used as the regex error domain.  Takes no arguments.  Only the
 * declaration is visible in this header. */
GLIB_AVAILABLE_IN_ALL
GQuark g_regex_error_quark (void);
218 | |
219 | /** |
220 | * GRegexCompileFlags: |
221 | * @G_REGEX_DEFAULT: No special options set. Since: 2.74 |
222 | * @G_REGEX_CASELESS: Letters in the pattern match both upper- and |
223 | * lowercase letters. This option can be changed within a pattern |
224 | * by a "(?i)" option setting. |
225 | * @G_REGEX_MULTILINE: By default, GRegex treats the strings as consisting |
226 | * of a single line of characters (even if it actually contains |
227 | * newlines). The "start of line" metacharacter ("^") matches only |
228 | * at the start of the string, while the "end of line" metacharacter |
229 | * ("$") matches only at the end of the string, or before a terminating |
230 | * newline (unless %G_REGEX_DOLLAR_ENDONLY is set). When |
231 | * %G_REGEX_MULTILINE is set, the "start of line" and "end of line" |
232 | * constructs match immediately following or immediately before any |
233 | * newline in the string, respectively, as well as at the very start |
234 | * and end. This can be changed within a pattern by a "(?m)" option |
235 | * setting. |
236 | * @G_REGEX_DOTALL: A dot metacharacter (".") in the pattern matches all |
237 | * characters, including newlines. Without it, newlines are excluded. |
 *     This option can be changed within a pattern by a "(?s)" option setting.
239 | * @G_REGEX_EXTENDED: Whitespace data characters in the pattern are |
240 | * totally ignored except when escaped or inside a character class. |
241 | * Whitespace does not include the VT character (code 11). In addition, |
242 | * characters between an unescaped "#" outside a character class and |
243 | * the next newline character, inclusive, are also ignored. This can |
244 | * be changed within a pattern by a "(?x)" option setting. |
245 | * @G_REGEX_ANCHORED: The pattern is forced to be "anchored", that is, |
246 | * it is constrained to match only at the first matching point in the |
247 | * string that is being searched. This effect can also be achieved by |
248 | * appropriate constructs in the pattern itself such as the "^" |
249 | * metacharacter. |
250 | * @G_REGEX_DOLLAR_ENDONLY: A dollar metacharacter ("$") in the pattern |
251 | * matches only at the end of the string. Without this option, a |
252 | * dollar also matches immediately before the final character if |
253 | * it is a newline (but not before any other newlines). This option |
254 | * is ignored if %G_REGEX_MULTILINE is set. |
255 | * @G_REGEX_UNGREEDY: Inverts the "greediness" of the quantifiers so that |
256 | * they are not greedy by default, but become greedy if followed by "?". |
257 | * It can also be set by a "(?U)" option setting within the pattern. |
258 | * @G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this |
259 | * flag they are considered as a raw sequence of bytes. |
260 | * @G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing |
261 | * parentheses in the pattern. Any opening parenthesis that is not |
262 | * followed by "?" behaves as if it were followed by "?:" but named |
263 | * parentheses can still be used for capturing (and they acquire numbers |
264 | * in the usual way). |
265 | * @G_REGEX_OPTIMIZE: Since 2.74 and the port to pcre2, requests JIT |
266 | * compilation, which, if the just-in-time compiler is available, further |
267 | * processes a compiled pattern into machine code that executes much |
268 | * faster. However, it comes at the cost of extra processing before the |
269 | * match is performed, so it is most beneficial to use this when the same |
270 | * compiled pattern is used for matching many times. Before 2.74 this |
271 | * option used the built-in non-JIT optimizations in pcre1. |
272 | * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the |
273 | * first newline. Since: 2.34 |
274 | * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not |
275 | * be unique. This can be helpful for certain types of pattern when it |
276 | * is known that only one instance of the named subpattern can ever be |
277 | * matched. |
278 | * @G_REGEX_NEWLINE_CR: Usually any newline character or character sequence is |
279 | * recognized. If this option is set, the only recognized newline character |
280 | * is '\r'. |
281 | * @G_REGEX_NEWLINE_LF: Usually any newline character or character sequence is |
282 | * recognized. If this option is set, the only recognized newline character |
283 | * is '\n'. |
284 | * @G_REGEX_NEWLINE_CRLF: Usually any newline character or character sequence is |
285 | * recognized. If this option is set, the only recognized newline character |
286 | * sequence is '\r\n'. |
287 | * @G_REGEX_NEWLINE_ANYCRLF: Usually any newline character or character sequence |
288 | * is recognized. If this option is set, the only recognized newline character |
289 | * sequences are '\r', '\n', and '\r\n'. Since: 2.34 |
290 | * @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence |
291 | * is recognised. If this option is set, then "\R" only recognizes the newline |
292 | * characters '\r', '\n' and '\r\n'. Since: 2.34 |
293 | * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with |
294 | * JavaScript rather than PCRE. Since GLib 2.74 this is no longer supported, |
295 | * as libpcre2 does not support it. Since: 2.34 Deprecated: 2.74 |
296 | * |
297 | * Flags specifying compile-time options. |
298 | * |
299 | * Since: 2.14 |
300 | */ |
301 | /* Remember to update G_REGEX_COMPILE_MASK in gregex.c after |
302 | * adding a new flag. |
303 | */ |
304 | typedef enum |
305 | { |
306 | G_REGEX_DEFAULT GLIB_AVAILABLE_ENUMERATOR_IN_2_74 = 0, |
307 | G_REGEX_CASELESS = 1 << 0, |
308 | G_REGEX_MULTILINE = 1 << 1, |
309 | G_REGEX_DOTALL = 1 << 2, |
310 | G_REGEX_EXTENDED = 1 << 3, |
311 | G_REGEX_ANCHORED = 1 << 4, |
312 | G_REGEX_DOLLAR_ENDONLY = 1 << 5, |
313 | G_REGEX_UNGREEDY = 1 << 9, |
314 | G_REGEX_RAW = 1 << 11, |
315 | G_REGEX_NO_AUTO_CAPTURE = 1 << 12, |
316 | G_REGEX_OPTIMIZE = 1 << 13, |
317 | G_REGEX_FIRSTLINE = 1 << 18, |
318 | G_REGEX_DUPNAMES = 1 << 19, |
319 | G_REGEX_NEWLINE_CR = 1 << 20, |
320 | G_REGEX_NEWLINE_LF = 1 << 21, |
321 | G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF, |
322 | G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22, |
323 | G_REGEX_BSR_ANYCRLF = 1 << 23, |
324 | G_REGEX_JAVASCRIPT_COMPAT GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 25 |
325 | } GRegexCompileFlags; |
326 | |
327 | /** |
328 | * GRegexMatchFlags: |
329 | * @G_REGEX_MATCH_DEFAULT: No special options set. Since: 2.74 |
330 | * @G_REGEX_MATCH_ANCHORED: The pattern is forced to be "anchored", that is, |
331 | * it is constrained to match only at the first matching point in the |
332 | * string that is being searched. This effect can also be achieved by |
333 | * appropriate constructs in the pattern itself such as the "^" |
334 | * metacharacter. |
335 | * @G_REGEX_MATCH_NOTBOL: Specifies that first character of the string is |
336 | * not the beginning of a line, so the circumflex metacharacter should |
337 | * not match before it. Setting this without %G_REGEX_MULTILINE (at |
338 | * compile time) causes circumflex never to match. This option affects |
339 | * only the behaviour of the circumflex metacharacter, it does not |
340 | * affect "\A". |
341 | * @G_REGEX_MATCH_NOTEOL: Specifies that the end of the subject string is |
342 | * not the end of a line, so the dollar metacharacter should not match |
343 | * it nor (except in multiline mode) a newline immediately before it. |
344 | * Setting this without %G_REGEX_MULTILINE (at compile time) causes |
345 | * dollar never to match. This option affects only the behaviour of |
346 | * the dollar metacharacter, it does not affect "\Z" or "\z". |
347 | * @G_REGEX_MATCH_NOTEMPTY: An empty string is not considered to be a valid |
348 | * match if this option is set. If there are alternatives in the pattern, |
349 | * they are tried. If all the alternatives match the empty string, the |
350 | * entire match fails. For example, if the pattern "a?b?" is applied to |
351 | * a string not beginning with "a" or "b", it matches the empty string |
352 | * at the start of the string. With this flag set, this match is not |
353 | * valid, so GRegex searches further into the string for occurrences |
354 | * of "a" or "b". |
355 | * @G_REGEX_MATCH_PARTIAL: Turns on the partial matching feature, for more |
356 | * documentation on partial matching see g_match_info_is_partial_match(). |
357 | * @G_REGEX_MATCH_NEWLINE_CR: Overrides the newline definition set when |
358 | * creating a new #GRegex, setting the '\r' character as line terminator. |
359 | * @G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when |
360 | * creating a new #GRegex, setting the '\n' character as line terminator. |
361 | * @G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when |
362 | * creating a new #GRegex, setting the '\r\n' characters sequence as line terminator. |
363 | * @G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when |
364 | * creating a new #GRegex, any Unicode newline sequence |
 *     is recognised as a newline. These are '\r', '\n' and '\r\n', and the
366 | * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF), |
367 | * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and |
368 | * U+2029 PARAGRAPH SEPARATOR. |
369 | * @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when |
370 | * creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence |
371 | * is recognized as a newline. Since: 2.34 |
372 | * @G_REGEX_MATCH_BSR_ANYCRLF: Overrides the newline definition for "\R" set when |
373 | * creating a new #GRegex; only '\r', '\n', or '\r\n' character sequences |
374 | * are recognized as a newline by "\R". Since: 2.34 |
375 | * @G_REGEX_MATCH_BSR_ANY: Overrides the newline definition for "\R" set when |
376 | * creating a new #GRegex; any Unicode newline character or character sequence |
 *     is recognized as a newline by "\R". These are '\r', '\n' and '\r\n', and the
378 | * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF), |
379 | * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and |
380 | * U+2029 PARAGRAPH SEPARATOR. Since: 2.34 |
381 | * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for %G_REGEX_MATCH_PARTIAL. Since: 2.34 |
 * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast
 *     to %G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
384 | * is found, without continuing to search for a possible complete match. See |
385 | * g_match_info_is_partial_match() for more information. Since: 2.34 |
 * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like %G_REGEX_MATCH_NOTEMPTY, but only applied to
 *     the start of the matched string. For anchored
 *     patterns this can only happen for patterns containing "\K". Since: 2.34
389 | * |
390 | * Flags specifying match-time options. |
391 | * |
392 | * Since: 2.14 |
393 | */ |
394 | /* Remember to update G_REGEX_MATCH_MASK in gregex.c after |
395 | * adding a new flag. */ |
396 | typedef enum |
397 | { |
398 | G_REGEX_MATCH_DEFAULT GLIB_AVAILABLE_ENUMERATOR_IN_2_74 = 0, |
399 | G_REGEX_MATCH_ANCHORED = 1 << 4, |
400 | G_REGEX_MATCH_NOTBOL = 1 << 7, |
401 | G_REGEX_MATCH_NOTEOL = 1 << 8, |
402 | G_REGEX_MATCH_NOTEMPTY = 1 << 10, |
403 | G_REGEX_MATCH_PARTIAL = 1 << 15, |
404 | G_REGEX_MATCH_NEWLINE_CR = 1 << 20, |
405 | G_REGEX_MATCH_NEWLINE_LF = 1 << 21, |
406 | G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF, |
407 | G_REGEX_MATCH_NEWLINE_ANY = 1 << 22, |
408 | G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY, |
409 | G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23, |
410 | G_REGEX_MATCH_BSR_ANY = 1 << 24, |
411 | G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL, |
412 | G_REGEX_MATCH_PARTIAL_HARD = 1 << 27, |
413 | G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28 |
414 | } GRegexMatchFlags; |
415 | |
/* Opaque handle: only this forward declaration of struct _GRegex appears in
 * the header, so callers hold a GRegex purely through pointers passed to and
 * returned by the g_regex_*() functions declared further down. */
typedef struct _GRegex GRegex;


/**
 * GMatchInfo:
 *
 * A GMatchInfo is an opaque struct used to return information about
 * matches.
 */
typedef struct _GMatchInfo GMatchInfo;
426 | |
/**
 * GRegexEvalCallback:
 * @match_info: the #GMatchInfo generated by the match.
 *     Use g_match_info_get_regex() and g_match_info_get_string() if you
 *     need the #GRegex or the matched string.
 * @result: a #GString containing the new string; the callback appends
 *     its replacement text to it
 * @user_data: user data passed to g_regex_replace_eval()
 *
 * Specifies the type of the function passed to g_regex_replace_eval().
 * It is called for each occurrence of the pattern in the string passed
 * to g_regex_replace_eval(), and it should append the replacement to
 * @result.
 *
 * Returns: %FALSE to continue the replacement process, %TRUE to stop it
 *
 * Since: 2.14
 */
typedef gboolean (*GRegexEvalCallback) (const GMatchInfo *match_info,
                                        GString          *result,
                                        gpointer          user_data);
447 | |
448 | |
/* Construction, lifetime and introspection of a #GRegex.
 * Declarations only; the definitions are not part of this header. */

/* Takes a pattern string plus a compile-time and a match-time flag set and
 * yields a #GRegex pointer; has a GError out-parameter for failure
 * reporting. */
GLIB_AVAILABLE_IN_ALL
GRegex *g_regex_new (const gchar        *pattern,
                     GRegexCompileFlags  compile_options,
                     GRegexMatchFlags    match_options,
                     GError            **error);
/* ref/unref pair operating on a non-const #GRegex.
 * NOTE(review): presumably reference counting -- confirm in gregex.c. */
GLIB_AVAILABLE_IN_ALL
GRegex *g_regex_ref (GRegex *regex);
GLIB_AVAILABLE_IN_ALL
void g_regex_unref (GRegex *regex);

/* Read-only queries: each takes a const #GRegex. */
GLIB_AVAILABLE_IN_ALL
const gchar *g_regex_get_pattern (const GRegex *regex);
GLIB_AVAILABLE_IN_ALL
gint g_regex_get_max_backref (const GRegex *regex);
GLIB_AVAILABLE_IN_ALL
gint g_regex_get_capture_count (const GRegex *regex);
GLIB_AVAILABLE_IN_ALL
gboolean g_regex_get_has_cr_or_lf (const GRegex *regex);
/* The only declaration in this header tagged as available from 2.38. */
GLIB_AVAILABLE_IN_2_38
gint g_regex_get_max_lookbehind (const GRegex *regex);
GLIB_AVAILABLE_IN_ALL
gint g_regex_get_string_number (const GRegex *regex,
                                const gchar  *name);

/* String helpers that need no #GRegex: both take a string and a gint length
 * and return a non-const gchar pointer.
 * NOTE(review): presumably a newly allocated string the caller must free --
 * confirm against the implementation. */
GLIB_AVAILABLE_IN_ALL
gchar *g_regex_escape_string (const gchar *string,
                              gint         length);
GLIB_AVAILABLE_IN_ALL
gchar *g_regex_escape_nul (const gchar *string,
                           gint         length);

/* Accessors returning the two flag enumerations for a #GRegex. */
GLIB_AVAILABLE_IN_ALL
GRegexCompileFlags g_regex_get_compile_flags (const GRegex *regex);
GLIB_AVAILABLE_IN_ALL
GRegexMatchFlags g_regex_get_match_flags (const GRegex *regex);
482 | |
/* Matching.
 *
 * g_regex_match_simple() works directly from a pattern string; the others
 * take an existing const #GRegex.  The *_full variants additionally accept
 * an explicit string length (gssize), a start position and a GError
 * out-parameter.  @match_info is a GMatchInfo ** in every variant that has
 * it.
 * NOTE(review): presumably an optional out-parameter the caller must
 * release -- confirm in gregex.c.
 */
GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match_simple (const gchar        *pattern,
                               const gchar        *string,
                               GRegexCompileFlags  compile_options,
                               GRegexMatchFlags    match_options);
GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match (const GRegex      *regex,
                        const gchar       *string,
                        GRegexMatchFlags   match_options,
                        GMatchInfo       **match_info);
GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match_full (const GRegex      *regex,
                             const gchar       *string,
                             gssize             string_len,
                             gint               start_position,
                             GRegexMatchFlags   match_options,
                             GMatchInfo       **match_info,
                             GError           **error);
GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match_all (const GRegex      *regex,
                            const gchar       *string,
                            GRegexMatchFlags   match_options,
                            GMatchInfo       **match_info);
GLIB_AVAILABLE_IN_ALL
gboolean g_regex_match_all_full (const GRegex      *regex,
                                 const gchar       *string,
                                 gssize             string_len,
                                 gint               start_position,
                                 GRegexMatchFlags   match_options,
                                 GMatchInfo       **match_info,
                                 GError           **error);
515 | |
/* String splitting.
 *
 * All three return a gchar ** (an array of strings).  The _simple form
 * takes a pattern string, the other two an existing const #GRegex.  Only
 * the _full form exposes a string length, start position, token limit
 * (@max_tokens) and GError out-parameter.
 * NOTE(review): presumably the result is NULL-terminated and owned by the
 * caller -- confirm in gregex.c.
 */
GLIB_AVAILABLE_IN_ALL
gchar **g_regex_split_simple (const gchar        *pattern,
                              const gchar        *string,
                              GRegexCompileFlags  compile_options,
                              GRegexMatchFlags    match_options);
GLIB_AVAILABLE_IN_ALL
gchar **g_regex_split (const GRegex     *regex,
                       const gchar      *string,
                       GRegexMatchFlags  match_options);
GLIB_AVAILABLE_IN_ALL
gchar **g_regex_split_full (const GRegex      *regex,
                            const gchar       *string,
                            gssize             string_len,
                            gint               start_position,
                            GRegexMatchFlags   match_options,
                            gint               max_tokens,
                            GError           **error);
534 | |
/* String replacement.
 *
 * The three g_regex_replace*() functions share the leading parameters
 * (regex, string, string_len, start_position) and return a non-const gchar
 * pointer.  They differ in how the replacement is supplied: a replacement
 * string (g_regex_replace, g_regex_replace_literal) or a
 * #GRegexEvalCallback with user data (g_regex_replace_eval).
 * NOTE(review): presumably the result is newly allocated and NULL on error
 * -- confirm in gregex.c.
 */
GLIB_AVAILABLE_IN_ALL
gchar *g_regex_replace (const GRegex      *regex,
                        const gchar       *string,
                        gssize             string_len,
                        gint               start_position,
                        const gchar       *replacement,
                        GRegexMatchFlags   match_options,
                        GError           **error);
GLIB_AVAILABLE_IN_ALL
gchar *g_regex_replace_literal (const GRegex      *regex,
                                const gchar       *string,
                                gssize             string_len,
                                gint               start_position,
                                const gchar       *replacement,
                                GRegexMatchFlags   match_options,
                                GError           **error);
GLIB_AVAILABLE_IN_ALL
gchar *g_regex_replace_eval (const GRegex        *regex,
                             const gchar         *string,
                             gssize               string_len,
                             gint                 start_position,
                             GRegexMatchFlags     match_options,
                             GRegexEvalCallback   eval,
                             gpointer             user_data,
                             GError             **error);
/* Needs no #GRegex: takes only the replacement string, a gboolean
 * out-parameter (@has_references) and a GError out-parameter. */
GLIB_AVAILABLE_IN_ALL
gboolean g_regex_check_replacement (const gchar  *replacement,
                                    gboolean     *has_references,
                                    GError      **error);
565 | |
/* Match info
 *
 * Accessors and iteration over a #GMatchInfo.  Functions that only read
 * take a const #GMatchInfo; ref, unref, free and next take a non-const one.
 */
GLIB_AVAILABLE_IN_ALL
GRegex *g_match_info_get_regex (const GMatchInfo *match_info);
GLIB_AVAILABLE_IN_ALL
const gchar *g_match_info_get_string (const GMatchInfo *match_info);

/* NOTE(review): both g_match_info_unref() and g_match_info_free() are
 * declared with the same signature; how they relate is not visible in this
 * header -- check gregex.c before choosing one. */
GLIB_AVAILABLE_IN_ALL
GMatchInfo *g_match_info_ref (GMatchInfo *match_info);
GLIB_AVAILABLE_IN_ALL
void g_match_info_unref (GMatchInfo *match_info);
GLIB_AVAILABLE_IN_ALL
void g_match_info_free (GMatchInfo *match_info);
GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_next (GMatchInfo  *match_info,
                            GError     **error);
GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_matches (const GMatchInfo *match_info);
GLIB_AVAILABLE_IN_ALL
gint g_match_info_get_match_count (const GMatchInfo *match_info);
GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_is_partial_match (const GMatchInfo *match_info);
GLIB_AVAILABLE_IN_ALL
gchar *g_match_info_expand_references(const GMatchInfo  *match_info,
                                      const gchar       *string_to_expand,
                                      GError           **error);

/* Sub-match retrieval, addressed by number (@match_num) or by @name.
 * The *_pos variants report positions through two gint out-parameters
 * and return a gboolean. */
GLIB_AVAILABLE_IN_ALL
gchar *g_match_info_fetch (const GMatchInfo *match_info,
                           gint              match_num);
GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_fetch_pos (const GMatchInfo *match_info,
                                 gint              match_num,
                                 gint             *start_pos,
                                 gint             *end_pos);
GLIB_AVAILABLE_IN_ALL
gchar *g_match_info_fetch_named (const GMatchInfo *match_info,
                                 const gchar      *name);
GLIB_AVAILABLE_IN_ALL
gboolean g_match_info_fetch_named_pos (const GMatchInfo *match_info,
                                       const gchar      *name,
                                       gint             *start_pos,
                                       gint             *end_pos);
GLIB_AVAILABLE_IN_ALL
gchar **g_match_info_fetch_all (const GMatchInfo *match_info);
609 | |
610 | G_END_DECLS |
611 | |
612 | #endif /* __G_REGEX_H__ */ |
613 | |