/* GRegex -- regular expression API wrapper around PCRE.
 *
 * Copyright (C) 1999, 2000 Scott Wimer
 * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
 * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

23#ifndef __G_REGEX_H__
24#define __G_REGEX_H__
25
26#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
27#error "Only <glib.h> can be included directly."
28#endif
29
30#include <glib/gerror.h>
31#include <glib/gstring.h>
32
33G_BEGIN_DECLS
34
/**
 * GRegexError:
 * @G_REGEX_ERROR_COMPILE: Compilation of the regular expression failed.
 * @G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression failed.
 * @G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement
 *     string.
 * @G_REGEX_ERROR_MATCH: The match process failed.
 * @G_REGEX_ERROR_INTERNAL: Internal error of the regular expression engine.
 *     Since: 2.16
 * @G_REGEX_ERROR_STRAY_BACKSLASH: "\\" at end of pattern. Since: 2.16
 * @G_REGEX_ERROR_MISSING_CONTROL_CHAR: "\\c" at end of pattern. Since: 2.16
 * @G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: Unrecognized character follows "\\".
 *     Since: 2.16
 * @G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: Numbers out of order in "{}"
 *     quantifier. Since: 2.16
 * @G_REGEX_ERROR_QUANTIFIER_TOO_BIG: Number too big in "{}" quantifier.
 *     Since: 2.16
 * @G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: Missing terminating "]" for
 *     character class. Since: 2.16
 * @G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: Invalid escape sequence
 *     in character class. Since: 2.16
 * @G_REGEX_ERROR_RANGE_OUT_OF_ORDER: Range out of order in character class.
 *     Since: 2.16
 * @G_REGEX_ERROR_NOTHING_TO_REPEAT: Nothing to repeat. Since: 2.16
 * @G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: Unrecognized character after "(?",
 *     "(?<" or "(?P". Since: 2.16
 * @G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: POSIX named classes are
 *     supported only within a class. Since: 2.16
 * @G_REGEX_ERROR_UNMATCHED_PARENTHESIS: Missing terminating ")" or ")"
 *     without opening "(". Since: 2.16
 * @G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: Reference to non-existent
 *     subpattern. Since: 2.16
 * @G_REGEX_ERROR_UNTERMINATED_COMMENT: Missing terminating ")" after comment.
 *     Since: 2.16
 * @G_REGEX_ERROR_EXPRESSION_TOO_LARGE: Regular expression too large.
 *     Since: 2.16
 * @G_REGEX_ERROR_MEMORY_ERROR: Failed to get memory. Since: 2.16
 * @G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: Lookbehind assertion is not
 *     fixed length. Since: 2.16
 * @G_REGEX_ERROR_MALFORMED_CONDITION: Malformed number or name after "(?(".
 *     Since: 2.16
 * @G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: Conditional group contains
 *     more than two branches. Since: 2.16
 * @G_REGEX_ERROR_ASSERTION_EXPECTED: Assertion expected after "(?(".
 *     Since: 2.16
 * @G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: Unknown POSIX class name.
 *     Since: 2.16
 * @G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: POSIX collating
 *     elements are not supported. Since: 2.16
 * @G_REGEX_ERROR_HEX_CODE_TOO_LARGE: Character value in "\\x{...}" sequence
 *     is too large. Since: 2.16
 * @G_REGEX_ERROR_INVALID_CONDITION: Invalid condition "(?(0)". Since: 2.16
 * @G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: "\\C" not allowed in
 *     lookbehind assertion. Since: 2.16
 * @G_REGEX_ERROR_INFINITE_LOOP: Recursive call could loop indefinitely.
 *     Since: 2.16
 * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: Missing terminator
 *     in subpattern name. Since: 2.16
 * @G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: Two named subpatterns have
 *     the same name. Since: 2.16
 * @G_REGEX_ERROR_MALFORMED_PROPERTY: Malformed "\\P" or "\\p" sequence.
 *     Since: 2.16
 * @G_REGEX_ERROR_UNKNOWN_PROPERTY: Unknown property name after "\\P" or
 *     "\\p". Since: 2.16
 * @G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: Subpattern name is too long
 *     (maximum 32 characters). Since: 2.16
 * @G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: Too many named subpatterns (maximum
 *     10,000). Since: 2.16
 * @G_REGEX_ERROR_INVALID_OCTAL_VALUE: Octal value is greater than "\\377".
 *     Since: 2.16
 * @G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: "DEFINE" group contains more
 *     than one branch. Since: 2.16
 * @G_REGEX_ERROR_DEFINE_REPETION: Repeating a "DEFINE" group is not allowed.
 *     This error is never raised. Since: 2.16 Deprecated: 2.34
 * @G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options.
 *     Since: 2.16
 * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced,
 *     angle-bracketed, or quoted name or number, or by a plain number.
 *     Since: 2.16
 * @G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: Relative reference must not be
 *     zero. Since: 2.34
 * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: The
 *     backtracking control verb used does not allow an argument. Since: 2.34
 * @G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: Unknown backtracking
 *     control verb. Since: 2.34
 * @G_REGEX_ERROR_NUMBER_TOO_BIG: Number is too big in escape sequence.
 *     Since: 2.34
 * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name. Since: 2.34
 * @G_REGEX_ERROR_MISSING_DIGIT: Missing digit. Since: 2.34
 * @G_REGEX_ERROR_INVALID_DATA_CHARACTER: In JavaScript compatibility mode,
 *     "[" is an invalid data character. Since: 2.34
 * @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: Different names for subpatterns of the
 *     same number are not allowed. Since: 2.34
 * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: The backtracking
 *     control verb requires an argument. Since: 2.34
 * @G_REGEX_ERROR_INVALID_CONTROL_CHAR: "\\c" must be followed by an ASCII
 *     character. Since: 2.34
 * @G_REGEX_ERROR_MISSING_NAME: "\\k" is not followed by a braced,
 *     angle-bracketed, or quoted name. Since: 2.34
 * @G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: "\\N" is not supported in a class.
 *     Since: 2.34
 * @G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: Too many forward references.
 *     Since: 2.34
 * @G_REGEX_ERROR_NAME_TOO_LONG: The name is too long in "(*MARK)", "(*PRUNE)",
 *     "(*SKIP)", or "(*THEN)". Since: 2.34
 * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: The character value in the "\\u"
 *     sequence is too large. Since: 2.34
 *
 * Error codes returned by regular expressions functions.
 *
 * Since: 2.14
 */
typedef enum
{
  G_REGEX_ERROR_COMPILE,
  G_REGEX_ERROR_OPTIMIZE,
  G_REGEX_ERROR_REPLACE,
  G_REGEX_ERROR_MATCH,
  G_REGEX_ERROR_INTERNAL,

  /* These are the error codes from PCRE + 100 */
  G_REGEX_ERROR_STRAY_BACKSLASH = 101,
  G_REGEX_ERROR_MISSING_CONTROL_CHAR = 102,
  G_REGEX_ERROR_UNRECOGNIZED_ESCAPE = 103,
  G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER = 104,
  G_REGEX_ERROR_QUANTIFIER_TOO_BIG = 105,
  G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS = 106,
  G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS = 107,
  G_REGEX_ERROR_RANGE_OUT_OF_ORDER = 108,
  G_REGEX_ERROR_NOTHING_TO_REPEAT = 109,
  G_REGEX_ERROR_UNRECOGNIZED_CHARACTER = 112,
  G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS = 113,
  G_REGEX_ERROR_UNMATCHED_PARENTHESIS = 114,
  G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE = 115,
  G_REGEX_ERROR_UNTERMINATED_COMMENT = 118,
  G_REGEX_ERROR_EXPRESSION_TOO_LARGE = 120,
  G_REGEX_ERROR_MEMORY_ERROR = 121,
  G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND = 125,
  G_REGEX_ERROR_MALFORMED_CONDITION = 126,
  G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES = 127,
  G_REGEX_ERROR_ASSERTION_EXPECTED = 128,
  G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME = 130,
  G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED = 131,
  G_REGEX_ERROR_HEX_CODE_TOO_LARGE = 134,
  G_REGEX_ERROR_INVALID_CONDITION = 135,
  G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND = 136,
  G_REGEX_ERROR_INFINITE_LOOP = 140,
  G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR = 142,
  G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME = 143,
  G_REGEX_ERROR_MALFORMED_PROPERTY = 146,
  G_REGEX_ERROR_UNKNOWN_PROPERTY = 147,
  G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG = 148,
  G_REGEX_ERROR_TOO_MANY_SUBPATTERNS = 149,
  G_REGEX_ERROR_INVALID_OCTAL_VALUE = 151,
  G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154,
  G_REGEX_ERROR_DEFINE_REPETION = 155,
  G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156,
  G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157,
  G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE = 158,
  G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159,
  G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB = 160,
  G_REGEX_ERROR_NUMBER_TOO_BIG = 161,
  G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162,
  G_REGEX_ERROR_MISSING_DIGIT = 163,
  G_REGEX_ERROR_INVALID_DATA_CHARACTER = 164,
  G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165,
  G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166,
  G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168,
  G_REGEX_ERROR_MISSING_NAME = 169,
  G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
  G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
  G_REGEX_ERROR_NAME_TOO_LONG = 175,
  G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176
} GRegexError;

205/**
206 * G_REGEX_ERROR:
207 *
208 * Error domain for regular expressions. Errors in this domain will be
209 * from the #GRegexError enumeration. See #GError for information on
210 * error domains.
211 *
212 * Since: 2.14
213 */
214#define G_REGEX_ERROR g_regex_error_quark ()
215
216GLIB_AVAILABLE_IN_ALL
217GQuark g_regex_error_quark (void);
218
219/**
220 * GRegexCompileFlags:
221 * @G_REGEX_DEFAULT: No special options set. Since: 2.74
222 * @G_REGEX_CASELESS: Letters in the pattern match both upper- and
223 * lowercase letters. This option can be changed within a pattern
224 * by a "(?i)" option setting.
225 * @G_REGEX_MULTILINE: By default, GRegex treats the strings as consisting
226 * of a single line of characters (even if it actually contains
227 * newlines). The "start of line" metacharacter ("^") matches only
228 * at the start of the string, while the "end of line" metacharacter
229 * ("$") matches only at the end of the string, or before a terminating
230 * newline (unless %G_REGEX_DOLLAR_ENDONLY is set). When
231 * %G_REGEX_MULTILINE is set, the "start of line" and "end of line"
232 * constructs match immediately following or immediately before any
233 * newline in the string, respectively, as well as at the very start
234 * and end. This can be changed within a pattern by a "(?m)" option
235 * setting.
236 * @G_REGEX_DOTALL: A dot metacharacter (".") in the pattern matches all
237 * characters, including newlines. Without it, newlines are excluded.
238 * This option can be changed within a pattern by a ("?s") option setting.
239 * @G_REGEX_EXTENDED: Whitespace data characters in the pattern are
240 * totally ignored except when escaped or inside a character class.
241 * Whitespace does not include the VT character (code 11). In addition,
242 * characters between an unescaped "#" outside a character class and
243 * the next newline character, inclusive, are also ignored. This can
244 * be changed within a pattern by a "(?x)" option setting.
245 * @G_REGEX_ANCHORED: The pattern is forced to be "anchored", that is,
246 * it is constrained to match only at the first matching point in the
247 * string that is being searched. This effect can also be achieved by
248 * appropriate constructs in the pattern itself such as the "^"
249 * metacharacter.
250 * @G_REGEX_DOLLAR_ENDONLY: A dollar metacharacter ("$") in the pattern
251 * matches only at the end of the string. Without this option, a
252 * dollar also matches immediately before the final character if
253 * it is a newline (but not before any other newlines). This option
254 * is ignored if %G_REGEX_MULTILINE is set.
255 * @G_REGEX_UNGREEDY: Inverts the "greediness" of the quantifiers so that
256 * they are not greedy by default, but become greedy if followed by "?".
257 * It can also be set by a "(?U)" option setting within the pattern.
258 * @G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this
259 * flag they are considered as a raw sequence of bytes.
260 * @G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing
261 * parentheses in the pattern. Any opening parenthesis that is not
262 * followed by "?" behaves as if it were followed by "?:" but named
263 * parentheses can still be used for capturing (and they acquire numbers
264 * in the usual way).
265 * @G_REGEX_OPTIMIZE: Since 2.74 and the port to pcre2, requests JIT
266 * compilation, which, if the just-in-time compiler is available, further
267 * processes a compiled pattern into machine code that executes much
268 * faster. However, it comes at the cost of extra processing before the
269 * match is performed, so it is most beneficial to use this when the same
270 * compiled pattern is used for matching many times. Before 2.74 this
271 * option used the built-in non-JIT optimizations in pcre1.
272 * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the
273 * first newline. Since: 2.34
274 * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
275 * be unique. This can be helpful for certain types of pattern when it
276 * is known that only one instance of the named subpattern can ever be
277 * matched.
278 * @G_REGEX_NEWLINE_CR: Usually any newline character or character sequence is
279 * recognized. If this option is set, the only recognized newline character
280 * is '\r'.
281 * @G_REGEX_NEWLINE_LF: Usually any newline character or character sequence is
282 * recognized. If this option is set, the only recognized newline character
283 * is '\n'.
284 * @G_REGEX_NEWLINE_CRLF: Usually any newline character or character sequence is
285 * recognized. If this option is set, the only recognized newline character
286 * sequence is '\r\n'.
287 * @G_REGEX_NEWLINE_ANYCRLF: Usually any newline character or character sequence
288 * is recognized. If this option is set, the only recognized newline character
289 * sequences are '\r', '\n', and '\r\n'. Since: 2.34
290 * @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
291 * is recognised. If this option is set, then "\R" only recognizes the newline
292 * characters '\r', '\n' and '\r\n'. Since: 2.34
293 * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
294 * JavaScript rather than PCRE. Since GLib 2.74 this is no longer supported,
295 * as libpcre2 does not support it. Since: 2.34 Deprecated: 2.74
296 *
297 * Flags specifying compile-time options.
298 *
299 * Since: 2.14
300 */
301/* Remember to update G_REGEX_COMPILE_MASK in gregex.c after
302 * adding a new flag.
303 */
304typedef enum
305{
306 G_REGEX_DEFAULT GLIB_AVAILABLE_ENUMERATOR_IN_2_74 = 0,
307 G_REGEX_CASELESS = 1 << 0,
308 G_REGEX_MULTILINE = 1 << 1,
309 G_REGEX_DOTALL = 1 << 2,
310 G_REGEX_EXTENDED = 1 << 3,
311 G_REGEX_ANCHORED = 1 << 4,
312 G_REGEX_DOLLAR_ENDONLY = 1 << 5,
313 G_REGEX_UNGREEDY = 1 << 9,
314 G_REGEX_RAW = 1 << 11,
315 G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
316 G_REGEX_OPTIMIZE = 1 << 13,
317 G_REGEX_FIRSTLINE = 1 << 18,
318 G_REGEX_DUPNAMES = 1 << 19,
319 G_REGEX_NEWLINE_CR = 1 << 20,
320 G_REGEX_NEWLINE_LF = 1 << 21,
321 G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
322 G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
323 G_REGEX_BSR_ANYCRLF = 1 << 23,
324 G_REGEX_JAVASCRIPT_COMPAT GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 25
325} GRegexCompileFlags;
326
327/**
328 * GRegexMatchFlags:
329 * @G_REGEX_MATCH_DEFAULT: No special options set. Since: 2.74
330 * @G_REGEX_MATCH_ANCHORED: The pattern is forced to be "anchored", that is,
331 * it is constrained to match only at the first matching point in the
332 * string that is being searched. This effect can also be achieved by
333 * appropriate constructs in the pattern itself such as the "^"
334 * metacharacter.
335 * @G_REGEX_MATCH_NOTBOL: Specifies that first character of the string is
336 * not the beginning of a line, so the circumflex metacharacter should
337 * not match before it. Setting this without %G_REGEX_MULTILINE (at
338 * compile time) causes circumflex never to match. This option affects
339 * only the behaviour of the circumflex metacharacter, it does not
340 * affect "\A".
341 * @G_REGEX_MATCH_NOTEOL: Specifies that the end of the subject string is
342 * not the end of a line, so the dollar metacharacter should not match
343 * it nor (except in multiline mode) a newline immediately before it.
344 * Setting this without %G_REGEX_MULTILINE (at compile time) causes
345 * dollar never to match. This option affects only the behaviour of
346 * the dollar metacharacter, it does not affect "\Z" or "\z".
347 * @G_REGEX_MATCH_NOTEMPTY: An empty string is not considered to be a valid
348 * match if this option is set. If there are alternatives in the pattern,
349 * they are tried. If all the alternatives match the empty string, the
350 * entire match fails. For example, if the pattern "a?b?" is applied to
351 * a string not beginning with "a" or "b", it matches the empty string
352 * at the start of the string. With this flag set, this match is not
353 * valid, so GRegex searches further into the string for occurrences
354 * of "a" or "b".
355 * @G_REGEX_MATCH_PARTIAL: Turns on the partial matching feature, for more
356 * documentation on partial matching see g_match_info_is_partial_match().
357 * @G_REGEX_MATCH_NEWLINE_CR: Overrides the newline definition set when
358 * creating a new #GRegex, setting the '\r' character as line terminator.
359 * @G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when
360 * creating a new #GRegex, setting the '\n' character as line terminator.
361 * @G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when
362 * creating a new #GRegex, setting the '\r\n' characters sequence as line terminator.
363 * @G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when
364 * creating a new #GRegex, any Unicode newline sequence
365 * is recognised as a newline. These are '\r', '\n' and '\rn', and the
366 * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
367 * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
368 * U+2029 PARAGRAPH SEPARATOR.
369 * @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when
370 * creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence
371 * is recognized as a newline. Since: 2.34
372 * @G_REGEX_MATCH_BSR_ANYCRLF: Overrides the newline definition for "\R" set when
373 * creating a new #GRegex; only '\r', '\n', or '\r\n' character sequences
374 * are recognized as a newline by "\R". Since: 2.34
375 * @G_REGEX_MATCH_BSR_ANY: Overrides the newline definition for "\R" set when
376 * creating a new #GRegex; any Unicode newline character or character sequence
377 * are recognized as a newline by "\R". These are '\r', '\n' and '\rn', and the
378 * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
379 * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
380 * U+2029 PARAGRAPH SEPARATOR. Since: 2.34
381 * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for %G_REGEX_MATCH_PARTIAL. Since: 2.34
382 * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In contrast to
383 * to %G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
384 * is found, without continuing to search for a possible complete match. See
385 * g_match_info_is_partial_match() for more information. Since: 2.34
386 * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like %G_REGEX_MATCH_NOTEMPTY, but only applied to
387 * the start of the matched string. For anchored
388 * patterns this can only happen for pattern containing "\K". Since: 2.34
389 *
390 * Flags specifying match-time options.
391 *
392 * Since: 2.14
393 */
394/* Remember to update G_REGEX_MATCH_MASK in gregex.c after
395 * adding a new flag. */
396typedef enum
397{
398 G_REGEX_MATCH_DEFAULT GLIB_AVAILABLE_ENUMERATOR_IN_2_74 = 0,
399 G_REGEX_MATCH_ANCHORED = 1 << 4,
400 G_REGEX_MATCH_NOTBOL = 1 << 7,
401 G_REGEX_MATCH_NOTEOL = 1 << 8,
402 G_REGEX_MATCH_NOTEMPTY = 1 << 10,
403 G_REGEX_MATCH_PARTIAL = 1 << 15,
404 G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
405 G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
406 G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
407 G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
408 G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
409 G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
410 G_REGEX_MATCH_BSR_ANY = 1 << 24,
411 G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
412 G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
413 G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
414} GRegexMatchFlags;
415
/* struct _GRegex is only forward-declared in this header, so code that
 * includes it can refer to a GRegex solely through pointers. */
typedef struct _GRegex GRegex;


/**
 * GMatchInfo:
 *
 * A GMatchInfo is an opaque struct used to return information about
 * matches.
 */
typedef struct _GMatchInfo GMatchInfo;

427/**
428 * GRegexEvalCallback:
429 * @match_info: the #GMatchInfo generated by the match.
430 * Use g_match_info_get_regex() and g_match_info_get_string() if you
431 * need the #GRegex or the matched string.
432 * @result: a #GString containing the new string
433 * @user_data: user data passed to g_regex_replace_eval()
434 *
435 * Specifies the type of the function passed to g_regex_replace_eval().
436 * It is called for each occurrence of the pattern in the string passed
437 * to g_regex_replace_eval(), and it should append the replacement to
438 * @result.
439 *
440 * Returns: %FALSE to continue the replacement process, %TRUE to stop it
441 *
442 * Since: 2.14
443 */
444typedef gboolean (*GRegexEvalCallback) (const GMatchInfo *match_info,
445 GString *result,
446 gpointer user_data);
447
448
449GLIB_AVAILABLE_IN_ALL
450GRegex *g_regex_new (const gchar *pattern,
451 GRegexCompileFlags compile_options,
452 GRegexMatchFlags match_options,
453 GError **error);
454GLIB_AVAILABLE_IN_ALL
455GRegex *g_regex_ref (GRegex *regex);
456GLIB_AVAILABLE_IN_ALL
457void g_regex_unref (GRegex *regex);
458GLIB_AVAILABLE_IN_ALL
459const gchar *g_regex_get_pattern (const GRegex *regex);
460GLIB_AVAILABLE_IN_ALL
461gint g_regex_get_max_backref (const GRegex *regex);
462GLIB_AVAILABLE_IN_ALL
463gint g_regex_get_capture_count (const GRegex *regex);
464GLIB_AVAILABLE_IN_ALL
465gboolean g_regex_get_has_cr_or_lf (const GRegex *regex);
466GLIB_AVAILABLE_IN_2_38
467gint g_regex_get_max_lookbehind (const GRegex *regex);
468GLIB_AVAILABLE_IN_ALL
469gint g_regex_get_string_number (const GRegex *regex,
470 const gchar *name);
471GLIB_AVAILABLE_IN_ALL
472gchar *g_regex_escape_string (const gchar *string,
473 gint length);
474GLIB_AVAILABLE_IN_ALL
475gchar *g_regex_escape_nul (const gchar *string,
476 gint length);
477
478GLIB_AVAILABLE_IN_ALL
479GRegexCompileFlags g_regex_get_compile_flags (const GRegex *regex);
480GLIB_AVAILABLE_IN_ALL
481GRegexMatchFlags g_regex_get_match_flags (const GRegex *regex);
482
483/* Matching. */
484GLIB_AVAILABLE_IN_ALL
485gboolean g_regex_match_simple (const gchar *pattern,
486 const gchar *string,
487 GRegexCompileFlags compile_options,
488 GRegexMatchFlags match_options);
489GLIB_AVAILABLE_IN_ALL
490gboolean g_regex_match (const GRegex *regex,
491 const gchar *string,
492 GRegexMatchFlags match_options,
493 GMatchInfo **match_info);
494GLIB_AVAILABLE_IN_ALL
495gboolean g_regex_match_full (const GRegex *regex,
496 const gchar *string,
497 gssize string_len,
498 gint start_position,
499 GRegexMatchFlags match_options,
500 GMatchInfo **match_info,
501 GError **error);
502GLIB_AVAILABLE_IN_ALL
503gboolean g_regex_match_all (const GRegex *regex,
504 const gchar *string,
505 GRegexMatchFlags match_options,
506 GMatchInfo **match_info);
507GLIB_AVAILABLE_IN_ALL
508gboolean g_regex_match_all_full (const GRegex *regex,
509 const gchar *string,
510 gssize string_len,
511 gint start_position,
512 GRegexMatchFlags match_options,
513 GMatchInfo **match_info,
514 GError **error);
515
516/* String splitting. */
517GLIB_AVAILABLE_IN_ALL
518gchar **g_regex_split_simple (const gchar *pattern,
519 const gchar *string,
520 GRegexCompileFlags compile_options,
521 GRegexMatchFlags match_options);
522GLIB_AVAILABLE_IN_ALL
523gchar **g_regex_split (const GRegex *regex,
524 const gchar *string,
525 GRegexMatchFlags match_options);
526GLIB_AVAILABLE_IN_ALL
527gchar **g_regex_split_full (const GRegex *regex,
528 const gchar *string,
529 gssize string_len,
530 gint start_position,
531 GRegexMatchFlags match_options,
532 gint max_tokens,
533 GError **error);
534
535/* String replacement. */
536GLIB_AVAILABLE_IN_ALL
537gchar *g_regex_replace (const GRegex *regex,
538 const gchar *string,
539 gssize string_len,
540 gint start_position,
541 const gchar *replacement,
542 GRegexMatchFlags match_options,
543 GError **error);
544GLIB_AVAILABLE_IN_ALL
545gchar *g_regex_replace_literal (const GRegex *regex,
546 const gchar *string,
547 gssize string_len,
548 gint start_position,
549 const gchar *replacement,
550 GRegexMatchFlags match_options,
551 GError **error);
552GLIB_AVAILABLE_IN_ALL
553gchar *g_regex_replace_eval (const GRegex *regex,
554 const gchar *string,
555 gssize string_len,
556 gint start_position,
557 GRegexMatchFlags match_options,
558 GRegexEvalCallback eval,
559 gpointer user_data,
560 GError **error);
561GLIB_AVAILABLE_IN_ALL
562gboolean g_regex_check_replacement (const gchar *replacement,
563 gboolean *has_references,
564 GError **error);
565
566/* Match info */
567GLIB_AVAILABLE_IN_ALL
568GRegex *g_match_info_get_regex (const GMatchInfo *match_info);
569GLIB_AVAILABLE_IN_ALL
570const gchar *g_match_info_get_string (const GMatchInfo *match_info);
571
572GLIB_AVAILABLE_IN_ALL
573GMatchInfo *g_match_info_ref (GMatchInfo *match_info);
574GLIB_AVAILABLE_IN_ALL
575void g_match_info_unref (GMatchInfo *match_info);
576GLIB_AVAILABLE_IN_ALL
577void g_match_info_free (GMatchInfo *match_info);
578GLIB_AVAILABLE_IN_ALL
579gboolean g_match_info_next (GMatchInfo *match_info,
580 GError **error);
581GLIB_AVAILABLE_IN_ALL
582gboolean g_match_info_matches (const GMatchInfo *match_info);
583GLIB_AVAILABLE_IN_ALL
584gint g_match_info_get_match_count (const GMatchInfo *match_info);
585GLIB_AVAILABLE_IN_ALL
586gboolean g_match_info_is_partial_match (const GMatchInfo *match_info);
587GLIB_AVAILABLE_IN_ALL
588gchar *g_match_info_expand_references(const GMatchInfo *match_info,
589 const gchar *string_to_expand,
590 GError **error);
591GLIB_AVAILABLE_IN_ALL
592gchar *g_match_info_fetch (const GMatchInfo *match_info,
593 gint match_num);
594GLIB_AVAILABLE_IN_ALL
595gboolean g_match_info_fetch_pos (const GMatchInfo *match_info,
596 gint match_num,
597 gint *start_pos,
598 gint *end_pos);
599GLIB_AVAILABLE_IN_ALL
600gchar *g_match_info_fetch_named (const GMatchInfo *match_info,
601 const gchar *name);
602GLIB_AVAILABLE_IN_ALL
603gboolean g_match_info_fetch_named_pos (const GMatchInfo *match_info,
604 const gchar *name,
605 gint *start_pos,
606 gint *end_pos);
607GLIB_AVAILABLE_IN_ALL
608gchar **g_match_info_fetch_all (const GMatchInfo *match_info);
609
610G_END_DECLS
611
612#endif /* __G_REGEX_H__ */
613