#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

# update when constants are added or removed

MAGIC = 20031017

# max code word in this release

MAXREPEAT = 65535

# SRE standard exception (access as sre.error)
# should this really be here?


class error(Exception):
    """Exception type defined by this module (exposed as sre.error)."""
    pass


# operators

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"

# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"

# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"

# The position of a name in each list below becomes its numeric code once
# the list is passed through makedict(), so the ORDER IS SIGNIFICANT.
# NOTE(review): MAX_REPEAT and MIN_REPEAT are defined above but are not
# listed in OPCODES, so they receive no numeric code -- presumably
# intentional; confirm before adding them.
OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]


def makedict(list):
    """Map each item of *list* to its zero-based position.

    Returns a new dict {item: index}.  If an item occurs more than once,
    the index of its last occurrence is kept.  An empty sequence yields
    an empty dict.

    The parameter keeps its historical name (which shadows the builtin
    inside this function only) so that existing callers are unaffected.
    """
    d = {}
    for i, item in enumerate(list):
        d[item] = i
    return d


# From here on the three tables are dicts mapping symbolic name -> code.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

# position code -> its per-line counterpart
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

# position code -> its "loc" counterpart
AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

# position code -> its "uni" counterpart
AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

# category code -> its "loc" counterpart; only the WORD categories have
# one, every other category maps to itself
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

# category code -> its "uni" counterpart
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags
SRE_FLAG_TEMPLATE = 1  # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2  # case insensitive
SRE_FLAG_LOCALE = 4  # honour system locale
SRE_FLAG_MULTILINE = 8  # treat target as multiline string
SRE_FLAG_DOTALL = 16  # treat target as a single string
SRE_FLAG_UNICODE = 32  # use unicode locale
SRE_FLAG_VERBOSE = 64  # ignore whitespace and comments
SRE_FLAG_DEBUG = 128  # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1  # has prefix
SRE_INFO_LITERAL = 2  # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4  # pattern starts with character from given set

if __name__ == "__main__":
    def dump(f, d, prefix):
        """Write one '#define <prefix>_<NAME> <code>' line per entry of d.

        f is a writable file object, d maps symbolic names to numeric
        codes, and prefix is prepended to each upper-cased name.  Lines
        are emitted in ascending code order.
        """
        # sorted() rather than d.items().sort(): dict.items() is not a
        # list on Python 3, and sorted() behaves the same on Python 2.4+.
        for k, v in sorted(d.items(), key=lambda a: a[1]):
            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))

    f = open("sre_constants.h", "w")
    try:
        f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py. If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

        f.write("#define SRE_MAGIC %d\n" % MAGIC)

        dump(f, OPCODES, "SRE_OP")
        dump(f, ATCODES, "SRE")
        dump(f, CHCODES, "SRE")

        f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
        f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
        f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
        f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
        f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
        f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
        f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)

        f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
        f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
        f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
    finally:
        # close (and flush) the header even if one of the writes fails
        f.close()
    # call form prints the same text on Python 2 and parses on Python 3
    print("done")
# Generated by PyXR 0.9.4