1
|
1
|
new file mode 100644
|
...
|
...
|
@@ -0,0 +1,3601 @@
|
|
1
|
+# module pyparsing.py
|
|
2
|
+#
|
|
3
|
+# Copyright (c) 2003-2008 Paul T. McGuire
|
|
4
|
+#
|
|
5
|
+# Permission is hereby granted, free of charge, to any person obtaining
|
|
6
|
+# a copy of this software and associated documentation files (the
|
|
7
|
+# "Software"), to deal in the Software without restriction, including
|
|
8
|
+# without limitation the rights to use, copy, modify, merge, publish,
|
|
9
|
+# distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
|
+# permit persons to whom the Software is furnished to do so, subject to
|
|
11
|
+# the following conditions:
|
|
12
|
+#
|
|
13
|
+# The above copyright notice and this permission notice shall be
|
|
14
|
+# included in all copies or substantial portions of the Software.
|
|
15
|
+#
|
|
16
|
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
19
|
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
20
|
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
21
|
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
22
|
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
23
|
+#
|
|
24
|
+#from __future__ import generators
|
|
25
|
+
|
|
26
|
+__doc__ = \
|
|
27
|
+"""
|
|
28
|
+pyparsing module - Classes and methods to define and execute parsing grammars
|
|
29
|
+
|
|
30
|
+The pyparsing module is an alternative approach to creating and executing simple grammars,
|
|
31
|
+vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
|
|
32
|
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
|
|
33
|
+provides a library of classes that you use to construct the grammar directly in Python.
|
|
34
|
+
|
|
35
|
+Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
|
|
36
|
+
|
|
37
|
+ from pyparsing import Word, alphas
|
|
38
|
+
|
|
39
|
+ # define grammar of a greeting
|
|
40
|
+ greet = Word( alphas ) + "," + Word( alphas ) + "!"
|
|
41
|
+
|
|
42
|
+ hello = "Hello, World!"
|
|
43
|
+ print hello, "->", greet.parseString( hello )
|
|
44
|
+
|
|
45
|
+The program outputs the following::
|
|
46
|
+
|
|
47
|
+ Hello, World! -> ['Hello', ',', 'World', '!']
|
|
48
|
+
|
|
49
|
+The Python representation of the grammar is quite readable, owing to the self-explanatory
|
|
50
|
+class names, and the use of '+', '|' and '^' operators.
|
|
51
|
+
|
|
52
|
+The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
|
|
53
|
+object with named attributes.
|
|
54
|
+
|
|
55
|
+The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
|
|
56
|
+ - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
|
|
57
|
+ - quoted strings
|
|
58
|
+ - embedded comments
|
|
59
|
+"""
|
|
60
|
+
|
|
61
|
+__version__ = "1.5.0"
|
|
62
|
+__versionTime__ = "28 May 2008 10:05"
|
|
63
|
+__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
|
|
64
|
+
|
|
65
|
+import string
|
|
66
|
+from weakref import ref as wkref
|
|
67
|
+import copy,sys
|
|
68
|
+import warnings
|
|
69
|
+import re
|
|
70
|
+import sre_constants
|
|
71
|
+import xml.sax.saxutils
|
|
72
|
+#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
|
|
73
|
+
|
|
74
|
+__all__ = [
|
|
75
|
+'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
|
|
76
|
+'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
|
|
77
|
+'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
|
|
78
|
+'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
|
|
79
|
+'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
|
|
80
|
+'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
|
|
81
|
+'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
|
|
82
|
+'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
|
|
83
|
+'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
|
|
84
|
+'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
|
|
85
|
+'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
|
|
86
|
+'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
|
|
87
|
+'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
|
|
88
|
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
|
|
89
|
+'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
|
|
90
|
+'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
|
|
91
|
+'indentedBlock',
|
|
92
|
+]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+"""
|
|
96
|
+Detect if we are running version 3.X and make appropriate changes
|
|
97
|
+Robert A. Clark
|
|
98
|
+"""
|
|
99
|
# Record which major Python version is running and define the names whose
# spelling differs between Python 2 and Python 3.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    _MAX_INT = sys.maxsize
    # Python 3 has no basestring type; str plays that role.
    basestring = str
else:
    _MAX_INT = sys.maxint
|
|
106
|
+
|
|
107
|
if _PY3K:
    # Python 3 strings are Unicode already, so the builtin is used directly.
    _ustr = str
else:
    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        str(obj) is attempted first, so behaviour is unchanged whenever that
        succeeds.  If it raises UnicodeEncodeError, unicode(obj) is returned
        instead.
        """
        try:
            text = str(obj)
        except UnicodeEncodeError:
            # The language reference says __str__ must return "a string
            # object"; whether a unicode object qualifies is unclear.  The
            # unicode object is returned unencoded rather than picking an
            # encoding and an error policy ('replace', escape codes, ...).
            text = unicode(obj)
        return text
|
|
133
|
+
|
|
134
|
+def _str2dict(strg):
|
|
135
|
+ return dict( [(c,0) for c in strg] )
|
|
136
|
+ #~ return set( [c for c in strg] )
|
|
137
|
+
|
|
138
|
+class _Constants(object):
|
|
139
|
+ pass
|
|
140
|
+
|
|
141
|
# Character-set constants.
# string.lowercase / string.uppercase are locale-dependent on Python 2 and do
# not exist on Python 3.  The ascii_* constants are available on both and
# always give the same 52 letters, so no version switch is needed.
alphas = string.ascii_lowercase + string.ascii_uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every printable character except whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
|
|
150
|
+
|
|
151
|
+class ParseBaseException(Exception):
|
|
152
|
+ """base exception class for all parsing runtime exceptions"""
|
|
153
|
+ __slots__ = ( "loc","msg","pstr","parserElement" )
|
|
154
|
+ # Performance tuning: we construct a *lot* of these, so keep this
|
|
155
|
+ # constructor as small and fast as possible
|
|
156
|
+ def __init__( self, pstr, loc=0, msg=None, elem=None ):
|
|
157
|
+ self.loc = loc
|
|
158
|
+ if msg is None:
|
|
159
|
+ self.msg = pstr
|
|
160
|
+ self.pstr = ""
|
|
161
|
+ else:
|
|
162
|
+ self.msg = msg
|
|
163
|
+ self.pstr = pstr
|
|
164
|
+ self.parserElement = elem
|
|
165
|
+
|
|
166
|
+ def __getattr__( self, aname ):
|
|
167
|
+ """supported attributes by name are:
|
|
168
|
+ - lineno - returns the line number of the exception text
|
|
169
|
+ - col - returns the column number of the exception text
|
|
170
|
+ - line - returns the line containing the exception text
|
|
171
|
+ """
|
|
172
|
+ if( aname == "lineno" ):
|
|
173
|
+ return lineno( self.loc, self.pstr )
|
|
174
|
+ elif( aname in ("col", "column") ):
|
|
175
|
+ return col( self.loc, self.pstr )
|
|
176
|
+ elif( aname == "line" ):
|
|
177
|
+ return line( self.loc, self.pstr )
|
|
178
|
+ else:
|
|
179
|
+ raise AttributeError(aname)
|
|
180
|
+
|
|
181
|
+ def __str__( self ):
|
|
182
|
+ return "%s (at char %d), (line:%d, col:%d)" % \
|
|
183
|
+ ( self.msg, self.loc, self.lineno, self.column )
|
|
184
|
+ def __repr__( self ):
|
|
185
|
+ return _ustr(self)
|
|
186
|
+ def markInputline( self, markerString = ">!<" ):
|
|
187
|
+ """Extracts the exception line from the input string, and marks
|
|
188
|
+ the location of the exception with a special symbol.
|
|
189
|
+ """
|
|
190
|
+ line_str = self.line
|
|
191
|
+ line_column = self.column - 1
|
|
192
|
+ if markerString:
|
|
193
|
+ line_str = "".join( [line_str[:line_column],
|
|
194
|
+ markerString, line_str[line_column:]])
|
|
195
|
+ return line_str.strip()
|
|
196
|
+
|
|
197
|
class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match the input.

       Attributes available by name:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """
|
|
205
|
+
|
|
206
|
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content is
       found; stops all parsing immediately."""
|
|
210
|
+
|
|
211
|
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an ErrorStop
       indicates that parsing must stop immediately because a syntax error
       that cannot be backtracked over has been found."""
    def __init__(self, pe):
        # carry over every detail of the originating exception pe
        base_init = super(ParseSyntaxException, self).__init__
        base_init( pe.pstr, pe.loc, pe.msg, pe.parserElement )
|
|
218
|
+
|
|
219
|
+#~ class ReparseException(ParseBaseException):
|
|
220
|
+ #~ """Experimental class - parse actions can raise this exception to cause
|
|
221
|
+ #~ pyparsing to reparse the input string:
|
|
222
|
+ #~ - with a modified input string, and/or
|
|
223
|
+ #~ - with a modified start location
|
|
224
|
+ #~ Set the values of the ReparseException in the constructor, and raise the
|
|
225
|
+ #~ exception in a parse action to cause pyparsing to use the new string/location.
|
|
226
|
+ #~ Setting the values as None causes no change to be made.
|
|
227
|
+ #~ """
|
|
228
|
+ #~ def __init_( self, newstring, restartLoc ):
|
|
229
|
+ #~ self.newParseText = newstring
|
|
230
|
+ #~ self.reparseLoc = restartLoc
|
|
231
|
+
|
|
232
|
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive

       The offending chain of parse elements is kept in parseElementTrace.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of % would be
        # treated as the argument list and raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
|
|
239
|
+
|
|
240
|
class _ParseResultsWithOffset(object):
    """Pairs a named result value with its position in the owning token list.

       Index 0 yields the value and index 1 the position, so an instance can
       be indexed or unpacked like a 2-tuple.
    """
    def __init__(self,p1,p2):
        self.tup = (p1,p2)
    def __getitem__(self,i):
        return self.tup[i]
    def __repr__(self):
        return repr(self.tup)

class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Passing an existing ParseResults returns that same object; __init__
        # then skips the basic setup because __doinit is already False.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Builds the token list from toklist (a list is copied, anything else
           becomes a one-element list) and, if name is given, registers the
           tokens under that results name.  modal=False marks the name as
           accumulating, so lookups return every match instead of the last.
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    # offset -1 marks a named value without a fixed position
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer and slice keys index the token list.  Any other key is a
           results name: the last value stored for it, or a ParseResults of all
           values if the name is accumulating."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Integer keys replace a token.  Other keys append v to the values
           stored under results name k (at position 0 unless v already is a
           value/position pair).  A ParseResults value gets a weak reference
           to self as its parent."""
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        """Integer and slice keys delete tokens and shift the recorded
           positions of named results down accordingly; any other key removes
           that results name."""
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices, highest first
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        # membership tests look at results names, not at tokens
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index, and moves the recorded
           position of every named result located after index up by one."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                # compare against the insertion index (this used to reference
                # an undefined name and raised NameError)
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results.  A name that is not a
           results name yields "" rather than raising AttributeError; a name
           listed in __slots__ yields None."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Appends the tokens and named results of other to self, shifting the
           positions of other's named results past the existing tokens."""
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # A negative position means "no fixed position" and maps to
                # the start of the appended tokens.  Written as an explicit
                # branch because the and/or idiom gave the wrong answer
                # whenever offset was 0.
                if a < 0:
                    return offset
                return a + offset
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in other.__tokdict.items() for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        """Returns the tokens flattened into one list of strings, with sep (if
           non-empty) placed between the top-level items."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; other tokens are returned
           unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object.  The token list and the
           name table are copied shallowly."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

           doctag overrides the tag of the outermost element.  Tokens without a
           results name are tagged ITEM, or left out if namedItemsOnly is true.
           formatted=False suppresses newlines and indentation.
        """
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                # nested results render themselves, named by position if possible
                out += [ res.asXML(namedItems.get(i),
                                    namedItemsOnly and doctag is None,
                                    nextLevelIndent,
                                    formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = namedItems.get(i)
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored, if any
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression, or None if no
           name can be determined."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be subscripted on Python 3, so take the
               # single entry through an iterator
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the parent is stored as the object itself (or None), not as a weakref
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None
|
|
597
|
+
|
|
598
|
+
|
|
599
|
def col (loc,strg):
    """Returns the 1-based column of position loc within strg, where newlines
       separate lines.  A position that holds a newline character itself is
       reported as column 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process, so columns refer to the expanded
       text.  See ParserElement.parseString for more information on parsing
       strings containing <TAB>s.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    line_start = strg.rfind("\n", 0, loc)
    return loc - line_start
|
|
610
|
+
|
|
611
|
def lineno(loc,strg):
    """Returns the 1-based line number of position loc within strg, found by
       counting the newlines that precede loc.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See ParserElement.parseString for
       more information on parsing strings containing <TAB>s.
    """
    newlines_before = strg.count("\n",0,loc)
    return newlines_before + 1
|
|
622
|
+
|
|
623
|
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "not found" with -1; index 0 is a genuine match (the
    # string starts with a newline) and must end the line as well
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
|
|
632
|
+
|
|
633
|
def _defaultStartDebugAction( instring, loc, expr ):
    """Prints the expression about to be tried together with the location,
       line number and column in instring."""
    described = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (described + position)
|
|
635
|
+
|
|
636
|
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Prints the expression that matched and the tokens it produced."""
    matched = "Matched " + _ustr(expr)
    print (matched + " -> " + str(toks.asList()))
|
|
638
|
+
|
|
639
|
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Prints the exception that was raised while trying an expression."""
    report = "Exception raised:" + _ustr(exc)
    print (report)
|
|
641
|
+
|
|
642
|
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and returns None."""
    return None
|
|
645
|
+
|
|
646
|
+class ParserElement(object):
|
|
647
|
+ """Abstract base level parser element class."""
|
|
648
|
+ DEFAULT_WHITE_CHARS = " \n\t\r"
|
|
649
|
+
|
|
650
|
+ def setDefaultWhitespaceChars( chars ):
|
|
651
|
+ """Overrides the default whitespace chars
|
|
652
|
+ """
|
|
653
|
+ ParserElement.DEFAULT_WHITE_CHARS = chars
|
|
654
|
+ setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
|
|
655
|
+
|
|
656
|
+ def __init__( self, savelist=False ):
|
|
657
|
+ self.parseAction = list()
|
|
658
|
+ self.failAction = None
|
|
659
|
+ #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
|
|
660
|
+ self.strRepr = None
|
|
661
|
+ self.resultsName = None
|
|
662
|
+ self.saveAsList = savelist
|
|
663
|
+ self.skipWhitespace = True
|
|
664
|
+ self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
|
|
665
|
+ self.copyDefaultWhiteChars = True
|
|
666
|
+ self.mayReturnEmpty = False # used when checking for left-recursion
|
|
667
|
+ self.keepTabs = False
|
|
668
|
+ self.ignoreExprs = list()
|
|
669
|
+ self.debug = False
|
|
670
|
+ self.streamlined = False
|
|
671
|
+ self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
|
|
672
|
+ self.errmsg = ""
|
|
673
|
+ self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
|
|
674
|
+ self.debugActions = ( None, None, None ) #custom debug actions
|
|
675
|
+ self.re = None
|
|
676
|
+ self.callPreparse = True # used to avoid redundant calls to preParse
|
|
677
|
+ self.callDuringTry = False
|
|
678
|
+
|
|
679
|
+ def copy( self ):
|
|
680
|
+ """Make a copy of this ParserElement. Useful for defining different parse actions
|
|
681
|
+ for the same parsing pattern, using copies of the original parse element."""
|
|
682
|
+ cpy = copy.copy( self )
|
|
683
|
+ cpy.parseAction = self.parseAction[:]
|
|
684
|
+ cpy.ignoreExprs = self.ignoreExprs[:]
|
|
685
|
+ if self.copyDefaultWhiteChars:
|
|
686
|
+ cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
|
|
687
|
+ return cpy
|
|
688
|
+
|
|
689
|
+ def setName( self, name ):
|
|
690
|
+ """Define name for this expression, for use in debugging."""
|
|
691
|
+ self.name = name
|
|
692
|
+ self.errmsg = "Expected " + self.name
|
|
693
|
+ if hasattr(self,"exception"):
|
|
694
|
+ self.exception.msg = self.errmsg
|
|
695
|
+ return self
|
|
696
|
+
|
|
697
|
+ def setResultsName( self, name, listAllMatches=False ):
|
|
698
|
+ """Define name for referencing matching tokens as a nested attribute
|
|
699
|
+ of the returned parse results.
|
|
700
|
+ NOTE: this returns a *copy* of the original ParserElement object;
|
|
701
|
+ this is so that the client can define a basic element, such as an
|
|
702
|
+ integer, and reference it in multiple places with different names.
|
|
703
|
+ """
|
|
704
|
+ newself = self.copy()
|
|
705
|
+ newself.resultsName = name
|
|
706
|
+ newself.modalResults = not listAllMatches
|
|
707
|
+ return newself
|
|
708
|
+
|
|
709
|
+ def setBreak(self,breakFlag = True):
|
|
710
|
+ """Method to invoke the Python pdb debugger when this element is
|
|
711
|
+ about to be parsed. Set breakFlag to True to enable, False to
|
|
712
|
+ disable.
|
|
713
|
+ """
|
|
714
|
+ if breakFlag:
|
|
715
|
+ _parseMethod = self._parse
|
|
716
|
+ def breaker(instring, loc, doActions=True, callPreParse=True):
|
|
717
|
+ import pdb
|
|
718
|
+ pdb.set_trace()
|
|
719
|
+ _parseMethod( instring, loc, doActions, callPreParse )
|
|
720
|
+ breaker._originalParseMethod = _parseMethod
|
|
721
|
+ self._parse = breaker
|
|
722
|
+ else:
|
|
723
|
+ if hasattr(self._parse,"_originalParseMethod"):
|
|
724
|
+ self._parse = self._parse._originalParseMethod
|
|
725
|
+ return self
|
|
726
|
+
|
|
727
|
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       The argument count is read from the callable's code object.  Three
       kinds of callable are tried in turn: functions, methods and classes
       (via __init__); callable objects whose __call__ is a bound method;
       and callable objects whose __call__ is a plain function.  Callables
       declared with *args are returned unchanged, as are callables that
       already take exactly 3 arguments.  Otherwise a wrapper accepting
       (s,l,t) is returned; the callable's __name__, __doc__ and __dict__
       are copied onto it where possible.

       Raises AttributeError if no code object can be found for f.
    """
    STAR_ARGS = 4   # co_flags bit that is set when the callable declares *args

    # when f is a class, the signature is read from its __init__ but the
    # class itself is what must be returned or wrapped
    restore = None
    try:
        if isinstance(f,type):
            restore = f
            f = f.__init__
        if not _PY3K:
            codeObj = f.func_code
        else:
            codeObj = f.__code__
        if codeObj.co_flags & STAR_ARGS:
            if restore is not None:
                return restore
            return f
        numargs = codeObj.co_argcount
        # do not count the implicit self argument
        if not _PY3K:
            if hasattr(f,"im_self"):
                numargs -= 1
        else:
            # in Python 3 a class's __init__ is a plain function with no
            # __self__, but it still receives the new instance first
            if hasattr(f,"__self__") or restore is not None:
                numargs -= 1
        if restore:
            f = restore
    except AttributeError:
        try:
            # not a function, must be a callable object, get info from the
            # function underlying its bound __call__ method
            if not _PY3K:
                call_im_func_code = f.__call__.im_func.func_code
            else:
                call_im_func_code = f.__call__.__func__.__code__

            if call_im_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_im_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    numargs -= 1
        except AttributeError:
            if not _PY3K:
                call_func_code = f.__call__.func_code
            else:
                call_func_code = f.__call__.__code__
            # not a bound method, get info directly from __call__ method
            if call_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    numargs -= 1

    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f

    # exactly one wrapper is chosen; as an if/elif chain the >3 case is no
    # longer overwritten by the zero-argument fallback
    if numargs > 3:
        def tmp(s,l,t):
            return f(f.__call__.__self__, s,l,t)
    elif numargs == 2:
        def tmp(s,l,t):
            return f(l,t)
    elif numargs == 1:
        def tmp(s,l,t):
            return f(t)
    else: #~ numargs == 0:
        def tmp(s,l,t):
            return f()

    # make the wrapper look like the wrapped callable; each attribute is
    # copied independently because not every callable provides all three
    try:
        tmp.__name__ = f.__name__
    except (AttributeError,TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    try:
        tmp.__doc__ = f.__doc__
    except (AttributeError,TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    try:
        tmp.__dict__.update(f.__dict__)
    except (AttributeError,TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
|
|
820
|
+
|
|
821
|
def setParseAction( self, *fns, **kwargs ):
    """Replace this element's parse actions with the given callables.

       A parse action is called after a successful match, in one of the
       forms fn(s,loc,toks), fn(loc,toks), fn(toks) or fn(), where:
        - s    = the original string being parsed (see note below)
        - loc  = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       An action that modifies the tokens can return them, and the returned
       value replaces the original tokens.  Otherwise the action does not
       need to return anything.

       The keyword argument callDuringTry is stored on the element; it
       defaults to False when not given.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
    """
    normalize = self._normalizeParseActionArgs
    self.parseAction = [ normalize(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
|
|
841
|
+
|
|
842
|
def addParseAction( self, *fns, **kwargs ):
    """Append parse actions to the ones already defined on this expression.
       See L{I{setParseAction}<setParseAction>} for the accepted call forms.
       callDuringTry stays set once it has been turned on."""
    normalize = self._normalizeParseActionArgs
    self.parseAction += [ normalize(fn) for fn in fns ]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
|
|
847
|
+
|
|
848
|
def setFailAction( self, fn ):
    """Define the action to perform if parsing fails at this expression.
       The fail action fn is a callable taking the arguments
       fn(s,loc,expr,err) where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw ParseFatalException
       if it is desired to stop parsing immediately.

       Returns self."""
    failureHandler = fn
    self.failAction = failureHandler
    return self
|
|
860
|
+
|
|
861
|
def _skipIgnorables( self, instring, loc ):
    """Advance loc past every run of ignorable expressions and return it.

       Each expression in self.ignoreExprs is applied repeatedly until it
       raises ParseException; the whole list is retried for as long as any
       expression matched during the previous pass."""
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                # this ignorable no longer matches here; try the next one
                pass
    return loc
|
|
873
|
+
|
|
874
|
def preParse( self, instring, loc ):
    """Return the location at which matching should start.

       Ignorable expressions are skipped first (when any are defined), then,
       if skipWhitespace is set, any run of characters from whiteChars."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
|
|
885
|
+
|
|
886
|
def parseImpl( self, instring, loc, doActions=True ):
    """Default matcher: consume nothing and produce no tokens.
       Returns loc unchanged together with a new empty list."""
    tokens = []
    return loc, tokens
|
|
888
|
+
|
|
889
|
def postParse( self, instring, loc, tokenlist ):
    """Default post-processing hook: return tokenlist unchanged."""
    result = tokenlist
    return result
|
|
891
|
+
|
|
892
|
+ #~ @profile
|
|
893
|
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element against instring at loc, without memoizing.

       Steps: optional pre-parse skipping, parseImpl, postParse, packaging
       of the tokens in a ParseResults, then the parse actions.  Returns a
       tuple (loc, retTokens) where loc is the location returned by
       parseImpl.

       Parse actions run only if doActions or self.callDuringTry is true.
       Each is called as fn(instring, tokensStart, retTokens); a return
       value other than None replaces the current results.

       An IndexError raised by parseImpl is reported as a ParseException
       located at the end of instring.
    """
    debugging = ( self.debug ) #and doActions )

    # slow path: taken whenever debug callbacks or a fail action may need
    # to observe the attempt
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # the location handed to callbacks and parse actions is the one
        # passed in, i.e. before any pre-parse skipping
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException, err:
            #~ print ("Exception raised:", err)
            # notify the exception debug action and the fail action, then
            # let the original exception propagate
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # fast path: the IndexError guard is only set up when the element
        # declares it may index past the end, or loc is already at or past
        # the end of the string
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            # same loop as the non-debugging branch below, but exceptions
            # from the actions are reported to the exception debug action
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException, err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
|
|
964
|
+
|
|
965
|
def tryParse( self, instring, loc ):
    """Attempt a match at loc with parse actions turned off and return the
       location after the match.  A ParseFatalException raised during the
       attempt is replaced by a ParseException for this element at loc."""
    try:
        outcome = self._parse( instring, loc, doActions=False )
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return outcome[0]
|
|
970
|
+
|
|
971
|
+ # this method gets repeatedly called during backtracking with the same arguments -
|
|
972
|
+ # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
|
|
973
|
+ def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
|
|
974
|
+ lookup = (self,instring,loc,callPreParse,doActions)
|
|
975
|
+ if lookup in ParserElement._exprArgCache:
|
|
976
|
+ value = ParserElement._exprArgCache[ lookup ]
|
|
977
|
+ if isinstance(value,Exception):
|
|
978
|
+ raise value
|
|
979
|
+ return value
|
|
980
|
+ else:
|
|
981
|
+ try:
|
|
982
|
+ value = self._parseNoCache( instring, loc, doActions, callPreParse )
|
|
983
|
+ ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
|
|
984
|
+ return value
|
|
985
|
+ except ParseBaseException, pe:
|
|
986
|
+ ParserElement._exprArgCache[ lookup ] = pe
|
|
987
|
+ raise
|
|
988
|
+
|
|
989
|
# default parse entry point is the non-memoizing implementation
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Empty the shared ParserElement._exprArgCache dictionary."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)
|
|
996
|
+
|
|
997
|
_packratEnabled = False
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Both valid results
       and parsing exceptions are memoized.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call ParserElement.enablePackrat().  If your program
       uses psyco to "compile as you go", you must call enablePackrat
       before calling psyco.full().  If you do not do this, Python will
       crash.  For best results, call enablePackrat() immediately after
       importing pyparsing.

       Calling it again once packrat parsing is enabled has no effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
|
|
1018
|
+
|
|
1019
|
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string and return the
       matched tokens.  This is the main interface to the client code, once
       the complete expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set parseAll to True (equivalent to ending
       the grammar with StringEnd()).

       Note: parseString implicitly calls expandtabs() on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the loc argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling parseWithTabs on your grammar before calling parseString
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full (s,loc,toks) signature, and
          reference the input string using the parse action's s argument
        - explicitly expand the tabs in your input string before calling
          parseString
    """
    # every top-level parse starts with an empty argument cache
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    endLoc, results = self._parse( instring, 0 )
    if parseAll:
        # raises if anything is left after the match
        StringEnd()._parse( instring, endLoc )
    return results
|
|
1053
|
+
|
|
1054
|
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  This is a generator:
       each match is yielded as a tuple of the matching tokens, the start
       location, and the end location.  May be called with optional
       maxMatches argument, to clip scanning after 'n' matches are found.

       Where the expression does not match, scanning resumes one character
       further on.  A match that consumes no input is yielded once and
       scanning then also moves on by one character.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bound once outside the loop, which runs for every scan position
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    while loc <= instrlen and matches < maxMatches:
        try:
            preloc = preparseFn( instring, loc )
            nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
        except ParseException:
            loc = preloc+1
        else:
            matches += 1
            yield tokens, preloc, nextLoc
            if nextLoc > preloc:
                loc = nextLoc
            else:
                # zero-width match: step past it, otherwise the same empty
                # match would be found at this position forever
                loc = preloc+1
|
|
1085
|
+
|
|
1086
|
def transformString( self, instring ):
    """Extension to scanString, to modify matching text with modified tokens that may
       be returned from a parse action.  To use transformString, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking transformString() on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  transformString() returns the resulting transformed string.

       Note that this sets keepTabs on the element."""
    pieces = []
    resumeAt = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    for toks, start, end in self.scanString( instring ):
        # text between the previous match and this one is copied verbatim
        pieces.append( instring[resumeAt:start] )
        if toks:
            if isinstance(toks,ParseResults):
                pieces.extend( toks.asList() )
            elif isinstance(toks,list):
                pieces.extend( toks )
            else:
                pieces.append( toks )
        resumeAt = end
    pieces.append( instring[resumeAt:] )
    return "".join( [ _ustr(piece) for piece in pieces ] )
|
|
1110
|
+
|
|
1111
|
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to scanString, simplifying the access to the tokens found
       to match the given parse expression: only the tokens of each match are
       kept, collected in a ParseResults.  May be called with optional
       maxMatches argument, to clip searching after 'n' matches are found.
    """
    found = []
    for toks, start, end in self.scanString( instring, maxMatches ):
        found.append( toks )
    return ParseResults( found )
|
|
1117
|
+
|
|
1118
|
def __add__(self, other ):
    """Implementation of + operator - returns And of self followed by other.
       A string operand is converted to a Literal.  Any other operand that is
       not a ParserElement produces a SyntaxWarning and a result of None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return And( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1127
|
+
|
|
1128
|
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a ParserElement.
       A string operand is converted to a Literal and added to self.  Any other
       operand that is not a ParserElement produces a SyntaxWarning and None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1137
|
+
|
|
1138
|
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop: the
       resulting And holds self, an And._ErrorStop() marker and other.
       A string operand is converted to a Literal.  Any other operand that is
       not a ParserElement produces a SyntaxWarning and a result of None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return And( [ self, And._ErrorStop(), rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1147
|
+
|
|
1148
|
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a ParserElement.
       A string operand is converted to a Literal and self is subtracted from
       it.  Any other operand that is not a ParserElement produces a
       SyntaxWarning and a result of None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1157
|
+
|
|
1158
|
def __mul__(self,other):
    """Implementation of * operator - repeats this expression.

       other may be:
        - an int n: exactly n repetitions (n == 1 returns self itself)
        - a tuple (min, max) of ints: between min and max repetitions
        - a tuple (min, None) or (min,): at least min repetitions;
          min of 0 gives ZeroOrMore(self), min of 1 gives OneOrMore(self)
        - a tuple (None, max): same as (0, max)
        - an empty tuple or (None, None): same as (0, None)

       Raises TypeError for any other operand, and ValueError for a negative
       count, a max smaller than min, or a request for zero repetitions
       (0 or (0,0)).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so that only the 2-tuple form is left
        if len(other)==0:
            other = (None,None)
        elif len(other)==1:
            other = (other[0],None)
        if len(other)==2:
            if other[0] is None:
                other = (0, other[1])
            if isinstance(other[0],int) and other[1] is None:
                # open-ended upper bound
                if other[0] == 0:
                    return ZeroOrMore(self)
                if other[0] == 1:
                    return OneOrMore(self)
                else:
                    return self*other[0] + ZeroOrMore(self)
            elif isinstance(other[0],int) and isinstance(other[1],int):
                minElements, optElements = other
                # from here on optElements counts the repetitions beyond
                # the required minimum
                optElements -= minElements
            else:
                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
        else:
            raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % type(other))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        # the optional tail is nested: Optional(self + Optional(self + ...))
        def makeOptionalList(n):
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
|
|
1212
|
+
|
|
1213
|
def __rmul__(self, other):
    """Implementation of * operator when the left operand is not a
       ParserElement; gives the same result as self * other."""
    product = self.__mul__(other)
    return product
|
|
1215
|
+
|
|
1216
|
def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst of self and other.
       A string operand is converted to a Literal.  Any other operand that is
       not a ParserElement produces a SyntaxWarning and a result of None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return MatchFirst( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1225
|
+
|
|
1226
|
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a ParserElement.
       A string operand is converted to a Literal and or-ed with self.  Any
       other operand that is not a ParserElement produces a SyntaxWarning and
       a result of None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1235
|
+
|
|
1236
|
def __xor__(self, other ):
    """Implementation of ^ operator - returns Or of self and other.
       A string operand is converted to a Literal.  Any other operand that is
       not a ParserElement produces a SyntaxWarning and a result of None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return Or( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1245
|
+
|
|
1246
|
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a ParserElement.
       A string operand is converted to a Literal and xor-ed with self.  Any
       other operand that is not a ParserElement produces a SyntaxWarning and
       a result of None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1255
|
+
|
|
1256
|
def __and__(self, other ):
    """Implementation of & operator - returns Each of self and other.
       A string operand is converted to a Literal.  Any other operand that is
       not a ParserElement produces a SyntaxWarning and a result of None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return Each( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1265
|
+
|
|
1266
|
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a ParserElement.
       A string operand is converted to a Literal and and-ed with self.  Any
       other operand that is not a ParserElement produces a SyntaxWarning and
       a result of None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1275
|
+
|
|
1276
|
def __invert__( self ):
    """Implementation of ~ operator - wraps this element in a NotAny."""
    negated = NotAny( self )
    return negated
|
|
1279
|
+
|
|
1280
|
def __call__(self, name):
    """Shortcut for setResultsName, leaving listAllMatches at its default::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    named = self.setResultsName(name)
    return named
|
|
1287
|
+
|
|
1288
|
def suppress( self ):
    """Return this ParserElement wrapped in a Suppress, which keeps its
       output out of the results; useful to keep punctuation from
       cluttering up returned output.
    """
    silenced = Suppress( self )
    return silenced
|
|
1293
|
+
|
|
1294
|
def leaveWhitespace( self ):
    """Turn off the skipping of whitespace before matching the characters in
       this ParserElement's defined pattern.  This is normally only used
       internally by the pyparsing module, but may be needed in some
       whitespace-sensitive grammars.  Returns self.
    """
    setattr( self, "skipWhitespace", False )
    return self
|
|
1301
|
+
|
|
1302
|
def setWhitespaceChars( self, chars ):
    """Override the default whitespace chars for this element.
       Whitespace skipping is switched on, chars becomes the set of
       characters skipped, and copyDefaultWhiteChars is cleared.
       Returns self.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
|
|
1309
|
+
|
|
1310
|
def parseWithTabs( self ):
    """Override the default behavior of expanding <TAB>s to spaces before
       parsing the input string, by setting keepTabs on this element.
       Must be called before parseString when the input grammar contains
       elements that match <TAB> characters.  Returns self."""
    setattr( self, "keepTabs", True )
    return self
|
|
1316
|
+
|
|
1317
|
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       An expression that is not already a Suppress is wrapped in one before
       being added.  A Suppress is only added if it is not in the list yet.
       Returns self.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self
|
|
1328
|
+
|
|
1329
|
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       using the given callbacks.  Any callback passed as a false value is
       replaced by the corresponding module default.  Also turns the debug
       flag on.  Returns self."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
|
|
1336
|
+
|
|
1337
|
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set flag to True to enable (the default debug actions are installed),
       False to disable.  Returns self."""
    if not flag:
        self.debug = False
    else:
        self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
|
|
1345
|
+
|
|
1346
|
def __str__( self ):
    """The string form of an element is its name attribute."""
    label = self.name
    return label
|
|
1348
|
+
|
|
1349
|
def __repr__( self ):
    """The repr of an element is its string form, converted with _ustr."""
    text = _ustr(self)
    return text
|
|
1351
|
+
|
|
1352
|
def streamline( self ):
    """Mark this element as streamlined and reset strRepr to None.
       Returns self."""
    self.strRepr = None
    self.streamlined = True
    return self
|
|
1356
|
+
|
|
1357
|
def checkRecursion( self, parseElementList ):
    """Recursion check hook; this implementation does nothing."""
    return None
|
|
1359
|
+
|
|
1360
|
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite
       recursive definitions.  This implementation runs checkRecursion with
       a fresh, empty list; validateTrace is not used here."""
    visited = []
    self.checkRecursion( visited )
|
|
1363
|
+
|
|
1364
|
def parseFile( self, file_or_filename ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       An argument without a read() method is treated as a filename and
       opened in binary mode.  Returns the result of parseString on the
       contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        # close the file even if reading it fails
        try:
            file_contents = f.read()
        finally:
            f.close()
    return self.parseString(file_contents)
|
|
1376
|
+
|
|
1377
|
def getException(self):
    """Build a new ParseException for this element, with an empty input
       string, location 0 and this element's errmsg."""
    failure = ParseException("",0,self.errmsg,self)
    return failure
|
|
1379
|
+
|
|
1380
|
def __getattr__(self,aname):
    """Create the myException attribute on first access.

       The exception comes from getException() and is stored on the
       instance before being returned.  Any other missing attribute raises
       AttributeError."""
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    created = self.getException()
    self.myException = created
    return created
|
|
1386
|
+
|
|
1387
|
def __eq__(self,other):
    """Compare this element with other.

       When other is a string, the result tells whether this expression,
       followed by StringEnd(), parses that string.  For any other operand
       the comparison is by identity.
    """
    if isinstance(other, basestring):
        try:
            (self + StringEnd()).parseString(_ustr(other))
            return True
        except ParseBaseException:
            return False
    else:
        # compare by identity, in line with the id-based __hash__; going
        # through a super() proxy here made an element unequal to itself
        return self is other
|
|
1396
|
+
|
|
1397
|
def __hash__(self):
    """Hash an element by the identity of the object."""
    identity = id(self)
    return hash(identity)
|
|
1399
|
+
|
|
1400
|
def __req__(self,other):
    """Return the result of self == other."""
    same = (self == other)
    return same
|
|
1402
|
+
|
|
1403
|
+
|
|
1404
|
class Token(ParserElement):
    """Abstract ParserElement subclass, the base for atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name through the parent class, then rebuild errmsg from
           this element's name.  Returns what the parent's setName returned."""
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
|
|
1415
|
+
|
|
1416
|
+
|
|
1417
|
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
|
|
1424
|
+
|
|
1425
|
+
|
|
1426
|
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this element's exception object, updated with
           the current location and input string."""
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
|
|
1441
|
+
|
|
1442
|
+
|
|
1443
|
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (location after the match, matched string) when instring
           has the match string at loc; otherwise raise this element's
           exception object, updated with loc and instring."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
|
|
1475
|
+
|
|
1476
|
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to Keyword.DEFAULT_KEYWORD_CHARS (initially all alphanumerics + "_"
       and "$"); caseless allows case-insensitive matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        # Resolve the default at call time rather than at definition time, so
        # that a prior call to setDefaultKeywordChars() is honored by Keywords
        # created afterwards.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # only a warning is issued here; firstMatchChar is left unset
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that the characters immediately
        before and after it (when present) are not identifier characters.
        Returns (loc after the keyword, the keyword string) or raises the
        element's parse exception."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy of this Keyword whose identChars is reset to the
        current class-wide default."""
        c = super(Keyword,self).copy()
        # NOTE(review): this discards any custom identChars given to the
        # constructor - confirm that this is intended before changing it.
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
|
|
1536
|
+
|
|
1537
|
+
|
|
1538
|
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the parent stores the upper-cased form as self.match for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # keep the string exactly as given; this is what gets returned
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice with the stored upper-cased
        match string; on success return the defining string."""
        endloc = loc + self.matchLen
        candidate = instring[ loc:endloc ]
        if candidate.upper() == self.match:
            return endloc, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
|
|
1559
|
+
|
|
1560
|
class CaselessKeyword(Keyword):
    """Case-insensitive version of Keyword: matches the keyword string
       regardless of letter case, and only where it is not adjacent to an
       identifier character.  The returned token is the keyword string as
       given to the constructor.
    """
    def __init__( self, matchString, identChars=None ):
        # Resolve the default at call time so that a prior call to
        # Keyword.setDefaultKeywordChars() is honored.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, ignoring case.  Both the following and
        the preceding character (when present) must not be identifier
        characters; the preceding-character test keeps this consistent with
        Keyword's own caseless matching."""
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
|
|
1573
|
+
|
|
1574
|
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with default length limits and no space in either
        # character set, matching is delegated to a compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single initial character may be emitted as an escaped
                # literal.  This must test initCharsOrig: escaping a
                # multi-character initChars as a literal would demand the
                # whole string in sequence instead of any one of its chars.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: fall back to the character-by-character scan
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc and return (loc after the word, matched text);
        raise the element's parse exception if no acceptable word is there."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that continues past maxLen is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # keyword mode: no body character may adjoin the word on either side
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        # use the base-class string if it can be produced; otherwise build
        # (and cache in strRepr) a short description of the character sets
        try:
            return super(Word,self).__str__()
        except:
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first 4 characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
|
|
1700
|
+
|
|
1701
|
+
|
|
1702
|
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if not len(pattern):
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # warn about the bad pattern, then let the compile error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled regex at loc.  On success return the end
        location and a ParseResults holding the matched text, with each
        named group of the match also stored under its group name."""
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        endloc = found.end()
        namedGroups = found.groupdict()
        tokens = ParseResults(found.group())
        for groupName in namedGroups:
            tokens[groupName] = namedGroups[groupName]
        return endloc,tokens

    def __str__( self ):
        # use the base-class string if it can be produced; otherwise fall
        # back to a cached description built from the pattern
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
|
|
1757
|
+
|
|
1758
|
+
|
|
1759
|
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
        """
        super(QuotedString,self).__init__()

        # surrounding whitespace is stripped from the quote delimiters
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # The regex is: opening quote, then any number of "body" alternatives,
        # then the closing quote.  The first alternative is a single character
        # that is neither the first end-quote character nor the escape
        # character (nor a line break, unless multiline).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            bodyStart = r'%s(?:[^%s%s]'
        else:
            self.flags = 0
            bodyStart = r'%s(?:[^%s\n\r%s]'
        openQuote = re.escape(self.quoteChar)
        endFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
        escInClass = ''
        if escChar is not None:
            escInClass = _escapeRegexRangeChars(escChar) or ''
        self.pattern = bodyStart % ( openQuote, endFirst, escInClass )

        if len(self.endQuoteChar) > 1:
            # for a multi-character end quote, also accept each proper prefix
            # of it that is followed by a character which breaks the sequence
            partials = []
            for i in range(len(self.endQuoteChar)-1,0,-1):
                partials.append( "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i])) )
            self.pattern += '|(?:' + ')|(?:'.join(partials) + ')'
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to drop the escape character when unquoting
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.  Returns the end location and the
        matched text; when unquoteResults is set the delimiters are removed
        and escape sequences are replaced."""
        # check the first character before running the regex
        found = None
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        endloc = found.end()
        text = found.group()

        if self.unquoteResults:

            # strip off quotes
            text = text[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(text,basestring):
                # replace escaped characters
                if self.escChar:
                    text = re.sub(self.escCharReplacePattern,"\g<1>",text)

                # replace escaped quotes
                if self.escQuote:
                    text = text.replace(self.escQuote, self.endQuoteChar)

        return endloc, text

    def __str__( self ):
        # use the base-class string if it can be produced; otherwise fall
        # back to a cached description of the delimiters
        try:
            return super(QuotedString,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
|
|
1873
|
+
|
|
1874
|
+
|
|
1875
|
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        # max <= 0 means "no upper limit"
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc up to the first disallowed character
        (or maxLen characters); return (end location, matched text) or raise
        the element's parse exception."""
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        limit = min( start+self.maxLen, len(instring) )
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__( self ):
        # use the base-class string if it can be produced; otherwise fall
        # back to a cached, abbreviated description of the excluded chars
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
|
|
1945
|
+
|
|
1946
|
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display names used to build this element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        # max <= 0 means "no upper limit"
        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting at
        loc; return (end location, matched text) or raise the element's
        parse exception."""
        if instring[ loc ] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        start = loc
        loc += 1
        limit = min( start + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
|
|
2002
|
+
|
|
2003
|
+
|
|
2004
|
class _PositionToken(Token):
    """Base class for the position-testing tokens defined below it; the
    element is named after its concrete class."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
|
|
2010
|
+
|
|
2011
|
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column
        (or a non-space character, or the end of input) is reached."""
        if col(loc,instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between loc and the target column, or raise a
        ParseException if loc is already past that column."""
        thiscol = col( loc, instring )
        if thiscol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        return newloc, instring[ loc: newloc ]
|
|
2033
|
+
|
|
2034
|
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # only spaces and tabs are skipped before this element
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return loc advanced by one when the character found after the
        base-class pre-parse is a newline; otherwise return loc unchanged."""
        afterSkip = super(LineStart,self).preParse(instring,loc)
        if instring[afterSkip] == "\n":
            return loc + 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed (with no tokens) when loc is 0, equals the pre-parsed
        start of the input, or directly follows a newline; otherwise raise
        the element's parse exception."""
        if ( loc != 0 and
             loc != self.preParse( instring, 0 ) and
             instring[loc-1] != "\n" ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
|
|
2058
|
+
|
|
2059
|
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # only spaces and tabs are skipped before this element
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """At a newline, consume it and return it as the token; exactly at the
        end of the input, return loc+1 with no tokens; anywhere else raise
        the element's parse exception."""
        instrlen = len(instring)
        if loc == instrlen:
            return loc+1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
|
|
2084
|
+
|
|
2085
|
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed (with no tokens) at loc 0, or when everything before loc
        is skipped by pre-parsing from position 0; otherwise raise the
        element's parse exception."""
        # a non-zero loc still counts as the start if the text before it is
        # just whitespace and ignorables
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
|
|
2102
|
+
|
|
2103
|
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when loc is at or beyond the end of the
        input (returning loc+1 when exactly at the end); raise the element's
        parse exception when input remains."""
        instrlen = len(instring)
        if loc == instrlen:
            return loc+1, []
        if loc > instrlen:
            return loc, []
        # reached when input remains (loc < instrlen), and also in the case
        # where loc compares as neither <, == nor > the input length
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
|
|
2126
|
+
|
|
2127
|
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed (with no tokens) at loc 0, or when the previous character
        is not a word character and the current one is; otherwise raise the
        element's parse exception."""
        if loc == 0:
            return loc, []
        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
|
|
2148
|
+
|
|
2149
|
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed (with no tokens) when loc is at or past the end of the
        input, or when the previous character is a word character and the
        current one is not; otherwise raise the element's parse exception."""
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # At loc 0 there is no preceding character, so this cannot be the
            # end of a word.  The explicit test also keeps instring[loc-1]
            # from wrapping around to the last character of the input.
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
|
|
2173
|
+
|
|
2174
|
+
|
|
2175
|
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        super(ParseExpression,self).__init__(savelist)
        # a list is stored as given; a string becomes a one-element list
        # holding a Literal; anything else is wrapped in a one-element list
        if isinstance( exprs, list ):
            self.exprs = exprs
        else:
            if isinstance( exprs, basestring ):
                exprs = Literal( exprs )
            self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add another expression to the end of the contained list and
        invalidate the cached string representation."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies of the contained expressions, not the originals
        copies = [ e.copy() for e in self.exprs ]
        self.exprs = copies
        for sub in copies:
            sub.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression on this element and pass the
        registered form on to every contained expression.  A Suppress that
        is already in ignoreExprs is not registered again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for sub in self.exprs:
            sub.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        # use the base-class string if it can be produced; otherwise fall
        # back to a cached description listing the contained expressions
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this element and its contained expressions, then
        flatten directly nested expressions of the same class."""
        super(ParseExpression,self).streamline()

        for sub in self.exprs:
            sub.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        def collapsible( nested ):
            return ( isinstance( nested, self.__class__ ) and
                     not(nested.parseAction) and
                     nested.resultsName is None and
                     not nested.debug )

        if len(self.exprs) == 2:
            head = self.exprs[0]
            if collapsible( head ):
                self.exprs = head.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError  |= head.mayIndexError

            # taken from the list as it stands after the step above
            tail = self.exprs[-1]
            if collapsible( tail ):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, passing each the trace
        extended with this element, then check this element for recursion."""
        trace = validateTrace[:]+[self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion( [] )
|
|
2267
|
+
|
|
2268
|
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element; constructing it always yields the one shared instance."""
        def __new__(cls,*args,**kwargs):
            return And._ErrorStop.instance
    # the shared marker is a plain Empty; parseImpl recognises it by identity
    _ErrorStop.instance = Empty()
    _ErrorStop.instance.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build a sequence from exprs (a list, a string, or a single expression).

        The sequence may return empty only if every contained expression
        may.  Whitespace settings are taken from the first contained
        expression.
        """
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Use the normalized self.exprs list rather than the raw argument:
        # the base class wraps a string or a single expression into a list,
        # and indexing the raw argument in those cases does not yield the
        # first contained expression.
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order and accumulate the tokens.

        Returns (loc, resultlist).  Once the _ErrorStop marker has been
        passed, a ParseBaseException or IndexError from a later expression
        is re-raised as ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if e is And._ErrorStop.instance:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseBaseException:
                    # sys.exc_info() is used instead of the "except X, name"
                    # form, which is a syntax error on Python 3
                    raise ParseSyntaxException( sys.exc_info()[1] )
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that have either positional tokens or keys
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Implement 'self += other' by appending other (a string becomes a Literal)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on contained expressions, in order.

        Stops after the first expression whose mayReturnEmpty is false.
        """
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return self.name if set, else the cached "{a b c}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
|
|
2333
|
+
|
|
2334
|
+
|
|
2335
|
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any alternative may."""
        super(Or,self).__init__(exprs, savelist)
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative and parse with the one that reaches furthest.

        Each alternative is first tried with tryParse.  The alternative
        whose returned location is greatest is then parsed for real with
        _parse; the first one listed wins a tie because the comparison is
        strict.  If none matches, the ParseException with the greatest loc
        is raised.  An IndexError counts as a failure at len(instring).
        """
        maxExcLoc = -1
        maxMatchLoc = -1
        maxException = None
        maxMatchExp = None
        for e in self.exprs:
            try:
                loc2 = e.tryParse( instring, loc )
            except ParseException:
                # sys.exc_info() is used instead of the "except X, name"
                # form, which is a syntax error on Python 3
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)
            else:
                if loc2 > maxMatchLoc:
                    maxMatchLoc = loc2
                    maxMatchExp = e

        if maxMatchLoc < 0:
            if maxException is not None:
                raise maxException
            else:
                raise ParseException(instring, loc, "no defined alternatives to match", self)

        return maxMatchExp._parse( instring, loc, doActions )

    def __ixor__(self, other ):
        """Implement 'self ^= other' by appending other (a string becomes a Literal)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return self.name if set, else the cached "{a ^ b ^ c}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
|
|
2394
|
+
|
|
2395
|
+
|
|
2396
|
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation.

        With a non-empty exprs argument it may return empty if any
        alternative may; with an empty argument it is marked as able to
        return empty.
        """
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses.

        If none matches, the ParseException with the greatest loc is
        raised.  An IndexError counts as a failure at len(instring).
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException:
                # sys.exc_info() is used instead of the "except X, name"
                # form, which is a syntax error on Python 3
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Implement 'self |= other' by appending other (a string becomes a Literal)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return self.name if set, else the cached "{a | b | c}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
|
|
2453
|
+
|
|
2454
|
+
|
|
2455
|
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Build the unordered group; it may return empty only if every member may."""
        super(Each,self).__init__(exprs, savelist)
        for member in self.exprs:
            if not member.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        # the member groupings are computed on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the members in whatever order they occur, then parse them in that order.

        A trial pass with tryParse establishes the match order.  If any
        required member remains unmatched, ParseException is raised.
        Otherwise each matched element is parsed with _parse and the
        results are merged into one ParseResults.  Returns
        (loc, finalResults).
        """
        if self.initExprGroups:
            members = self.exprs
            self.optionals = [ e.expr for e in members if isinstance(e,Optional) ]
            self.multioptionals = [ e.expr for e in members if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in members if isinstance(e,OneOrMore) ]
            self.required = [ e for e in members if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        trialLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # repeat trial passes until a pass in which every candidate fails
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for cand in candidates:
                try:
                    trialLoc = cand.tryParse( instring, trialLoc )
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(cand)
                    if cand in pendingReqd:
                        pendingReqd.remove(cand)
                    elif cand in pendingOpt:
                        pendingOpt.remove(cand)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder.extend( [ e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt ] )

        resultlist = []
        for matched in matchOrder:
            loc, results = matched._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for res in resultlist:
            # for keys present on both sides, build the combined value before merging
            dups = {}
            for key in res.keys():
                if key in finalResults.keys():
                    combined = ParseResults(finalResults[key])
                    combined += ParseResults(res[key])
                    dups[key] = combined
            finalResults += ParseResults(res)
            for key, value in dups.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__( self ):
        """Return self.name if set, else the cached "{a & b & c}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " & ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every member."""
        seen = parseElementList[:] + [ self ]
        for member in self.exprs:
            member.checkRecursion( seen )
|
|
2539
|
+
|
|
2540
|
+
|
|
2541
|
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr (a string becomes a Literal; None is allowed).

        When expr is not None, its parsing attributes are copied onto this
        wrapper and its ignore expressions are added to the wrapper's.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression.

        Raises ParseException if no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a copy of the wrapped expression.

        Returns self.
        """
        self.skipWhitespace = False
        # copy only when there is an expression: the original copied first
        # and tested for None afterwards, so expr=None raised AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on the wrapped expression.

        A Suppress instance already present in self.ignoreExprs is not
        registered again.  Returns self.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression; returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already in parseElementList.

        Otherwise continue the check into the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run this element's recursion check.

        validateTrace is copied, never modified, so the shared default list
        stays empty.
        """
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string form if available, else "<ClassName>:(<expr>)".

        The generated form is cached in self.strRepr.
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # The base class could not supply a string; fall through to the
            # generated representation.  Only Exception is caught so that
            # KeyboardInterrupt and SystemExit are not silently discarded.
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
|
|
2611
|
+
|
|
2612
|
+
|
|
2613
|
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        """Wrap expr as a lookahead; the lookahead itself may return empty."""
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Check that the wrapped expression matches at loc without consuming input.

        The result of tryParse is discarded; any exception it raises
        propagates to the caller.  Returns the unchanged loc and an empty
        token list.
        """
        lookahead = self.expr
        lookahead.tryParse( instring, loc )
        return loc, []
|
|
2625
|
+
|
|
2626
|
+
|
|
2627
|
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        """Wrap expr as a negative lookahead and set the failure message."""
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed, consuming nothing, only if the wrapped expression fails at loc.

        If the wrapped expression matches, self.myException is updated with
        the current location and input string and then raised.
        """
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent, so the lookahead succeeds
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        """Return self.name if set, else the cached "~{expr}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
|
|
2662
|
+
|
|
2663
|
+
|
|
2664
|
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        """Wrap expr for repetition; zero matches are allowed, so it may return empty."""
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it fails.

        Returns (loc, tokens) for everything matched so far.  If the very
        first attempt fails, loc is unchanged and tokens is an empty list.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = bool( self.ignoreExprs )
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                # keep results that have either positional tokens or keys
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            # the repetition ends at the first failed attempt
            pass

        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else the cached "[expr]..." form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class, then set saveAsList on the returned element."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
|
|
2701
|
+
|
|
2702
|
+
|
|
2703
|
+class OneOrMore(ParseElementEnhance):
|
|
2704
|
+ """Repetition of one or more of the given expression."""
|
|
2705
|
+ def parseImpl( self, instring, loc, doActions=True ):
|
|
2706
|
+ # must be at least one
|
|
2707
|
+ loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
|
|
2708
|
+ try:
|
|
2709
|
+ hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
|
|
2710
|
+ while 1:
|
|
2711
|
+ if hasIgnoreExprs:
|
|
2712
|
+ preloc = self._skipIgnorables( instring, loc )
|
|
2713
|
+ else:
|
|
2714
|
+ preloc = loc
|
|
2715
|
+ loc, tmptokens = self.expr._parse( instring, preloc, doActions )
|
|
2716
|
+ if tmptokens or tmptokens.keys():
|
|
2717
|
+ tokens += tmptokens
|
|
2718
|
+ except (ParseException,IndexError):
|
|
2719
|
+ pass
|
|
2720
|
+
|
|
2721
|
+ return loc, tokens
|
|
2722
|
+
|
|
2723
|
+ def __str__( self ):
|
|
2724
|
+ if hasattr(self,"name"):
|
|
2725
|
+ return self.name
|
|
2726
|
+
|
|
2727
|
+ if self.strRepr is None:
|
|
2728
|
+ self.strRepr = "{" + _ustr(self.expr) + "}..."
|
|
2729
|
+
|
|
2730
|
+ return self.strRepr
|
|
2731
|
+
|
|
2732
|
+ def setResultsName( self, name, listAllMatches=False ):
|
|
2733
|
+ ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
|
|
2734
|
+ ret.saveAsList = True
|
|
2735
|
+ return ret
|
|
2736
|
+
|
|
2737
|
class _NullToken(object):
    """Placeholder object that is always false and whose string form is empty."""
    def __bool__(self):
        return False
    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__

    def __str__(self):
        return ""

# sentinel used as the default of Optional's "default" argument, compared by identity
_optionalNotMatched = _NullToken()
|
|
2745
|
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        """Wrap exprs as optional; default is returned as the token when it does not match."""
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression, substituting the default on failure.

        On failure loc is unchanged.  The tokens are empty when no default
        was given; otherwise they hold the default, which is also stored
        under the wrapped expression's results name if it has one.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else the cached "[expr]" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
|
|
2777
|
+
|
|
2778
|
+
|
|
2779
|
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        """Set up a skip to the target expression other.

        When ignore is given, the target is replaced by a copy so that the
        ignore expression is not registered on the caller's object.
        """
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time until the target expression matches.

        Returns the new location and a one-element list holding the skipped
        text.  With includeMatch set, the target is consumed too and, if it
        produced tokens, the element is a ParseResults of the skipped text
        plus those tokens.  If the end of the input is passed without a
        match, self.myException is updated and raised.
        """
        startLoc = loc
        instrlen = len(instring)
        target = self.expr
        while loc <= instrlen:
            try:
                loc = target._skipIgnorables( instring, loc )
                # trial match only: parse actions are not run here
                target._parse( instring, loc, doActions=False, callPreParse=False )
                if not self.includeMatch:
                    return loc, [ instring[startLoc:loc] ]

                skipText = instring[startLoc:loc]
                loc, mat = target._parse(instring,loc,doActions,callPreParse=False)
                if not mat:
                    return loc, [ skipText ]
                skipRes = ParseResults( skipText )
                skipRes += mat
                return loc, [ skipRes ]
            except (ParseException,IndexError):
                loc += 1

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
|
|
2822
|
+
|
|
2823
|
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        """Create the placeholder, optionally already bound to other."""
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Bind other (a string becomes a Literal) as the wrapped expression.

        The expression's parsing attributes are copied onto this element
        and its ignore expressions are added.  Returns None.
        """
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once.

        The streamlined flag is set before descending, so a recursive
        grammar that reaches this element again returns immediately.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression unless self is already in validateTrace.

        The recursion check runs in either case.  validateTrace is copied,
        never modified, so the shared default list stays empty.
        """
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return self.name if set, else "Forward: " plus the wrapped expression's string."""
        if hasattr(self,"name"):
            return self.name

        # While the wrapped expression is rendered, this object poses as
        # _ForwardNoRecurse so that a recursive reference prints "..." and
        # does not recurse forever.  The real class is saved and restored,
        # so instances of Forward subclasses keep their own class.
        originalClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = originalClass
        return "Forward: "+retString

    def copy(self):
        """Return a copy.

        An unbound Forward yields a new Forward bound to self, so a later
        definition of self is visible through the copy.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
|
|
2893
|
+
|
|
2894
|
class _ForwardNoRecurse(Forward):
    """Stand-in class whose string form is the fixed text "..."."""
    def __str__( self ):
        placeholder = "..."
        return placeholder
|
|
2897
|
+
|
|
2898
|
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr.

        savelist is accepted but not passed to the base class; saveAsList
        is always set to False here.
        """
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
|
|
2903
|
+
|
|
2904
|
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""
    def __init__(self, *args):
        """Construct the converter and emit a DeprecationWarning."""
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list containing the upper-cased form of every token."""
        # call the str method directly: string.upper exists only in the
        # Python 2 string module and does nothing more than this
        return [ tok.upper() for tok in tokenlist ]
|
|
2913
|
+
|
|
2914
|
+
|
|
2915
|
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        """Wrap expr; joinString and adjacent are stored for use when combining."""
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString

    def ignore( self, other ):
        """Register other as ignorable; returns self.

        When adjacent is set, ParserElement.ignore is called directly, which
        skips the ParseElementEnhance override that also passes the ignore
        expression to the wrapped expression.
        """
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super( Combine, self).ignore( other )
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the positional tokens with one combined string.

        The result starts as a copy of tokenlist with its positional items
        removed.  It is returned wrapped in a list when this element has a
        results name and the result has keys.
        """
        retToks = tokenlist.copy()
        del retToks[:]
        combined = "".join( tokenlist._asStringList(self.joinString) )
        retToks += ParseResults( [ combined ], modal=self.modalResults )

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks
|
|
2945
|
+
|
|
2946
|
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        """Wrap expr and mark the results to be saved as a list."""
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Nest the matched tokens one level deeper by wrapping them in a list."""
        grouped = [ tokenlist ]
        return grouped
|
|
2954
|
+
|
|
2955
|
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        """Wrap exprs and mark the results to be saved as a list."""
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to tokenlist for every non-empty element.

        The key is the element's first token (an int is converted to a
        stripped string).  The value is "" for a one-token element, the
        second token for a plain two-token element, and otherwise the
        remaining tokens, unwrapped when exactly one keyless token remains.
        Each value is stored together with the element's index.
        """
        for index, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key,int):
                key = _ustr(entry[0]).strip()

            if len(entry)==1:
                value = ""
            elif len(entry)==2 and not isinstance(entry[1],ParseResults):
                value = entry[1]
            else:
                rest = entry.copy()
                del rest[0]
                if len(rest)!= 1 or (isinstance(rest,ParseResults) and rest.keys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value,index)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
|
|
2987
|
+
|
|
2988
|
+
|
|
2989
|
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        """Discard the matched tokens by returning a new empty list."""
        return list()

    def suppress( self ):
        """Return self, since this element already suppresses its results."""
        return self
|
|
2996
|
+
|
|
2997
|
+
|
|
2998
|
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        """Store the normalized parse action; the wrapper starts in the not-yet-called state."""
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        """Run the wrapped action on first use.

        Any later call raises ParseException until reset() is called.  The
        wrapper is marked as called only after the action returns, so an
        action that raises does not use up the single call.
        """
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False
|
|
3011
|
+
|
|
3012
|
+def traceParseAction(f):
|
|
3013
|
+ """Decorator for debugging parse actions."""
|
|
3014
|
+ f = ParserElement._normalizeParseActionArgs(f)
|
|
3015
|
+ def z(*paArgs):
|
|
3016
|
+ thisFunc = f.func_name
|
|
3017
|
+ s,l,t = paArgs[-3:]
|
|
3018
|
+ if len(paArgs)>3:
|
|
3019
|
+ thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
|
|
3020
|
+ sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
|
|
3021
|
+ try:
|
|
3022
|
+ ret = f(*paArgs)
|
|
3023
|
+ except Exception, exc:
|
|
3024
|
+ sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
|
|
3025
|
+ raise
|
|
3026
|
+ sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
|
|
3027
|
+ return ret
|
|
3028
|
+ try:
|
|
3029
|
+ z.__name__ = f.__name__
|
|
3030
|
+ except AttributeError:
|
|
3031
|
+ pass
|
|
3032
|
+ return z
|
|
3033
|
+
|
|
3034
|
+#
|
|
3035
|
+# global helpers
|
|
3036
|
+#
|
|
3037
|
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a list of expr items separated by delim (',' by default).

       With combine=False (the default) the delimiters are suppressed and the
       matched items are returned as separate tokens.  With combine=True the
       items and delimiters are wrapped in a Combine, so they are returned
       together as one token string.
    """
    exprLabel = _ustr(expr)
    dlName = exprLabel+" ["+_ustr(delim)+" "+exprLabel+"]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
|
|
3050
|
+
|
|
3051
|
def countedArray( expr ):
    """Helper to define a counted list of expressions, of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The count token itself is dropped from the results; the expr tokens are
       returned grouped together.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        n = int(t[0])
        # redefine the array expression now that the count is known
        if n:
            body = Group(And([expr]*n))
        else:
            body = Group(empty)
        arrayExpr << body
        return []
    lengthField = Word(nums).setName("arrayLen")
    lengthField.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( lengthField + arrayExpr )
|
|
3064
|
+
|
|
3065
|
+def _flatten(L):
|
|
3066
|
+ if type(L) is not list: return [L]
|
|
3067
|
+ if L == []: return L
|
|
3068
|
+ return _flatten(L[0]) + _flatten(L[1:])
|
|
3069
|
+
|
|
3070
|
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because the repeat is matched as
       literal text, it will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: require each one, in order
            literals = []
            for tt in _flatten(t.asList()):
                literals.append( Literal(tt) )
            rep << And( literals )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
|
|
3095
|
+
|
|
3096
|
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because the repeat is parsed with a
       copy of the expression and its tokens are then compared, it will *not*
       match the leading "1:1" in "1:10" ("1" is compared with "10").
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # reject the repeat unless it yields the same flattened tokens
            if _flatten(t.asList()) != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
|
|
3121
|
+
|
|
3122
|
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters \\ ^ - ] in s, and replace newline and
       tab characters with the two-character sequences \\n and \\t."""
    # backslash goes first, so the escapes added for the other chars are not doubled
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special,"\\"+special)
    for raw,shown in (("\n",r"\n"), ("\t",r"\t")):
        s = s.replace(raw,shown)
    return _ustr(s)
|
|
3129
|
+
|
|
3130
|
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list or tuple of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       Any other type for strs issues a SyntaxWarning and is treated as an empty
       set of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a private *list*: the reordering below deletes and inserts
        # in place, which the tuple produced by slicing a tuple does not support
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no symbols instead of failing on an unbound name
        symbols = []

    # drop duplicates, and move any symbol ahead of an earlier symbol that is a
    # prefix of it, so the longer alternative is always tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no conflict found for this position, move on to the next one
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character class is enough
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # only real errors are handled here; KeyboardInterrupt/SystemExit propagate
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
|
|
3188
|
+
|
|
3189
|
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective
       patterns for the key and value.  Builds the Dict, ZeroOrMore, and Group
       expressions in the proper order.  The key pattern can include delimiting
       markers or punctuation, as long as they are suppressed, thereby leaving the
       significant key text.  The value pattern can include named results, so that
       the Dict results can include named token fields.
    """
    entry = Group ( key + value )
    return Dict( ZeroOrMore( entry ) )
|
|
3198
|
+
|
|
3199
|
# ready-made, named instances of the positional expressions, one per class
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")
|
|
3205
|
+
|
|
3206
|
# grammar used by srange() to pick apart a regex-style '[...]' character set

# backslash followed by one punctuation char -> that char
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# backslash + "0x" + hex digits -> the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits -> the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = ( _escapedPunc | _escapedHexChar | _escapedOctChar |
                Word(_printables_less_backslash,exact=1) )
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = ( Literal("[") +
                   Optional("^").setResultsName("negate") +
                   Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") +
                   "]" )

def _expanded(p):
    # a ParseResults is a (first,last) range: expand it to every char in between;
    # anything else (or a range that expands to nothing) is passed through as is
    if isinstance(p,ParseResults):
        chars = ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
        if chars:
            return chars
    return p
|
|
3215
|
+
|
|
3216
|
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed range gives "", but KeyboardInterrupt and
        # SystemExit are no longer swallowed along with it
        return ""
|
|
3236
|
+
|
|
3237
|
def matchOnlyAtCol(n):
    """Helper that builds a parse action which accepts a match only when it
       starts at column n of the input text, and raises ParseException otherwise.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
|
|
3245
|
+
|
|
3246
|
def replaceWith(replStr):
    """Helper that builds a parse action which ignores its arguments and always
       returns replStr as the single resulting token.  Especially useful when
       used with transformString().
    """
    def _replFunc(*args):
        # a new list on every call, so callers never share one list object
        replacement = [replStr]
        return replacement
    return _replFunc
|
|
3253
|
+
|
|
3254
|
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the
       quotation marks) from the first parsed token.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
|
|
3260
|
+
|
|
3261
|
def upcaseTokens(s,l,t):
    """Helper parse action returning every token, converted to a string, in upper case."""
    return [ _ustr(tt).upper() for tt in t ]
|
|
3264
|
+
|
|
3265
|
def downcaseTokens(s,l,t):
    """Helper parse action returning every token, converted to a string, in lower case."""
    return [ _ustr(tt).lower() for tt in t ]
|
|
3268
|
+
|
|
3269
|
def keepOriginalText(s,startLoc,t):
    """Helper parse action that replaces the parsed tokens with the original
       text they were parsed from, overriding the results of any nested parse
       actions.  The token list t is modified in place and returned."""
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    original = s[startLoc:stopLoc]
    # empty the existing results, then refill the same object with the raw text
    del t[:]
    t += ParseResults(original)
    return t
|
|
3279
|
+
|
|
3280
|
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.  The value is read from the local variable
       "loc" of the nearest enclosing stack frame running a function named
       _parseNoCache; ParseFatalException is raised when no such frame exists."""
    import inspect
    frames = inspect.stack()
    try:
        # skip this function and its direct caller, then walk outward (through
        # any intervening argument normalizers) to the calling parse routine
        for record in frames[2:]:
            if record[3] != "_parseNoCache":
                continue
            return record[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # let go of the captured frame records
        del frames
|
|
3295
|
+
|
|
3296
|
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr is either a string (turned into a Keyword, caseless unless xml is
       true) or an expression whose name attribute is used for the results names.
       With xml true, attribute values must be double-quoted strings; otherwise
       attribute names are lower-cased, and values are optional and may be quoted
       strings or unquoted runs of printable characters other than '>'.
       Returns the pair (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )
    # the "empty" result is True when the tag ends in "/>", False otherwise
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name, title-cased with ':' removed
    camelName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+camelName).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+camelName).setName("</%s>" % tagStr)

    return openTag, closeTag
|
|
3323
|
+
|
|
3324
|
def makeHTMLTags(tagStr):
    """Helper to construct the (opening, closing) tag expressions for HTML, given a tag name"""
    return _makeTags( tagStr, xml=False )
|
|
3327
|
+
|
|
3328
|
def makeXMLTags(tagStr):
    """Helper to construct the (opening, closing) tag expressions for XML, given a tag name"""
    return _makeTags( tagStr, xml=True )
|
|
3331
|
+
|
|
3332
|
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
    """
    # positional name-value pairs win; keyword arguments are used only without them
    required = args or attrDict.items()
    attrs = [(k,v) for k,v in required]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if not (attrName in tokens):
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                found = tokens[attrName]
                if found != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, found, attrValue))
    return pa
# unique sentinel meaning "the attribute must be present, whatever its value"
withAttribute.ANY_VALUE = object()
|
|
3362
|
+
|
|
3363
|
# associativity markers for operatorPrecedence; each is a distinct object
opAssoc = _Constants()
opAssoc.LEFT  = object()
opAssoc.RIGHT = object()
|
|
3366
|
+
|
|
3367
|
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
    """
    ret = Forward()
    # innermost level: a bare operand, or a whole expression in parentheses
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so that an omitted parse action unpacks as None
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        leftAssoc = ( rightLeftAssoc == opAssoc.LEFT )
        if not leftAssoc and not ( rightLeftAssoc == opAssoc.RIGHT ):
            raise ValueError("operator must indicate right or left associativity")
        if arity not in (1,2,3):
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        # 'lookahead' is what must follow before the level is attempted at all;
        # 'grouped' is what is then actually matched and grouped
        if leftAssoc:
            if arity == 1:
                lookahead = lastExpr + opExpr
                grouped = lastExpr + OneOrMore( opExpr )
            elif arity == 2:
                if opExpr is not None:
                    lookahead = lastExpr + opExpr + lastExpr
                    grouped = lastExpr + OneOrMore( opExpr + lastExpr )
                else:
                    lookahead = lastExpr+lastExpr
                    grouped = lastExpr + OneOrMore(lastExpr)
            else:
                lookahead = lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                grouped = lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
        else:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = opExpr.expr + thisExpr
                grouped = opExpr + thisExpr
            elif arity == 2:
                if opExpr is not None:
                    lookahead = lastExpr + opExpr + thisExpr
                    grouped = lastExpr + OneOrMore( opExpr + thisExpr )
                else:
                    lookahead = lastExpr + thisExpr
                    grouped = lastExpr + OneOrMore( thisExpr )
            else:
                lookahead = lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                grouped = lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
        matchExpr = FollowedBy(lookahead) + Group( grouped )
        if pa:
            matchExpr.setParseAction( pa )
        # fall back to the next-tighter level when this operator is absent
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
|
|
3437
|
+
|
|
3438
|
# quoted strings: no raw line breaks inside; a doubled quote character, a \x hex
# escape, or a backslash followed by any character are accepted within the quotes
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
# a quoted string with a leading 'u'
unicodeString = Combine(_L('u') + quotedString.copy())
|
|
3442
|
+
|
|
3443
|
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        stripToken = lambda t:t[0].strip()
        excluded = opener+closer+ParserElement.DEFAULT_WHITE_CHARS
        if ignoreExpr is None:
            content = (empty+CharsNotIn(excluded).setParseAction(stripToken))
        else:
            # advance one char at a time so an ignorable expression is never swallowed
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(excluded,exact=1))
                        ).setParseAction(stripToken))
    ret = Forward()
    if ignoreExpr is None:
        inner = ret | content
    else:
        inner = ignoreExpr | ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( inner ) + Suppress(closer) )
    return ret
|
|
3482
|
+
|
|
3483
|
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.
    """
    def checkPeerIndent(s,l,t):
        # a statement must sit exactly at the current indentation level
        if l >= len(s): return
        curCol = col(l,s)
        expected = indentStack[-1]
        if curCol == expected:
            return
        if curCol > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # a new block must start deeper than the current level; record that level
        curCol = col(l,s)
        if not (curCol > indentStack[-1]):
            raise ParseException(s,l,"not a subentry")
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # a block ends where the text returns to the enclosing level or shallower
        if l >= len(s): return
        curCol = col(l,s)
        if indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]:
            indentStack.pop()
            return
        raise ParseException(s,l,"not an unindent")

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + statements + UNDENT)
    else:
        smExpr = Group( Optional(NL) + statements )
    # a backslash at the end of a line is skipped inside statements
    blockStatementExpr.ignore("\\" + LineEnd())
    return smExpr
|
|
3534
|
+
|
|
3535
|
# 8-bit character sets, expanded by srange from the hex-escaped ranges given
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit   = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
|
|
3537
|
+
|
|
3538
|
# opening/closing tag expressions matching any tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# the entity name (without '&' and ';') is available as the "entity" result
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
# entity name -> replacement character, paired positionally; "quot" maps to the
# double quote character (it was previously mapped to an apostrophe by mistake)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: the replacement character for a known entity, otherwise None
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
|
|
3542
|
+
|
|
3543
|
# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# /* ... */
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... -->
htmlComment = Regex(r"<!--[\s\S]*?-->")
# the remainder of the current line, with leading whitespace kept
restOfLine = Regex(r".*").leaveWhitespace()
# // ... (a backslash-newline pair is consumed as part of the comment)
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either of the /* ... */ and // ... forms
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
    ).setName("C++ style comment")

# same expression object under a second name
javaStyleComment = cppStyleComment
# # ... to the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
|
|
3553
|
# every printable character except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one unquoted item: words of non-comma characters, keeping the blanks/tabs
# between them as long as they are not followed by a comma or the end of line
_commasepitem = Combine(
    OneOrMore( Word(_noncomma) +
               Optional( Word(" \t") + ~Literal(",") + ~LineEnd() ) )
    ).streamline().setName("commaItem")
# a missing item between two commas is reported as ""
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="")
    ).setName("commaSeparatedList")
|
|
3558
|
+
|
|
3559
|
+
|
|
3560
|
+if __name__ == "__main__":
|
|
3561
|
+
|
|
3562
|
+ def test( teststring ):
|
|
3563
|
+ try:
|
|
3564
|
+ tokens = simpleSQL.parseString( teststring )
|
|
3565
|
+ tokenlist = tokens.asList()
|
|
3566
|
+ print (teststring + "->" + str(tokenlist))
|
|
3567
|
+ print ("tokens = " + str(tokens))
|
|
3568
|
+ print ("tokens.columns = " + str(tokens.columns))
|
|
3569
|
+ print ("tokens.tables = " + str(tokens.tables))
|
|
3570
|
+ print (tokens.asXML("SQL",True))
|
|
3571
|
+ except ParseBaseException,err:
|
|
3572
|
+ print (teststring + "->")
|
|
3573
|
+ print (err.line)
|
|
3574
|
+ print (" "*(err.column-1) + "^")
|
|
3575
|
+ print (err)
|
|
3576
|
+ print()
|
|
3577
|
+
|
|
3578
|
+ selectToken = CaselessLiteral( "select" )
|
|
3579
|
+ fromToken = CaselessLiteral( "from" )
|
|
3580
|
+
|
|
3581
|
+ ident = Word( alphas, alphanums + "_$" )
|
|
3582
|
+ columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
|
|
3583
|
+ columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
|
|
3584
|
+ tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
|
|
3585
|
+ tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
|
|
3586
|
+ simpleSQL = ( selectToken + \
|
|
3587
|
+ ( '*' | columnNameList ).setResultsName( "columns" ) + \
|
|
3588
|
+ fromToken + \
|
|
3589
|
+ tableNameList.setResultsName( "tables" ) )
|
|
3590
|
+
|
|
3591
|
+ test( "SELECT * from XYZZY, ABC" )
|
|
3592
|
+ test( "select * from SYS.XYZZY" )
|
|
3593
|
+ test( "Select A from Sys.dual" )
|
|
3594
|
+ test( "Select AA,BB,CC from Sys.dual" )
|
|
3595
|
+ test( "Select A, B, C from Sys.dual" )
|
|
3596
|
+ test( "Select A, B, C from Sys.dual" )
|
|
3597
|
+ test( "Xelect A, B, C from Sys.dual" )
|
|
3598
|
+ test( "Select A, B, C frox Sys.dual" )
|
|
3599
|
+ test( "Select" )
|
|
3600
|
+ test( "Select ^^^ frox Sys.dual" )
|
|
3601
|
+ test( "Select A, B, C from Sys.dual, Table2 " )
|