Fixes https://github.com/jrfonseca/xdot.py/issues/73
| ... | ... |
@@ -54,11 +54,11 @@ class Scanner: |
| 54 | 54 |
flags = re.DOTALL |
| 55 | 55 |
if self.ignorecase: |
| 56 | 56 |
flags |= re.IGNORECASE |
| 57 |
- self.tokens_re = re.compile( |
|
| 58 |
- b'|'.join([b'(' + regexp + b')'
|
|
| 59 |
- for type, regexp, test_lit in self.tokens]), |
|
| 60 |
- flags |
|
| 61 |
- ) |
|
| 57 |
+ self.tokens_re = re.compile( |
|
| 58 |
+ b'|'.join([b'(' + regexp + b')'
|
|
| 59 |
+ for type, regexp, test_lit in self.tokens]), |
|
| 60 |
+ flags |
|
| 61 |
+ ) |
|
| 62 | 62 |
|
| 63 | 63 |
def next(self, buf, pos): |
| 64 | 64 |
if pos >= len(buf): |
| ... | ... |
@@ -54,10 +54,11 @@ class Scanner: |
| 54 | 54 |
flags = re.DOTALL |
| 55 | 55 |
if self.ignorecase: |
| 56 | 56 |
flags |= re.IGNORECASE |
| 57 |
- self.tokens_re = re.compile( |
|
| 58 |
- b'|'.join([b'(' + regexp + b')' for type, regexp, test_lit in self.tokens]),
|
|
| 59 |
- flags |
|
| 60 |
- ) |
|
| 57 |
+ self.tokens_re = re.compile( |
|
| 58 |
+ b'|'.join([b'(' + regexp + b')'
|
|
| 59 |
+ for type, regexp, test_lit in self.tokens]), |
|
| 60 |
+ flags |
|
| 61 |
+ ) |
|
| 61 | 62 |
|
| 62 | 63 |
def next(self, buf, pos): |
| 63 | 64 |
if pos >= len(buf): |
| ... | ... |
@@ -71,7 +72,7 @@ class Scanner: |
| 71 | 72 |
type = self.literals.get(text, type) |
| 72 | 73 |
return type, text, pos |
| 73 | 74 |
else: |
| 74 |
- c = buf[pos : pos + 1] |
|
| 75 |
+ c = buf[pos:pos+1] |
|
| 75 | 76 |
return self.symbols.get(c, None), c, pos + 1 |
| 76 | 77 |
|
| 77 | 78 |
|
| ... | ... |
@@ -81,11 +82,11 @@ class DotScanner(Scanner): |
| 81 | 82 |
tokens = [ |
| 82 | 83 |
# whitespace and comments |
| 83 | 84 |
(SKIP, |
| 84 |
- br'[ \t\f\r\n\v]+|' |
|
| 85 |
- br'//[^\r\n]*|' |
|
| 86 |
- br'/\*.*?\*/|' |
|
| 87 |
- br'#[^\r\n]*', |
|
| 88 |
- False), |
|
| 85 |
+ br'[ \t\f\r\n\v]+|' |
|
| 86 |
+ br'//[^\r\n]*|' |
|
| 87 |
+ br'/\*.*?\*/|' |
|
| 88 |
+ br'#[^\r\n]*', |
|
| 89 |
+ False), |
|
| 89 | 90 |
|
| 90 | 91 |
# Alphanumeric IDs |
| 91 | 92 |
(ID, br'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True), |
| 1 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,129 @@ |
| 1 |
+# Copyright 2008-2015 Jose Fonseca |
|
| 2 |
+# |
|
| 3 |
+# This program is free software: you can redistribute it and/or modify it |
|
| 4 |
+# under the terms of the GNU Lesser General Public License as published |
|
| 5 |
+# by the Free Software Foundation, either version 3 of the License, or |
|
| 6 |
+# (at your option) any later version. |
|
| 7 |
+# |
|
| 8 |
+# This program is distributed in the hope that it will be useful, |
|
| 9 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 10 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 11 |
+# GNU Lesser General Public License for more details. |
|
| 12 |
+# |
|
| 13 |
+# You should have received a copy of the GNU Lesser General Public License |
|
| 14 |
+# along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
| 15 |
+# |
|
| 16 |
+import re |
|
| 17 |
+ |
|
| 18 |
+EOF = -1 |
|
| 19 |
+SKIP = -2 |
|
| 20 |
+ |
|
| 21 |
+ID = 0 |
|
| 22 |
+STR_ID = 1 |
|
| 23 |
+HTML_ID = 2 |
|
| 24 |
+EDGE_OP = 3 |
|
| 25 |
+ |
|
| 26 |
+LSQUARE = 4 |
|
| 27 |
+RSQUARE = 5 |
|
| 28 |
+LCURLY = 6 |
|
| 29 |
+RCURLY = 7 |
|
| 30 |
+COMMA = 8 |
|
| 31 |
+COLON = 9 |
|
| 32 |
+SEMI = 10 |
|
| 33 |
+EQUAL = 11 |
|
| 34 |
+PLUS = 12 |
|
| 35 |
+ |
|
| 36 |
+STRICT = 13 |
|
| 37 |
+GRAPH = 14 |
|
| 38 |
+DIGRAPH = 15 |
|
| 39 |
+NODE = 16 |
|
| 40 |
+EDGE = 17 |
|
| 41 |
+SUBGRAPH = 18 |
|
| 42 |
+ |
|
| 43 |
+ |
|
| 44 |
+class Scanner: |
|
| 45 |
+ """Stateless scanner.""" |
|
| 46 |
+ |
|
| 47 |
+ # should be overridden by derived classes |
|
| 48 |
+ tokens = [] |
|
| 49 |
+ symbols = {}
|
|
| 50 |
+ literals = {}
|
|
| 51 |
+ ignorecase = False |
|
| 52 |
+ |
|
| 53 |
+ def __init__(self): |
|
| 54 |
+ flags = re.DOTALL |
|
| 55 |
+ if self.ignorecase: |
|
| 56 |
+ flags |= re.IGNORECASE |
|
| 57 |
+ self.tokens_re = re.compile( |
|
| 58 |
+ b'|'.join([b'(' + regexp + b')' for type, regexp, test_lit in self.tokens]),
|
|
| 59 |
+ flags |
|
| 60 |
+ ) |
|
| 61 |
+ |
|
| 62 |
+ def next(self, buf, pos): |
|
| 63 |
+ if pos >= len(buf): |
|
| 64 |
+ return EOF, b'', pos |
|
| 65 |
+ mo = self.tokens_re.match(buf, pos) |
|
| 66 |
+ if mo: |
|
| 67 |
+ text = mo.group() |
|
| 68 |
+ type, regexp, test_lit = self.tokens[mo.lastindex - 1] |
|
| 69 |
+ pos = mo.end() |
|
| 70 |
+ if test_lit: |
|
| 71 |
+ type = self.literals.get(text, type) |
|
| 72 |
+ return type, text, pos |
|
| 73 |
+ else: |
|
| 74 |
+ c = buf[pos : pos + 1] |
|
| 75 |
+ return self.symbols.get(c, None), c, pos + 1 |
|
| 76 |
+ |
|
| 77 |
+ |
|
| 78 |
+class DotScanner(Scanner): |
|
| 79 |
+ |
|
| 80 |
+ # token regular expression table |
|
| 81 |
+ tokens = [ |
|
| 82 |
+ # whitespace and comments |
|
| 83 |
+ (SKIP, |
|
| 84 |
+ br'[ \t\f\r\n\v]+|' |
|
| 85 |
+ br'//[^\r\n]*|' |
|
| 86 |
+ br'/\*.*?\*/|' |
|
| 87 |
+ br'#[^\r\n]*', |
|
| 88 |
+ False), |
|
| 89 |
+ |
|
| 90 |
+ # Alphanumeric IDs |
|
| 91 |
+ (ID, br'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True), |
|
| 92 |
+ |
|
| 93 |
+ # Numeric IDs |
|
| 94 |
+ (ID, br'-?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)', False), |
|
| 95 |
+ |
|
| 96 |
+ # String IDs |
|
| 97 |
+ (STR_ID, br'"[^"\\]*(?:\\.[^"\\]*)*"', False), |
|
| 98 |
+ |
|
| 99 |
+ # HTML IDs |
|
| 100 |
+ (HTML_ID, br'<[^<>]*(?:<[^<>]*>[^<>]*)*>', False), |
|
| 101 |
+ |
|
| 102 |
+ # Edge operators |
|
| 103 |
+ (EDGE_OP, br'-[>-]', False), |
|
| 104 |
+ ] |
|
| 105 |
+ |
|
| 106 |
+ # symbol table |
|
| 107 |
+ symbols = {
|
|
| 108 |
+ b'[': LSQUARE, |
|
| 109 |
+ b']': RSQUARE, |
|
| 110 |
+ b'{': LCURLY,
|
|
| 111 |
+ b'}': RCURLY, |
|
| 112 |
+ b',': COMMA, |
|
| 113 |
+ b':': COLON, |
|
| 114 |
+ b';': SEMI, |
|
| 115 |
+ b'=': EQUAL, |
|
| 116 |
+ b'+': PLUS, |
|
| 117 |
+ } |
|
| 118 |
+ |
|
| 119 |
+ # literal table |
|
| 120 |
+ literals = {
|
|
| 121 |
+ b'strict': STRICT, |
|
| 122 |
+ b'graph': GRAPH, |
|
| 123 |
+ b'digraph': DIGRAPH, |
|
| 124 |
+ b'node': NODE, |
|
| 125 |
+ b'edge': EDGE, |
|
| 126 |
+ b'subgraph': SUBGRAPH, |
|
| 127 |
+ } |
|
| 128 |
+ |
|
| 129 |
+ ignorecase = True |