Fixes https://github.com/jrfonseca/xdot.py/issues/73
... | ... |
@@ -54,11 +54,11 @@ class Scanner: |
54 | 54 |
flags = re.DOTALL |
55 | 55 |
if self.ignorecase: |
56 | 56 |
flags |= re.IGNORECASE |
57 |
- self.tokens_re = re.compile( |
|
58 |
- b'|'.join([b'(' + regexp + b')' |
|
59 |
- for type, regexp, test_lit in self.tokens]), |
|
60 |
- flags |
|
61 |
- ) |
|
57 |
+ self.tokens_re = re.compile( |
|
58 |
+ b'|'.join([b'(' + regexp + b')' |
|
59 |
+ for type, regexp, test_lit in self.tokens]), |
|
60 |
+ flags |
|
61 |
+ ) |
|
62 | 62 |
|
63 | 63 |
def next(self, buf, pos): |
64 | 64 |
if pos >= len(buf): |
... | ... |
@@ -54,10 +54,11 @@ class Scanner: |
54 | 54 |
flags = re.DOTALL |
55 | 55 |
if self.ignorecase: |
56 | 56 |
flags |= re.IGNORECASE |
57 |
- self.tokens_re = re.compile( |
|
58 |
- b'|'.join([b'(' + regexp + b')' for type, regexp, test_lit in self.tokens]), |
|
59 |
- flags |
|
60 |
- ) |
|
57 |
+ self.tokens_re = re.compile( |
|
58 |
+ b'|'.join([b'(' + regexp + b')' |
|
59 |
+ for type, regexp, test_lit in self.tokens]), |
|
60 |
+ flags |
|
61 |
+ ) |
|
61 | 62 |
|
62 | 63 |
def next(self, buf, pos): |
63 | 64 |
if pos >= len(buf): |
... | ... |
@@ -71,7 +72,7 @@ class Scanner: |
71 | 72 |
type = self.literals.get(text, type) |
72 | 73 |
return type, text, pos |
73 | 74 |
else: |
74 |
- c = buf[pos : pos + 1] |
|
75 |
+ c = buf[pos:pos+1] |
|
75 | 76 |
return self.symbols.get(c, None), c, pos + 1 |
76 | 77 |
|
77 | 78 |
|
... | ... |
@@ -81,11 +82,11 @@ class DotScanner(Scanner): |
81 | 82 |
tokens = [ |
82 | 83 |
# whitespace and comments |
83 | 84 |
(SKIP, |
84 |
- br'[ \t\f\r\n\v]+|' |
|
85 |
- br'//[^\r\n]*|' |
|
86 |
- br'/\*.*?\*/|' |
|
87 |
- br'#[^\r\n]*', |
|
88 |
- False), |
|
85 |
+ br'[ \t\f\r\n\v]+|' |
|
86 |
+ br'//[^\r\n]*|' |
|
87 |
+ br'/\*.*?\*/|' |
|
88 |
+ br'#[^\r\n]*', |
|
89 |
+ False), |
|
89 | 90 |
|
90 | 91 |
# Alphanumeric IDs |
91 | 92 |
(ID, br'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True), |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,129 @@ |
1 |
+# Copyright 2008-2015 Jose Fonseca |
|
2 |
+# |
|
3 |
+# This program is free software: you can redistribute it and/or modify it |
|
4 |
+# under the terms of the GNU Lesser General Public License as published |
|
5 |
+# by the Free Software Foundation, either version 3 of the License, or |
|
6 |
+# (at your option) any later version. |
|
7 |
+# |
|
8 |
+# This program is distributed in the hope that it will be useful, |
|
9 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+# GNU Lesser General Public License for more details. |
|
12 |
+# |
|
13 |
+# You should have received a copy of the GNU Lesser General Public License |
|
14 |
+# along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
15 |
+# |
|
16 |
+import re |
|
17 |
+ |
|
18 |
+EOF = -1 |
|
19 |
+SKIP = -2 |
|
20 |
+ |
|
21 |
+ID = 0 |
|
22 |
+STR_ID = 1 |
|
23 |
+HTML_ID = 2 |
|
24 |
+EDGE_OP = 3 |
|
25 |
+ |
|
26 |
+LSQUARE = 4 |
|
27 |
+RSQUARE = 5 |
|
28 |
+LCURLY = 6 |
|
29 |
+RCURLY = 7 |
|
30 |
+COMMA = 8 |
|
31 |
+COLON = 9 |
|
32 |
+SEMI = 10 |
|
33 |
+EQUAL = 11 |
|
34 |
+PLUS = 12 |
|
35 |
+ |
|
36 |
+STRICT = 13 |
|
37 |
+GRAPH = 14 |
|
38 |
+DIGRAPH = 15 |
|
39 |
+NODE = 16 |
|
40 |
+EDGE = 17 |
|
41 |
+SUBGRAPH = 18 |
|
42 |
+ |
|
43 |
+ |
|
44 |
+class Scanner: |
|
45 |
+ """Stateless scanner.""" |
|
46 |
+ |
|
47 |
+ # should be overridden by derived classes |
|
48 |
+ tokens = [] |
|
49 |
+ symbols = {} |
|
50 |
+ literals = {} |
|
51 |
+ ignorecase = False |
|
52 |
+ |
|
53 |
+ def __init__(self): |
|
54 |
+ flags = re.DOTALL |
|
55 |
+ if self.ignorecase: |
|
56 |
+ flags |= re.IGNORECASE |
|
57 |
+ self.tokens_re = re.compile( |
|
58 |
+ b'|'.join([b'(' + regexp + b')' for type, regexp, test_lit in self.tokens]), |
|
59 |
+ flags |
|
60 |
+ ) |
|
61 |
+ |
|
62 |
+ def next(self, buf, pos): |
|
63 |
+ if pos >= len(buf): |
|
64 |
+ return EOF, b'', pos |
|
65 |
+ mo = self.tokens_re.match(buf, pos) |
|
66 |
+ if mo: |
|
67 |
+ text = mo.group() |
|
68 |
+ type, regexp, test_lit = self.tokens[mo.lastindex - 1] |
|
69 |
+ pos = mo.end() |
|
70 |
+ if test_lit: |
|
71 |
+ type = self.literals.get(text, type) |
|
72 |
+ return type, text, pos |
|
73 |
+ else: |
|
74 |
+ c = buf[pos : pos + 1] |
|
75 |
+ return self.symbols.get(c, None), c, pos + 1 |
|
76 |
+ |
|
77 |
+ |
|
78 |
+class DotScanner(Scanner): |
|
79 |
+ |
|
80 |
+ # token regular expression table |
|
81 |
+ tokens = [ |
|
82 |
+ # whitespace and comments |
|
83 |
+ (SKIP, |
|
84 |
+ br'[ \t\f\r\n\v]+|' |
|
85 |
+ br'//[^\r\n]*|' |
|
86 |
+ br'/\*.*?\*/|' |
|
87 |
+ br'#[^\r\n]*', |
|
88 |
+ False), |
|
89 |
+ |
|
90 |
+ # Alphanumeric IDs |
|
91 |
+ (ID, br'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True), |
|
92 |
+ |
|
93 |
+ # Numeric IDs |
|
94 |
+ (ID, br'-?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)', False), |
|
95 |
+ |
|
96 |
+ # String IDs |
|
97 |
+ (STR_ID, br'"[^"\\]*(?:\\.[^"\\]*)*"', False), |
|
98 |
+ |
|
99 |
+ # HTML IDs |
|
100 |
+ (HTML_ID, br'<[^<>]*(?:<[^<>]*>[^<>]*)*>', False), |
|
101 |
+ |
|
102 |
+ # Edge operators |
|
103 |
+ (EDGE_OP, br'-[>-]', False), |
|
104 |
+ ] |
|
105 |
+ |
|
106 |
+ # symbol table |
|
107 |
+ symbols = { |
|
108 |
+ b'[': LSQUARE, |
|
109 |
+ b']': RSQUARE, |
|
110 |
+ b'{': LCURLY, |
|
111 |
+ b'}': RCURLY, |
|
112 |
+ b',': COMMA, |
|
113 |
+ b':': COLON, |
|
114 |
+ b';': SEMI, |
|
115 |
+ b'=': EQUAL, |
|
116 |
+ b'+': PLUS, |
|
117 |
+ } |
|
118 |
+ |
|
119 |
+ # literal table |
|
120 |
+ literals = { |
|
121 |
+ b'strict': STRICT, |
|
122 |
+ b'graph': GRAPH, |
|
123 |
+ b'digraph': DIGRAPH, |
|
124 |
+ b'node': NODE, |
|
125 |
+ b'edge': EDGE, |
|
126 |
+ b'subgraph': SUBGRAPH, |
|
127 |
+ } |
|
128 |
+ |
|
129 |
+ ignorecase = True |