... | ... |
@@ -44,7 +44,9 @@ class ParseError(Exception): |
44 | 44 |
self.col = col |
45 | 45 |
|
46 | 46 |
def __str__(self): |
47 |
- return ':'.join([str(part) for part in (self.filename, self.line, self.col, self.msg) if part != None]) |
|
47 |
+ return ':'.join([str(part) for part in |
|
48 |
+ (self.filename, self.line, self.col, self.msg) |
|
49 |
+ if part is not None]) |
|
48 | 50 |
|
49 | 51 |
|
50 | 52 |
class Lexer: |
... | ... |
@@ -55,7 +57,7 @@ class Lexer: |
55 | 57 |
|
56 | 58 |
newline_re = re.compile(br'\r\n?|\n') |
57 | 59 |
|
58 |
- def __init__(self, buf = None, pos = 0, filename = None, fp = None): |
|
60 |
+ def __init__(self, buf=None, pos=0, filename=None, fp=None): |
|
59 | 61 |
if fp is not None: |
60 | 62 |
try: |
61 | 63 |
fileno = fp.fileno() |
... | ... |
@@ -69,7 +71,7 @@ class Lexer: |
69 | 71 |
# map the whole file into memory |
70 | 72 |
if length: |
71 | 73 |
# length must not be zero |
72 |
- buf = mmap.mmap(fileno, length, access = mmap.ACCESS_READ) |
|
74 |
+ buf = mmap.mmap(fileno, length, access=mmap.ACCESS_READ) |
|
73 | 75 |
pos = os.lseek(fileno, 0, 1) |
74 | 76 |
else: |
75 | 77 |
buf = b'' |
... | ... |
@@ -108,7 +110,7 @@ class Lexer: |
108 | 110 |
raise ParseError(msg, self.filename, line, col) |
109 | 111 |
else: |
110 | 112 |
break |
111 |
- return Token(type = type, text = text, line = line, col = col) |
|
113 |
+ return Token(type=type, text=text, line=line, col=col) |
|
112 | 114 |
|
113 | 115 |
def consume(self, text): |
114 | 116 |
# update line number |
... | ... |
@@ -124,7 +126,7 @@ class Lexer: |
124 | 126 |
if tabpos == -1: |
125 | 127 |
break |
126 | 128 |
self.col += tabpos - pos |
127 |
- self.col = ((self.col - 1)//self.tabsize + 1)*self.tabsize + 1 |
|
129 |
+ self.col = ((self.col - 1) // self.tabsize + 1) * self.tabsize + 1 |
|
128 | 130 |
pos = tabpos + 1 |
129 | 131 |
self.col += len(text) - pos |
130 | 132 |
|
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,158 @@ |
1 |
+# Copyright 2008-2015 Jose Fonseca |
|
2 |
+# |
|
3 |
+# This program is free software: you can redistribute it and/or modify it |
|
4 |
+# under the terms of the GNU Lesser General Public License as published |
|
5 |
+# by the Free Software Foundation, either version 3 of the License, or |
|
6 |
+# (at your option) any later version. |
|
7 |
+# |
|
8 |
+# This program is distributed in the hope that it will be useful, |
|
9 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+# GNU Lesser General Public License for more details. |
|
12 |
+# |
|
13 |
+# You should have received a copy of the GNU Lesser General Public License |
|
14 |
+# along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
15 |
+# |
|
16 |
+import os |
|
17 |
+import re |
|
18 |
+ |
|
19 |
+from .scanner import DotScanner |
|
20 |
+ |
|
21 |
# Special token type codes.  SKIP marks tokens that Lexer.__next__ silently
# discards; EOF is presumably the end-of-input marker produced by scanners
# (NOTE(review): not used in this module -- confirm against the scanner).
EOF, SKIP = -1, -2

# Identifier token types.  DotLexer.filter strips the delimiters from
# STR_ID and HTML_ID tokens and reports both as plain ID.
ID, STR_ID, HTML_ID = 0, 1, 2
|
27 |
+ |
|
28 |
+ |
|
29 |
class Token:
    """A lexical token together with the position recorded for it.

    The constructor stores its four arguments unchanged as the attributes
    ``type``, ``text``, ``line`` and ``col``.
    """

    def __init__(self, type, text, line, col):
        self.type = type
        self.text = text
        self.line = line
        self.col = col

    def __repr__(self):
        # Readable form for debugging instead of the default
        # "<Token object at 0x...>".
        return '%s(type=%r, text=%r, line=%r, col=%r)' % (
            self.__class__.__name__,
            self.type, self.text, self.line, self.col)
|
36 |
+ |
|
37 |
+ |
|
38 |
class ParseError(Exception):
    """Exception carrying an optional message and source location.

    All four fields default to ``None`` and are stored as attributes
    (``msg``, ``filename``, ``line``, ``col``).
    """

    def __init__(self, msg=None, filename=None, line=None, col=None):
        self.msg = msg
        self.filename = filename
        self.line = line
        self.col = col

    def __str__(self):
        """Return ``filename:line:col:msg``, omitting fields that are None."""
        parts = (self.filename, self.line, self.col, self.msg)
        # Identity test: only a genuine None is dropped.  Falsy values such
        # as line 0 are kept, and a custom __eq__/__ne__ on a part cannot
        # cause it to be filtered out by mistake.
        return ':'.join(str(part) for part in parts if part is not None)
|
48 |
+ |
|
49 |
+ |
|
50 |
class Lexer:
    """Base lexer producing tokens from a bytes-like buffer.

    Derived classes are expected to set ``scanner`` (an object whose
    ``next(buf, pos)`` returns ``(type, text, endpos)``) and to provide a
    ``filter(type, text)`` method; both are used by ``__next__``.  The
    lexer tracks 1-based line and column numbers, expanding tabs to the
    next multiple of ``tabsize``.
    """

    # should be overridden by derived classes
    scanner = None
    tabsize = 8

    # matches "\r\n", a lone "\r", or "\n"
    newline_re = re.compile(br'\r\n?|\n')

    def __init__(self, buf=None, pos=0, filename=None, fp=None):
        """Initialize from an in-memory buffer or from a file object.

        When ``fp`` is given it takes precedence over ``buf``/``pos``: the
        file is memory-mapped when possible, otherwise read in full.  If
        ``filename`` is not given it is taken from ``fp.name`` when that
        attribute exists.
        """
        if fp is not None:
            try:
                fileno = fp.fileno()
                length = os.path.getsize(fp.name)
                import mmap
            except Exception:
                # Mapping is not possible (no descriptor, no usable name,
                # no mmap module, ...): read whole file into memory.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                buf = fp.read()
                pos = 0
            else:
                # map the whole file into memory
                if length:
                    # length must not be zero
                    buf = mmap.mmap(fileno, length, access=mmap.ACCESS_READ)
                    # start at the descriptor's current offset
                    pos = os.lseek(fileno, 0, os.SEEK_CUR)
                else:
                    buf = b''
                    pos = 0

            if filename is None:
                filename = getattr(fp, 'name', None)

        self.buf = buf
        self.pos = pos
        self.line = 1
        self.col = 1
        self.filename = filename

    def __next__(self):
        """Return the next Token, discarding tokens of type SKIP.

        Raises ParseError when the filtered token type is None.  The
        reported line/column are those of the token's first byte.
        """
        while True:
            # save state
            pos = self.pos
            line = self.line
            col = self.col

            type, text, endpos = self.scanner.next(self.buf, pos)
            assert isinstance(text, bytes)
            assert pos + len(text) == endpos
            # advance line/col over the raw text before filter() alters it
            self.consume(text)
            type, text = self.filter(type, text)
            self.pos = endpos

            if type == SKIP:
                continue
            elif type is None:
                msg = 'unexpected char %r' % (text,)
                raise ParseError(msg, self.filename, line, col)
            else:
                break
        return Token(type=type, text=text, line=line, col=col)

    def consume(self, text):
        """Advance ``self.line`` and ``self.col`` past the bytes in ``text``."""
        # update line number
        pos = 0
        for mo in self.newline_re.finditer(text, pos):
            self.line += 1
            self.col = 1
            pos = mo.end()

        # update column number (only text after the last newline counts)
        while True:
            tabpos = text.find(b'\t', pos)
            if tabpos == -1:
                break
            self.col += tabpos - pos
            # a tab moves to the next tab stop (columns are 1-based)
            self.col = ((self.col - 1) // self.tabsize + 1) * self.tabsize + 1
            pos = tabpos + 1
        self.col += len(text) - pos
|
130 |
+ |
|
131 |
+ |
|
132 |
class DotLexer(Lexer):
    """Lexer using DotScanner that reports quoted and HTML ids as plain ID."""

    scanner = DotScanner()

    def filter(self, type, text):
        """Return the ``(type, text)`` pair after unwrapping string/HTML ids.

        STR_ID tokens lose their first and last byte, have backslash-newline
        continuations removed and ``\\"`` unescaped.  HTML_ID tokens lose
        their first and last byte.  Both come back with type ID; any other
        token is returned unchanged.
        """
        # TODO: handle charset
        if type == STR_ID:
            unquoted = text[1:-1]

            # line continuations (the CRLF form must be tried first)
            for continuation in (b'\\\r\n', b'\\\r', b'\\\n'):
                unquoted = unquoted.replace(continuation, b'')

            # quotes
            unquoted = unquoted.replace(b'\\"', b'"')

            # layout engines recognize other escape codes (many
            # non-standard) but we don't translate them here
            return ID, unquoted

        if type == HTML_ID:
            return ID, text[1:-1]

        return type, text