Browse code

Fix most flake8 errors

Peter Hill authored on 02/07/2016 10:27:27 • Jose Fonseca committed on 10/07/2016 08:40:15
Showing 1 changed files
... ...
@@ -44,7 +44,9 @@ class ParseError(Exception):
44 44
         self.col = col
45 45
 
46 46
     def __str__(self):
47
-        return ':'.join([str(part) for part in (self.filename, self.line, self.col, self.msg) if part != None])
47
+        return ':'.join([str(part) for part in
48
+                         (self.filename, self.line, self.col, self.msg)
49
+                         if part is not None])
48 50
 
49 51
 
50 52
 class Lexer:
... ...
@@ -55,7 +57,7 @@ class Lexer:
55 57
 
56 58
     newline_re = re.compile(br'\r\n?|\n')
57 59
 
58
-    def __init__(self, buf = None, pos = 0, filename = None, fp = None):
60
+    def __init__(self, buf=None, pos=0, filename=None, fp=None):
59 61
         if fp is not None:
60 62
             try:
61 63
                 fileno = fp.fileno()
... ...
@@ -69,7 +71,7 @@ class Lexer:
69 71
                 # map the whole file into memory
70 72
                 if length:
71 73
                     # length must not be zero
72
-                    buf = mmap.mmap(fileno, length, access = mmap.ACCESS_READ)
74
+                    buf = mmap.mmap(fileno, length, access=mmap.ACCESS_READ)
73 75
                     pos = os.lseek(fileno, 0, 1)
74 76
                 else:
75 77
                     buf = b''
... ...
@@ -108,7 +110,7 @@ class Lexer:
108 110
                 raise ParseError(msg, self.filename, line, col)
109 111
             else:
110 112
                 break
111
-        return Token(type = type, text = text, line = line, col = col)
113
+        return Token(type=type, text=text, line=line, col=col)
112 114
 
113 115
     def consume(self, text):
114 116
         # update line number
... ...
@@ -124,7 +126,7 @@ class Lexer:
124 126
             if tabpos == -1:
125 127
                 break
126 128
             self.col += tabpos - pos
127
-            self.col = ((self.col - 1)//self.tabsize + 1)*self.tabsize + 1
129
+            self.col = ((self.col - 1) // self.tabsize + 1) * self.tabsize + 1
128 130
             pos = tabpos + 1
129 131
         self.col += len(text) - pos
130 132
 
Browse code

Cleaner splitting into separate modules

Peter Hill authored on 02/07/2016 09:45:05 • Jose Fonseca committed on 10/07/2016 08:40:15
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,158 @@
1
+# Copyright 2008-2015 Jose Fonseca
2
+#
3
+# This program is free software: you can redistribute it and/or modify it
4
+# under the terms of the GNU Lesser General Public License as published
5
+# by the Free Software Foundation, either version 3 of the License, or
6
+# (at your option) any later version.
7
+#
8
+# This program is distributed in the hope that it will be useful,
9
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+# GNU Lesser General Public License for more details.
12
+#
13
+# You should have received a copy of the GNU Lesser General Public License
14
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+#
16
+import os
17
+import re
18
+
19
+from .scanner import DotScanner
20
+
21
# Special token types. SKIP marks matches that the lexer discards
# (e.g. whitespace/comments -- presumably; confirm against the scanner).
# EOF presumably marks the end of input -- confirm against the scanner.
EOF, SKIP = -1, -2

# Identifier token types: plain, double-quoted string and HTML-like (<...>).
# The quoted/HTML variants are normalized to ID by DotLexer.filter.
ID, STR_ID, HTML_ID = range(3)
27
+
28
+
29
class Token:
    """A single lexical token together with its source position.

    Attributes:
        type: token type code as produced by the lexer.
        text: the token's text, as handed over by the lexer.
        line: line number at which the token starts.
        col: column number at which the token starts.
    """

    def __init__(self, type, text, line, col):
        self.type = type
        self.text = text
        self.line = line
        self.col = col

    def __repr__(self):
        # A readable repr makes lexer/parser debugging far easier than the
        # default "<Token object at 0x...>".
        return '%s(type=%r, text=%r, line=%r, col=%r)' % (
            self.__class__.__name__,
            self.type,
            self.text,
            self.line,
            self.col,
        )
36
+
37
+
38
class ParseError(Exception):
    """Error carrying an optional message and an optional source location.

    Args:
        msg: description of the problem, or None.
        filename: name of the input being processed, or None.
        line: line number of the offending input, or None.
        col: column number of the offending input, or None.
    """

    def __init__(self, msg=None, filename=None, line=None, col=None):
        self.msg = msg
        self.filename = filename
        self.line = line
        self.col = col

    def __str__(self):
        """Return ``filename:line:col:msg``, leaving out the parts that are None."""
        parts = (self.filename, self.line, self.col, self.msg)
        # Identity test on purpose: ``!= None`` would call the part's own
        # __ne__, and falsy-but-valid values such as 0 must still be kept.
        return ':'.join(str(part) for part in parts if part is not None)
48
+
49
+
50
class Lexer:
    """Base lexer that turns a byte buffer into a stream of Token objects.

    The actual matching is delegated to ``self.scanner``; this class keeps
    track of the read position and of the line/column of each token.
    Derived classes must set ``scanner`` and provide a
    ``filter(type, text)`` method returning a ``(type, text)`` pair.
    """

    # should be overridden by derived classes
    scanner = None
    tabsize = 8

    newline_re = re.compile(br'\r\n?|\n')

    def __init__(self, buf=None, pos=0, filename=None, fp=None):
        """Set up the lexer over ``buf`` or over the contents of ``fp``.

        Args:
            buf: bytes-like buffer to scan; replaced when ``fp`` is given.
            pos: offset in ``buf`` at which scanning starts; replaced when
                ``fp`` is given.
            filename: name stored for error messages; when None and ``fp``
                is given, ``fp.name`` is used if it exists.
            fp: optional file object. It is memory-mapped when possible and
                read into memory as a whole otherwise.
        """
        if fp is not None:
            try:
                fileno = fp.fileno()
                length = os.path.getsize(fp.name)
                import mmap
            except Exception:
                # Not a real file (or mmap is unavailable): read the whole
                # file into memory. Only ordinary errors trigger this
                # fallback; KeyboardInterrupt/SystemExit must propagate.
                buf = fp.read()
                pos = 0
            else:
                # map the whole file into memory
                if length:
                    # length must not be zero
                    buf = mmap.mmap(fileno, length, access=mmap.ACCESS_READ)
                    # start at the descriptor's current offset
                    pos = os.lseek(fileno, 0, os.SEEK_CUR)
                else:
                    buf = b''
                    pos = 0

            if filename is None:
                filename = getattr(fp, 'name', None)

        self.buf = buf
        self.pos = pos
        self.line = 1
        self.col = 1
        self.filename = filename

    def __next__(self):
        """Return the next Token, discarding matches of type SKIP.

        Raises:
            ParseError: when the filtered token type is None, i.e. the
                input at the current position was not recognized.
        """
        while True:
            # save state: the token is reported at its starting position
            start = self.pos
            line = self.line
            col = self.col

            tok_type, tok_text, endpos = self.scanner.next(self.buf, start)
            assert isinstance(tok_text, bytes)
            assert start + len(tok_text) == endpos
            # line/column tracking uses the raw text, before filtering
            self.consume(tok_text)
            tok_type, tok_text = self.filter(tok_type, tok_text)
            self.pos = endpos

            if tok_type == SKIP:
                continue
            if tok_type is None:
                msg = 'unexpected char %r' % (tok_text,)
                raise ParseError(msg, self.filename, line, col)
            return Token(type=tok_type, text=tok_text, line=line, col=col)

    def consume(self, text):
        """Advance ``self.line`` and ``self.col`` past ``text`` (bytes)."""
        # update line number; ``pos`` ends up just after the last newline
        pos = 0
        for mo in self.newline_re.finditer(text, pos):
            self.line += 1
            self.col = 1
            pos = mo.end()

        # update column number: every tab jumps to the next tab stop
        while True:
            tabpos = text.find(b'\t', pos)
            if tabpos == -1:
                break
            self.col += tabpos - pos
            self.col = ((self.col - 1) // self.tabsize + 1) * self.tabsize + 1
            pos = tabpos + 1
        self.col += len(text) - pos
130
+
131
+
132
class DotLexer(Lexer):
    """Lexer driven by a DotScanner; reduces quoted and HTML ids to plain ID."""

    scanner = DotScanner()

    def filter(self, type, text):
        """Return ``(type, text)`` with STR_ID/HTML_ID tokens unwrapped to ID."""
        # TODO: handle charset
        if type == STR_ID:
            # drop the surrounding delimiters
            unquoted = text[1:-1]

            # line continuations
            for continuation in (b'\\\r\n', b'\\\r', b'\\\n'):
                unquoted = unquoted.replace(continuation, b'')

            # quotes
            # (layout engines recognize other escape codes, many of them
            # non-standard, but they are not translated here)
            return ID, unquoted.replace(b'\\"', b'"')

        if type == HTML_ID:
            return ID, text[1:-1]

        return type, text