This is because several fields in the xdot format give their length as a
number of bytes, not characters.
... | ... |
@@ -80,7 +80,7 @@ def main(): |
80 | 80 |
sys.stdout.write(arg + '\n') |
81 | 81 |
sys.stdout.flush() |
82 | 82 |
name, ext = os.path.splitext(os.path.basename(arg)) |
83 |
- dotcode = open(arg).read() |
|
83 |
+ dotcode = open(arg, 'rb').read() |
|
84 | 84 |
widget = TestDotWidget(name) |
85 | 85 |
window = DotWindow(widget) |
86 | 86 |
window.connect('delete-event', Gtk.main_quit) |
... | ... |
@@ -564,13 +564,17 @@ class XDotAttrParser: |
564 | 564 |
return self.pos < len(self.buf) |
565 | 565 |
|
566 | 566 |
def read_code(self): |
567 |
- pos = self.buf.find(" ", self.pos) |
|
567 |
+ pos = self.buf.find(b" ", self.pos) |
|
568 | 568 |
res = self.buf[self.pos:pos] |
569 | 569 |
self.pos = pos + 1 |
570 |
- while self.pos < len(self.buf) and self.buf[self.pos].isspace(): |
|
571 |
- self.pos += 1 |
|
570 |
+ self.skip_space() |
|
571 |
+ res = res.decode('utf-8') |
|
572 | 572 |
return res |
573 | 573 |
|
574 |
+ def skip_space(self): |
|
575 |
+ while self.pos < len(self.buf) and self.buf[self.pos : self.pos + 1].isspace(): |
|
576 |
+ self.pos += 1 |
|
577 |
+ |
|
574 | 578 |
def read_int(self): |
575 | 579 |
return int(self.read_code()) |
576 | 580 |
|
... | ... |
@@ -584,11 +588,11 @@ class XDotAttrParser: |
584 | 588 |
|
585 | 589 |
def read_text(self): |
586 | 590 |
num = self.read_int() |
587 |
- pos = self.buf.find("-", self.pos) + 1 |
|
591 |
+ pos = self.buf.find(b"-", self.pos) + 1 |
|
588 | 592 |
self.pos = pos + num |
589 | 593 |
res = self.buf[pos:self.pos] |
590 |
- while self.pos < len(self.buf) and self.buf[self.pos].isspace(): |
|
591 |
- self.pos += 1 |
|
594 |
+ self.skip_space() |
|
595 |
+ res = res.decode('utf-8') |
|
592 | 596 |
return res |
593 | 597 |
|
594 | 598 |
def read_polygon(self): |
... | ... |
@@ -819,13 +823,13 @@ class Scanner: |
819 | 823 |
if self.ignorecase: |
820 | 824 |
flags |= re.IGNORECASE |
821 | 825 |
self.tokens_re = re.compile( |
822 |
- '|'.join(['(' + regexp + ')' for type, regexp, test_lit in self.tokens]), |
|
826 |
+ b'|'.join([b'(' + regexp + b')' for type, regexp, test_lit in self.tokens]), |
|
823 | 827 |
flags |
824 | 828 |
) |
825 | 829 |
|
826 | 830 |
def next(self, buf, pos): |
827 | 831 |
if pos >= len(buf): |
828 |
- return EOF, '', pos |
|
832 |
+ return EOF, b'', pos |
|
829 | 833 |
mo = self.tokens_re.match(buf, pos) |
830 | 834 |
if mo: |
831 | 835 |
text = mo.group() |
... | ... |
@@ -835,7 +839,7 @@ class Scanner: |
835 | 839 |
type = self.literals.get(text, type) |
836 | 840 |
return type, text, pos |
837 | 841 |
else: |
838 |
- c = buf[pos] |
|
842 |
+ c = buf[pos : pos + 1] |
|
839 | 843 |
return self.symbols.get(c, None), c, pos + 1 |
840 | 844 |
|
841 | 845 |
|
... | ... |
@@ -854,7 +858,7 @@ class Lexer: |
854 | 858 |
scanner = None |
855 | 859 |
tabsize = 8 |
856 | 860 |
|
857 |
- newline_re = re.compile(r'\r\n?|\n') |
|
861 |
+ newline_re = re.compile(br'\r\n?|\n') |
|
858 | 862 |
|
859 | 863 |
def __init__(self, buf = None, pos = 0, filename = None, fp = None): |
860 | 864 |
if fp is not None: |
... | ... |
@@ -873,7 +877,7 @@ class Lexer: |
873 | 877 |
buf = mmap.mmap(fileno, length, access = mmap.ACCESS_READ) |
874 | 878 |
pos = os.lseek(fileno, 0, 1) |
875 | 879 |
else: |
876 |
- buf = '' |
|
880 |
+ buf = b'' |
|
877 | 881 |
pos = 0 |
878 | 882 |
|
879 | 883 |
if filename is None: |
... | ... |
@@ -896,6 +900,7 @@ class Lexer: |
896 | 900 |
col = self.col |
897 | 901 |
|
898 | 902 |
type, text, endpos = self.scanner.next(self.buf, pos) |
903 |
+ assert isinstance(text, bytes) |
|
899 | 904 |
assert pos + len(text) == endpos |
900 | 905 |
self.consume(text) |
901 | 906 |
type, text = self.filter(type, text) |
... | ... |
@@ -904,11 +909,7 @@ class Lexer: |
904 | 909 |
if type == SKIP: |
905 | 910 |
continue |
906 | 911 |
elif type is None: |
907 |
- msg = 'unexpected char ' |
|
908 |
- if text >= ' ' and text <= '~': |
|
909 |
- msg += "'%s'" % text |
|
910 |
- else: |
|
911 |
- msg += "0x%X" % ord(text) |
|
912 |
+ msg = 'unexpected char %r' % (text,) |
|
912 | 913 |
raise ParseError(msg, self.filename, line, col) |
913 | 914 |
else: |
914 | 915 |
break |
... | ... |
@@ -924,7 +925,7 @@ class Lexer: |
924 | 925 |
|
925 | 926 |
# update column number |
926 | 927 |
while True: |
927 |
- tabpos = text.find('\t', pos) |
|
928 |
+ tabpos = text.find(b'\t', pos) |
|
928 | 929 |
if tabpos == -1: |
929 | 930 |
break |
930 | 931 |
self.col += tabpos - pos |
... | ... |
@@ -986,49 +987,49 @@ class DotScanner(Scanner): |
986 | 987 |
tokens = [ |
987 | 988 |
# whitespace and comments |
988 | 989 |
(SKIP, |
989 |
- r'[ \t\f\r\n\v]+|' |
|
990 |
- r'//[^\r\n]*|' |
|
991 |
- r'/\*.*?\*/|' |
|
992 |
- r'#[^\r\n]*', |
|
990 |
+ br'[ \t\f\r\n\v]+|' |
|
991 |
+ br'//[^\r\n]*|' |
|
992 |
+ br'/\*.*?\*/|' |
|
993 |
+ br'#[^\r\n]*', |
|
993 | 994 |
False), |
994 | 995 |
|
995 | 996 |
# Alphanumeric IDs |
996 |
- (ID, r'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True), |
|
997 |
+ (ID, br'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True), |
|
997 | 998 |
|
998 | 999 |
# Numeric IDs |
999 |
- (ID, r'-?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)', False), |
|
1000 |
+ (ID, br'-?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)', False), |
|
1000 | 1001 |
|
1001 | 1002 |
# String IDs |
1002 |
- (STR_ID, r'"[^"\\]*(?:\\.[^"\\]*)*"', False), |
|
1003 |
+ (STR_ID, br'"[^"\\]*(?:\\.[^"\\]*)*"', False), |
|
1003 | 1004 |
|
1004 | 1005 |
# HTML IDs |
1005 |
- (HTML_ID, r'<[^<>]*(?:<[^<>]*>[^<>]*)*>', False), |
|
1006 |
+ (HTML_ID, br'<[^<>]*(?:<[^<>]*>[^<>]*)*>', False), |
|
1006 | 1007 |
|
1007 | 1008 |
# Edge operators |
1008 |
- (EDGE_OP, r'-[>-]', False), |
|
1009 |
+ (EDGE_OP, br'-[>-]', False), |
|
1009 | 1010 |
] |
1010 | 1011 |
|
1011 | 1012 |
# symbol table |
1012 | 1013 |
symbols = { |
1013 |
- '[': LSQUARE, |
|
1014 |
- ']': RSQUARE, |
|
1015 |
- '{': LCURLY, |
|
1016 |
- '}': RCURLY, |
|
1017 |
- ',': COMMA, |
|
1018 |
- ':': COLON, |
|
1019 |
- ';': SEMI, |
|
1020 |
- '=': EQUAL, |
|
1021 |
- '+': PLUS, |
|
1014 |
+ b'[': LSQUARE, |
|
1015 |
+ b']': RSQUARE, |
|
1016 |
+ b'{': LCURLY, |
|
1017 |
+ b'}': RCURLY, |
|
1018 |
+ b',': COMMA, |
|
1019 |
+ b':': COLON, |
|
1020 |
+ b';': SEMI, |
|
1021 |
+ b'=': EQUAL, |
|
1022 |
+ b'+': PLUS, |
|
1022 | 1023 |
} |
1023 | 1024 |
|
1024 | 1025 |
# literal table |
1025 | 1026 |
literals = { |
1026 |
- 'strict': STRICT, |
|
1027 |
- 'graph': GRAPH, |
|
1028 |
- 'digraph': DIGRAPH, |
|
1029 |
- 'node': NODE, |
|
1030 |
- 'edge': EDGE, |
|
1031 |
- 'subgraph': SUBGRAPH, |
|
1027 |
+ b'strict': STRICT, |
|
1028 |
+ b'graph': GRAPH, |
|
1029 |
+ b'digraph': DIGRAPH, |
|
1030 |
+ b'node': NODE, |
|
1031 |
+ b'edge': EDGE, |
|
1032 |
+ b'subgraph': SUBGRAPH, |
|
1032 | 1033 |
} |
1033 | 1034 |
|
1034 | 1035 |
ignorecase = True |
... | ... |
@@ -1044,12 +1045,12 @@ class DotLexer(Lexer): |
1044 | 1045 |
text = text[1:-1] |
1045 | 1046 |
|
1046 | 1047 |
# line continuations |
1047 |
- text = text.replace('\\\r\n', '') |
|
1048 |
- text = text.replace('\\\r', '') |
|
1049 |
- text = text.replace('\\\n', '') |
|
1048 |
+ text = text.replace(b'\\\r\n', b'') |
|
1049 |
+ text = text.replace(b'\\\r', b'') |
|
1050 |
+ text = text.replace(b'\\\n', b'') |
|
1050 | 1051 |
|
1051 | 1052 |
# quotes |
1052 |
- text = text.replace('\\"', '"') |
|
1053 |
+ text = text.replace(b'\\"', b'"') |
|
1053 | 1054 |
|
1054 | 1055 |
# layout engines recognize other escape codes (many non-standard) |
1055 | 1056 |
# but we don't translate them here |
... | ... |
@@ -1137,6 +1138,7 @@ class DotParser(Parser): |
1137 | 1138 |
self.consume() |
1138 | 1139 |
while self.lookahead.type != RSQUARE: |
1139 | 1140 |
name, value = self.parse_attr() |
1141 |
+ name = name.decode('utf-8') |
|
1140 | 1142 |
attrs[name] = value |
1141 | 1143 |
if self.lookahead.type == COMMA: |
1142 | 1144 |
self.consume() |
... | ... |
@@ -1149,7 +1151,7 @@ class DotParser(Parser): |
1149 | 1151 |
self.consume() |
1150 | 1152 |
value = self.parse_id() |
1151 | 1153 |
else: |
1152 |
- value = 'true' |
|
1154 |
+ value = b'true' |
|
1153 | 1155 |
return name, value |
1154 | 1156 |
|
1155 | 1157 |
def parse_node_id(self): |
... | ... |
@@ -1218,7 +1220,7 @@ class XDotParser(DotParser): |
1218 | 1220 |
return |
1219 | 1221 |
|
1220 | 1222 |
if bb: |
1221 |
- xmin, ymin, xmax, ymax = map(float, bb.split(",")) |
|
1223 |
+ xmin, ymin, xmax, ymax = map(float, bb.split(b",")) |
|
1222 | 1224 |
|
1223 | 1225 |
self.xoffset = -xmin |
1224 | 1226 |
self.yoffset = -ymax |
... | ... |
@@ -1275,17 +1277,16 @@ class XDotParser(DotParser): |
1275 | 1277 |
|
1276 | 1278 |
def parse(self): |
1277 | 1279 |
DotParser.parse(self) |
1278 |
- |
|
1279 | 1280 |
return Graph(self.width, self.height, self.shapes, self.nodes, self.edges) |
1280 | 1281 |
|
1281 | 1282 |
def parse_node_pos(self, pos): |
1282 |
- x, y = pos.split(",") |
|
1283 |
+ x, y = pos.split(b",") |
|
1283 | 1284 |
return self.transform(float(x), float(y)) |
1284 | 1285 |
|
1285 | 1286 |
def parse_edge_pos(self, pos): |
1286 | 1287 |
points = [] |
1287 |
- for entry in pos.split(' '): |
|
1288 |
- fields = entry.split(',') |
|
1288 |
+ for entry in pos.split(b' '): |
|
1289 |
+ fields = entry.split(b',') |
|
1289 | 1290 |
try: |
1290 | 1291 |
x, y = fields |
1291 | 1292 |
except ValueError: |
... | ... |
@@ -1567,7 +1568,7 @@ class DotWidget(Gtk.DrawingArea): |
1567 | 1568 |
stdout=subprocess.PIPE, |
1568 | 1569 |
stderr=subprocess.PIPE, |
1569 | 1570 |
shell=False, |
1570 |
- universal_newlines=True |
|
1571 |
+ universal_newlines=False |
|
1571 | 1572 |
) |
1572 | 1573 |
except OSError as exc: |
1573 | 1574 |
error = '%s: %s' % (self.filter, exc.strerror) |
... | ... |
@@ -1576,6 +1577,7 @@ class DotWidget(Gtk.DrawingArea): |
1576 | 1577 |
xdotcode, error = p.communicate(dotcode) |
1577 | 1578 |
error = error.rstrip() |
1578 | 1579 |
if error: |
1580 |
+ error = error.decode() |
|
1579 | 1581 |
sys.stderr.write(error + '\n') |
1580 | 1582 |
if p.returncode != 0: |
1581 | 1583 |
dialog = Gtk.MessageDialog(type=Gtk.MessageType.ERROR, |
... | ... |
@@ -1589,6 +1591,8 @@ class DotWidget(Gtk.DrawingArea): |
1589 | 1591 |
|
1590 | 1592 |
def set_dotcode(self, dotcode, filename=None): |
1591 | 1593 |
self.openfilename = None |
1594 |
+ if isinstance(dotcode, str): |
|
1595 |
+ dotcode = dotcode.encode('utf-8') |
|
1592 | 1596 |
xdotcode = self.run_filter(dotcode) |
1593 | 1597 |
if xdotcode is None: |
1594 | 1598 |
return False |
... | ... |
@@ -1611,6 +1615,7 @@ class DotWidget(Gtk.DrawingArea): |
1611 | 1615 |
return True |
1612 | 1616 |
|
1613 | 1617 |
def set_xdotcode(self, xdotcode): |
1618 |
+ assert isinstance(xdotcode, bytes) |
|
1614 | 1619 |
parser = XDotParser(xdotcode) |
1615 | 1620 |
self.graph = parser.parse() |
1616 | 1621 |
self.zoom_image(self.zoom_ratio, center=True) |