This is because several fields in xdot give their length as a number of
bytes, not characters.
| ... | ... |
@@ -80,7 +80,7 @@ def main(): |
| 80 | 80 |
sys.stdout.write(arg + '\n') |
| 81 | 81 |
sys.stdout.flush() |
| 82 | 82 |
name, ext = os.path.splitext(os.path.basename(arg)) |
| 83 |
- dotcode = open(arg).read() |
|
| 83 |
+ dotcode = open(arg, 'rb').read() |
|
| 84 | 84 |
widget = TestDotWidget(name) |
| 85 | 85 |
window = DotWindow(widget) |
| 86 | 86 |
window.connect('delete-event', Gtk.main_quit)
|
| ... | ... |
@@ -564,13 +564,17 @@ class XDotAttrParser: |
| 564 | 564 |
return self.pos < len(self.buf) |
| 565 | 565 |
|
| 566 | 566 |
def read_code(self): |
| 567 |
- pos = self.buf.find(" ", self.pos)
|
|
| 567 |
+ pos = self.buf.find(b" ", self.pos) |
|
| 568 | 568 |
res = self.buf[self.pos:pos] |
| 569 | 569 |
self.pos = pos + 1 |
| 570 |
- while self.pos < len(self.buf) and self.buf[self.pos].isspace(): |
|
| 571 |
- self.pos += 1 |
|
| 570 |
+ self.skip_space() |
|
| 571 |
+ res = res.decode('utf-8')
|
|
| 572 | 572 |
return res |
| 573 | 573 |
|
| 574 |
+ def skip_space(self): |
|
| 575 |
+ while self.pos < len(self.buf) and self.buf[self.pos : self.pos + 1].isspace(): |
|
| 576 |
+ self.pos += 1 |
|
| 577 |
+ |
|
| 574 | 578 |
def read_int(self): |
| 575 | 579 |
return int(self.read_code()) |
| 576 | 580 |
|
| ... | ... |
@@ -584,11 +588,11 @@ class XDotAttrParser: |
| 584 | 588 |
|
| 585 | 589 |
def read_text(self): |
| 586 | 590 |
num = self.read_int() |
| 587 |
- pos = self.buf.find("-", self.pos) + 1
|
|
| 591 |
+ pos = self.buf.find(b"-", self.pos) + 1 |
|
| 588 | 592 |
self.pos = pos + num |
| 589 | 593 |
res = self.buf[pos:self.pos] |
| 590 |
- while self.pos < len(self.buf) and self.buf[self.pos].isspace(): |
|
| 591 |
- self.pos += 1 |
|
| 594 |
+ self.skip_space() |
|
| 595 |
+ res = res.decode('utf-8')
|
|
| 592 | 596 |
return res |
| 593 | 597 |
|
| 594 | 598 |
def read_polygon(self): |
| ... | ... |
@@ -819,13 +823,13 @@ class Scanner: |
| 819 | 823 |
if self.ignorecase: |
| 820 | 824 |
flags |= re.IGNORECASE |
| 821 | 825 |
self.tokens_re = re.compile( |
| 822 |
- '|'.join(['(' + regexp + ')' for type, regexp, test_lit in self.tokens]),
|
|
| 826 |
+ b'|'.join([b'(' + regexp + b')' for type, regexp, test_lit in self.tokens]),
|
|
| 823 | 827 |
flags |
| 824 | 828 |
) |
| 825 | 829 |
|
| 826 | 830 |
def next(self, buf, pos): |
| 827 | 831 |
if pos >= len(buf): |
| 828 |
- return EOF, '', pos |
|
| 832 |
+ return EOF, b'', pos |
|
| 829 | 833 |
mo = self.tokens_re.match(buf, pos) |
| 830 | 834 |
if mo: |
| 831 | 835 |
text = mo.group() |
| ... | ... |
@@ -835,7 +839,7 @@ class Scanner: |
| 835 | 839 |
type = self.literals.get(text, type) |
| 836 | 840 |
return type, text, pos |
| 837 | 841 |
else: |
| 838 |
- c = buf[pos] |
|
| 842 |
+ c = buf[pos : pos + 1] |
|
| 839 | 843 |
return self.symbols.get(c, None), c, pos + 1 |
| 840 | 844 |
|
| 841 | 845 |
|
| ... | ... |
@@ -854,7 +858,7 @@ class Lexer: |
| 854 | 858 |
scanner = None |
| 855 | 859 |
tabsize = 8 |
| 856 | 860 |
|
| 857 |
- newline_re = re.compile(r'\r\n?|\n') |
|
| 861 |
+ newline_re = re.compile(br'\r\n?|\n') |
|
| 858 | 862 |
|
| 859 | 863 |
def __init__(self, buf = None, pos = 0, filename = None, fp = None): |
| 860 | 864 |
if fp is not None: |
| ... | ... |
@@ -873,7 +877,7 @@ class Lexer: |
| 873 | 877 |
buf = mmap.mmap(fileno, length, access = mmap.ACCESS_READ) |
| 874 | 878 |
pos = os.lseek(fileno, 0, 1) |
| 875 | 879 |
else: |
| 876 |
- buf = '' |
|
| 880 |
+ buf = b'' |
|
| 877 | 881 |
pos = 0 |
| 878 | 882 |
|
| 879 | 883 |
if filename is None: |
| ... | ... |
@@ -896,6 +900,7 @@ class Lexer: |
| 896 | 900 |
col = self.col |
| 897 | 901 |
|
| 898 | 902 |
type, text, endpos = self.scanner.next(self.buf, pos) |
| 903 |
+ assert isinstance(text, bytes) |
|
| 899 | 904 |
assert pos + len(text) == endpos |
| 900 | 905 |
self.consume(text) |
| 901 | 906 |
type, text = self.filter(type, text) |
| ... | ... |
@@ -904,11 +909,7 @@ class Lexer: |
| 904 | 909 |
if type == SKIP: |
| 905 | 910 |
continue |
| 906 | 911 |
elif type is None: |
| 907 |
- msg = 'unexpected char ' |
|
| 908 |
- if text >= ' ' and text <= '~': |
|
| 909 |
- msg += "'%s'" % text |
|
| 910 |
- else: |
|
| 911 |
- msg += "0x%X" % ord(text) |
|
| 912 |
+ msg = 'unexpected char %r' % (text,) |
|
| 912 | 913 |
raise ParseError(msg, self.filename, line, col) |
| 913 | 914 |
else: |
| 914 | 915 |
break |
| ... | ... |
@@ -924,7 +925,7 @@ class Lexer: |
| 924 | 925 |
|
| 925 | 926 |
# update column number |
| 926 | 927 |
while True: |
| 927 |
- tabpos = text.find('\t', pos)
|
|
| 928 |
+ tabpos = text.find(b'\t', pos) |
|
| 928 | 929 |
if tabpos == -1: |
| 929 | 930 |
break |
| 930 | 931 |
self.col += tabpos - pos |
| ... | ... |
@@ -986,49 +987,49 @@ class DotScanner(Scanner): |
| 986 | 987 |
tokens = [ |
| 987 | 988 |
# whitespace and comments |
| 988 | 989 |
(SKIP, |
| 989 |
- r'[ \t\f\r\n\v]+|' |
|
| 990 |
- r'//[^\r\n]*|' |
|
| 991 |
- r'/\*.*?\*/|' |
|
| 992 |
- r'#[^\r\n]*', |
|
| 990 |
+ br'[ \t\f\r\n\v]+|' |
|
| 991 |
+ br'//[^\r\n]*|' |
|
| 992 |
+ br'/\*.*?\*/|' |
|
| 993 |
+ br'#[^\r\n]*', |
|
| 993 | 994 |
False), |
| 994 | 995 |
|
| 995 | 996 |
# Alphanumeric IDs |
| 996 |
- (ID, r'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True), |
|
| 997 |
+ (ID, br'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True), |
|
| 997 | 998 |
|
| 998 | 999 |
# Numeric IDs |
| 999 |
- (ID, r'-?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)', False), |
|
| 1000 |
+ (ID, br'-?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)', False), |
|
| 1000 | 1001 |
|
| 1001 | 1002 |
# String IDs |
| 1002 |
- (STR_ID, r'"[^"\\]*(?:\\.[^"\\]*)*"', False), |
|
| 1003 |
+ (STR_ID, br'"[^"\\]*(?:\\.[^"\\]*)*"', False), |
|
| 1003 | 1004 |
|
| 1004 | 1005 |
# HTML IDs |
| 1005 |
- (HTML_ID, r'<[^<>]*(?:<[^<>]*>[^<>]*)*>', False), |
|
| 1006 |
+ (HTML_ID, br'<[^<>]*(?:<[^<>]*>[^<>]*)*>', False), |
|
| 1006 | 1007 |
|
| 1007 | 1008 |
# Edge operators |
| 1008 |
- (EDGE_OP, r'-[>-]', False), |
|
| 1009 |
+ (EDGE_OP, br'-[>-]', False), |
|
| 1009 | 1010 |
] |
| 1010 | 1011 |
|
| 1011 | 1012 |
# symbol table |
| 1012 | 1013 |
symbols = {
|
| 1013 |
- '[': LSQUARE, |
|
| 1014 |
- ']': RSQUARE, |
|
| 1015 |
- '{': LCURLY,
|
|
| 1016 |
- '}': RCURLY, |
|
| 1017 |
- ',': COMMA, |
|
| 1018 |
- ':': COLON, |
|
| 1019 |
- ';': SEMI, |
|
| 1020 |
- '=': EQUAL, |
|
| 1021 |
- '+': PLUS, |
|
| 1014 |
+ b'[': LSQUARE, |
|
| 1015 |
+ b']': RSQUARE, |
|
| 1016 |
+ b'{': LCURLY,
|
|
| 1017 |
+ b'}': RCURLY, |
|
| 1018 |
+ b',': COMMA, |
|
| 1019 |
+ b':': COLON, |
|
| 1020 |
+ b';': SEMI, |
|
| 1021 |
+ b'=': EQUAL, |
|
| 1022 |
+ b'+': PLUS, |
|
| 1022 | 1023 |
} |
| 1023 | 1024 |
|
| 1024 | 1025 |
# literal table |
| 1025 | 1026 |
literals = {
|
| 1026 |
- 'strict': STRICT, |
|
| 1027 |
- 'graph': GRAPH, |
|
| 1028 |
- 'digraph': DIGRAPH, |
|
| 1029 |
- 'node': NODE, |
|
| 1030 |
- 'edge': EDGE, |
|
| 1031 |
- 'subgraph': SUBGRAPH, |
|
| 1027 |
+ b'strict': STRICT, |
|
| 1028 |
+ b'graph': GRAPH, |
|
| 1029 |
+ b'digraph': DIGRAPH, |
|
| 1030 |
+ b'node': NODE, |
|
| 1031 |
+ b'edge': EDGE, |
|
| 1032 |
+ b'subgraph': SUBGRAPH, |
|
| 1032 | 1033 |
} |
| 1033 | 1034 |
|
| 1034 | 1035 |
ignorecase = True |
| ... | ... |
@@ -1044,12 +1045,12 @@ class DotLexer(Lexer): |
| 1044 | 1045 |
text = text[1:-1] |
| 1045 | 1046 |
|
| 1046 | 1047 |
# line continuations |
| 1047 |
- text = text.replace('\\\r\n', '')
|
|
| 1048 |
- text = text.replace('\\\r', '')
|
|
| 1049 |
- text = text.replace('\\\n', '')
|
|
| 1048 |
+ text = text.replace(b'\\\r\n', b'') |
|
| 1049 |
+ text = text.replace(b'\\\r', b'') |
|
| 1050 |
+ text = text.replace(b'\\\n', b'') |
|
| 1050 | 1051 |
|
| 1051 | 1052 |
# quotes |
| 1052 |
- text = text.replace('\\"', '"')
|
|
| 1053 |
+ text = text.replace(b'\\"', b'"') |
|
| 1053 | 1054 |
|
| 1054 | 1055 |
# layout engines recognize other escape codes (many non-standard) |
| 1055 | 1056 |
# but we don't translate them here |
| ... | ... |
@@ -1137,6 +1138,7 @@ class DotParser(Parser): |
| 1137 | 1138 |
self.consume() |
| 1138 | 1139 |
while self.lookahead.type != RSQUARE: |
| 1139 | 1140 |
name, value = self.parse_attr() |
| 1141 |
+ name = name.decode('utf-8')
|
|
| 1140 | 1142 |
attrs[name] = value |
| 1141 | 1143 |
if self.lookahead.type == COMMA: |
| 1142 | 1144 |
self.consume() |
| ... | ... |
@@ -1149,7 +1151,7 @@ class DotParser(Parser): |
| 1149 | 1151 |
self.consume() |
| 1150 | 1152 |
value = self.parse_id() |
| 1151 | 1153 |
else: |
| 1152 |
- value = 'true' |
|
| 1154 |
+ value = b'true' |
|
| 1153 | 1155 |
return name, value |
| 1154 | 1156 |
|
| 1155 | 1157 |
def parse_node_id(self): |
| ... | ... |
@@ -1218,7 +1220,7 @@ class XDotParser(DotParser): |
| 1218 | 1220 |
return |
| 1219 | 1221 |
|
| 1220 | 1222 |
if bb: |
| 1221 |
- xmin, ymin, xmax, ymax = map(float, bb.split(","))
|
|
| 1223 |
+ xmin, ymin, xmax, ymax = map(float, bb.split(b",")) |
|
| 1222 | 1224 |
|
| 1223 | 1225 |
self.xoffset = -xmin |
| 1224 | 1226 |
self.yoffset = -ymax |
| ... | ... |
@@ -1275,17 +1277,16 @@ class XDotParser(DotParser): |
| 1275 | 1277 |
|
| 1276 | 1278 |
def parse(self): |
| 1277 | 1279 |
DotParser.parse(self) |
| 1278 |
- |
|
| 1279 | 1280 |
return Graph(self.width, self.height, self.shapes, self.nodes, self.edges) |
| 1280 | 1281 |
|
| 1281 | 1282 |
def parse_node_pos(self, pos): |
| 1282 |
- x, y = pos.split(",")
|
|
| 1283 |
+ x, y = pos.split(b",") |
|
| 1283 | 1284 |
return self.transform(float(x), float(y)) |
| 1284 | 1285 |
|
| 1285 | 1286 |
def parse_edge_pos(self, pos): |
| 1286 | 1287 |
points = [] |
| 1287 |
- for entry in pos.split(' '):
|
|
| 1288 |
- fields = entry.split(',')
|
|
| 1288 |
+ for entry in pos.split(b' '): |
|
| 1289 |
+ fields = entry.split(b',') |
|
| 1289 | 1290 |
try: |
| 1290 | 1291 |
x, y = fields |
| 1291 | 1292 |
except ValueError: |
| ... | ... |
@@ -1567,7 +1568,7 @@ class DotWidget(Gtk.DrawingArea): |
| 1567 | 1568 |
stdout=subprocess.PIPE, |
| 1568 | 1569 |
stderr=subprocess.PIPE, |
| 1569 | 1570 |
shell=False, |
| 1570 |
- universal_newlines=True |
|
| 1571 |
+ universal_newlines=False |
|
| 1571 | 1572 |
) |
| 1572 | 1573 |
except OSError as exc: |
| 1573 | 1574 |
error = '%s: %s' % (self.filter, exc.strerror) |
| ... | ... |
@@ -1576,6 +1577,7 @@ class DotWidget(Gtk.DrawingArea): |
| 1576 | 1577 |
xdotcode, error = p.communicate(dotcode) |
| 1577 | 1578 |
error = error.rstrip() |
| 1578 | 1579 |
if error: |
| 1580 |
+ error = error.decode() |
|
| 1579 | 1581 |
sys.stderr.write(error + '\n') |
| 1580 | 1582 |
if p.returncode != 0: |
| 1581 | 1583 |
dialog = Gtk.MessageDialog(type=Gtk.MessageType.ERROR, |
| ... | ... |
@@ -1589,6 +1591,8 @@ class DotWidget(Gtk.DrawingArea): |
| 1589 | 1591 |
|
| 1590 | 1592 |
def set_dotcode(self, dotcode, filename=None): |
| 1591 | 1593 |
self.openfilename = None |
| 1594 |
+ if isinstance(dotcode, str): |
|
| 1595 |
+ dotcode = dotcode.encode('utf-8')
|
|
| 1592 | 1596 |
xdotcode = self.run_filter(dotcode) |
| 1593 | 1597 |
if xdotcode is None: |
| 1594 | 1598 |
return False |
| ... | ... |
@@ -1611,6 +1615,7 @@ class DotWidget(Gtk.DrawingArea): |
| 1611 | 1615 |
return True |
| 1612 | 1616 |
|
| 1613 | 1617 |
def set_xdotcode(self, xdotcode): |
| 1618 |
+ assert isinstance(xdotcode, bytes) |
|
| 1614 | 1619 |
parser = XDotParser(xdotcode) |
| 1615 | 1620 |
self.graph = parser.parse() |
| 1616 | 1621 |
self.zoom_image(self.zoom_ratio, center=True) |