git.rcrnstn.net - rcrnstn/xdot.py

History View file @ 8da24ad

@@ -564,13 +564,17 @@ class XDotAttrParser:
                              return self.pos < len(self.buf)
                          def read_code(self):
                     -        pos = self.buf.find(" ", self.pos)
                     +        pos = self.buf.find(b" ", self.pos)
                              res = self.buf[self.pos:pos]
                              self.pos = pos + 1
                     -        while self.pos < len(self.buf) and self.buf[self.pos].isspace():
                     -            self.pos += 1
                     +        self.skip_space()
                     +        res = res.decode('utf-8')
                              return res
                     +    def skip_space(self):
                     +        while self.pos < len(self.buf) and self.buf[self.pos : self.pos + 1].isspace():
                     +            self.pos += 1
+                    +
                          def read_int(self):
                              return int(self.read_code())
@@ -584,11 +588,11 @@ class XDotAttrParser:
                          def read_text(self):
                              num = self.read_int()
                     -        pos = self.buf.find("-", self.pos) + 1
                     +        pos = self.buf.find(b"-", self.pos) + 1
                              self.pos = pos + num
                              res = self.buf[pos:self.pos]
                     -        while self.pos < len(self.buf) and self.buf[self.pos].isspace():
                     -            self.pos += 1
                     +        self.skip_space()
                     +        res = res.decode('utf-8')
                              return res
                          def read_polygon(self):
@@ -819,13 +823,13 @@ class Scanner:
                              if self.ignorecase:
                                  flags |= re.IGNORECASE
                              self.tokens_re = re.compile(
                     -            '|'.join(['(' + regexp + ')' for type, regexp, test_lit in self.tokens]),
                     +            b'|'.join([b'(' + regexp + b')' for type, regexp, test_lit in self.tokens]),
                                   flags
+                             )
                          def next(self, buf, pos):
                              if pos >= len(buf):
                     -            return EOF, '', pos
                     +            return EOF, b'', pos
                              mo = self.tokens_re.match(buf, pos)
                              if mo:
                                  text = mo.group()
@@ -835,7 +839,7 @@ class Scanner:
                                      type = self.literals.get(text, type)
                                  return type, text, pos
                              else:
                     -            c = buf[pos]
                     +            c = buf[pos : pos + 1]
                                  return self.symbols.get(c, None), c, pos + 1
@@ -854,7 +858,7 @@ class Lexer:
                          scanner = None
                          tabsize = 8
                     -    newline_re = re.compile(r'\r\n?|\n')
                     +    newline_re = re.compile(br'\r\n?|\n')
                          def __init__(self, buf = None, pos = 0, filename = None, fp = None):
                              if fp is not None:
@@ -873,7 +877,7 @@ class Lexer:
                                          buf = mmap.mmap(fileno, length, access = mmap.ACCESS_READ)
                                          pos = os.lseek(fileno, 0, 1)
                                      else:
                     -                    buf = ''
                     +                    buf = b''
                                          pos = 0
                                  if filename is None:
@@ -896,6 +900,7 @@ class Lexer:
                                  col = self.col
                                  type, text, endpos = self.scanner.next(self.buf, pos)
                     +            assert isinstance(text, bytes)
                                  assert pos + len(text) == endpos
                                  self.consume(text)
                                  type, text = self.filter(type, text)
@@ -904,11 +909,7 @@ class Lexer:
                                  if type == SKIP:
                                      continue
                                  elif type is None:
                     -                msg = 'unexpected char '
                     -                if text >= ' ' and text <= '~':
                     -                    msg += "'%s'" % text
                     -                else:
                     -                    msg += "0x%X" % ord(text)
                     +                msg = 'unexpected char %r' % (text,)
                                      raise ParseError(msg, self.filename, line, col)
                                  else:
                                      break
@@ -924,7 +925,7 @@ class Lexer:
                              # update column number
                              while True:
                     -            tabpos = text.find('\t', pos)
                     +            tabpos = text.find(b'\t', pos)
                                  if tabpos == -1:
                                      break
                                  self.col += tabpos - pos
@@ -986,49 +987,49 @@ class DotScanner(Scanner):
                          tokens = [
                              # whitespace and comments
                              (SKIP,
                     -            r'[ \t\f\r\n\v]+|'
                     -            r'//[^\r\n]*|'
                     -            r'/\*.*?\*/|'
                     -            r'#[^\r\n]*',
                     +            br'[ \t\f\r\n\v]+|'
                     +            br'//[^\r\n]*|'
                     +            br'/\*.*?\*/|'
                     +            br'#[^\r\n]*',
                              False),
                              # Alphanumeric IDs
                     -        (ID, r'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True),
                     +        (ID, br'[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*', True),
                              # Numeric IDs
                     -        (ID, r'-?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)', False),
                     +        (ID, br'-?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)', False),
                              # String IDs
                     -        (STR_ID, r'"[^"\\]*(?:\\.[^"\\]*)*"', False),
                     +        (STR_ID, br'"[^"\\]*(?:\\.[^"\\]*)*"', False),
                              # HTML IDs
                     -        (HTML_ID, r'<[^<>]*(?:<[^<>]*>[^<>]*)*>', False),
                     +        (HTML_ID, br'<[^<>]*(?:<[^<>]*>[^<>]*)*>', False),
                              # Edge operators
                     -        (EDGE_OP, r'-[>-]', False),
                     +        (EDGE_OP, br'-[>-]', False),
+                         ]
                          # symbol table
                          symbols = {
                     -        '[': LSQUARE,
                     -        ']': RSQUARE,
                     -        '{': LCURLY,
                     -        '}': RCURLY,
                     -        ',': COMMA,
                     -        ':': COLON,
                     -        ';': SEMI,
                     -        '=': EQUAL,
                     -        '+': PLUS,
                     +        b'[': LSQUARE,
                     +        b']': RSQUARE,
                     +        b'{': LCURLY,
                     +        b'}': RCURLY,
                     +        b',': COMMA,
                     +        b':': COLON,
                     +        b';': SEMI,
                     +        b'=': EQUAL,
                     +        b'+': PLUS,
+                         }
                          # literal table
                          literals = {
                     -        'strict': STRICT,
                     -        'graph': GRAPH,
                     -        'digraph': DIGRAPH,
                     -        'node': NODE,
                     -        'edge': EDGE,
                     -        'subgraph': SUBGRAPH,
                     +        b'strict': STRICT,
                     +        b'graph': GRAPH,
                     +        b'digraph': DIGRAPH,
                     +        b'node': NODE,
                     +        b'edge': EDGE,
                     +        b'subgraph': SUBGRAPH,
+                         }
                          ignorecase = True
@@ -1044,12 +1045,12 @@ class DotLexer(Lexer):
                                  text = text[1:-1]
                                  # line continuations
                     -            text = text.replace('\\\r\n', '')
                     -            text = text.replace('\\\r', '')
                     -            text = text.replace('\\\n', '')
                     +            text = text.replace(b'\\\r\n', b'')
                     +            text = text.replace(b'\\\r', b'')
                     +            text = text.replace(b'\\\n', b'')
                                  # quotes
                     -            text = text.replace('\\"', '"')
                     +            text = text.replace(b'\\"', b'"')
                                  # layout engines recognize other escape codes (many non-standard)
                                  # but we don't translate them here
@@ -1137,6 +1138,7 @@ class DotParser(Parser):
                                  self.consume()
                                  while self.lookahead.type != RSQUARE:
                                      name, value = self.parse_attr()
                     +                name = name.decode('utf-8')
                                      attrs[name] = value
                                      if self.lookahead.type == COMMA:
                                          self.consume()
@@ -1149,7 +1151,7 @@ class DotParser(Parser):
                                  self.consume()
                                  value = self.parse_id()
                              else:
                     -            value = 'true'
                     +            value = b'true'
                              return name, value
                          def parse_node_id(self):
@@ -1218,7 +1220,7 @@ class XDotParser(DotParser):
                                      return
                                  if bb:
                     -                xmin, ymin, xmax, ymax = map(float, bb.split(","))
                     +                xmin, ymin, xmax, ymax = map(float, bb.split(b","))
                                      self.xoffset = -xmin
                                      self.yoffset = -ymax
@@ -1275,17 +1277,16 @@ class XDotParser(DotParser):
                          def parse(self):
                              DotParser.parse(self)
+                    -
                              return Graph(self.width, self.height, self.shapes, self.nodes, self.edges)
                          def parse_node_pos(self, pos):
                     -        x, y = pos.split(",")
                     +        x, y = pos.split(b",")
                              return self.transform(float(x), float(y))
                          def parse_edge_pos(self, pos):
                              points = []
                     -        for entry in pos.split(' '):
                     -            fields = entry.split(',')
                     +        for entry in pos.split(b' '):
                     +            fields = entry.split(b',')
                                  try:
                                      x, y = fields
                                  except ValueError:
@@ -1567,7 +1568,7 @@ class DotWidget(Gtk.DrawingArea):
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      shell=False,
                     -                universal_newlines=True
                     +                universal_newlines=False
+                                 )
                              except OSError as exc:
                                  error = '%s: %s' % (self.filter, exc.strerror)
@@ -1576,6 +1577,7 @@ class DotWidget(Gtk.DrawingArea):
                                  xdotcode, error = p.communicate(dotcode)
                              error = error.rstrip()
                              if error:
                     +            error = error.decode()
                                  sys.stderr.write(error + '\n')
                              if p.returncode != 0:
                                  dialog = Gtk.MessageDialog(type=Gtk.MessageType.ERROR,
@@ -1589,6 +1591,8 @@ class DotWidget(Gtk.DrawingArea):
                          def set_dotcode(self, dotcode, filename=None):
                              self.openfilename = None
                     +        if isinstance(dotcode, str):
                     +            dotcode = dotcode.encode('utf-8')
                              xdotcode = self.run_filter(dotcode)
                              if xdotcode is None:
                                  return False
@@ -1611,6 +1615,7 @@ class DotWidget(Gtk.DrawingArea):
                                  return True
                          def set_xdotcode(self, xdotcode):
                     +        assert isinstance(xdotcode, bytes)
                              parser = XDotParser(xdotcode)
                              self.graph = parser.parse()
                              self.zoom_image(self.zoom_ratio, center=True)

@@ -80,7 +80,7 @@ def main():
                              sys.stdout.write(arg + '\n')
                              sys.stdout.flush()
                              name, ext = os.path.splitext(os.path.basename(arg))
                     -        dotcode = open(arg).read()
                     +        dotcode = open(arg, 'rb').read()
                              widget = TestDotWidget(name)
                              window = DotWindow(widget)
                              window.connect('delete-event', Gtk.main_quit)

Parse xdot with bytes, not unicode strings.