Browse code

Change grep to internal

Robert Cranston authored on 22/01/2023 00:33:32
Showing 2 changed files

... ...
@@ -153,9 +153,9 @@ Environment variables:
153 153
     `enums-tree`, `params`, `params-tree`, `audit`, and `audit-tree` commands.
154 154
     It uses the same format (and defaults) as GREP_COLORS, i.e. a
155 155
     colon-separated list of capabilities: `ms` (matching selected), `fn` (file
156
-    name), `ln` (line number), `se` (separators). Added custom capabilities
157
-    are: `ve` (version), `ex` (extension), `un` (unsupported). Defaults to
158
-    `ms=01;31:fn=35:ln=32:se=36:ve=01;34:ex=34:un=01;33`.
156
+    name), `ln` (line and column number), `se` (separators). Added custom
157
+    capabilities are: `ve` (version), `ex` (extension), `un` (unsupported).
158
+    Defaults to `ms=01;31:fn=35:ln=32:se=36:ve=01;34:ex=34:un=01;33`.
159 159
 ```
160 160
 
161 161
 ## References
... ...
@@ -110,9 +110,9 @@ Environment variables:
110 110
     `enums-tree`, `params`, `params-tree`, `audit`, and `audit-tree` commands.
111 111
     It uses the same format (and defaults) as GREP_COLORS, i.e. a
112 112
     colon-separated list of capabilities: `ms` (matching selected), `fn` (file
113
-    name), `ln` (line number), `se` (separators). Added custom capabilities
114
-    are: `ve` (version), `ex` (extension), `un` (unsupported). Defaults to
115
-    `ms=01;31:fn=35:ln=32:se=36:ve=01;34:ex=34:un=01;33`.
113
+    name), `ln` (line and column number), `se` (separators). Added custom
114
+    capabilities are: `ve` (version), `ex` (extension), `un` (unsupported).
115
+    Defaults to `ms=01;31:fn=35:ln=32:se=36:ve=01;34:ex=34:un=01;33`.
116 116
 """
117 117
 
118 118
 
... ...
@@ -125,6 +125,7 @@ import functools
125 125
 import subprocess
126 126
 import shutil
127 127
 import shlex
128
+import fnmatch
128 129
 import urllib.request
129 130
 import docopt
130 131
 from lxml import etree
... ...
@@ -139,6 +140,7 @@ USER_AGENT    = 'Mozilla/5.0'
139 140
 REGEX         = r'\b(gl|GL_)[0-9A-Z][0-9A-Za-z_]+\b'
140 141
 EXCLUDE_DIRS  = ['.?*', '_*']
141 142
 EXCLUDE_FILES = ['README*', 'TODO*']
143
+BINARY_PEEK   = 1024
142 144
 INDENT        = 2
143 145
 ENV_XDG = lambda var, default: (
144 146
     os.environ.get(f'GLREGISTRY_{var}') or
... ...
@@ -297,21 +299,45 @@ def grep(
297 299
     exclude_files=EXCLUDE_FILES,
298 300
     silent=False,
299 301
 ):
300
-    path          = path if path else '.'
301
-    cmd           = ['grep', '-EIrno']
302
-    exclude_dirs  = [f'--exclude-dir={exclude}' for exclude in exclude_dirs]
303
-    exclude_files = [f'--exclude={exclude}'     for exclude in exclude_files]
304
-    process = subprocess.run(
305
-        [*cmd, *exclude_dirs, *exclude_files, regex, path],
306
-        stdout=subprocess.PIPE,
307
-        stderr=subprocess.DEVNULL if silent else None,
308
-        text=True,
309
-    )
310
-    for string in process.stdout.splitlines():
311
-        string           = removeprefix(f'.{os.path.sep}', string)
312
-        file, line, name = string.split(':', 2)
313
-        line             = int(line)
314
-        yield file, line, name
302
+    path = path if path else '.'
303
+    def onerror(error, file=None):
304
+        file = removeprefix(f'.{os.path.sep}', file or error.filename)
305
+        if silent:
306
+            pass
307
+        elif isinstance(error, OSError):
308
+            log(f"{file}: {error.strerror}")
309
+        elif isinstance(error, UnicodeDecodeError):
310
+            log(f"{file}: {error.reason}")
311
+        else:
312
+            log(f"{file}: {error}")
313
+    def exclude(excludes, names):
314
+        names = set(names)
315
+        for exclude in excludes:
316
+            names -= set(fnmatch.filter(names, exclude))
317
+        return sorted(names)
318
+    def grep_file(file):
319
+        try:
320
+            with open(file, 'rb') as f:
321
+                if 0 in f.read(BINARY_PEEK):
322
+                    return
323
+            with open(file, errors='ignore') as f:
324
+                file = removeprefix(f'.{os.path.sep}', file)
325
+                for line, string in enumerate(f):
326
+                    for match in re.finditer(regex, string):
327
+                        column, name = match.start(), match.group()
328
+                        yield file, line+1, column+1, name
329
+        except Exception as error:
330
+            onerror(error, file)
331
+    if os.path.isfile(path):
332
+        for match in grep_file(path):
333
+            yield match
334
+    else:
335
+        for root, dirs, files in os.walk(path, onerror=onerror):
336
+            dirs [:] = exclude(exclude_dirs,  dirs)
337
+            files[:] = exclude(exclude_files, files)
338
+            for file in files:
339
+                for match in grep_file(os.path.join(root, file)):
340
+                    yield match
315 341
 
316 342
 
317 343
 ## Commands
... ...
@@ -551,25 +577,26 @@ def params_tree(xml, group=None):
551 577
 ### `audit_`
552 578
 def audit_(xml, path=None):
553 579
     audit_ = collections.defaultdict(lambda: collections.defaultdict(list))
554
-    for file, line, name in grep(path):
580
+    for file, line, column, name in grep(path):
555 581
         supports_ = supports(xml, name)
556 582
         if not supports_:
557 583
             supports_ = ['UNSUPPORTED']
558
-        audit_[tuple(supports_)][name].append([file, line])
584
+        audit_[tuple(supports_)][name].append([file, line, column])
559 585
     return audit_
560 586
 
561 587
 
562 588
 ### `audit`
563 589
 def audit(xml, path=None):
564
-    for file, line, supports, name in sorted(
565
-        [file, line, supports, name]
566
-        for supports, names  in audit_(xml, path).items()
567
-        for name, locations  in names.items()
568
-        for file, line       in locations
590
+    for file, line, column, supports, name in sorted(
591
+        [file, line, column, supports, name]
592
+        for supports, names    in audit_(xml, path).items()
593
+        for name, locations    in names.items()
594
+        for file, line, column in locations
569 595
     ):
570 596
         yield indentjoin(0, ':', [
571 597
             color('fn', file),
572 598
             color('ln', line),
599
+            color('ln', column),
573 600
             indentjoin(0, ',', color_supports(supports)),
574 601
             color('ms', name),
575 602
         ])
... ...
@@ -581,10 +608,11 @@ def audit_tree(xml, path=None):
581 608
         yield indentjoin(0, ',', color_supports(supports))
582 609
         for name, locations in sorted(names.items()):
583 610
             yield indentjoin(1, '', [color('ms', name)])
584
-            for file, line in sorted(locations):
611
+            for file, line, column in sorted(locations):
585 612
                 yield indentjoin(2, ':', [
586 613
                     color('fn', file),
587 614
                     color('ln', line),
615
+                    color('ln', column),
588 616
                 ])
589 617
 
590 618