Browse code

Change grep to internal

Robert Cranston authored on 22/01/2023 00:33:32
Showing 2 changed files

... ...
@@ -153,9 +153,9 @@ Environment variables:
153 153
     `enums-tree`, `params`, `params-tree`, `audit`, and `audit-tree` commands.
154 154
     It uses the same format (and defaults) as GREP_COLORS, i.e. a
155 155
     colon-separated list of capabilities: `ms` (matching selected), `fn` (file
156
-    name), `ln` (line number), `se` (separators). Added custom capabilities
157
-    are: `ve` (version), `ex` (extension), `un` (unsupported). Defaults to
158
-    `ms=01;31:fn=35:ln=32:se=36:ve=01;34:ex=34:un=01;33`.
156
+    name), `ln` (line and column number), `se` (separators). Added custom
157
+    capabilities are: `ve` (version), `ex` (extension), `un` (unsupported).
158
+    Defaults to `ms=01;31:fn=35:ln=32:se=36:ve=01;34:ex=34:un=01;33`.
159 159
 ```
160 160
 
161 161
 ## References
... ...
@@ -110,9 +110,9 @@ Environment variables:
110 110
     `enums-tree`, `params`, `params-tree`, `audit`, and `audit-tree` commands.
111 111
     It uses the same format (and defaults) as GREP_COLORS, i.e. a
112 112
     colon-separated list of capabilities: `ms` (matching selected), `fn` (file
113
-    name), `ln` (line number), `se` (separators). Added custom capabilities
114
-    are: `ve` (version), `ex` (extension), `un` (unsupported). Defaults to
115
-    `ms=01;31:fn=35:ln=32:se=36:ve=01;34:ex=34:un=01;33`.
113
+    name), `ln` (line and column number), `se` (separators). Added custom
114
+    capabilities are: `ve` (version), `ex` (extension), `un` (unsupported).
115
+    Defaults to `ms=01;31:fn=35:ln=32:se=36:ve=01;34:ex=34:un=01;33`.
116 116
 """
117 117
 
118 118
 
... ...
@@ -125,6 +125,7 @@ import functools
125 125
 import subprocess
126 126
 import shutil
127 127
 import shlex
128
+import fnmatch
128 129
 import urllib.request
129 130
 import docopt
130 131
 import lxml.etree
... ...
@@ -139,6 +140,7 @@ USER_AGENT    = 'Mozilla/5.0'
139 140
 REGEX         = r'\b(gl|GL_)[0-9A-Z][0-9A-Za-z_]+\b'
140 141
 EXCLUDE_DIRS  = ['.?*', '_*']
141 142
 EXCLUDE_FILES = ['README*', 'TODO*']
143
+BINARY_PEEK   = 1024
142 144
 INDENT        = 2
143 145
 ENV_XDG = lambda var, default: os.environ.get(
144 146
     f'GLREGISTRY_{var}',
... ...
@@ -304,21 +306,39 @@ def grep(
304 306
     exclude_files=EXCLUDE_FILES,
305 307
     silent=False,
306 308
 ):
307
-    path          = path if path else '.'
308
-    cmd           = ['grep', '-EIrno']
309
-    exclude_dirs  = [f'--exclude-dir={exclude}' for exclude in exclude_dirs]
310
-    exclude_files = [f'--exclude={exclude}'     for exclude in exclude_files]
311
-    process = subprocess.run(
312
-        [*cmd, *exclude_dirs, *exclude_files, regex, path],
313
-        stdout=subprocess.PIPE,
314
-        stderr=subprocess.DEVNULL if silent else None,
315
-        text=True,
316
-    )
317
-    for string in process.stdout.splitlines():
318
-        string           = removeprefix(f'.{os.path.sep}', string)
319
-        file, line, name = string.split(':', 2)
320
-        line             = int(line)
321
-        yield file, line, name
309
+    path = path if path else '.'
310
+    def onerror(error, file=None):
311
+        file = removeprefix(f'.{os.path.sep}', file or error.filename)
312
+        if silent:
313
+            pass
314
+        elif isinstance(error, OSError):
315
+            log(f"{file}: {error.strerror}")
316
+        elif isinstance(error, UnicodeDecodeError):
317
+            log(f"{file}: {error.reason}")
318
+        else:
319
+            log(f"{file}: {error}")
320
+    def exclude(excludes, names):
321
+        names = set(names)
322
+        for exclude in excludes:
323
+            names -= set(fnmatch.filter(names, exclude))
324
+        return sorted(names)
325
+    for root, dirs, files in os.walk(path, onerror=onerror):
326
+        dirs [:] = exclude(exclude_dirs,  dirs)
327
+        files[:] = exclude(exclude_files, files)
328
+        for file in files:
329
+            file = os.path.join(root, file)
330
+            try:
331
+                with open(file, 'rb') as f:
332
+                    if 0 in f.read(BINARY_PEEK):
333
+                        continue
334
+                with open(file, errors='ignore') as f:
335
+                    file = removeprefix(f'.{os.path.sep}', file)
336
+                    for line, string in enumerate(f):
337
+                        for match in re.finditer(regex, string):
338
+                            column, name = match.start(), match.group()
339
+                            yield file, line+1, column+1, name
340
+            except Exception as error:
341
+                onerror(error, file)
322 342
 
323 343
 
324 344
 ## Commands
... ...
@@ -558,25 +578,26 @@ def params_tree(xml, group=None):
558 578
 ### `audit_`
559 579
 def audit_(xml, path=None):
560 580
     audit_ = collections.defaultdict(lambda: collections.defaultdict(list))
561
-    for file, line, name in grep(path):
581
+    for file, line, column, name in grep(path):
562 582
         supports_ = supports(xml, name)
563 583
         if not supports_:
564 584
             supports_ = ['UNSUPPORTED']
565
-        audit_[tuple(supports_)][name].append([file, line])
585
+        audit_[tuple(supports_)][name].append([file, line, column])
566 586
     return audit_
567 587
 
568 588
 
569 589
 ### `audit`
570 590
 def audit(xml, path=None):
571
-    for file, line, supports, name in sorted(
572
-        [file, line, supports, name]
573
-        for supports, names  in audit_(xml, path).items()
574
-        for name, locations  in names.items()
575
-        for file, line       in locations
591
+    for file, line, column, supports, name in sorted(
592
+        [file, line, column, supports, name]
593
+        for supports, names    in audit_(xml, path).items()
594
+        for name, locations    in names.items()
595
+        for file, line, column in locations
576 596
     ):
577 597
         yield indentjoin(0, ':', [
578 598
             color('fn', file),
579 599
             color('ln', line),
600
+            color('ln', column),
580 601
             indentjoin(0, ',', color_supports(supports)),
581 602
             color('ms', name),
582 603
         ])
... ...
@@ -588,10 +609,11 @@ def audit_tree(xml, path=None):
588 609
         yield indentjoin(0, ',', color_supports(supports))
589 610
         for name, locations in sorted(names.items()):
590 611
             yield indentjoin(1, '', [color('ms', name)])
591
-            for file, line in sorted(locations):
612
+            for file, line, column in sorted(locations):
592 613
                 yield indentjoin(2, ':', [
593 614
                     color('fn', file),
594 615
                     color('ln', line),
616
+                    color('ln', column),
595 617
                 ])
596 618
 
597 619