Browse code

Add implementation

Robert Cranston authored on 04/06/2020 17:59:34
Showing 3 changed files

... ...
@@ -2,6 +2,107 @@
2 2
 
3 3
 A Python library to ease processing of video frames.
4 4
 
5
+`pyffstream` is mostly a wrapper around external [FFmpeg][] processes (via
6
+[`ffmpeg-python`][]) intended to be used with [OpenCV][].
7
+
8
+[FFmpeg]: https://ffmpeg.org
9
+[`ffmpeg-python`]: https://github.com/kkroening/ffmpeg-python
10
+[OpenCV]: https://opencv.org
11
+
12
+## Usage
13
+
14
+`pyffstream` provides your application with a command line interface (CLI) and
15
+handles reading and writing video files (with the original audio intact),
16
+filtering the frames through a callback that you define. The API consists
17
+mainly of the single function
18
+
19
+```python
20
+run(description, process, init=None, args_pre=None, args_post=None)
21
+```
22
+
23
+that takes the application description to present in the CLI and some
24
+callbacks, all but one being optional:
25
+
26
+-   `process(args, state, frame, frame_num)`:
27
+
28
+    Takes:
29
+
30
+    -   `args`: [`argparse.Namespace`][] containing parsed command line
31
+        arguments.
32
+    -   `state`: arbitrary object returned by `init` (see below).
33
+    -   `frame`: [`numpy.array`][] with shape
34
+        `(args.working_height, args.working_width, 3)` containing RGB data to
35
+        be processed.
36
+    -   `frame_num`: integer in the range [`args.start_frame`,
37
+        `args.end_frame`) representing the number of the current frame.
38
+
39
+    Returns:
40
+
41
+    -   `output_frame`: [`numpy.array`][] with shape
42
+        `(args.output_height, args.output_width, 3)` containing RGB output.
43
+    -   `debug_frame`: [`numpy.array`][] with shape
44
+        `(args.working_height, args.working_width, 3)` containing RGB debug
45
+        output.
46
+
47
+-   `init(args)`:
48
+
49
+    Takes:
50
+
51
+    -   `args`: [`argparse.Namespace`][] containing parsed command line
52
+        arguments.
53
+
54
+    Returns:
55
+
56
+    -   `state`: arbitrary object passed to `process` (see above).
57
+
58
+-   `args_pre(parser)`, `args_post(parser)`:
59
+
60
+    Takes:
61
+
62
+    -   `parser`: [`argparse.ArgumentParser`][] before (for `args_pre`) or after
63
+        (for `args_post`) being filled with `pyffstream` arguments.
64
+
65
+    Returns nothing.
66
+
67
+A number of helper functions are also supplied:
68
+
69
+-   `resize(image, width, height, resample)`: wrapper around
70
+    [`PIL.Image.resize`][].
71
+-   `text(image, text, x, y, thickness, font=cv2.FONT_HERSHEY_SIMPLEX)`:
72
+    wrapper around [`cv2.putText`][] that automatically adjusts text placement
73
+    and scaling.
74
+-   `fix_rect(args, x, y, w, h)`: forces a rectangle to aspect ratio
75
+    `args.output_aspect` and to be inside `(0, 0, args.working_width,
76
+    args.working_height)`.
77
+
78
+[`argparse.Namespace`]: https://docs.python.org/3/library/argparse.html#argparse.Namespace
79
+[`argparse.ArgumentParser`]: https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser
80
+[`numpy.array`]: https://numpy.org/doc/stable/reference/generated/numpy.array.html
81
+[`PIL.Image.resize`]: https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.resize
82
+[`cv2.putText`]: https://docs.opencv.org/master/d6/d6e/group__imgproc__draw.html#ga5126f47f883d730f633d74f07456c576
83
+
84
+## Installation
85
+
86
+### With `pip`
87
+
88
+```sh
89
+python3 -m pip install git+https://git.rcrnstn.net/rcrnstn/pyffstream
90
+```
91
+
92
+### In `setuptools` `setup.py`
93
+
94
+```python
95
+from setuptools import setup
96
+
97
+setup(
98
+    # ...
99
+    install_requires=[
100
+        'pyffstream @ git+https://git.rcrnstn.net/rcrnstn/pyffstream',
101
+    ],
102
+    # ...
103
+)
104
+```
105
+
5 106
 ## License
6 107
 
7 108
 Licensed under the [ISC license][], see the [`LICENSE`](LICENSE) file.
8 109
new file mode 100644
... ...
@@ -0,0 +1,391 @@
1
+"""
2
+Python library to ease processing of video frames.
3
+"""
4
+
5
+import sys
6
+import os
7
+import signal
8
+import argparse
9
+import json
10
+import datetime
11
+import shlex
12
+import ffmpeg  # ffmpeg-python
13
+import numpy as np
14
+import PIL.Image
15
+import cv2
16
+
17
+
18
def error(message):
    """
    Print an error message to standard error and terminate the program.

    The message is prefixed with the program name (`sys.argv[0]`) and the
    process is terminated with exit status 1 by raising `SystemExit`.

    Args:
        message: Human-readable description of the problem.

    Raises:
        SystemExit: Always, with code 1.
    """
    print(f"{sys.argv[0]}: Error: {message}", file=sys.stderr)
    # Use `sys.exit` instead of the `exit` builtin: the latter is injected by
    # the `site` module for interactive use and is not available when the
    # interpreter runs without `site` (e.g. `python -S`, frozen applications).
    sys.exit(1)
21
+
22
+
23
def resize(image, width, height, resample):
    """
    Resize an image array with Pillow and return the result as a new array.

    Args:
        image: Array accepted by `PIL.Image.fromarray`.
        width: Target width, passed as the first element of the size tuple.
        height: Target height, passed as the second element of the size tuple.
        resample: Resampling filter forwarded to `PIL.Image.Image.resize`.

    Returns:
        A `numpy` array built from the resized Pillow image.
    """
    target_size = (width, height)
    resized = PIL.Image.fromarray(image).resize(target_size, resample)
    return np.array(resized)
29
+
30
+
31
def text(image, text, x, y, thickness, font=cv2.FONT_HERSHEY_SIMPLEX):
    """
    Draw outlined text onto an image, keeping it inside the image margins.

    The text is measured at scale 1 and scaled down (never up) so that it fits
    within the image width minus a fixed margin on both sides. The requested
    position is then clipped so the text stays inside those margins. The text
    is drawn twice: first in black with three times the requested thickness,
    then in white on top, which gives a white text with a black outline.

    Args:
        image: Image array; its first two dimensions are read as
            (height, width).
        text: String to draw.
        x: Requested horizontal position passed to `cv2.putText`.
        y: Requested vertical position passed to `cv2.putText`.
        thickness: Line thickness of the white text.
        font: OpenCV font identifier.

    Returns:
        The image returned by `cv2.putText`, or `image` unchanged when the
        text has no measurable width (e.g. an empty string).
    """
    margin = 10
    height, width = image.shape[:2]
    size, _ = cv2.getTextSize(text, font, 1, 1)
    # Nothing to draw, and dividing by a zero width below would fail.
    if size[0] <= 0:
        return image
    scale = min(1, (width - 2 * margin) / size[0])
    # `np.clip` returns NumPy scalars; convert to built-in `int` so the point
    # handed to OpenCV is a plain tuple of integers.
    x = int(np.clip(x, margin, width - int(scale * size[0]) - margin))
    y = int(np.clip(y, margin + int(scale * size[1]), height - margin))
    black = (0, 0, 0)
    white = (255, 255, 255)
    # Thick black pass first, thin white pass on top: outlined text.
    image = cv2.putText(image, text, (x, y), font, scale, black, 3 * thickness)
    image = cv2.putText(image, text, (x, y), font, scale, white, thickness)
    return image
43
+
44
+
45
def fix_rect(args, x, y, w, h):
    """
    Force a rectangle to the output aspect ratio and into the working image.

    The rectangle is first grown (never shrunk) to aspect ratio
    `args.output_aspect`, then uniformly scaled down if it is larger than
    `args.working_width` x `args.working_height`, centered on the original
    rectangle, and finally shifted so that it lies inside the working image.

    Args:
        args: Object providing `output_aspect`, `working_width` and
            `working_height`.
        x: Left coordinate of the rectangle.
        y: Top coordinate of the rectangle.
        w: Width of the rectangle.
        h: Height of the rectangle.

    Returns:
        Tuple `(nx, ny, nw, nh)` describing the adjusted rectangle. A
        degenerate input (zero width and height) yields a zero-sized
        rectangle instead of raising `ZeroDivisionError`.
    """
    # Grow into correct aspect ratio.
    nw = max(w, int(h * args.output_aspect))
    nh = max(h, int(w / args.output_aspect))
    # Optionally shrink into working image size. A zero-sized dimension
    # cannot exceed the working image, so it is skipped to avoid dividing by
    # zero.
    scale = 1
    if nw > 0:
        scale = min(scale, args.working_width / nw)
    if nh > 0:
        scale = min(scale, args.working_height / nh)
    nw = int(nw * scale)
    nh = int(nh * scale)
    # Center new region on old region.
    nx = x - (nw - w) // 2
    ny = y - (nh - h) // 2
    # Push into working image: clamp to the left/top edge, then shift back by
    # however much the region sticks out past the right/bottom edge.
    nx = max(0, nx) - max(0, (nx + nw) - args.working_width)
    ny = max(0, ny) - max(0, (ny + nh) - args.working_height)
    # Return.
    return nx, ny, nw, nh
61
+
62
+
63
def run(description, process, init=None, args_pre=None, args_post=None):
    """
    Run a frame processing application with a command line interface.

    Parses the command line, probes the input file with FFmpeg, decodes it to
    raw RGB frames through a pipe, passes every selected frame through
    `process` and optionally previews the results and encodes them to the
    debug and/or output files through further FFmpeg processes.

    Args:
        description: Application description shown in the command line help.
        process: Callback `process(args, state, frame, frame_num)` returning
            the tuple `(output_frame, debug_frame)`.
        init: Optional callback `init(args)` whose return value is passed to
            `process` as `state`. When omitted, `state` is `None`.
        args_pre: Optional callback receiving the `argparse.ArgumentParser`
            before the built-in arguments are added.
        args_post: Optional callback receiving the `argparse.ArgumentParser`
            after the built-in arguments are added.

    Raises:
        SystemExit: Via `error` when the arguments or the input are invalid,
            or from `argparse` itself.
    """
    # Parse arguments.
    parser = argparse.ArgumentParser(description=description)
    if args_pre is not None:
        args_pre(parser)
    parser.add_argument(
        '--start-frame', metavar='FRAME', type=int,
        help="starting input frame to process (inclusive)")
    parser.add_argument(
        '--end-frame', metavar='FRAME', type=int,
        help="ending input frame to process (exclusive)")
    parser.add_argument(
        '--decimate', metavar="COUNT", type=int, default=1,
        help="only use every %(metavar)s input frame")
    parser.add_argument(
        '--width', type=int,
        help="width of output video")
    parser.add_argument(
        '--height', type=int,
        help="height of output video")
    parser.add_argument(
        '--codec',
        help="codec of output video")
    parser.add_argument(
        '--pix-fmt',
        help="pixel format of output video")
    parser.add_argument(
        '--extra-args', metavar='ARGS',
        help="""
        extra arguments to pass to FFmpeg, as a JSON object (do not specify the
        leading dash for keys, use a null value for arguments that do not take
        a parameter)
        """)
    parser.add_argument(
        '--no-preview', action='store_true',
        help="do not show any previews")
    parser.add_argument(
        '--no-write', action='store_true',
        help="do not write any files")
    parser.add_argument(
        '--no-audio', action='store_true',
        help="do not include audio in output files")
    parser.add_argument(
        '--overwrite', action='store_true',
        help="overwrite output files")
    parser.add_argument(
        '--debug', metavar='DEBUG_FILE',
        help="produce debug output (leave empty to base filename on input)")
    parser.add_argument(
        '--output', metavar='OUTPUT_FILE',
        help="produce final output (leave empty to base filename on input)")
    parser.add_argument(
        'input', metavar='INPUT_FILE',
        help="input file")
    if args_post is not None:
        args_post(parser)
    args = parser.parse_args()

    # Check arguments.
    root, ext = os.path.splitext(os.path.basename(args.input))
    for kind in ["output", "debug"]:
        # An explicitly empty value means "derive the name from the input".
        if getattr(args, kind) == "":
            setattr(args, kind, f"{root}_{kind}{ext}")
        path = getattr(args, kind)
        # The option is `None` when it was not given at all; only check for
        # an existing file when there actually is a path.
        if path and os.path.isfile(path) and not args.overwrite:
            error(f"File exists: '{path}', use --overwrite to overwrite.")
    if not os.path.isfile(args.input):
        error(f"File does not exist: '{args.input}'.")
    if args.decimate < 1:
        # Used both as a divisor and as a modulus below.
        error("Decimate count must be a positive integer.")

    # Probe input.
    probe = ffmpeg.probe(args.input, select_streams='v')
    if not probe['streams']:
        error(f"No video stream found: '{args.input}'.")
    video = probe['streams'][0]

    # Common parameters.
    args.duration = float(probe['format']['duration'])
    args.frame_count = int(video['nb_frames'])

    # Input parameters.
    args.input_codec = video['codec_name']
    args.input_pix_fmt = video['pix_fmt']
    args.input_width = video['width']
    args.input_height = video['height']
    args.input_aspect = args.input_width / args.input_height
    # The frame rate is reported as a "numerator/denominator" string.
    fps_num, fps_den = map(int, video['r_frame_rate'].split("/"))
    args.input_fps = fps_num / fps_den

    # Output parameters.
    args.output_codec = args.codec or args.input_codec
    args.output_pix_fmt = args.pix_fmt or args.input_pix_fmt
    args.output_width = args.width or args.input_width
    args.output_height = args.height or int(
        args.output_width / args.input_aspect
    )
    args.output_aspect = args.output_width / args.output_height
    args.output_fps = args.input_fps / args.decimate

    # Working parameters. The working size is at most the input size and at
    # most twice the output size (expanded to the input aspect ratio), rounded
    # up to even dimensions.
    args.working_width = min(
        args.input_width,
        2 * max(
            args.output_width,
            int(args.output_height * args.input_aspect)
        )
    )
    args.working_height = min(
        args.input_height,
        2 * max(
            args.output_height,
            int(args.output_width / args.input_aspect)
        )
    )
    args.working_width += args.working_width % 2
    args.working_height += args.working_height % 2
    args.thickness = max(1, int(args.working_width / 1000))

    # Fill in default arguments.
    if args.start_frame is None:
        args.start_frame = 0
    if args.end_frame is None:
        args.end_frame = args.frame_count
    if args.extra_args is None:
        args.extra_args = {}
    else:
        try:
            args.extra_args = json.loads(args.extra_args)
        except json.decoder.JSONDecodeError:
            error("Extra arguments is not valid JSON.")
        if not isinstance(args.extra_args, dict):
            error("Extra arguments is not a JSON object.")

    # Open files.
    debug_size = f'{args.working_width}x{args.working_height}'
    output_size = f'{args.output_width}x{args.output_height}'
    pipe_args = {
        'format': 'rawvideo',
        'pix_fmt': 'rgb24',
    }
    output_args = {
        'vcodec': args.output_codec,
        'pix_fmt': args.output_pix_fmt,
        'shortest': None,
        **args.extra_args,
    }
    audio_args = (
        [
            ffmpeg
            .input(args.input)
            .audio
            # This works badly for some reason.
            .filter(
                'atrim',
                start=f"{args.start_frame/args.input_fps}s",
                end=f"{args.end_frame/args.input_fps}s",
            )
        ]
        if not args.no_audio else []
    )
    input_stream = (
        ffmpeg
        .input(args.input)
        .output('pipe:', **pipe_args)
        .global_args('-loglevel', 'error')
        # .global_args('-stats')
        .run_async(pipe_stdout=True)
    )
    # The writer streams stay `None` unless the corresponding file is
    # actually being written.
    debug_stream = None
    output_stream = None
    if not args.no_write:
        if args.debug:
            debug_stream = (
                ffmpeg
                .input('pipe:', **pipe_args, s=debug_size, r=args.output_fps)
                .output(
                    *audio_args, args.debug, **output_args, r=args.output_fps)
                .global_args('-loglevel', 'error')
                .run_async(pipe_stdin=True, overwrite_output=args.overwrite)
            )
        if args.output:
            output_stream = (
                ffmpeg
                .input('pipe:', **pipe_args, s=output_size, r=args.output_fps)
                .output(
                    *audio_args, args.output, **output_args, r=args.output_fps)
                .global_args('-loglevel', 'error')
                .run_async(pipe_stdin=True, overwrite_output=args.overwrite)
            )

    # Set up signal handler. It only records the interrupt so the loop can
    # stop between pipe operations instead of in the middle of one.
    sigint = False

    def sigint_handler(signum, frame):
        nonlocal sigint
        sigint = True

    previous_sigint = signal.signal(signal.SIGINT, sigint_handler)

    # Loop invariants. The command line is built from a copy so that
    # `sys.argv` itself is left untouched.
    frame_size = args.input_width * args.input_height * 3
    time_duration = datetime.timedelta(seconds=int(args.duration))
    argv = list(sys.argv)
    if len(argv) >= 1:
        argv[0] = os.path.basename(argv[0])
    argv = " ".join(map(shlex.quote, argv))

    try:
        # Call init. Done inside the `try` so the FFmpeg processes are
        # cleaned up even if it fails.
        if init is not None:
            state = init(args)
        else:
            state = None

        # Process. Frames at or past the end frame are never read.
        for frame_num in range(min(args.frame_count, args.end_frame)):
            # Gather and print info.
            time_now = datetime.timedelta(
                seconds=int(frame_num / args.input_fps))
            elapsed_frames = f"{frame_num} / {args.frame_count-1}"
            elapsed_time = f"{time_now} / {time_duration}"
            sys.stdout.write(f"{elapsed_frames} ({elapsed_time})\r")

            # Read input.
            if sigint:
                break
            input_bytes = input_stream.stdout.read(frame_size)
            # Stop on end of stream, including a truncated final frame that
            # could not be reshaped.
            if len(input_bytes) < frame_size:
                break
            input_frame = (
                np
                .frombuffer(input_bytes, np.uint8)
                .reshape([args.input_height, args.input_width, 3])
            )

            # Check for start frame. Earlier frames still have to be read
            # from the pipe, they are just not processed.
            if frame_num < args.start_frame:
                continue

            # Check for decimate frame.
            if (frame_num - args.start_frame) % args.decimate != 0:
                continue

            # Resize to working size.
            frame = resize(
                input_frame,
                args.working_width,
                args.working_height,
                PIL.Image.NEAREST,
            )

            # Call process.
            output_frame, debug_frame = process(args, state, frame, frame_num)

            # Show info: frame counter top left, time top right and command
            # line bottom left (`text` clips the positions into the image).
            if args.debug:
                debug_frame = text(
                    debug_frame,
                    elapsed_frames,
                    0,
                    0,
                    args.thickness,
                )
                debug_frame = text(
                    debug_frame,
                    elapsed_time,
                    args.working_width,
                    0,
                    args.thickness,
                )
                debug_frame = text(
                    debug_frame,
                    argv,
                    0,
                    args.working_height,
                    args.thickness,
                )

            # Show preview windows.
            if not args.no_preview:
                if args.debug:
                    cv2.imshow(
                        f"{args.debug}",
                        cv2.cvtColor(debug_frame, cv2.COLOR_RGB2BGR),
                    )
                if args.output:
                    cv2.imshow(
                        f"{args.output}",
                        cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR),
                    )
                # Quit on `q` or escape.
                if cv2.waitKey(1) in (ord('q'), 27):
                    break

            # Write files.
            if not args.no_write:
                if sigint:
                    break
                try:
                    if debug_stream is not None:
                        debug_stream.stdin.write(
                            debug_frame
                            .astype(np.uint8)
                            .tobytes()
                        )
                    if output_stream is not None:
                        output_stream.stdin.write(
                            output_frame
                            .astype(np.uint8)
                            .tobytes()
                        )
                except BrokenPipeError:
                    # FFmpeg has probably written some error message to stderr,
                    # so just break.
                    break
    except KeyboardInterrupt:
        pass
    finally:
        print("")
        # Close and wait.
        for stream in (debug_stream, output_stream):
            if stream is None:
                continue
            try:
                stream.stdin.close()
            except BrokenPipeError:
                # The encoder already exited; there is nothing left to flush
                # and the remaining processes still need to be cleaned up.
                pass
            stream.wait()
        input_stream.send_signal(signal.SIGINT)
        input_stream.communicate()
        # Give the caller its original interrupt handling back.
        # `signal.signal` returns `None` when the previous handler was not
        # installed from Python, in which case it cannot be reinstated.
        if previous_sigint is not None:
            signal.signal(signal.SIGINT, previous_sigint)
0 392
new file mode 100644
... ...
@@ -0,0 +1,27 @@
1
+from setuptools import setup
2
+
3
# Read the long description explicitly as UTF-8 so the result does not depend
# on the platform's default locale encoding.
with open("README.md", encoding="utf-8") as f:
    long_description = f.read()

setup(
    name='pyffstream',
    version='1.0.0',
    description="Python library to ease processing of video frames",
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://git.rcrnstn.net/rcrnstn/pyffstream',
    author="Robert Cranston",
    keywords='ffmpeg opencv',
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: ISC License (ISCL)',
    ],
    # The module uses f-strings, which require Python 3.6 (`shlex.quote`
    # alone would only require 3.3).
    python_requires='>=3.6, <4',
    install_requires=[
        'ffmpeg-python',
        'numpy',
        'Pillow',
        'opencv-python',
    ],
    py_modules=['pyffstream'],
)