Browse code

Add implementation

Robert Cranston authored on 04/06/2020 17:59:34
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,391 @@
1
+"""
2
+Python library to ease processing of video frames.
3
+"""
4
+
5
+import sys
6
+import os
7
+import signal
8
+import argparse
9
+import json
10
+import datetime
11
+import shlex
12
+import ffmpeg  # ffmpeg-python
13
+import numpy as np
14
+import PIL.Image
15
+import cv2
16
+
17
+
18
def error(message):
    """Print an error message to stderr and terminate with exit status 1.

    The message is prefixed with the program name taken from ``sys.argv[0]``.

    Raises:
        SystemExit: always, with exit code 1.
    """
    print(f"{sys.argv[0]}: Error: {message}", file=sys.stderr)
    # Use sys.exit() rather than the bare exit() helper: the latter is injected
    # by the ``site`` module for interactive use and is not guaranteed to exist
    # (e.g. under ``python -S`` or in frozen applications).
    sys.exit(1)
21
+
22
+
23
def resize(image, width, height, resample):
    """Return a resized copy of an image array.

    The array is converted to a PIL image, resized to ``(width, height)``
    using the given PIL ``resample`` filter, and converted back to a NumPy
    array.
    """
    source = PIL.Image.fromarray(image)
    scaled = source.resize((width, height), resample)
    return np.array(scaled)
29
+
30
+
31
def text(image, text, x, y, thickness, font=cv2.FONT_HERSHEY_SIMPLEX):
    """Draw outlined text onto an image, kept inside a 10 pixel margin.

    The text is scaled down (never up) so that it fits the image width minus
    the margins, and the requested position ``(x, y)`` is clamped so the
    rendered text stays within the margins. The text is drawn twice: first in
    black with three times the stroke thickness, then in white on top, which
    produces a dark outline.

    Returns the image as returned by ``cv2.putText``.
    """
    margin = 10
    img_h, img_w = image.shape[:2]

    # Measure the text at unit scale and unit thickness.
    (text_w, text_h), _baseline = cv2.getTextSize(text, font, 1, 1)

    # Shrink only when the text would not fit between the side margins.
    scale = min(1, (img_w - 2 * margin) / text_w)

    # Clamp the anchor so the scaled text stays inside the margins.
    x = np.clip(x, margin, img_w - int(scale * text_w) - margin)
    y = np.clip(y, margin + int(scale * text_h), img_h - margin)
    origin = (x, y)

    # Thick black pass first, thin white pass on top.
    passes = (
        ((0, 0, 0), 3 * thickness),
        ((255, 255, 255), thickness),
    )
    for colour, stroke in passes:
        image = cv2.putText(image, text, origin, font, scale, colour, stroke)
    return image
43
+
44
+
45
def fix_rect(args, x, y, w, h):
    """Adjust a rectangle to the output aspect ratio inside the working image.

    The rectangle ``(x, y, w, h)`` is grown to ``args.output_aspect``, shrunk
    if it exceeds ``args.working_width`` x ``args.working_height``, re-centred
    on the original rectangle, and finally shifted so that it lies within the
    working image.

    Returns:
        A tuple ``(nx, ny, nw, nh)`` describing the adjusted rectangle.
    """
    aspect = args.output_aspect
    limit_w = args.working_width
    limit_h = args.working_height

    # Grow (never shrink) each side until the aspect ratio is reached.
    grown_w = max(w, int(h * aspect))
    grown_h = max(h, int(w / aspect))

    # Scale down uniformly if the grown region exceeds the working image.
    shrink = min(1, limit_w / grown_w, limit_h / grown_h)
    new_w = int(grown_w * shrink)
    new_h = int(grown_h * shrink)

    # Keep the new region centred on the old one.
    left = x - (new_w - w) // 2
    top = y - (new_h - h) // 2

    # Shift back inside the working image: clamp at the top/left edge and
    # subtract whatever sticks out past the bottom/right edge.
    overflow_x = max(0, left + new_w - limit_w)
    overflow_y = max(0, top + new_h - limit_h)
    new_x = max(0, left) - overflow_x
    new_y = max(0, top) - overflow_y

    return new_x, new_y, new_w, new_h
61
+
62
+
63
def _close_writer(stream):
    """Close an FFmpeg writer's stdin and wait for the process to exit."""
    try:
        stream.stdin.close()
    except BrokenPipeError:
        # FFmpeg has already gone away; there is nothing left to flush, and
        # the remaining cleanup must still run.
        pass
    stream.wait()


def run(description, process, init=None, args_pre=None, args_post=None):
    """Parse command line arguments and run a frame processing loop.

    The input video is decoded by FFmpeg into raw RGB frames. Each selected
    frame is resized to the working size and handed to ``process``; the frames
    it returns are optionally previewed in OpenCV windows and piped into
    FFmpeg encoders for the debug and final output files.

    Args:
        description: Program description shown in the ``--help`` output.
        process: Callable invoked as ``process(args, state, frame, frame_num)``
            for every selected frame. ``frame`` is an RGB ``uint8`` array of
            the working size. It must return ``(output_frame, debug_frame)``;
            the encoders are configured for ``args.output_width`` x
            ``args.output_height`` and ``args.working_width`` x
            ``args.working_height`` respectively.
        init: Optional callable invoked once as ``init(args)`` before
            processing; its return value is passed to ``process`` as
            ``state`` (``None`` when ``init`` is not given).
        args_pre: Optional callable receiving the ``ArgumentParser`` before the
            built-in arguments are added.
        args_post: Optional callable receiving the ``ArgumentParser`` after the
            built-in arguments are added.

    Raises:
        SystemExit: via ``error()`` on invalid arguments or file conflicts.
    """
    # Parse arguments
    parser = argparse.ArgumentParser(description=description)
    if args_pre is not None:
        args_pre(parser)
    parser.add_argument(
        '--start-frame', metavar='FRAME', type=int,
        help="starting input frame to process (inclusive)")
    parser.add_argument(
        '--end-frame', metavar='FRAME', type=int,
        help="ending input frame to process (exclusive)")
    parser.add_argument(
        '--decimate', metavar="COUNT", type=int, default=1,
        help="only use every %(metavar)s input frame")
    parser.add_argument(
        '--width', type=int,
        help="width of output video")
    parser.add_argument(
        '--height', type=int,
        help="height of output video")
    parser.add_argument(
        '--codec',
        help="codec of output video"
    )
    parser.add_argument(
        '--pix-fmt',
        help="pixel format of output video")
    parser.add_argument(
        '--extra-args', metavar='ARGS',
        help="""
        extra arguments to pass to FFmpeg, as a JSON object (do not specify the
        leading dash for keys, use a null value for arguments that do not take
        a parameter)
        """)
    parser.add_argument(
        '--no-preview', action='store_true',
        help="do not show any previews")
    parser.add_argument(
        '--no-write', action='store_true',
        help="do not write any files")
    parser.add_argument(
        '--no-audio', action='store_true',
        help="do not include audio in output files")
    parser.add_argument(
        '--overwrite', action='store_true',
        help="overwrite output files")
    parser.add_argument(
        '--debug', metavar='DEBUG_FILE',
        help="produce debug output (leave empty to base filename on input)")
    parser.add_argument(
        '--output', metavar='OUTPUT_FILE',
        help="produce final output (leave empty to base filename on input)")
    parser.add_argument(
        'input', metavar='INPUT_FILE',
        help="input file")
    if args_post is not None:
        args_post(parser)
    args = parser.parse_args()

    # Check arguments.
    root, ext = os.path.splitext(os.path.basename(args.input))
    for kind in ["debug", "output"]:
        # An explicitly empty value means "derive the name from the input".
        if getattr(args, kind) == "":
            setattr(args, kind, f"{root}_{kind}{ext}")
    # --debug/--output are optional and default to None, which
    # os.path.isfile() rejects with a TypeError, so test for presence first.
    if args.output and os.path.isfile(args.output) and not args.overwrite:
        error(f"File exists: '{args.output}', use --overwrite to overwrite.")
    if args.debug and os.path.isfile(args.debug) and not args.overwrite:
        error(f"File exists: '{args.debug}', use --overwrite to overwrite.")
    if not os.path.isfile(args.input):
        error(f"File does not exist: '{args.input}'.")
    # A count below one would divide by zero (or select nonsensical frames) in
    # the decimation check of the processing loop.
    if args.decimate < 1:
        error("Decimate count must be at least 1.")

    # Probe input.
    probe = ffmpeg.probe(args.input, select_streams='v')
    video = probe['streams'][0]

    # Common parameters.
    args.duration = float(probe['format']['duration'])
    args.frame_count = int(video['nb_frames'])

    # Input parameters.
    args.input_codec = video['codec_name']
    args.input_pix_fmt = video['pix_fmt']
    args.input_width = video['width']
    args.input_height = video['height']
    args.input_aspect = args.input_width / args.input_height
    # r_frame_rate is a fraction string of the form "numerator/denominator".
    fps_num, fps_den = map(int, video['r_frame_rate'].split("/"))
    args.input_fps = fps_num / fps_den

    # Output parameters.
    args.output_codec = args.codec or args.input_codec
    args.output_pix_fmt = args.pix_fmt or args.input_pix_fmt
    args.output_width = args.width or args.input_width
    args.output_height = args.height or int(
        args.output_width / args.input_aspect
    )
    args.output_aspect = args.output_width / args.output_height
    args.output_fps = args.input_fps / args.decimate

    # Working parameters: at most twice the output size (in either aspect),
    # never larger than the input, and rounded up to even dimensions.
    args.working_width = min(
        args.input_width,
        2 * max(
            args.output_width,
            int(args.output_height * args.input_aspect)
        )
    )
    args.working_height = min(
        args.input_height,
        2 * max(
            args.output_height,
            int(args.output_width / args.input_aspect)
        )
    )
    args.working_width += args.working_width % 2
    args.working_height += args.working_height % 2
    args.thickness = max(1, int(args.working_width / 1000))

    # Fill in default arguments.
    if args.start_frame is None:
        args.start_frame = 0
    if args.end_frame is None:
        args.end_frame = args.frame_count
    if args.extra_args is None:
        args.extra_args = {}
    else:
        try:
            args.extra_args = json.loads(args.extra_args)
        except json.decoder.JSONDecodeError:
            error("Extra arguments is not valid JSON.")
        if not isinstance(args.extra_args, dict):
            error("Extra arguments is not a JSON object.")

    # Open files.
    debug_size = f'{args.working_width}x{args.working_height}'
    output_size = f'{args.output_width}x{args.output_height}'
    pipe_args = {
        'format': 'rawvideo',
        'pix_fmt': 'rgb24',
    }
    output_args = {
        'vcodec': args.output_codec,
        'pix_fmt': args.output_pix_fmt,
        'shortest': None,
        **args.extra_args,
    }
    audio_args = (
        [
            ffmpeg
            .input(args.input)
            .audio
            # This works badly for some reason.
            .filter(
                'atrim',
                start=f"{args.start_frame/args.input_fps}s",
                end=f"{args.end_frame/args.input_fps}s",
            )
        ]
        if not args.no_audio else []
    )
    input_stream = (
        ffmpeg
        .input(args.input)
        .output('pipe:', **pipe_args)
        .global_args('-loglevel', 'error')
        # .global_args('-stats')
        .run_async(pipe_stdout=True)
    )
    if not args.no_write:
        if args.debug:
            debug_stream = (
                ffmpeg
                .input('pipe:', **pipe_args, s=debug_size, r=args.output_fps)
                .output(
                    *audio_args, args.debug, **output_args, r=args.output_fps)
                .global_args('-loglevel', 'error')
                .run_async(pipe_stdin=True, overwrite_output=args.overwrite)
            )
        if args.output:
            output_stream = (
                ffmpeg
                .input('pipe:', **pipe_args, s=output_size, r=args.output_fps)
                .output(
                    *audio_args, args.output, **output_args, r=args.output_fps)
                .global_args('-loglevel', 'error')
                .run_async(pipe_stdin=True, overwrite_output=args.overwrite)
            )

    # Set up signal handler. SIGINT only sets a flag that is polled in the
    # loop, so a frame is never abandoned half way through.
    sigint = False

    def sigint_handler(signum, frame):
        nonlocal sigint
        sigint = True

    signal.signal(signal.SIGINT, sigint_handler)

    # Call init.
    if init is not None:
        state = init(args)
    else:
        state = None

    # Loop invariants.
    frame_size = args.input_width * args.input_height * 3  # rgb24
    time_duration = datetime.timedelta(seconds=int(args.duration))
    # Build the command line shown in the debug overlay once, from a copy, so
    # that sys.argv itself is left untouched.
    argv = list(sys.argv)
    if argv:
        argv[0] = os.path.basename(argv[0])
    argv = " ".join(map(shlex.quote, argv))

    # Process.
    try:
        for frame_num in range(args.frame_count):
            # Check for end frame.
            if frame_num >= args.end_frame:
                break

            # Gather and print info.
            time_now = datetime.timedelta(
                seconds=int(frame_num / args.input_fps))
            elapsed_frames = f"{frame_num} / {args.frame_count-1}"
            elapsed_time = f"{time_now} / {time_duration}"
            sys.stdout.write(f"{elapsed_frames} ({elapsed_time})\r")

            # Read input.
            if sigint:
                break
            input_bytes = input_stream.stdout.read(frame_size)
            # An empty or truncated read means the decoder has finished (or
            # died); a partial frame cannot be reshaped, so stop here.
            if len(input_bytes) < frame_size:
                break
            input_frame = (
                np
                .frombuffer(input_bytes, np.uint8)
                .reshape([args.input_height, args.input_width, 3])
            )

            # Check for start frame. Frames before it still have to be read
            # from the pipe, they are just not processed.
            if frame_num < args.start_frame:
                continue

            # Check for decimate frame.
            if (frame_num - args.start_frame) % args.decimate != 0:
                continue

            # Resize to working size.
            frame = resize(
                input_frame,
                args.working_width,
                args.working_height,
                PIL.Image.NEAREST,
            )

            # Call process.
            output_frame, debug_frame = process(args, state, frame, frame_num)

            # Show info. Positions are clamped by text(), so these requests
            # end up top-left, top-right and bottom-left respectively.
            if args.debug:
                debug_frame = text(
                    debug_frame,
                    elapsed_frames,
                    0,
                    0,
                    args.thickness,
                )
                debug_frame = text(
                    debug_frame,
                    elapsed_time,
                    args.working_width,
                    0,
                    args.thickness,
                )
                debug_frame = text(
                    debug_frame,
                    argv,
                    0,
                    args.working_height,
                    args.thickness,
                )

            # Show preview windows.
            if not args.no_preview:
                if args.debug:
                    cv2.imshow(
                        f"{args.debug}",
                        cv2.cvtColor(debug_frame, cv2.COLOR_RGB2BGR),
                    )
                if args.output:
                    cv2.imshow(
                        f"{args.output}",
                        cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR),
                    )
                # Quit on 'q' or Escape.
                if cv2.waitKey(1) in (ord('q'), 27):
                    break

            # Write files.
            if not args.no_write:
                if sigint:
                    break
                try:
                    if args.debug:
                        debug_stream.stdin.write(
                            debug_frame
                            .astype(np.uint8)
                            .tobytes()
                        )
                    if args.output:
                        output_stream.stdin.write(
                            output_frame
                            .astype(np.uint8)
                            .tobytes()
                        )
                except BrokenPipeError:
                    # FFmpeg has probably written some error message to stderr,
                    # so just break.
                    break
    except KeyboardInterrupt:
        pass
    finally:
        print("")
        # Close and wait.
        if not args.no_write:
            if args.debug:
                _close_writer(debug_stream)
            if args.output:
                _close_writer(output_stream)
        input_stream.send_signal(signal.SIGINT)
        input_stream.communicate()