FFmpeg  4.4
vf_overlay_cuda.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Yaroslav Pogrebnyak <yyyaroslav@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Overlay one video on top of another using cuda hardware acceleration
24  */
25 
26 #include "libavutil/log.h"
27 #include "libavutil/mem.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/pixdesc.h"
30 #include "libavutil/hwcontext.h"
32 #include "libavutil/cuda_check.h"
33 
34 #include "avfilter.h"
35 #include "framesync.h"
36 #include "internal.h"
37 
38 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, ctx->hwctx->internal->cuda_dl, x)
39 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
40 
41 #define BLOCK_X 32
42 #define BLOCK_Y 16
43 
44 static const enum AVPixelFormat supported_main_formats[] = {
48 };
49 
50 static const enum AVPixelFormat supported_overlay_formats[] = {
55 };
56 
57 /**
58  * OverlayCUDAContext
59  */
60 typedef struct OverlayCUDAContext {
61  const AVClass *class;
62 
65 
67 
68  CUcontext cu_ctx;
69  CUmodule cu_module;
70  CUfunction cu_func;
71  CUstream cu_stream;
72 
74 
77 
79 
80 /**
81  * Helper to find out if provided format is supported by filter
82  */
83 static int format_is_supported(const enum AVPixelFormat formats[], enum AVPixelFormat fmt)
84 {
85  for (int i = 0; formats[i] != AV_PIX_FMT_NONE; i++)
86  if (formats[i] == fmt)
87  return 1;
88  return 0;
89 }
90 
91 /**
92  * Helper checks if we can process main and overlay pixel formats
93  */
94 static int formats_match(const enum AVPixelFormat format_main, const enum AVPixelFormat format_overlay) {
95  switch(format_main) {
96  case AV_PIX_FMT_NV12:
97  return format_overlay == AV_PIX_FMT_NV12;
98  case AV_PIX_FMT_YUV420P:
99  return format_overlay == AV_PIX_FMT_YUV420P ||
100  format_overlay == AV_PIX_FMT_YUVA420P;
101  default:
102  return 0;
103  }
104 }
105 
106 /**
107  * Call overlay kernell for a plane
108  */
111  int x_position, int y_position,
112  uint8_t* main_data, int main_linesize,
113  int main_width, int main_height,
114  uint8_t* overlay_data, int overlay_linesize,
115  int overlay_width, int overlay_height,
116  uint8_t* alpha_data, int alpha_linesize,
117  int alpha_adj_x, int alpha_adj_y) {
118 
119  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
120 
121  void* kernel_args[] = {
122  &x_position, &y_position,
123  &main_data, &main_linesize,
124  &overlay_data, &overlay_linesize,
125  &overlay_width, &overlay_height,
126  &alpha_data, &alpha_linesize,
127  &alpha_adj_x, &alpha_adj_y,
128  };
129 
130  return CHECK_CU(cu->cuLaunchKernel(
131  ctx->cu_func,
132  DIV_UP(main_width, BLOCK_X), DIV_UP(main_height, BLOCK_Y), 1,
133  BLOCK_X, BLOCK_Y, 1,
134  0, ctx->cu_stream, kernel_args, NULL));
135 }
136 
137 /**
138  * Perform blend overlay picture over main picture
139  */
141 {
142  int ret;
143 
144  AVFilterContext *avctx = fs->parent;
145  OverlayCUDAContext *ctx = avctx->priv;
146  AVFilterLink *outlink = avctx->outputs[0];
147 
148  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
149  CUcontext dummy, cuda_ctx = ctx->hwctx->cuda_ctx;
150 
151  AVFrame *input_main, *input_overlay;
152 
153  ctx->cu_ctx = cuda_ctx;
154 
155  // read main and overlay frames from inputs
156  ret = ff_framesync_dualinput_get(fs, &input_main, &input_overlay);
157  if (ret < 0)
158  return ret;
159 
160  if (!input_main)
161  return AVERROR_BUG;
162 
163  if (!input_overlay)
164  return ff_filter_frame(outlink, input_main);
165 
166  ret = av_frame_make_writable(input_main);
167  if (ret < 0) {
168  av_frame_free(&input_main);
169  return ret;
170  }
171 
172  // push cuda context
173 
174  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
175  if (ret < 0) {
176  av_frame_free(&input_main);
177  return ret;
178  }
179 
180  // overlay first plane
181 
183  ctx->x_position, ctx->y_position,
184  input_main->data[0], input_main->linesize[0],
185  input_main->width, input_main->height,
186  input_overlay->data[0], input_overlay->linesize[0],
187  input_overlay->width, input_overlay->height,
188  input_overlay->data[3], input_overlay->linesize[3], 1, 1);
189 
190  // overlay rest planes depending on pixel format
191 
192  switch(ctx->in_format_overlay) {
193  case AV_PIX_FMT_NV12:
195  ctx->x_position, ctx->y_position / 2,
196  input_main->data[1], input_main->linesize[1],
197  input_main->width, input_main->height / 2,
198  input_overlay->data[1], input_overlay->linesize[1],
199  input_overlay->width, input_overlay->height / 2,
200  0, 0, 0, 0);
201  break;
202  case AV_PIX_FMT_YUV420P:
203  case AV_PIX_FMT_YUVA420P:
205  ctx->x_position / 2 , ctx->y_position / 2,
206  input_main->data[1], input_main->linesize[1],
207  input_main->width / 2, input_main->height / 2,
208  input_overlay->data[1], input_overlay->linesize[1],
209  input_overlay->width / 2, input_overlay->height / 2,
210  input_overlay->data[3], input_overlay->linesize[3], 2, 2);
211 
213  ctx->x_position / 2 , ctx->y_position / 2,
214  input_main->data[2], input_main->linesize[2],
215  input_main->width / 2, input_main->height / 2,
216  input_overlay->data[2], input_overlay->linesize[2],
217  input_overlay->width / 2, input_overlay->height / 2,
218  input_overlay->data[3], input_overlay->linesize[3], 2, 2);
219  break;
220  default:
221  av_log(ctx, AV_LOG_ERROR, "Passed unsupported overlay pixel format\n");
222  av_frame_free(&input_main);
223  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
224  return AVERROR_BUG;
225  }
226 
227  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
228 
229  return ff_filter_frame(outlink, input_main);
230 }
231 
232 /**
233  * Initialize overlay_cuda
234  */
236 {
237  OverlayCUDAContext* ctx = avctx->priv;
238  ctx->fs.on_event = &overlay_cuda_blend;
239 
240  return 0;
241 }
242 
243 /**
244  * Uninitialize overlay_cuda
245  */
247 {
248  OverlayCUDAContext* ctx = avctx->priv;
249 
250  ff_framesync_uninit(&ctx->fs);
251 
252  if (ctx->hwctx && ctx->cu_module) {
253  CUcontext dummy;
254  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
255  CHECK_CU(cu->cuCtxPushCurrent(ctx->cu_ctx));
256  CHECK_CU(cu->cuModuleUnload(ctx->cu_module));
257  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
258  }
259 }
260 
261 /**
262  * Activate overlay_cuda
263  */
265 {
266  OverlayCUDAContext *ctx = avctx->priv;
267 
268  return ff_framesync_activate(&ctx->fs);
269 }
270 
271 /**
272  * Query formats
273  */
275 {
276  static const enum AVPixelFormat pixel_formats[] = {
278  };
279 
280  AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
281 
282  return ff_set_common_formats(avctx, pix_fmts);
283 }
284 
285 /**
286  * Configure output
287  */
289 {
290 
291  extern char vf_overlay_cuda_ptx[];
292 
293  int err;
294  AVFilterContext* avctx = outlink->src;
295  OverlayCUDAContext* ctx = avctx->priv;
296 
297  AVFilterLink *inlink = avctx->inputs[0];
298  AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
299 
300  AVFilterLink *inlink_overlay = avctx->inputs[1];
301  AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data;
302 
303  CUcontext dummy, cuda_ctx;
304  CudaFunctions *cu;
305 
306  // check main input formats
307 
308  if (!frames_ctx) {
309  av_log(ctx, AV_LOG_ERROR, "No hw context provided on main input\n");
310  return AVERROR(EINVAL);
311  }
312 
313  ctx->in_format_main = frames_ctx->sw_format;
314  if (!format_is_supported(supported_main_formats, ctx->in_format_main)) {
315  av_log(ctx, AV_LOG_ERROR, "Unsupported main input format: %s\n",
316  av_get_pix_fmt_name(ctx->in_format_main));
317  return AVERROR(ENOSYS);
318  }
319 
320  // check overlay input formats
321 
322  if (!frames_ctx_overlay) {
323  av_log(ctx, AV_LOG_ERROR, "No hw context provided on overlay input\n");
324  return AVERROR(EINVAL);
325  }
326 
327  ctx->in_format_overlay = frames_ctx_overlay->sw_format;
328  if (!format_is_supported(supported_overlay_formats, ctx->in_format_overlay)) {
329  av_log(ctx, AV_LOG_ERROR, "Unsupported overlay input format: %s\n",
330  av_get_pix_fmt_name(ctx->in_format_overlay));
331  return AVERROR(ENOSYS);
332  }
333 
334  // check we can overlay pictures with those pixel formats
335 
336  if (!formats_match(ctx->in_format_main, ctx->in_format_overlay)) {
337  av_log(ctx, AV_LOG_ERROR, "Can't overlay %s on %s \n",
338  av_get_pix_fmt_name(ctx->in_format_overlay), av_get_pix_fmt_name(ctx->in_format_main));
339  return AVERROR(EINVAL);
340  }
341 
342  // initialize
343 
344  ctx->hwctx = frames_ctx->device_ctx->hwctx;
345  cuda_ctx = ctx->hwctx->cuda_ctx;
346  ctx->fs.time_base = inlink->time_base;
347 
348  ctx->cu_stream = ctx->hwctx->stream;
349 
350  outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
351 
352  // load functions
353 
354  cu = ctx->hwctx->internal->cuda_dl;
355 
356  err = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
357  if (err < 0) {
358  return err;
359  }
360 
361  err = CHECK_CU(cu->cuModuleLoadData(&ctx->cu_module, vf_overlay_cuda_ptx));
362  if (err < 0) {
363  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
364  return err;
365  }
366 
367  err = CHECK_CU(cu->cuModuleGetFunction(&ctx->cu_func, ctx->cu_module, "Overlay_Cuda"));
368  if (err < 0) {
369  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
370  return err;
371  }
372 
373  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
374 
375  // init dual input
376 
377  err = ff_framesync_init_dualinput(&ctx->fs, avctx);
378  if (err < 0) {
379  return err;
380  }
381 
382  return ff_framesync_configure(&ctx->fs);
383 }
384 
385 
386 #define OFFSET(x) offsetof(OverlayCUDAContext, x)
387 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
388 
389 static const AVOption overlay_cuda_options[] = {
390  { "x", "Overlay x position",
391  OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = FLAGS },
392  { "y", "Overlay y position",
393  OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = FLAGS },
394  { "eof_action", "Action to take when encountering EOF from secondary input ",
395  OFFSET(fs.opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT },
396  EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" },
397  { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
398  { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
399  { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, "eof_action" },
400  { "shortest", "force termination when the shortest input terminates", OFFSET(fs.opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
401  { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(fs.opt_repeatlast), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
402  { NULL },
403 };
404 
406 
408  {
409  .name = "main",
410  .type = AVMEDIA_TYPE_VIDEO,
411  },
412  {
413  .name = "overlay",
414  .type = AVMEDIA_TYPE_VIDEO,
415  },
416  { NULL }
417 };
418 
420  {
421  .name = "default",
422  .type = AVMEDIA_TYPE_VIDEO,
423  .config_props = &overlay_cuda_config_output,
424  },
425  { NULL }
426 };
427 
429  .name = "overlay_cuda",
430  .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another using CUDA"),
431  .priv_size = sizeof(OverlayCUDAContext),
432  .priv_class = &overlay_cuda_class,
439  .preinit = overlay_cuda_framesync_preinit,
440  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
441 };
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
static int activate(AVFilterContext *ctx)
Definition: af_adeclick.c:630
#define av_cold
Definition: attributes.h:88
uint8_t
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1094
Main libavfilter public API header.
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:31
#define fs(width, name, subs,...)
Definition: cbs_vp9.c:259
#define NULL
Definition: coverity.c:32
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:587
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
int ff_framesync_configure(FFFrameSync *fs)
Configure a frame sync structure.
Definition: framesync.c:124
int ff_framesync_dualinput_get(FFFrameSync *fs, AVFrame **f0, AVFrame **f1)
Definition: framesync.c:376
int ff_framesync_activate(FFFrameSync *fs)
Examine the frames in the filter's input and try to produce output.
Definition: framesync.c:341
int ff_framesync_init_dualinput(FFFrameSync *fs, AVFilterContext *parent)
Initialize a frame sync structure for dualinput.
Definition: framesync.c:358
void ff_framesync_uninit(FFFrameSync *fs)
Free all memory currently allocated.
Definition: framesync.c:290
@ EOF_ACTION_PASS
Definition: framesync.h:29
@ EOF_ACTION_ENDALL
Definition: framesync.h:28
@ EOF_ACTION_REPEAT
Definition: framesync.h:27
@ AV_OPT_TYPE_CONST
Definition: opt.h:234
@ AV_OPT_TYPE_INT
Definition: opt.h:225
@ AV_OPT_TYPE_BOOL
Definition: opt.h:242
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:93
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
int av_frame_make_writable(AVFrame *frame)
Ensure that the frame data is writable, avoiding data copy if possible.
Definition: frame.c:611
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
FFmpeg internal API for CUDA.
int i
Definition: input.c:407
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:339
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:303
Memory handling functions.
int dummy
Definition: motion.c:64
AVOptions.
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2489
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
@ AV_PIX_FMT_YUVA420P
planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
Definition: pixfmt.h:101
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:235
formats
Definition: signature.h:48
uint8_t * data
The data buffer.
Definition: buffer.h:92
This struct is allocated as AVHWDeviceContext.hwctx.
Describe the class of an AVClass context structure.
Definition: log.h:67
An instance of a filter.
Definition: avfilter.h:341
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:349
void * priv
private data for use by the filter
Definition: avfilter.h:356
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:353
A list of supported formats for one end of a filter link.
Definition: formats.h:65
A filter pad used for either input or output.
Definition: internal.h:54
const char * name
Pad name.
Definition: internal.h:60
Filter definition.
Definition: avfilter.h:145
const char * name
Filter name.
Definition: avfilter.h:149
AVFormatInternal * internal
An opaque field for libavformat internal usage.
Definition: avformat.h:1699
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:332
int width
Definition: frame.h:376
int height
Definition: frame.h:376
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:349
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:92
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:222
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:149
AVOption.
Definition: opt.h:248
Frame sync structure.
Definition: framesync.h:146
OverlayCUDAContext.
enum AVPixelFormat in_format_main
enum AVPixelFormat in_format_overlay
AVCUDADeviceContext * hwctx
#define av_log(a,...)
AVFormatContext * ctx
Definition: movenc.c:48
AVFilter ff_vf_overlay_cuda
static int overlay_cuda_config_output(AVFilterLink *outlink)
Configure output.
#define BLOCK_X
static enum AVPixelFormat supported_main_formats[]
static int overlay_cuda_query_formats(AVFilterContext *avctx)
Query formats.
static int overlay_cuda_call_kernel(OverlayCUDAContext *ctx, int x_position, int y_position, uint8_t *main_data, int main_linesize, int main_width, int main_height, uint8_t *overlay_data, int overlay_linesize, int overlay_width, int overlay_height, uint8_t *alpha_data, int alpha_linesize, int alpha_adj_x, int alpha_adj_y)
Call overlay kernel for a plane.
static const AVOption overlay_cuda_options[]
static int format_is_supported(const enum AVPixelFormat formats[], enum AVPixelFormat fmt)
Helper to find out if provided format is supported by filter.
#define DIV_UP(a, b)
static const AVFilterPad overlay_cuda_inputs[]
#define FLAGS
static int overlay_cuda_blend(FFFrameSync *fs)
Perform blend overlay picture over main picture.
static enum AVPixelFormat supported_overlay_formats[]
static av_cold void overlay_cuda_uninit(AVFilterContext *avctx)
Uninitialize overlay_cuda.
static const AVFilterPad overlay_cuda_outputs[]
#define BLOCK_Y
static int overlay_cuda_activate(AVFilterContext *avctx)
Activate overlay_cuda.
static av_cold int overlay_cuda_init(AVFilterContext *avctx)
Initialize overlay_cuda.
#define CHECK_CU(x)
static int formats_match(const enum AVPixelFormat format_main, const enum AVPixelFormat format_overlay)
Helper checks if we can process main and overlay pixel formats.
FRAMESYNC_DEFINE_CLASS(overlay_cuda, OverlayCUDAContext, fs)
#define OFFSET(x)
const char vf_overlay_cuda_ptx[]