55 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
59 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
138 if (
s->hwctx &&
s->cu_module) {
139 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
142 CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
143 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
194 s->frames_ctx = out_ref;
213 int out_width,
int out_height)
224 if (!
ctx->inputs[0]->hw_frames_ctx) {
243 s->in_fmt = in_format;
244 s->out_fmt = out_format;
246 if (
s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
259 if (!
ctx->outputs[0]->hw_frames_ctx)
272 CUcontext
dummy, cuda_ctx = device_hwctx->cuda_ctx;
273 CudaFunctions *cu = device_hwctx->internal->cuda_dl;
279 const char *function_infix =
"";
284 switch(
s->interp_algo) {
287 function_infix =
"_Nearest";
288 s->interp_use_linear = 0;
289 s->interp_as_integer = 1;
293 function_infix =
"_Bilinear";
294 s->interp_use_linear = 1;
295 s->interp_as_integer = 1;
300 function_infix =
"_Bicubic";
301 s->interp_use_linear = 0;
302 s->interp_as_integer = 0;
306 function_infix =
"_Lanczos";
307 s->interp_use_linear = 0;
308 s->interp_as_integer = 0;
315 s->hwctx = device_hwctx;
316 s->cu_stream =
s->hwctx->stream;
318 ret =
CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
322 ret =
CHECK_CU(cu->cuModuleLoadData(&
s->cu_module, scaler_ptx));
326 snprintf(buf,
sizeof(buf),
"Subsample%s_uchar", function_infix);
327 CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar,
s->cu_module, buf));
331 snprintf(buf,
sizeof(buf),
"Subsample%s_uchar2", function_infix);
332 CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar2,
s->cu_module, buf));
336 snprintf(buf,
sizeof(buf),
"Subsample%s_uchar4", function_infix);
337 CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar4,
s->cu_module, buf));
341 snprintf(buf,
sizeof(buf),
"Subsample%s_ushort", function_infix);
342 CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort,
s->cu_module, buf));
346 snprintf(buf,
sizeof(buf),
"Subsample%s_ushort2", function_infix);
347 CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort2,
s->cu_module, buf));
351 snprintf(buf,
sizeof(buf),
"Subsample%s_ushort4", function_infix);
352 CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort4,
s->cu_module, buf));
360 s->w_expr,
s->h_expr,
366 s->force_original_aspect_ratio,
s->force_divisible_by);
368 if (((int64_t)
h * inlink->
w) > INT_MAX ||
369 ((int64_t)
w * inlink->
h) > INT_MAX)
380 inlink->
w, inlink->
h, outlink->
w, outlink->
h,
s->passthrough ?
" (passthrough)" :
"");
384 outlink->
w*inlink->
h},
397 uint8_t *src_dptr,
int src_width,
int src_height,
int src_pitch,
398 uint8_t *dst_dptr,
int dst_width,
int dst_height,
int dst_pitch,
402 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
403 CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
405 void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch,
406 &src_width, &src_height, &
bit_depth, &
s->param };
409 CUDA_TEXTURE_DESC tex_desc = {
410 .filterMode =
s->interp_use_linear ?
411 CU_TR_FILTER_MODE_LINEAR :
412 CU_TR_FILTER_MODE_POINT,
413 .flags =
s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
416 CUDA_RESOURCE_DESC res_desc = {
417 .resType = CU_RESOURCE_TYPE_PITCH2D,
418 .res.pitch2D.format = pixel_size == 1 ?
419 CU_AD_FORMAT_UNSIGNED_INT8 :
420 CU_AD_FORMAT_UNSIGNED_INT16,
421 .res.pitch2D.numChannels =
channels,
422 .res.pitch2D.width = src_width,
423 .res.pitch2D.height = src_height,
424 .res.pitch2D.pitchInBytes = src_pitch,
425 .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
432 ret =
CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc,
NULL));
442 CHECK_CU(cu->cuTexObjectDestroy(tex));
456 in->data[0],
in->width,
in->height,
in->linesize[0],
457 out->data[0],
out->width,
out->height,
out->linesize[0],
460 in->data[1],
in->width / 2,
in->height / 2,
in->linesize[1],
461 out->data[1],
out->width / 2,
out->height / 2,
out->linesize[1],
464 in->data[2],
in->width / 2,
in->height / 2,
in->linesize[2],
465 out->data[2],
out->width / 2,
out->height / 2,
out->linesize[2],
470 in->data[0],
in->width,
in->height,
in->linesize[0],
471 out->data[0],
out->width,
out->height,
out->linesize[0],
474 in->data[1],
in->width,
in->height,
in->linesize[1],
475 out->data[1],
out->width,
out->height,
out->linesize[1],
478 in->data[2],
in->width,
in->height,
in->linesize[2],
479 out->data[2],
out->width,
out->height,
out->linesize[2],
484 in->data[0],
in->width,
in->height,
in->linesize[0],
485 out->data[0],
out->width,
out->height,
out->linesize[0],
488 in->data[1],
in->width,
in->height,
in->linesize[1],
489 out->data[1],
out->width,
out->height,
out->linesize[1],
492 in->data[2],
in->width,
in->height,
in->linesize[2],
493 out->data[2],
out->width,
out->height,
out->linesize[2],
498 in->data[0],
in->width,
in->height,
in->linesize[0],
499 out->data[0],
out->width,
out->height,
out->linesize[0],
502 in->data[1],
in->width / 2,
in->height / 2,
in->linesize[1],
503 out->data[1],
out->width / 2,
out->height / 2,
out->linesize[1],
508 in->data[0],
in->width,
in->height,
in->linesize[0],
509 out->data[0],
out->width,
out->height,
out->linesize[0],
512 in->data[1],
in->width / 2,
in->height / 2,
in->linesize[1],
513 out->data[1],
out->width / 2,
out->height / 2,
out->linesize[1],
518 in->data[0],
in->width,
in->height,
in->linesize[0],
519 out->data[0],
out->width,
out->height,
out->linesize[0],
522 in->data[1],
in->width / 2,
in->height / 2,
in->linesize[1],
523 out->data[1],
out->width / 2,
out->height / 2,
out->linesize[1],
529 in->data[0],
in->width,
in->height,
in->linesize[0],
530 out->data[0],
out->width,
out->height,
out->linesize[0],
559 s->frame->width = outlink->
w;
560 s->frame->height = outlink->
h;
574 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
589 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
599 av_reduce(&
out->sample_aspect_ratio.num, &
out->sample_aspect_ratio.den,
600 (int64_t)
in->sample_aspect_ratio.num * outlink->
h * link->
w,
601 (int64_t)
in->sample_aspect_ratio.den * outlink->
w * link->
h,
616 return s->passthrough ?
621 #define OFFSET(x) offsetof(CUDAScaleContext, x)
622 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
631 {
"passthrough",
"Do not process frames at all if parameters match",
OFFSET(passthrough),
AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1,
FLAGS },
633 {
"force_original_aspect_ratio",
"decrease or increase w/h if necessary to keep the original AR",
OFFSET(force_original_aspect_ratio),
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2,
FLAGS,
"force_oar" },
637 {
"force_divisible_by",
"enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used",
OFFSET(force_divisible_by),
AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256,
FLAGS },
668 .
name =
"scale_cuda",
static const AVFilterPad inputs[]
static const AVFilterPad outputs[]
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Main libavfilter public API header.
common internal and external API header
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
void av_frame_move_ref(AVFrame *dst, AVFrame *src)
Move everything contained in src to dst and reset src.
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
#define AV_LOG_VERBOSE
Detailed information.
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
const char * av_default_item_name(void *ptr)
Return the context name.
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max)
Reduce a fraction.
#define LIBAVUTIL_VERSION_INT
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
int av_hwframe_get_buffer(AVBufferRef *hwframe_ref, AVFrame *frame, int flags)
Allocate a new frame attached to the given AVHWFramesContext.
FFmpeg internal API for CUDA.
int(* func)(AVBPrint *dst, const char *in, const char *arg)
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
static enum AVPixelFormat pix_fmts[]
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
#define AV_PIX_FMT_0RGB32
AVPixelFormat
Pixel format.
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
@ AV_PIX_FMT_P010LE
like NV12, with 10bpp per component, data in the high bits, zeros in the low bits,...
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
@ AV_PIX_FMT_P016LE
like NV12, with 16bpp per component, little-endian
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
#define AV_PIX_FMT_YUV444P16
#define AV_PIX_FMT_0BGR32
int ff_scale_adjust_dimensions(AVFilterLink *inlink, int *ret_w, int *ret_h, int force_original_aspect_ratio, int force_divisible_by)
Transform evaluated width and height obtained from ff_scale_eval_dimensions into actual target width ...
int ff_scale_eval_dimensions(void *log_ctx, const char *w_expr, const char *h_expr, AVFilterLink *inlink, AVFilterLink *outlink, int *ret_w, int *ret_h)
Parse and evaluate string expressions for width and height.
#define FF_ARRAY_ELEMS(a)
A reference to a data buffer.
uint8_t * data
The data buffer.
This struct is allocated as AVHWDeviceContext.hwctx.
Describe the class of an AVClass context structure.
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
AVFilterLink ** inputs
array of pointers to input links
void * priv
private data for use by the filter
A link between two filters.
int w
agreed upon image width
int h
agreed upon image height
AVFilterContext * src
source filter
AVBufferRef * hw_frames_ctx
For hwaccel pixel formats, this should be a reference to the AVHWFramesContext describing the frames.
AVRational sample_aspect_ratio
agreed upon sample aspect ratio
AVFilterContext * dst
dest filter
A filter pad used for either input or output.
const char * name
Pad name.
const char * name
Filter name.
This structure describes decoded (raw) audio or video data.
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
This struct describes a set or pool of "hardware" frames (i.e.
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
int width
The allocated dimensions of the frames in this pool.
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Rational number (pair of numerator and denominator).
CUfunction cu_func_uchar2
CUfunction cu_func_uchar4
enum AVPixelFormat in_fmt
enum AVPixelFormat format
Output sw format.
int force_original_aspect_ratio
CUfunction cu_func_ushort2
AVCUDADeviceContext * hwctx
char * w_expr
width expression string
CUfunction cu_func_ushort
CUfunction cu_func_ushort4
char * h_expr
height expression string
enum AVPixelFormat out_fmt
static enum AVPixelFormat supported_formats[]
static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
static const AVOption options[]
static av_cold void cudascale_uninit(AVFilterContext *ctx)
static int cudascale_query_formats(AVFilterContext *ctx)
static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
static av_cold int cudascale_config_props(AVFilterLink *outlink)
static AVFrame * cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height, int out_width, int out_height)
static av_cold int cudascale_init(AVFilterContext *ctx)
static const AVClass cudascale_class
static int scalecuda_resize(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
static int format_is_supported(enum AVPixelFormat fmt)
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
AVFilter ff_vf_scale_cuda
static const AVFilterPad cudascale_outputs[]
static const AVFilterPad cudascale_inputs[]
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size, int bit_depth)
#define SCALE_CUDA_PARAM_DEFAULT
const char vf_scale_cuda_ptx[]
const char vf_scale_cuda_bicubic_ptx[]
AVFrame * ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
AVFrame * ff_default_get_video_buffer(AVFilterLink *link, int w, int h)