79 int weight_buf_size =
width *
height *
sizeof(float);
82 if (!(
ctx->research_size & 1)) {
83 ctx->research_size |= 1;
85 "research_size should be odd, set to %d",
89 if (!(
ctx->patch_size & 1)) {
92 "patch_size should be odd, set to %d",
96 if (!
ctx->research_size_uv)
97 ctx->research_size_uv =
ctx->research_size;
98 if (!
ctx->patch_size_uv)
99 ctx->patch_size_uv =
ctx->patch_size;
105 ctx->command_queue = clCreateCommandQueue(
ctx->ocf.hwctx->context,
106 ctx->ocf.hwctx->device_id,
109 "command queue %d.\n", cle);
111 ctx->vert_kernel = clCreateKernel(
ctx->ocf.program,
114 "vert_sum kernel %d.\n", cle);
116 ctx->horiz_kernel = clCreateKernel(
ctx->ocf.program,
119 "horiz_sum kernel %d.\n", cle);
121 ctx->accum_kernel = clCreateKernel(
ctx->ocf.program,
122 "weight_accum", &cle);
124 "accum kernel %d.\n", cle);
126 ctx->average_kernel = clCreateKernel(
ctx->ocf.program,
129 "average kernel %d.\n", cle);
131 ctx->integral_img = clCreateBuffer(
ctx->ocf.hwctx->context, 0,
135 "integral image %d.\n", cle);
137 ctx->weight = clCreateBuffer(
ctx->ocf.hwctx->context, 0,
138 weight_buf_size,
NULL, &cle);
140 "weight buffer %d.\n", cle);
142 ctx->sum = clCreateBuffer(
ctx->ocf.hwctx->context, 0,
143 weight_buf_size,
NULL, &cle);
145 "sum buffer %d.\n", cle);
147 ctx->overflow = clCreateBuffer(
ctx->ocf.hwctx->context, 0,
148 sizeof(cl_int),
NULL, &cle);
150 "overflow buffer %d.\n", cle);
152 ctx->initialised = 1;
174 const float zero = 0.0f;
175 const size_t worksize1[] = {
height};
176 const size_t worksize2[] = {
width};
178 int i, dx, dy, err = 0, weight_buf_size;
180 int nb_pixel, *
tmp =
NULL, idx = 0;
184 cle = clEnqueueFillBuffer(
ctx->command_queue,
ctx->weight,
185 &
zero,
sizeof(
float), 0, weight_buf_size,
189 cle = clEnqueueFillBuffer(
ctx->command_queue,
ctx->sum,
190 &
zero,
sizeof(
float), 0, weight_buf_size,
195 nb_pixel = (2 *
r + 1) * (2 *
r + 1) - 1;
196 dxdy =
av_malloc(nb_pixel * 2 *
sizeof(cl_int));
202 for (dx = -
r; dx <=
r; dx++) {
203 for (dy = -
r; dy <=
r; dy++) {
211 for (
i = 0;
i < nb_pixel / 4;
i++) {
212 dxdy[
i * 8] =
tmp[
i * 8];
213 dxdy[
i * 8 + 1] =
tmp[
i * 8 + 2];
214 dxdy[
i * 8 + 2] =
tmp[
i * 8 + 4];
215 dxdy[
i * 8 + 3] =
tmp[
i * 8 + 6];
216 dxdy[
i * 8 + 4] =
tmp[
i * 8 + 1];
217 dxdy[
i * 8 + 5] =
tmp[
i * 8 + 3];
218 dxdy[
i * 8 + 6] =
tmp[
i * 8 + 5];
219 dxdy[
i * 8 + 7] =
tmp[
i * 8 + 7];
223 for (
i = 0;
i < nb_pixel / 4;
i++) {
224 cl_int *dx_cur = dxdy + 8 *
i;
225 cl_int *dy_cur = dxdy + 8 *
i + 4;
235 cle = clEnqueueNDRangeKernel(
ctx->command_queue,
ctx->horiz_kernel, 1,
245 cle = clEnqueueNDRangeKernel(
ctx->command_queue,
ctx->vert_kernel,
261 cle = clEnqueueNDRangeKernel(
ctx->command_queue,
ctx->accum_kernel,
272 cle = clEnqueueNDRangeKernel(
ctx->command_queue,
ctx->average_kernel, 2,
276 cle = clFlush(
ctx->command_queue);
296 const cl_int
zero = 0;
297 int w,
h, err, cle, overflow, p, patch, research;
318 if (!
ctx->initialised) {
334 cle = clEnqueueWriteBuffer(
ctx->command_queue,
ctx->overflow, CL_FALSE,
337 "detection buffer %d.\n", cle);
341 dst = (cl_mem) output->
data[p];
346 w = p ?
ctx->chroma_w : inlink->
w;
347 h = p ?
ctx->chroma_h : inlink->
h;
348 patch = (p ?
ctx->patch_size_uv :
ctx->patch_size) / 2;
349 research = (p ?
ctx->research_size_uv :
ctx->research_size) / 2;
355 cle = clEnqueueReadBuffer(
ctx->command_queue,
ctx->overflow, CL_FALSE,
356 0,
sizeof(cl_int), &overflow, 0,
NULL,
NULL);
359 cle = clFinish(
ctx->command_queue);
374 clFinish(
ctx->command_queue);
400 #define OFFSET(x) offsetof(NLMeansOpenCLContext, x)
401 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
433 .
name =
"nlmeans_opencl",
436 .priv_class = &nlmeans_opencl_class,
static int query_formats(AVFilterContext *ctx)
static const AVFilterPad inputs[]
static const AVFilterPad outputs[]
simple assert() macros that are a bit more flexible than ISO C assert().
#define av_assert0(cond)
assert() equivalent, that is always enabled.
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Main libavfilter public API header.
static av_cold int init(AVCodecContext *avctx)
common internal and external API header
#define AV_CEIL_RSHIFT(a, b)
static av_cold int uninit(AVCodecContext *avctx)
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
#define AV_LOG_WARNING
Something somehow does not look correct.
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Memory handling functions.
const char * ff_opencl_source_nlmeans
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it.
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
int ff_opencl_filter_query_formats(AVFilterContext *avctx)
Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
#define CL_RELEASE_KERNEL(k)
release an OpenCL Kernel
#define CL_SET_KERNEL_ARG(kernel, arg_num, type, arg)
set argument to specific Kernel.
#define CL_RELEASE_MEMORY(m)
release an OpenCL Memory Object
#define CL_RELEASE_QUEUE(q)
release an OpenCL Command Queue
#define CL_FAIL_ON_ERROR(errcode,...)
A helper macro to handle OpenCL errors.
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
AVPixelFormat
Pixel format.
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
#define FF_ARRAY_ELEMS(a)
uint8_t * data
The data buffer.
void * priv
private data for use by the filter
AVFilterLink ** outputs
array of pointers to output links
A link between two filters.
int w
agreed upon image width
int h
agreed upon image height
AVFilterContext * dst
dest filter
A filter pad used for either input or output.
const char * name
Pad name.
const char * name
Filter name.
This structure describes decoded (raw) audio or video data.
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
AVBufferRef * hw_frames_ctx
For hwaccel-format frames, this should be a reference to the AVHWFramesContext describing the frame.
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames,...
This struct describes a set or pool of "hardware" frames (i.e.
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
cl_command_queue command_queue
static av_cold void nlmeans_opencl_uninit(AVFilterContext *avctx)
static enum AVPixelFormat supported_formats[]
static int nlmeans_opencl_init(AVFilterContext *avctx, int width, int height)
static const AVOption nlmeans_opencl_options[]
static int nlmeans_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
static int nlmeans_plane(AVFilterContext *avctx, cl_mem dst, cl_mem src, cl_int width, cl_int height, cl_int p, cl_int r)
AVFILTER_DEFINE_CLASS(nlmeans_opencl)
static const AVFilterPad nlmeans_opencl_inputs[]
AVFilter ff_vf_nlmeans_opencl
static const AVFilterPad nlmeans_opencl_outputs[]
static int is_format_supported(enum AVPixelFormat fmt)
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.