/*
 * SIMD-optimized motion estimation
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/me_cmp.h"
#include "libavcodec/mpegvideo.h"

int ff_sum_abs_dctelem_mmx(int16_t *block);
int ff_sum_abs_dctelem_mmxext(int16_t *block);
int ff_sum_abs_dctelem_sse2(int16_t *block);
int ff_sum_abs_dctelem_ssse3(int16_t *block);
int ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                ptrdiff_t stride, int h);
int ff_sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                 ptrdiff_t stride, int h);
int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                  ptrdiff_t stride, int h);
int ff_hf_noise8_mmx(uint8_t *pix1, ptrdiff_t stride, int h);
int ff_hf_noise16_mmx(uint8_t *pix1, ptrdiff_t stride, int h);
int ff_sad8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                   ptrdiff_t stride, int h);
int ff_sad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                    ptrdiff_t stride, int h);
int ff_sad16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                  ptrdiff_t stride, int h);
int ff_sad8_x2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                      ptrdiff_t stride, int h);
int ff_sad16_x2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                       ptrdiff_t stride, int h);
int ff_sad16_x2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                     ptrdiff_t stride, int h);
int ff_sad8_y2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                      ptrdiff_t stride, int h);
int ff_sad16_y2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                       ptrdiff_t stride, int h);
int ff_sad16_y2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                     ptrdiff_t stride, int h);
int ff_sad8_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                              ptrdiff_t stride, int h);
int ff_sad16_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                               ptrdiff_t stride, int h);
int ff_sad16_approx_xy2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                             ptrdiff_t stride, int h);
int ff_vsad_intra8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h);
int ff_vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                           ptrdiff_t stride, int h);
int ff_vsad_intra16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                         ptrdiff_t stride, int h);
int ff_vsad8_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                           ptrdiff_t stride, int h);
int ff_vsad16_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                            ptrdiff_t stride, int h);
int ff_vsad16_approx_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h);

#define hadamard_func(cpu)                                                    \
    int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1,           \
                                  uint8_t *src2, ptrdiff_t stride, int h);    \
    int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1,         \
                                    uint8_t *src2, ptrdiff_t stride, int h);

hadamard_func(mmx)
hadamard_func(mmxext)
hadamard_func(sse2)
hadamard_func(ssse3)
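
/*
 * The hadamard8_diff functions compute an 8x8 SATD: src1 - src2 is run
 * through a 2-D Hadamard transform and the absolute values of the
 * coefficients are summed (the _diff16 variants cover 16-pixel-wide
 * blocks).  As an illustrative scalar sketch, not part of this file, one
 * 1-D butterfly stage of the transform is:
 *
 *     for (int i = 0; i < 8; i += 2) {
 *         int a = t[i], b = t[i + 1];
 *         t[i]     = a + b;
 *         t[i + 1] = a - b;
 *     }
 *
 * with the full 8-point transform applying three such stages (pair
 * distances 1, 2 and 4) to every row and then every column.
 */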

#if HAVE_X86ASM
static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
                      ptrdiff_t stride, int h)
{
    int score1, score2;

    if (c)
        score1 = c->mecc.sse[0](c, pix1, pix2, stride, h);
    else
        score1 = ff_sse16_mmx(c, pix1, pix2, stride, h);
    score2 = ff_hf_noise16_mmx(pix1, stride, h) + ff_hf_noise8_mmx(pix1+8, stride, h)
           - ff_hf_noise16_mmx(pix2, stride, h) - ff_hf_noise8_mmx(pix2+8, stride, h);

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}

static int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
                     ptrdiff_t stride, int h)
{
    int score1 = ff_sse8_mmx(c, pix1, pix2, stride, h);
    int score2 = ff_hf_noise8_mmx(pix1, stride, h) -
                 ff_hf_noise8_mmx(pix2, stride, h);

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}

#endif /* HAVE_X86ASM */
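
/*
 * nsse ("noise preserving SSE") biases the plain sum of squared errors
 * with the change in high-frequency noise between the two blocks, so the
 * encoder does not favour predictors that merely smooth detail away:
 *
 *     score = sse(pix1, pix2) + |hf_noise(pix1) - hf_noise(pix2)| * weight
 *
 * where the ff_hf_noise*_mmx helpers sum absolute second-order differences
 * over 2x2 pixel neighbourhoods and weight is avctx->nsse_weight (8 in the
 * context-less fallback above).
 */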

#if HAVE_INLINE_ASM

static int vsad_intra16_mmx(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
                            ptrdiff_t stride, int h)
{
    int tmp;

    av_assert2((((int) pix) & 7) == 0);
    av_assert2((stride & 7) == 0);

#define SUM(in0, in1, out0, out1)               \
    "movq (%0), %%mm2\n"                        \
    "movq 8(%0), %%mm3\n"                       \
    "add %2,%0\n"                               \
    "movq %%mm2, " #out0 "\n"                   \
    "movq %%mm3, " #out1 "\n"                   \
    "psubusb " #in0 ", %%mm2\n"                 \
    "psubusb " #in1 ", %%mm3\n"                 \
    "psubusb " #out0 ", " #in0 "\n"             \
    "psubusb " #out1 ", " #in1 "\n"             \
    "por %%mm2, " #in0 "\n"                     \
    "por %%mm3, " #in1 "\n"                     \
    "movq " #in0 ", %%mm2\n"                    \
    "movq " #in1 ", %%mm3\n"                    \
    "punpcklbw %%mm7, " #in0 "\n"               \
    "punpcklbw %%mm7, " #in1 "\n"               \
    "punpckhbw %%mm7, %%mm2\n"                  \
    "punpckhbw %%mm7, %%mm3\n"                  \
    "paddw " #in1 ", " #in0 "\n"                \
    "paddw %%mm3, %%mm2\n"                      \
    "paddw %%mm2, " #in0 "\n"                   \
    "paddw " #in0 ", %%mm6\n"


    __asm__ volatile (
        "movl %3, %%ecx\n"
        "pxor %%mm6, %%mm6\n"
        "pxor %%mm7, %%mm7\n"
        "movq (%0), %%mm0\n"
        "movq 8(%0), %%mm1\n"
        "add %2, %0\n"
        "jmp 2f\n"
        "1:\n"

        SUM(%%mm4, %%mm5, %%mm0, %%mm1)
        "2:\n"
        SUM(%%mm0, %%mm1, %%mm4, %%mm5)

        "subl $2, %%ecx\n"
        "jnz 1b\n"

        "movq %%mm6, %%mm0\n"
        "psrlq $32, %%mm6\n"
        "paddw %%mm6, %%mm0\n"
        "movq %%mm0, %%mm6\n"
        "psrlq $16, %%mm0\n"
        "paddw %%mm6, %%mm0\n"
        "movd %%mm0, %1\n"
        : "+r" (pix), "=r" (tmp)
        : "r" (stride), "m" (h)
        : "%ecx");

    return tmp & 0xFFFF;
}
#undef SUM
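
/*
 * Scalar sketch of what vsad_intra16_mmx measures, roughly the
 * vsad_intra16_c reference in libavcodec/me_cmp.c: the SAD between every
 * row and the row below it.  vsad_intra16_ref is a hypothetical name and
 * the block is illustrative only.
 */
#if 0
static int vsad_intra16_ref(const uint8_t *pix, ptrdiff_t stride, int h)
{
    int score = 0;
    for (int y = 0; y < h - 1; y++) {   /* each adjacent row pair */
        for (int x = 0; x < 16; x++)
            score += FFABS(pix[x] - pix[x + stride]);
        pix += stride;
    }
    return score;
}
#endif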

static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                      ptrdiff_t stride, int h)
{
    int tmp;

    av_assert2((((int) pix1) & 7) == 0);
    av_assert2((((int) pix2) & 7) == 0);
    av_assert2((stride & 7) == 0);

#define SUM(in0, in1, out0, out1)       \
    "movq (%0), %%mm2\n"                \
    "movq (%1), " #out0 "\n"            \
    "movq 8(%0), %%mm3\n"               \
    "movq 8(%1), " #out1 "\n"           \
    "add %3, %0\n"                      \
    "add %3, %1\n"                      \
    "psubb " #out0 ", %%mm2\n"          \
    "psubb " #out1 ", %%mm3\n"          \
    "pxor %%mm7, %%mm2\n"               \
    "pxor %%mm7, %%mm3\n"               \
    "movq %%mm2, " #out0 "\n"           \
    "movq %%mm3, " #out1 "\n"           \
    "psubusb " #in0 ", %%mm2\n"         \
    "psubusb " #in1 ", %%mm3\n"         \
    "psubusb " #out0 ", " #in0 "\n"     \
    "psubusb " #out1 ", " #in1 "\n"     \
    "por %%mm2, " #in0 "\n"             \
    "por %%mm3, " #in1 "\n"             \
    "movq " #in0 ", %%mm2\n"            \
    "movq " #in1 ", %%mm3\n"            \
    "punpcklbw %%mm7, " #in0 "\n"       \
    "punpcklbw %%mm7, " #in1 "\n"       \
    "punpckhbw %%mm7, %%mm2\n"          \
    "punpckhbw %%mm7, %%mm3\n"          \
    "paddw " #in1 ", " #in0 "\n"        \
    "paddw %%mm3, %%mm2\n"              \
    "paddw %%mm2, " #in0 "\n"           \
    "paddw " #in0 ", %%mm6\n"


    __asm__ volatile (
        "movl %4, %%ecx\n"
        "pxor %%mm6, %%mm6\n"
        "pcmpeqw %%mm7, %%mm7\n"
        "psllw $15, %%mm7\n"
        "packsswb %%mm7, %%mm7\n"
        "movq (%0), %%mm0\n"
        "movq (%1), %%mm2\n"
        "movq 8(%0), %%mm1\n"
        "movq 8(%1), %%mm3\n"
        "add %3, %0\n"
        "add %3, %1\n"
        "psubb %%mm2, %%mm0\n"
        "psubb %%mm3, %%mm1\n"
        "pxor %%mm7, %%mm0\n"
        "pxor %%mm7, %%mm1\n"
        "jmp 2f\n"
        "1:\n"

        SUM(%%mm4, %%mm5, %%mm0, %%mm1)
        "2:\n"
        SUM(%%mm0, %%mm1, %%mm4, %%mm5)

        "subl $2, %%ecx\n"
        "jnz 1b\n"

        "movq %%mm6, %%mm0\n"
        "psrlq $32, %%mm6\n"
        "paddw %%mm6, %%mm0\n"
        "movq %%mm0, %%mm6\n"
        "psrlq $16, %%mm0\n"
        "paddw %%mm6, %%mm0\n"
        "movd %%mm0, %2\n"
        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
        : "r" (stride), "m" (h)
        : "%ecx");

    return tmp & 0x7FFF;
}
#undef SUM
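
/*
 * Note on the prologue above: pcmpeqw/psllw $15/packsswb materializes
 * 0x8080808080808080 in %%mm7 without a memory load.  XORing the signed
 * byte differences pix1 - pix2 with it biases them into unsigned range,
 * where the psubusb/por trick can take the absolute value of their
 * row-to-row change (the bias cancels in that subtraction).
 */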

DECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
    0x0000000000000000ULL,
    0x0001000100010001ULL,
    0x0002000200020002ULL,
};
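
/*
 * Rounding biases for the half-sample SAD loops below: round_tab[1] holds
 * +1 in each 16-bit lane for the two-tap average (a + b + 1) >> 1, and
 * round_tab[2] holds +2 per lane for the four-tap average
 * (a + b + c + d + 2) >> 2.
 */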
279 
280 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
281  ptrdiff_t stride, int h)
282 {
283  x86_reg len = -stride * h;
284  __asm__ volatile (
285  ".p2align 4 \n\t"
286  "1: \n\t"
287  "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
288  "movq (%2, %%"FF_REG_a"), %%mm2 \n\t"
289  "movq (%2, %%"FF_REG_a"), %%mm4 \n\t"
290  "add %3, %%"FF_REG_a" \n\t"
291  "psubusb %%mm0, %%mm2 \n\t"
292  "psubusb %%mm4, %%mm0 \n\t"
293  "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
294  "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
295  "movq (%2, %%"FF_REG_a"), %%mm5 \n\t"
296  "psubusb %%mm1, %%mm3 \n\t"
297  "psubusb %%mm5, %%mm1 \n\t"
298  "por %%mm2, %%mm0 \n\t"
299  "por %%mm1, %%mm3 \n\t"
300  "movq %%mm0, %%mm1 \n\t"
301  "movq %%mm3, %%mm2 \n\t"
302  "punpcklbw %%mm7, %%mm0 \n\t"
303  "punpckhbw %%mm7, %%mm1 \n\t"
304  "punpcklbw %%mm7, %%mm3 \n\t"
305  "punpckhbw %%mm7, %%mm2 \n\t"
306  "paddw %%mm1, %%mm0 \n\t"
307  "paddw %%mm3, %%mm2 \n\t"
308  "paddw %%mm2, %%mm0 \n\t"
309  "paddw %%mm0, %%mm6 \n\t"
310  "add %3, %%"FF_REG_a" \n\t"
311  " js 1b \n\t"
312  : "+a" (len)
313  : "r" (blk1 - len), "r" (blk2 - len), "r" (stride));
314 }
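
/*
 * Note on the loop above: a single negative counter walks both blocks
 * (len runs from -stride * h up to 0 and the pointers are pre-biased by
 * -len), two rows are processed per iteration, and |a - b| is formed per
 * byte as the OR of the two unsigned saturated differences before being
 * widened to words and accumulated in %%mm6.
 */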

static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
                              ptrdiff_t stride, int h)
{
    x86_reg len = -stride * h;
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
        "movq (%2, %%"FF_REG_a"), %%mm1 \n\t"
        "movq (%1, %%"FF_REG_a"), %%mm2 \n\t"
        "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpcklbw %%mm7, %%mm1         \n\t"
        "punpckhbw %%mm7, %%mm2         \n\t"
        "punpckhbw %%mm7, %%mm3         \n\t"
        "paddw %%mm0, %%mm1             \n\t"
        "paddw %%mm2, %%mm3             \n\t"
        "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
        "movq (%3, %%"FF_REG_a"), %%mm2 \n\t"
        "paddw %%mm5, %%mm1             \n\t"
        "paddw %%mm5, %%mm3             \n\t"
        "psrlw $1, %%mm1                \n\t"
        "psrlw $1, %%mm3                \n\t"
        "packuswb %%mm3, %%mm1          \n\t"
        "psubusb %%mm1, %%mm4           \n\t"
        "psubusb %%mm2, %%mm1           \n\t"
        "por %%mm4, %%mm1               \n\t"
        "movq %%mm1, %%mm0              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm1         \n\t"
        "paddw %%mm1, %%mm0             \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "add %4, %%"FF_REG_a"           \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len),
          "r" (stride));
}
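
/*
 * sad8_2_mmx SADs blk2 against the rounded average of two source blocks;
 * the x2a/y2a wrappers below use it for the horizontal and vertical
 * half-sample positions.  Rows of blk1a and blk1b are widened to words,
 * summed, biased by round_tab[1] (preloaded into %%mm5 by the callers) and
 * shifted right by one, giving (a + b + 1) >> 1.
 */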

static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
                              ptrdiff_t stride, int h)
{
    x86_reg len = -stride * h;
    __asm__ volatile (
        "movq  (%1, %%"FF_REG_a"), %%mm0\n\t"
        "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
        "movq %%mm0, %%mm1              \n\t"
        "movq %%mm2, %%mm3              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm1         \n\t"
        "punpcklbw %%mm7, %%mm2         \n\t"
        "punpckhbw %%mm7, %%mm3         \n\t"
        "paddw %%mm2, %%mm0             \n\t"
        "paddw %%mm3, %%mm1             \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq  (%2, %%"FF_REG_a"), %%mm2\n\t"
        "movq 1(%2, %%"FF_REG_a"), %%mm4\n\t"
        "movq %%mm2, %%mm3              \n\t"
        "movq %%mm4, %%mm5              \n\t"
        "punpcklbw %%mm7, %%mm2         \n\t"
        "punpckhbw %%mm7, %%mm3         \n\t"
        "punpcklbw %%mm7, %%mm4         \n\t"
        "punpckhbw %%mm7, %%mm5         \n\t"
        "paddw %%mm4, %%mm2             \n\t"
        "paddw %%mm5, %%mm3             \n\t"
        "movq %5, %%mm5                 \n\t"
        "paddw %%mm2, %%mm0             \n\t"
        "paddw %%mm3, %%mm1             \n\t"
        "paddw %%mm5, %%mm0             \n\t"
        "paddw %%mm5, %%mm1             \n\t"
        "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
        "movq (%3, %%"FF_REG_a"), %%mm5 \n\t"
        "psrlw $2, %%mm0                \n\t"
        "psrlw $2, %%mm1                \n\t"
        "packuswb %%mm1, %%mm0          \n\t"
        "psubusb %%mm0, %%mm4           \n\t"
        "psubusb %%mm5, %%mm0           \n\t"
        "por %%mm4, %%mm0               \n\t"
        "movq %%mm0, %%mm4              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm4         \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm4, %%mm6             \n\t"
        "movq %%mm2, %%mm0              \n\t"
        "movq %%mm3, %%mm1              \n\t"
        "add %4, %%"FF_REG_a"           \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
          "r" (stride), "m" (round_tab[2]));
}
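
/*
 * sad8_4_mmx handles the diagonal half-sample position: %%mm0/%%mm1 carry
 * the widened horizontal pair sums of the previous row across iterations,
 * the current row's pair sums and round_tab[2] are added on top, and the
 * shift by two yields the four-tap average (a + b + c + d + 2) >> 2.
 */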

static inline int sum_mmx(void)
{
    int ret;
    __asm__ volatile (
        "movq %%mm6, %%mm0  \n\t"
        "psrlq $32, %%mm6   \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "movq %%mm6, %%mm0  \n\t"
        "psrlq $16, %%mm6   \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "movd %%mm6, %0     \n\t"
        : "=r" (ret));
    return ret & 0xFFFF;
}
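
/*
 * sum_mmx folds the four 16-bit partial sums that the helpers above
 * accumulate in %%mm6 down to a single word (two shift-and-add steps),
 * then masks the result to 16 bits.
 */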

static inline void sad8_x2a_mmx(uint8_t *blk1, uint8_t *blk2,
                                ptrdiff_t stride, int h)
{
    sad8_2_mmx(blk1, blk1 + 1, blk2, stride, h);
}

static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2,
                                ptrdiff_t stride, int h)
{
    sad8_2_mmx(blk1, blk1 + stride, blk2, stride, h);
}
435 
436 #define PIX_SAD(suf) \
437 static int sad8_ ## suf(MpegEncContext *v, uint8_t *blk2, \
438  uint8_t *blk1, ptrdiff_t stride, int h) \
439 { \
440  av_assert2(h == 8); \
441  __asm__ volatile ( \
442  "pxor %%mm7, %%mm7 \n\t" \
443  "pxor %%mm6, %%mm6 \n\t" \
444  :); \
445  \
446  sad8_1_ ## suf(blk1, blk2, stride, 8); \
447  \
448  return sum_ ## suf(); \
449 } \
450  \
451 static int sad8_x2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
452  uint8_t *blk1, ptrdiff_t stride, int h) \
453 { \
454  av_assert2(h == 8); \
455  __asm__ volatile ( \
456  "pxor %%mm7, %%mm7 \n\t" \
457  "pxor %%mm6, %%mm6 \n\t" \
458  "movq %0, %%mm5 \n\t" \
459  :: "m" (round_tab[1])); \
460  \
461  sad8_x2a_ ## suf(blk1, blk2, stride, 8); \
462  \
463  return sum_ ## suf(); \
464 } \
465  \
466 static int sad8_y2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
467  uint8_t *blk1, ptrdiff_t stride, int h) \
468 { \
469  av_assert2(h == 8); \
470  __asm__ volatile ( \
471  "pxor %%mm7, %%mm7 \n\t" \
472  "pxor %%mm6, %%mm6 \n\t" \
473  "movq %0, %%mm5 \n\t" \
474  :: "m" (round_tab[1])); \
475  \
476  sad8_y2a_ ## suf(blk1, blk2, stride, 8); \
477  \
478  return sum_ ## suf(); \
479 } \
480  \
481 static int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
482  uint8_t *blk1, ptrdiff_t stride, int h) \
483 { \
484  av_assert2(h == 8); \
485  __asm__ volatile ( \
486  "pxor %%mm7, %%mm7 \n\t" \
487  "pxor %%mm6, %%mm6 \n\t" \
488  ::); \
489  \
490  sad8_4_ ## suf(blk1, blk2, stride, 8); \
491  \
492  return sum_ ## suf(); \
493 } \
494  \
495 static int sad16_ ## suf(MpegEncContext *v, uint8_t *blk2, \
496  uint8_t *blk1, ptrdiff_t stride, int h) \
497 { \
498  __asm__ volatile ( \
499  "pxor %%mm7, %%mm7 \n\t" \
500  "pxor %%mm6, %%mm6 \n\t" \
501  :); \
502  \
503  sad8_1_ ## suf(blk1, blk2, stride, h); \
504  sad8_1_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
505  \
506  return sum_ ## suf(); \
507 } \
508  \
509 static int sad16_x2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
510  uint8_t *blk1, ptrdiff_t stride, int h) \
511 { \
512  __asm__ volatile ( \
513  "pxor %%mm7, %%mm7 \n\t" \
514  "pxor %%mm6, %%mm6 \n\t" \
515  "movq %0, %%mm5 \n\t" \
516  :: "m" (round_tab[1])); \
517  \
518  sad8_x2a_ ## suf(blk1, blk2, stride, h); \
519  sad8_x2a_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
520  \
521  return sum_ ## suf(); \
522 } \
523  \
524 static int sad16_y2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
525  uint8_t *blk1, ptrdiff_t stride, int h) \
526 { \
527  __asm__ volatile ( \
528  "pxor %%mm7, %%mm7 \n\t" \
529  "pxor %%mm6, %%mm6 \n\t" \
530  "movq %0, %%mm5 \n\t" \
531  :: "m" (round_tab[1])); \
532  \
533  sad8_y2a_ ## suf(blk1, blk2, stride, h); \
534  sad8_y2a_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
535  \
536  return sum_ ## suf(); \
537 } \
538  \
539 static int sad16_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
540  uint8_t *blk1, ptrdiff_t stride, int h) \
541 { \
542  __asm__ volatile ( \
543  "pxor %%mm7, %%mm7 \n\t" \
544  "pxor %%mm6, %%mm6 \n\t" \
545  ::); \
546  \
547  sad8_4_ ## suf(blk1, blk2, stride, h); \
548  sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
549  \
550  return sum_ ## suf(); \
551 } \
552 
553 PIX_SAD(mmx)
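
/*
 * For reference, the full-sample SAD generated above is equivalent to the
 * following scalar loop (an illustrative sketch with a hypothetical name,
 * not part of the original file):
 */
#if 0
static int sad16_ref(const uint8_t *blk2, const uint8_t *blk1,
                     ptrdiff_t stride, int h)
{
    int sum = 0;
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < 16; x++)
            sum += FFABS(blk1[x] - blk2[x]);  /* per-pixel absolute diff */
        blk1 += stride;
        blk2 += stride;
    }
    return sum;
}
#endif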

#endif /* HAVE_INLINE_ASM */

av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_INLINE_ASM
    if (INLINE_MMX(cpu_flags)) {
        c->pix_abs[0][0] = sad16_mmx;
        c->pix_abs[0][1] = sad16_x2_mmx;
        c->pix_abs[0][2] = sad16_y2_mmx;
        c->pix_abs[0][3] = sad16_xy2_mmx;
        c->pix_abs[1][0] = sad8_mmx;
        c->pix_abs[1][1] = sad8_x2_mmx;
        c->pix_abs[1][2] = sad8_y2_mmx;
        c->pix_abs[1][3] = sad8_xy2_mmx;

        c->sad[0] = sad16_mmx;
        c->sad[1] = sad8_mmx;

        c->vsad[4] = vsad_intra16_mmx;

        if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
            c->vsad[0] = vsad16_mmx;
        }
    }

#endif /* HAVE_INLINE_ASM */

    if (EXTERNAL_MMX(cpu_flags)) {
        c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
        c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
        c->sum_abs_dctelem   = ff_sum_abs_dctelem_mmx;
        c->sse[0]            = ff_sse16_mmx;
        c->sse[1]            = ff_sse8_mmx;
#if HAVE_X86ASM
        c->nsse[0]           = nsse16_mmx;
        c->nsse[1]           = nsse8_mmx;
#endif
    }

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
        c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
        c->sum_abs_dctelem   = ff_sum_abs_dctelem_mmxext;

        c->sad[0] = ff_sad16_mmxext;
        c->sad[1] = ff_sad8_mmxext;

        c->pix_abs[0][0] = ff_sad16_mmxext;
        c->pix_abs[0][1] = ff_sad16_x2_mmxext;
        c->pix_abs[0][2] = ff_sad16_y2_mmxext;
        c->pix_abs[1][0] = ff_sad8_mmxext;
        c->pix_abs[1][1] = ff_sad8_x2_mmxext;
        c->pix_abs[1][2] = ff_sad8_y2_mmxext;

        c->vsad[4] = ff_vsad_intra16_mmxext;
        c->vsad[5] = ff_vsad_intra8_mmxext;

        if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
            c->pix_abs[0][3] = ff_sad16_approx_xy2_mmxext;
            c->pix_abs[1][3] = ff_sad8_approx_xy2_mmxext;

            c->vsad[0] = ff_vsad16_approx_mmxext;
            c->vsad[1] = ff_vsad8_approx_mmxext;
        }
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->sse[0] = ff_sse16_sse2;
        c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;

#if HAVE_ALIGNED_STACK
        c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
        c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
#endif
        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
            c->sad[0]        = ff_sad16_sse2;
            c->pix_abs[0][0] = ff_sad16_sse2;
            c->pix_abs[0][1] = ff_sad16_x2_sse2;
            c->pix_abs[0][2] = ff_sad16_y2_sse2;

            c->vsad[4] = ff_vsad_intra16_sse2;
            if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
                c->pix_abs[0][3] = ff_sad16_approx_xy2_sse2;
                c->vsad[0]       = ff_vsad16_approx_sse2;
            }
        }
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->sum_abs_dctelem = ff_sum_abs_dctelem_ssse3;
#if HAVE_ALIGNED_STACK
        c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
        c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
#endif
    }
}
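
/*
 * Usage sketch (assumption, based on the generic init in
 * libavcodec/me_cmp.c): ff_me_cmp_init() installs the C implementations
 * and then calls ff_me_cmp_init_x86(), so the fastest variant the host
 * CPU supports ends up in the table, e.g.:
 *
 *     MECmpContext mecc;
 *     ff_me_cmp_init(&mecc, avctx);
 *     int sad = mecc.pix_abs[0][0](s, cur, ref, stride, 16); // 16x16 SAD
 */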