FFmpeg  4.4
swscale_template.c
Go to the documentation of this file.
/*
 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20 
#include <stdint.h>

#include "libavutil/x86/asm.h"
#include "libswscale/swscale_internal.h"
25 
/*
 * 64-bit store selection for this template instantiation.
 *
 * The macros are reset first so the template can be included more than once.
 * When COMPILE_TEMPLATE_MMXEXT is non-zero the stores use "movntq",
 * otherwise plain "movq".
 *
 *   REAL_MOVNTQ(a,b)  expands to the asm text "<store> a, b \n\t"
 *   MOVNTQ2           is the bare mnemonic followed by a space
 *   MOVNTQ(a,b)       forwards to REAL_MOVNTQ so that macro arguments are
 *                     expanded before they are stringified
 */
#undef REAL_MOVNTQ
#undef MOVNTQ
#undef MOVNTQ2
#undef PREFETCH


#if COMPILE_TEMPLATE_MMXEXT
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#define MOVNTQ2 "movntq "
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#define MOVNTQ2 "movq "
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
40 
/*
 * Chroma part of the vertical multi-tap filter (pmulhw variant).
 *
 * Opens the __asm__ volatile( statement that the other YSCALEYUV2PACKEDX_*
 * and WRITE* fragments continue; YSCALEYUV2PACKEDX_END (or an explicit
 * operand list) must close it.
 *
 * Register use, as visible below:
 *   FF_REG_a  index, zeroed here; label "1:" is the per-group loop head that
 *             the WRITE* macros jump back to
 *   FF_REG_d  walks the filter list at CHR_MMX_FILTER_OFFSET(%0), 16 bytes
 *             per entry: source pointer at +0, coefficient at +8
 *   FF_REG_S  current source pointer; the inner loop "2:" ends when the next
 *             pointer read from the list is zero
 *   %6        added to the U source pointer to reach the V data
 *   mm3/mm4   U/V accumulators, both seeded from VROUNDER_OFFSET(%0)
 */
#define YSCALEYUV2PACKEDX_UV \
    __asm__ volatile(\
        "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
        ".p2align 4 \n\t"\
        "nop \n\t"\
        "1: \n\t"\
        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
        "movq %%mm3, %%mm4 \n\t"\
        ".p2align 4 \n\t"\
        "2: \n\t"\
        "movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* filterCoeff */\
        "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* UsrcData */\
        "add %6, %%"FF_REG_S" \n\t" \
        "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm5 \n\t" /* VsrcData */\
        "add $16, %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "pmulhw %%mm0, %%mm2 \n\t"\
        "pmulhw %%mm0, %%mm5 \n\t"\
        "paddw %%mm2, %%mm3 \n\t"\
        "paddw %%mm5, %%mm4 \n\t"\
        "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
        " jnz 2b \n\t"

/*
 * Luma/alpha part of the vertical multi-tap filter (pmulhw variant).
 *
 * Continues an already opened asm statement.
 *
 *   offset      byte offset (string) of the filter list relative to %0
 *   coeff       MMX register receiving the coefficient of each tap
 *   src1, src2  MMX scratch registers for the two 8-byte source loads
 *               (at +0 and +8 of source + 2*FF_REG_a)
 *   dst1, dst2  MMX accumulators, both seeded from VROUNDER_OFFSET(%0)
 *
 * The filter list layout and loop termination are the same as in the chroma
 * fragment: 16-byte entries, pointer at +0, coefficient at +8, loop until the
 * next pointer is zero. FF_REG_d and FF_REG_S are overwritten.
 */
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
    "lea "offset"(%0), %%"FF_REG_d" \n\t"\
    "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
    "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
    "movq "#dst1", "#dst2" \n\t"\
    ".p2align 4 \n\t"\
    "2: \n\t"\
    "movq 8(%%"FF_REG_d"), "#coeff" \n\t" /* filterCoeff */\
    "movq (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
    "add $16, %%"FF_REG_d" \n\t"\
    "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
    "pmulhw "#coeff", "#src1" \n\t"\
    "pmulhw "#coeff", "#src2" \n\t"\
    "paddw "#src1", "#dst1" \n\t"\
    "paddw "#src2", "#dst2" \n\t"\
    "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
    " jnz 2b \n\t"

/*
 * Complete pmulhw-based vertical filter: chroma first (results in mm3/mm4),
 * then luma from LUM_MMX_FILTER_OFFSET with the two luma halves accumulated
 * in mm1 and mm7 (mm0, mm2 and mm5 are used as scratch).
 */
#define YSCALEYUV2PACKEDX \
    YSCALEYUV2PACKEDX_UV \
    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7)

/*
 * Operand list and clobbers closing an asm statement opened by
 * YSCALEYUV2PACKEDX_UV / YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 * Expects these locals in the enclosing function: c, dummy, dest, dstW_reg
 * and uv_off. Resulting operand numbering:
 *   %0      &c->redDither (base for all *_OFFSET / *_COEFF accesses)
 *   %1..%3  dummy (placeholders keeping the numbering fixed)
 *   %4      dest
 *   %5      dstW_reg
 *   %6      uv_off
 * bF8 and bFC are added as named constraints; FF_REG_a, FF_REG_d and
 * FF_REG_S are declared clobbered.
 */
#define YSCALEYUV2PACKEDX_END \
        :: "r" (&c->redDither), \
           "m" (dummy), "m" (dummy), "m" (dummy),\
           "r" (dest), "m" (dstW_reg), "m"(uv_off) \
           NAMED_CONSTRAINTS_ADD(bF8,bFC) \
        : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S \
    );
96 
/*
 * Chroma part of the vertical multi-tap filter, pmaddwd variant with 32-bit
 * accumulation.
 *
 * Opens the __asm__ volatile( statement (closed later by
 * YSCALEYUV2PACKEDX_END or an explicit operand list) and zeroes the index
 * register FF_REG_a; label "1:" is the loop head used by the WRITE* macros.
 *
 * Each pass of the inner loop "2:" consumes one filter entry that describes
 * two taps: first source pointer at +0, second at APCK_PTR2, the coefficient
 * pair at APCK_COEF, and the next entry starts at APCK_SIZE. The loop ends
 * when the next entry's first pointer is zero. Samples of the two taps are
 * interleaved with punpck[lh]wd so that pmaddwd multiplies and sums them.
 *
 * mm4/mm5 accumulate U, mm6/mm7 accumulate V. After the loop the sums are
 * shifted right by 16, packed to words, offset by VROUNDER_OFFSET(%0) and
 * stored to U_TEMP(%0) and V_TEMP(%0).
 */
#define YSCALEYUV2PACKEDX_ACCURATE_UV \
    __asm__ volatile(\
        "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
        ".p2align 4 \n\t"\
        "nop \n\t"\
        "1: \n\t"\
        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
        "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "pxor %%mm4, %%mm4 \n\t"\
        "pxor %%mm5, %%mm5 \n\t"\
        "pxor %%mm6, %%mm6 \n\t"\
        "pxor %%mm7, %%mm7 \n\t"\
        ".p2align 4 \n\t"\
        "2: \n\t"\
        "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm0 \n\t" /* UsrcData */\
        "add %6, %%"FF_REG_S" \n\t" \
        "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* VsrcData */\
        "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm1 \n\t" /* UsrcData */\
        "movq %%mm0, %%mm3 \n\t"\
        "punpcklwd %%mm1, %%mm0 \n\t"\
        "punpckhwd %%mm1, %%mm3 \n\t"\
        "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1 \n\t" /* filterCoeff */\
        "pmaddwd %%mm1, %%mm0 \n\t"\
        "pmaddwd %%mm1, %%mm3 \n\t"\
        "paddd %%mm0, %%mm4 \n\t"\
        "paddd %%mm3, %%mm5 \n\t"\
        "add %6, %%"FF_REG_S" \n\t" \
        "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm3 \n\t" /* VsrcData */\
        "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
        "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
        "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
        "movq %%mm2, %%mm0 \n\t"\
        "punpcklwd %%mm3, %%mm2 \n\t"\
        "punpckhwd %%mm3, %%mm0 \n\t"\
        "pmaddwd %%mm1, %%mm2 \n\t"\
        "pmaddwd %%mm1, %%mm0 \n\t"\
        "paddd %%mm2, %%mm6 \n\t"\
        "paddd %%mm0, %%mm7 \n\t"\
        " jnz 2b \n\t"\
        "psrad $16, %%mm4 \n\t"\
        "psrad $16, %%mm5 \n\t"\
        "psrad $16, %%mm6 \n\t"\
        "psrad $16, %%mm7 \n\t"\
        "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
        "packssdw %%mm5, %%mm4 \n\t"\
        "packssdw %%mm7, %%mm6 \n\t"\
        "paddw %%mm0, %%mm4 \n\t"\
        "paddw %%mm0, %%mm6 \n\t"\
        "movq %%mm4, "U_TEMP"(%0) \n\t"\
        "movq %%mm6, "V_TEMP"(%0) \n\t"

/*
 * Luma/alpha part of the pmaddwd-based vertical filter.
 *
 * Continues an already opened asm statement. `offset` is the byte offset
 * (string) of the filter list relative to %0; the entry layout
 * (APCK_PTR2 / APCK_COEF / APCK_SIZE) and the zero-pointer termination match
 * the chroma fragment.
 *
 * mm1/mm5 accumulate the first 8 source bytes, mm7/mm6 the second 8. After
 * the loop the sums are shifted right by 16, packed, and offset by
 * VROUNDER_OFFSET(%0), leaving the two result halves in mm1 and mm7.
 * Finally the values saved at U_TEMP(%0) and V_TEMP(%0) are reloaded into
 * mm3 and mm4. All eight MMX registers, FF_REG_d and FF_REG_S are
 * overwritten.
 */
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
    "lea "offset"(%0), %%"FF_REG_d" \n\t"\
    "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
    "pxor %%mm1, %%mm1 \n\t"\
    "pxor %%mm5, %%mm5 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "pxor %%mm6, %%mm6 \n\t"\
    ".p2align 4 \n\t"\
    "2: \n\t"\
    "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
    "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
    "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
    "movq %%mm0, %%mm3 \n\t"\
    "punpcklwd %%mm4, %%mm0 \n\t"\
    "punpckhwd %%mm4, %%mm3 \n\t"\
    "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4 \n\t" /* filterCoeff */\
    "pmaddwd %%mm4, %%mm0 \n\t"\
    "pmaddwd %%mm4, %%mm3 \n\t"\
    "paddd %%mm0, %%mm1 \n\t"\
    "paddd %%mm3, %%mm5 \n\t"\
    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
    "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
    "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
    "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "punpcklwd %%mm3, %%mm2 \n\t"\
    "punpckhwd %%mm3, %%mm0 \n\t"\
    "pmaddwd %%mm4, %%mm2 \n\t"\
    "pmaddwd %%mm4, %%mm0 \n\t"\
    "paddd %%mm2, %%mm7 \n\t"\
    "paddd %%mm0, %%mm6 \n\t"\
    " jnz 2b \n\t"\
    "psrad $16, %%mm1 \n\t"\
    "psrad $16, %%mm5 \n\t"\
    "psrad $16, %%mm7 \n\t"\
    "psrad $16, %%mm6 \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
    "packssdw %%mm5, %%mm1 \n\t"\
    "packssdw %%mm6, %%mm7 \n\t"\
    "paddw %%mm0, %%mm1 \n\t"\
    "paddw %%mm0, %%mm7 \n\t"\
    "movq "U_TEMP"(%0), %%mm3 \n\t"\
    "movq "V_TEMP"(%0), %%mm4 \n\t"

/*
 * Complete pmaddwd-based vertical filter: chroma (parked in U_TEMP/V_TEMP),
 * then luma from LUM_MMX_FILTER_OFFSET. On exit mm1/mm7 hold the two luma
 * halves and mm3/mm4 hold U/V, i.e. the same register layout that
 * YSCALEYUV2PACKEDX produces.
 */
#define YSCALEYUV2PACKEDX_ACCURATE \
    YSCALEYUV2PACKEDX_ACCURATE_UV \
    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
197 
/*
 * YUV -> RGB conversion of the filtered values.
 *
 * Input:  mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (16-bit lanes).
 * Uses the offsets and coefficients stored relative to %0 (U_OFFSET,
 * V_OFFSET, Y_OFFSET, UG/VG/UB/VR_COEFF, Y_COEFF).
 * Each chroma term is duplicated with punpck[lh]wd so that it is added to
 * two neighbouring luma values.
 * Output: three registers of unsigned-saturated bytes from packuswb:
 *         mm2 built from the UB term, mm4 from UG+VG, mm5 from the VR term
 *         (the callers label these B, G and R).
 * mm0, mm3 and mm6 are used as scratch.
 */
#define YSCALEYUV2RGBX \
    "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
    "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t"\
    "packuswb %%mm6, %%mm5 \n\t"\
    "packuswb %%mm3, %%mm4 \n\t"

/*
 * Interleave four byte planes into 32-bit pixels, store them and close the
 * per-group loop.
 *
 *   dst, index   destination base and index registers; stores go to
 *                dst + index*4 (+0, +8, +16, +24)
 *   dstw         asm operand (string) holding the loop bound
 *   b, g, r, a   MMX registers with the four planes; the resulting byte
 *                order in memory is b, g, r, a
 *   q0, q2, q3, t  MMX scratch registers
 *
 * `b` and `r` are overwritten. The index is advanced by 8 and control jumps
 * back to label "1:" while index is below dstw (unsigned compare).
 * WRITEBGR32 adds an expansion level so macro arguments are expanded before
 * being stringified.
 */
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
    "movq "#b", "#q2" \n\t" /* B */\
    "movq "#r", "#t" \n\t" /* R */\
    "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
    "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
    "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
    "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
    "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
    "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
    "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
    "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
    "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
    "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
\
    MOVNTQ( q0, (dst, index, 4))\
    MOVNTQ( b, 8(dst, index, 4))\
    MOVNTQ( q2, 16(dst, index, 4))\
    MOVNTQ( q3, 24(dst, index, 4))\
\
    "add $8, "#index" \n\t"\
    "cmp "dstw", "#index" \n\t"\
    " jb 1b \n\t"
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
257 
258 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
259  const int16_t **lumSrc, int lumFilterSize,
260  const int16_t *chrFilter, const int16_t **chrUSrc,
261  const int16_t **chrVSrc,
262  int chrFilterSize, const int16_t **alpSrc,
263  uint8_t *dest, int dstW, int dstY)
264 {
265  x86_reg dummy=0;
266  x86_reg dstW_reg = dstW;
267  x86_reg uv_off = c->uv_offx2;
268 
269  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
272  "movq %%mm2, "U_TEMP"(%0) \n\t"
273  "movq %%mm4, "V_TEMP"(%0) \n\t"
274  "movq %%mm5, "Y_TEMP"(%0) \n\t"
276  "movq "Y_TEMP"(%0), %%mm5 \n\t"
277  "psraw $3, %%mm1 \n\t"
278  "psraw $3, %%mm7 \n\t"
279  "packuswb %%mm7, %%mm1 \n\t"
280  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
282  } else {
285  "pcmpeqd %%mm7, %%mm7 \n\t"
286  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
288  }
289 }
290 
291 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
292  const int16_t **lumSrc, int lumFilterSize,
293  const int16_t *chrFilter, const int16_t **chrUSrc,
294  const int16_t **chrVSrc,
295  int chrFilterSize, const int16_t **alpSrc,
296  uint8_t *dest, int dstW, int dstY)
297 {
298  x86_reg dummy=0;
299  x86_reg dstW_reg = dstW;
300  x86_reg uv_off = c->uv_offx2;
301 
302  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
305  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
306  "psraw $3, %%mm1 \n\t"
307  "psraw $3, %%mm7 \n\t"
308  "packuswb %%mm7, %%mm1 \n\t"
309  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
311  } else {
314  "pcmpeqd %%mm7, %%mm7 \n\t"
315  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
317  }
318 }
319 
320 static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
321  const int16_t **lumSrc, int lumFilterSize,
322  const int16_t *chrFilter, const int16_t **chrUSrc,
323  const int16_t **chrVSrc,
324  int chrFilterSize, const int16_t **alpSrc,
325  uint8_t *dest, int dstW, int dstY)
326 {
327  x86_reg dummy=0;
328  x86_reg dstW_reg = dstW;
329  x86_reg uv_off = c->uv_offx2;
330 
331  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
334  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
335  "psraw $3, %%mm1 \n\t"
336  "psraw $3, %%mm7 \n\t"
337  "packuswb %%mm7, %%mm1 \n\t"
338  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
340  } else {
343  "pcmpeqd %%mm7, %%mm7 \n\t"
344  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
346  }
347 }
348 
/*
 * Pack B (mm2), G (mm4) and R (mm5) bytes into 16-bit 5-6-5 pixels, store
 * 16 bytes at dst + index*2 and close the per-group loop.
 *
 * Expects mm7 = 0. B and R are masked with bF8 (top 5 bits), G with bFC
 * (top 6 bits). mm1 and mm3 are used as scratch. The index is advanced by 8
 * and control jumps back to label "1:" while index is below dstw.
 * WRITERGB16 adds an expansion level so macro arguments are expanded before
 * being stringified.
 */
#define REAL_WRITERGB16(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
    "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
    "psrlq $3, %%mm2 \n\t"\
\
    "movq %%mm2, %%mm1 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
\
    "punpcklbw %%mm7, %%mm3 \n\t"\
    "punpcklbw %%mm5, %%mm2 \n\t"\
    "punpckhbw %%mm7, %%mm4 \n\t"\
    "punpckhbw %%mm5, %%mm1 \n\t"\
\
    "psllq $3, %%mm3 \n\t"\
    "psllq $3, %%mm4 \n\t"\
\
    "por %%mm3, %%mm2 \n\t"\
    "por %%mm4, %%mm1 \n\t"\
\
    MOVNTQ(%%mm2, (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
\
    "add $8, "#index" \n\t"\
    "cmp "dstw", "#index" \n\t"\
    " jb 1b \n\t"
#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
376 
377 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
378  const int16_t **lumSrc, int lumFilterSize,
379  const int16_t *chrFilter, const int16_t **chrUSrc,
380  const int16_t **chrVSrc,
381  int chrFilterSize, const int16_t **alpSrc,
382  uint8_t *dest, int dstW, int dstY)
383 {
384  x86_reg dummy=0;
385  x86_reg dstW_reg = dstW;
386  x86_reg uv_off = c->uv_offx2;
387 
390  "pxor %%mm7, %%mm7 \n\t"
391  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
392 #ifdef DITHER1XBPP
393  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
394  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
395  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
396 #endif
397  WRITERGB16(%4, "%5", %%FF_REGa)
399 }
400 
401 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
402  const int16_t **lumSrc, int lumFilterSize,
403  const int16_t *chrFilter, const int16_t **chrUSrc,
404  const int16_t **chrVSrc,
405  int chrFilterSize, const int16_t **alpSrc,
406  uint8_t *dest, int dstW, int dstY)
407 {
408  x86_reg dummy=0;
409  x86_reg dstW_reg = dstW;
410  x86_reg uv_off = c->uv_offx2;
411 
414  "pxor %%mm7, %%mm7 \n\t"
415  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
416 #ifdef DITHER1XBPP
417  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
418  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
419  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
420 #endif
421  WRITERGB16(%4, "%5", %%FF_REGa)
423 }
424 
/*
 * Pack B (mm2), G (mm4) and R (mm5) bytes into 16-bit 5-5-5 pixels, store
 * 16 bytes at dst + index*2 and close the per-group loop.
 *
 * Expects mm7 = 0. All three channels are masked with bF8 (top 5 bits); R is
 * additionally shifted right by one so the top bit of each pixel stays
 * clear. mm1 and mm3 are used as scratch. The index is advanced by 8 and
 * control jumps back to label "1:" while index is below dstw.
 * WRITERGB15 adds an expansion level so macro arguments are expanded before
 * being stringified.
 */
#define REAL_WRITERGB15(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
    "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
    "psrlq $3, %%mm2 \n\t"\
    "psrlq $1, %%mm5 \n\t"\
\
    "movq %%mm2, %%mm1 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
\
    "punpcklbw %%mm7, %%mm3 \n\t"\
    "punpcklbw %%mm5, %%mm2 \n\t"\
    "punpckhbw %%mm7, %%mm4 \n\t"\
    "punpckhbw %%mm5, %%mm1 \n\t"\
\
    "psllq $2, %%mm3 \n\t"\
    "psllq $2, %%mm4 \n\t"\
\
    "por %%mm3, %%mm2 \n\t"\
    "por %%mm4, %%mm1 \n\t"\
\
    MOVNTQ(%%mm2, (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
\
    "add $8, "#index" \n\t"\
    "cmp "dstw", "#index" \n\t"\
    " jb 1b \n\t"
#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
453 
454 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
455  const int16_t **lumSrc, int lumFilterSize,
456  const int16_t *chrFilter, const int16_t **chrUSrc,
457  const int16_t **chrVSrc,
458  int chrFilterSize, const int16_t **alpSrc,
459  uint8_t *dest, int dstW, int dstY)
460 {
461  x86_reg dummy=0;
462  x86_reg dstW_reg = dstW;
463  x86_reg uv_off = c->uv_offx2;
464 
467  "pxor %%mm7, %%mm7 \n\t"
468  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
469 #ifdef DITHER1XBPP
470  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
471  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
472  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
473 #endif
474  WRITERGB15(%4, "%5", %%FF_REGa)
476 }
477 
478 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
479  const int16_t **lumSrc, int lumFilterSize,
480  const int16_t *chrFilter, const int16_t **chrUSrc,
481  const int16_t **chrVSrc,
482  int chrFilterSize, const int16_t **alpSrc,
483  uint8_t *dest, int dstW, int dstY)
484 {
485  x86_reg dummy=0;
486  x86_reg dstW_reg = dstW;
487  x86_reg uv_off = c->uv_offx2;
488 
491  "pxor %%mm7, %%mm7 \n\t"
492  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
493 #ifdef DITHER1XBPP
494  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
495  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
496  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
497 #endif
498  WRITERGB15(%4, "%5", %%FF_REGa)
500 }
501 
/*
 * Plain-MMX 24-bit store: interleaves B (mm2), G (mm4) and R (mm5) into
 * 3-byte pixels using unpack, shift and OR only.
 *
 * Expects mm7 = 0. Writes 24 bytes at dst (+0, +8, +16), then advances dst
 * by 24 and index by 8, and jumps back to label "1:" while index is below
 * dstw. All eight MMX registers are overwritten.
 */
#define WRITEBGR24MMX(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq %%mm2, %%mm1 \n\t" /* B */\
    "movq %%mm5, %%mm6 \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
    "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
    "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
    "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
    "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
    "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
    "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
\
    "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
    "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
    "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
    "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
\
    "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
    "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
    "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
    "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
\
    "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
    "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
    "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
    "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
    MOVNTQ(%%mm0, (dst))\
\
    "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
    "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
    "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
    "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
    MOVNTQ(%%mm6, 8(dst))\
\
    "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
    "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
    "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
    MOVNTQ(%%mm5, 16(dst))\
\
    "add $24, "#dst" \n\t"\
\
    "add $8, "#index" \n\t"\
    "cmp "dstw", "#index" \n\t"\
    " jb 1b \n\t"
554 
/*
 * MMXEXT 24-bit store: builds the three output quadwords with pshufw and the
 * byte masks ff_M24A, ff_M24B and ff_M24C.
 *
 * Input: mm2 = B, mm4 = G, mm5 = R. mm4 is shifted right by 8 after the
 * first quadword, so G is not preserved; mm0 and mm7 hold masks, mm1, mm3
 * and mm6 are scratch.
 * Writes 24 bytes at dst (+0, +8, +16), then advances dst by 24 and index by
 * 8, and jumps back to label "1:" while index is below dstw.
 */
#define WRITEBGR24MMXEXT(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
\
    "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
    "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
    "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
\
    "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
    "por %%mm1, %%mm6 \n\t"\
    "por %%mm3, %%mm6 \n\t"\
    MOVNTQ(%%mm6, (dst))\
\
    "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
\
    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
    "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
    "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
\
    "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
    "por %%mm3, %%mm6 \n\t"\
    MOVNTQ(%%mm6, 8(dst))\
\
    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\
    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
\
    "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
    "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
\
    "por %%mm1, %%mm3 \n\t"\
    "por %%mm3, %%mm6 \n\t"\
    MOVNTQ(%%mm6, 16(dst))\
\
    "add $24, "#dst" \n\t"\
\
    "add $8, "#index" \n\t"\
    "cmp "dstw", "#index" \n\t"\
    " jb 1b \n\t"
602 
/*
 * Select the 24-bit store implementation for this template instantiation:
 * the pshufw-based one when COMPILE_TEMPLATE_MMXEXT is non-zero, the plain
 * MMX one otherwise. Any earlier definition is dropped first.
 */
#if COMPILE_TEMPLATE_MMXEXT
#undef WRITEBGR24
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
#else
#undef WRITEBGR24
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
#endif
610 
#if HAVE_6REGS
/**
 * Vertical multi-tap scale of planar YUV to packed 24-bit pixels, built on
 * the YSCALEYUV2PACKEDX_ACCURATE filter.
 *
 * Needs one more general register than the other packed writers
 * (FF_REG_c as running output pointer), hence the HAVE_6REGS guard and the
 * explicit operand/clobber list instead of YSCALEYUV2PACKEDX_END.
 * The filter/source pointer parameters and dstY are not referenced directly;
 * the asm reads them through offsets relative to &c->redDither.
 *
 * @param c     scaler context; c->uv_offx2 is the U-to-V pointer offset
 * @param dest  output line (3 bytes per pixel)
 * @param dstW  loop bound; 8 pixels are produced per iteration
 */
static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
                                   const int16_t **lumSrc, int lumFilterSize,
                                   const int16_t *chrFilter, const int16_t **chrUSrc,
                                   const int16_t **chrVSrc,
                                   int chrFilterSize, const int16_t **alpSrc,
                                   uint8_t *dest, int dstW, int dstY)
{
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
    x86_reg uv_off = c->uv_offx2;

    YSCALEYUV2PACKEDX_ACCURATE
    YSCALEYUV2RGBX
    "pxor %%mm7, %%mm7 \n\t"
    /* FF_REG_c = dest + 3 * index: byte address of the current pixel group */
    "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c"\n\t" //FIXME optimize
    "add %4, %%"FF_REG_c" \n\t"
    WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
    :: "r" (&c->redDither),
       "m" (dummy), "m" (dummy), "m" (dummy),
       "r" (dest), "m" (dstW_reg), "m"(uv_off)
       NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
    : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
    );
}

/**
 * Vertical multi-tap scale of planar YUV to packed 24-bit pixels, built on
 * the YSCALEYUV2PACKEDX (pmulhw) filter.
 *
 * Uses FF_REG_c as running output pointer and therefore carries its own
 * operand/clobber list. The filter/source pointer parameters and dstY are
 * not referenced directly; the asm reads them through offsets relative to
 * &c->redDither.
 *
 * @param c     scaler context; c->uv_offx2 is the U-to-V pointer offset
 * @param dest  output line (3 bytes per pixel)
 * @param dstW  loop bound; 8 pixels are produced per iteration
 */
static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
                                const int16_t **lumSrc, int lumFilterSize,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
                                const int16_t **chrVSrc,
                                int chrFilterSize, const int16_t **alpSrc,
                                uint8_t *dest, int dstW, int dstY)
{
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
    x86_reg uv_off = c->uv_offx2;

    YSCALEYUV2PACKEDX
    YSCALEYUV2RGBX
    "pxor %%mm7, %%mm7 \n\t"
    /* FF_REG_c = dest + 3 * index: byte address of the current pixel group */
    "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize
    "add %4, %%"FF_REG_c" \n\t"
    WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
    :: "r" (&c->redDither),
       "m" (dummy), "m" (dummy), "m" (dummy),
       "r" (dest), "m" (dstW_reg), "m"(uv_off)
       NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
    : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
    );
}
#endif /* HAVE_6REGS */
662 
/*
 * Pack filtered Y/U/V words to bytes, interleave them and store 16 bytes at
 * dst + index*2, then close the per-group loop.
 *
 * Input: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (16-bit lanes, already scaled
 * to byte range by the caller). After packing, U and V bytes are interleaved
 * (U first) and then interleaved with the luma bytes (luma first), giving
 * Y U Y V byte order in memory.
 * The index is advanced by 8 and control jumps back to label "1:" while
 * index is below dstw. WRITEYUY2 adds an expansion level so macro arguments
 * are expanded before being stringified.
 */
#define REAL_WRITEYUY2(dst, dstw, index) \
    "packuswb %%mm3, %%mm3 \n\t"\
    "packuswb %%mm4, %%mm4 \n\t"\
    "packuswb %%mm7, %%mm1 \n\t"\
    "punpcklbw %%mm4, %%mm3 \n\t"\
    "movq %%mm1, %%mm7 \n\t"\
    "punpcklbw %%mm3, %%mm1 \n\t"\
    "punpckhbw %%mm3, %%mm7 \n\t"\
\
    MOVNTQ(%%mm1, (dst, index, 2))\
    MOVNTQ(%%mm7, 8(dst, index, 2))\
\
    "add $8, "#index" \n\t"\
    "cmp "dstw", "#index" \n\t"\
    " jb 1b \n\t"
#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
679 
680 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
681  const int16_t **lumSrc, int lumFilterSize,
682  const int16_t *chrFilter, const int16_t **chrUSrc,
683  const int16_t **chrVSrc,
684  int chrFilterSize, const int16_t **alpSrc,
685  uint8_t *dest, int dstW, int dstY)
686 {
687  x86_reg dummy=0;
688  x86_reg dstW_reg = dstW;
689  x86_reg uv_off = c->uv_offx2;
690 
692  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
693  "psraw $3, %%mm3 \n\t"
694  "psraw $3, %%mm4 \n\t"
695  "psraw $3, %%mm1 \n\t"
696  "psraw $3, %%mm7 \n\t"
697  WRITEYUY2(%4, "%5", %%FF_REGa)
699 }
700 
701 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
702  const int16_t **lumSrc, int lumFilterSize,
703  const int16_t *chrFilter, const int16_t **chrUSrc,
704  const int16_t **chrVSrc,
705  int chrFilterSize, const int16_t **alpSrc,
706  uint8_t *dest, int dstW, int dstY)
707 {
708  x86_reg dummy=0;
709  x86_reg dstW_reg = dstW;
710  x86_reg uv_off = c->uv_offx2;
711 
713  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
714  "psraw $3, %%mm3 \n\t"
715  "psraw $3, %%mm4 \n\t"
716  "psraw $3, %%mm1 \n\t"
717  "psraw $3, %%mm7 \n\t"
718  WRITEYUY2(%4, "%5", %%FF_REGa)
720 }
721 
/*
 * Chroma part of the two-line (bilinear) vertical blend followed by the
 * start of the YUV -> RGB conversion.
 *
 *   index  register used as loop index; zeroed here, label "1:" is the loop
 *          head that the WRITE* macros jump back to
 *   c      register holding the context base used for all *_OFFSET /
 *          *_COEFF accesses
 *
 * Operands %2 and %3 are the two chroma line pointers. U is read at
 * [ptr + index]; V is read after temporarily adding the value stored at
 * UV_OFF_BYTE(c) to the index. The blend weight is read from
 * CHR_MMX_FILTER_OFFSET+8(c).
 * On exit: mm2 = U - offset, mm5 = V - offset, mm3 = ug term, mm4 = vg term.
 */
#define REAL_YSCALEYUV2RGB_UV(index, c) \
    "xor "#index", "#index" \n\t"\
    ".p2align 4 \n\t"\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
    "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
    "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
    "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
    "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
    "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
    "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
    "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
    "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
    "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
    "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */

/*
 * Luma/alpha part of the two-line (bilinear) vertical blend.
 *
 *   index   loop index register (scaled by 2 in the addressing)
 *   c       register holding the context base
 *   b1, b2  registers/operands holding the two source line pointers
 *
 * Loads 8 bytes at +0 and +8 from each line, blends them using the weight at
 * LUM_MMX_FILTER_OFFSET+8(c) and leaves the two result halves in mm1 and
 * mm7. mm0 and mm6 are used as scratch.
 */
#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
    "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
    "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
    "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
    "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
    "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
    "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/

/*
 * Final stage of the YUV -> RGB conversion for the two-line blend path.
 *
 * Input:  mm1 = Y1, mm7 = Y2, mm2 = U - offset, mm5 = V - offset,
 *         mm3 = ug term, mm4 = vg term; `c` is the register holding the
 *         context base for the coefficient loads.
 * Output: unsigned-saturated bytes in mm2 (from the UB term), mm4 (from
 *         UG+VG) and mm5 (from the VR term); the callers label these B, G
 *         and R. mm0, mm3 and mm6 are used as scratch.
 */
#define REAL_YSCALEYUV2RGB_COEFF(c) \
    "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t"\
    "packuswb %%mm6, %%mm5 \n\t"\
    "packuswb %%mm3, %%mm4 \n\t"

/*
 * YSCALEYUV2RGB_YA: expansion wrapper so its arguments are macro-expanded
 * before being stringified.
 * YSCALEYUV2RGB: full two-line blend plus colour conversion, with the luma
 * line pointers taken from asm operands %0 and %1.
 */
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)

#define YSCALEYUV2RGB(index, c) \
    REAL_YSCALEYUV2RGB_UV(index, c) \
    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
    REAL_YSCALEYUV2RGB_COEFF(c)
798 
799 /**
800  * vertical bilinear scale YV12 to RGB
801  */
802 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
803  const int16_t *ubuf[2], const int16_t *vbuf[2],
804  const int16_t *abuf[2], uint8_t *dest,
805  int dstW, int yalpha, int uvalpha, int y)
806 {
807  const int16_t *buf0 = buf[0], *buf1 = buf[1],
808  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
809 
810  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
811  const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
812 #if ARCH_X86_64
813  __asm__ volatile(
814  YSCALEYUV2RGB(%%r8, %5)
815  YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
816  "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
817  "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
818  "packuswb %%mm7, %%mm1 \n\t"
819  WRITEBGR32(%4, DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
820  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
821  "a" (&c->redDither),
822  "r" (abuf0), "r" (abuf1)
823  : "%r8"
824  );
825 #else
826  c->u_temp=(intptr_t)abuf0;
827  c->v_temp=(intptr_t)abuf1;
828  __asm__ volatile(
829  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
830  "mov %4, %%"FF_REG_b" \n\t"
831  "push %%"FF_REG_BP" \n\t"
832  YSCALEYUV2RGB(%%FF_REGBP, %5)
833  "push %0 \n\t"
834  "push %1 \n\t"
835  "mov "U_TEMP"(%5), %0 \n\t"
836  "mov "V_TEMP"(%5), %1 \n\t"
837  YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
838  "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
839  "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
840  "packuswb %%mm7, %%mm1 \n\t"
841  "pop %1 \n\t"
842  "pop %0 \n\t"
843  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
844  "pop %%"FF_REG_BP" \n\t"
845  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
846  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
847  "a" (&c->redDither)
848  );
849 #endif
850  } else {
851  __asm__ volatile(
852  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
853  "mov %4, %%"FF_REG_b" \n\t"
854  "push %%"FF_REG_BP" \n\t"
855  YSCALEYUV2RGB(%%FF_REGBP, %5)
856  "pcmpeqd %%mm7, %%mm7 \n\t"
857  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
858  "pop %%"FF_REG_BP" \n\t"
859  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
860  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
861  "a" (&c->redDither)
862  );
863  }
864 }
865 
866 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
867  const int16_t *ubuf[2], const int16_t *vbuf[2],
868  const int16_t *abuf[2], uint8_t *dest,
869  int dstW, int yalpha, int uvalpha, int y)
870 {
871  const int16_t *buf0 = buf[0], *buf1 = buf[1],
872  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
873 
874  __asm__ volatile(
875  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
876  "mov %4, %%"FF_REG_b" \n\t"
877  "push %%"FF_REG_BP" \n\t"
878  YSCALEYUV2RGB(%%FF_REGBP, %5)
879  "pxor %%mm7, %%mm7 \n\t"
880  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
881  "pop %%"FF_REG_BP" \n\t"
882  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
883  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
884  "a" (&c->redDither)
885  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
886  );
887 }
888 
/**
 * Packed RGB555 output from two input rows.
 *
 * Hands two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0], ubuf[1])
 * to YSCALEYUV2RGB, optionally adds the per-channel dither values stored in
 * the context (DITHER1XBPP builds only) and stores the result with WRITERGB15.
 *
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced by name in this
 * body; the asm reaches everything else relative to &c->redDither (operand
 * %5), e.g. the width through DSTW_OFFSET(%5).
 */
889 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
890  const int16_t *ubuf[2], const int16_t *vbuf[2],
891  const int16_t *abuf[2], uint8_t *dest,
892  int dstW, int yalpha, int uvalpha, int y)
893 {
894  const int16_t *buf0 = buf[0], *buf1 = buf[1],
895  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
896 
897  __asm__ volatile(
 /* Park FF_REG_b in the context slot at ESP_OFFSET, then reuse it as the
  * destination pointer (operand %4). */
898  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
899  "mov %4, %%"FF_REG_b" \n\t"
 /* FF_REG_BP is handed to the macros below as their index register. */
900  "push %%"FF_REG_BP" \n\t"
901  YSCALEYUV2RGB(%%FF_REGBP, %5)
902  "pxor %%mm7, %%mm7 \n\t"
903  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
904 #ifdef DITHER1XBPP
 /* Unsigned saturating add of the dither bytes kept in the context. */
905  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
906  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
907  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
908 #endif
909  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
910  "pop %%"FF_REG_BP" \n\t"
911  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* Inputs only: %0=buf0 (c), %1=buf1 (d), %2=ubuf0 (S), %3=ubuf1 (D),
  * %4=dest (memory), %5=&c->redDither (a). */
912  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
913  "a" (&c->redDither)
 /* The channel mask must be listed here for builds that pass constants as
  * named asm operands, as the RGB565 and BGR24 variants do for theirs.
  * NOTE(review): WRITERGB15 is defined outside this excerpt; it is expected
  * to reference bF8 only - confirm. */
914  NAMED_CONSTRAINTS_ADD(bF8)
915  );
916 }
917 
/**
 * Packed RGB565 output from two input rows.
 *
 * Hands two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0], ubuf[1])
 * to YSCALEYUV2RGB, optionally adds the per-channel dither values stored in
 * the context (DITHER1XBPP builds only) and stores the result with WRITERGB16.
 *
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced by name in this
 * body; the asm reaches everything else relative to &c->redDither (operand
 * %5), e.g. the width through DSTW_OFFSET(%5).
 */
918 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
919  const int16_t *ubuf[2], const int16_t *vbuf[2],
920  const int16_t *abuf[2], uint8_t *dest,
921  int dstW, int yalpha, int uvalpha, int y)
922 {
923  const int16_t *buf0 = buf[0], *buf1 = buf[1],
924  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
925 
926  __asm__ volatile(
 /* Park FF_REG_b in the context slot at ESP_OFFSET, then reuse it as the
  * destination pointer (operand %4). */
927  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
928  "mov %4, %%"FF_REG_b" \n\t"
 /* FF_REG_BP is handed to the macros below as their index register. */
929  "push %%"FF_REG_BP" \n\t"
930  YSCALEYUV2RGB(%%FF_REGBP, %5)
931  "pxor %%mm7, %%mm7 \n\t"
932  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
933 #ifdef DITHER1XBPP
 /* Unsigned saturating add of the dither bytes kept in the context. */
934  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
935  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
936  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
937 #endif
938  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
939  "pop %%"FF_REG_BP" \n\t"
940  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* Inputs only: %0=buf0 (c), %1=buf1 (d), %2=ubuf0 (S), %3=ubuf1 (D),
  * %4=dest (memory), %5=&c->redDither (a). No outputs or clobbers are
  * declared. */
941  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
942  "a" (&c->redDither)
943  NAMED_CONSTRAINTS_ADD(bF8,bFC)
944  );
945 }
946 
/*
 * YSCALEYUV2PACKED(index, c): asm fragment that blends two source rows into
 * one row of 16-bit Y and chroma words.
 *
 * Operands: %0/%1 are the two luma rows, %2/%3 the two chroma rows; c is the
 * register holding the context base that the *_OFFSET strings are relative to.
 *
 * Prologue (executed once per expansion): the coefficient words stored at
 * CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) are shifted right
 * by 3 and written back in place, then index is cleared.
 *
 * From label 1 on, with row0/row1 the two rows and k the coefficient, each
 * lane is computed as ((row0 - row1) * k >> 16) + (row1 >> 7).
 * Results: mm3 = chroma read at index, mm4 = chroma read UV_OFF_BYTE(c) bytes
 * further, mm1 = first four luma words, mm7 = next four luma words.
 *
 * The fragment neither advances index nor jumps back to label 1.
 * NOTE(review): presumably the WRITE macro that follows it closes the loop -
 * confirm at its definition.
 */
947  #define REAL_YSCALEYUV2PACKED(index, c) \
948  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* mm0 = stored chroma coefficient */\
949  "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t" /* mm1 = stored luma coefficient */\
950  "psraw $3, %%mm0 \n\t"\
951  "psraw $3, %%mm1 \n\t"\
952  "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t" /* write the >>3 values back in place */\
953  "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
954  "xor "#index", "#index" \n\t"\
955  ".p2align 4 \n\t"\
956  "1: \n\t"\
957  "movq (%2, "#index"), %%mm2 \n\t" /* mm2 = uvbuf0[index] */\
958  "movq (%3, "#index"), %%mm3 \n\t" /* mm3 = uvbuf1[index] */\
959  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index += uv_off (second chroma plane) */\
960  "movq (%2, "#index"), %%mm5 \n\t" /* mm5 = uvbuf0[index + uv_off] */\
961  "movq (%3, "#index"), %%mm4 \n\t" /* mm4 = uvbuf1[index + uv_off] */\
962  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index -= uv_off */\
963  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[index] - uvbuf1[index] */\
964  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[index + uv_off] - uvbuf1[index + uv_off] */\
965  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* mm0 = chroma coefficient (after the >>3 above) */\
966  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[index] - uvbuf1[index]) * coeff >> 16 */\
967  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[index + uv_off] - uvbuf1[index + uv_off]) * coeff >> 16 */\
968  "psraw $7, %%mm3 \n\t" /* uvbuf1[index] >> 7 */\
969  "psraw $7, %%mm4 \n\t" /* uvbuf1[index + uv_off] >> 7 */\
970  "paddw %%mm2, %%mm3 \n\t" /* mm3 = blended chroma, first plane */\
971  "paddw %%mm5, %%mm4 \n\t" /* mm4 = blended chroma, second plane */\
972  "movq (%0, "#index", 2), %%mm0 \n\t" /* mm0 = 4 words of buf0 at byte offset 2*index */\
973  "movq (%1, "#index", 2), %%mm1 \n\t" /* mm1 = 4 words of buf1 at byte offset 2*index */\
974  "movq 8(%0, "#index", 2), %%mm6 \n\t" /* mm6 = next 4 words of buf0 */\
975  "movq 8(%1, "#index", 2), %%mm7 \n\t" /* mm7 = next 4 words of buf1 */\
976  "psubw %%mm1, %%mm0 \n\t" /* buf0 - buf1 (first 4) */\
977  "psubw %%mm7, %%mm6 \n\t" /* buf0 - buf1 (next 4) */\
978  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0 - buf1) * coeff >> 16 (first 4) */\
979  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0 - buf1) * coeff >> 16 (next 4) */\
980  "psraw $7, %%mm1 \n\t" /* buf1 >> 7 (first 4) */\
981  "psraw $7, %%mm7 \n\t" /* buf1 >> 7 (next 4) */\
982  "paddw %%mm0, %%mm1 \n\t" /* mm1 = blended luma, first 4 */\
983  "paddw %%mm6, %%mm7 \n\t" /* mm7 = blended luma, next 4 */\
984 
/* Extra expansion level so that macro arguments are expanded before
 * REAL_YSCALEYUV2PACKED stringifies them with #. */
985  #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
986 
/**
 * Packed YUYV422 output from two input rows.
 *
 * Hands two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0], ubuf[1])
 * to YSCALEYUV2PACKED, which blends them, and stores the result with
 * WRITEYUY2.
 *
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced by name in this
 * body; the asm reads the second chroma plane via UV_OFF_BYTE(%5), the blend
 * coefficients via the *_MMX_FILTER_OFFSET slots and the width via
 * DSTW_OFFSET(%5), all relative to &c->redDither (operand %5).
 * Side effect visible in YSCALEYUV2PACKED: the two coefficient slots in the
 * context are shifted right by 3 and stored back on every call.
 */
987 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
988  const int16_t *ubuf[2], const int16_t *vbuf[2],
989  const int16_t *abuf[2], uint8_t *dest,
990  int dstW, int yalpha, int uvalpha, int y)
991 {
992  const int16_t *buf0 = buf[0], *buf1 = buf[1],
993  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
994 
995  __asm__ volatile(
 /* Park FF_REG_b in the context slot at ESP_OFFSET, then reuse it as the
  * destination pointer (operand %4). */
996  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
997  "mov %4, %%"FF_REG_b" \n\t"
 /* FF_REG_BP is handed to the macros below as their index register. */
998  "push %%"FF_REG_BP" \n\t"
999  YSCALEYUV2PACKED(%%FF_REGBP, %5)
1000  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1001  "pop %%"FF_REG_BP" \n\t"
1002  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* Inputs only: %0=buf0 (c), %1=buf1 (d), %2=ubuf0 (S), %3=ubuf1 (D),
  * %4=dest (memory), %5=&c->redDither (a). No outputs or clobbers are
  * declared. */
1003  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1004  "a" (&c->redDither)
1005  );
1006 }
1007 
/*
 * YSCALEYUV2RGB1(index, c): asm fragment converting one luma row and one
 * chroma row to 8 pixels of packed B, G and R bytes, without blending rows.
 *
 * Operands: %0 = luma row, %2 = chroma row (%1 and %3 are not touched here);
 * c is the register holding the context base that the *_OFFSET / *_COEFF
 * strings are relative to. index is cleared before label 1.
 *
 * Results after the three packuswb: mm2 = 8 B bytes, mm4 = 8 G bytes,
 * mm5 = 8 R bytes. mm0, mm1, mm3, mm6 and mm7 are used as scratch.
 * Each chroma word is duplicated (punpcklwd/punpckhwd with itself) so that it
 * is applied to two neighbouring luma words.
 *
 * The fragment neither advances index nor jumps back to label 1.
 * NOTE(review): presumably the WRITE macro that follows it closes the loop -
 * confirm at its definition.
 */
1008  #define REAL_YSCALEYUV2RGB1(index, c) \
1009  "xor "#index", "#index" \n\t"\
1010  ".p2align 4 \n\t"\
1011  "1: \n\t"\
1012  "movq (%2, "#index"), %%mm3 \n\t" /* mm3 = uvbuf0[index] */\
1013  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index += uv_off (second chroma plane) */\
1014  "movq (%2, "#index"), %%mm4 \n\t" /* mm4 = uvbuf0[index + uv_off] */\
1015  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index -= uv_off */\
1016  "psraw $4, %%mm3 \n\t" /* uvbuf0[index] >> 4 */\
1017  "psraw $4, %%mm4 \n\t" /* uvbuf0[index + uv_off] >> 4 */\
1018  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
1019  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
1020  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
1021  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1022  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1023  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1024  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1025  "movq (%0, "#index", 2), %%mm1 \n\t" /* mm1 = 4 words of buf0 at byte offset 2*index */\
1026  "movq 8(%0, "#index", 2), %%mm7 \n\t" /* mm7 = next 4 words of buf0 */\
1027  "psraw $4, %%mm1 \n\t" /* buf0 >> 4 (first 4) */\
1028  "psraw $4, %%mm7 \n\t" /* buf0 >> 4 (next 4) */\
1029  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1030  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1031  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1032  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1033  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1034  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1035  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1036  "paddw %%mm3, %%mm4 \n\t" /* mm4 = ug + vg */\
1037  "movq %%mm2, %%mm0 \n\t"\
1038  "movq %%mm5, %%mm6 \n\t"\
1039  "movq %%mm4, %%mm3 \n\t"\
1040  "punpcklwd %%mm2, %%mm2 \n\t" /* duplicate the two low chroma words */\
1041  "punpcklwd %%mm5, %%mm5 \n\t"\
1042  "punpcklwd %%mm4, %%mm4 \n\t"\
1043  "paddw %%mm1, %%mm2 \n\t"\
1044  "paddw %%mm1, %%mm5 \n\t"\
1045  "paddw %%mm1, %%mm4 \n\t"\
1046  "punpckhwd %%mm0, %%mm0 \n\t" /* duplicate the two high chroma words */\
1047  "punpckhwd %%mm6, %%mm6 \n\t"\
1048  "punpckhwd %%mm3, %%mm3 \n\t"\
1049  "paddw %%mm7, %%mm0 \n\t"\
1050  "paddw %%mm7, %%mm6 \n\t"\
1051  "paddw %%mm7, %%mm3 \n\t"\
1052  /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = first 4 pixels, 2 = next 4) */\
1053  "packuswb %%mm0, %%mm2 \n\t" /* mm2 = 8 B bytes */\
1054  "packuswb %%mm6, %%mm5 \n\t" /* mm5 = 8 R bytes */\
1055  "packuswb %%mm3, %%mm4 \n\t" /* mm4 = 8 G bytes */\
1056 
/* Extra expansion level so that macro arguments are expanded before
 * REAL_YSCALEYUV2RGB1 stringifies them with #. */
1057  #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
1058 
1059 // do vertical chrominance interpolation
/*
 * YSCALEYUV2RGB1b(index, c): like YSCALEYUV2RGB1, but the chroma fed into the
 * conversion is the sum of two chroma rows (%2 and %3) shifted right by 5
 * with a logical shift, instead of a single row shifted right by 4.
 *
 * Operands: %0 = luma row, %2/%3 = the two chroma rows (%1 is not touched
 * here); c is the register holding the context base. index is cleared before
 * label 1.
 *
 * Results after the three packuswb: mm2 = 8 B bytes, mm4 = 8 G bytes,
 * mm5 = 8 R bytes. mm0, mm1, mm3, mm6 and mm7 are used as scratch.
 *
 * The fragment neither advances index nor jumps back to label 1.
 * NOTE(review): presumably the WRITE macro that follows it closes the loop -
 * confirm at its definition.
 */
1060  #define REAL_YSCALEYUV2RGB1b(index, c) \
1061  "xor "#index", "#index" \n\t"\
1062  ".p2align 4 \n\t"\
1063  "1: \n\t"\
1064  "movq (%2, "#index"), %%mm2 \n\t" /* mm2 = uvbuf0[index] */\
1065  "movq (%3, "#index"), %%mm3 \n\t" /* mm3 = uvbuf1[index] */\
1066  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index += uv_off (second chroma plane) */\
1067  "movq (%2, "#index"), %%mm5 \n\t" /* mm5 = uvbuf0[index + uv_off] */\
1068  "movq (%3, "#index"), %%mm4 \n\t" /* mm4 = uvbuf1[index + uv_off] */\
1069  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index -= uv_off */\
1070  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[index] + uvbuf1[index] */\
1071  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[index + uv_off] + uvbuf1[index + uv_off] */\
1072  "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
1073  "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
1074  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
1075  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
1076  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
1077  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1078  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1079  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1080  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1081  "movq (%0, "#index", 2), %%mm1 \n\t" /* mm1 = 4 words of buf0 at byte offset 2*index */\
1082  "movq 8(%0, "#index", 2), %%mm7 \n\t" /* mm7 = next 4 words of buf0 */\
1083  "psraw $4, %%mm1 \n\t" /* buf0 >> 4 (first 4) */\
1084  "psraw $4, %%mm7 \n\t" /* buf0 >> 4 (next 4) */\
1085  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1086  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1087  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1088  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1089  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1090  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1091  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1092  "paddw %%mm3, %%mm4 \n\t" /* mm4 = ug + vg */\
1093  "movq %%mm2, %%mm0 \n\t"\
1094  "movq %%mm5, %%mm6 \n\t"\
1095  "movq %%mm4, %%mm3 \n\t"\
1096  "punpcklwd %%mm2, %%mm2 \n\t" /* duplicate the two low chroma words */\
1097  "punpcklwd %%mm5, %%mm5 \n\t"\
1098  "punpcklwd %%mm4, %%mm4 \n\t"\
1099  "paddw %%mm1, %%mm2 \n\t"\
1100  "paddw %%mm1, %%mm5 \n\t"\
1101  "paddw %%mm1, %%mm4 \n\t"\
1102  "punpckhwd %%mm0, %%mm0 \n\t" /* duplicate the two high chroma words */\
1103  "punpckhwd %%mm6, %%mm6 \n\t"\
1104  "punpckhwd %%mm3, %%mm3 \n\t"\
1105  "paddw %%mm7, %%mm0 \n\t"\
1106  "paddw %%mm7, %%mm6 \n\t"\
1107  "paddw %%mm7, %%mm3 \n\t"\
1108  /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = first 4 pixels, 2 = next 4) */\
1109  "packuswb %%mm0, %%mm2 \n\t" /* mm2 = 8 B bytes */\
1110  "packuswb %%mm6, %%mm5 \n\t" /* mm5 = 8 R bytes */\
1111  "packuswb %%mm3, %%mm4 \n\t" /* mm4 = 8 G bytes */\
1112 
/* Extra expansion level so that macro arguments are expanded before
 * REAL_YSCALEYUV2RGB1b stringifies them with #. */
1113  #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
1114 
/*
 * YSCALEYUV2RGB1_ALPHA(index): asm fragment that loads eight 16-bit alpha
 * words from operand %1 at byte offset 2*index, shifts each right by 7
 * (arithmetic) and packs them with unsigned saturation into 8 bytes in mm7.
 * mm1 is used as scratch. Callers must therefore bind %1 to the alpha row.
 */
1115  #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
1116  "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\
1117  "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\
1118  "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\
1119  "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\
1120  "packuswb %%mm1, %%mm7 \n\t"
/* Extra expansion level so that the macro argument is expanded before
 * REAL_YSCALEYUV2RGB1_ALPHA stringifies it with #. */
1121  #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
1122 
1123 /**
1124  * YV12 to RGB without scaling or interpolating
 *
 * Packed 32-bit output from a single luma row (buf0).
 *
 * Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is used (YSCALEYUV2RGB1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved (YSCALEYUV2RGB1b).
 *
 * Alpha handling: when CONFIG_SWSCALE_ALPHA is enabled and c->needAlpha is
 * set, operand %1 is bound to abuf0 and YSCALEYUV2RGB1_ALPHA fills mm7 from
 * it; otherwise mm7 is set to all ones with pcmpeqd and %1 is bound to buf1,
 * an alias of buf0.
 *
 * vbuf, dstW and y are not referenced by name in this body; the asm reads the
 * second chroma plane via UV_OFF_BYTE(%5) and the width via DSTW_OFFSET(%5),
 * relative to &c->redDither (operand %5).
1125  */
1126 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
1127  const int16_t *ubuf[2], const int16_t *vbuf[2],
1128  const int16_t *abuf0, uint8_t *dest,
1129  int dstW, int uvalpha, int y)
1130 {
1131  const int16_t *ubuf0 = ubuf[0];
1132  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1133 
1134  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
 /* Single chroma row: %3 is bound to the same row as %2. */
1135  const int16_t *ubuf1 = ubuf[0];
1136  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
1137  __asm__ volatile(
 /* Park FF_REG_b in the context slot at ESP_OFFSET, reuse it as the
  * destination pointer (%4); FF_REG_BP becomes the index register. */
1138  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1139  "mov %4, %%"FF_REG_b" \n\t"
1140  "push %%"FF_REG_BP" \n\t"
1141  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1142  YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1143  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1144  "pop %%"FF_REG_BP" \n\t"
1145  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=abuf0 (alpha row), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1146  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1147  "a" (&c->redDither)
1148  );
1149  } else {
1150  __asm__ volatile(
1151  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1152  "mov %4, %%"FF_REG_b" \n\t"
1153  "push %%"FF_REG_BP" \n\t"
1154  YSCALEYUV2RGB1(%%FF_REGBP, %5)
 /* No alpha row: mm7 = all ones. */
1155  "pcmpeqd %%mm7, %%mm7 \n\t"
1156  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1157  "pop %%"FF_REG_BP" \n\t"
1158  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1159  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1160  "a" (&c->redDither)
1161  );
1162  }
1163  } else {
 /* Two chroma rows: YSCALEYUV2RGB1b combines %2 and %3. */
1164  const int16_t *ubuf1 = ubuf[1];
1165  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
1166  __asm__ volatile(
1167  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1168  "mov %4, %%"FF_REG_b" \n\t"
1169  "push %%"FF_REG_BP" \n\t"
1170  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1171  YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1172  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1173  "pop %%"FF_REG_BP" \n\t"
1174  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=abuf0 (alpha row), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1175  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1176  "a" (&c->redDither)
1177  );
1178  } else {
1179  __asm__ volatile(
1180  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1181  "mov %4, %%"FF_REG_b" \n\t"
1182  "push %%"FF_REG_BP" \n\t"
1183  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
 /* No alpha row: mm7 = all ones. */
1184  "pcmpeqd %%mm7, %%mm7 \n\t"
1185  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1186  "pop %%"FF_REG_BP" \n\t"
1187  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1188  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1189  "a" (&c->redDither)
1190  );
1191  }
1192  }
1193 }
1194 
/**
 * Packed BGR24 output from a single luma row (buf0).
 *
 * Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is used (YSCALEYUV2RGB1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved (YSCALEYUV2RGB1b).
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body; the asm
 * reads the second chroma plane via UV_OFF_BYTE(%5) and the width via
 * DSTW_OFFSET(%5), relative to &c->redDither (operand %5).
 */
1195 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
1196  const int16_t *ubuf[2], const int16_t *vbuf[2],
1197  const int16_t *abuf0, uint8_t *dest,
1198  int dstW, int uvalpha, int y)
1199 {
1200  const int16_t *ubuf0 = ubuf[0];
1201  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1202 
1203  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
 /* Single chroma row: %3 is bound to the same row as %2. */
1204  const int16_t *ubuf1 = ubuf[0];
1205  __asm__ volatile(
 /* Park FF_REG_b in the context slot at ESP_OFFSET, reuse it as the
  * destination pointer (%4); FF_REG_BP becomes the index register. */
1206  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1207  "mov %4, %%"FF_REG_b" \n\t"
1208  "push %%"FF_REG_BP" \n\t"
1209  YSCALEYUV2RGB1(%%FF_REGBP, %5)
 /* mm7 = 0 */
1210  "pxor %%mm7, %%mm7 \n\t"
1211  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1212  "pop %%"FF_REG_BP" \n\t"
1213  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1214  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1215  "a" (&c->redDither)
1216  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1217  );
1218  } else {
 /* Two chroma rows: YSCALEYUV2RGB1b combines %2 and %3. */
1219  const int16_t *ubuf1 = ubuf[1];
1220  __asm__ volatile(
1221  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1222  "mov %4, %%"FF_REG_b" \n\t"
1223  "push %%"FF_REG_BP" \n\t"
1224  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
 /* mm7 = 0 */
1225  "pxor %%mm7, %%mm7 \n\t"
1226  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1227  "pop %%"FF_REG_BP" \n\t"
1228  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1229  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1230  "a" (&c->redDither)
1231  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1232  );
1233  }
1234 }
1235 
/**
 * Packed RGB555 output from a single luma row (buf0).
 *
 * Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is used (YSCALEYUV2RGB1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved (YSCALEYUV2RGB1b).
 * In DITHER1XBPP builds the per-channel dither bytes stored in the context
 * are added with unsigned saturation before WRITERGB15 stores the pixels.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body; the asm
 * reads the second chroma plane via UV_OFF_BYTE(%5) and the width via
 * DSTW_OFFSET(%5), relative to &c->redDither (operand %5).
 */
1236 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
1237  const int16_t *ubuf[2], const int16_t *vbuf[2],
1238  const int16_t *abuf0, uint8_t *dest,
1239  int dstW, int uvalpha, int y)
1240 {
1241  const int16_t *ubuf0 = ubuf[0];
1242  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1243 
1244  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
 /* Single chroma row: %3 is bound to the same row as %2. */
1245  const int16_t *ubuf1 = ubuf[0];
1246  __asm__ volatile(
 /* Park FF_REG_b in the context slot at ESP_OFFSET, reuse it as the
  * destination pointer (%4); FF_REG_BP becomes the index register. */
1247  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1248  "mov %4, %%"FF_REG_b" \n\t"
1249  "push %%"FF_REG_BP" \n\t"
1250  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1251  "pxor %%mm7, %%mm7 \n\t"
1252  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1253 #ifdef DITHER1XBPP
1254  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1255  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1256  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1257 #endif
1258  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1259  "pop %%"FF_REG_BP" \n\t"
1260  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1261  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1262  "a" (&c->redDither)
 /* The channel mask must be listed for builds that pass constants as named
  * asm operands, as the RGB565 and BGR24 variants do for theirs.
  * NOTE(review): WRITERGB15 is defined outside this excerpt; it is
  * expected to reference bF8 only - confirm. */
1263  NAMED_CONSTRAINTS_ADD(bF8)
1264  );
1265  } else {
 /* Two chroma rows: YSCALEYUV2RGB1b combines %2 and %3. */
1266  const int16_t *ubuf1 = ubuf[1];
1267  __asm__ volatile(
1268  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1269  "mov %4, %%"FF_REG_b" \n\t"
1270  "push %%"FF_REG_BP" \n\t"
1271  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1272  "pxor %%mm7, %%mm7 \n\t"
1273  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1274 #ifdef DITHER1XBPP
1275  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1276  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1277  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1278 #endif
1279  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1280  "pop %%"FF_REG_BP" \n\t"
1281  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1282  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1283  "a" (&c->redDither)
 /* Same named-operand requirement as in the branch above. */
1284  NAMED_CONSTRAINTS_ADD(bF8)
1285  );
1286  }
1287 }
1288 
/**
 * Packed RGB565 output from a single luma row (buf0).
 *
 * Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is used (YSCALEYUV2RGB1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved (YSCALEYUV2RGB1b).
 * In DITHER1XBPP builds the per-channel dither bytes stored in the context
 * are added with unsigned saturation before WRITERGB16 stores the pixels.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body; the asm
 * reads the second chroma plane via UV_OFF_BYTE(%5) and the width via
 * DSTW_OFFSET(%5), relative to &c->redDither (operand %5).
 */
1289 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
1290  const int16_t *ubuf[2], const int16_t *vbuf[2],
1291  const int16_t *abuf0, uint8_t *dest,
1292  int dstW, int uvalpha, int y)
1293 {
1294  const int16_t *ubuf0 = ubuf[0];
1295  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1296 
1297  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
 /* Single chroma row: %3 is bound to the same row as %2. */
1298  const int16_t *ubuf1 = ubuf[0];
1299  __asm__ volatile(
 /* Park FF_REG_b in the context slot at ESP_OFFSET, reuse it as the
  * destination pointer (%4); FF_REG_BP becomes the index register. */
1300  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1301  "mov %4, %%"FF_REG_b" \n\t"
1302  "push %%"FF_REG_BP" \n\t"
1303  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1304  "pxor %%mm7, %%mm7 \n\t"
1305  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1306 #ifdef DITHER1XBPP
1307  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1308  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1309  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1310 #endif
1311  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1312  "pop %%"FF_REG_BP" \n\t"
1313  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1314  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1315  "a" (&c->redDither)
1316  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1317  );
1318  } else {
 /* Two chroma rows: YSCALEYUV2RGB1b combines %2 and %3. */
1319  const int16_t *ubuf1 = ubuf[1];
1320  __asm__ volatile(
1321  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1322  "mov %4, %%"FF_REG_b" \n\t"
1323  "push %%"FF_REG_BP" \n\t"
1324  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1325  "pxor %%mm7, %%mm7 \n\t"
1326  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1327 #ifdef DITHER1XBPP
1328  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1329  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1330  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1331 #endif
1332  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1333  "pop %%"FF_REG_BP" \n\t"
1334  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1335  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1336  "a" (&c->redDither)
1337  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1338  );
1339  }
1340 }
1341 
/*
 * YSCALEYUV2PACKED1(index, c): asm fragment that loads one luma row (%0) and
 * one chroma row (%2) and shifts every 16-bit word right by 7 (arithmetic),
 * without blending rows. %1 and %3 are not touched here. index is cleared
 * before label 1.
 *
 * Results: mm3 = chroma read at index, mm4 = chroma read UV_OFF_BYTE(c) bytes
 * further, mm1 = first four luma words, mm7 = next four luma words.
 *
 * The fragment neither advances index nor jumps back to label 1.
 * NOTE(review): presumably the WRITE macro that follows it closes the loop -
 * confirm at its definition.
 */
1342  #define REAL_YSCALEYUV2PACKED1(index, c) \
1343  "xor "#index", "#index" \n\t"\
1344  ".p2align 4 \n\t"\
1345  "1: \n\t"\
1346  "movq (%2, "#index"), %%mm3 \n\t" /* mm3 = uvbuf0[index] */\
1347  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index += uv_off (second chroma plane) */\
1348  "movq (%2, "#index"), %%mm4 \n\t" /* mm4 = uvbuf0[index + uv_off] */\
1349  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index -= uv_off */\
1350  "psraw $7, %%mm3 \n\t" \
1351  "psraw $7, %%mm4 \n\t" \
1352  "movq (%0, "#index", 2), %%mm1 \n\t" /* mm1 = 4 words of buf0 at byte offset 2*index */\
1353  "movq 8(%0, "#index", 2), %%mm7 \n\t" /* mm7 = next 4 words of buf0 */\
1354  "psraw $7, %%mm1 \n\t" \
1355  "psraw $7, %%mm7 \n\t" \
1356 
/* Extra expansion level so that macro arguments are expanded before
 * REAL_YSCALEYUV2PACKED1 stringifies them with #. */
1357  #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
1358 
/*
 * YSCALEYUV2PACKED1b(index, c): like YSCALEYUV2PACKED1, but the chroma is the
 * sum of two chroma rows (%2 and %3) shifted right by 8 with a logical shift,
 * instead of a single row shifted right by 7. Luma still comes from %0 only
 * and is shifted right by 7 (arithmetic). index is cleared before label 1.
 *
 * Results: mm3 = chroma read at index, mm4 = chroma read UV_OFF_BYTE(c) bytes
 * further, mm1 = first four luma words, mm7 = next four luma words.
 * mm2 and mm5 are used as scratch.
 *
 * The fragment neither advances index nor jumps back to label 1.
 * NOTE(review): presumably the WRITE macro that follows it closes the loop -
 * confirm at its definition.
 */
1359  #define REAL_YSCALEYUV2PACKED1b(index, c) \
1360  "xor "#index", "#index" \n\t"\
1361  ".p2align 4 \n\t"\
1362  "1: \n\t"\
1363  "movq (%2, "#index"), %%mm2 \n\t" /* mm2 = uvbuf0[index] */\
1364  "movq (%3, "#index"), %%mm3 \n\t" /* mm3 = uvbuf1[index] */\
1365  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index += uv_off (second chroma plane) */\
1366  "movq (%2, "#index"), %%mm5 \n\t" /* mm5 = uvbuf0[index + uv_off] */\
1367  "movq (%3, "#index"), %%mm4 \n\t" /* mm4 = uvbuf1[index + uv_off] */\
1368  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" /* index -= uv_off */\
1369  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[index] + uvbuf1[index] */\
1370  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[index + uv_off] + uvbuf1[index + uv_off] */\
1371  "psrlw $8, %%mm3 \n\t" \
1372  "psrlw $8, %%mm4 \n\t" \
1373  "movq (%0, "#index", 2), %%mm1 \n\t" /* mm1 = 4 words of buf0 at byte offset 2*index */\
1374  "movq 8(%0, "#index", 2), %%mm7 \n\t" /* mm7 = next 4 words of buf0 */\
1375  "psraw $7, %%mm1 \n\t" \
1376  "psraw $7, %%mm7 \n\t"
/* Extra expansion level so that macro arguments are expanded before
 * REAL_YSCALEYUV2PACKED1b stringifies them with #. */
1377  #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
1378 
/**
 * Packed YUYV422 output from a single luma row (buf0).
 *
 * Chroma handling depends on uvalpha:
 *  - uvalpha < 2048: only ubuf[0] is used (YSCALEYUV2PACKED1);
 *  - otherwise: ubuf[0] and ubuf[1] are summed and halved
 *    (YSCALEYUV2PACKED1b).
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body; the asm
 * reads the second chroma plane via UV_OFF_BYTE(%5) and the width via
 * DSTW_OFFSET(%5), relative to &c->redDither (operand %5).
 */
1379 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
1380  const int16_t *ubuf[2], const int16_t *vbuf[2],
1381  const int16_t *abuf0, uint8_t *dest,
1382  int dstW, int uvalpha, int y)
1383 {
1384  const int16_t *ubuf0 = ubuf[0];
1385  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1386 
1387  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
 /* Single chroma row: %3 is bound to the same row as %2. */
1388  const int16_t *ubuf1 = ubuf[0];
1389  __asm__ volatile(
 /* Park FF_REG_b in the context slot at ESP_OFFSET, reuse it as the
  * destination pointer (%4); FF_REG_BP becomes the index register. */
1390  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1391  "mov %4, %%"FF_REG_b" \n\t"
1392  "push %%"FF_REG_BP" \n\t"
1393  YSCALEYUV2PACKED1(%%FF_REGBP, %5)
1394  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1395  "pop %%"FF_REG_BP" \n\t"
1396  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1397  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1398  "a" (&c->redDither)
1399  );
1400  } else {
 /* Two chroma rows: YSCALEYUV2PACKED1b combines %2 and %3. */
1401  const int16_t *ubuf1 = ubuf[1];
1402  __asm__ volatile(
1403  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1404  "mov %4, %%"FF_REG_b" \n\t"
1405  "push %%"FF_REG_BP" \n\t"
1406  YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
1407  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1408  "pop %%"FF_REG_BP" \n\t"
1409  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
 /* %0=buf0, %1=buf1 (alias of buf0), %2=ubuf0, %3=ubuf1, %4=dest,
  * %5=&c->redDither. */
1410  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1411  "a" (&c->redDither)
1412  );
1413  }
1414 }
/**
 * Install the output and fast-scaling function pointers of this template
 * instantiation into the context.
 *
 * - c->use_mmx_vfilter is reset to 0 and set to 1 only on the
 *   non-SWS_ACCURATE_RND path below.
 * - The packed writers are installed only when the destination format is not
 *   16 bits per sample, not "NBPS", not semi-planar YUV, not GRAYF32 and
 *   SWS_BITEXACT is not requested. Within that, nothing is installed when
 *   SWS_FULL_CHR_H_INT is set.
 * - With SWS_ACCURATE_RND the _X_ar variants are used for yuv2packedX,
 *   otherwise the plain _X variants.
 * - yuv2packed1 / yuv2packed2 are chosen by destination format regardless of
 *   SWS_ACCURATE_RND.
 * - For 8-bit sources with dstBpc <= 14, hyscale_fast / hcscale_fast are set
 *   to the MMXEXT routines when this is the MMXEXT template,
 *   SWS_FAST_BILINEAR is set and c->canMMXEXTBeUsed is non-zero; otherwise
 *   they are cleared to NULL.
 *
 * @param c scaler context whose function pointers are updated in place
 */
1415 static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
1416 {
1417  enum AVPixelFormat dstFormat = c->dstFormat;
1418 
1419  c->use_mmx_vfilter= 0;
1420  if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat)
1421  && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE
1422  && !(c->flags & SWS_BITEXACT)) {
1423  if (c->flags & SWS_ACCURATE_RND) {
1424  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
 /* Accurate-rounding vertical writers; there is no BGR32 entry in this
  * table. */
1425  switch (c->dstFormat) {
1426  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
1427 #if HAVE_6REGS
1428  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
1429 #endif
1430  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
1431  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
1432  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
1433  default: break;
1434  }
1435  }
1436  } else {
1437  c->use_mmx_vfilter= 1;
1438  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1439  switch (c->dstFormat) {
1440  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
1441  case AV_PIX_FMT_BGR32: c->yuv2packedX = RENAME(yuv2bgr32_X); break;
1442 #if HAVE_6REGS
1443  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
1444 #endif
1445  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
1446  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
1447  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
1448  default: break;
1449  }
1450  }
1451  }
 /* One-row and two-row packed writers, shared by both rounding modes. */
1452  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1453  switch (c->dstFormat) {
1454  case AV_PIX_FMT_RGB32:
1455  c->yuv2packed1 = RENAME(yuv2rgb32_1);
1456  c->yuv2packed2 = RENAME(yuv2rgb32_2);
1457  break;
1458  case AV_PIX_FMT_BGR24:
1459  c->yuv2packed1 = RENAME(yuv2bgr24_1);
1460  c->yuv2packed2 = RENAME(yuv2bgr24_2);
1461  break;
1462  case AV_PIX_FMT_RGB555:
1463  c->yuv2packed1 = RENAME(yuv2rgb555_1);
1464  c->yuv2packed2 = RENAME(yuv2rgb555_2);
1465  break;
1466  case AV_PIX_FMT_RGB565:
1467  c->yuv2packed1 = RENAME(yuv2rgb565_1);
1468  c->yuv2packed2 = RENAME(yuv2rgb565_2);
1469  break;
1470  case AV_PIX_FMT_YUYV422:
1471  c->yuv2packed1 = RENAME(yuv2yuyv422_1);
1472  c->yuv2packed2 = RENAME(yuv2yuyv422_2);
1473  break;
1474  default:
1475  break;
1476  }
1477  }
1478  }
1479 
1480  if (c->srcBpc == 8 && c->dstBpc <= 14) {
1481  // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
1482 #if COMPILE_TEMPLATE_MMXEXT
1483  if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
1484  c->hyscale_fast = ff_hyscale_fast_mmxext;
1485  c->hcscale_fast = ff_hcscale_fast_mmxext;
1486  } else {
1487 #endif /* COMPILE_TEMPLATE_MMXEXT */
 /* Without the MMXEXT template this is the only statement pair compiled
  * in this block, so the fast scalers are always cleared. */
1488  c->hyscale_fast = NULL;
1489  c->hcscale_fast = NULL;
1490 #if COMPILE_TEMPLATE_MMXEXT
1491  }
1492 #endif /* COMPILE_TEMPLATE_MMXEXT */
1493  }
1494 }
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
#define av_cold
Definition: attributes.h:88
uint8_t
#define CONFIG_SWSCALE_ALPHA
Definition: config.h:564
#define NULL
Definition: coverity.c:32
#define RENAME(name)
Definition: ffv1.h:196
#define SWS_BITEXACT
Definition: swscale.h:84
#define SWS_ACCURATE_RND
Definition: swscale.h:83
#define SWS_FULL_CHR_H_INT
Definition: swscale.h:79
#define SWS_FAST_BILINEAR
Definition: swscale.h:58
#define NAMED_CONSTRAINTS_ADD(...)
Definition: asm.h:145
int x86_reg
Definition: asm.h:72
int dummy
Definition: motion.c:64
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:374
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
@ AV_PIX_FMT_GRAYF32LE
IEEE-754 single precision Y, 32bpp, little-endian.
Definition: pixfmt.h:341
@ AV_PIX_FMT_GRAYF32BE
IEEE-754 single precision Y, 32bpp, big-endian.
Definition: pixfmt.h:340
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:67
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:69
#define AV_PIX_FMT_RGB565
Definition: pixfmt.h:386
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:372
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:387
#define Y_TEMP
#define RED_DITHER
#define ALP_MMX_FILTER_OFFSET
#define ESP_OFFSET
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
#define BLUE_DITHER
#define V_TEMP
#define GREEN_DITHER
#define DSTW_OFFSET
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
#define U_TEMP
void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc)
void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc)
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
#define WRITEBGR24(dst, dstw, index)
#define YSCALEYUV2PACKED(index, c)
#define YSCALEYUV2RGB1_ALPHA(index)
static void RENAME() yuv2rgb565_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2yuyv422_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
#define YSCALEYUV2PACKEDX
#define YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
static av_cold void RENAME() sws_init_swscale(SwsContext *c)
static void RENAME() yuv2bgr24_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
static void RENAME() yuv2rgb555_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
#define YSCALEYUV2RGB1(index, c)
static void RENAME() yuv2rgb565_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
static void RENAME() yuv2bgr24_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
#define YSCALEYUV2PACKED1(index, c)
static void RENAME() yuv2yuyv422_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
static void RENAME() yuv2rgb565_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2rgb32_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
vertical bilinear scale YV12 to RGB
static void RENAME() yuv2yuyv422_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2rgb555_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define WRITERGB16(dst, dstw, index)
static void RENAME() yuv2rgb555_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
#define YSCALEYUV2RGB_YA(index, c, b1, b2)
#define YSCALEYUV2RGB1b(index, c)
static void RENAME() yuv2rgb565_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
static void RENAME() yuv2bgr32_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2rgb555_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2yuyv422_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2rgb32_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define YSCALEYUV2RGBX
#define WRITERGB15(dst, dstw, index)
#define YSCALEYUV2RGB(index, c)
#define WRITEYUY2(dst, dstw, index)
static void RENAME() yuv2rgb32_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define YSCALEYUV2PACKEDX_END
#define YSCALEYUV2PACKEDX_ACCURATE
static void RENAME() yuv2rgb32_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
YV12 to RGB without scaling or interpolating.
#define YSCALEYUV2PACKED1b(index, c)
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
static double c[64]