FFmpeg  4.4
vf_thumbnail_cuda.ptx.c
Go to the documentation of this file.
/*
 * vf_thumbnail_cuda_ptx: NUL-terminated PTX assembly text, embedded as
 * concatenated C string literals.
 *
 * The text is machine-generated (its own header says "Generated by LLVM
 * NVPTX Back-End") and declares PTX version 3.2, target sm_30 and a 64-bit
 * address size.  Do not hand-edit the literals; every byte is part of the
 * program text.
 *
 * It defines four .visible .entry kernels with identical parameter lists:
 *   Thumbnail_uchar, Thumbnail_uchar2, Thumbnail_ushort, Thumbnail_ushort2
 *
 *   param_0 (.u64)  used as the texture operand of the tex.2d fetch
 *   param_1 (.u64)  base address (converted to the global space) of the
 *                   32-bit counters that are incremented atomically
 *   param_2 (.u32)  upper bound for x; the thread exits when x >= param_2
 *   param_3 (.u32)  upper bound for y; the thread exits when y >= param_3
 *
 * Every kernel follows the same sequence:
 *   x = ntid.x * ctaid.x + tid.x
 *   y = ntid.y * ctaid.y + tid.y
 *   return early if x or y is out of bounds (signed compare)
 *   fetch one texel at (float)x, (float)y with tex.2d.v4.u32.f32
 *   atom.global.add.u32 ..., 1 on the counter selected by the texel value
 *
 * Counter selection (each counter is 4 bytes wide):
 *   *_uchar   : index = component0 & 255
 *   *_ushort  : index = ((component0 & 65535) + 128) >> 8
 *   *2 kernels: additionally apply the same formula to component1 and add a
 *               fixed 1024-byte offset (256 * 4 bytes) to the address.
 *
 * NOTE(review): in the ushort kernels a component value >= 65408 produces
 * index 256, i.e. byte offset 1024 (2048 for the second component).  If the
 * counter table is meant to hold 256 entries per component -- presumably,
 * given the 1024-byte stride; confirm against the host code -- that write
 * lands in the next component's first counter or past it.  Any fix belongs
 * in the kernel source this PTX was generated from, not in this file.
 */
1 const char vf_thumbnail_cuda_ptx[] = \
2  "//\n"
3  "// Generated by LLVM NVPTX Back-End\n"
4  "//\n"
5  "\n"
6  ".version 3.2\n"
7  ".target sm_30\n"
8  ".address_size 64\n"
9  "\n"
10  " // .globl Thumbnail_uchar\n"
11  "\n"
12  ".visible .entry Thumbnail_uchar(\n"
13  " .param .u64 Thumbnail_uchar_param_0,\n"
14  " .param .u64 Thumbnail_uchar_param_1,\n"
15  " .param .u32 Thumbnail_uchar_param_2,\n"
16  " .param .u32 Thumbnail_uchar_param_3\n"
17  ")\n"
18  "{\n"
19  " .reg .pred %p<4>;\n"
20  " .reg .f32 %f<3>;\n"
21  " .reg .b32 %r<17>;\n"
22  " .reg .b64 %rd<7>;\n"
23  "\n"
24  " ld.param.u32 %r9, [Thumbnail_uchar_param_2];\n"
25  " // begin inline asm\n"
26  " mov.u32 %r3, %ctaid.x;\n"
27  " // end inline asm\n"
28  " ld.param.u32 %r10, [Thumbnail_uchar_param_3];\n"
29  " // begin inline asm\n"
30  " mov.u32 %r4, %ctaid.y;\n"
31  " // end inline asm\n"
32  " // begin inline asm\n"
33  " mov.u32 %r5, %ntid.x;\n"
34  " // end inline asm\n"
35  " // begin inline asm\n"
36  " mov.u32 %r6, %ntid.y;\n"
37  " // end inline asm\n"
38  " // begin inline asm\n"
39  " mov.u32 %r7, %tid.x;\n"
40  " // end inline asm\n"
41  " // begin inline asm\n"
42  " mov.u32 %r8, %tid.y;\n"
43  " // end inline asm\n"
44  " mad.lo.s32 %r1, %r5, %r3, %r7;\n"
45  " mad.lo.s32 %r2, %r6, %r4, %r8;\n"
46  " setp.ge.s32 %p1, %r2, %r10;\n"
47  " setp.ge.s32 %p2, %r1, %r9;\n"
48  " or.pred %p3, %p2, %p1;\n"
49  " @%p3 bra LBB0_2;\n"
50  " ld.param.u64 %rd4, [Thumbnail_uchar_param_0];\n"
51  " ld.param.u64 %rd3, [Thumbnail_uchar_param_1];\n"
52  " cvta.to.global.u64 %rd1, %rd3;\n"
53  " cvt.rn.f32.s32 %f1, %r1;\n"
54  " cvt.rn.f32.s32 %f2, %r2;\n"
55  " // begin inline asm\n"
56  " tex.2d.v4.u32.f32 {%r11, %r12, %r13, %r14}, [%rd4, {%f1, %f2}];\n"
57  " // end inline asm\n"
58  " and.b32 %r15, %r11, 255;\n"
59  " mul.wide.u32 %rd5, %r15, 4;\n"
60  " add.s64 %rd6, %rd1, %rd5;\n"
61  " atom.global.add.u32 %r16, [%rd6], 1;\n"
62  "LBB0_2:\n"
63  " ret;\n"
64  "\n"
65  "}\n"
66  " // .globl Thumbnail_uchar2\n"
67  ".visible .entry Thumbnail_uchar2(\n"
68  " .param .u64 Thumbnail_uchar2_param_0,\n"
69  " .param .u64 Thumbnail_uchar2_param_1,\n"
70  " .param .u32 Thumbnail_uchar2_param_2,\n"
71  " .param .u32 Thumbnail_uchar2_param_3\n"
72  ")\n"
73  "{\n"
74  " .reg .pred %p<4>;\n"
75  " .reg .f32 %f<3>;\n"
76  " .reg .b32 %r<19>;\n"
77  " .reg .b64 %rd<10>;\n"
78  "\n"
79  " ld.param.u32 %r9, [Thumbnail_uchar2_param_2];\n"
80  " // begin inline asm\n"
81  " mov.u32 %r3, %ctaid.x;\n"
82  " // end inline asm\n"
83  " ld.param.u32 %r10, [Thumbnail_uchar2_param_3];\n"
84  " // begin inline asm\n"
85  " mov.u32 %r4, %ctaid.y;\n"
86  " // end inline asm\n"
87  " // begin inline asm\n"
88  " mov.u32 %r5, %ntid.x;\n"
89  " // end inline asm\n"
90  " // begin inline asm\n"
91  " mov.u32 %r6, %ntid.y;\n"
92  " // end inline asm\n"
93  " // begin inline asm\n"
94  " mov.u32 %r7, %tid.x;\n"
95  " // end inline asm\n"
96  " // begin inline asm\n"
97  " mov.u32 %r8, %tid.y;\n"
98  " // end inline asm\n"
99  " mad.lo.s32 %r1, %r5, %r3, %r7;\n"
100  " mad.lo.s32 %r2, %r6, %r4, %r8;\n"
101  " setp.ge.s32 %p1, %r2, %r10;\n"
102  " setp.ge.s32 %p2, %r1, %r9;\n"
103  " or.pred %p3, %p2, %p1;\n"
104  " @%p3 bra LBB1_2;\n"
105  " ld.param.u64 %rd4, [Thumbnail_uchar2_param_0];\n"
106  " ld.param.u64 %rd3, [Thumbnail_uchar2_param_1];\n"
107  " cvta.to.global.u64 %rd1, %rd3;\n"
108  " cvt.rn.f32.s32 %f1, %r1;\n"
109  " cvt.rn.f32.s32 %f2, %r2;\n"
110  " // begin inline asm\n"
111  " tex.2d.v4.u32.f32 {%r11, %r12, %r13, %r14}, [%rd4, {%f1, %f2}];\n"
112  " // end inline asm\n"
113  " and.b32 %r15, %r11, 255;\n"
114  " mul.wide.u32 %rd5, %r15, 4;\n"
115  " add.s64 %rd6, %rd1, %rd5;\n"
116  " atom.global.add.u32 %r16, [%rd6], 1;\n"
117  " and.b32 %r17, %r12, 255;\n"
118  " mul.wide.u32 %rd7, %r17, 4;\n"
119  " add.s64 %rd8, %rd1, %rd7;\n"
120  " add.s64 %rd9, %rd8, 1024;\n"
121  " atom.global.add.u32 %r18, [%rd9], 1;\n"
122  "LBB1_2:\n"
123  " ret;\n"
124  "\n"
125  "}\n"
126  " // .globl Thumbnail_ushort\n"
127  ".visible .entry Thumbnail_ushort(\n"
128  " .param .u64 Thumbnail_ushort_param_0,\n"
129  " .param .u64 Thumbnail_ushort_param_1,\n"
130  " .param .u32 Thumbnail_ushort_param_2,\n"
131  " .param .u32 Thumbnail_ushort_param_3\n"
132  ")\n"
133  "{\n"
134  " .reg .pred %p<4>;\n"
135  " .reg .f32 %f<3>;\n"
136  " .reg .b32 %r<19>;\n"
137  " .reg .b64 %rd<7>;\n"
138  "\n"
139  " ld.param.u32 %r9, [Thumbnail_ushort_param_2];\n"
140  " // begin inline asm\n"
141  " mov.u32 %r3, %ctaid.x;\n"
142  " // end inline asm\n"
143  " ld.param.u32 %r10, [Thumbnail_ushort_param_3];\n"
144  " // begin inline asm\n"
145  " mov.u32 %r4, %ctaid.y;\n"
146  " // end inline asm\n"
147  " // begin inline asm\n"
148  " mov.u32 %r5, %ntid.x;\n"
149  " // end inline asm\n"
150  " // begin inline asm\n"
151  " mov.u32 %r6, %ntid.y;\n"
152  " // end inline asm\n"
153  " // begin inline asm\n"
154  " mov.u32 %r7, %tid.x;\n"
155  " // end inline asm\n"
156  " // begin inline asm\n"
157  " mov.u32 %r8, %tid.y;\n"
158  " // end inline asm\n"
159  " mad.lo.s32 %r1, %r5, %r3, %r7;\n"
160  " mad.lo.s32 %r2, %r6, %r4, %r8;\n"
161  " setp.ge.s32 %p1, %r2, %r10;\n"
162  " setp.ge.s32 %p2, %r1, %r9;\n"
163  " or.pred %p3, %p2, %p1;\n"
164  " @%p3 bra LBB2_2;\n"
165  " ld.param.u64 %rd4, [Thumbnail_ushort_param_0];\n"
166  " ld.param.u64 %rd3, [Thumbnail_ushort_param_1];\n"
167  " cvta.to.global.u64 %rd1, %rd3;\n"
168  " cvt.rn.f32.s32 %f1, %r1;\n"
169  " cvt.rn.f32.s32 %f2, %r2;\n"
170  " // begin inline asm\n"
171  " tex.2d.v4.u32.f32 {%r11, %r12, %r13, %r14}, [%rd4, {%f1, %f2}];\n"
172  " // end inline asm\n"
173  " and.b32 %r15, %r11, 65535;\n"
174  " add.s32 %r16, %r15, 128;\n"
175  " shr.u32 %r17, %r16, 8;\n"
176  " mul.wide.u32 %rd5, %r17, 4;\n"
177  " add.s64 %rd6, %rd1, %rd5;\n"
178  " atom.global.add.u32 %r18, [%rd6], 1;\n"
179  "LBB2_2:\n"
180  " ret;\n"
181  "\n"
182  "}\n"
183  " // .globl Thumbnail_ushort2\n"
184  ".visible .entry Thumbnail_ushort2(\n"
185  " .param .u64 Thumbnail_ushort2_param_0,\n"
186  " .param .u64 Thumbnail_ushort2_param_1,\n"
187  " .param .u32 Thumbnail_ushort2_param_2,\n"
188  " .param .u32 Thumbnail_ushort2_param_3\n"
189  ")\n"
190  "{\n"
191  " .reg .pred %p<4>;\n"
192  " .reg .f32 %f<3>;\n"
193  " .reg .b32 %r<23>;\n"
194  " .reg .b64 %rd<10>;\n"
195  "\n"
196  " ld.param.u32 %r9, [Thumbnail_ushort2_param_2];\n"
197  " // begin inline asm\n"
198  " mov.u32 %r3, %ctaid.x;\n"
199  " // end inline asm\n"
200  " ld.param.u32 %r10, [Thumbnail_ushort2_param_3];\n"
201  " // begin inline asm\n"
202  " mov.u32 %r4, %ctaid.y;\n"
203  " // end inline asm\n"
204  " // begin inline asm\n"
205  " mov.u32 %r5, %ntid.x;\n"
206  " // end inline asm\n"
207  " // begin inline asm\n"
208  " mov.u32 %r6, %ntid.y;\n"
209  " // end inline asm\n"
210  " // begin inline asm\n"
211  " mov.u32 %r7, %tid.x;\n"
212  " // end inline asm\n"
213  " // begin inline asm\n"
214  " mov.u32 %r8, %tid.y;\n"
215  " // end inline asm\n"
216  " mad.lo.s32 %r1, %r5, %r3, %r7;\n"
217  " mad.lo.s32 %r2, %r6, %r4, %r8;\n"
218  " setp.ge.s32 %p1, %r2, %r10;\n"
219  " setp.ge.s32 %p2, %r1, %r9;\n"
220  " or.pred %p3, %p2, %p1;\n"
221  " @%p3 bra LBB3_2;\n"
222  " ld.param.u64 %rd4, [Thumbnail_ushort2_param_0];\n"
223  " ld.param.u64 %rd3, [Thumbnail_ushort2_param_1];\n"
224  " cvta.to.global.u64 %rd1, %rd3;\n"
225  " cvt.rn.f32.s32 %f1, %r1;\n"
226  " cvt.rn.f32.s32 %f2, %r2;\n"
227  " // begin inline asm\n"
228  " tex.2d.v4.u32.f32 {%r11, %r12, %r13, %r14}, [%rd4, {%f1, %f2}];\n"
229  " // end inline asm\n"
230  " and.b32 %r15, %r11, 65535;\n"
231  " add.s32 %r16, %r15, 128;\n"
232  " shr.u32 %r17, %r16, 8;\n"
233  " mul.wide.u32 %rd5, %r17, 4;\n"
234  " add.s64 %rd6, %rd1, %rd5;\n"
235  " atom.global.add.u32 %r18, [%rd6], 1;\n"
236  " and.b32 %r19, %r12, 65535;\n"
237  " add.s32 %r20, %r19, 128;\n"
238  " shr.u32 %r21, %r20, 8;\n"
239  " mul.wide.u32 %rd7, %r21, 4;\n"
240  " add.s64 %rd8, %rd1, %rd7;\n"
241  " add.s64 %rd9, %rd8, 1024;\n"
242  " atom.global.add.u32 %r22, [%rd9], 1;\n"
243  "LBB3_2:\n"
244  " ret;\n"
245  "\n"
246  "}\n"
247 ;
const char vf_thumbnail_cuda_ptx[]