FFmpeg  4.4
opusenc.c
Go to the documentation of this file.
1 /*
2  * Opus encoder
3  * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "opusenc.h"
23 #include "opus_pvq.h"
24 #include "opusenc_psy.h"
25 #include "opustab.h"
26 
27 #include "libavutil/float_dsp.h"
28 #include "libavutil/mem_internal.h"
29 #include "libavutil/opt.h"
30 #include "internal.h"
31 #include "bytestream.h"
32 #include "audio_frame_queue.h"
33 
34 typedef struct OpusEncContext {
43  struct FFBufQueue bufqueue;
44 
47 
49 
50  int channels;
51 
54 
55  /* Actual energy the decoder will have */
57 
58  DECLARE_ALIGNED(32, float, scratch)[2048];
60 
62 {
63  uint8_t *bs = avctx->extradata;
64 
65  bytestream_put_buffer(&bs, "OpusHead", 8);
66  bytestream_put_byte (&bs, 0x1);
67  bytestream_put_byte (&bs, avctx->channels);
68  bytestream_put_le16 (&bs, avctx->initial_padding);
69  bytestream_put_le32 (&bs, avctx->sample_rate);
70  bytestream_put_le16 (&bs, 0x0);
71  bytestream_put_byte (&bs, 0x0); /* Default layout */
72 }
73 
74 static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed)
75 {
76  int tmp = 0x0, extended_toc = 0;
77  static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = {
78  /* Silk Hybrid Celt Layer */
79  /* NB MB WB SWB FB NB MB WB SWB FB NB MB WB SWB FB Bandwidth */
80  { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 17, 0, 21, 25, 29 } }, /* 2.5 ms */
81  { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 18, 0, 22, 26, 30 } }, /* 5 ms */
82  { { 1, 5, 9, 0, 0 }, { 0, 0, 0, 13, 15 }, { 19, 0, 23, 27, 31 } }, /* 10 ms */
83  { { 2, 6, 10, 0, 0 }, { 0, 0, 0, 14, 16 }, { 20, 0, 24, 28, 32 } }, /* 20 ms */
84  { { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */
85  { { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */
86  };
87  int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth];
88  *fsize_needed = 0;
89  if (!cfg)
90  return 1;
91  if (s->packet.frames == 2) { /* 2 packets */
92  if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */
93  tmp = 0x1;
94  } else { /* different size */
95  tmp = 0x2;
96  *fsize_needed = 1; /* put frame sizes in the packet */
97  }
98  } else if (s->packet.frames > 2) {
99  tmp = 0x3;
100  extended_toc = 1;
101  }
102  tmp |= (s->channels > 1) << 2; /* Stereo or mono */
103  tmp |= (cfg - 1) << 3; /* codec configuration */
104  *toc++ = tmp;
105  if (extended_toc) {
106  for (int i = 0; i < (s->packet.frames - 1); i++)
107  *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
108  tmp = (*fsize_needed) << 7; /* vbr flag */
109  tmp |= (0) << 6; /* padding flag */
110  tmp |= s->packet.frames;
111  *toc++ = tmp;
112  }
113  *size = 1 + extended_toc;
114  return 0;
115 }
116 
118 {
119  AVFrame *cur = NULL;
120  const int subframesize = s->avctx->frame_size;
121  int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
122 
123  cur = ff_bufqueue_get(&s->bufqueue);
124 
125  for (int ch = 0; ch < f->channels; ch++) {
126  CeltBlock *b = &f->block[ch];
127  const void *input = cur->extended_data[ch];
128  size_t bps = av_get_bytes_per_sample(cur->format);
129  memcpy(b->overlap, input, bps*cur->nb_samples);
130  }
131 
132  av_frame_free(&cur);
133 
134  for (int sf = 0; sf < subframes; sf++) {
135  if (sf != (subframes - 1))
136  cur = ff_bufqueue_get(&s->bufqueue);
137  else
138  cur = ff_bufqueue_peek(&s->bufqueue, 0);
139 
140  for (int ch = 0; ch < f->channels; ch++) {
141  CeltBlock *b = &f->block[ch];
142  const void *input = cur->extended_data[ch];
143  const size_t bps = av_get_bytes_per_sample(cur->format);
144  const size_t left = (subframesize - cur->nb_samples)*bps;
145  const size_t len = FFMIN(subframesize, cur->nb_samples)*bps;
146  memcpy(&b->samples[sf*subframesize], input, len);
147  memset(&b->samples[cur->nb_samples], 0, left);
148  }
149 
150  /* Last frame isn't popped off and freed yet - we need it for overlap */
151  if (sf != (subframes - 1))
152  av_frame_free(&cur);
153  }
154 }
155 
156 /* Apply the pre emphasis filter */
158 {
159  const int subframesize = s->avctx->frame_size;
160  const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
161 
162  /* Filter overlap */
163  for (int ch = 0; ch < f->channels; ch++) {
164  CeltBlock *b = &f->block[ch];
165  float m = b->emph_coeff;
166  for (int i = 0; i < CELT_OVERLAP; i++) {
167  float sample = b->overlap[i];
168  b->overlap[i] = sample - m;
169  m = sample * CELT_EMPH_COEFF;
170  }
171  b->emph_coeff = m;
172  }
173 
174  /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */
175  for (int sf = 0; sf < subframes; sf++) {
176  for (int ch = 0; ch < f->channels; ch++) {
177  CeltBlock *b = &f->block[ch];
178  float m = b->emph_coeff;
179  for (int i = 0; i < subframesize; i++) {
180  float sample = b->samples[sf*subframesize + i];
181  b->samples[sf*subframesize + i] = sample - m;
182  m = sample * CELT_EMPH_COEFF;
183  }
184  if (sf != (subframes - 1))
185  b->emph_coeff = m;
186  }
187  }
188 }
189 
190 /* Create the window and do the mdct */
192 {
193  float *win = s->scratch, *temp = s->scratch + 1920;
194 
195  if (f->transient) {
196  for (int ch = 0; ch < f->channels; ch++) {
197  CeltBlock *b = &f->block[ch];
198  float *src1 = b->overlap;
199  for (int t = 0; t < f->blocks; t++) {
200  float *src2 = &b->samples[CELT_OVERLAP*t];
201  s->dsp->vector_fmul(win, src1, ff_celt_window, 128);
202  s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
203  ff_celt_window - 8, 128);
204  src1 = src2;
205  s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
206  }
207  }
208  } else {
209  int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
210  int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
211  memset(win, 0, wlen*sizeof(float));
212  for (int ch = 0; ch < f->channels; ch++) {
213  CeltBlock *b = &f->block[ch];
214 
215  /* Overlap */
216  s->dsp->vector_fmul(temp, b->overlap, ff_celt_window, 128);
217  memcpy(win + lap_dst, temp, CELT_OVERLAP*sizeof(float));
218 
219  /* Samples, flat top window */
220  memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float));
221 
222  /* Samples, windowed */
223  s->dsp->vector_fmul_reverse(temp, b->samples + rwin,
224  ff_celt_window - 8, 128);
225  memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float));
226 
227  s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
228  }
229  }
230 
231  for (int ch = 0; ch < f->channels; ch++) {
232  CeltBlock *block = &f->block[ch];
233  for (int i = 0; i < CELT_MAX_BANDS; i++) {
234  float ener = 0.0f;
235  int band_offset = ff_celt_freq_bands[i] << f->size;
236  int band_size = ff_celt_freq_range[i] << f->size;
237  float *coeffs = &block->coeffs[band_offset];
238 
239  for (int j = 0; j < band_size; j++)
240  ener += coeffs[j]*coeffs[j];
241 
242  block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON;
243  ener = 1.0f/block->lin_energy[i];
244 
245  for (int j = 0; j < band_size; j++)
246  coeffs[j] *= ener;
247 
248  block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i];
249 
250  /* CELT_ENERGY_SILENCE is what the decoder uses and its not -infinity */
251  block->energy[i] = FFMAX(block->energy[i], CELT_ENERGY_SILENCE);
252  }
253  }
254 }
255 
257 {
258  int tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed;
259  int bits = f->transient ? 2 : 4;
260 
261  tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits));
262 
263  for (int i = f->start_band; i < f->end_band; i++) {
264  if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) {
265  const int tbit = (diff ^ 1) == f->tf_change[i];
266  ff_opus_rc_enc_log(rc, tbit, bits);
267  diff ^= tbit;
268  tf_changed |= diff;
269  }
270  bits = f->transient ? 4 : 5;
271  }
272 
273  if (tf_select_needed && ff_celt_tf_select[f->size][f->transient][0][tf_changed] !=
274  ff_celt_tf_select[f->size][f->transient][1][tf_changed]) {
275  ff_opus_rc_enc_log(rc, f->tf_select, 1);
276  tf_select = f->tf_select;
277  }
278 
279  for (int i = f->start_band; i < f->end_band; i++)
280  f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
281 }
282 
284 {
285  float gain = f->pf_gain;
286  int txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
287 
288  ff_opus_rc_enc_log(rc, f->pfilter, 1);
289  if (!f->pfilter)
290  return;
291 
292  /* Octave */
293  txval = FFMIN(octave, 6);
294  ff_opus_rc_enc_uint(rc, txval, 6);
295  octave = txval;
296  /* Period */
297  txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1);
298  ff_opus_rc_put_raw(rc, period, 4 + octave);
299  period = txval + (16 << octave) - 1;
300  /* Gain */
301  txval = FFMIN(((int)(gain / 0.09375f)) - 1, 7);
302  ff_opus_rc_put_raw(rc, txval, 3);
303  gain = 0.09375f * (txval + 1);
304  /* Tapset */
305  if ((opus_rc_tell(rc) + 2) <= f->framebits)
307  else
308  tapset = 0;
309  /* Finally create the coeffs */
310  for (int i = 0; i < 2; i++) {
311  CeltBlock *block = &f->block[i];
312 
313  block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
314  block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
315  block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
316  block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
317  }
318 }
319 
321  float last_energy[][CELT_MAX_BANDS], int intra)
322 {
323  float alpha, beta, prev[2] = { 0, 0 };
324  const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][intra];
325 
326  /* Inter is really just differential coding */
327  if (opus_rc_tell(rc) + 3 <= f->framebits)
328  ff_opus_rc_enc_log(rc, intra, 3);
329  else
330  intra = 0;
331 
332  if (intra) {
333  alpha = 0.0f;
334  beta = 1.0f - (4915.0f/32768.0f);
335  } else {
336  alpha = ff_celt_alpha_coef[f->size];
337  beta = ff_celt_beta_coef[f->size];
338  }
339 
340  for (int i = f->start_band; i < f->end_band; i++) {
341  for (int ch = 0; ch < f->channels; ch++) {
342  CeltBlock *block = &f->block[ch];
343  const int left = f->framebits - opus_rc_tell(rc);
344  const float last = FFMAX(-9.0f, last_energy[ch][i]);
345  float diff = block->energy[i] - prev[ch] - last*alpha;
346  int q_en = lrintf(diff);
347  if (left >= 15) {
348  ff_opus_rc_enc_laplace(rc, &q_en, pmod[i << 1] << 7, pmod[(i << 1) + 1] << 6);
349  } else if (left >= 2) {
350  q_en = av_clip(q_en, -1, 1);
351  ff_opus_rc_enc_cdf(rc, 2*q_en + 3*(q_en < 0), ff_celt_model_energy_small);
352  } else if (left >= 1) {
353  q_en = av_clip(q_en, -1, 0);
354  ff_opus_rc_enc_log(rc, (q_en & 1), 1);
355  } else q_en = -1;
356 
357  block->error_energy[i] = q_en - diff;
358  prev[ch] += beta * q_en;
359  }
360  }
361 }
362 
364  float last_energy[][CELT_MAX_BANDS])
365 {
366  uint32_t inter, intra;
368 
369  exp_quant_coarse(rc, f, last_energy, 1);
370  intra = OPUS_RC_CHECKPOINT_BITS(rc);
371 
373 
374  exp_quant_coarse(rc, f, last_energy, 0);
375  inter = OPUS_RC_CHECKPOINT_BITS(rc);
376 
377  if (inter > intra) { /* Unlikely */
379  exp_quant_coarse(rc, f, last_energy, 1);
380  }
381 }
382 
384 {
385  for (int i = f->start_band; i < f->end_band; i++) {
386  if (!f->fine_bits[i])
387  continue;
388  for (int ch = 0; ch < f->channels; ch++) {
389  CeltBlock *block = &f->block[ch];
390  int quant, lim = (1 << f->fine_bits[i]);
391  float offset, diff = 0.5f - block->error_energy[i];
392  quant = av_clip(floor(diff*lim), 0, lim - 1);
393  ff_opus_rc_put_raw(rc, quant, f->fine_bits[i]);
394  offset = 0.5f - ((quant + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f);
395  block->error_energy[i] -= offset;
396  }
397  }
398 }
399 
401 {
402  for (int priority = 0; priority < 2; priority++) {
403  for (int i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) {
404  if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
405  continue;
406  for (int ch = 0; ch < f->channels; ch++) {
407  CeltBlock *block = &f->block[ch];
408  const float err = block->error_energy[i];
409  const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
410  const int sign = FFABS(err + offset) < FFABS(err - offset);
411  ff_opus_rc_put_raw(rc, sign, 1);
412  block->error_energy[i] -= offset*(1 - 2*sign);
413  }
414  }
415  }
416 }
417 
419  CeltFrame *f, int index)
420 {
422 
423  ff_opus_psy_celt_frame_init(&s->psyctx, f, index);
424 
426 
427  if (f->silence) {
428  if (f->framebits >= 16)
429  ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit singalling) */
430  for (int ch = 0; ch < s->channels; ch++)
431  memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
432  return;
433  }
434 
435  /* Filters */
437  if (f->pfilter) {
438  ff_opus_rc_enc_log(rc, 0, 15);
440  }
441 
442  /* Transform */
443  celt_frame_mdct(s, f);
444 
445  /* Need to handle transient/non-transient switches at any point during analysis */
446  while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index))
447  celt_frame_mdct(s, f);
448 
450 
451  /* Silence */
452  ff_opus_rc_enc_log(rc, 0, 15);
453 
454  /* Pitch filter */
455  if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
457 
458  /* Transient flag */
459  if (f->size && opus_rc_tell(rc) + 3 <= f->framebits)
460  ff_opus_rc_enc_log(rc, f->transient, 3);
461 
462  /* Main encoding */
463  celt_quant_coarse (f, rc, s->last_quantized_energy);
464  celt_enc_tf (f, rc);
465  ff_celt_bitalloc (f, rc, 1);
466  celt_quant_fine (f, rc);
467  ff_celt_quant_bands(f, rc);
468 
469  /* Anticollapse bit */
470  if (f->anticollapse_needed)
471  ff_opus_rc_put_raw(rc, f->anticollapse, 1);
472 
473  /* Final per-band energy adjustments from leftover bits */
474  celt_quant_final(s, rc, f);
475 
476  for (int ch = 0; ch < f->channels; ch++) {
477  CeltBlock *block = &f->block[ch];
478  for (int i = 0; i < CELT_MAX_BANDS; i++)
479  s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i];
480  }
481 }
482 
/**
 * Write an Opus frame length using the 1 or 2 byte length coding.
 *
 * Lengths below 252 take a single byte. Larger lengths are stored as a first
 * byte in 252..255 and a second byte such that v == 4*dst[1] + dst[0].
 *
 * @param dst destination; dst[1] is only written when 2 is returned
 * @param v   frame length in bytes, expected to be within 0..1275
 * @return number of bytes written (1 or 2)
 */
static inline int write_opuslacing(uint8_t *dst, int v)
{
    if (v < 252) {
        dst[0] = v;
        return 1;
    }

    /* The first byte carries v modulo 4 on top of 252, the rest goes in the second */
    dst[0] = 252 + (v & 3);
    dst[1] = (v - dst[0]) >> 2;
    return 2;
}
489 
491 {
492  int offset, fsize_needed;
493 
494  /* Write toc */
495  opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
496 
497  /* Frame sizes if needed */
498  if (fsize_needed) {
499  for (int i = 0; i < s->packet.frames - 1; i++) {
500  offset += write_opuslacing(avpkt->data + offset,
501  s->frame[i].framebits >> 3);
502  }
503  }
504 
505  /* Packets */
506  for (int i = 0; i < s->packet.frames; i++) {
507  ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset,
508  s->frame[i].framebits >> 3);
509  offset += s->frame[i].framebits >> 3;
510  }
511 
512  avpkt->size = offset;
513 }
514 
515 /* Used as overlap for the first frame and padding for the last encoded packet */
517 {
518  AVFrame *f = av_frame_alloc();
519  if (!f)
520  return NULL;
521  f->format = s->avctx->sample_fmt;
522  f->nb_samples = s->avctx->frame_size;
523  f->channel_layout = s->avctx->channel_layout;
524  if (av_frame_get_buffer(f, 4)) {
525  av_frame_free(&f);
526  return NULL;
527  }
528  for (int i = 0; i < s->channels; i++) {
529  size_t bps = av_get_bytes_per_sample(f->format);
530  memset(f->extended_data[i], 0, bps*f->nb_samples);
531  }
532  return f;
533 }
534 
535 static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
536  const AVFrame *frame, int *got_packet_ptr)
537 {
538  OpusEncContext *s = avctx->priv_data;
539  int ret, frame_size, alloc_size = 0;
540 
541  if (frame) { /* Add new frame to queue */
542  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
543  return ret;
544  ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
545  } else {
546  ff_opus_psy_signal_eof(&s->psyctx);
547  if (!s->afq.remaining_samples || !avctx->frame_number)
548  return 0; /* We've been flushed and there's nothing left to encode */
549  }
550 
551  /* Run the psychoacoustic system */
552  if (ff_opus_psy_process(&s->psyctx, &s->packet))
553  return 0;
554 
555  frame_size = OPUS_BLOCK_SIZE(s->packet.framesize);
556 
557  if (!frame) {
558  /* This can go negative, that's not a problem, we only pad if positive */
559  int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
560  /* Pad with empty 2.5 ms frames to whatever framesize was decided,
561  * this should only happen at the very last flush frame. The frames
562  * allocated here will be freed (because they have no other references)
563  * after they get used by celt_frame_setup_input() */
564  for (int i = 0; i < pad_empty; i++) {
565  AVFrame *empty = spawn_empty_frame(s);
566  if (!empty)
567  return AVERROR(ENOMEM);
568  ff_bufqueue_add(avctx, &s->bufqueue, empty);
569  }
570  }
571 
572  for (int i = 0; i < s->packet.frames; i++) {
573  celt_encode_frame(s, &s->rc[i], &s->frame[i], i);
574  alloc_size += s->frame[i].framebits >> 3;
575  }
576 
577  /* Worst case toc + the frame lengths if needed */
578  alloc_size += 2 + s->packet.frames*2;
579 
580  if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0)
581  return ret;
582 
583  /* Assemble packet */
584  opus_packet_assembler(s, avpkt);
585 
586  /* Update the psychoacoustic system */
587  ff_opus_psy_postencode_update(&s->psyctx, s->frame, s->rc);
588 
589  /* Remove samples from queue and skip if needed */
590  ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration);
591  if (s->packet.frames*frame_size > avpkt->duration) {
593  if (!side)
594  return AVERROR(ENOMEM);
595  AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120);
596  }
597 
598  *got_packet_ptr = 1;
599 
600  return 0;
601 }
602 
604 {
605  OpusEncContext *s = avctx->priv_data;
606 
607  for (int i = 0; i < CELT_BLOCK_NB; i++)
608  ff_mdct15_uninit(&s->mdct[i]);
609 
610  ff_celt_pvq_uninit(&s->pvq);
611  av_freep(&s->dsp);
612  av_freep(&s->frame);
613  av_freep(&s->rc);
614  ff_af_queue_close(&s->afq);
615  ff_opus_psy_end(&s->psyctx);
616  ff_bufqueue_discard_all(&s->bufqueue);
617  av_freep(&avctx->extradata);
618 
619  return 0;
620 }
621 
623 {
624  int ret, max_frames;
625  OpusEncContext *s = avctx->priv_data;
626 
627  s->avctx = avctx;
628  s->channels = avctx->channels;
629 
630  /* Opus allows us to change the framesize on each packet (and each packet may
631  * have multiple frames in it) but we can't change the codec's frame size on
632  * runtime, so fix it to the lowest possible number of samples and use a queue
633  * to accumulate AVFrames until we have enough to encode whatever the encoder
634  * decides is the best */
635  avctx->frame_size = 120;
636  /* Initial padding will change if SILK is ever supported */
637  avctx->initial_padding = 120;
638 
639  if (!avctx->bit_rate) {
640  int coupled = ff_opus_default_coupled_streams[s->channels - 1];
641  avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000);
642  } else if (avctx->bit_rate < 6000 || avctx->bit_rate > 255000 * s->channels) {
643  int64_t clipped_rate = av_clip(avctx->bit_rate, 6000, 255000 * s->channels);
644  av_log(avctx, AV_LOG_ERROR, "Unsupported bitrate %"PRId64" kbps, clipping to %"PRId64" kbps\n",
645  avctx->bit_rate/1000, clipped_rate/1000);
646  avctx->bit_rate = clipped_rate;
647  }
648 
649  /* Extradata */
650  avctx->extradata_size = 19;
652  if (!avctx->extradata)
653  return AVERROR(ENOMEM);
654  opus_write_extradata(avctx);
655 
656  ff_af_queue_init(avctx, &s->afq);
657 
658  if ((ret = ff_celt_pvq_init(&s->pvq, 1)) < 0)
659  return ret;
660 
661  if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
662  return AVERROR(ENOMEM);
663 
664  /* I have no idea why a base scaling factor of 68 works, could be the twiddles */
665  for (int i = 0; i < CELT_BLOCK_NB; i++)
666  if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
667  return AVERROR(ENOMEM);
668 
669  /* Zero out previous energy (matters for inter first frame) */
670  for (int ch = 0; ch < s->channels; ch++)
671  memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
672 
673  /* Allocate an empty frame to use as overlap for the first frame of audio */
674  ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s));
675  if (!ff_bufqueue_peek(&s->bufqueue, 0))
676  return AVERROR(ENOMEM);
677 
678  if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options)))
679  return ret;
680 
681  /* Frame structs and range coder buffers */
682  max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f);
683  s->frame = av_malloc(max_frames*sizeof(CeltFrame));
684  if (!s->frame)
685  return AVERROR(ENOMEM);
686  s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder));
687  if (!s->rc)
688  return AVERROR(ENOMEM);
689 
690  for (int i = 0; i < max_frames; i++) {
691  s->frame[i].dsp = s->dsp;
692  s->frame[i].avctx = s->avctx;
693  s->frame[i].seed = 0;
694  s->frame[i].pvq = s->pvq;
695  s->frame[i].apply_phase_inv = s->options.apply_phase_inv;
696  s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
697  }
698 
699  return 0;
700 }
701 
702 #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
703 static const AVOption opusenc_options[] = {
704  { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, "max_delay_ms" },
705  { "apply_phase_inv", "Apply intensity stereo phase inversion", offsetof(OpusEncContext, options.apply_phase_inv), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, OPUSENC_FLAGS, "apply_phase_inv" },
706  { NULL },
707 };
708 
709 static const AVClass opusenc_class = {
710  .class_name = "Opus encoder",
711  .item_name = av_default_item_name,
712  .option = opusenc_options,
713  .version = LIBAVUTIL_VERSION_INT,
714 };
715 
717  { "b", "0" },
718  { "compression_level", "10" },
719  { NULL },
720 };
721 
723  .name = "opus",
724  .long_name = NULL_IF_CONFIG_SMALL("Opus"),
725  .type = AVMEDIA_TYPE_AUDIO,
726  .id = AV_CODEC_ID_OPUS,
727  .defaults = opusenc_defaults,
728  .priv_class = &opusenc_class,
729  .priv_data_size = sizeof(OpusEncContext),
731  .encode2 = opus_encode_frame,
732  .close = opus_encode_end,
735  .supported_samplerates = (const int []){ 48000, 0 },
736  .channel_layouts = (const uint64_t []){ AV_CH_LAYOUT_MONO,
737  AV_CH_LAYOUT_STEREO, 0 },
738  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
740 };
static float win(SuperEqualizerContext *s, float n, int N)
#define av_cold
Definition: attributes.h:88
uint8_t
av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int64_t *duration)
Remove frame(s) from the queue.
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
uint8_t * av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type, buffer_size_t size)
Definition: avpacket.c:343
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:31
static void ff_bufqueue_add(void *log, struct FFBufQueue *queue, AVFrame *buf)
Add a buffer to the queue.
Definition: bufferqueue.h:71
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
static void ff_bufqueue_discard_all(struct FFBufQueue *queue)
Unref and remove all buffers from the queue.
Definition: bufferqueue.h:111
static AVFrame * ff_bufqueue_get(struct FFBufQueue *queue)
Get the first buffer from the queue and remove it.
Definition: bufferqueue.h:98
static av_always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
Definition: bytestream.h:372
#define s(width, name)
Definition: cbs_vp9.c:257
#define f(width, name)
Definition: cbs_vp9.c:255
#define FFMIN(a, b)
Definition: common.h:105
#define av_clip
Definition: common.h:122
#define FFMAX(a, b)
Definition: common.h:103
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
#define NULL
Definition: coverity.c:32
static __device__ float floor(float a)
Definition: cuda_runtime.h:173
static __device__ float ceilf(float a)
Definition: cuda_runtime.h:175
static AVFrame * frame
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:33
const OptionDef options[]
#define sample
@ AV_OPT_TYPE_FLOAT
Definition: opt.h:228
@ AV_OPT_TYPE_BOOL
Definition: opt.h:242
#define AV_CH_LAYOUT_MONO
#define AV_CH_LAYOUT_STEREO
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:333
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: codec.h:77
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: codec.h:82
#define AV_CODEC_CAP_EXPERIMENTAL
Codec is experimental and is thus avoided in favor of non experimental encoders.
Definition: codec.h:100
@ AV_CODEC_ID_OPUS
Definition: codec_id.h:484
#define AV_INPUT_BUFFER_PADDING_SIZE
Required number of additionally allocated bytes at the end of the input bitstream for decoding.
Definition: avcodec.h:215
@ AV_PKT_DATA_SKIP_SAMPLES
Recommmends skipping the specified number of samples.
Definition: packet.h:156
#define AVERROR(e)
Definition: error.h:43
AVFrame * av_frame_clone(const AVFrame *src)
Create a new frame that references the same data as src.
Definition: frame.c:540
int av_frame_get_buffer(AVFrame *frame, int align)
Allocate new buffer(s) for audio or video data.
Definition: frame.c:337
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:190
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:117
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt)
Return number of bytes per sample.
Definition: samplefmt.c:106
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:69
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:59
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
int index
Definition: gxfenc.c:89
for(j=16;j >0;--j)
static const int16_t alpha[]
Definition: ilbcdata.h:55
int i
Definition: input.c:407
#define AV_WL32(p, v)
Definition: intreadwrite.h:426
#define FF_CODEC_CAP_INIT_THREADSAFE
The codec does not modify any global variables in the init function, allowing to call the init functi...
Definition: internal.h:41
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: internal.h:49
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
#define log2f(x)
Definition: libm.h:409
#define lrintf(x)
Definition: libm_mips.h:70
#define FFALIGN(x, a)
Definition: macros.h:48
av_cold void ff_mdct15_uninit(MDCT15Context **ps)
Definition: mdct15.c:43
av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
Definition: mdct15.c:247
unsigned bps
Definition: movenc.c:1601
int frame_size
Definition: mxfenc.c:2206
AVOptions.
void ff_celt_quant_bands(CeltFrame *f, OpusRangeCoder *rc)
Definition: opus.c:446
void ff_celt_bitalloc(CeltFrame *f, OpusRangeCoder *rc, int encode)
Definition: opus.c:555
@ OPUS_BANDWITH_NB
Definition: opus.h:78
#define CELT_OVERLAP
Definition: opus.h:43
@ OPUS_MODE_NB
Definition: opus.h:68
#define CELT_MAX_BANDS
Definition: opus.h:46
#define CELT_POSTFILTER_MINPERIOD
Definition: opus_celt.h:45
#define CELT_ENERGY_SILENCE
Definition: opus_celt.h:46
#define CELT_MAX_FINE_BITS
Definition: opus_celt.h:41
@ CELT_BLOCK_NB
Definition: opus_celt.h:63
int av_cold ff_celt_pvq_init(CeltPVQ **pvq, int encode)
Definition: opus_pvq.c:897
void av_cold ff_celt_pvq_uninit(CeltPVQ **pvq)
Definition: opus_pvq.c:914
void ff_opus_rc_enc_end(OpusRangeCoder *rc, uint8_t *dst, int size)
Definition: opus_rc.c:360
void ff_opus_rc_enc_uint(OpusRangeCoder *rc, uint32_t val, uint32_t size)
CELT: write a uniformly distributed integer.
Definition: opus_rc.c:204
void ff_opus_rc_put_raw(OpusRangeCoder *rc, uint32_t val, uint32_t count)
CELT: write 0 - 31 bits to the rawbits buffer.
Definition: opus_rc.c:161
void ff_opus_rc_enc_cdf(OpusRangeCoder *rc, int val, const uint16_t *cdf)
Definition: opus_rc.c:109
void ff_opus_rc_enc_log(OpusRangeCoder *rc, int val, uint32_t bits)
Definition: opus_rc.c:131
void ff_opus_rc_enc_laplace(OpusRangeCoder *rc, int *value, uint32_t symbol, int decay)
Definition: opus_rc.c:314
void ff_opus_rc_enc_init(OpusRangeCoder *rc)
Definition: opus_rc.c:402
#define OPUS_RC_CHECKPOINT_ROLLBACK(rc)
Definition: opus_rc.h:123
static av_always_inline uint32_t opus_rc_tell(const OpusRangeCoder *rc)
CELT: estimate bits of entropy that have thus far been consumed for the current CELT frame,...
Definition: opus_rc.h:61
#define OPUS_RC_CHECKPOINT_SPAWN(rc)
Definition: opus_rc.h:116
#define OPUS_RC_CHECKPOINT_BITS(rc)
Definition: opus_rc.h:120
#define CELT_EMPH_COEFF
Definition: opusdsp.h:24
static void celt_quant_coarse(CeltFrame *f, OpusRangeCoder *rc, float last_energy[][CELT_MAX_BANDS])
Definition: opusenc.c:363
AVCodec ff_opus_encoder
Definition: opusenc.c:722
static const AVClass opusenc_class
Definition: opusenc.c:709
static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
Definition: opusenc.c:117
static const AVOption opusenc_options[]
Definition: opusenc.c:703
static av_cold int opus_encode_init(AVCodecContext *avctx)
Definition: opusenc.c:622
static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed)
Definition: opusenc.c:74
static AVFrame * spawn_empty_frame(OpusEncContext *s)
Definition: opusenc.c:516
static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: opusenc.c:535
static void celt_enc_tf(CeltFrame *f, OpusRangeCoder *rc)
Definition: opusenc.c:256
static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
Definition: opusenc.c:191
static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
Definition: opusenc.c:490
static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f, float last_energy[][CELT_MAX_BANDS], int intra)
Definition: opusenc.c:320
static const AVCodecDefault opusenc_defaults[]
Definition: opusenc.c:716
static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
Definition: opusenc.c:157
static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f, int index)
Definition: opusenc.c:418
static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
Definition: opusenc.c:400
#define OPUSENC_FLAGS
Definition: opusenc.c:702
static void opus_write_extradata(AVCodecContext *avctx)
Definition: opusenc.c:61
static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f)
Definition: opusenc.c:283
static av_cold int opus_encode_end(AVCodecContext *avctx)
Definition: opusenc.c:603
static int write_opuslacing(uint8_t *dst, int v)
Definition: opusenc.c:483
static void celt_quant_fine(CeltFrame *f, OpusRangeCoder *rc)
Definition: opusenc.c:383
#define OPUS_MAX_CHANNELS
Definition: opusenc.h:34
#define OPUS_BLOCK_SIZE(x)
Definition: opusenc.h:39
#define OPUS_MAX_LOOKAHEAD
Definition: opusenc.h:32
av_cold int ff_opus_psy_end(OpusPsyContext *s)
Definition: opusenc_psy.c:593
int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index)
Definition: opusenc_psy.c:455
int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p)
Definition: opusenc_psy.c:223
av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx, struct FFBufQueue *bufqueue, OpusEncOptions *options)
Definition: opusenc_psy.c:516
void ff_opus_psy_signal_eof(OpusPsyContext *s)
Definition: opusenc_psy.c:588
void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index)
Definition: opusenc_psy.c:254
void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f, OpusRangeCoder *rc)
Definition: opusenc_psy.c:479
const uint8_t ff_celt_freq_range[]
Definition: opustab.c:772
const uint8_t ff_celt_freq_bands[]
Definition: opustab.c:768
const float *const ff_celt_window
Definition: opustab.c:1135
const uint16_t ff_celt_model_energy_small[]
Definition: opustab.c:766
const uint16_t ff_celt_model_tapset[]
Definition: opustab.c:758
const uint8_t ff_opus_default_coupled_streams[]
Definition: opustab.c:27
const int8_t ff_celt_tf_select[4][2][2][2]
Definition: opustab.c:782
const uint8_t ff_celt_coarse_energy_dist[4][2][42]
Definition: opustab.c:808
const float ff_celt_alpha_coef[]
Definition: opustab.c:800
const float ff_celt_beta_coef[]
Definition: opustab.c:804
const float ff_celt_mean_energy[]
Definition: opustab.c:792
const float ff_celt_postfilter_taps[3][3]
Definition: opustab.c:1098
Describe the class of an AVClass context structure.
Definition: log.h:67
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass is associated.
Definition: log.h:72
main external API structure.
Definition: avcodec.h:536
int64_t bit_rate
the average bitrate
Definition: avcodec.h:586
int initial_padding
Audio only.
Definition: avcodec.h:2062
int sample_rate
samples per second
Definition: avcodec.h:1196
int frame_number
Frame counter, set by libavcodec.
Definition: avcodec.h:1227
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:616
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:637
int channels
number of audio channels
Definition: avcodec.h:1197
int extradata_size
Definition: avcodec.h:638
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1216
void * priv_data
Definition: avcodec.h:563
AVCodec.
Definition: codec.h:197
const char * name
Name of the codec implementation.
Definition: codec.h:204
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:384
int format
Format of the frame, -1 if unknown or unset. Values correspond to enum AVPixelFormat for video frames, enum AVSampleFormat for audio.
Definition: frame.h:391
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:365
AVOption.
Definition: opt.h:248
This structure stores compressed data.
Definition: packet.h:346
int size
Definition: packet.h:370
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: packet.h:387
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will be presented to the user.
Definition: packet.h:362
uint8_t * data
Definition: packet.h:369
Structure holding the queue.
Definition: bufferqueue.h:49
struct FFBufQueue bufqueue
Definition: opusenc.c:43
float last_quantized_energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: opusenc.c:56
float scratch[2048]
Definition: opusenc.c:58
int channels
Definition: opusenc.c:50
OpusPsyContext psyctx
Definition: opusenc.c:37
int enc_id_bits
Definition: opusenc.c:46
OpusRangeCoder * rc
Definition: opusenc.c:53
AVClass * av_class
Definition: opusenc.c:35
CeltFrame * frame
Definition: opusenc.c:52
OpusEncOptions options
Definition: opusenc.c:36
AVCodecContext * avctx
Definition: opusenc.c:38
AVFloatDSPContext * dsp
Definition: opusenc.c:40
uint8_t enc_id[64]
Definition: opusenc.c:45
MDCT15Context * mdct[CELT_BLOCK_NB]
Definition: opusenc.c:41
CeltPVQ * pvq
Definition: opusenc.c:42
OpusPacketInfo packet
Definition: opusenc.c:48
AudioFrameQueue afq
Definition: opusenc.c:39
#define av_freep(p)
#define av_malloc(s)
#define av_log(a,...)
static uint8_t tmp[11]
Definition: aes_ctr.c:27
#define src1
Definition: h264pred.c:140
static int16_t block[64]
Definition: dct.c:116
int size
const char * b
Definition: vf_curves.c:118
else temp
Definition: vf_mcdeint.c:259
static av_always_inline int diff(const uint32_t a, const uint32_t b)
static const uint8_t offset[127][2]
Definition: vf_spp.c:107
const uint8_t * quant
int len
uint8_t bits
Definition: vp3data.h:141