2 This file is part of GNUnet.
3 Copyright (C) 2013 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL-3.0-or-later
21 * @file conversation/gnunet-helper-audio-record.c
22 * @brief program to record audio data from the microphone
23 * @author Siomon Dieterle
24 * @author Andreas Fuchs
25 * @author Christian Grothoff
28 #include "gnunet_util_lib.h"
29 #include "gnunet_protocols.h"
30 #include "conversation.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_core_service.h"
34 #include <pulse/simple.h>
35 #include <pulse/error.h>
36 #include <pulse/rtclock.h>
38 #include <pulse/pulseaudio.h>
39 #include <opus/opus.h>
40 #include <opus/opus_types.h>
43 #define DEBUG_RECORD_PURE_OGG 1
48 #define SAMPLING_RATE 48000
51 * How many ms of audio to buffer before encoding them.
53 * 60, 40, 20, 10, 5, 2.5
55 #define FRAME_SIZE_MS 40
58 * How many samples to buffer before encoding them.
60 #define FRAME_SIZE (SAMPLING_RATE / 1000 * FRAME_SIZE_MS)
63 * Pages are committed when their size goes over this value.
64 * Note that in practice we flush pages VERY often (every frame),
65 * which means that pages NEVER really get to be this big.
66 * With one-packet-per-page, pages are roughly 100-300 bytes each.
68 * This value is chosen to make MAX_PAYLOAD_BYTES=1024 fit
71 #define PAGE_WATERLINE 800
74 * Maximum length of opus payload
76 #define MAX_PAYLOAD_BYTES 1024
84 * Configures the encoder's expected packet loss percentage.
86 * Higher values will trigger progressively more loss resistant behavior
87 * in the encoder at the expense of quality at a given bitrate
88 * in the lossless case, but greater quality under loss.
90 #define CONV_OPUS_PACKET_LOSS_PERCENTAGE 1
93 * Configures the encoder's computational complexity.
95 * The supported range is 0-10 inclusive with 10 representing
96 * the highest complexity.
98 #define CONV_OPUS_ENCODING_COMPLEXITY 10
101 * Configures the encoder's use of inband forward error correction (FEC).
103 * Note: This is only applicable to the LPC layer.
105 #define CONV_OPUS_INBAND_FEC 1
108 * Configures the type of signal being encoded.
110 * This is a hint which helps the encoder's mode selection.
113 * OPUS_AUTO - (default) Encoder detects the type automatically.
114 * OPUS_SIGNAL_VOICE - Bias thresholds towards choosing LPC or Hybrid modes.
115 * OPUS_SIGNAL_MUSIC - Bias thresholds towards choosing MDCT modes.
117 #define CONV_OPUS_SIGNAL OPUS_SIGNAL_VOICE
123 * OPUS_APPLICATION_VOIP - gives best quality at a given bitrate for voice
124 * signals. It enhances the input signal by high-pass filtering and
125 * emphasizing formants and harmonics. Optionally it includes in-band forward
126 * error correction to protect against packet loss. Use this mode for typical
127 * VoIP applications. Because of the enhancement, even at high bitrates
128 * the output may sound different from the input.
129 * OPUS_APPLICATION_AUDIO - gives best quality at a given bitrate for most
130 * non-voice signals like music. Use this mode for music and mixed
131 * (music/voice) content, broadcast, and applications requiring less than
132 * 15 ms of coding delay.
133 * OPUS_APPLICATION_RESTRICTED_LOWDELAY - configures low-delay mode that
134 * disables the speech-optimized mode in exchange for slightly reduced delay.
135 * This mode can only be set on a newly initialized or freshly reset encoder
136 * because it changes the codec delay.
138 #define CONV_OPUS_APP_TYPE OPUS_APPLICATION_VOIP
141 * Specification for recording. May change in the future to spec negotiation.
143 static pa_sample_spec sample_spec = {
144 .format = PA_SAMPLE_FLOAT32LE,
145 .rate = SAMPLING_RATE,
149 GNUNET_NETWORK_STRUCT_BEGIN
151 /* OggOpus spec says the numbers must be in little-endian order */
152 struct OpusHeadPacket
157 uint16_t preskip GNUNET_PACKED;
158 uint32_t sampling_rate GNUNET_PACKED;
159 uint16_t gain GNUNET_PACKED;
160 uint8_t channel_mapping;
163 struct OpusCommentsPacket
166 uint32_t vendor_length;
168 char vendor[vendor_length];
169 uint32_t string_count;
170 followed by @a string_count pairs of:
171 uint32_t string_length;
172 char string[string_length];
176 GNUNET_NETWORK_STRUCT_END
179 * Pulseaudio mainloop api
181 static pa_mainloop_api *mainloop_api;
184 * Pulseaudio mainloop
186 static pa_mainloop *m;
191 static pa_context *context;
194 * Pulseaudio recording stream
196 static pa_stream *stream_in;
199 * Pulseaudio io events
201 static pa_io_event *stdio_event;
206 static OpusEncoder *enc;
209 * Buffer for encoded data
211 static unsigned char *opus_data;
214 * PCM data buffer for one OPUS frame
216 static float *pcm_buffer;
219 * Length of the pcm data needed for one OPUS frame
221 static int pcm_length;
226 static char *transmit_buffer;
229 * Length of audio buffer
231 static size_t transmit_buffer_length;
234 * Read index for transmit buffer
236 static size_t transmit_buffer_index;
239 * Audio message skeleton
241 static struct AudioMessage *audio_message;
246 static ogg_stream_state os;
251 static int32_t packet_id;
254 * Ogg granule for current packet
256 static int64_t enc_granulepos;
258 #ifdef DEBUG_RECORD_PURE_OGG
260 * 1 to not write GNUnet message headers,
261 * producing pure playable ogg output
263 static int dump_pure_ogg;
267 * Pulseaudio shutdown task
272 mainloop_api->quit (mainloop_api,
279 write_data (const char *ptr,
285 while (off < msg_size)
287 ret = write (STDOUT_FILENO,
293 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR,
303 write_page (ogg_page *og)
305 static unsigned long long toff;
307 msg_size = sizeof (struct AudioMessage) + og->header_len + og->body_len;
308 audio_message->header.size = htons ((uint16_t) msg_size);
309 GNUNET_memcpy (&audio_message[1], og->header, og->header_len);
310 GNUNET_memcpy (((char *) &audio_message[1]) + og->header_len, og->body, og->body_len);
313 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
314 "Sending %u bytes of audio data (total: %llu)\n",
315 (unsigned int) msg_size,
317 #ifdef DEBUG_RECORD_PURE_OGG
319 write_data ((const char *) &audio_message[1],
320 og->header_len + og->body_len);
323 write_data ((const char *) audio_message,
329 * Creates OPUS packets from PCM data
340 while (transmit_buffer_length >= transmit_buffer_index + pcm_length)
342 GNUNET_memcpy (pcm_buffer,
343 &transmit_buffer[transmit_buffer_index],
345 transmit_buffer_index += pcm_length;
347 opus_encode_float (enc, pcm_buffer, FRAME_SIZE, opus_data,
352 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
353 _("opus_encode_float() failed: %s. Aborting\n"),
354 opus_strerror (len));
357 if (((uint32_t)len) > UINT16_MAX - sizeof (struct AudioMessage))
363 /* As per OggOpus spec, granule is calculated as if the audio
364 had 48kHz sampling rate. */
365 enc_granulepos += FRAME_SIZE * 48000 / SAMPLING_RATE;
367 op.packet = (unsigned char *) opus_data;
371 op.granulepos = enc_granulepos;
372 op.packetno = packet_id++;
373 ogg_stream_packetin (&os, &op);
375 while (ogg_stream_flush_fill (&os, &og, PAGE_WATERLINE))
377 if ( ((unsigned long long) og.header_len) +
378 ((unsigned long long) og.body_len) >
379 UINT16_MAX - sizeof (struct AudioMessage))
388 new_size = transmit_buffer_length - transmit_buffer_index;
391 nbuf = pa_xmalloc (new_size);
393 &transmit_buffer[transmit_buffer_index],
395 pa_xfree (transmit_buffer);
396 transmit_buffer = nbuf;
400 pa_xfree (transmit_buffer);
401 transmit_buffer = NULL;
403 transmit_buffer_index = 0;
404 transmit_buffer_length = new_size;
409 * Pulseaudio callback when new data is available.
412 stream_read_callback (pa_stream * s,
419 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
420 "Got %u/%d bytes of PCM data\n",
421 (unsigned int) length,
424 GNUNET_assert (NULL != s);
425 GNUNET_assert (length > 0);
427 mainloop_api->io_enable (stdio_event, PA_IO_EVENT_OUTPUT);
429 if (pa_stream_peek (s, (const void **) &data, &length) < 0)
431 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
432 _("pa_stream_peek() failed: %s\n"),
433 pa_strerror (pa_context_errno (context)));
437 GNUNET_assert (NULL != data);
438 GNUNET_assert (length > 0);
439 if (NULL != transmit_buffer)
441 transmit_buffer = pa_xrealloc (transmit_buffer,
442 transmit_buffer_length + length);
443 GNUNET_memcpy (&transmit_buffer[transmit_buffer_length],
446 transmit_buffer_length += length;
450 transmit_buffer = pa_xmalloc (length);
451 GNUNET_memcpy (transmit_buffer, data, length);
452 transmit_buffer_length = length;
453 transmit_buffer_index = 0;
461 * Exit callback for SIGTERM and SIGINT
464 exit_signal_callback (pa_mainloop_api * m,
473 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
474 _("Got signal, exiting.\n"));
480 * Pulseaudio stream state callback
483 stream_state_callback (pa_stream * s,
487 GNUNET_assert (NULL != s);
488 switch (pa_stream_get_state (s))
490 case PA_STREAM_CREATING:
491 case PA_STREAM_TERMINATED:
493 case PA_STREAM_READY:
495 const pa_buffer_attr *a;
496 char cmt[PA_CHANNEL_MAP_SNPRINT_MAX];
497 char sst[PA_SAMPLE_SPEC_SNPRINT_MAX];
499 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
500 _("Stream successfully created.\n"));
502 if (!(a = pa_stream_get_buffer_attr (s)))
504 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
505 _("pa_stream_get_buffer_attr() failed: %s\n"),
506 pa_strerror (pa_context_errno
507 (pa_stream_get_context (s))));
512 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
513 _("Buffer metrics: maxlength=%u, fragsize=%u\n"),
514 a->maxlength, a->fragsize);
516 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
517 _("Using sample spec '%s', channel map '%s'.\n"),
518 pa_sample_spec_snprint (sst, sizeof (sst),
519 pa_stream_get_sample_spec (s)),
520 pa_channel_map_snprint (cmt, sizeof (cmt),
521 pa_stream_get_channel_map (s)));
523 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
524 _("Connected to device %s (%u, %ssuspended).\n"),
525 pa_stream_get_device_name (s),
526 pa_stream_get_device_index (s),
527 pa_stream_is_suspended (s) ? "" : "not ");
530 case PA_STREAM_FAILED:
532 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
533 _("Stream error: %s\n"),
534 pa_strerror (pa_context_errno (pa_stream_get_context (s))));
541 * Pulseaudio context state callback
544 context_state_callback (pa_context * c,
550 switch (pa_context_get_state (c))
552 case PA_CONTEXT_CONNECTING:
553 case PA_CONTEXT_AUTHORIZING:
554 case PA_CONTEXT_SETTING_NAME:
556 case PA_CONTEXT_READY:
561 GNUNET_assert (!stream_in);
562 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
563 _("Connection established.\n"));
565 pa_stream_new (c, "GNUNET_VoIP recorder", &sample_spec, NULL)))
567 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
568 _("pa_stream_new() failed: %s\n"),
569 pa_strerror (pa_context_errno (c)));
572 pa_stream_set_state_callback (stream_in, &stream_state_callback, NULL);
573 pa_stream_set_read_callback (stream_in, &stream_read_callback, NULL);
574 memset (&na, 0, sizeof (na));
575 na.maxlength = UINT32_MAX;
576 na.fragsize = pcm_length;
577 if ((r = pa_stream_connect_record (stream_in, NULL, &na,
578 PA_STREAM_ADJUST_LATENCY)) < 0)
580 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
581 _("pa_stream_connect_record() failed: %s\n"),
582 pa_strerror (pa_context_errno (c)));
588 case PA_CONTEXT_TERMINATED:
591 case PA_CONTEXT_FAILED:
593 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
594 _("Connection failure: %s\n"),
595 pa_strerror (pa_context_errno (c)));
614 if (!pa_sample_spec_valid (&sample_spec))
616 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
619 /* set up main record loop */
620 if (!(m = pa_mainloop_new ()))
622 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
623 _("pa_mainloop_new() failed.\n"));
625 mainloop_api = pa_mainloop_get_api (m);
627 /* listen to signals */
628 r = pa_signal_init (mainloop_api);
629 GNUNET_assert (r == 0);
630 pa_signal_new (SIGINT, &exit_signal_callback, NULL);
631 pa_signal_new (SIGTERM, &exit_signal_callback, NULL);
633 /* connect to the main pulseaudio context */
635 if (!(context = pa_context_new (mainloop_api, "GNUNET VoIP")))
637 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
638 _("pa_context_new() failed.\n"));
640 pa_context_set_state_callback (context, &context_state_callback, NULL);
641 if (pa_context_connect (context, NULL, 0, NULL) < 0)
643 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
644 _("pa_context_connect() failed: %s\n"),
645 pa_strerror (pa_context_errno (context)));
647 if (pa_mainloop_run (m, &i) < 0)
649 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
650 _("pa_mainloop_run() failed.\n"));
663 pcm_length = FRAME_SIZE * CHANNELS * sizeof (float);
664 pcm_buffer = pa_xmalloc (pcm_length);
665 opus_data = GNUNET_malloc (MAX_PAYLOAD_BYTES);
666 enc = opus_encoder_create (SAMPLING_RATE,
670 opus_encoder_ctl (enc,
671 OPUS_SET_PACKET_LOSS_PERC (CONV_OPUS_PACKET_LOSS_PERCENTAGE));
672 opus_encoder_ctl (enc,
673 OPUS_SET_COMPLEXITY (CONV_OPUS_ENCODING_COMPLEXITY));
674 opus_encoder_ctl (enc,
675 OPUS_SET_INBAND_FEC (CONV_OPUS_INBAND_FEC));
676 opus_encoder_ctl (enc,
677 OPUS_SET_SIGNAL (CONV_OPUS_SIGNAL));
685 struct OpusHeadPacket headpacket;
686 struct OpusCommentsPacket *commentspacket;
687 size_t commentspacket_len;
689 serialno = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_STRONG,
691 /*Initialize Ogg stream struct*/
692 if (-1 == ogg_stream_init (&os, serialno))
694 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
695 _("ogg_stream_init() failed.\n"));
708 GNUNET_memcpy (headpacket.magic, "OpusHead", 8);
709 headpacket.version = 1;
710 headpacket.channels = CHANNELS;
711 headpacket.preskip = GNUNET_htole16 (0);
712 headpacket.sampling_rate = GNUNET_htole32 (SAMPLING_RATE);
713 headpacket.gain = GNUNET_htole16 (0);
714 headpacket.channel_mapping = 0; /* Mono or stereo */
716 op.packet = (unsigned char *) &headpacket;
717 op.bytes = sizeof (headpacket);
721 op.packetno = packet_id++;
722 ogg_stream_packetin (&os, &op);
724 /* Head packet must be alone on its page */
725 while (ogg_stream_flush (&os, &og))
730 commentspacket_len = sizeof (*commentspacket);
731 opusver = opus_get_version_string ();
732 vendor_length = strlen (opusver);
733 commentspacket_len += vendor_length;
734 commentspacket_len += sizeof (uint32_t);
736 commentspacket = (struct OpusCommentsPacket *) malloc (commentspacket_len);
737 if (NULL == commentspacket)
739 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
740 _("Failed to allocate %u bytes for second packet\n"),
741 (unsigned int) commentspacket_len);
745 GNUNET_memcpy (commentspacket->magic, "OpusTags", 8);
746 commentspacket->vendor_length = GNUNET_htole32 (vendor_length);
747 GNUNET_memcpy (&commentspacket[1], opusver, vendor_length);
748 *(uint32_t *) &((char *) &commentspacket[1])[vendor_length] = \
749 GNUNET_htole32 (0); /* no tags */
751 op.packet = (unsigned char *) commentspacket;
752 op.bytes = commentspacket_len;
756 op.packetno = packet_id++;
757 ogg_stream_packetin (&os, &op);
759 /* Comment packets must not be mixed with audio packets on their pages */
760 while (ogg_stream_flush (&os, &og))
765 free (commentspacket);
770 * The main function for the record helper.
772 * @param argc number of arguments from the command line
773 * @param argv command line arguments
774 * @return 0 ok, 1 on error
782 GNUNET_assert (GNUNET_OK ==
783 GNUNET_log_setup ("gnunet-helper-audio-record",
786 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
787 "Audio source starts\n");
788 audio_message = GNUNET_malloc (UINT16_MAX);
789 audio_message->header.type = htons (GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO);
791 #ifdef DEBUG_RECORD_PURE_OGG
792 dump_pure_ogg = getenv ("GNUNET_RECORD_PURE_OGG") ? 1 : 0;