2 This file is part of GNUnet.
3 Copyright (C) 2013 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL-3.0-or-later
21 * @file conversation/gnunet-helper-audio-record.c
22 * @brief program to record audio data from the microphone
23 * @author Siomon Dieterle
24 * @author Andreas Fuchs
25 * @author Christian Grothoff
28 #include "gnunet_util_lib.h"
29 #include "gnunet_protocols.h"
30 #include "conversation.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_core_service.h"
34 #include <pulse/simple.h>
35 #include <pulse/error.h>
36 #include <pulse/rtclock.h>
38 #include <pulse/pulseaudio.h>
39 #include <opus/opus.h>
40 #include <opus/opus_types.h>
/* When defined, the dump_pure_ogg flag is compiled in and is set from the
   GNUNET_RECORD_PURE_OGG environment variable. */
43 #define DEBUG_RECORD_PURE_OGG 1
/* Sampling rate in Hz; used for the PulseAudio sample spec, the Opus
   encoder and the OpusHead header. */
48 #define SAMPLING_RATE 48000
51 * How many ms of audio to buffer before encoding them.
53 * 60, 40, 20, 10, 5, 2.5
55 #define FRAME_SIZE_MS 40
58 * How many samples to buffer before encoding them.
/* Per-channel sample count: with the values above this is
   48000 / 1000 * 40 = 1920.  pcm_length multiplies it by CHANNELS and
   sizeof(float) to get the byte size of one frame. */
60 #define FRAME_SIZE (SAMPLING_RATE / 1000 * FRAME_SIZE_MS)
63 * Pages are committed when their size goes over this value.
64 * Note that in practice we flush pages VERY often (every frame),
65 * which means that pages NEVER really get to be this big.
66 * With one-packet-per-page, pages are roughly 100-300 bytes each.
68 * This value is chosen to make MAX_PAYLOAD_BYTES=1024 fit
71 #define PAGE_WATERLINE 800
74 * Maximum length of opus payload
76 #define MAX_PAYLOAD_BYTES 1024
84 * Configures the encoder's expected packet loss percentage.
86 * Higher values will trigger progressively more loss resistant behavior
87 * in the encoder at the expense of quality at a given bitrate
88 * in the lossless case, but greater quality under loss.
90 #define CONV_OPUS_PACKET_LOSS_PERCENTAGE 1
93 * Configures the encoder's computational complexity.
95 * The supported range is 0-10 inclusive with 10 representing
96 * the highest complexity.
98 #define CONV_OPUS_ENCODING_COMPLEXITY 10
101 * Configures the encoder's use of inband forward error correction (FEC).
103 * Note: This is only applicable to the LPC layer.
105 #define CONV_OPUS_INBAND_FEC 1
108 * Configures the type of signal being encoded.
110 * This is a hint which helps the encoder's mode selection.
113 * OPUS_AUTO - (default) Encoder detects the type automatically.
114 * OPUS_SIGNAL_VOICE - Bias thresholds towards choosing LPC or Hybrid modes.
115 * OPUS_SIGNAL_MUSIC - Bias thresholds towards choosing MDCT modes.
117 #define CONV_OPUS_SIGNAL OPUS_SIGNAL_VOICE
123 * OPUS_APPLICATION_VOIP - gives best quality at a given bitrate for voice
124 * signals. It enhances the input signal by high-pass filtering and
125 * emphasizing formants and harmonics. Optionally it includes in-band forward
126 * error correction to protect against packet loss. Use this mode for typical
127 * VoIP applications. Because of the enhancement, even at high bitrates
128 * the output may sound different from the input.
129 * OPUS_APPLICATION_AUDIO - gives best quality at a given bitrate for most
130 * non-voice signals like music. Use this mode for music and mixed
131 * (music/voice) content, broadcast, and applications requiring less than
132 * 15 ms of coding delay.
133 * OPUS_APPLICATION_RESTRICTED_LOWDELAY - configures low-delay mode that
134 * disables the speech-optimized mode in exchange for slightly reduced delay.
135 * This mode can only be set on a newly initialized or freshly reset encoder
136 * because it changes the codec delay.
138 #define CONV_OPUS_APP_TYPE OPUS_APPLICATION_VOIP
141 * Specification for recording. May change in the future to spec negotiation.
143 static pa_sample_spec sample_spec = {
144 .format = PA_SAMPLE_FLOAT32LE,
145 .rate = SAMPLING_RATE,
149 GNUNET_NETWORK_STRUCT_BEGIN
151 /* OggOpus spec says the numbers must be in little-endian order */
152 struct OpusHeadPacket
157 uint16_t preskip GNUNET_PACKED;
158 uint32_t sampling_rate GNUNET_PACKED;
159 uint16_t gain GNUNET_PACKED;
160 uint8_t channel_mapping;
163 struct OpusCommentsPacket
166 uint32_t vendor_length;
168 char vendor[vendor_length];
169 uint32_t string_count;
170 followed by @a string_count pairs of:
171 uint32_t string_length;
172 char string[string_length];
176 GNUNET_NETWORK_STRUCT_END
179 * Pulseaudio mainloop api
181 static pa_mainloop_api *mainloop_api;
184 * Pulseaudio mainloop
186 static pa_mainloop *m;
/* Pulseaudio context, created with pa_context_new(). */
191 static pa_context *context;
194 * Pulseaudio recording stream
196 static pa_stream *stream_in;
199 * Pulseaudio io events
201 static pa_io_event *stdio_event;
/* Opus encoder, created with opus_encoder_create(). */
206 static OpusEncoder *enc;
209 * Buffer for encoded data
/* Allocated with MAX_PAYLOAD_BYTES bytes. */
211 static unsigned char *opus_data;
214 * PCM data buffer for one OPUS frame
216 static float *pcm_buffer;
219 * Length of the pcm data needed for one OPUS frame
/* In bytes: FRAME_SIZE * CHANNELS * sizeof(float). */
221 static int pcm_length;
/* Buffer accumulating the PCM data obtained via pa_stream_peek();
   allocated and grown with pa_xmalloc()/pa_xrealloc(). */
226 static char *transmit_buffer;
229 * Length of audio buffer
231 static size_t transmit_buffer_length;
234 * Read index for transmit buffer
236 static size_t transmit_buffer_index;
239 * Audio message skeleton
/* Allocated with UINT16_MAX bytes; header.type is preset to
   GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO. */
241 static struct AudioMessage *audio_message;
/* Ogg stream state, initialised with ogg_stream_init(). */
246 static ogg_stream_state os;
/* Running packet counter; its value is assigned to op.packetno and then
   incremented for every packet handed to ogg_stream_packetin(). */
251 static int32_t packet_id;
254 * Ogg granule for current packet
256 static int64_t enc_granulepos;
258 #ifdef DEBUG_RECORD_PURE_OGG
260 * 1 to not write GNUnet message headers,
261 * producing pure playable ogg output
263 static int dump_pure_ogg;
267 * Pulseaudio shutdown task
272 mainloop_api->quit (mainloop_api,
279 write_data (const char *ptr,
286 while (off < msg_size)
288 ret = write (STDOUT_FILENO,
294 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR,
304 write_page (ogg_page *og)
306 static unsigned long long toff;
309 msg_size = sizeof(struct AudioMessage) + og->header_len + og->body_len;
310 audio_message->header.size = htons ((uint16_t) msg_size);
311 GNUNET_memcpy (&audio_message[1], og->header, og->header_len);
312 GNUNET_memcpy (((char *) &audio_message[1]) + og->header_len, og->body,
316 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
317 "Sending %u bytes of audio data (total: %llu)\n",
318 (unsigned int) msg_size,
320 #ifdef DEBUG_RECORD_PURE_OGG
322 write_data ((const char *) &audio_message[1],
323 og->header_len + og->body_len);
326 write_data ((const char *) audio_message,
332 * Creates OPUS packets from PCM data
343 while (transmit_buffer_length >= transmit_buffer_index + pcm_length)
345 GNUNET_memcpy (pcm_buffer,
346 &transmit_buffer[transmit_buffer_index],
348 transmit_buffer_index += pcm_length;
350 opus_encode_float (enc, pcm_buffer, FRAME_SIZE, opus_data,
355 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
356 _ ("opus_encode_float() failed: %s. Aborting\n"),
357 opus_strerror (len));
360 if (((uint32_t) len) > UINT16_MAX - sizeof(struct AudioMessage))
366 /* As per OggOpus spec, granule is calculated as if the audio
367 had 48kHz sampling rate. */
368 enc_granulepos += FRAME_SIZE * 48000 / SAMPLING_RATE;
370 op.packet = (unsigned char *) opus_data;
374 op.granulepos = enc_granulepos;
375 op.packetno = packet_id++;
376 ogg_stream_packetin (&os, &op);
378 while (ogg_stream_flush_fill (&os, &og, PAGE_WATERLINE))
380 if (((unsigned long long) og.header_len)
381 + ((unsigned long long) og.body_len) >
382 UINT16_MAX - sizeof(struct AudioMessage))
391 new_size = transmit_buffer_length - transmit_buffer_index;
394 nbuf = pa_xmalloc (new_size);
396 &transmit_buffer[transmit_buffer_index],
398 pa_xfree (transmit_buffer);
399 transmit_buffer = nbuf;
403 pa_xfree (transmit_buffer);
404 transmit_buffer = NULL;
406 transmit_buffer_index = 0;
407 transmit_buffer_length = new_size;
412 * Pulseaudio callback when new data is available.
415 stream_read_callback (pa_stream *s,
422 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
423 "Got %u/%d bytes of PCM data\n",
424 (unsigned int) length,
427 GNUNET_assert (NULL != s);
428 GNUNET_assert (length > 0);
430 mainloop_api->io_enable (stdio_event, PA_IO_EVENT_OUTPUT);
432 if (pa_stream_peek (s, (const void **) &data, &length) < 0)
434 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
435 _ ("pa_stream_peek() failed: %s\n"),
436 pa_strerror (pa_context_errno (context)));
440 GNUNET_assert (NULL != data);
441 GNUNET_assert (length > 0);
442 if (NULL != transmit_buffer)
444 transmit_buffer = pa_xrealloc (transmit_buffer,
445 transmit_buffer_length + length);
446 GNUNET_memcpy (&transmit_buffer[transmit_buffer_length],
449 transmit_buffer_length += length;
453 transmit_buffer = pa_xmalloc (length);
454 GNUNET_memcpy (transmit_buffer, data, length);
455 transmit_buffer_length = length;
456 transmit_buffer_index = 0;
464 * Exit callback for SIGTERM and SIGINT
467 exit_signal_callback (pa_mainloop_api *m,
476 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
477 _ ("Got signal, exiting.\n"));
483 * Pulseaudio stream state callback
486 stream_state_callback (pa_stream *s,
490 GNUNET_assert (NULL != s);
491 switch (pa_stream_get_state (s))
493 case PA_STREAM_CREATING:
494 case PA_STREAM_TERMINATED:
497 case PA_STREAM_READY:
499 const pa_buffer_attr *a;
500 char cmt[PA_CHANNEL_MAP_SNPRINT_MAX];
501 char sst[PA_SAMPLE_SPEC_SNPRINT_MAX];
503 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
504 _ ("Stream successfully created.\n"));
506 if (! (a = pa_stream_get_buffer_attr (s)))
508 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
509 _ ("pa_stream_get_buffer_attr() failed: %s\n"),
510 pa_strerror (pa_context_errno
511 (pa_stream_get_context (s))));
515 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
516 _ ("Buffer metrics: maxlength=%u, fragsize=%u\n"),
517 a->maxlength, a->fragsize);
519 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
520 _ ("Using sample spec '%s', channel map '%s'.\n"),
521 pa_sample_spec_snprint (sst, sizeof(sst),
522 pa_stream_get_sample_spec (s)),
523 pa_channel_map_snprint (cmt, sizeof(cmt),
524 pa_stream_get_channel_map (s)));
526 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
527 _ ("Connected to device %s (%u, %ssuspended).\n"),
528 pa_stream_get_device_name (s),
529 pa_stream_get_device_index (s),
530 pa_stream_is_suspended (s) ? "" : "not ");
534 case PA_STREAM_FAILED:
536 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
537 _ ("Stream error: %s\n"),
538 pa_strerror (pa_context_errno (pa_stream_get_context (s))));
545 * Pulseaudio context state callback
548 context_state_callback (pa_context *c,
554 switch (pa_context_get_state (c))
556 case PA_CONTEXT_CONNECTING:
557 case PA_CONTEXT_AUTHORIZING:
558 case PA_CONTEXT_SETTING_NAME:
561 case PA_CONTEXT_READY:
566 GNUNET_assert (! stream_in);
567 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
568 _ ("Connection established.\n"));
570 pa_stream_new (c, "GNUNET_VoIP recorder", &sample_spec, NULL)))
572 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
573 _ ("pa_stream_new() failed: %s\n"),
574 pa_strerror (pa_context_errno (c)));
577 pa_stream_set_state_callback (stream_in, &stream_state_callback, NULL);
578 pa_stream_set_read_callback (stream_in, &stream_read_callback, NULL);
579 memset (&na, 0, sizeof(na));
580 na.maxlength = UINT32_MAX;
581 na.fragsize = pcm_length;
582 if ((r = pa_stream_connect_record (stream_in, NULL, &na,
583 PA_STREAM_ADJUST_LATENCY)) < 0)
585 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
586 _ ("pa_stream_connect_record() failed: %s\n"),
587 pa_strerror (pa_context_errno (c)));
594 case PA_CONTEXT_TERMINATED:
598 case PA_CONTEXT_FAILED:
600 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
601 _ ("Connection failure: %s\n"),
602 pa_strerror (pa_context_errno (c)));
621 if (! pa_sample_spec_valid (&sample_spec))
623 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
626 /* set up main record loop */
627 if (! (m = pa_mainloop_new ()))
629 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
630 _ ("pa_mainloop_new() failed.\n"));
632 mainloop_api = pa_mainloop_get_api (m);
634 /* listen to signals */
635 r = pa_signal_init (mainloop_api);
636 GNUNET_assert (r == 0);
637 pa_signal_new (SIGINT, &exit_signal_callback, NULL);
638 pa_signal_new (SIGTERM, &exit_signal_callback, NULL);
640 /* connect to the main pulseaudio context */
642 if (! (context = pa_context_new (mainloop_api, "GNUNET VoIP")))
644 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
645 _ ("pa_context_new() failed.\n"));
647 pa_context_set_state_callback (context, &context_state_callback, NULL);
648 if (pa_context_connect (context, NULL, 0, NULL) < 0)
650 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
651 _ ("pa_context_connect() failed: %s\n"),
652 pa_strerror (pa_context_errno (context)));
654 if (pa_mainloop_run (m, &i) < 0)
656 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
657 _ ("pa_mainloop_run() failed.\n"));
670 pcm_length = FRAME_SIZE * CHANNELS * sizeof(float);
671 pcm_buffer = pa_xmalloc (pcm_length);
672 opus_data = GNUNET_malloc (MAX_PAYLOAD_BYTES);
673 enc = opus_encoder_create (SAMPLING_RATE,
677 opus_encoder_ctl (enc,
678 OPUS_SET_PACKET_LOSS_PERC (
679 CONV_OPUS_PACKET_LOSS_PERCENTAGE));
680 opus_encoder_ctl (enc,
681 OPUS_SET_COMPLEXITY (CONV_OPUS_ENCODING_COMPLEXITY));
682 opus_encoder_ctl (enc,
683 OPUS_SET_INBAND_FEC (CONV_OPUS_INBAND_FEC));
684 opus_encoder_ctl (enc,
685 OPUS_SET_SIGNAL (CONV_OPUS_SIGNAL));
693 struct OpusHeadPacket headpacket;
694 struct OpusCommentsPacket *commentspacket;
695 size_t commentspacket_len;
697 serialno = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_STRONG,
699 /*Initialize Ogg stream struct*/
700 if (-1 == ogg_stream_init (&os, serialno))
702 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
703 _ ("ogg_stream_init() failed.\n"));
716 GNUNET_memcpy (headpacket.magic, "OpusHead", 8);
717 headpacket.version = 1;
718 headpacket.channels = CHANNELS;
719 headpacket.preskip = GNUNET_htole16 (0);
720 headpacket.sampling_rate = GNUNET_htole32 (SAMPLING_RATE);
721 headpacket.gain = GNUNET_htole16 (0);
722 headpacket.channel_mapping = 0; /* Mono or stereo */
724 op.packet = (unsigned char *) &headpacket;
725 op.bytes = sizeof(headpacket);
729 op.packetno = packet_id++;
730 ogg_stream_packetin (&os, &op);
732 /* Head packet must be alone on its page */
733 while (ogg_stream_flush (&os, &og))
738 commentspacket_len = sizeof(*commentspacket);
739 opusver = opus_get_version_string ();
740 vendor_length = strlen (opusver);
741 commentspacket_len += vendor_length;
742 commentspacket_len += sizeof(uint32_t);
744 commentspacket = (struct OpusCommentsPacket *) malloc (commentspacket_len);
745 if (NULL == commentspacket)
747 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
748 _ ("Failed to allocate %u bytes for second packet\n"),
749 (unsigned int) commentspacket_len);
753 GNUNET_memcpy (commentspacket->magic, "OpusTags", 8);
754 commentspacket->vendor_length = GNUNET_htole32 (vendor_length);
755 GNUNET_memcpy (&commentspacket[1], opusver, vendor_length);
756 *(uint32_t *) &((char *) &commentspacket[1])[vendor_length] = \
757 GNUNET_htole32 (0); /* no tags */
759 op.packet = (unsigned char *) commentspacket;
760 op.bytes = commentspacket_len;
764 op.packetno = packet_id++;
765 ogg_stream_packetin (&os, &op);
767 /* Comment packets must not be mixed with audio packets on their pages */
768 while (ogg_stream_flush (&os, &og))
773 free (commentspacket);
778 * The main function for the record helper.
780 * @param argc number of arguments from the command line
781 * @param argv command line arguments
782 * @return 0 ok, 1 on error
790 GNUNET_assert (GNUNET_OK ==
791 GNUNET_log_setup ("gnunet-helper-audio-record",
794 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
795 "Audio source starts\n");
796 audio_message = GNUNET_malloc (UINT16_MAX);
797 audio_message->header.type = htons (GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO);
799 #ifdef DEBUG_RECORD_PURE_OGG
800 dump_pure_ogg = getenv ("GNUNET_RECORD_PURE_OGG") ? 1 : 0;