2 This file is part of GNUnet.
3 Copyright (C) 2013 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 * @file conversation/gnunet-helper-audio-record.c
20 * @brief program to record audio data from the microphone
21 * @author Siomon Dieterle
22 * @author Andreas Fuchs
23 * @author Christian Grothoff
26 #include "gnunet_util_lib.h"
27 #include "gnunet_protocols.h"
28 #include "conversation.h"
29 #include "gnunet_constants.h"
30 #include "gnunet_core_service.h"
32 #include <pulse/simple.h>
33 #include <pulse/error.h>
34 #include <pulse/rtclock.h>
36 #include <pulse/pulseaudio.h>
37 #include <opus/opus.h>
38 #include <opus/opus_types.h>
41 #define DEBUG_RECORD_PURE_OGG 1
46 #define SAMPLING_RATE 48000
49 * How many ms of audio to buffer before encoding them.
51 * 60, 40, 20, 10, 5, 2.5
53 #define FRAME_SIZE_MS 40
56 * How many samples to buffer before encoding them.
58 #define FRAME_SIZE (SAMPLING_RATE / 1000 * FRAME_SIZE_MS)
61 * Pages are committed when their size goes over this value.
62 * Note that in practice we flush pages VERY often (every frame),
63 * which means that pages NEVER really get to be this big.
64 * With one-packet-per-page, pages are roughly 100-300 bytes each.
66 * This value is chosen to make MAX_PAYLOAD_BYTES=1024 fit
69 #define PAGE_WATERLINE 800
72 * Maximum length of opus payload
74 #define MAX_PAYLOAD_BYTES 1024
82 * Configures the encoder's expected packet loss percentage.
84 * Higher values will trigger progressively more loss resistant behavior
85 * in the encoder at the expense of quality at a given bitrate
86 * in the lossless case, but greater quality under loss.
88 #define CONV_OPUS_PACKET_LOSS_PERCENTAGE 1
91 * Configures the encoder's computational complexity.
93 * The supported range is 0-10 inclusive with 10 representing
94 * the highest complexity.
96 #define CONV_OPUS_ENCODING_COMPLEXITY 10
99 * Configures the encoder's use of inband forward error correction (FEC).
101 * Note: This is only applicable to the LPC layer.
103 #define CONV_OPUS_INBAND_FEC 1
106 * Configures the type of signal being encoded.
108 * This is a hint which helps the encoder's mode selection.
111 * OPUS_AUTO - (default) Encoder detects the type automatically.
112 * OPUS_SIGNAL_VOICE - Bias thresholds towards choosing LPC or Hybrid modes.
113 * OPUS_SIGNAL_MUSIC - Bias thresholds towards choosing MDCT modes.
115 #define CONV_OPUS_SIGNAL OPUS_SIGNAL_VOICE
121 * OPUS_APPLICATION_VOIP - gives best quality at a given bitrate for voice
122 * signals. It enhances the input signal by high-pass filtering and
123 * emphasizing formants and harmonics. Optionally it includes in-band forward
124 * error correction to protect against packet loss. Use this mode for typical
125 * VoIP applications. Because of the enhancement, even at high bitrates
126 * the output may sound different from the input.
127 * OPUS_APPLICATION_AUDIO - gives best quality at a given bitrate for most
128 * non-voice signals like music. Use this mode for music and mixed
129 * (music/voice) content, broadcast, and applications requiring less than
130 * 15 ms of coding delay.
131 * OPUS_APPLICATION_RESTRICTED_LOWDELAY - configures low-delay mode that
132 * disables the speech-optimized mode in exchange for slightly reduced delay.
133 * This mode can only be set on a newly initialized or freshly reset encoder
134 * because it changes the codec delay.
136 #define CONV_OPUS_APP_TYPE OPUS_APPLICATION_VOIP
139 * Specification for recording. May change in the future to spec negotiation.
141 static pa_sample_spec sample_spec = {
142 .format = PA_SAMPLE_FLOAT32LE,
143 .rate = SAMPLING_RATE,
147 GNUNET_NETWORK_STRUCT_BEGIN
149 /* OggOpus spec says the numbers must be in little-endian order */
150 struct OpusHeadPacket
155 uint16_t preskip GNUNET_PACKED;
156 uint32_t sampling_rate GNUNET_PACKED;
157 uint16_t gain GNUNET_PACKED;
158 uint8_t channel_mapping;
161 struct OpusCommentsPacket
164 uint32_t vendor_length;
166 char vendor[vendor_length];
167 uint32_t string_count;
168 followed by @a string_count pairs of:
169 uint32_t string_length;
170 char string[string_length];
174 GNUNET_NETWORK_STRUCT_END
177 * Pulseaudio mainloop api
179 static pa_mainloop_api *mainloop_api;
182 * Pulseaudio mainloop
184 static pa_mainloop *m;
189 static pa_context *context;
192 * Pulseaudio recording stream
194 static pa_stream *stream_in;
197 * Pulseaudio io events
199 static pa_io_event *stdio_event;
204 static OpusEncoder *enc;
207 * Buffer for encoded data
209 static unsigned char *opus_data;
212 * PCM data buffer for one OPUS frame
214 static float *pcm_buffer;
217 * Length of the pcm data needed for one OPUS frame
219 static int pcm_length;
224 static char *transmit_buffer;
227 * Length of audio buffer
229 static size_t transmit_buffer_length;
232 * Read index for transmit buffer
234 static size_t transmit_buffer_index;
237 * Audio message skeleton
239 static struct AudioMessage *audio_message;
244 static ogg_stream_state os;
249 static int32_t packet_id;
252 * Ogg granule for current packet
254 static int64_t enc_granulepos;
256 #ifdef DEBUG_RECORD_PURE_OGG
258 * 1 to not write GNUnet message headers,
259 * producing pure playable ogg output
261 static int dump_pure_ogg;
265 * Pulseaudio shutdown task
270 mainloop_api->quit (mainloop_api,
/* Write the bytes starting at @a ptr to standard output (STDOUT_FILENO). */
277 write_data (const char *ptr,
/* Keep looping for as long as the offset `off` is below `msg_size`. */
283 while (off < msg_size)
285 ret = write (STDOUT_FILENO,
/* Error-level log with strerror details; presumably reports a failed
   write() -- the remaining arguments are not visible here, confirm. */
291 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR,
/* Copy one Ogg page (header bytes followed by body bytes) behind the
   global `audio_message` header and pass the result to write_data(). */
301 write_page (ogg_page *og)
/* Static, so the value survives across calls; presumably the running byte
   total printed as "total: %llu" below -- its update is not visible here. */
303 static unsigned long long toff;
/* Full message size: AudioMessage header + Ogg page header + Ogg page body. */
305 msg_size = sizeof (struct AudioMessage) + og->header_len + og->body_len;
/* Size is stored in network byte order.
   NOTE(review): the cast truncates to 16 bits and no range check is visible
   at this site; confirm every caller guarantees msg_size <= UINT16_MAX. */
306 audio_message->header.size = htons ((uint16_t) msg_size);
/* The page header goes directly after the AudioMessage struct ... */
307 GNUNET_memcpy (&audio_message[1], og->header, og->header_len);
/* ... and the page body directly after the page header. */
308 GNUNET_memcpy (((char *) &audio_message[1]) + og->header_len, og->body, og->body_len);
311 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
312 "Sending %u bytes of audio data (total: %llu)\n",
313 (unsigned int) msg_size,
315 #ifdef DEBUG_RECORD_PURE_OGG
/* Debug path: write only the Ogg page bytes (header_len + body_len) that
   follow the AudioMessage header, i.e. without the GNUnet header. */
317 write_data ((const char *) &audio_message[1],
318 og->header_len + og->body_len);
/* This call starts at the AudioMessage header itself; its length argument
   is not visible here. */
321 write_data ((const char *) audio_message,
327 * Creates OPUS packets from PCM data
338 while (transmit_buffer_length >= transmit_buffer_index + pcm_length)
340 GNUNET_memcpy (pcm_buffer,
341 &transmit_buffer[transmit_buffer_index],
343 transmit_buffer_index += pcm_length;
345 opus_encode_float (enc, pcm_buffer, FRAME_SIZE, opus_data,
350 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
351 _("opus_encode_float() failed: %s. Aborting\n"),
352 opus_strerror (len));
355 if (((uint32_t)len) > UINT16_MAX - sizeof (struct AudioMessage))
361 /* As per OggOpus spec, granule is calculated as if the audio
362 had 48kHz sampling rate. */
363 enc_granulepos += FRAME_SIZE * 48000 / SAMPLING_RATE;
365 op.packet = (unsigned char *) opus_data;
369 op.granulepos = enc_granulepos;
370 op.packetno = packet_id++;
371 ogg_stream_packetin (&os, &op);
373 while (ogg_stream_flush_fill (&os, &og, PAGE_WATERLINE))
375 if ( ((unsigned long long) og.header_len) +
376 ((unsigned long long) og.body_len) >
377 UINT16_MAX - sizeof (struct AudioMessage))
386 new_size = transmit_buffer_length - transmit_buffer_index;
389 nbuf = pa_xmalloc (new_size);
391 &transmit_buffer[transmit_buffer_index],
393 pa_xfree (transmit_buffer);
394 transmit_buffer = nbuf;
398 pa_xfree (transmit_buffer);
399 transmit_buffer = NULL;
401 transmit_buffer_index = 0;
402 transmit_buffer_length = new_size;
407 * Pulseaudio callback when new data is available.
/* Read callback registered on the recording stream: fetches the pending
   PCM bytes with pa_stream_peek() and appends them to `transmit_buffer`. */
410 stream_read_callback (pa_stream * s,
417 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
418 "Got %u/%d bytes of PCM data\n",
419 (unsigned int) length,
422 GNUNET_assert (NULL != s);
423 GNUNET_assert (length > 0);
/* Enable output events on the stdio I/O event source. */
425 mainloop_api->io_enable (stdio_event, PA_IO_EVENT_OUTPUT);
/* pa_stream_peek() overwrites `length` with the size of the fragment it
   hands out through `data`. */
427 if (pa_stream_peek (s, (const void **) &data, &length) < 0)
429 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
430 _("pa_stream_peek() failed: %s\n"),
431 pa_strerror (pa_context_errno (context)));
/* NOTE(review): per the PulseAudio documentation pa_stream_peek() may
   return data == NULL with a non-zero length for a hole in the stream, and
   NULL/0 when nothing is readable; both would trip these asserts.  Confirm
   whether that can occur for this stream. */
435 GNUNET_assert (NULL != data);
436 GNUNET_assert (length > 0);
/* A buffer already exists: enlarge it by `length` bytes and append the new
   data at the old end (offset transmit_buffer_length). */
437 if (NULL != transmit_buffer)
439 transmit_buffer = pa_xrealloc (transmit_buffer,
440 transmit_buffer_length + length);
441 GNUNET_memcpy (&transmit_buffer[transmit_buffer_length],
444 transmit_buffer_length += length;
/* No buffer yet: allocate one of exactly `length` bytes, copy the data in
   and restart the read index at 0. */
448 transmit_buffer = pa_xmalloc (length);
449 GNUNET_memcpy (transmit_buffer, data, length);
450 transmit_buffer_length = length;
451 transmit_buffer_index = 0;
459 * Exit callback for SIGTERM and SIGINT
/* Signal callback (registered for SIGINT and SIGTERM elsewhere in this
   file); logs at INFO level that a signal arrived. */
462 exit_signal_callback (pa_mainloop_api * m,
471 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
472 _("Got signal, exiting.\n"));
478 * Pulseaudio stream state callback
/* Stream state callback: dispatches on pa_stream_get_state() and logs
   details once the stream is ready, or the error if it failed. */
481 stream_state_callback (pa_stream * s,
485 GNUNET_assert (NULL != s);
486 switch (pa_stream_get_state (s))
/* CREATING and TERMINATED share one case body (not visible here). */
488 case PA_STREAM_CREATING:
489 case PA_STREAM_TERMINATED:
/* READY: log the buffer attributes, sample spec / channel map and the
   device the stream is connected to. */
491 case PA_STREAM_READY:
493 const pa_buffer_attr *a;
/* Scratch buffers for the snprint helpers, sized with PulseAudio's own
   *_SNPRINT_MAX constants. */
494 char cmt[PA_CHANNEL_MAP_SNPRINT_MAX];
495 char sst[PA_SAMPLE_SPEC_SNPRINT_MAX];
497 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
498 _("Stream successfully created.\n"));
/* A NULL buffer-attribute pointer is treated as an error and logged. */
500 if (!(a = pa_stream_get_buffer_attr (s)))
502 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
503 _("pa_stream_get_buffer_attr() failed: %s\n"),
504 pa_strerror (pa_context_errno
505 (pa_stream_get_context (s))));
510 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
511 _("Buffer metrics: maxlength=%u, fragsize=%u\n"),
512 a->maxlength, a->fragsize);
514 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
515 _("Using sample spec '%s', channel map '%s'.\n"),
516 pa_sample_spec_snprint (sst, sizeof (sst),
517 pa_stream_get_sample_spec (s)),
518 pa_channel_map_snprint (cmt, sizeof (cmt),
519 pa_stream_get_channel_map (s)));
/* The "%ssuspended" conversion receives "" or "not ", so the message reads
   "suspended" or "not suspended".
   NOTE(review): pa_stream_is_suspended() is documented to return a negative
   value on error, which is also truthy here and would print "suspended". */
521 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
522 _("Connected to device %s (%u, %ssuspended).\n"),
523 pa_stream_get_device_name (s),
524 pa_stream_get_device_index (s),
525 pa_stream_is_suspended (s) ? "" : "not ");
/* FAILED: log the error text of the context that owns the stream. */
528 case PA_STREAM_FAILED:
530 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
531 _("Stream error: %s\n"),
532 pa_strerror (pa_context_errno (pa_stream_get_context (s))));
539 * Pulseaudio context state callback
/* Context state callback: dispatches on pa_context_get_state(); once the
   context is ready it creates the recording stream and connects it. */
542 context_state_callback (pa_context * c,
548 switch (pa_context_get_state (c))
/* Intermediate connection states share one case body (not visible here). */
550 case PA_CONTEXT_CONNECTING:
551 case PA_CONTEXT_AUTHORIZING:
552 case PA_CONTEXT_SETTING_NAME:
554 case PA_CONTEXT_READY:
/* No recording stream may exist yet when the context becomes ready.  The
   stream is then created on context `c` with the name
   "GNUNET_VoIP recorder" and the file-level `sample_spec`. */
559 GNUNET_assert (!stream_in);
560 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
561 _("Connection established.\n"));
563 pa_stream_new (c, "GNUNET_VoIP recorder", &sample_spec, NULL)))
565 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
566 _("pa_stream_new() failed: %s\n"),
567 pa_strerror (pa_context_errno (c)));
/* Register the state and read callbacks defined in this file; no user data
   pointer is passed (NULL). */
570 pa_stream_set_state_callback (stream_in, &stream_state_callback, NULL);
571 pa_stream_set_read_callback (stream_in, &stream_read_callback, NULL);
/* Buffer attributes: everything zeroed except maxlength and fragsize.
   fragsize is set to pcm_length (byte length of one Opus frame of PCM).
   NOTE(review): UINT32_MAX equals (uint32_t) -1, which the PulseAudio
   pa_buffer_attr documentation describes as "let the server choose" --
   confirm that this is the intent for maxlength. */
572 memset (&na, 0, sizeof (na));
573 na.maxlength = UINT32_MAX;
574 na.fragsize = pcm_length;
/* Connect for recording on the default source (NULL device name) with
   PA_STREAM_ADJUST_LATENCY; a negative result is logged as an error. */
575 if ((r = pa_stream_connect_record (stream_in, NULL, &na,
576 PA_STREAM_ADJUST_LATENCY)) < 0)
578 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
579 _("pa_stream_connect_record() failed: %s\n"),
580 pa_strerror (pa_context_errno (c)));
586 case PA_CONTEXT_TERMINATED:
/* FAILED: log the context's error text. */
589 case PA_CONTEXT_FAILED:
591 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
592 _("Connection failure: %s\n"),
593 pa_strerror (pa_context_errno (c)));
612 if (!pa_sample_spec_valid (&sample_spec))
614 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
617 /* set up main record loop */
618 if (!(m = pa_mainloop_new ()))
620 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
621 _("pa_mainloop_new() failed.\n"));
623 mainloop_api = pa_mainloop_get_api (m);
625 /* listen to signals */
626 r = pa_signal_init (mainloop_api);
627 GNUNET_assert (r == 0);
628 pa_signal_new (SIGINT, &exit_signal_callback, NULL);
629 pa_signal_new (SIGTERM, &exit_signal_callback, NULL);
631 /* connect to the main pulseaudio context */
633 if (!(context = pa_context_new (mainloop_api, "GNUNET VoIP")))
635 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
636 _("pa_context_new() failed.\n"));
638 pa_context_set_state_callback (context, &context_state_callback, NULL);
639 if (pa_context_connect (context, NULL, 0, NULL) < 0)
641 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
642 _("pa_context_connect() failed: %s\n"),
643 pa_strerror (pa_context_errno (context)));
645 if (pa_mainloop_run (m, &i) < 0)
647 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
648 _("pa_mainloop_run() failed.\n"));
661 pcm_length = FRAME_SIZE * CHANNELS * sizeof (float);
662 pcm_buffer = pa_xmalloc (pcm_length);
663 opus_data = GNUNET_malloc (MAX_PAYLOAD_BYTES);
664 enc = opus_encoder_create (SAMPLING_RATE,
668 opus_encoder_ctl (enc,
669 OPUS_SET_PACKET_LOSS_PERC (CONV_OPUS_PACKET_LOSS_PERCENTAGE));
670 opus_encoder_ctl (enc,
671 OPUS_SET_COMPLEXITY (CONV_OPUS_ENCODING_COMPLEXITY));
672 opus_encoder_ctl (enc,
673 OPUS_SET_INBAND_FEC (CONV_OPUS_INBAND_FEC));
674 opus_encoder_ctl (enc,
675 OPUS_SET_SIGNAL (CONV_OPUS_SIGNAL));
683 struct OpusHeadPacket headpacket;
684 struct OpusCommentsPacket *commentspacket;
685 size_t commentspacket_len;
687 serialno = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_STRONG,
689 /*Initialize Ogg stream struct*/
690 if (-1 == ogg_stream_init (&os, serialno))
692 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
693 _("ogg_stream_init() failed.\n"));
706 GNUNET_memcpy (headpacket.magic, "OpusHead", 8);
707 headpacket.version = 1;
708 headpacket.channels = CHANNELS;
709 headpacket.preskip = GNUNET_htole16 (0);
710 headpacket.sampling_rate = GNUNET_htole32 (SAMPLING_RATE);
711 headpacket.gain = GNUNET_htole16 (0);
712 headpacket.channel_mapping = 0; /* Mono or stereo */
714 op.packet = (unsigned char *) &headpacket;
715 op.bytes = sizeof (headpacket);
719 op.packetno = packet_id++;
720 ogg_stream_packetin (&os, &op);
722 /* Head packet must be alone on its page */
723 while (ogg_stream_flush (&os, &og))
728 commentspacket_len = sizeof (*commentspacket);
729 opusver = opus_get_version_string ();
730 vendor_length = strlen (opusver);
731 commentspacket_len += vendor_length;
732 commentspacket_len += sizeof (uint32_t);
734 commentspacket = (struct OpusCommentsPacket *) malloc (commentspacket_len);
735 if (NULL == commentspacket)
737 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
738 _("Failed to allocate %u bytes for second packet\n"),
739 (unsigned int) commentspacket_len);
743 GNUNET_memcpy (commentspacket->magic, "OpusTags", 8);
744 commentspacket->vendor_length = GNUNET_htole32 (vendor_length);
745 GNUNET_memcpy (&commentspacket[1], opusver, vendor_length);
746 *(uint32_t *) &((char *) &commentspacket[1])[vendor_length] = \
747 GNUNET_htole32 (0); /* no tags */
749 op.packet = (unsigned char *) commentspacket;
750 op.bytes = commentspacket_len;
754 op.packetno = packet_id++;
755 ogg_stream_packetin (&os, &op);
757 /* Comment packets must not be mixed with audio packets on their pages */
758 while (ogg_stream_flush (&os, &og))
763 free (commentspacket);
768 * The main function for the record helper.
770 * @param argc number of arguments from the command line
771 * @param argv command line arguments
772 * @return 0 ok, 1 on error
780 GNUNET_assert (GNUNET_OK ==
781 GNUNET_log_setup ("gnunet-helper-audio-record",
784 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
785 "Audio source starts\n");
786 audio_message = GNUNET_malloc (UINT16_MAX);
787 audio_message->header.type = htons (GNUNET_MESSAGE_TYPE_CONVERSATION_AUDIO);
789 #ifdef DEBUG_RECORD_PURE_OGG
790 dump_pure_ogg = getenv ("GNUNET_RECORD_PURE_OGG") ? 1 : 0;