SqMod/vendor/DPP/include/dpp/discordvoiceclient.h

/************************************************************************************
 *
 * D++, A Lightweight C++ library for Discord
 *
 * Copyright 2021 Craig Edwards and D++ contributors
 * (https://github.com/brainboxdotcc/DPP/graphs/contributors)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ************************************************************************************/
#pragma once

#include <dpp/export.h>

#include <errno.h>

#ifdef _WIN32
#include <WinSock2.h>
#include <WS2tcpip.h>
#include <io.h>
#else
#include <resolv.h>
#include <netdb.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <unistd.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <string>
#include <map>
#include <vector>
#include <dpp/json_fwd.hpp>
#include <dpp/wsclient.h>
#include <dpp/dispatcher.h>
#include <dpp/cluster.h>
#include <queue>
#include <thread>
#include <deque>
#include <mutex>

#ifdef HAVE_VOICE
#include <sodium.h>
#include <opus/opus.h>
#endif


using json = nlohmann::json;

namespace dpp {

// Forward declaration
class cluster;

#define AUDIO_TRACK_MARKER (uint16_t)0xFFFF

/** @brief Implements a discord voice connection.
 * Each discord_voice_client connects to one voice channel and derives from a websocket client.
 */
class CoreExport discord_voice_client : public websocket_client
{
	/** Mutex for outbound packet stream */
	std::mutex stream_mutex;

	/** Mutex for message queue */
	std::mutex queue_mutex;

	/** Queue of outbound messages */
	std::deque<std::string> message_queue;

	/** Thread this connection is executing on */
	std::thread* runner;

	/** Run shard loop under a thread */
	void ThreadRun();

	/** Last connect time of voice session */
	time_t connect_time;

	/**
	 * @brief IP of UDP/RTP endpoint
	 */
	std::string ip;

	/**
	 * @brief Port number of UDP/RTP endpoint
	 */
	uint16_t port;

	/**
	 * @brief SSRC value
	 */
	uint64_t ssrc;

	/**
	 * @brief List of supported audio encoding modes
	 */
	std::vector<std::string> modes;

	/** Output buffer. Each string is a UDP packet.
	 * Generally these will be RTP.
	 */
	std::vector<std::string> outbuf;

	/** Input  buffer. Each string is a received UDP
	 * packet. These will usually be RTP.
	 */
	std::vector<std::string> inbuf;

	/** If true, audio packet sending is paused
	 */
	bool paused;

#ifdef HAVE_VOICE
	/** libopus encoder
	 */
	OpusEncoder* encoder;

	/** libopus decoder
	 */
	OpusDecoder* decoder;

	/** libopus repacketizer
	 * (merges frames into one packet)
	 */
	OpusRepacketizer* repacketizer;
#endif

	/** File descriptor for UDP connection
	 */
	SOCKET fd;

	/** Socket address of voice server
	 */
	struct sockaddr_in servaddr;

	/** Secret key for encrypting voice.
	 * If it has been sent, this is non-null and points to a
	 * sequence of exactly 32 bytes.
	 */
	uint8_t* secret_key;

	/** Sequence number of outbound audio. This is incremented
	 * once per frame sent.
	 */
	uint16_t sequence;

	/** Timestamp value used in outbound audio. Each packet
	 * has the timestamp value which is incremented to match
	 * how many frames are sent.
	 */
	uint32_t timestamp;

	/** This is set to true if we have started sending audio.
	 * When this moves from false to true, this causes the
	 * client to send the 'talking' notification to the websocket.
	 */
	bool sending;

	/** Number of track markers in the buffer. For example if there
	 * are two track markers in the buffer there are 3 tracks.
	 * Special case:
	 * If the buffer is empty, there are zero tracks in the
	 * buffer.
	 */
	uint32_t tracks;

	/** Meta data associated with each track.
	 * Arbitrary string that the user can set via
	 * dpp::discord_voice_client::AddMarker
	 */
	std::vector<std::string> track_meta;

	/** Encoding buffer for opus repacketizer and encode
	 */
	uint8_t encode_buffer[65536];

	/**
	 * @brief Send data to UDP socket immediately.
	 *
	 * @param data data to send
	 * @param length length of data to send
	 * @return int bytes sent. Will return -1 if we cannot send
	 */
	int UDPSend(const char* data, size_t length);

	/**
	 * @brief Receive data from UDP socket immediately.
	 *
	 * @param data data to receive
	 * @param max_length size of data receiving buffer
	 * @return int bytes received. -1 if there is an error
	 * (e.g. EAGAIN)
	 */
	int UDPRecv(char* data, size_t max_length);

	/**
	 * @brief This hooks the ssl_client, returning the file
	 * descriptor if we want to send buffered data, or
	 * -1 if there is nothing to send
	 *
	 * @return int file descriptor or -1
	 */
	int WantWrite();

	/**
	 * @brief This hooks the ssl_client, returning the file
	 * descriptor if we want to receive buffered data, or
	 * -1 if we are not wanting to receive
	 *
	 * @return int file descriptor or -1
	 */
	int WantRead();

	/**
	 * @brief Called by ssl_client when the socket is ready
	 * for writing, at this point we pick the head item off
	 * the buffer and send it. So long as it doesnt error
	 * completely, we pop it off the head of the queue.
	 */
	void WriteReady();

	/**
	 * @brief Called by ssl_client when there is data to be
	 * read. At this point we insert that data into the
	 * input queue.
	 */
	void ReadReady();

	/**
	 * @brief Send data to the UDP socket, using the buffer.
	 *
	 * @param packet packet data
	 * @param len length of packet
	 */
	void Send(const char* packet, size_t len);

	/**
	 * @brief Queue a message to be sent via the websocket
	 *
	 * @param j The JSON data of the message to be sent
	 * @param to_front If set to true, will place the message at the front of the queue not the back
	 * (this is for urgent messages such as heartbeat, presence, so they can take precedence over
	 * chunk requests etc)
	 */
	void QueueMessage(const std::string &j, bool to_front = false);

	/**
	 * @brief Clear the outbound message queue
	 *
	 */
	void ClearQueue();

	/**
	 * @brief Get the size of the outbound message queue
	 *
	 * @return The size of the queue
	 */
	size_t GetQueueSize();

	/**
	 * @brief Encode a byte buffer using opus codec.
	 * Multiple opus frames (2880 bytes each) will be encoded into one packet for sending.
	 *
	 * @param input Input data as raw bytes of PCM data
	 * @param inDataSize Input data length
	 * @param output Output data as an opus encoded packet
	 * @param outDataSize Output data length, should be at least equal to the input size.
	 * Will be adjusted on return to the actual compressed data size.
	 * @return size_t The compressed data size that was encoded.
	 */
	size_t encode(uint8_t *input, size_t inDataSize, uint8_t *output, size_t &outDataSize);

public:

	/** Owning cluster */
	class dpp::cluster* creator;

	/* This needs to be static, we only initialise libsodium once per program start,
	* so initialising it on first use in a voice connection is best.
	*/
	static bool sodium_initialised;

	/** True when the thread is shutting down */
	bool terminating;

	/** Heartbeat interval for sending heartbeat keepalive */
	uint32_t heartbeat_interval;

	/** Last heartbeat */
	time_t last_heartbeat;

	/** Thread ID */
	std::thread::native_handle_type thread_id;

	/** Discord voice session token */
	std::string token;

	/** Discord voice session id */
	std::string sessionid;

	/** Server ID */
	snowflake server_id;

	/** Channel ID */
	snowflake channel_id;

	/** Log a message to whatever log the user is using.
	 * The logged message is passed up the chain to the on_log event in user code which can then do whatever
	 * it wants to do with it.
	 * @param severity The log level from dpp::loglevel
	 * @param msg The log message to output
	 */
	virtual void log(dpp::loglevel severity, const std::string &msg);

	/** Fires every second from the underlying socket I/O loop, used for sending heartbeats */
	virtual void one_second_timer();

	/**
	 * @brief voice client is ready to stream audio.
	 * The voice client is considered ready if it has a secret key.
	 *
	 * @return true if ready to stream audio
	 */
	bool is_ready();

	/**
	 * @brief Returns true if the voice client is connected to the websocket
	 *
	 * @return True if connected
	 */
	bool is_connected();

	/**
	 * @brief Returns the connection time of the voice client
	 *
	 * @return dpp::utility::uptime Detail of how long the voice client has been connected for
	 */
	dpp::utility::uptime get_uptime();

	/** Constructor takes shard id, max shards and token.
	 * @param _cluster The owning cluster for this shard
	 * @param _server_id The server id to identify voice connection as
	 * @param _token The voice session token to use for identifying to the websocket
	 * @param _session_id The voice session id to identify with
	 * @param _host The voice server hostname to connect to (hostname:port format)
	 */
	discord_voice_client(dpp::cluster* _cluster, snowflake _channel_id, snowflake _server_id, const std::string &_token, const std::string &_session_id, const std::string &_host);

	/** Destructor */
	virtual ~discord_voice_client();

	/** Handle JSON from the websocket.
	 * @param buffer The entire buffer content from the websocket client
	 * @returns True if a frame has been handled
	 */
	virtual bool HandleFrame(const std::string &buffer);

	/** Handle a websocket error.
	 * @param errorcode The error returned from the websocket
	 */
	virtual void Error(uint32_t errorcode);

	/** Start and monitor I/O loop */
	void Run();

	/**
	 * @brief Send audio to the voice channel.
	 *
	 * You should send an audio packet of n11520 bytes.
	 * Note that this function can be costly as it has to opus encode
	 * the PCM audio on the fly, and also encrypt it with libsodium.
	 *
	 * @note Because this function encrypts and encodes packets before
	 * pushing them onto the output queue, if you have a complete stream
	 * ready to send and know its length it is advisable to call this
	 * method multiple times to enqueue the entire stream audio so that
	 * it is all encoded at once. Constantly calling this from the
	 * dpp::on_voice_buffer_send callback can and will eat a TON of cpu!
	 *
	 * @param audio_data Raw PCM audio data. Channels are interleaved,
	 * with each channel's amplitude being a 16 bit value.
	 * @param length The length of the audio data. The length should
	 * be a multiple of 4 (2x 16 bit stero channels) with a maximum
	 * length of 11520, which is a complete opus frame at highest
	 * quality.
	 * @param use_opus Some containers such as .ogg may contain OPUS
	 * encoded data already. In this case, we don't need to encode the
	 * frames using opus here. We can set use_opus to false and bypass the
	 * codec, only applying libsodium to the stream.
	 */
	void send_audio(uint16_t* audio_data, const size_t length, bool use_opus = true);

	/**
	 * @brief Pause sending of audio
	 *
	 * @param pause True to pause, false to resume
	 */
	void pause_audio(bool pause);

	/**
	 * @brief Immediately stop all audio.
	 * Clears the packet queue.
	 */
	void stop_audio();

	/**
	 * @brief Returns true if we are playing audio
	 *
	 * @return true if audio is playing
	 */
	bool is_playing();

	/**
	 * @brief Get the number of seconds remaining
	 * of the audio output buffer
	 *
	 * @return float number of seconds remaining
	 */
	float get_secs_remaining();

	/**
	 * @brief Get the number of tracks remaining
	 * in the output buffer.
	 * This is calculated by the number of track
	 * markers plus one.
	 * @return uint32_t Number of tracks in the
	 * buffer
	 */
	uint32_t get_tracks_remaining();

	/**
	 * @brief Get the time remaining to send the
	 * audio output buffer in hours:minutes:seconds
	 *
	 * @return dpp::utility::uptime length of buffer
	 */
	dpp::utility::uptime get_remaining();

	/**
	 * @brief Insert a track marker into the audio
	 * output buffer.
	 * A track marker is an arbitrary flag in the
	 * buffer contents that indictes the end of some
	 * block of audio of significance to the sender.
	 * This may be a song from a streaming site, or
	 * some voice audio/speech, a sound effect, or
	 * whatever you choose. You can later skip
	 * to the next marker using the
	 * dpp::discord_voice_client::skip_to_next_marker
	 * function.
	 * @param metadata Arbitrary information related to this
	 * track
	 */
	void insert_marker(const std::string& metadata = "");

	/**
	 * @brief Skip tp the next track marker,
	 * previously inserted by using the
	 * dpp::discord_voice_client::insert_marker
	 * function. If there are no markers in the
	 * output buffer, then this skips to the end
	 * of the buffer and is equivalent to the
	 * dpp::discord_voice_client::stop_audio
	 * function.
	 * @note It is possible to use this function
	 * while the output stream is paused.
	 */
	void skip_to_next_marker();

	/**
	 * @brief Get the metdata string associated with each inserted marker.
	 *
	 * @return const std::vector<std::string>& list of metadata strings
	 */
	const std::vector<std::string> get_marker_metadata();

	/**
	 * @brief Returns true if the audio is paused.
	 * You can unpause with
	 * dpp::discord_voice_client::pause_audio.
	 *
	 * @return true if paused
	 */
	bool is_paused();

	/**
	 * @brief Discord external IP detection.
	 * @return std::string Your external IP address
	 * @note This is a blocking operation that waits
	 * for a single packet from Discord's voice servers.
	 */
	std::string discover_ip();
};

};