From eed1131337036ffab8f86f5c469f52e15e458ce8 Mon Sep 17 00:00:00 2001 From: Luca Kellermann Date: Fri, 1 Nov 2024 22:19:40 +0100 Subject: [PATCH] Implement new voice encryption modes The new encryption modes are aead_aes256_gcm_rtpsize and aead_xchacha20_poly1305_rtpsize. XSalsa20 Poly1305 encryption is deprecated for Discord voice connections and will be discontinued as of November 18th, 2024. See https://discord.com/developers/docs/change-log#voice-encryption-modes --- gradle/libs.versions.toml | 2 + voice/api/voice.api | 46 ++++- voice/build.gradle.kts | 2 + .../kotlin/dev/kord/voice/EncryptionMode.kt | 40 +++- .../java/com/iwebpp/crypto/TweetNaclFast.java | 17 ++ voice/src/main/kotlin/EncryptionMode.kt | 40 +++- voice/src/main/kotlin/VoiceConnection.kt | 68 ++++++- .../src/main/kotlin/VoiceConnectionBuilder.kt | 25 ++- .../kotlin/encryption/AeadAes256GcmRtpSize.kt | 147 ++++++++++++++ .../AeadXChaCha20Poly1305RtpSize.kt | 132 ++++++++++++ .../kotlin/encryption/VoicePacketEncrypt.kt | 188 ++++++++++++++++++ .../encryption/XSalsa20Poly1305Codec.kt | 9 +- .../encryption/XSalsa20Poly1305Encryption.kt | 2 + .../strategies/LiteNonceStrategy.kt | 7 +- .../encryption/strategies/NonceStrategy.kt | 5 + .../strategies/NormalNonceStrategy.kt | 7 +- .../strategies/SuffixNonceStrategy.kt | 7 +- .../main/kotlin/handlers/StreamsHandler.kt | 8 +- .../kotlin/handlers/UdpLifeCycleHandler.kt | 31 ++- .../src/main/kotlin/streams/DefaultStreams.kt | 176 +++++++++++++++- voice/src/main/kotlin/streams/NOPStreams.kt | 17 +- voice/src/main/kotlin/streams/Streams.kt | 23 +++ voice/src/main/kotlin/udp/AudioFrameSender.kt | 31 ++- .../main/kotlin/udp/AudioPacketProvider.kt | 94 ++++++++- .../main/kotlin/udp/DecryptedVoicePacket.kt | 31 +++ .../kotlin/udp/DefaultAudioFrameSender.kt | 118 ++++++++++- voice/src/main/kotlin/udp/RTPPacket.kt | 3 + 27 files changed, 1210 insertions(+), 66 deletions(-) create mode 100644 voice/src/main/kotlin/encryption/AeadAes256GcmRtpSize.kt create mode 100644 
voice/src/main/kotlin/encryption/AeadXChaCha20Poly1305RtpSize.kt create mode 100644 voice/src/main/kotlin/encryption/VoicePacketEncrypt.kt create mode 100644 voice/src/main/kotlin/udp/DecryptedVoicePacket.kt diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 1599989c8fa..0b6156a780b 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -11,6 +11,7 @@ kord-cache = "0.5.4" # https://github.com/kordlib/cache # implementation dependencies kotlin-logging = "7.0.0" # https://github.com/oshai/kotlin-logging slf4j = "2.0.16" # https://www.slf4j.org +tink = "1.15.0" # https://github.com/tink-crypto/tink-java kotlin-node = "22.5.4-pre.818" # https://github.com/JetBrains/kotlin-wrappers bignum = "0.3.10" # https://github.com/ionspin/kotlin-multiplatform-bignum stately = "2.1.0" # https://github.com/touchlab/Stately @@ -58,6 +59,7 @@ kotlinx-datetime = { module = "org.jetbrains.kotlinx:kotlinx-datetime", version. # other kotlin-logging = { module = "io.github.oshai:kotlin-logging", version.ref = "kotlin-logging" } slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" } +tink = { module = "com.google.crypto.tink:tink", version.ref = "tink" } kotlin-node = { module = "org.jetbrains.kotlin-wrappers:kotlin-node", version.ref = "kotlin-node" } # JDK replacements diff --git a/voice/api/voice.api b/voice/api/voice.api index 513ead64fbb..9aa4367f194 100644 --- a/voice/api/voice.api +++ b/voice/api/voice.api @@ -204,6 +204,14 @@ public abstract class dev/kord/voice/EncryptionMode { public final fun toString ()Ljava/lang/String; } +public final class dev/kord/voice/EncryptionMode$AeadAes256GcmRtpSize : dev/kord/voice/EncryptionMode { + public static final field INSTANCE Ldev/kord/voice/EncryptionMode$AeadAes256GcmRtpSize; +} + +public final class dev/kord/voice/EncryptionMode$AeadXChaCha20Poly1305RtpSize : dev/kord/voice/EncryptionMode { + public static final field INSTANCE 
Ldev/kord/voice/EncryptionMode$AeadXChaCha20Poly1305RtpSize; +} + public final class dev/kord/voice/EncryptionMode$Companion { public final fun from (Ljava/lang/String;)Ldev/kord/voice/EncryptionMode; public final fun getEntries ()Ljava/util/List; @@ -317,6 +325,7 @@ public final class dev/kord/voice/SpeakingFlags$Companion { } public final class dev/kord/voice/VoiceConnection { + public synthetic fun (Ldev/kord/voice/VoiceConnectionData;Ldev/kord/gateway/Gateway;Ldev/kord/voice/gateway/VoiceGateway;Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/gateway/VoiceGatewayConfiguration;Ldev/kord/voice/streams/Streams;Ldev/kord/voice/AudioProvider;Ldev/kord/voice/FrameInterceptor;Ldev/kord/voice/udp/AudioFrameSender;JLkotlin/jvm/internal/DefaultConstructorMarker;)V public synthetic fun (Ldev/kord/voice/VoiceConnectionData;Ldev/kord/gateway/Gateway;Ldev/kord/voice/gateway/VoiceGateway;Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/gateway/VoiceGatewayConfiguration;Ldev/kord/voice/streams/Streams;Ldev/kord/voice/AudioProvider;Ldev/kord/voice/FrameInterceptor;Ldev/kord/voice/udp/AudioFrameSender;Ldev/kord/voice/encryption/strategies/NonceStrategy;JLkotlin/jvm/internal/DefaultConstructorMarker;)V public final fun connect (Lkotlinx/coroutines/CoroutineScope;Lkotlin/coroutines/Continuation;)Ljava/lang/Object; public static synthetic fun connect$default (Ldev/kord/voice/VoiceConnection;Lkotlinx/coroutines/CoroutineScope;Lkotlin/coroutines/Continuation;ILjava/lang/Object;)Ljava/lang/Object; @@ -1061,13 +1070,17 @@ public final class dev/kord/voice/io/ReadableByteArrayCursor { } public final class dev/kord/voice/streams/DefaultStreams : dev/kord/voice/streams/Streams { + public fun (Ldev/kord/voice/gateway/VoiceGateway;Ldev/kord/voice/udp/VoiceUdpSocket;)V public fun (Ldev/kord/voice/gateway/VoiceGateway;Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/encryption/strategies/NonceStrategy;)V public fun getIncomingAudioFrames ()Lkotlinx/coroutines/flow/Flow; public 
synthetic fun getIncomingAudioPackets ()Lkotlinx/coroutines/flow/Flow; public fun getIncomingAudioPackets ()Lkotlinx/coroutines/flow/SharedFlow; public synthetic fun getIncomingUserStreams ()Lkotlinx/coroutines/flow/Flow; public fun getIncomingUserStreams ()Lkotlinx/coroutines/flow/SharedFlow; + public synthetic fun getIncomingVoicePackets ()Lkotlinx/coroutines/flow/Flow; + public fun getIncomingVoicePackets ()Lkotlinx/coroutines/flow/SharedFlow; public fun getSsrcToUser ()Ljava/util/Map; + public fun listen ([BLio/ktor/network/sockets/SocketAddress;Ldev/kord/voice/EncryptionMode;Lkotlin/coroutines/Continuation;)Ljava/lang/Object; public fun listen ([BLio/ktor/network/sockets/SocketAddress;Lkotlin/coroutines/Continuation;)Ljava/lang/Object; } @@ -1076,7 +1089,9 @@ public final class dev/kord/voice/streams/NOPStreams : dev/kord/voice/streams/St public fun getIncomingAudioFrames ()Lkotlinx/coroutines/flow/Flow; public fun getIncomingAudioPackets ()Lkotlinx/coroutines/flow/Flow; public fun getIncomingUserStreams ()Lkotlinx/coroutines/flow/Flow; + public fun getIncomingVoicePackets ()Lkotlinx/coroutines/flow/Flow; public fun getSsrcToUser ()Ljava/util/Map; + public fun listen ([BLio/ktor/network/sockets/SocketAddress;Ldev/kord/voice/EncryptionMode;Lkotlin/coroutines/Continuation;)Ljava/lang/Object; public fun listen ([BLio/ktor/network/sockets/SocketAddress;Lkotlin/coroutines/Continuation;)Ljava/lang/Object; } @@ -1084,7 +1099,9 @@ public abstract interface class dev/kord/voice/streams/Streams { public abstract fun getIncomingAudioFrames ()Lkotlinx/coroutines/flow/Flow; public abstract fun getIncomingAudioPackets ()Lkotlinx/coroutines/flow/Flow; public abstract fun getIncomingUserStreams ()Lkotlinx/coroutines/flow/Flow; + public abstract fun getIncomingVoicePackets ()Lkotlinx/coroutines/flow/Flow; public abstract fun getSsrcToUser ()Ljava/util/Map; + public abstract fun listen 
([BLio/ktor/network/sockets/SocketAddress;Ldev/kord/voice/EncryptionMode;Lkotlin/coroutines/Continuation;)Ljava/lang/Object; public abstract fun listen ([BLio/ktor/network/sockets/SocketAddress;Lkotlin/coroutines/Continuation;)Ljava/lang/Object; } @@ -1093,14 +1110,19 @@ public abstract interface class dev/kord/voice/udp/AudioFrameSender { } public final class dev/kord/voice/udp/AudioFrameSenderConfiguration { + public synthetic fun (Lio/ktor/network/sockets/SocketAddress;I[BLdev/kord/voice/FrameInterceptorConfiguration;Ldev/kord/voice/EncryptionMode;Lkotlin/jvm/internal/DefaultConstructorMarker;)V public synthetic fun (Lio/ktor/network/sockets/SocketAddress;I[BLdev/kord/voice/FrameInterceptorConfiguration;Lkotlin/jvm/internal/DefaultConstructorMarker;)V public final fun component1 ()Lio/ktor/network/sockets/SocketAddress; public final fun component2-pVg5ArA ()I public final fun component3 ()[B public final fun component4 ()Ldev/kord/voice/FrameInterceptorConfiguration; - public final fun copy-Yuhug_o (Lio/ktor/network/sockets/SocketAddress;I[BLdev/kord/voice/FrameInterceptorConfiguration;)Ldev/kord/voice/udp/AudioFrameSenderConfiguration; + public final fun component5 ()Ldev/kord/voice/EncryptionMode; + public final synthetic fun copy-Yuhug_o (Lio/ktor/network/sockets/SocketAddress;I[BLdev/kord/voice/FrameInterceptorConfiguration;)Ldev/kord/voice/udp/AudioFrameSenderConfiguration; public static synthetic fun copy-Yuhug_o$default (Ldev/kord/voice/udp/AudioFrameSenderConfiguration;Lio/ktor/network/sockets/SocketAddress;I[BLdev/kord/voice/FrameInterceptorConfiguration;ILjava/lang/Object;)Ldev/kord/voice/udp/AudioFrameSenderConfiguration; + public final fun copy-roUYKiI (Lio/ktor/network/sockets/SocketAddress;I[BLdev/kord/voice/FrameInterceptorConfiguration;Ldev/kord/voice/EncryptionMode;)Ldev/kord/voice/udp/AudioFrameSenderConfiguration; + public static synthetic fun copy-roUYKiI$default 
(Ldev/kord/voice/udp/AudioFrameSenderConfiguration;Lio/ktor/network/sockets/SocketAddress;I[BLdev/kord/voice/FrameInterceptorConfiguration;Ldev/kord/voice/EncryptionMode;ILjava/lang/Object;)Ldev/kord/voice/udp/AudioFrameSenderConfiguration; public fun equals (Ljava/lang/Object;)Z + public final fun getEncryptionMode ()Ldev/kord/voice/EncryptionMode; public final fun getInterceptorConfiguration ()Ldev/kord/voice/FrameInterceptorConfiguration; public final fun getKey ()[B public final fun getServer ()Lio/ktor/network/sockets/SocketAddress; @@ -1110,12 +1132,30 @@ public final class dev/kord/voice/udp/AudioFrameSenderConfiguration { } public abstract class dev/kord/voice/udp/AudioPacketProvider { + public fun ([B)V public fun ([BLdev/kord/voice/encryption/strategies/NonceStrategy;)V public final fun getKey ()[B public final fun getNonceStrategy ()Ldev/kord/voice/encryption/strategies/NonceStrategy; public abstract fun provide-jfaDVJw (SII[B)Ldev/kord/voice/io/ByteArrayView; } +public final class dev/kord/voice/udp/DecryptedVoicePacket { + public synthetic fun (SIILdev/kord/voice/udp/DecryptedVoicePacket$HeaderExtension;[BLkotlin/jvm/internal/DefaultConstructorMarker;)V + public synthetic fun (SII[ILdev/kord/voice/udp/DecryptedVoicePacket$HeaderExtension;[BLkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun getCsrcs--hP7Qyg ()[I + public final fun getDecryptedAudio ()[B + public final fun getHeaderExtension ()Ldev/kord/voice/udp/DecryptedVoicePacket$HeaderExtension; + public final fun getSequenceNumber-Mh2AYeg ()S + public final fun getSsrc-pVg5ArA ()I + public final fun getTimestamp-pVg5ArA ()I +} + +public final class dev/kord/voice/udp/DecryptedVoicePacket$HeaderExtension { + public synthetic fun (S[ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun getDefinedByProfile-Mh2AYeg ()S + public final fun getHeaderExtension--hP7Qyg ()[I +} + public final class dev/kord/voice/udp/DefaultAudioFrameSender : 
dev/kord/voice/udp/AudioFrameSender { public fun (Ldev/kord/voice/udp/DefaultAudioFrameSenderData;)V public final fun getData ()Ldev/kord/voice/udp/DefaultAudioFrameSenderData; @@ -1123,12 +1163,15 @@ public final class dev/kord/voice/udp/DefaultAudioFrameSender : dev/kord/voice/u } public final class dev/kord/voice/udp/DefaultAudioFrameSenderData { + public fun (Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/FrameInterceptor;Ldev/kord/voice/AudioProvider;)V public fun (Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/FrameInterceptor;Ldev/kord/voice/AudioProvider;Ldev/kord/voice/encryption/strategies/NonceStrategy;)V public final fun component1 ()Ldev/kord/voice/udp/VoiceUdpSocket; public final fun component2 ()Ldev/kord/voice/FrameInterceptor; public final fun component3 ()Ldev/kord/voice/AudioProvider; public final fun component4 ()Ldev/kord/voice/encryption/strategies/NonceStrategy; + public final fun copy (Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/FrameInterceptor;Ldev/kord/voice/AudioProvider;)Ldev/kord/voice/udp/DefaultAudioFrameSenderData; public final fun copy (Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/FrameInterceptor;Ldev/kord/voice/AudioProvider;Ldev/kord/voice/encryption/strategies/NonceStrategy;)Ldev/kord/voice/udp/DefaultAudioFrameSenderData; + public static synthetic fun copy$default (Ldev/kord/voice/udp/DefaultAudioFrameSenderData;Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/FrameInterceptor;Ldev/kord/voice/AudioProvider;ILjava/lang/Object;)Ldev/kord/voice/udp/DefaultAudioFrameSenderData; public static synthetic fun copy$default (Ldev/kord/voice/udp/DefaultAudioFrameSenderData;Ldev/kord/voice/udp/VoiceUdpSocket;Ldev/kord/voice/FrameInterceptor;Ldev/kord/voice/AudioProvider;Ldev/kord/voice/encryption/strategies/NonceStrategy;ILjava/lang/Object;)Ldev/kord/voice/udp/DefaultAudioFrameSenderData; public fun equals (Ljava/lang/Object;)Z public final fun getInterceptor ()Ldev/kord/voice/FrameInterceptor; @@ -1140,6 +1183,7 @@ 
public final class dev/kord/voice/udp/DefaultAudioFrameSenderData { } public final class dev/kord/voice/udp/DefaultAudioPacketProvider : dev/kord/voice/udp/AudioPacketProvider { + public fun ([BLdev/kord/voice/EncryptionMode;)V public fun ([BLdev/kord/voice/encryption/strategies/NonceStrategy;)V public fun provide-jfaDVJw (SII[B)Ldev/kord/voice/io/ByteArrayView; } diff --git a/voice/build.gradle.kts b/voice/build.gradle.kts index 5eddf01309a..f436b94a366 100644 --- a/voice/build.gradle.kts +++ b/voice/build.gradle.kts @@ -12,6 +12,8 @@ dependencies { implementation(libs.kotlin.logging) implementation(libs.slf4j.api) + implementation(libs.tink) + compileOnly(projects.kspAnnotations) api(libs.ktor.network) diff --git a/voice/build/generated/ksp/main/kotlin/dev/kord/voice/EncryptionMode.kt b/voice/build/generated/ksp/main/kotlin/dev/kord/voice/EncryptionMode.kt index 86877499ceb..54ec358701b 100644 --- a/voice/build/generated/ksp/main/kotlin/dev/kord/voice/EncryptionMode.kt +++ b/voice/build/generated/ksp/main/kotlin/dev/kord/voice/EncryptionMode.kt @@ -15,7 +15,7 @@ import kotlinx.serialization.encoding.Encoder /** * See [EncryptionMode]s in the - * [Discord Developer Documentation](https://discord.com/developers/docs/topics/voice-connections#establishing-a-voice-udp-connection-encryption-modes). + * [Discord Developer Documentation](https://discord.com/developers/docs/topics/voice-connections#transport-encryption-modes). */ @Serializable(with = EncryptionMode.Serializer::class) public sealed class EncryptionMode( @@ -42,10 +42,32 @@ public sealed class EncryptionMode( `value`: String, ) : EncryptionMode(value) + public object AeadAes256GcmRtpSize : EncryptionMode("aead_aes256_gcm_rtpsize") + + public object AeadXChaCha20Poly1305RtpSize : EncryptionMode("aead_xchacha20_poly1305_rtpsize") + + @Deprecated( + message = + "Use 'EncryptionMode.from(\"xsalsa20_poly1305\")' if you need to keep using this deprecated 'EncryptionMode'. 
XSalsa20 Poly1305 encryption is deprecated for Discord voice connections and will be discontinued as of November 18th, 2024. As of this date, the voice gateway will not allow you to connect with one of the deprecated encryption modes. See https://discord.com/developers/docs/change-log#voice-encryption-modes for details. The deprecation level will be raised to ERROR in 0.17.0, to HIDDEN in 0.18.0, and this object will be removed in 0.19.0.", + replaceWith = ReplaceWith(expression = "EncryptionMode.from(\"xsalsa20_poly1305\")", imports + = arrayOf("dev.kord.voice.EncryptionMode")), + ) public object XSalsa20Poly1305 : EncryptionMode("xsalsa20_poly1305") + @Deprecated( + message = + "Use 'EncryptionMode.from(\"xsalsa20_poly1305_suffix\")' if you need to keep using this deprecated 'EncryptionMode'. XSalsa20 Poly1305 encryption is deprecated for Discord voice connections and will be discontinued as of November 18th, 2024. As of this date, the voice gateway will not allow you to connect with one of the deprecated encryption modes. See https://discord.com/developers/docs/change-log#voice-encryption-modes for details. The deprecation level will be raised to ERROR in 0.17.0, to HIDDEN in 0.18.0, and this object will be removed in 0.19.0.", + replaceWith = ReplaceWith(expression = "EncryptionMode.from(\"xsalsa20_poly1305_suffix\")", + imports = arrayOf("dev.kord.voice.EncryptionMode")), + ) public object XSalsa20Poly1305Suffix : EncryptionMode("xsalsa20_poly1305_suffix") + @Deprecated( + message = + "Use 'EncryptionMode.from(\"xsalsa20_poly1305_lite\")' if you need to keep using this deprecated 'EncryptionMode'. XSalsa20 Poly1305 encryption is deprecated for Discord voice connections and will be discontinued as of November 18th, 2024. As of this date, the voice gateway will not allow you to connect with one of the deprecated encryption modes. See https://discord.com/developers/docs/change-log#voice-encryption-modes for details. 
The deprecation level will be raised to ERROR in 0.17.0, to HIDDEN in 0.18.0, and this object will be removed in 0.19.0.", + replaceWith = ReplaceWith(expression = "EncryptionMode.from(\"xsalsa20_poly1305_lite\")", + imports = arrayOf("dev.kord.voice.EncryptionMode")), + ) public object XSalsa20Poly1305Lite : EncryptionMode("xsalsa20_poly1305_lite") internal object Serializer : KSerializer { @@ -65,9 +87,11 @@ public sealed class EncryptionMode( */ public val entries: List by lazy(mode = PUBLICATION) { listOf( - XSalsa20Poly1305, - XSalsa20Poly1305Suffix, - XSalsa20Poly1305Lite, + AeadAes256GcmRtpSize, + AeadXChaCha20Poly1305RtpSize, + @Suppress("DEPRECATION") XSalsa20Poly1305, + @Suppress("DEPRECATION") XSalsa20Poly1305Suffix, + @Suppress("DEPRECATION") XSalsa20Poly1305Lite, ) } @@ -76,9 +100,11 @@ public sealed class EncryptionMode( * specified [value]. */ public fun from(`value`: String): EncryptionMode = when (value) { - "xsalsa20_poly1305" -> XSalsa20Poly1305 - "xsalsa20_poly1305_suffix" -> XSalsa20Poly1305Suffix - "xsalsa20_poly1305_lite" -> XSalsa20Poly1305Lite + "aead_aes256_gcm_rtpsize" -> AeadAes256GcmRtpSize + "aead_xchacha20_poly1305_rtpsize" -> AeadXChaCha20Poly1305RtpSize + "xsalsa20_poly1305" -> @Suppress("DEPRECATION") XSalsa20Poly1305 + "xsalsa20_poly1305_suffix" -> @Suppress("DEPRECATION") XSalsa20Poly1305Suffix + "xsalsa20_poly1305_lite" -> @Suppress("DEPRECATION") XSalsa20Poly1305Lite else -> Unknown(value) } } diff --git a/voice/src/main/java/com/iwebpp/crypto/TweetNaclFast.java b/voice/src/main/java/com/iwebpp/crypto/TweetNaclFast.java index c3106669fcc..075cb03f33c 100644 --- a/voice/src/main/java/com/iwebpp/crypto/TweetNaclFast.java +++ b/voice/src/main/java/com/iwebpp/crypto/TweetNaclFast.java @@ -13,6 +13,23 @@ * @description * TweetNacl.c Java porting * */ +/** + * @deprecated + * This class provides XSalsa20 Poly1305 encryption and is no longer used by default by Kord. 
If you need an + * implementation of TweetNaCl, provide your own instead. XSalsa20 Poly1305 encryption is deprecated for Discord voice + * connections and will be discontinued as of November 18th, 2024. As of this date, the voice gateway will not allow you + * to connect with one of the deprecated encryption modes. See + * + * https://discord.com/developers/docs/change-log#voice-encryption-modes for details. This class will be removed in + * 0.19.0. + */ +@Deprecated +@kotlin.Deprecated( + message = "This class provides XSalsa20 Poly1305 encryption and is no longer used by default by Kord. If you " + + "need an implementation of TweetNaCl, provide your own instead. " + + dev.kord.voice.VoiceConnectionKt.XSalsa20_CLASS_DEPRECATION, + level = kotlin.DeprecationLevel.WARNING +) public final class TweetNaclFast { private final static String TAG = "TweetNaclFast"; diff --git a/voice/src/main/kotlin/EncryptionMode.kt b/voice/src/main/kotlin/EncryptionMode.kt index d634833c4f7..57142b6d795 100644 --- a/voice/src/main/kotlin/EncryptionMode.kt +++ b/voice/src/main/kotlin/EncryptionMode.kt @@ -1,10 +1,42 @@ @file:Generate( STRING_KORD_ENUM, name = "EncryptionMode", - docUrl = "https://discord.com/developers/docs/topics/voice-connections#establishing-a-voice-udp-connection-encryption-modes", + docUrl = "https://discord.com/developers/docs/topics/voice-connections#transport-encryption-modes", entries = [ - Entry("XSalsa20Poly1305", stringValue = "xsalsa20_poly1305"), - Entry("XSalsa20Poly1305Suffix", stringValue = "xsalsa20_poly1305_suffix"), - Entry("XSalsa20Poly1305Lite", stringValue = "xsalsa20_poly1305_lite") + Entry("AeadAes256GcmRtpSize", stringValue = "aead_aes256_gcm_rtpsize"), + Entry("AeadXChaCha20Poly1305RtpSize", stringValue = "aead_xchacha20_poly1305_rtpsize"), + Entry( + "XSalsa20Poly1305", stringValue = "xsalsa20_poly1305", + deprecated = Deprecated( + """Use 'EncryptionMode.from("xsalsa20_poly1305")' if you need to keep using this deprecated """ + + 
"'EncryptionMode'. $XSalsa20_OBJECT_DEPRECATION", + ReplaceWith( + """EncryptionMode.from("xsalsa20_poly1305")""", imports = ["dev.kord.voice.EncryptionMode"], + ), + DeprecationLevel.WARNING, + ), + ), + Entry( + "XSalsa20Poly1305Suffix", stringValue = "xsalsa20_poly1305_suffix", + deprecated = Deprecated( + """Use 'EncryptionMode.from("xsalsa20_poly1305_suffix")' if you need to keep using this deprecated """ + + "'EncryptionMode'. $XSalsa20_OBJECT_DEPRECATION", + ReplaceWith( + """EncryptionMode.from("xsalsa20_poly1305_suffix")""", imports = ["dev.kord.voice.EncryptionMode"], + ), + DeprecationLevel.WARNING, + ), + ), + Entry( + "XSalsa20Poly1305Lite", stringValue = "xsalsa20_poly1305_lite", + deprecated = Deprecated( + """Use 'EncryptionMode.from("xsalsa20_poly1305_lite")' if you need to keep using this deprecated """ + + "'EncryptionMode'. $XSalsa20_OBJECT_DEPRECATION", + ReplaceWith( + """EncryptionMode.from("xsalsa20_poly1305_lite")""", imports = ["dev.kord.voice.EncryptionMode"], + ), + DeprecationLevel.WARNING, + ), + ), ] ) diff --git a/voice/src/main/kotlin/VoiceConnection.kt b/voice/src/main/kotlin/VoiceConnection.kt index c034484c90b..af342ab13f3 100644 --- a/voice/src/main/kotlin/VoiceConnection.kt +++ b/voice/src/main/kotlin/VoiceConnection.kt @@ -4,7 +4,7 @@ import dev.kord.common.annotation.KordVoice import dev.kord.common.entity.Snowflake import dev.kord.gateway.Gateway import dev.kord.gateway.UpdateVoiceStatus -import dev.kord.voice.encryption.strategies.NonceStrategy +import dev.kord.voice.encryption.strategies.* import dev.kord.voice.gateway.VoiceGateway import dev.kord.voice.gateway.VoiceGatewayConfiguration import dev.kord.voice.handlers.StreamsHandler @@ -41,10 +41,9 @@ public data class VoiceConnectionData( * @param audioProvider a [AudioProvider] that will provide [AudioFrame] when required. * @param frameInterceptor a [FrameInterceptor] that will intercept all outgoing [AudioFrame]s. 
* @param frameSender the [AudioFrameSender] that will handle the sending of audio packets. - * @param nonceStrategy the [NonceStrategy] that is used during encryption of audio. */ @KordVoice -public class VoiceConnection( +public class VoiceConnection internal constructor( public val data: VoiceConnectionData, public val gateway: Gateway, public val voiceGateway: VoiceGateway, @@ -54,9 +53,53 @@ public class VoiceConnection( public val audioProvider: AudioProvider, public val frameInterceptor: FrameInterceptor, public val frameSender: AudioFrameSender, - public val nonceStrategy: NonceStrategy, - connectionDetachDuration: Duration + connectionDetachDuration: Duration, + internal val strategy: @Suppress("DEPRECATION") NonceStrategy?, ) { + @Suppress("DeprecatedCallableAddReplaceWith") + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. A 'VoiceConnection' instance " + + "can be created without a 'nonceStrategy' in which case this property throws an " + + "'UnsupportedOperationException'. $XSalsa20_PROPERTY_DEPRECATION", + level = DeprecationLevel.WARNING, + ) + public val nonceStrategy: @Suppress("DEPRECATION") NonceStrategy + get() = strategy + ?: throw UnsupportedOperationException("This VoiceConnection instance was created without a nonceStrategy.") + + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. Construct a 'VoiceConnection' " + + "instance without a 'nonceStrategy' instead. 
$XSalsa20_CONSTRUCTOR_DEPRECATION", + ReplaceWith( + "VoiceConnection(data, gateway, voiceGateway, socket, voiceGatewayConfiguration, streams, audioProvider, " + + "frameInterceptor, frameSender, connectionDetachDuration)", + imports = ["dev.kord.voice.VoiceConnection"] + ), + DeprecationLevel.WARNING, + ) + public constructor( + data: VoiceConnectionData, gateway: Gateway, voiceGateway: VoiceGateway, socket: VoiceUdpSocket, + voiceGatewayConfiguration: VoiceGatewayConfiguration, streams: Streams, audioProvider: AudioProvider, + frameInterceptor: FrameInterceptor, frameSender: AudioFrameSender, + nonceStrategy: @Suppress("DEPRECATION") NonceStrategy, connectionDetachDuration: Duration, + ) : this( + data = data, gateway = gateway, voiceGateway = voiceGateway, socket = socket, + voiceGatewayConfiguration = voiceGatewayConfiguration, streams = streams, audioProvider = audioProvider, + frameInterceptor = frameInterceptor, frameSender = frameSender, + connectionDetachDuration = connectionDetachDuration, strategy = nonceStrategy, + ) + + public constructor( + data: VoiceConnectionData, gateway: Gateway, voiceGateway: VoiceGateway, socket: VoiceUdpSocket, + voiceGatewayConfiguration: VoiceGatewayConfiguration, streams: Streams, audioProvider: AudioProvider, + frameInterceptor: FrameInterceptor, frameSender: AudioFrameSender, connectionDetachDuration: Duration, + ) : this( + data = data, gateway = gateway, voiceGateway = voiceGateway, socket = socket, + voiceGatewayConfiguration = voiceGatewayConfiguration, streams = streams, audioProvider = audioProvider, + frameInterceptor = frameInterceptor, frameSender = frameSender, + connectionDetachDuration = connectionDetachDuration, strategy = null, + ) + public val scope: CoroutineScope = CoroutineScope(SupervisorJob() + CoroutineName("kord-voice-connection[${data.guildId.value}]")) @@ -153,3 +196,18 @@ public suspend inline fun VoiceConnection( contract { callsInPlace(builder, InvocationKind.EXACTLY_ONCE) } return 
VoiceConnectionBuilder(gateway, selfId, channelId, guildId).apply(builder).build() } + + +private const val DEPRECATION_PART_1 = "XSalsa20 Poly1305 encryption is deprecated for Discord voice connections and " + + "will be discontinued as of November 18th, 2024. As of this date, the voice gateway will not allow you to " + + "connect with one of the deprecated encryption modes. See " + + "https://discord.com/developers/docs/change-log#voice-encryption-modes for details. The deprecation level will " + + "be raised to ERROR in 0.17.0, to HIDDEN in 0.18.0, and this" +private const val DEPRECATION_PART_2 = "will be removed in 0.19.0." + +internal const val XSalsa20_CLASS_DEPRECATION = "$DEPRECATION_PART_1 class $DEPRECATION_PART_2" +internal const val XSalsa20_INTERFACE_DEPRECATION = "$DEPRECATION_PART_1 interface $DEPRECATION_PART_2" +internal const val XSalsa20_OBJECT_DEPRECATION = "$DEPRECATION_PART_1 object $DEPRECATION_PART_2" +internal const val XSalsa20_CONSTRUCTOR_DEPRECATION = "$DEPRECATION_PART_1 constructor $DEPRECATION_PART_2" +internal const val XSalsa20_PROPERTY_DEPRECATION = "$DEPRECATION_PART_1 property $DEPRECATION_PART_2" +internal const val XSalsa20_FUNCTION_DEPRECATION = "$DEPRECATION_PART_1 function $DEPRECATION_PART_2" diff --git a/voice/src/main/kotlin/VoiceConnectionBuilder.kt b/voice/src/main/kotlin/VoiceConnectionBuilder.kt index 62765a73701..d85f62ef3c7 100644 --- a/voice/src/main/kotlin/VoiceConnectionBuilder.kt +++ b/voice/src/main/kotlin/VoiceConnectionBuilder.kt @@ -7,8 +7,7 @@ import dev.kord.gateway.Gateway import dev.kord.gateway.UpdateVoiceStatus import dev.kord.gateway.VoiceServerUpdate import dev.kord.gateway.VoiceStateUpdate -import dev.kord.voice.encryption.strategies.LiteNonceStrategy -import dev.kord.voice.encryption.strategies.NonceStrategy +import dev.kord.voice.encryption.strategies.* import dev.kord.voice.exception.VoiceConnectionInitializationException import dev.kord.voice.gateway.DefaultVoiceGatewayBuilder import 
dev.kord.voice.gateway.VoiceGateway @@ -65,9 +64,19 @@ public class VoiceConnectionBuilder( /** * The nonce strategy to be used for the encryption of audio packets. - * If `null`, [dev.kord.voice.encryption.strategies.LiteNonceStrategy] will be used. */ - public var nonceStrategy: NonceStrategy? = null + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. Do not explicitly specify a " + + "'nonceStrategy', the 'VoiceConnection' automatically selects a suitable encryption mode. " + + XSalsa20_PROPERTY_DEPRECATION, + level = DeprecationLevel.WARNING, + ) + public var nonceStrategy: @Suppress("DEPRECATION") NonceStrategy? + get() = _nonceStrategy + set(value) { + _nonceStrategy = value + } + private var _nonceStrategy: @Suppress("DEPRECATION") NonceStrategy? = null /** * A boolean indicating whether your voice state will be muted. @@ -166,19 +175,19 @@ public class VoiceConnectionBuilder( .build() val udpSocket = udpSocket ?: GlobalVoiceUdpSocket val audioProvider = audioProvider ?: EmptyAudioPlayerProvider - val nonceStrategy = nonceStrategy ?: LiteNonceStrategy() + val nonceStrategy = _nonceStrategy val frameInterceptor = frameInterceptor ?: DefaultFrameInterceptor() val audioSender = audioSender ?: DefaultAudioFrameSender( DefaultAudioFrameSenderData( udpSocket, frameInterceptor, + nonceStrategy, audioProvider, - nonceStrategy ) ) val streams = - streams ?: if (receiveVoice) DefaultStreams(voiceGateway, udpSocket, nonceStrategy) else NOPStreams + streams ?: if (receiveVoice) DefaultStreams(voiceGateway, nonceStrategy, udpSocket) else NOPStreams return VoiceConnection( voiceConnectionData, @@ -190,8 +199,8 @@ public class VoiceConnectionBuilder( audioProvider, frameInterceptor, audioSender, + connectionDetachDuration, nonceStrategy, - connectionDetachDuration ) } diff --git a/voice/src/main/kotlin/encryption/AeadAes256GcmRtpSize.kt b/voice/src/main/kotlin/encryption/AeadAes256GcmRtpSize.kt new file mode 100644 index 
00000000000..cb831010fd4 --- /dev/null +++ b/voice/src/main/kotlin/encryption/AeadAes256GcmRtpSize.kt @@ -0,0 +1,147 @@ +package dev.kord.voice.encryption + +import dev.kord.voice.udp.DecryptedVoicePacket +import dev.kord.voice.udp.RTP_HEADER_LENGTH +import kotlinx.io.Buffer +import kotlinx.io.Source +import kotlinx.io.UnsafeIoApi +import kotlinx.io.readTo +import kotlinx.io.unsafe.UnsafeBufferOperations +import java.security.Security +import javax.crypto.AEADBadTagException +import javax.crypto.Cipher +import javax.crypto.Cipher.DECRYPT_MODE +import javax.crypto.Cipher.ENCRYPT_MODE +import javax.crypto.spec.GCMParameterSpec +import javax.crypto.spec.SecretKeySpec + +private const val AES_256_GCM_NO_PADDING = "AES_256/GCM/NoPadding" +private const val AES = "AES" + +internal val isAes256GcmSupported + get() = Security.getAlgorithms("Cipher") + .any { algorithm -> AES_256_GCM_NO_PADDING.equals(algorithm, ignoreCase = true) } + +private const val AUTH_TAG_SIZE = 16 +private const val AUTH_TAG_BITS = AUTH_TAG_SIZE * 8 +private const val IV_SIZE = 12 +private const val NONCE_SIZE = 4 +private const val ADDITIONAL_SIZE = RTP_HEADER_LENGTH + AUTH_TAG_SIZE + NONCE_SIZE + +internal class AeadAes256GcmRtpSizeVoicePacketCreator(key: ByteArray) : EncryptedVoicePacketCreator { + + // the first 4 bytes are the 32-bit incremental nonce (big endian), the remaining bytes are 0 + private val ivBuffer = ByteArray(IV_SIZE) + private var nonce = 0 + private val cipher: Cipher = Cipher.getInstance(AES_256_GCM_NO_PADDING) + private val key = SecretKeySpec(key, AES) + + override fun createEncryptedVoicePacket( + sequence: UShort, + timestamp: UInt, + ssrc: UInt, + audioPlaintext: ByteArray, // TODO rename to plaintextAudio? + ): ByteArray { + val nonce = nonce++ + val plaintextSize = audioPlaintext.size + val packetSize = plaintextSize + ADDITIONAL_SIZE + val packet = ByteArray(packetSize) // TODO use cipher.getOutputSize? 
+ + // write the header into the voice packet + packet.writeRtpHeader(sequence, timestamp, ssrc) + + // encrypt the audio into the voice packet + ivBuffer.writeIntBigEndian(offset = 0, nonce) + cipher.init(ENCRYPT_MODE, key, GCMParameterSpec(AUTH_TAG_BITS, ivBuffer)) + cipher.updateAAD(packet, /* offset = */ 0, /* len = */ RTP_HEADER_LENGTH) + val written = cipher.doFinal( + /* input = */ audioPlaintext, /* inputOffset = */ 0, /* inputLen = */ plaintextSize, + /* output = */ packet, /* outputOffset = */ RTP_HEADER_LENGTH, + ) + check(written == plaintextSize + AUTH_TAG_SIZE) { "Ciphertext doesn't have the expected length." } + + // append the nonce to the end of the voice packet + packet.writeIntBigEndian(offset = packetSize - NONCE_SIZE, nonce) + + return packet + } +} + +internal class AeadAes256GcmRtpSizeVoicePacketDecryptor(key: ByteArray) : Decrypt() { + private val ivBuffer = ByteArray(IV_SIZE) + private val cipher: Cipher = Cipher.getInstance(AES_256_GCM_NO_PADDING) + private val key = SecretKeySpec(key, AES) + + override fun decrypt(audioPacket: Source): DecryptedVoicePacket? = audioPacket.use { packet -> + val headerSize = readUnencryptedRtpHeaderPart(packet) + if (headerSize < 0) { + return null + } + val input = Buffer() + var output: Buffer? 
= null + try { + val payloadSize = packet.transferTo(input) + // TODO padding handling + if (payloadSize < NONCE_SIZE + AUTH_TAG_SIZE) { + return null + } + + output = Buffer() + + // read the nonce from the end of the voice packet + // TODO copy directly to ivBuffer when https://github.com/Kotlin/kotlinx-io/issues/191 is implemented + input.copyTo(output, startIndex = payloadSize - NONCE_SIZE) + output.readTo(ivBuffer, startIndex = 0, endIndex = NONCE_SIZE) + + cipher.init(DECRYPT_MODE, key, GCMParameterSpec(AUTH_TAG_BITS, ivBuffer)) + cipher.updateAAD(unencryptedRtpHeaderPartBuffer, /* offset = */ 0, /* len = */ headerSize) + return if (input.decryptTo(output)) { + createDecryptedVoicePacket(headerSize, output) + } else { + null + } + } catch (e: Throwable) { + output?.clear() + throw e + } finally { + input.clear() // release buffer segments + } + } + + @OptIn(UnsafeIoApi::class) + private fun Buffer.decryptTo(output: Buffer) = try { + var sizeWithoutNonce = size - NONCE_SIZE + while (sizeWithoutNonce > 0) { + sizeWithoutNonce -= decryptPartTo(output, maxInputLen = sizeWithoutNonce) + } + + UnsafeBufferOperations.writeToTail( + output, + minimumCapacity = maxOf(1, cipher.getOutputSize(/* inputLen = */ 0)) + ) { outputBytes, outputStartIndex, _ -> + return@writeToTail cipher.doFinal(outputBytes, outputStartIndex) + } + true + } catch (_: AEADBadTagException) { + false + } + + /** + * [Updates][Cipher.update] the [cipher] with up to [maxInputLen] bytes from this [Buffer] into [output] and returns + * the number of bytes consumed from this [Buffer]. 
+ */ + @UnsafeIoApi + private fun Buffer.decryptPartTo(output: Buffer, maxInputLen: Long): Int = + UnsafeBufferOperations.readFromHead(buffer = this) { inputBytes, inputStartIndex, inputEndIndex -> + val inputLen = minOf((inputEndIndex - inputStartIndex).toLong(), maxInputLen).toInt() + + UnsafeBufferOperations.writeToTail( + buffer = output, + minimumCapacity = cipher.getOutputSize(inputLen), + ) { outputBytes, outputStartIndex, _ -> + return@writeToTail cipher.update(inputBytes, inputStartIndex, inputLen, outputBytes, outputStartIndex) + } + + return@readFromHead inputLen // will be consumed from buffer + } +} diff --git a/voice/src/main/kotlin/encryption/AeadXChaCha20Poly1305RtpSize.kt b/voice/src/main/kotlin/encryption/AeadXChaCha20Poly1305RtpSize.kt new file mode 100644 index 00000000000..c0144512d1d --- /dev/null +++ b/voice/src/main/kotlin/encryption/AeadXChaCha20Poly1305RtpSize.kt @@ -0,0 +1,132 @@ +package dev.kord.voice.encryption + +import com.google.crypto.tink.aead.internal.InsecureNonceXChaCha20 +import com.google.crypto.tink.aead.internal.InsecureNonceXChaCha20Poly1305 +import com.google.crypto.tink.aead.internal.Poly1305 +import dev.kord.voice.udp.DecryptedVoicePacket +import dev.kord.voice.udp.RTP_HEADER_LENGTH +import kotlinx.io.Buffer +import kotlinx.io.Source +import kotlinx.io.readTo +import java.nio.ByteBuffer +import javax.crypto.AEADBadTagException + +private const val AUTH_TAG_SIZE = Poly1305.MAC_TAG_SIZE_IN_BYTES +private const val NONCE_SIZE = 4 +private const val ADDITIONAL_SIZE = RTP_HEADER_LENGTH + AUTH_TAG_SIZE + NONCE_SIZE +private val EMPTY_BYTE_ARRAY = ByteArray(size = 0) + +internal class AeadXChaCha20Poly1305RtpSizeVoicePacketCreator(key: ByteArray) : EncryptedVoicePacketCreator { + + // the first 4 bytes are the 32-bit incremental nonce (big endian), the remaining bytes are 0 + private val nonceBuffer = ByteArray(InsecureNonceXChaCha20.NONCE_SIZE_IN_BYTES) + private var nonce = 0 + private val xChaCha20Poly1305 = 
InsecureNonceXChaCha20Poly1305(key) + private val associatedDataBuffer = ByteArray(RTP_HEADER_LENGTH) + + override fun createEncryptedVoicePacket( + sequence: UShort, + timestamp: UInt, + ssrc: UInt, + audioPlaintext: ByteArray, + ): ByteArray { + val nonce = nonce++ + val plaintextSize = audioPlaintext.size + val packetSize = plaintextSize + ADDITIONAL_SIZE + val packet = ByteArray(packetSize) + + // write the header into associatedDataBuffer and the voice packet + associatedDataBuffer.writeRtpHeader(sequence, timestamp, ssrc) + associatedDataBuffer.copyInto(packet) + + nonceBuffer.writeIntBigEndian(offset = 0, nonce) + + // TODO check if this is true + // InsecureNonceXChaCha20Poly1305.encrypt requires output.limit() to be set to where the ciphertext will end, + // otherwise it will read too much when computing the authentication tag. ByteBuffer.wrap with offset and length + // will set the limit accordingly. + val output = + ByteBuffer.wrap(packet, /* offset = */ RTP_HEADER_LENGTH, /* length = */ plaintextSize + AUTH_TAG_SIZE) + xChaCha20Poly1305.encrypt(output, nonceBuffer, audioPlaintext, associatedDataBuffer) + val nonceOffset = packetSize - NONCE_SIZE + check(output.position() == nonceOffset && output.limit() == nonceOffset) { + "Ciphertext doesn't have the expected length." + } + + // append the nonce to the end of the voice packet + packet.writeIntBigEndian(nonceOffset, nonce) + + return packet + } +} + +internal class AeadXChaCha20Poly1305RtpSizeVoicePacketDecryptor(key: ByteArray) : Decrypt() { + private var ciphertextBuffer: ByteArray = EMPTY_BYTE_ARRAY + private val nonceBuffer = ByteArray(InsecureNonceXChaCha20.NONCE_SIZE_IN_BYTES) + private val xChaCha20Poly1305 = InsecureNonceXChaCha20Poly1305(key) + + override fun decrypt(audioPacket: Source): DecryptedVoicePacket? 
= audioPacket.use { packet -> + val headerSize = readUnencryptedRtpHeaderPart(packet) + if (headerSize < 0) { + return null + } + + val ciphertext = when (packet) { + is Buffer -> getCiphertextAndFillNonceBufferFromBuffer(packet) + else -> getCiphertextAndFillNonceBufferFromSource(packet) + } ?: return null + + val associatedData = when (headerSize) { + MAX_UNENCRYPTED_RTP_HEADER_PART_SIZE -> unencryptedRtpHeaderPartBuffer + else -> unencryptedRtpHeaderPartBuffer.copyOf(headerSize) + } + + val plaintext = try { + xChaCha20Poly1305.decrypt(ciphertext, nonceBuffer, associatedData) + } catch (_: AEADBadTagException) { + return null + } + + return createDecryptedVoicePacket(headerSize, extensionAndAudio = plaintext) + } + + // TODO padding handling + + private fun getCiphertextAndFillNonceBufferFromSource(source: Source): ByteBuffer? { + val buffer = Buffer() + try { + source.transferTo(buffer) + return getCiphertextAndFillNonceBufferFromBuffer(buffer) + } finally { + buffer.clear() // recycle buffer segments + } + } + + private fun getCiphertextAndFillNonceBufferFromBuffer(buffer: Buffer): ByteBuffer? 
{ + val ciphertextSizeLong = buffer.size - NONCE_SIZE + if (ciphertextSizeLong !in AUTH_TAG_SIZE..Int.MAX_VALUE) { + return null + } + + val ciphertextSize = ciphertextSizeLong.toInt() + val ciphertextBuffer = growCiphertextBuffer(ciphertextSize) + + buffer.readTo(ciphertextBuffer, startIndex = 0, endIndex = ciphertextSize) + buffer.readTo(nonceBuffer, startIndex = 0, endIndex = NONCE_SIZE) + + return ByteBuffer.wrap(ciphertextBuffer, /* offset = */ 0, /* length = */ ciphertextSize) + } + + private fun growCiphertextBuffer(ciphertextSize: Int): ByteArray { + var buffer = ciphertextBuffer + val bufferSize = buffer.size + if (bufferSize < ciphertextSize) { + // preferredSize = bufferSize + (bufferSize / 2) = 1.5 * bufferSize, + /** see [java.util.ArrayList.grow] and [jdk.internal.util.ArraysSupport.newLength] */ + val preferredSize = bufferSize + (bufferSize shr 1) + buffer = ByteArray(size = maxOf(ciphertextSize, preferredSize)) + ciphertextBuffer = buffer + } + return buffer + } +} diff --git a/voice/src/main/kotlin/encryption/VoicePacketEncrypt.kt b/voice/src/main/kotlin/encryption/VoicePacketEncrypt.kt new file mode 100644 index 00000000000..570dcf9b0c7 --- /dev/null +++ b/voice/src/main/kotlin/encryption/VoicePacketEncrypt.kt @@ -0,0 +1,188 @@ +package dev.kord.voice.encryption + +import dev.kord.voice.udp.DecryptedVoicePacket +import dev.kord.voice.udp.DecryptedVoicePacket.Companion.EMPTY_UINT_ARRAY +import kotlinx.io.* + +internal interface EncryptedVoicePacketCreator { + fun createEncryptedVoicePacket(sequence: UShort, timestamp: UInt, ssrc: UInt, audioPlaintext: ByteArray): ByteArray +} + +internal fun ByteArray.writeShortBigEndian(offset: Int, value: Short) { + this[offset] = (value.toInt() ushr 8).toByte() + this[offset + 1] = value.toByte() +} + +internal fun ByteArray.writeIntBigEndian(offset: Int, value: Int) { + this[offset] = (value ushr 24).toByte() + this[offset + 1] = (value ushr 16).toByte() + this[offset + 2] = (value ushr 8).toByte() + 
this[offset + 3] = value.toByte() +} + +internal fun ByteArray.readShortBigEndian(offset: Int): Short { + return (this[offset].toInt() shl 8) + .or(this[offset + 1].toInt() and 0xFF) + .toShort() +} + +internal fun ByteArray.readIntBigEndian(offset: Int): Int { + return (this[offset].toInt() shl 24) + .or((this[offset + 1].toInt() and 0xFF) shl 16) + .or((this[offset + 2].toInt() and 0xFF) shl 8) + .or(this[offset + 3].toInt() and 0xFF) +} + +internal fun ByteArray.writeRtpHeader(sequence: UShort, timestamp: UInt, ssrc: UInt) { + // https://discord.com/developers/docs/topics/voice-connections#transport-encryption-modes-voice-packet-structure + // https://datatracker.ietf.org/doc/html/rfc3550#section-5.1 + this[0] = VERSION_2.toByte() + this[1] = PAYLOAD_TYPE + writeShortBigEndian(offset = 2, sequence.toShort()) + writeIntBigEndian(offset = 4, timestamp.toInt()) + writeIntBigEndian(offset = 8, ssrc.toInt()) +} + +// https://discord.com/developers/docs/topics/voice-connections#transport-encryption-modes: +// The RTP size variants determine the unencrypted size of the RTP header in the same way as SRTP, which considers CSRCs +// and (optionally) the extension preamble to be part of the unencrypted header. The deprecated variants use a fixed +// size unencrypted header for RTP. +// +// The unencrypted part of the RTP header consists of 12 bytes that are always present, up to 15 CSRCs (32 bits each) +// and an optional extension preamble (32 bits). If a header extension is present, the non-preamble part of the +// extension is encrypted together with the payload. 
+private const val CSRC_SIZE = 4 /* bytes per CSRC identifier (32 bits each) */ +private const val EXTENSION_WORD_SIZE = 4 /* bytes per header extension word (one big-endian Int per word) */ +private const val EXTENSION_PREAMBLE_SIZE = 4 /* bytes of the optional extension preamble (32 bits) */ +private const val MIN_UNENCRYPTED_RTP_HEADER_PART_SIZE = 12 /* header bytes that are always present */ +internal const val MAX_UNENCRYPTED_RTP_HEADER_PART_SIZE = /* 12 + 15 * 4 + 4 = 76 bytes: all 15 CSRC slots used plus the extension preamble */ + MIN_UNENCRYPTED_RTP_HEADER_PART_SIZE + (15 * CSRC_SIZE) + EXTENSION_PREAMBLE_SIZE + +private const val VERSION_MASK = 0b11_0_0_0000 /* the masks below are applied to the first header octet in readUnencryptedRtpHeaderPart */ +private const val VERSION_2 = 0b10_0_0_0000 // 0x80 +private const val PADDING_MASK = 0b00_1_0_0000 +private const val EXTENSION_MASK = 0b00_0_1_0000 +private const val CSRC_COUNT_MASK = 0b00_0_0_1111 +private const val PAYLOAD_TYPE: Byte = 0x78 /* value written to and expected in the second header octet */ + +/** Base class of the rtpsize voice packet decryptors: it parses the unencrypted RTP header part and keeps the parsed state in the fields below until the next packet is read. */ internal abstract class Decrypt { + @JvmField + protected val unencryptedRtpHeaderPartBuffer = ByteArray(MAX_UNENCRYPTED_RTP_HEADER_PART_SIZE) /* reused for every packet, only the first headerSize bytes are meaningful */ + + @JvmField // TODO maybe private is enough + protected var packetHasPadding = false /* padding flag of the header read last */ + private var csrcCount = 0 /* CSRC count of the header read last */ + private var encryptedExtensionPartLength = 0 // -1 if there is no extension + + /** Decrypts one datagram payload; the implementations in this patch return null when the header is rejected, the payload is too short or authentication fails. */ abstract fun decrypt(audioPacket: Source): DecryptedVoicePacket? 
+ + /** Reads the unencrypted RTP header part of [packet] into [unencryptedRtpHeaderPartBuffer] and records the padding flag, CSRC count and extension length. Returns the header size in bytes (12, plus 4 per CSRC, plus 4 if the extension flag is set), or -1 if not enough bytes can be requested, the version bits are not VERSION_2 or the payload type is not PAYLOAD_TYPE. */ protected fun readUnencryptedRtpHeaderPart(packet: Source): Int { + if (!packet.request(MIN_UNENCRYPTED_RTP_HEADER_PART_SIZE.toLong())) { + return -1 + } + + // read the first two octets of the header + val octet1 = packet.readByte().toInt() + val octet2 = packet.readByte() + + // check the version + if ((octet1 and VERSION_MASK) != VERSION_2) { + return -1 + } + val hasExtension = (octet1 and EXTENSION_MASK) != 0 + val csrcCnt = octet1 and CSRC_COUNT_MASK + val headerSize = MIN_UNENCRYPTED_RTP_HEADER_PART_SIZE + + (csrcCnt * CSRC_SIZE) + + (if (hasExtension) EXTENSION_PREAMBLE_SIZE else 0) + + // check the payload type + if (octet2 != PAYLOAD_TYPE) { + return -1 + } + + if (!packet.request(byteCount = headerSize - 2L)) { /* two octets were already read above */ + return -1 + } + + // read the remaining unencrypted part of the header + unencryptedRtpHeaderPartBuffer[0] = octet1.toByte() + unencryptedRtpHeaderPartBuffer[1] = octet2 + packet.readTo(unencryptedRtpHeaderPartBuffer, startIndex = 2, endIndex = headerSize) + + encryptedExtensionPartLength = if (hasExtension) { + val extensionLength = unencryptedRtpHeaderPartBuffer /* unsigned 16-bit length field in the last two bytes of the preamble, counted in extension words */ + .readShortBigEndian(offset = headerSize - 2).toInt() + .and(0xFFFF) + if (!packet.request(byteCount = (headerSize + (extensionLength * EXTENSION_WORD_SIZE)).toLong())) { /* NOTE(review): the header bytes were already read from packet above, yet headerSize is still added to the requested byte count - confirm this is intended */ + return -1 + } + extensionLength + } else { + -1 + } + packetHasPadding = (octet1 and PADDING_MASK) != 0 + csrcCount = csrcCnt + + return headerSize + } + + /** Overload for plaintext held in a ByteArray: extension word i is the big-endian Int at i * EXTENSION_WORD_SIZE, the audio is a copy of everything after the extension words. */ protected fun createDecryptedVoicePacket(headerSize: Int, extensionAndAudio: ByteArray) = + createDecryptedVoicePacket( + headerSize, + extensionAndAudio, + readExtensionWord = { array, i -> array.readIntBigEndian(offset = i * EXTENSION_WORD_SIZE).toUInt() }, + readDecryptedAudio = { array, extensionLength -> + array.copyOfRange(fromIndex = extensionLength * EXTENSION_WORD_SIZE, toIndex = array.size) + }, + ) + + protected fun createDecryptedVoicePacket(headerSize: Int, extensionAndAudio: Buffer) = createDecryptedVoicePacket( + headerSize, + 
extensionAndAudio, + readExtensionWord = { buffer, _ -> buffer.readUInt() }, + readDecryptedAudio = { buffer, _ -> buffer.readByteArray() }, // the extension part was already consumed + ) + + @OptIn(ExperimentalUnsignedTypes::class) + private inline fun createDecryptedVoicePacket( + headerSize: Int, + extensionAndAudio: T, + readExtensionWord: (extensionAndAudio: T, i: Int) -> UInt, + readDecryptedAudio: (extensionAndAudio: T, extensionLength: Int) -> ByteArray, + ): DecryptedVoicePacket { + val sequenceNumber = unencryptedRtpHeaderPartBuffer.readShortBigEndian(offset = 2).toUShort() + val timestamp = unencryptedRtpHeaderPartBuffer.readIntBigEndian(offset = 4).toUInt() + val ssrc = unencryptedRtpHeaderPartBuffer.readIntBigEndian(offset = 8).toUInt() + + val csrcCount = csrcCount + val csrcs = if (csrcCount > 0) UIntArray(csrcCount) else EMPTY_UINT_ARRAY + for (i in 0..= 0) { + val definedByProfile = unencryptedRtpHeaderPartBuffer + .readShortBigEndian(offset = headerSize - EXTENSION_PREAMBLE_SIZE) + .toUShort() + val headerExtension = if (extensionLength > 0) UIntArray(extensionLength) else EMPTY_UINT_ARRAY + for (i in 0.. { streamsJob?.cancel() - streamsJob = launch { streams.listen(it.secretKey.toUByteArray().toByteArray(), server.value!!) 
} + streamsJob = launch { + streams.listen( + it.secretKey.toUByteArray().toByteArray(), + server.value!!, + encryptionMode = it.mode, + ) + } } on { diff --git a/voice/src/main/kotlin/handlers/UdpLifeCycleHandler.kt b/voice/src/main/kotlin/handlers/UdpLifeCycleHandler.kt index 7d9004417a6..742d70157a0 100644 --- a/voice/src/main/kotlin/handlers/UdpLifeCycleHandler.kt +++ b/voice/src/main/kotlin/handlers/UdpLifeCycleHandler.kt @@ -3,9 +3,8 @@ package dev.kord.voice.handlers import dev.kord.voice.EncryptionMode import dev.kord.voice.FrameInterceptorConfiguration import dev.kord.voice.VoiceConnection -import dev.kord.voice.encryption.strategies.LiteNonceStrategy -import dev.kord.voice.encryption.strategies.NormalNonceStrategy -import dev.kord.voice.encryption.strategies.SuffixNonceStrategy +import dev.kord.voice.encryption.isAes256GcmSupported +import dev.kord.voice.encryption.strategies.* import dev.kord.voice.gateway.* import dev.kord.voice.udp.AudioFrameSenderConfiguration import io.github.oshai.kotlinlogging.KotlinLogging @@ -26,6 +25,7 @@ internal class UdpLifeCycleHandler( private var server: InetSocketAddress? by atomic(null) private var audioSenderJob: Job? by atomic(null) + private var encryptionMode: EncryptionMode? 
by atomic(null) @OptIn(ExperimentalUnsignedTypes::class) override suspend fun start() = coroutineScope { @@ -37,18 +37,29 @@ internal class UdpLifeCycleHandler( udpLifeCycleLogger.trace { "ip discovered for voice successfully" } - val encryptionMode = when (connection.nonceStrategy) { + val mode = @Suppress("DEPRECATION") when (connection.strategy) { + null -> + // prefer aead_aes256_gcm_rtpsize when available, fall back to aead_xchacha20_poly1305_rtpsize, + // see https://discord.com/developers/docs/topics/voice-connections#transport-encryption-modes + if (EncryptionMode.AeadAes256GcmRtpSize in it.modes && isAes256GcmSupported) { + EncryptionMode.AeadAes256GcmRtpSize + } else { + EncryptionMode.AeadXChaCha20Poly1305RtpSize + } + + // use deprecated modes only when explicitly specified in VoiceConnection is LiteNonceStrategy -> EncryptionMode.XSalsa20Poly1305Lite is NormalNonceStrategy -> EncryptionMode.XSalsa20Poly1305 is SuffixNonceStrategy -> EncryptionMode.XSalsa20Poly1305Suffix } + encryptionMode = mode val selectProtocol = SelectProtocol( protocol = "udp", data = SelectProtocol.Data( address = ip.hostname, port = ip.port, - mode = encryptionMode + mode = mode, ) ) @@ -56,12 +67,20 @@ internal class UdpLifeCycleHandler( } on { + val mode = it.mode + val expectedMode = encryptionMode + check(mode == expectedMode) { + "Session Description contained unexpected encryption mode: $mode. Specified $expectedMode in Select " + + "Protocol." + } + with(connection) { val config = AudioFrameSenderConfiguration( ssrc = ssrc!!, key = it.secretKey.toUByteArray().toByteArray(), server = server!!, - interceptorConfiguration = FrameInterceptorConfiguration(gateway, voiceGateway, ssrc!!) 
+ interceptorConfiguration = FrameInterceptorConfiguration(gateway, voiceGateway, ssrc!!), + encryptionMode = mode, ) audioSenderJob?.cancel() diff --git a/voice/src/main/kotlin/streams/DefaultStreams.kt b/voice/src/main/kotlin/streams/DefaultStreams.kt index 0907f1ff895..6eb12305a22 100644 --- a/voice/src/main/kotlin/streams/DefaultStreams.kt +++ b/voice/src/main/kotlin/streams/DefaultStreams.kt @@ -1,14 +1,18 @@ package dev.kord.voice.streams -import com.iwebpp.crypto.TweetNaclFast +import com.iwebpp.crypto.* import dev.kord.common.annotation.KordVoice import dev.kord.common.entity.Snowflake import dev.kord.voice.AudioFrame -import dev.kord.voice.encryption.XSalsa20Poly1305Codec -import dev.kord.voice.encryption.strategies.NonceStrategy +import dev.kord.voice.EncryptionMode +import dev.kord.voice.XSalsa20_CONSTRUCTOR_DEPRECATION +import dev.kord.voice.XSalsa20_FUNCTION_DEPRECATION +import dev.kord.voice.encryption.* +import dev.kord.voice.encryption.strategies.* import dev.kord.voice.gateway.Speaking import dev.kord.voice.gateway.VoiceGateway import dev.kord.voice.io.* +import dev.kord.voice.udp.DecryptedVoicePacket import dev.kord.voice.udp.PayloadType import dev.kord.voice.udp.RTPPacket import dev.kord.voice.udp.VoiceUdpSocket @@ -21,16 +25,41 @@ import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.flow.* +import kotlinx.coroutines.launch private val defaultStreamsLogger = KotlinLogging.logger { } @KordVoice -public class DefaultStreams( - private val voiceGateway: VoiceGateway, - private val udp: VoiceUdpSocket, - private val nonceStrategy: NonceStrategy +public class DefaultStreams internal constructor( + gateway: VoiceGateway, + strategy: @Suppress("DEPRECATION") NonceStrategy?, + udpSocket: VoiceUdpSocket, ) : Streams { - private fun CoroutineScope.listenForIncoming(key: ByteArray, server: SocketAddress) { + private val voiceGateway = gateway + private val udp 
= udpSocket + private val nonceStrategy = strategy + + public constructor(voiceGateway: VoiceGateway, udp: VoiceUdpSocket) : + this(gateway = voiceGateway, strategy = null, udpSocket = udp) + + @Deprecated( + "The 'nonceStrategy' parameter is only used for XSalsa20 Poly1305 encryption. Construct a 'DefaultStreams' " + + "instance without a 'NonceStrategy' instead. $XSalsa20_CONSTRUCTOR_DEPRECATION", + ReplaceWith("DefaultStreams(voiceGateway, udp)", imports = ["dev.kord.voice.streams.DefaultStreams"]), + DeprecationLevel.WARNING, + ) + public constructor( + voiceGateway: VoiceGateway, udp: VoiceUdpSocket, nonceStrategy: @Suppress("DEPRECATION") NonceStrategy, + ) : this(gateway = voiceGateway, strategy = nonceStrategy, udpSocket = udp) + + internal fun CoroutineScope.listenForIncoming( + udp: VoiceUdpSocket, + key: ByteArray, + server: SocketAddress, + @Suppress("LocalVariableName") _incomingAudioPackets: MutableSharedFlow, + nonceStrategy: @Suppress("DEPRECATION") NonceStrategy, + emitVoicePacket: suspend (RTPPacket) -> Unit, + ) { udp.incoming .filter { it.address == server } .mapNotNull { RTPPacket.fromPacket(it.packet) } @@ -38,6 +67,7 @@ public class DefaultStreams( .decrypt(nonceStrategy, key) .clean() .onEach { _incomingAudioPackets.emit(it) } + .onEach(emitVoicePacket) .launchIn(this) } @@ -62,8 +92,40 @@ public class DefaultStreams( }.launchIn(this) } - override suspend fun listen(key: ByteArray, server: SocketAddress): Unit = coroutineScope { - listenForIncoming(key, server) + override suspend fun listen(key: ByteArray, server: SocketAddress, encryptionMode: EncryptionMode) { + val decryptionDelegate = @Suppress("DEPRECATION") when (encryptionMode) { + EncryptionMode.AeadAes256GcmRtpSize -> NewDecryptionDelegate(AeadAes256GcmRtpSizeVoicePacketDecryptor(key)) + EncryptionMode.AeadXChaCha20Poly1305RtpSize -> + NewDecryptionDelegate(AeadXChaCha20Poly1305RtpSizeVoicePacketDecryptor(key)) + EncryptionMode.XSalsa20Poly1305 -> + LegacyDecryptionDelegate(key, 
this, nonceStrategy as? NormalNonceStrategy ?: NormalNonceStrategy()) + EncryptionMode.XSalsa20Poly1305Lite -> + LegacyDecryptionDelegate(key, this, nonceStrategy as? LiteNonceStrategy ?: LiteNonceStrategy()) + EncryptionMode.XSalsa20Poly1305Suffix -> + LegacyDecryptionDelegate(key, this, nonceStrategy as? SuffixNonceStrategy ?: SuffixNonceStrategy()) + is EncryptionMode.Unknown -> throw UnsupportedOperationException("Unknown encryption mode $encryptionMode") + } + listen(decryptionDelegate, server) + } + + @Deprecated( + "This functions always uses XSalsa20 Poly1305 encryption. Pass an explicit 'EncryptionMode' instead. A " + + "'DefaultStreams' instance can be created without a 'NonceStrategy' in which case this function throws " + + "an 'UnsupportedOperationException'. $XSalsa20_FUNCTION_DEPRECATION", + ReplaceWith( + "this.listen(key, server, EncryptionMode.AeadXChaCha20Poly1305RtpSize)", + imports = ["dev.kord.voice.EncryptionMode"], + ), + DeprecationLevel.WARNING, + ) + override suspend fun listen(key: ByteArray, server: SocketAddress) { + val strategy = nonceStrategy + ?: throw UnsupportedOperationException("This DefaultStreams instance was created without a NonceStrategy.") + listen(LegacyDecryptionDelegate(key, this, strategy), server) + } + + private suspend fun listen(delegate: DecryptionDelegate, server: SocketAddress): Unit = coroutineScope { + delegate.listenForIncoming(scope = this, udp, server, _incomingAudioPackets, _incomingVoicePackets) listenForUserFrames() } @@ -71,6 +133,9 @@ public class DefaultStreams( override val incomingAudioPackets: SharedFlow = _incomingAudioPackets + private val _incomingVoicePackets = MutableSharedFlow() + override val incomingVoicePackets: SharedFlow get() = _incomingVoicePackets + override val incomingAudioFrames: Flow> get() = incomingAudioPackets.map { it.ssrc to AudioFrame(it.payload.toByteArray()) } @@ -86,6 +151,97 @@ public class DefaultStreams( override val ssrcToUser: Map get() = _ssrcToUser.value } 
+/** Strategy used by DefaultStreams.listen: starts consuming datagrams from udp inside scope and publishes the decrypted results to the two flows. */ private interface DecryptionDelegate { + fun listenForIncoming( + scope: CoroutineScope, + udp: VoiceUdpSocket, + server: SocketAddress, + audioPackets: MutableSharedFlow, /* NOTE(review): the type arguments of MutableSharedFlow appear to be missing in this copy of the patch; the emit calls below send RTPPacket to audioPackets and the decrypted voice packet to voicePackets - confirm against upstream */ + voicePackets: MutableSharedFlow, + ) +} + +/** Delegate backed by a Decrypt instance: datagrams whose address differs from server are skipped, packets for which decrypt returns null are dropped, every decrypted packet is emitted to voicePackets, and an RTPPacket is only built and emitted while audioPackets has subscribers. */ private class NewDecryptionDelegate(private val decrypt: Decrypt) : DecryptionDelegate { + override fun listenForIncoming( + scope: CoroutineScope, + udp: VoiceUdpSocket, + server: SocketAddress, + audioPackets: MutableSharedFlow, + voicePackets: MutableSharedFlow, + ) { + scope.launch { + udp.incoming.collect { datagram -> + if (datagram.address != server) { + return@collect + } + val voicePacket = decrypt.decrypt(datagram.packet) ?: return@collect + voicePackets.emit(voicePacket) + + @OptIn(ExperimentalUnsignedTypes::class) + if (audioPackets.subscriptionCount.value > 0) { + val decryptedAudio = voicePacket.decryptedAudio + val extension = voicePacket.headerExtension + val extensionSize = extension?.let { 4 + it.headerExtension.size * UInt.SIZE_BYTES } ?: 0 /* 4 bytes for the two shorts written at offsets 0 and 2, then one UInt per extension word */ + val data = ByteArray(size = decryptedAudio.size + extensionSize) /* layout: extension bytes (if any) followed by the audio */ + if (extension != null) { + data.writeShortBigEndian(offset = 0, extension.definedByProfile.toShort()) + data.writeShortBigEndian(offset = 2, extension.headerExtension.size.toShort()) + extension.headerExtension.forEachIndexed { index, extensionWord -> + data.writeIntBigEndian(offset = 4 + index * UInt.SIZE_BYTES, extensionWord.toInt()) + } + } + decryptedAudio.copyInto(data, destinationOffset = extensionSize) + audioPackets.emit( + RTPPacket( + paddingBytes = 0u, // TODO explain + payloadType = PayloadType.Audio.raw, // TODO explain + sequence = voicePacket.sequenceNumber, + timestamp = voicePacket.timestamp, + ssrc = voicePacket.ssrc, + csrcIdentifiers = voicePacket.csrcs.copyOf(), + hasMarker = false, // TODO explain + hasExtension = extension != null, + // TODO explain + payload = ByteArrayView.from(data, start = extensionSize, end = data.size)!!, /* the view covers only the audio part of data, the extension bytes sit in front of it */ + ) + ) + } + } + } + } +} + +private class LegacyDecryptionDelegate( + private 
val key: ByteArray, + private val streams: DefaultStreams, + private val nonceStrategy: @Suppress("DEPRECATION") NonceStrategy, +) : DecryptionDelegate { + override fun listenForIncoming( + scope: CoroutineScope, + udp: VoiceUdpSocket, + server: SocketAddress, + audioPackets: MutableSharedFlow, + voicePackets: MutableSharedFlow, + ) = with(streams) { + scope.listenForIncoming(udp, key, server, audioPackets, nonceStrategy) { rtpPacket -> + if (voicePackets.subscriptionCount.value > 0) { + voicePackets.emit( + @OptIn(ExperimentalUnsignedTypes::class) + DecryptedVoicePacket( + sequenceNumber = rtpPacket.sequence, + timestamp = rtpPacket.timestamp, + ssrc = rtpPacket.ssrc, + csrcs = rtpPacket.csrcIdentifiers.copyOf(), + headerExtension = null, + decryptedAudio = rtpPacket.payload.toByteArray(), + ) + ) + } + } + } +} + +@Suppress("DEPRECATION") private fun Flow.decrypt(nonceStrategy: NonceStrategy, key: ByteArray): Flow { val codec = XSalsa20Poly1305Codec(key) val nonceBuffer = ByteArray(TweetNaclFast.SecretBox.nonceLength).mutableCursor() diff --git a/voice/src/main/kotlin/streams/NOPStreams.kt b/voice/src/main/kotlin/streams/NOPStreams.kt index 3b914f6095a..33811861908 100644 --- a/voice/src/main/kotlin/streams/NOPStreams.kt +++ b/voice/src/main/kotlin/streams/NOPStreams.kt @@ -3,6 +3,9 @@ package dev.kord.voice.streams import dev.kord.common.annotation.KordVoice import dev.kord.common.entity.Snowflake import dev.kord.voice.AudioFrame +import dev.kord.voice.EncryptionMode +import dev.kord.voice.XSalsa20_FUNCTION_DEPRECATION +import dev.kord.voice.udp.DecryptedVoicePacket import dev.kord.voice.udp.RTPPacket import io.ktor.network.sockets.* import kotlinx.coroutines.flow.Flow @@ -10,10 +13,22 @@ import kotlinx.coroutines.flow.flow @KordVoice public object NOPStreams : Streams { + @Deprecated( + "This functions always uses XSalsa20 Poly1305 encryption. Pass an explicit 'EncryptionMode' instead. 
" + + XSalsa20_FUNCTION_DEPRECATION, + ReplaceWith( + "this.listen(key, server, EncryptionMode.AeadXChaCha20Poly1305RtpSize)", + imports = ["dev.kord.voice.EncryptionMode"], + ), + DeprecationLevel.WARNING, + ) override suspend fun listen(key: ByteArray, server: SocketAddress) {} + override suspend fun listen(key: ByteArray, server: SocketAddress, encryptionMode: EncryptionMode) {} + override val incomingAudioPackets: Flow = flow { } + override val incomingVoicePackets: Flow = flow { } override val incomingAudioFrames: Flow> = flow { } override val incomingUserStreams: Flow> = flow { } override val ssrcToUser: Map = emptyMap() -} \ No newline at end of file +} diff --git a/voice/src/main/kotlin/streams/Streams.kt b/voice/src/main/kotlin/streams/Streams.kt index 12ac383f11a..8ccc4725f6d 100644 --- a/voice/src/main/kotlin/streams/Streams.kt +++ b/voice/src/main/kotlin/streams/Streams.kt @@ -3,6 +3,9 @@ package dev.kord.voice.streams import dev.kord.common.annotation.KordVoice import dev.kord.common.entity.Snowflake import dev.kord.voice.AudioFrame +import dev.kord.voice.EncryptionMode +import dev.kord.voice.XSalsa20_FUNCTION_DEPRECATION +import dev.kord.voice.udp.DecryptedVoicePacket import dev.kord.voice.udp.RTPPacket import io.ktor.network.sockets.* import kotlinx.coroutines.flow.Flow @@ -15,13 +18,33 @@ public interface Streams { /** * Starts propagating packets from [server] with the following [key] to decrypt the incoming frames. */ + @Deprecated( + "This functions always uses XSalsa20 Poly1305 encryption. Pass an explicit 'EncryptionMode' instead. " + + XSalsa20_FUNCTION_DEPRECATION, + ReplaceWith( + "this.listen(key, server, EncryptionMode.AeadXChaCha20Poly1305RtpSize)", + imports = ["dev.kord.voice.EncryptionMode"], + ), + DeprecationLevel.WARNING, + ) public suspend fun listen(key: ByteArray, server: SocketAddress) + /** + * Starts propagating packets from [server] with the following [key] to decrypt the incoming frames according to + * [encryptionMode]. 
+ */ + public suspend fun listen(key: ByteArray, server: SocketAddress, encryptionMode: EncryptionMode) + /** * A flow of all incoming [dev.kord.voice.udp.RTPPacket]s through the UDP connection. */ public val incomingAudioPackets: Flow + /** + * A flow of all incoming [DecryptedVoicePacket]s through the UDP connection. + */ + public val incomingVoicePackets: Flow + /** * A flow of all incoming [AudioFrame]s mapped to their [ssrc][UInt]. */ diff --git a/voice/src/main/kotlin/udp/AudioFrameSender.kt b/voice/src/main/kotlin/udp/AudioFrameSender.kt index ebb5e79be68..a61fb224625 100644 --- a/voice/src/main/kotlin/udp/AudioFrameSender.kt +++ b/voice/src/main/kotlin/udp/AudioFrameSender.kt @@ -3,7 +3,9 @@ package dev.kord.voice.udp import dev.kord.common.annotation.KordVoice +import dev.kord.voice.EncryptionMode import dev.kord.voice.FrameInterceptorConfiguration +import dev.kord.voice.XSalsa20_CONSTRUCTOR_DEPRECATION import io.ktor.network.sockets.* @KordVoice @@ -11,8 +13,33 @@ public data class AudioFrameSenderConfiguration( val server: SocketAddress, val ssrc: UInt, val key: ByteArray, - val interceptorConfiguration: FrameInterceptorConfiguration -) + val interceptorConfiguration: FrameInterceptorConfiguration, + val encryptionMode: EncryptionMode, +) { + @Deprecated( + "An 'AudioFrameSenderConfiguration' instance must be created with an 'encryptionMode'. 
" + + XSalsa20_CONSTRUCTOR_DEPRECATION, + level = DeprecationLevel.WARNING, + ) + public constructor( + server: SocketAddress, ssrc: UInt, key: ByteArray, interceptorConfiguration: FrameInterceptorConfiguration, + ) : this( + server = server, ssrc = ssrc, key = key, interceptorConfiguration = interceptorConfiguration, + encryptionMode = EncryptionMode.from("AudioFrameSenderConfiguration.encryptionMode placeholder"), + ) + + @Deprecated( + "Kept for binary compatibility, this function will be removed in 0.19.0.", + level = DeprecationLevel.HIDDEN, + ) + public fun copy( + server: SocketAddress = this.server, ssrc: UInt = this.ssrc, key: ByteArray = this.key, + interceptorConfiguration: FrameInterceptorConfiguration = this.interceptorConfiguration, + ): AudioFrameSenderConfiguration = AudioFrameSenderConfiguration( + server = server, ssrc = ssrc, key = key, interceptorConfiguration = interceptorConfiguration, + encryptionMode = this.encryptionMode, + ) +} @KordVoice public interface AudioFrameSender { diff --git a/voice/src/main/kotlin/udp/AudioPacketProvider.kt b/voice/src/main/kotlin/udp/AudioPacketProvider.kt index 05f2e522d4c..0b77aa50303 100644 --- a/voice/src/main/kotlin/udp/AudioPacketProvider.kt +++ b/voice/src/main/kotlin/udp/AudioPacketProvider.kt @@ -1,22 +1,104 @@ package dev.kord.voice.udp -import com.iwebpp.crypto.TweetNaclFast -import dev.kord.voice.encryption.XSalsa20Poly1305Codec -import dev.kord.voice.encryption.strategies.NonceStrategy +import com.iwebpp.crypto.* +import dev.kord.voice.EncryptionMode +import dev.kord.voice.XSalsa20_CONSTRUCTOR_DEPRECATION +import dev.kord.voice.XSalsa20_PROPERTY_DEPRECATION +import dev.kord.voice.encryption.AeadAes256GcmRtpSizeVoicePacketCreator +import dev.kord.voice.encryption.AeadXChaCha20Poly1305RtpSizeVoicePacketCreator +import dev.kord.voice.encryption.EncryptedVoicePacketCreator +import dev.kord.voice.encryption.* +import dev.kord.voice.encryption.strategies.* import dev.kord.voice.io.ByteArrayView import 
dev.kord.voice.io.MutableByteArrayCursor import dev.kord.voice.io.mutableCursor import dev.kord.voice.io.view -public abstract class AudioPacketProvider(public val key: ByteArray, public val nonceStrategy: NonceStrategy) { +public abstract class AudioPacketProvider internal constructor( + private val strategy: @Suppress("DEPRECATION") NonceStrategy?, + public val key: ByteArray, +) { + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. An 'AudioPacketProvider' " + + "instance can be created without a 'nonceStrategy' in which case this property throws an " + + "'UnsupportedOperationException'. $XSalsa20_PROPERTY_DEPRECATION", + level = DeprecationLevel.WARNING, + ) + public val nonceStrategy: @Suppress("DEPRECATION") NonceStrategy + get() = strategy ?: throw UnsupportedOperationException( + "This AudioPacketProvider instance was created without a nonceStrategy." + ) + + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. Construct an " + + "'AudioPacketProvider' instance without a 'nonceStrategy' instead. 
$XSalsa20_CONSTRUCTOR_DEPRECATION", + ReplaceWith("AudioPacketProvider(key)", imports = ["dev.kord.voice.udp.AudioPacketProvider"]), + DeprecationLevel.WARNING, + ) + public constructor(key: ByteArray, nonceStrategy: @Suppress("DEPRECATION") NonceStrategy) : + this(strategy = nonceStrategy, key = key) + + public constructor(key: ByteArray) : this(strategy = null, key = key) + public abstract fun provide(sequence: UShort, timestamp: UInt, ssrc: UInt, data: ByteArray): ByteArrayView } private class CouldNotEncryptDataException(data: ByteArray) : RuntimeException("Couldn't encrypt the following data: [${data.joinToString(", ")}]") -public class DefaultAudioPacketProvider(key: ByteArray, nonceStrategy: NonceStrategy) : - AudioPacketProvider(key, nonceStrategy) { +public class DefaultAudioPacketProvider internal constructor( + key: ByteArray, encryptionMode: EncryptionMode?, nonceStrategy: @Suppress("DEPRECATION") NonceStrategy?, +) : AudioPacketProvider(nonceStrategy, key) { + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. Construct a " + + "'DefaultAudioPacketProvider' instance with an 'EncryptionMode' instead of a 'nonceStrategy'. " + + XSalsa20_CONSTRUCTOR_DEPRECATION, + ReplaceWith( + "DefaultAudioPacketProvider(key, EncryptionMode.AeadXChaCha20Poly1305RtpSize)", + imports = ["dev.kord.voice.udp.DefaultAudioPacketProvider", "dev.kord.voice.EncryptionMode"], + ), + DeprecationLevel.WARNING, + ) + public constructor(key: ByteArray, nonceStrategy: @Suppress("DEPRECATION") NonceStrategy) : + this(key = key, encryptionMode = null, nonceStrategy = nonceStrategy) + + public constructor(key: ByteArray, encryptionMode: EncryptionMode) : + this(key = key, encryptionMode = encryptionMode, nonceStrategy = null) + + private val delegate = if (nonceStrategy != null) { + LegacyProviderDelegate(key, nonceStrategy) + } else @Suppress("DEPRECATION") when (encryptionMode!!) 
{ + EncryptionMode.AeadAes256GcmRtpSize -> + EncryptedPacketCreatorProviderDelegate(AeadAes256GcmRtpSizeVoicePacketCreator(key)) + EncryptionMode.AeadXChaCha20Poly1305RtpSize -> + EncryptedPacketCreatorProviderDelegate(AeadXChaCha20Poly1305RtpSizeVoicePacketCreator(key)) + EncryptionMode.XSalsa20Poly1305 -> LegacyProviderDelegate(key, NormalNonceStrategy()) + EncryptionMode.XSalsa20Poly1305Lite -> LegacyProviderDelegate(key, LiteNonceStrategy()) + EncryptionMode.XSalsa20Poly1305Suffix -> LegacyProviderDelegate(key, SuffixNonceStrategy()) + is EncryptionMode.Unknown -> throw UnsupportedOperationException("Unknown encryption mode $encryptionMode") + } + + override fun provide(sequence: UShort, timestamp: UInt, ssrc: UInt, data: ByteArray): ByteArrayView = + delegate.provide(sequence, timestamp, ssrc, data) +} + +private interface ProviderDelegate { + fun provide(sequence: UShort, timestamp: UInt, ssrc: UInt, data: ByteArray): ByteArrayView +} + +private class EncryptedPacketCreatorProviderDelegate( + private val packetCreator: EncryptedVoicePacketCreator, +) : ProviderDelegate { + private val lock = Any() // TODO do we need this lock? 
+ override fun provide(sequence: UShort, timestamp: UInt, ssrc: UInt, data: ByteArray): ByteArrayView = + synchronized(lock) { + packetCreator.createEncryptedVoicePacket(sequence, timestamp, ssrc, audioPlaintext = data).view() + } +} + +@Suppress("DEPRECATION") +private class LegacyProviderDelegate(key: ByteArray, private val nonceStrategy: NonceStrategy) : ProviderDelegate { + private val codec = XSalsa20Poly1305Codec(key) private val packetBuffer = ByteArray(2048) diff --git a/voice/src/main/kotlin/udp/DecryptedVoicePacket.kt b/voice/src/main/kotlin/udp/DecryptedVoicePacket.kt new file mode 100644 index 00000000000..7f16a777f76 --- /dev/null +++ b/voice/src/main/kotlin/udp/DecryptedVoicePacket.kt @@ -0,0 +1,31 @@ +package dev.kord.voice.udp + +public class DecryptedVoicePacket @ExperimentalUnsignedTypes constructor( + public val sequenceNumber: UShort, + public val timestamp: UInt, + public val ssrc: UInt, + @property:ExperimentalUnsignedTypes + public val csrcs: UIntArray, + public val headerExtension: HeaderExtension?, + public val decryptedAudio: ByteArray, +) { + @OptIn(ExperimentalUnsignedTypes::class) + public constructor( + sequenceNumber: UShort, + timestamp: UInt, + ssrc: UInt, + headerExtension: HeaderExtension?, + decryptedAudio: ByteArray, + ) : this(sequenceNumber, timestamp, ssrc, csrcs = EMPTY_UINT_ARRAY, headerExtension, decryptedAudio) + + public class HeaderExtension @ExperimentalUnsignedTypes constructor( + public val definedByProfile: UShort, + @property:ExperimentalUnsignedTypes + public val headerExtension: UIntArray, + ) + + internal companion object { + @ExperimentalUnsignedTypes + internal val EMPTY_UINT_ARRAY = UIntArray(size = 0) + } +} diff --git a/voice/src/main/kotlin/udp/DefaultAudioFrameSender.kt b/voice/src/main/kotlin/udp/DefaultAudioFrameSender.kt index 3edeb832c77..ea9b9e709e2 100644 --- a/voice/src/main/kotlin/udp/DefaultAudioFrameSender.kt +++ b/voice/src/main/kotlin/udp/DefaultAudioFrameSender.kt @@ -1,10 +1,8 @@ package 
dev.kord.voice.udp import dev.kord.common.annotation.KordVoice -import dev.kord.voice.AudioFrame -import dev.kord.voice.AudioProvider -import dev.kord.voice.FrameInterceptor -import dev.kord.voice.encryption.strategies.NonceStrategy +import dev.kord.voice.* +import dev.kord.voice.encryption.strategies.* import io.github.oshai.kotlinlogging.KotlinLogging import io.ktor.network.sockets.* import io.ktor.utils.io.core.* @@ -17,12 +15,110 @@ import kotlin.random.Random private val audioFrameSenderLogger = KotlinLogging.logger { } @KordVoice -public data class DefaultAudioFrameSenderData( - val udp: VoiceUdpSocket, - val interceptor: FrameInterceptor, - val provider: AudioProvider, - val nonceStrategy: NonceStrategy, -) +public class DefaultAudioFrameSenderData private constructor(private val wrapper: Wrapper) { + private data class Wrapper( + val udp: VoiceUdpSocket, + val interceptor: FrameInterceptor, + val provider: AudioProvider, + val nonceStrategy: @Suppress("DEPRECATION") NonceStrategy?, + ) + + internal val strategy get() = wrapper.nonceStrategy + + public constructor(udp: VoiceUdpSocket, interceptor: FrameInterceptor, provider: AudioProvider) : + this(Wrapper(udp = udp, interceptor = interceptor, provider = provider, nonceStrategy = null)) + + public val udp: VoiceUdpSocket get() = wrapper.udp + public val interceptor: FrameInterceptor get() = wrapper.interceptor + public val provider: AudioProvider get() = wrapper.provider + public operator fun component1(): VoiceUdpSocket = wrapper.udp + public operator fun component2(): FrameInterceptor = wrapper.interceptor + public operator fun component3(): AudioProvider = wrapper.provider + override fun equals(other: Any?): Boolean = other is DefaultAudioFrameSenderData && this.wrapper == other.wrapper + override fun hashCode(): Int = wrapper.hashCode() + override fun toString(): String = when (val n = wrapper.nonceStrategy) { + null -> "DefaultAudioFrameSenderData(udp=${wrapper.udp}, interceptor=${wrapper.interceptor}, 
" + + "provider=${wrapper.provider})" + else -> "DefaultAudioFrameSenderData(udp=${wrapper.udp}, interceptor=${wrapper.interceptor}, " + + "provider=${wrapper.provider}, nonceStrategy=$n)" + } + + public fun copy( + udp: VoiceUdpSocket = wrapper.udp, interceptor: FrameInterceptor = wrapper.interceptor, + provider: AudioProvider = wrapper.provider, + ): DefaultAudioFrameSenderData = DefaultAudioFrameSenderData( + Wrapper(udp = udp, interceptor = interceptor, provider = provider, nonceStrategy = wrapper.nonceStrategy) + ) + + internal constructor( + udpSocket: VoiceUdpSocket, frameInterceptor: FrameInterceptor, + strategy: @Suppress("DEPRECATION") NonceStrategy?, audioProvider: AudioProvider, + ) : this( + Wrapper(udp = udpSocket, interceptor = frameInterceptor, provider = audioProvider, nonceStrategy = strategy) + ) + + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. Construct a " + + "'DefaultAudioFrameSenderData' instance without a 'nonceStrategy' instead. " + + XSalsa20_CONSTRUCTOR_DEPRECATION, + ReplaceWith( + "DefaultAudioFrameSenderData(udp, interceptor, provider)", + imports = ["dev.kord.voice.udp.DefaultAudioFrameSenderData"], + ), + DeprecationLevel.WARNING, + ) + public constructor( + udp: VoiceUdpSocket, interceptor: FrameInterceptor, provider: AudioProvider, + nonceStrategy: @Suppress("DEPRECATION") NonceStrategy, + ) : this(Wrapper(udp = udp, interceptor = interceptor, provider = provider, nonceStrategy = nonceStrategy)) + + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. A 'DefaultAudioFrameSenderData' " + + "instance can be created without a 'nonceStrategy' in which case this property throws an " + + "'UnsupportedOperationException'. 
$XSalsa20_PROPERTY_DEPRECATION", + level = DeprecationLevel.WARNING, + ) + public val nonceStrategy: @Suppress("DEPRECATION") NonceStrategy + get() = wrapper.nonceStrategy ?: throw UnsupportedOperationException( + "This DefaultAudioFrameSenderData instance was created without a nonceStrategy." + ) + + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. A 'DefaultAudioFrameSenderData' " + + "instance can be created without a 'nonceStrategy' in which case this function throws an " + + "'UnsupportedOperationException'. $XSalsa20_FUNCTION_DEPRECATION", + level = DeprecationLevel.WARNING, + ) + public operator fun component4(): @Suppress("DEPRECATION") NonceStrategy = + wrapper.nonceStrategy ?: throw UnsupportedOperationException( + "This DefaultAudioFrameSenderData instance was created without a nonceStrategy." + ) + + @Deprecated( + "The 'nonceStrategy' property is only used for XSalsa20 Poly1305 encryption. Create a copy of this " + + "'DefaultAudioFrameSenderData' instance without a 'nonceStrategy' instead. 
$XSalsa20_FUNCTION_DEPRECATION", + ReplaceWith("this.copy(udp = udp, interceptor = interceptor, provider = provider)"), + DeprecationLevel.WARNING, + ) + public fun copy( + udp: VoiceUdpSocket = wrapper.udp, interceptor: FrameInterceptor = wrapper.interceptor, + provider: AudioProvider = wrapper.provider, + nonceStrategy: @Suppress("DEPRECATION") NonceStrategy = NONCE_STRATEGY_SENTINEL, + ): DefaultAudioFrameSenderData = when { + // nonceStrategy was not overridden, keep the old one (which might be null) + nonceStrategy === NONCE_STRATEGY_SENTINEL -> DefaultAudioFrameSenderData( + Wrapper(udp = udp, interceptor = interceptor, provider = provider, nonceStrategy = wrapper.nonceStrategy) + ) + else -> DefaultAudioFrameSenderData( + Wrapper(udp = udp, interceptor = interceptor, provider = provider, nonceStrategy = nonceStrategy) + ) + } + + private companion object { + @Suppress("DEPRECATION") // used as a sentinel value by comparing the identity with === + private val NONCE_STRATEGY_SENTINEL: NonceStrategy = SuffixNonceStrategy() + } +} @KordVoice public class DefaultAudioFrameSender( @@ -31,7 +127,7 @@ public class DefaultAudioFrameSender( override suspend fun start(configuration: AudioFrameSenderConfiguration): Unit = coroutineScope { var sequence: UShort = Random.nextBits(UShort.SIZE_BITS).toUShort() - val packetProvider = DefaultAudioPacketProvider(configuration.key, data.nonceStrategy) + val packetProvider = DefaultAudioPacketProvider(configuration.key, configuration.encryptionMode, data.strategy) val frames = Channel(Channel.RENDEZVOUS) with(data.provider) { launch { provideFrames(frames) } } diff --git a/voice/src/main/kotlin/udp/RTPPacket.kt b/voice/src/main/kotlin/udp/RTPPacket.kt index 56a89078e20..2eba4e56562 100644 --- a/voice/src/main/kotlin/udp/RTPPacket.kt +++ b/voice/src/main/kotlin/udp/RTPPacket.kt @@ -10,6 +10,9 @@ import kotlinx.io.readUInt import kotlinx.io.readUShort import kotlin.experimental.and +// this size doesn't contain the optional CSRCs 
and header extension +// https://discord.com/developers/docs/topics/voice-connections#transport-encryption-modes-voice-packet-structure +// TODO rename to something with min size internal const val RTP_HEADER_LENGTH = 12 /**