Implement FFmpeg converter and enable mp3 conversion for /sonos

This commit is contained in:
2020-06-19 20:51:53 +02:00
parent 1ce1ae95fe
commit 2fd0da2362
17 changed files with 183 additions and 45 deletions

View File

@@ -41,21 +41,22 @@ old PC-speakers you are able to connect your device to them via line port and
get a complete speech-based notification system.
## The `/wave` endpoint
## The `/{wave,aac,mp3,m4a,wma,flac}` endpoint
```
POST /wave
POST /{wave,aac,mp3,m4a,wma,flac}
{
"text": "The text to be spoken",
"language": "en_US"
}
```
*Returns:* `200 OK` with wave file (`Content-Type: audio/x-wav`)
*Returns:* `200 OK` with proper audio file
The `/wave` endpoint enables you to download a wav file containing speech of the
This endpoint enables you to download an audio file containing speech of the
provided text. The goal of this endpoint is to provide an interface that allows you to establish
a connection between the TTS Server and some other kind of already running TTS system,
which can invoke the HTTP request to your Android device and do something with returned
wav file.
audio file. Note that all files but wav originate right from the wav file through the FFmpeg
converter.
## The `/sonos` endpoint
```

View File

@@ -37,6 +37,7 @@ dependencies {
implementation 'com.bartlomiejpluta:sonos-controller:v.0.2'
implementation 'com.google.dagger:dagger-android:2.15'
implementation 'com.google.dagger:dagger-android-support:2.15'
implementation 'com.github.adrielcafe:AndroidAudioConverter:0.0.8'
kapt 'com.google.dagger:dagger-android-processor:2.15'
kapt 'com.google.dagger:dagger-compiler:2.15'
testImplementation 'junit:junit:4.12'

View File

@@ -58,21 +58,22 @@ POST /say
get a complete speech-based notification system.
</p>
<h2>The <code>/wave</code> endpoint</h2>
<h2>The <code>/{wave,aac,mp3,m4a,wma,flac}</code> endpoint</h2>
<pre>
POST /wave
POST /{wave,aac,mp3,m4a,wma,flac}
{
"text": "The text to be spoken",
"language": "en_US"
}
</pre>
<p><b>Returns:</b> <code>200 OK</code> with wave file (<code>Content-Type: audio/x-wav</code>)</p>
<p><b>Returns:</b> <code>200 OK</code> with proper audio file</p>
<p>
The <code>/wave</code> endpoint enables you to download a wav file containing speech of the
This endpoint enables you to download an audio file containing speech of the
provided text. The goal of this endpoint is to provide an interface that allows you to establish
a connection between the TTS Server and some other kind of already running TTS system,
which can invoke the HTTP request to your Android device and do something with returned
wav file.
audio file. Note that all files but wav originate right from the wav file through the FFmpeg
converter.
</p>
<h2>The <code>/sonos</code> endpoint</h2>

View File

@@ -1,6 +1,7 @@
package com.bartlomiejpluta.ttsserver.core.sonos.worker
import android.content.SharedPreferences
import cafe.adriel.androidaudioconverter.model.AudioFormat
import com.bartlomiejpluta.ttsserver.core.tts.engine.TTSEngine
import com.bartlomiejpluta.ttsserver.core.web.dto.SonosDTO
import com.bartlomiejpluta.ttsserver.core.web.endpoint.Endpoint
@@ -40,7 +41,7 @@ class SonosWorker(
}
private fun prepareTTSFile(data: SonosDTO): String {
val filename = tts.createTTSFile(data.text, data.language).name
val filename = tts.createTTSFile(data.text, data.language, AudioFormat.MP3).name
return "$announcementUrl$filename"
}

View File

@@ -6,12 +6,14 @@ import android.media.AudioAttributes
import android.media.MediaPlayer
import android.net.Uri
import android.speech.tts.TextToSpeech
import cafe.adriel.androidaudioconverter.model.AudioFormat
import com.bartlomiejpluta.ttsserver.core.tts.exception.TTSException
import com.bartlomiejpluta.ttsserver.core.tts.listener.GongListener
import com.bartlomiejpluta.ttsserver.core.tts.listener.TTSProcessListener
import com.bartlomiejpluta.ttsserver.core.tts.model.TTSStream
import com.bartlomiejpluta.ttsserver.core.tts.status.TTSStatus
import com.bartlomiejpluta.ttsserver.core.tts.status.TTSStatusHolder
import com.bartlomiejpluta.ttsserver.core.util.AudioConverter
import com.bartlomiejpluta.ttsserver.ui.preference.PreferenceKey
import java.io.BufferedInputStream
import java.io.File
@@ -23,38 +25,50 @@ class TTSEngine(
private val context: Context,
private val tts: TextToSpeech,
private val ttsStatusHolder: TTSStatusHolder,
private val preferences: SharedPreferences
private val preferences: SharedPreferences,
private val audioConverter: AudioConverter
) {
private val messageDigest = MessageDigest.getInstance("SHA-256")
val status: TTSStatus
get() = ttsStatusHolder.status
fun createTTSFile(text: String, language: Locale): File {
fun createTTSFile(text: String, language: Locale, audioFormat: AudioFormat = AudioFormat.WAV): File {
val digest = hash(text, language)
val filename = "tts_$digest.wav"
val file = File(context.cacheDir, filename)
val targetFilename = "tts_$digest.${audioFormat.format}"
val wavFilename = "tts_$digest.wav"
val wavFile = File(context.cacheDir, wavFilename)
val targetFile = File(context.cacheDir, targetFilename)
file.takeIf { it.exists() }?.let { return it }
targetFile.takeIf { it.exists() }?.let { return it }
val uuid = UUID.randomUUID().toString()
val listener = TTSProcessListener(uuid)
tts.setOnUtteranceProgressListener(listener)
tts.language = language
tts.synthesizeToFile(text, null, file, uuid)
tts.synthesizeToFile(text, null, wavFile, uuid)
listener.await()
return convertFile(wavFile, audioFormat)
}
/**
 * Returns [file] in the requested [audioFormat].
 *
 * A WAV request is answered with [file] itself, untouched. For any other format the file is
 * handed to [audioConverter] and, once the conversion has returned, the source [file] is deleted.
 *
 * @param file the synthesized source file
 * @param audioFormat the format the caller asked for
 * @return [file] for WAV, otherwise the file returned by the converter
 */
private fun convertFile(file: File, audioFormat: AudioFormat): File = when (audioFormat) {
    AudioFormat.WAV -> file
    else -> {
        val converted = audioConverter.convert(file, audioFormat)
        // the source is only removed after convert() has returned normally
        file.delete()
        converted
    }
}
/**
 * Builds the lowercase hexadecimal digest of [text] concatenated with [language],
 * computed with the class-level [messageDigest].
 *
 * @param text the phrase to be spoken
 * @param language the locale the phrase is spoken in
 * @return two hex digits per digest byte, with no separators
 */
private fun hash(text: String, language: Locale): String {
    val payload = "$text$language".toByteArray()
    return messageDigest
        .digest(payload)
        .joinToString(separator = "") { byte -> "%02x".format(byte) }
}
fun fetchTTSStream(text: String, language: Locale): TTSStream {
fun fetchTTSStream(text: String, language: Locale, audioFormat: AudioFormat = AudioFormat.WAV): TTSStream {
val file = createTempFile("tmp_tts_server", ".wav")
val uuid = UUID.randomUUID().toString()
@@ -65,8 +79,9 @@ class TTSEngine(
tts.synthesizeToFile(text, null, file, uuid)
listener.await()
val stream = BufferedInputStream(FileInputStream(file))
val length = file.length()
val converted = convertFile(file, audioFormat)
val stream = BufferedInputStream(FileInputStream(converted))
val length = converted.length()
file.delete()

View File

@@ -0,0 +1,6 @@
package com.bartlomiejpluta.ttsserver.core.tts.exception
/**
 * Signals that an audio file could not be converted to the requested format.
 *
 * @param message human-readable description of the failure
 */
class AudioConversionException(message: String) : Exception(message) {

    /**
     * Creates the exception with an underlying [cause].
     *
     * @param message human-readable description of the failure
     * @param cause the error that triggered this failure
     */
    constructor(message: String, cause: Throwable) : this(message) {
        initCause(cause)
    }
}

View File

@@ -0,0 +1,20 @@
package com.bartlomiejpluta.ttsserver.core.tts.listener
import cafe.adriel.androidaudioconverter.callback.IConvertCallback
import com.bartlomiejpluta.ttsserver.core.tts.exception.AudioConversionException
import java.io.File
import java.util.concurrent.LinkedBlockingQueue
/**
 * Bridges the callback-based [IConvertCallback] API to a blocking call.
 *
 * Whatever the converter reports through [onSuccess] or [onFailure] is placed on an internal
 * queue, so a caller blocked in [await] is released for every reported outcome.
 */
class ConverterListener : IConvertCallback {

    /** Outcome of one conversion, handed from the callback to the waiting caller. */
    private sealed class Outcome {
        class Converted(val file: File) : Outcome()
        class Failed(val error: AudioConversionException) : Outcome()
    }

    private val queue = LinkedBlockingQueue<Outcome>()

    /**
     * Blocks until the converter has reported an outcome.
     *
     * @return the converted file
     * @throws AudioConversionException if the converter reported a failure or no output file
     */
    fun await(): File {
        val outcome = queue.take()
        return when (outcome) {
            is Outcome.Converted -> outcome.file
            is Outcome.Failed -> throw outcome.error
        }
    }

    override fun onSuccess(convertedFile: File?) {
        // LinkedBlockingQueue rejects null elements, so a missing file is reported as a failure
        val outcome: Outcome = convertedFile
            ?.let { Outcome.Converted(it) }
            ?: Outcome.Failed(AudioConversionException("Conversion produced no file"))
        queue.add(outcome)
    }

    override fun onFailure(error: Exception?) {
        // An exception thrown from this callback is not delivered through the queue, so a caller
        // blocked in await() would never be released; hand the failure over instead.
        val failure = error
            ?.let { AudioConversionException("Conversion failed", it) }
            ?: AudioConversionException("Conversion failed")
        queue.add(Outcome.Failed(failure))
    }
}

View File

@@ -0,0 +1,47 @@
package com.bartlomiejpluta.ttsserver.core.util
import android.content.Context
import cafe.adriel.androidaudioconverter.AndroidAudioConverter
import cafe.adriel.androidaudioconverter.callback.ILoadCallback
import cafe.adriel.androidaudioconverter.model.AudioFormat
import com.bartlomiejpluta.ttsserver.core.tts.exception.AudioConversionException
import com.bartlomiejpluta.ttsserver.core.tts.listener.ConverterListener
import java.io.File
/**
 * Wrapper around [AndroidAudioConverter] that exposes conversion as a blocking call.
 *
 * The underlying library is loaded from the constructor; [state] reports the outcome of that
 * load and [convert] refuses to run until the load has succeeded.
 */
class AudioConverter(private val context: Context) {

    /**
     * Load status of the underlying converter.
     *
     * Written by the load callback and read by [convert]; marked volatile so that a write made
     * on a different thread is visible to readers.
     */
    @Volatile
    var state: State = State.UNLOADED
        private set

    /** Error reported by the load callback, kept so [convert] can expose it as the cause. */
    @Volatile
    private var loadError: Exception? = null

    enum class State {
        READY,
        ERROR,
        UNLOADED
    }

    init {
        AndroidAudioConverter.load(context, object : ILoadCallback {
            override fun onSuccess() {
                state = State.READY
            }

            override fun onFailure(error: Exception?) {
                // record the error before publishing the state so readers of ERROR can see it
                loadError = error
                state = State.ERROR
            }
        })
    }

    /**
     * Converts [file] to [format] and blocks until the converter reports the result.
     *
     * @param file the source audio file
     * @param format the target audio format
     * @return the file reported by the converter
     * @throws AudioConversionException if the converter has not been loaded successfully;
     *   the load error, when one was reported, is attached as the cause
     */
    fun convert(file: File, format: AudioFormat): File {
        if (state != State.READY) {
            throw loadError
                ?.let { AudioConversionException("Converter is not ready", it) }
                ?: AudioConversionException("Converter is not ready")
        }

        val listener = ConverterListener()

        AndroidAudioConverter.with(context)
            .setFile(file)
            .setFormat(format)
            .setCallback(listener)
            .convert()

        return listener.await()
    }
}

View File

@@ -4,14 +4,19 @@ enum class Endpoint(val uri: String, val id: Int) {
UNKNOWN("/", 1),
SAY("/say", 2),
WAVE("/wave", 3),
SONOS("/sonos", 4),
SONOS_CACHE("/sonos/*", 5),
GONG("/gong.wav", 6);
AAC("/aac", 4),
MP3("/mp3", 5),
M4A("/m4a", 6),
WMA("/wma", 7),
FLAC("/flac", 8),
SONOS("/sonos", 9),
SONOS_CACHE("/sonos/*", 10),
GONG("/gong.wav", 11);
val trimmedUri: String
get() = uri.replace("*", "")
companion object {
fun of(id: Int) = values().firstOrNull { it.id == id } ?: UNKNOWN
fun of(ordinal: Int) = values().firstOrNull { it.ordinal == ordinal } ?: UNKNOWN
}
}

View File

@@ -8,7 +8,7 @@ object EndpointMatcher {
init {
Endpoint.values().forEach {
uriMatcher.addURI("", it.uri, it.id)
uriMatcher.addURI("", it.uri, it.ordinal)
}
}

View File

@@ -0,0 +1,28 @@
package com.bartlomiejpluta.ttsserver.core.web.mime
import cafe.adriel.androidaudioconverter.model.AudioFormat
import java.io.File
import java.util.*
/**
 * Content types served by the web server.
 *
 * @property mimeType the value sent in the `Content-Type` response header
 */
enum class MimeType(val mimeType: String) {
    AAC("audio/aac"),
    MP3("audio/mpeg"),
    M4A("audio/m4a"),
    WMA("audio/x-ms-wma"),
    WAV("audio/x-wav"),

    // FLAC has its own media type; it was previously a copy of the WAV entry
    FLAC("audio/flac"),
    JSON("application/json");

    companion object {
        /**
         * Maps a converter [AudioFormat] to the matching content type.
         *
         * The `when` has no `else` branch on purpose, so a new [AudioFormat] constant
         * becomes a compile error here instead of a silently wrong content type.
         */
        fun forAudioFormat(audioFormat: AudioFormat) = when (audioFormat) {
            AudioFormat.AAC -> AAC
            AudioFormat.MP3 -> MP3
            AudioFormat.M4A -> M4A
            AudioFormat.WMA -> WMA
            AudioFormat.WAV -> WAV
            AudioFormat.FLAC -> FLAC
        }

        /**
         * Resolves the content type from the extension of [file], ignoring case.
         *
         * @throws IllegalArgumentException if the extension does not name a [MimeType] constant
         */
        fun forFile(file: File) = valueOf(file.extension.toUpperCase(Locale.ROOT))
    }
}

View File

@@ -5,6 +5,7 @@ import android.content.Intent
import android.content.SharedPreferences
import android.net.Uri
import androidx.localbroadcastmanager.content.LocalBroadcastManager
import cafe.adriel.androidaudioconverter.model.AudioFormat
import com.bartlomiejpluta.ttsserver.core.sonos.queue.SonosQueue
import com.bartlomiejpluta.ttsserver.core.tts.engine.TTSEngine
import com.bartlomiejpluta.ttsserver.core.tts.exception.TTSException
@@ -14,6 +15,7 @@ import com.bartlomiejpluta.ttsserver.core.web.dto.SonosDTO
import com.bartlomiejpluta.ttsserver.core.web.endpoint.Endpoint
import com.bartlomiejpluta.ttsserver.core.web.endpoint.EndpointMatcher
import com.bartlomiejpluta.ttsserver.core.web.exception.WebException
import com.bartlomiejpluta.ttsserver.core.web.mime.MimeType
import com.bartlomiejpluta.ttsserver.service.foreground.ForegroundService
import com.bartlomiejpluta.ttsserver.service.state.ServiceState
import com.bartlomiejpluta.ttsserver.ui.preference.PreferenceKey
@@ -57,7 +59,12 @@ class WebServer(
private fun dispatch(it: IHTTPSession): Response {
return when (EndpointMatcher.match(it.uri)) {
Endpoint.SAY -> say(it)
Endpoint.WAVE -> wave(it)
Endpoint.WAVE -> file(it, AudioFormat.WAV)
Endpoint.AAC -> file(it, AudioFormat.AAC)
Endpoint.MP3 -> file(it, AudioFormat.MP3)
Endpoint.M4A -> file(it, AudioFormat.M4A)
Endpoint.WMA -> file(it, AudioFormat.WMA)
Endpoint.FLAC -> file(it, AudioFormat.FLAC)
Endpoint.SONOS -> sonos(it)
Endpoint.SONOS_CACHE -> sonosCache(it)
Endpoint.GONG -> gong(it)
@@ -103,8 +110,8 @@ class WebServer(
}
}
private fun wave(session: IHTTPSession): Response {
if (!preferences.getBoolean(PreferenceKey.ENABLE_WAVE_ENDPOINT, true)) {
private fun file(session: IHTTPSession, audioFormat: AudioFormat): Response {
if (!preferences.getBoolean(PreferenceKey.ENABLE_FILE_ENDPOINTS, true)) {
throw WebException(NOT_FOUND)
}
@@ -118,8 +125,8 @@ class WebServer(
val dto = extractBody(session) { BaseDTO(it) }
val (stream, size) = tts.fetchTTSStream(dto.text, dto.language)
return newFixedLengthResponse(OK, MIME_WAVE, stream, size)
val (stream, size) = tts.fetchTTSStream(dto.text, dto.language, audioFormat)
return newFixedLengthResponse(OK, MimeType.forAudioFormat(audioFormat).mimeType, stream, size)
}
private fun sonos(session: IHTTPSession): Response {
@@ -160,7 +167,7 @@ class WebServer(
val stream = BufferedInputStream(FileInputStream(file))
val size = file.length()
return newFixedLengthResponse(OK, MIME_WAVE, stream, size)
return newFixedLengthResponse(OK, MimeType.forFile(file).mimeType, stream, size)
}
private fun gong(session: IHTTPSession): Response {
@@ -183,7 +190,7 @@ class WebServer(
context.contentResolver.openInputStream(uri) ?: throw TTSException()
)
return newFixedLengthResponse(OK, MIME_WAVE, stream, size)
return newFixedLengthResponse(OK, MimeType.WAV.mimeType, stream, size)
}
override fun start() {
@@ -208,7 +215,6 @@ class WebServer(
companion object {
private const val MIME_JSON = "application/json"
private const val MIME_WAVE = "audio/x-wav"
private const val CONTENT_TYPE = "content-type"
private val SUCCESS_RESPONSE = response("Request has been completed")
private val QUEUED_RESPONSE = response("Request has been queued")

View File

@@ -7,6 +7,7 @@ import androidx.preference.PreferenceManager
import com.bartlomiejpluta.ttsserver.core.sonos.queue.SonosQueue
import com.bartlomiejpluta.ttsserver.core.tts.engine.TTSEngine
import com.bartlomiejpluta.ttsserver.core.tts.status.TTSStatusHolder
import com.bartlomiejpluta.ttsserver.core.util.AudioConverter
import com.bartlomiejpluta.ttsserver.core.util.NetworkUtil
import com.bartlomiejpluta.ttsserver.core.web.server.WebServerFactory
import com.bartlomiejpluta.ttsserver.service.notification.ForegroundNotificationFactory
@@ -32,8 +33,9 @@ class TTSModule {
context: Context,
textToSpeech: TextToSpeech,
ttsStatusHolder: TTSStatusHolder,
preferences: SharedPreferences
) = TTSEngine(context, textToSpeech, ttsStatusHolder, preferences)
preferences: SharedPreferences,
converter: AudioConverter
) = TTSEngine(context, textToSpeech, ttsStatusHolder, preferences, converter)
@Provides
@Singleton
@@ -58,6 +60,10 @@ class TTSModule {
fun networkUtil(context: Context, preferences: SharedPreferences) =
NetworkUtil(context, preferences)
/**
 * Provides the singleton-scoped [AudioConverter] built from the given [context].
 *
 * NOTE(review): the method name is misspelled ("adudio"); it is left as-is here because
 * renaming changes the declared name of this provider.
 */
@Provides
@Singleton
fun adudioConverter(context: Context): AudioConverter {
    return AudioConverter(context)
}
@Provides
@Singleton
fun sonosQueue(tts: TTSEngine, preferences: SharedPreferences, networkUtil: NetworkUtil) =

View File

@@ -5,7 +5,7 @@ object PreferenceKey {
const val PORT = "preference_port"
const val ENABLE_HTTP_DEBUG = "preference_http_debug"
const val ENABLE_SAY_ENDPOINT = "preference_enable_say_endpoint"
const val ENABLE_WAVE_ENDPOINT = "preference_enable_wave_endpoint"
const val ENABLE_FILE_ENDPOINTS = "preference_enable_file_endpoints"
const val ENABLE_SONOS_ENDPOINT = "preference_enable_sonos_endpoint"
const val ENABLE_GONG = "preference_enable_gong"
const val GONG = "preference_gong"

View File

@@ -12,6 +12,7 @@ import androidx.preference.Preference
import androidx.preference.PreferenceFragmentCompat
import androidx.preference.SwitchPreference
import com.bartlomiejpluta.R
import com.bartlomiejpluta.ttsserver.core.web.mime.MimeType
import com.bartlomiejpluta.ttsserver.service.foreground.ForegroundService
import com.bartlomiejpluta.ttsserver.service.state.ServiceState
@@ -19,7 +20,7 @@ import com.bartlomiejpluta.ttsserver.service.state.ServiceState
class PreferencesFragment : PreferenceFragmentCompat() {
private lateinit var portPreference: IntEditTextPreference
private lateinit var sayEndpointPreference: SwitchPreference
private lateinit var waveEndpointPreference: SwitchPreference
private lateinit var fileEndpointPreference: SwitchPreference
private lateinit var sonosEndpointPreference: SwitchPreference
private lateinit var httpDebugPreference: SwitchPreference
private lateinit var enableGongPreference: SwitchPreference
@@ -61,7 +62,7 @@ class PreferencesFragment : PreferenceFragmentCompat() {
portPreference.setOnBindEditTextListener { it.inputType = InputType.TYPE_CLASS_NUMBER }
httpDebugPreference = findPreference(PreferenceKey.ENABLE_HTTP_DEBUG)!!
sayEndpointPreference = findPreference(PreferenceKey.ENABLE_SAY_ENDPOINT)!!
waveEndpointPreference = findPreference(PreferenceKey.ENABLE_WAVE_ENDPOINT)!!
fileEndpointPreference = findPreference(PreferenceKey.ENABLE_FILE_ENDPOINTS)!!
sonosEndpointPreference = findPreference(PreferenceKey.ENABLE_SONOS_ENDPOINT)!!
enableGongPreference = findPreference(PreferenceKey.ENABLE_GONG)!!
enableGongPreference.setOnPreferenceClickListener { preference ->
@@ -90,7 +91,7 @@ class PreferencesFragment : PreferenceFragmentCompat() {
private fun openFilePicker(preference: Preference?) {
if ((preference as SwitchPreference).isChecked) {
val intent = Intent(Intent.ACTION_OPEN_DOCUMENT)
.apply { type = "audio/x-wav" }
.apply { type = MimeType.WAV.mimeType }
.let { Intent.createChooser(it, getString(R.string.preference_gong_picker_prompt)) }
startActivityForResult(intent, PICKFILE_RESULT_CODE)

View File

@@ -19,8 +19,8 @@
<string name="preference_port_title">Server port</string>
<string name="preference_enable_say_endpoint_summary">Allow HTTP clients to use /say endpoint which enables them to say message using builtin speakers or external ones connected to the device</string>
<string name="preference_enable_say_endpoint_title">Enable /say endpoint</string>
<string name="preference_enable_wave_endpoint_summary">Allow HTTP clients to use /wave endpoint which enables them to retrieve TTS message as wave file</string>
<string name="preference_enable_wave_endpoint_title">Enable /wave endpoint</string>
<string name="preference_enable_file_endpoints_summary">Allow HTTP clients to use file endpoints (/wave, /mp3, /flac etc.) which enables them to retrieve TTS message as audio file</string>
<string name="preference_enable_file_endpoints_title">Enable file endpoints</string>
<string name="preference_enable_sonos_endpoint_summary">Allow HTTP clients to use /sonos endpoint which enables them to send TTS messages directly to Sonos devices</string>
<string name="preference_enable_sonos_endpoint_title">Enable /sonos endpoint</string>
<string name="preference_http_debug_summary">Attach the stacktrace to each error HTTP response if available</string>

View File

@@ -27,9 +27,9 @@
<SwitchPreference
android:defaultValue="true"
android:key="preference_enable_wave_endpoint"
android:summary="@string/preference_enable_wave_endpoint_summary"
android:title="@string/preference_enable_wave_endpoint_title"
android:key="preference_enable_file_endpoints"
android:summary="@string/preference_enable_file_endpoints_summary"
android:title="@string/preference_enable_file_endpoints_title"
app:iconSpaceReserved="false" />
<SwitchPreference