diff --git a/app/recordings/app_languages.php b/app/recordings/app_languages.php index f131227f63..11148db5d6 100644 --- a/app/recordings/app_languages.php +++ b/app/recordings/app_languages.php @@ -494,6 +494,58 @@ $text['header']['zh-cn'] = "上传录音"; $text['header']['ja-jp'] = "録音をアップロードする"; $text['header']['ko-kr'] = "녹음 업로드"; +$text['label-model']['en-us'] = "Model"; +$text['label-model']['en-gb'] = "Model"; +$text['label-model']['ar-eg'] = ""; +$text['label-model']['de-at'] = ""; +$text['label-model']['de-ch'] = ""; +$text['label-model']['de-de'] = ""; +$text['label-model']['el-gr'] = ""; +$text['label-model']['es-cl'] = ""; +$text['label-model']['es-mx'] = ""; +$text['label-model']['fr-ca'] = ""; +$text['label-model']['fr-fr'] = ""; +$text['label-model']['he-il'] = ""; +$text['label-model']['it-it'] = ""; +$text['label-model']['nl-nl'] = ""; +$text['label-model']['pl-pl'] = ""; +$text['label-model']['pt-br'] = ""; +$text['label-model']['pt-pt'] = ""; +$text['label-model']['ro-ro'] = ""; +$text['label-model']['ru-ru'] = ""; +$text['label-model']['sv-se'] = ""; +$text['label-model']['uk-ua'] = ""; +$text['label-model']['tr-tr'] = ""; +$text['label-model']['zh-cn'] = ""; +$text['label-model']['ja-jp'] = ""; +$text['label-model']['ko-kr'] = ""; + +$text['description-model']['en-us'] = "Model the engine will use."; +$text['description-model']['en-gb'] = "Model the engine will use."; +$text['description-model']['ar-eg'] = ""; +$text['description-model']['de-at'] = ""; +$text['description-model']['de-ch'] = ""; +$text['description-model']['de-de'] = ""; +$text['description-model']['el-gr'] = ""; +$text['description-model']['es-cl'] = ""; +$text['description-model']['es-mx'] = ""; +$text['description-model']['fr-ca'] = ""; +$text['description-model']['fr-fr'] = ""; +$text['description-model']['he-il'] = ""; +$text['description-model']['it-it'] = ""; +$text['description-model']['nl-nl'] = ""; +$text['description-model']['pl-pl'] = ""; +$text['description-model']['pt-br'] = ""; +$text['description-model']['pt-pt'] = ""; +$text['description-model']['ro-ro'] = ""; +$text['description-model']['ru-ru'] = ""; +$text['description-model']['sv-se'] = ""; +$text['description-model']['uk-ua'] = ""; +$text['description-model']['tr-tr'] = ""; +$text['description-model']['zh-cn'] = ""; +$text['description-model']['ja-jp'] = ""; +$text['description-model']['ko-kr'] = ""; + $text['label-voice']['en-us'] = "Voice"; $text['label-voice']['en-gb'] = "Voice"; $text['label-voice']['ar-eg'] = ""; diff --git a/app/recordings/recording_edit.php b/app/recordings/recording_edit.php index 2bf45f1649..79820cf478 100644 --- a/app/recordings/recording_edit.php +++ b/app/recordings/recording_edit.php @@ -59,6 +59,7 @@ if (($speech_enabled == 'true' && !empty($speech_engine)) || ($transcribe_enabled == 'true' && !empty($transcribe_engine))) { $ai = new ai($settings); $voices = $ai->get_voices(); + $models = $ai->get_models(); $translate_enabled = false; $language_enabled = false; //$translate_enabled = $ai->get_translate_enabled(); @@ -77,6 +78,7 @@ $recording_filename_original = $_POST["recording_filename_original"]; $recording_name = $_POST["recording_name"]; $recording_voice = $_POST["recording_voice"]; + $recording_model = $_POST["recording_model"]; $recording_language = $_POST["recording_language"]; //$translate = $_POST["translate"]; $recording_message = $_POST["recording_message"]; @@ -152,6 +154,11 @@ $recording_voice = 'alloy'; } + //set the default value + if (empty($recording_model)) { + $recording_model = $settings->get('ai', 'speech_model', ''); + } + //set the recording format if (empty($recording_format)) { $recording_format = 'wav'; @@ -183,6 +190,7 @@ $ai->audio_path = $recording_path; $ai->audio_filename = $recording_filename; $ai->audio_format = $recording_format; + $ai->audio_model = $recording_model ?? ''; $ai->audio_voice = $recording_voice; //$ai->audio_language = $recording_language; //$ai->audio_translate = $translate; @@ -299,6 +307,28 @@ } if ($speech_enabled == 'true' || $transcribe_enabled == 'true') { + //models + if (!empty($models)) { + echo "\n"; + echo "\n"; + echo " ".$text['label-model']."\n"; + echo "\n"; + echo "\n"; + echo " \n"; + } + else { + echo " \n"; + } + echo "
\n"; + echo $text['description-model']."\n"; + echo "\n"; + echo "\n"; + //voices echo "\n"; echo "\n"; echo " ".$text['label-voice']."\n"; @@ -316,8 +346,12 @@ if (!empty($voices)) { echo " \n"; } diff --git a/app/recordings/recordings.php b/app/recordings/recordings.php index 98c6c85111..f9396b8455 100644 --- a/app/recordings/recordings.php +++ b/app/recordings/recordings.php @@ -384,6 +384,9 @@ } } +//set the default value for speech + $speech_enabled = !empty($_SESSION['ai']['speech_enabled']['boolean']) && !empty($_SESSION['ai']['speech_engine']['text']); + //create token $object = new token; $token = $object->create($_SERVER['PHP_SELF']); @@ -502,7 +505,7 @@ foreach ($recordings as $row) { //playback progress bar if (permission_exists('recording_play')) { - echo "\n"; + echo "\n"; echo "\n"; // dummy row to maintain alternating background color } if (permission_exists('recording_edit')) { diff --git a/core/ai/resources/classes/ai.php b/core/ai/resources/classes/ai.php index 2211727fc8..649ffe6d9e 100644 --- a/core/ai/resources/classes/ai.php +++ b/core/ai/resources/classes/ai.php @@ -27,6 +27,7 @@ if (!class_exists('ai')) { public $audio_path; public $audio_filename; public $audio_format; + public $audio_model; public $audio_voice; public $audio_language; public $audio_message; @@ -65,6 +66,21 @@ if (!class_exists('ai')) { return $object->get_voices(); } + /** + * get_voices - get the list voices + */ + public function get_models() : array { + + //set the class interface to use the _template suffix + $classname = 'ai_'.$this->speech_engine; + + //create the object + $object = new $classname($this->settings); + + //return the voices array + return $object->get_models(); + } + /** * is_translate_enabled - get whether the engine can do translations */ @@ -127,6 +143,7 @@ if (!class_exists('ai')) { $object->set_filename($this->audio_filename); $object->set_format($this->audio_format); $object->set_voice($this->audio_voice); + $object->set_model($this->audio_model); //$object->set_language($this->audio_language); //$object->set_translate($this->audio_translate); $object->set_message($this->audio_message); diff --git a/core/ai/resources/classes/ai_elevenlabs.php b/core/ai/resources/classes/ai_elevenlabs.php new file mode 100644 index 0000000000..44aa57168e --- /dev/null +++ b/core/ai/resources/classes/ai_elevenlabs.php @@ -0,0 +1,183 @@ + + Portions created by the Initial Developer are Copyright (C) 2008-2018 + the Initial Developer. All Rights Reserved. + + Contributor(s): + Mark J Crane + Tim Fry + */ + +/** + * ai_elevenlabs class + * + */ +class ai_elevenlabs implements ai_speech { + + private $voice; + private $path; + private $message; + private $format; + private $filename; + private $languages; + private $transcribe_key; + private $speech_key; + private $model; + + public function __construct($settings) { + $this->voice = ""; + $this->path = ""; + $this->message = ""; + $this->format = ""; + $this->filename = ""; + //build the setting object and get the recording path + $this->transcribe_key = $settings->get('ai', 'transcribe_key'); + $this->speech_key = $settings->get('ai', 'speech_key'); + } + + public function set_filename(string $audio_filename) { + $this->filename = $audio_filename; + } + + public function set_format(string $audio_format) { + $this->format = $audio_format; + } + + public function set_message(string $audio_message) { + $this->message = $audio_message; + } + + public function set_path(string $audio_path) { + $this->path = $audio_path; + } + + public function set_voice(string $audio_voice) { + $this->voice = $audio_voice; + } + + public function speech(): bool { + $model_id = $this->model; + $ch = curl_init('https://api.elevenlabs.io/v1/text-to-speech/' . $this->voice); + $headers = [ + 'Content-Type: application/json', + "xi-api-key: $this->speech_key", + ]; + $encoded_message = json_encode([ + 'model_id' => $model_id, + 'text' => $this->message, + 'voice_settings' => [ + 'similarity_boost' => 1, + 'stability' => 1, + ], + ]); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, $encoded_message); + $response = curl_exec($ch); + $error = curl_error($ch); + $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); +// $curl = new curl('https://api.elevenlabs.io/v1/text-to-speech/' . $this->voice); +// $response = $curl->set_headers($headers)->post($encoded_message); +// $error = $curl->get_error(); +// $http_code = $curl->get_http_code(); +// if ($curl->get_http_code() == 200) { + if ($http_code == 200) { + file_put_contents($this->path . '/' . $this->filename, $response); + return true; + } + return false; + } + + public function is_language_enabled(): bool { + return false; + } + + public function get_languages(): array { + return ['english' => 'English']; + } + + public function get_voices(): array { + $return_value = []; + $url = 'https://api.elevenlabs.io/v1/voices'; + $headers = [ + 'Content-Type: application/json', + "xi-api-key: $this->speech_key", + ]; + $curl = curl_init(); + curl_setopt_array($curl, [ + CURLOPT_URL => $url, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_ENCODING => "", + CURLOPT_MAXREDIRS => 10, + CURLOPT_TIMEOUT => 30, + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, + CURLOPT_CUSTOMREQUEST => "GET", + ]); + + $response = curl_exec($curl); + $error = curl_error($curl); + + curl_close($curl); + if (!empty($response)) { + $json_array = json_decode($response, true); + foreach($json_array['voices'] as $row) { + $voice_id = $row['voice_id']; + $name = $row['name']; + $gender = $row['labels']['gender'] ?? ''; + $accent = $row['labels']['accent'] ?? ''; + $use_case = $row['labels']['use case'] ?? ''; + $recommended_model = $row['high_quality_base_model_ids'][0] ?? ''; + $return_value[$voice_id] = "$name ($gender, $accent"; + if (!empty($use_case)) { + $return_value[$voice_id] .= ", " . $use_case; + } + $return_value[$voice_id] .= ")"; + if (!empty($recommended_model)) { + $return_value[$voice_id] .= " - $recommended_model"; + } + } + } + return $return_value; + } + + public function set_language(string $audio_language) { + $this->languages = $audio_language; + } + + public function set_model(string $model): void { + if (array_key_exists($model, $this->get_models())) { + $this->model = $model; + } else { + throw new \Exception('Model does not exist'); + } + } + + public function get_models(): array { + return [ + 'eleven_turbo_v1' => 'Eleven Turbo v1', + 'eleven_turbo_v2' => 'Eleven Turbo v2', + 'eleven_multilingual_v1' => 'Eleven Multilingual v1', + 'eleven_multilingual_v2' => 'Eleven Multilingual v2', + ]; + } +} diff --git a/core/ai/resources/interfaces/ai_speech.php b/core/ai/resources/interfaces/ai_speech.php index 311b8743f2..33fcbb5a18 100644 --- a/core/ai/resources/interfaces/ai_speech.php +++ b/core/ai/resources/interfaces/ai_speech.php @@ -3,15 +3,17 @@ //define the template class if (!interface_exists('ai_speech')) { interface ai_speech { - public function set_path(string $audio_path); - public function set_filename(string $audio_filename); - public function set_format(string $audio_format); - public function set_voice(string $audio_voice); - public function set_message(string $audio_message); + public function get_languages() : array; + public function get_models(): array; public function get_voices() : array; public function is_language_enabled() : bool; + public function set_filename(string $audio_filename); + public function set_format(string $audio_format); public function set_language(string $audio_language); - public function get_languages() : array; + public function set_message(string $audio_message); + public function set_model(string $audio_model): void; + public function set_path(string $audio_path); + public function set_voice(string $audio_voice); public function speech() : bool; } }