Ai elevenlabs io engine (#6932)

* allow settings to load domain and user settings

* ensure the engine is also enabled before attempting to create a new object

* add default values to stop undefined variable warnings

* enforce all models to set and return voices and languages

* enforce all models to set and return languages

* ensure speech_enabled is set by the current session

* create elevenlabs implementation of ai speech

* Update ai_elevenlabs.php
This commit is contained in:
frytimo
2024-03-30 17:16:16 -03:00
committed by GitHub
parent 883c32d932
commit 502ba07c53
6 changed files with 300 additions and 9 deletions

View File

@@ -494,6 +494,58 @@ $text['header']['zh-cn'] = "上传录音";
$text['header']['ja-jp'] = "録音をアップロードする";
$text['header']['ko-kr'] = "녹음 업로드";
$text['label-model']['en-us'] = "Model";
$text['label-model']['en-gb'] = "Model";
$text['label-model']['ar-eg'] = "";
$text['label-model']['de-at'] = "";
$text['label-model']['de-ch'] = "";
$text['label-model']['de-de'] = "";
$text['label-model']['el-gr'] = "";
$text['label-model']['es-cl'] = "";
$text['label-model']['es-mx'] = "";
$text['label-model']['fr-ca'] = "";
$text['label-model']['fr-fr'] = "";
$text['label-model']['he-il'] = "";
$text['label-model']['it-it'] = "";
$text['label-model']['nl-nl'] = "";
$text['label-model']['pl-pl'] = "";
$text['label-model']['pt-br'] = "";
$text['label-model']['pt-pt'] = "";
$text['label-model']['ro-ro'] = "";
$text['label-model']['ru-ru'] = "";
$text['label-model']['sv-se'] = "";
$text['label-model']['uk-ua'] = "";
$text['label-model']['tr-tr'] = "";
$text['label-model']['zh-cn'] = "";
$text['label-model']['ja-jp'] = "";
$text['label-model']['ko-kr'] = "";
$text['description-model']['en-us'] = "Model the engine will use.";
$text['description-model']['en-gb'] = "Model the engine will use.";
$text['description-model']['ar-eg'] = "";
$text['description-model']['de-at'] = "";
$text['description-model']['de-ch'] = "";
$text['description-model']['de-de'] = "";
$text['description-model']['el-gr'] = "";
$text['description-model']['es-cl'] = "";
$text['description-model']['es-mx'] = "";
$text['description-model']['fr-ca'] = "";
$text['description-model']['fr-fr'] = "";
$text['description-model']['he-il'] = "";
$text['description-model']['it-it'] = "";
$text['description-model']['nl-nl'] = "";
$text['description-model']['pl-pl'] = "";
$text['description-model']['pt-br'] = "";
$text['description-model']['pt-pt'] = "";
$text['description-model']['ro-ro'] = "";
$text['description-model']['ru-ru'] = "";
$text['description-model']['sv-se'] = "";
$text['description-model']['uk-ua'] = "";
$text['description-model']['tr-tr'] = "";
$text['description-model']['zh-cn'] = "";
$text['description-model']['ja-jp'] = "";
$text['description-model']['ko-kr'] = "";
$text['label-voice']['en-us'] = "Voice";
$text['label-voice']['en-gb'] = "Voice";
$text['label-voice']['ar-eg'] = "";

View File

@@ -59,6 +59,7 @@
if (($speech_enabled == 'true' && !empty($speech_engine)) || ($transcribe_enabled == 'true' && !empty($transcribe_engine))) {
$ai = new ai($settings);
$voices = $ai->get_voices();
$models = $ai->get_models();
$translate_enabled = false;
$language_enabled = false;
//$translate_enabled = $ai->get_translate_enabled();
@@ -77,6 +78,7 @@
$recording_filename_original = $_POST["recording_filename_original"];
$recording_name = $_POST["recording_name"];
$recording_voice = $_POST["recording_voice"];
$recording_model = $_POST["recording_model"];
$recording_language = $_POST["recording_language"];
//$translate = $_POST["translate"];
$recording_message = $_POST["recording_message"];
@@ -152,6 +154,11 @@
$recording_voice = 'alloy';
}
//set the default value
if (empty($recording_model)) {
$recording_model = $settings->get('ai', 'speech_model', '');
}
//set the recording format
if (empty($recording_format)) {
$recording_format = 'wav';
@@ -183,6 +190,7 @@
$ai->audio_path = $recording_path;
$ai->audio_filename = $recording_filename;
$ai->audio_format = $recording_format;
$ai->audio_model = $recording_model ?? '';
$ai->audio_voice = $recording_voice;
//$ai->audio_language = $recording_language;
//$ai->audio_translate = $translate;
@@ -299,6 +307,28 @@
}
if ($speech_enabled == 'true' || $transcribe_enabled == 'true') {
//models
//the whole table row is emitted only when the engine offers models; the original
//closed the cell and row (and printed the description) even when the row was
//never opened, which produced unbalanced markup
if (!empty($models)) {
	echo "<tr>\n";
	echo "<td class='vncell' valign='top' align='left' nowrap>\n";
	echo " ".$text['label-model']."\n";
	echo "</td>\n";
	echo "<td class='vtable' align='left'>\n";
	echo " <select class='formfld' name='recording_model'>\n";
	echo " <option value=''></option>\n";
	foreach ($models as $model_id => $model_name) {
		//compare as strings so integer-like array keys still match the stored value
		$model_selected = ((string) $model_id === (string) ($recording_model ?? '')) ? "selected='selected'" : '';
		echo " <option value='".escape($model_id)."' ".$model_selected.">".escape($model_name)."</option>\n";
	}
	echo " </select>\n";
	echo "<br />\n";
	echo $text['description-model']."\n";
	echo "</td>\n";
	echo "</tr>\n";
}
else {
	//keep the field in the submitted form so the handler still receives recording_model
	echo " <input class='formfld' type='hidden' name='recording_model' maxlength='255' value=''>\n";
}
//voices
echo "<tr>\n";
echo "<td class='vncell' valign='top' align='left' nowrap>\n";
echo " ".$text['label-voice']."\n";
@@ -316,8 +346,12 @@
if (!empty($voices)) {
echo " <select class='formfld' name='recording_voice'>\n";
echo " <option value=''></option>\n";
foreach($voices as $voice) {
echo " <option value='".escape($voice)."' ".(($voice == $recording_voice) ? "selected='selected'" : null).">".escape($voice)."</option>\n";
//build one option per voice
//list-style arrays (integer keys) submit the voice name itself, keyed arrays
//submit the key and show the value as the label
foreach ($voices as $key => $voice) {
	$option_value = is_int($key) ? $voice : $key;
	//the selected state must be decided on the submitted value, not on the label,
	//otherwise a keyed voice is never re-selected when the form is shown again
	$option_selected = ((string) $option_value === (string) ($recording_voice ?? '')) ? "selected='selected'" : '';
	echo " <option value='".escape($option_value)."' ".$option_selected.">".escape($voice)."</option>\n";
}
echo " </select>\n";
}

View File

@@ -384,6 +384,9 @@
}
}
//set the default value for speech
$speech_enabled = !empty($_SESSION['ai']['speech_enabled']['boolean']) && !empty($_SESSION['ai']['speech_engine']['text']);
//create token
$object = new token;
$token = $object->create($_SERVER['PHP_SELF']);
@@ -502,7 +505,7 @@
foreach ($recordings as $row) {
//playback progress bar
if (permission_exists('recording_play')) {
echo "<tr class='list-row' id='recording_progress_bar_".escape($row['recording_uuid'])."' onclick=\"recording_play('".escape($row['voicemail_greeting_uuid'])."')\" style='display: none;'><td id='playback_progress_bar_background_".escape($row['recording_uuid'])."' class='playback_progress_bar_background' style='padding: 0; border: none;' colspan='".$col_count."'><span class='playback_progress_bar' id='recording_progress_".escape($row['recording_uuid'])."'></span></td><td class='description hide-sm-dn' style='border-bottom: none !important;'></td></tr>\n";
//the click handler is given the same recording_uuid that is used for every element id in this row
echo "<tr class='list-row' id='recording_progress_bar_".escape($row['recording_uuid'])."' onclick=\"recording_play('".escape($row['recording_uuid'])."')\" style='display: none;'><td id='playback_progress_bar_background_".escape($row['recording_uuid'])."' class='playback_progress_bar_background' style='padding: 0; border: none;' colspan='".$col_count."'><span class='playback_progress_bar' id='recording_progress_".escape($row['recording_uuid'])."'></span></td><td class='description hide-sm-dn' style='border-bottom: none !important;'></td></tr>\n";
echo "<tr class='list-row' style='display: none;'><td></td></tr>\n"; // dummy row to maintain alternating background color
}
if (permission_exists('recording_edit')) {

View File

@@ -27,6 +27,7 @@ if (!class_exists('ai')) {
public $audio_path;
public $audio_filename;
public $audio_format;
public $audio_model;
public $audio_voice;
public $audio_language;
public $audio_message;
@@ -65,6 +66,21 @@ if (!class_exists('ai')) {
return $object->get_voices();
}
/**
 * get_models - get the list of models offered by the configured speech engine
 *
 * @return array the array returned by the engine's get_models(); an empty array
 *               when no speech engine is set or its class cannot be found
 */
public function get_models() : array {
	//without a speech engine there is no class to ask
	if (empty($this->speech_engine)) {
		return [];
	}

	//the engine class name is the ai_ prefix followed by the engine name
	$classname = 'ai_'.$this->speech_engine;

	//instantiating an unknown class is a fatal error, report no models instead
	if (!class_exists($classname)) {
		return [];
	}

	//create the object
	$object = new $classname($this->settings);

	//return the models array
	return $object->get_models();
}
/**
* is_translate_enabled - get whether the engine can do translations
*/
@@ -127,6 +143,7 @@ if (!class_exists('ai')) {
$object->set_filename($this->audio_filename);
$object->set_format($this->audio_format);
$object->set_voice($this->audio_voice);
$object->set_model($this->audio_model);
//$object->set_language($this->audio_language);
//$object->set_translate($this->audio_translate);
$object->set_message($this->audio_message);

View File

@@ -0,0 +1,183 @@
<?php
/*
FusionPBX
Version: MPL 1.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is FusionPBX
The Initial Developer of the Original Code is
Mark J Crane <markjcrane@fusionpbx.com>
Portions created by the Initial Developer are Copyright (C) 2008-2018
the Initial Developer. All Rights Reserved.
Contributor(s):
Mark J Crane <markjcrane@fusionpbx.com>
Tim Fry <tim.fry@hotmail.com>
*/
/**
 * ai_elevenlabs class
 *
 * Speech engine that talks to the ElevenLabs HTTP API (api.elevenlabs.io):
 * it lists the available voices and converts a text message into an audio
 * file written to the configured path and filename.
 */
class ai_elevenlabs implements ai_speech {

	private $voice;
	private $path;
	private $message;
	private $format;
	private $filename;
	private $languages;
	private $transcribe_key;
	private $speech_key;
	private $model;

	/**
	 * Set empty defaults and read the API keys from the settings object.
	 *
	 * @param object $settings object providing get(category, subcategory)
	 */
	public function __construct($settings) {
		$this->voice = "";
		$this->path = "";
		$this->message = "";
		$this->format = "";
		$this->filename = "";
		$this->languages = "";
		$this->model = "";

		//read the api keys from the ai settings
		$this->transcribe_key = $settings->get('ai', 'transcribe_key');
		$this->speech_key = $settings->get('ai', 'speech_key');
	}

	/**
	 * Set the name of the audio file that speech() writes.
	 */
	public function set_filename(string $audio_filename) {
		$this->filename = $audio_filename;
	}

	/**
	 * Store the requested audio format.
	 * NOTE(review): the format is stored but never sent to the API by this class -
	 * confirm the audio returned by the API matches the filename extension in use.
	 */
	public function set_format(string $audio_format) {
		$this->format = $audio_format;
	}

	/**
	 * Set the text that speech() converts to audio.
	 */
	public function set_message(string $audio_message) {
		$this->message = $audio_message;
	}

	/**
	 * Set the directory that speech() writes the audio file into.
	 */
	public function set_path(string $audio_path) {
		$this->path = $audio_path;
	}

	/**
	 * Set the voice id used in the text-to-speech request url.
	 */
	public function set_voice(string $audio_voice) {
		$this->voice = $audio_voice;
	}

	/**
	 * Convert the message to audio and save it as path/filename.
	 *
	 * @return bool true only when the API answered with HTTP 200 and the file was written
	 */
	public function speech(): bool {
		//the voice id is part of the request url, nothing can be requested without it
		if ($this->voice === '' || $this->voice === null) {
			return false;
		}

		//build the request body, the model is only sent when one has been chosen
		$payload = [
			'text' => $this->message,
			'voice_settings' => [
				'similarity_boost' => 1,
				'stability' => 1,
			],
		];
		if (!empty($this->model)) {
			$payload['model_id'] = $this->model;
		}
		$encoded_message = json_encode($payload);
		if ($encoded_message === false) {
			//the message could not be encoded, for example invalid UTF-8
			return false;
		}

		$headers = [
			'Content-Type: application/json',
			"xi-api-key: $this->speech_key",
		];

		//encode the voice id so it cannot alter the request path
		$ch = curl_init('https://api.elevenlabs.io/v1/text-to-speech/' . rawurlencode($this->voice));
		if ($ch === false) {
			return false;
		}
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
		curl_setopt($ch, CURLOPT_POST, true);
		curl_setopt($ch, CURLOPT_POSTFIELDS, $encoded_message);
		$response = curl_exec($ch);
		$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		curl_close($ch);

		//a transport failure or any status other than 200 means no audio was returned
		if ($response === false || $http_code != 200) {
			return false;
		}

		//report success only when the audio was actually saved
		return file_put_contents($this->path . '/' . $this->filename, $response) !== false;
	}

	/**
	 * Whether a language can be selected for this engine.
	 */
	public function is_language_enabled(): bool {
		return false;
	}

	/**
	 * Languages offered by this engine, keyed by language id.
	 */
	public function get_languages(): array {
		return ['english' => 'English'];
	}

	/**
	 * Fetch the voices from the API.
	 *
	 * @return array voice id => label built from the name, gender, accent, use case and
	 *               first recommended model; empty when the request or decoding fails
	 */
	public function get_voices(): array {
		$return_value = [];
		$url = 'https://api.elevenlabs.io/v1/voices';

		//send the api key with the request when one is configured
		$headers = [
			'Content-Type: application/json',
		];
		if (!empty($this->speech_key)) {
			$headers[] = "xi-api-key: $this->speech_key";
		}

		$curl = curl_init();
		if ($curl === false) {
			return $return_value;
		}
		curl_setopt_array($curl, [
			CURLOPT_URL => $url,
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_ENCODING => "",
			CURLOPT_MAXREDIRS => 10,
			CURLOPT_TIMEOUT => 30,
			CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
			CURLOPT_CUSTOMREQUEST => "GET",
			CURLOPT_HTTPHEADER => $headers,
		]);
		$response = curl_exec($curl);
		curl_close($curl);

		//nothing usable came back
		if (!is_string($response) || $response === '') {
			return $return_value;
		}

		//an error reply or invalid json has no voices list to walk
		$json_array = json_decode($response, true);
		if (!is_array($json_array) || !isset($json_array['voices']) || !is_array($json_array['voices'])) {
			return $return_value;
		}

		foreach ($json_array['voices'] as $row) {
			//skip entries that cannot be turned into an option
			if (!is_array($row) || !isset($row['voice_id'], $row['name'])) {
				continue;
			}
			$voice_id = $row['voice_id'];
			$name = $row['name'];
			$gender = $row['labels']['gender'] ?? '';
			$accent = $row['labels']['accent'] ?? '';
			$use_case = $row['labels']['use case'] ?? '';
			$recommended_model = $row['high_quality_base_model_ids'][0] ?? '';

			//label format: name (gender, accent[, use case])[ - model]
			$label = "$name ($gender, $accent";
			if (!empty($use_case)) {
				$label .= ", " . $use_case;
			}
			$label .= ")";
			if (!empty($recommended_model)) {
				$label .= " - $recommended_model";
			}
			$return_value[$voice_id] = $label;
		}
		return $return_value;
	}

	/**
	 * Store the requested language.
	 */
	public function set_language(string $audio_language) {
		$this->languages = $audio_language;
	}

	/**
	 * Choose the model used by speech().
	 *
	 * An empty string means no model was chosen; speech() then leaves the model
	 * out of the request instead of failing.
	 *
	 * @throws \Exception when a non-empty model is not one of get_models()
	 */
	public function set_model(string $model): void {
		if ($model === '') {
			$this->model = '';
			return;
		}
		if (!array_key_exists($model, $this->get_models())) {
			throw new \Exception('Model does not exist');
		}
		$this->model = $model;
	}

	/**
	 * Models that can be selected, keyed by model id.
	 */
	public function get_models(): array {
		return [
			'eleven_turbo_v1' => 'Eleven Turbo v1',
			'eleven_turbo_v2' => 'Eleven Turbo v2',
			'eleven_multilingual_v1' => 'Eleven Multilingual v1',
			'eleven_multilingual_v2' => 'Eleven Multilingual v2',
		];
	}
}

View File

@@ -3,15 +3,17 @@
//define the template class
if (!interface_exists('ai_speech')) {
	/**
	 * ai_speech - methods every speech engine class must provide
	 *
	 * Each method is declared exactly once; declaring the same method twice in an
	 * interface is a fatal error.
	 */
	interface ai_speech {
		//capabilities reported by the engine
		public function get_languages() : array;
		public function get_models(): array;
		public function get_voices() : array;
		public function is_language_enabled() : bool;

		//values supplied before speech() is called
		public function set_filename(string $audio_filename);
		public function set_format(string $audio_format);
		public function set_language(string $audio_language);
		public function set_message(string $audio_message);
		public function set_model(string $audio_model): void;
		public function set_path(string $audio_path);
		public function set_voice(string $audio_voice);

		//produce the audio, returns whether it succeeded
		public function speech() : bool;
	}
}