-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathstreaming.js
122 lines (105 loc) · 5.37 KB
/
streaming.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import { getApiUrl, doExtrasFetch, modules } from '../../../extensions.js';
export { StreamingSttProvider };
// Tag prepended to this module's console.debug messages and toast titles so they can be told apart from other output.
const DEBUG_PREFIX = '<Speech Recognition module (streaming)> ';
/**
 * Speech-to-text provider that asks the Extras API "streaming-stt" module to
 * record the user's voice and return a transcript.
 *
 * The class owns three things:
 *  - the provider settings (`settings`, seeded from `defaultSettings`),
 *  - the settings form markup (`settingsHtml`) and its two-way binding with
 *    the settings object (`onSettingsChange` / `loadSettings`),
 *  - the request that fetches one transcript (`getUserMessage`).
 */
class StreamingSttProvider {
    //########//
    // Config //
    //########//

    /** Active settings object; assigned by loadSettings(). */
    settings;

    /** Baseline values. Its keys are also the whitelist of accepted setting names. */
    defaultSettings = {
        triggerCharMin: 0,
        triggerWordsText: '',
        triggerWords: [],
        triggerWordsEnabled: false,
        triggerWordsIncluded: false,
        debug: false,
        language: '',
    };

    /**
     * Markup of the provider settings form.
     *
     * @returns {string} HTML fragment; the fragments are concatenated without separators.
     */
    get settingsHtml() {
        const fragments = [
            '<div id="speech_recognition_streaming_trigger_words_div">',
            '<span>Minimum spoken characters to trigger</span><br>',
            '<input max="99" min="0" class="text_pole" id="speech_recognition_streaming_trigger_min_chars" type="number"><br>',
            '<span>Trigger words</span>',
            '<textarea id="speech_recognition_streaming_trigger_words" class="text_pole textarea_compact" type="text" rows="4" placeholder="Enter comma separated words that triggers new message, example:\nhey, hey aqua, record, listen"></textarea>',
            '<label class="checkbox_label" for="speech_recognition_streaming_trigger_words_enabled">',
            '<input type="checkbox" id="speech_recognition_streaming_trigger_words_enabled" name="speech_recognition_trigger_words_enabled">',
            '<small>Enable trigger words</small>',
            '</label>',
            '<label class="checkbox_label" for="speech_recognition_trigger_words_included">',
            '<input type="checkbox" id="speech_recognition_trigger_words_included" name="speech_recognition_trigger_words_included">',
            '<small>Include trigger words in message</small>',
            '</label>',
            '<label class="checkbox_label" for="speech_recognition_streaming_debug">',
            '<input type="checkbox" id="speech_recognition_streaming_debug" name="speech_recognition_streaming_debug">',
            '<small>Enable debug pop ups</small>',
            '</label>',
            '</div>',
        ];
        return fragments.join('');
    }

    /**
     * Reads the settings form into `this.settings`, then re-applies the result
     * through loadSettings() so the form and the settings object stay in sync.
     */
    onSettingsChange() {
        // Allow a change event to arrive before the first loadSettings() call.
        this.settings ??= { ...this.defaultSettings };

        // An empty or non-numeric field parses to NaN; fall back to the default
        // instead of storing NaN (which would make every length comparison false).
        const minChars = Number.parseInt(String($('#speech_recognition_streaming_trigger_min_chars').val()), 10);
        this.settings.triggerCharMin = Number.isNaN(minChars) ? this.defaultSettings.triggerCharMin : minChars;

        const wordsText = String($('#speech_recognition_streaming_trigger_words').val() ?? '');
        this.settings.triggerWordsText = wordsText;
        // Comma separated list -> trimmed, lower-cased, non-empty entries.
        this.settings.triggerWords = wordsText
            .split(',')
            .map((word) => word.trim().toLowerCase())
            .filter((word) => word !== '');

        this.settings.triggerWordsEnabled = $('#speech_recognition_streaming_trigger_words_enabled').is(':checked');
        this.settings.triggerWordsIncluded = $('#speech_recognition_trigger_words_included').is(':checked');
        this.settings.debug = $('#speech_recognition_streaming_debug').is(':checked');
        console.debug(DEBUG_PREFIX + ' Updated settings: ', this.settings);
        this.loadSettings(this.settings);
    }

    /**
     * Replaces the active settings with `defaultSettings` overlaid by `settings`
     * and writes the result into the settings form.
     *
     * @param {object} [settings] - Values to apply; only keys present in `defaultSettings` are allowed.
     *   A nullish value is treated as an empty object.
     * @throws {Error} When `settings` contains a key that `defaultSettings` does not define.
     *   In that case the previously active settings are left untouched.
     */
    loadSettings(settings) {
        const incoming = settings ?? {};
        if (Object.keys(incoming).length === 0) {
            console.debug(DEBUG_PREFIX + 'Using default streaming STT extension settings');
        }

        // Merge into a fresh copy so the defaults are never mutated, and commit
        // only after every key has been validated.
        const merged = { ...this.defaultSettings };
        for (const [key, value] of Object.entries(incoming)) {
            if (!Object.hasOwn(this.defaultSettings, key)) {
                throw new Error(`Invalid setting passed to STT extension: ${key}`);
            }
            merged[key] = value;
        }
        this.settings = merged;

        $('#speech_recognition_streaming_trigger_min_chars').val(this.settings.triggerCharMin);
        $('#speech_recognition_streaming_trigger_words').val(this.settings.triggerWordsText);
        $('#speech_recognition_streaming_trigger_words_enabled').prop('checked', this.settings.triggerWordsEnabled);
        $('#speech_recognition_trigger_words_included').prop('checked', this.settings.triggerWordsIncluded);
        $('#speech_recognition_streaming_debug').prop('checked', this.settings.debug);
        $('#speech_recognition_language').val(this.settings.language);
        console.debug(DEBUG_PREFIX + 'streaming STT settings loaded');
    }

    /**
     * Requests one record-and-transcribe cycle from the streaming-stt module.
     *
     * @returns {Promise<string>} The transcript when it is longer than
     *   `settings.triggerCharMin` characters; otherwise an empty string (also when
     *   the module is not loaded or the response carries no transcript).
     * @throws {Error} When the API answers with a non-OK HTTP status.
     */
    async getUserMessage() {
        // Return if module is not loaded
        if (!modules.includes('streaming-stt')) {
            console.debug(DEBUG_PREFIX + 'Module streaming-stt must be activated in Sillytavern Extras for streaming user voice.');
            return '';
        }

        const url = new URL(getApiUrl());
        url.pathname = '/api/speech-recognition/streaming/record-and-transcript';

        const apiResult = await doExtrasFetch(url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Bypass-Tunnel-Reminder': 'bypass',
            },
            body: JSON.stringify({
                text: '',
                language: this.settings.language,
            }),
        });

        if (!apiResult.ok) {
            toastr.error(apiResult.statusText, DEBUG_PREFIX + 'STT Generation Failed (streaming)', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
            throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
        }

        const data = await apiResult.json();
        const transcript = data?.transcript;

        // Return the transcript only if it exceeds the minimum character number;
        // always resolve to a string so callers can use string methods on the result.
        if (typeof transcript === 'string' && transcript.length > this.settings.triggerCharMin) {
            return transcript;
        }
        return '';
    }
}