metya, 1 year ago
Parent
Commit d70e668b61
5 changed files with 174 additions and 125 deletions
  1. app.py (+126 -66)
  2. model_v4_ru.pt (BIN)
  3. static/audio.wav (BIN)
  4. templates/audio.html (+0 -43)
  5. templates/index.html (+48 -16)

+ 126 - 66
app.py

@@ -5,26 +5,28 @@ import torch
 import openai
 
 
-with open('.env', 'r') as env:
+with open(".env", "r") as env:
     key = env.readline().strip()
 
 client = openai.OpenAI(api_key=key)
 # openai.api_key = key
 
-device = torch.device('cpu')
+device = torch.device("cpu")
 torch.set_num_threads(4)
-local_file = 'model_v4_ru.pt'
+local_file = "model_v4_ru.pt"
 
 if not os.path.isfile(local_file):
-    torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v4_ru.pt',
-                                   local_file)
+    torch.hub.download_url_to_file(
+        "https://models.silero.ai/models/tts/ru/v4_ru.pt", local_file
+    )
 
-model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model") # type: ignore
+model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model")  # type: ignore
 model.to(device)
 
 sample_rate = 48000
-speaker='xenia'
-example_text = 'В недрах тундры выдры в г+етрах т+ырят в вёдра ядра к+едров.'
+speaker = "xenia"
+example_text = "В недрах тундры выдры в г+етрах т+ырят в вёдра ядра к+едров."
+
 
 class State:
     count = 0
@@ -39,43 +41,52 @@ class State:
     need_audio = False
     need_generation_from_client = True
     big_head = False
+    number_audio = 0
+
 
 state = State()
 
 app = Flask(__name__)
 # app.logger.setLevel(logging.DEBUG)
-app.logger.info('start logger')
+app.logger.info("start logger")
 
 
-@app.route('/send_data', methods=['POST'])
+@app.route("/send_data", methods=["POST"])
 def send_data():
     # Extract the data from the request
-    data = request.form['data']
-    need_generation = request.form['state']
-    state.need_generation_from_client = (need_generation in ["true", "True"])
-    # if state.need_generation_from_client and 
+    data = request.form["data"]
+    need_generation = request.form["state"]
+    state.need_generation_from_client = need_generation in ["true", "True"]
+    # if state.need_generation_from_client and
     if state.count < -10 or state.count > 60:
         state.count = 0
         state.need_generation = True
 
     # Process the received data
     detections = json.loads(data)
-    if len(detections['face']) > 0:
-        if state.count < 0 or state.new_audio: state.count = 0
-        if state.count > 5 and state.need_generation and state.need_generation_from_client:
-            app.logger.info(f"time for generation {state.count=}, {state.need_generation=}, {state.need_generation_from_client=}")
+    if len(detections["face"]) > 0:
+        if state.count < 0 or state.new_audio:
             state.count = 0
-            # emotion = max(set(state['emotion']), key=state['emotion'].count), 
-            # sex = max(set(state['gender']), key=state['gender'].count), 
+        if (
+            state.count > 5
+            and state.need_generation
+            and state.need_generation_from_client
+        ):
+            app.logger.info(
+                f"time for generation {state.count=}, {state.need_generation=}, {state.need_generation_from_client=}"
+            )
+            state.count = 0
+            # emotion = max(set(state['emotion']), key=state['emotion'].count),
+            # sex = max(set(state['gender']), key=state['gender'].count),
             # age = sum(state['age'])/len(state['age']),
             state.emotion, state.age, state.gender = [], [], []
-            emotion = detections['face'][0]['emotion']
-            sex = detections['face'][0]['gender']
-            age = detections['face'][0]['age']
-            app.logger.info(f'\n{emotion=}, \n{sex=}, \n{age=}') 
+            emotion = detections["face"][0]["emotion"]
+            sex = detections["face"][0]["gender"]
+            age = detections["face"][0]["age"]
+            app.logger.info(f"\n{emotion=}, \n{sex=}, \n{age=}")
             state.prompt = generate_prompt(emotion, age, sex)
-            state.generation_text = generate_text(state.prompt) 
-        elif detections['face'][0]['size'][0] > 200:
+            state.generation_text = generate_text(state.prompt)
+        elif detections["face"][0]["size"][0] > 200:
             # state.age.append(detections['face'][0]['age'])
             # state.gender.append(detections['face'][0]['gender'])
             # state.emotion.append(detections['face'][0]['emotion'][0]['emotion'])
@@ -87,46 +98,74 @@ def send_data():
     else:
         state.count -= 1
 
-    app.logger.info(f"STATUS {state.count=}, {state.need_generation=}, {state.need_generation_from_client=}")
+    app.logger.info(
+        f"STATUS {state.count=}, {state.need_generation=}, {state.need_generation_from_client=}"
+    )
 
     return data
 
-@app.route('/check_audio', methods = ["GET", "POST"])
+
+@app.route("/check_audio", methods=["GET", "POST"])
 def check_audio():
-    app.logger.info(f'checking need generation {state.need_generation=}, {state.need_audio=}')
+    app.logger.info(
+        f"checking need generation {state.need_generation=}, {state.need_audio=}"
+    )
 
     if state.need_audio and state.big_head:
-        generate_audio(state.generation_text)
+        audio_path = generate_audio(state.generation_text)
     else:
         state.new_audio = False
-        
-    app.logger.info(f'\n{state.need_audio=},\n{state.new_audio=},\n{state.need_generation=}')    
+        audio_path = ""
+
+    app.logger.info(
+        f"\n{state.need_audio=},\n{state.new_audio=},\n{state.need_generation=}"
+    )
 
     response = {
-        'newAudio': state.new_audio,
-        'need_generation': state.need_generation,
-        'filename': "audio.wav",
-        'text': state.generation_text,
-        'prompt': state.prompt
+        "newAudio": state.new_audio,
+        "need_generation": state.need_generation,
+        "filename": audio_path,
+        "text": state.generation_text,
+        "prompt": state.prompt,
     }
     return jsonify(response)
 
-@app.route("/audio.wav")
-def audio():
+
+@app.route("/<filename>")
+def audio(filename):
     state.need_generation = True
-    return app.send_static_file('audio.wav')
+    return app.send_static_file(filename)
+
+
+@app.route("/delete_audio", methods=["POST"])
+def delete_audio():
+    data = request.get_json()
+    filename = data.get("filename")
 
+    if not filename:
+        return jsonify({"message": "No filename provided"}), 400
 
-@app.route('/')
+    file_path = os.path.join("static", os.path.basename(filename))  # basename guards against "../" traversal
+
+    try:
+        if os.path.exists(file_path):
+            os.remove(file_path)
+            return jsonify({"message": "File deleted successfully"}), 200
+        else:
+            return jsonify({"message": "File not found"}), 404
+    except Exception as e:
+        return jsonify({"message": "Error deleting file", "error": str(e)}), 500
+
+
+@app.route("/")
 def index():
     """Video streaming home page."""
-    # return render_template('index.html')
-    return render_template('index.html')
+    return render_template("index.html")
 
 
 def generate_prompt(emotion, age, sex):
-    app.logger.info('preload prompt')
-    prompt = f'''Ты - это арт объект в виде носимого устройства. Твое название - Onism Machina.
+    app.logger.info("\033[92m" + "preload prompt" + "\033[00m")
+    prompt = f"""Ты - это арт объект в виде носимого устройства. Твое название - Onism Machina.
 Твоя задача - изучить эмоции человека, чтобы больше понимать о том, что это такое и таким образом познать себя, ведь ты - Искусственный Интеллект.
 Тебе также интересна симбиотическая связь человека и ИИ.
 К тебе подходит человек и он показывает эмоцию {emotion}. Ему {age} лет.
@@ -141,47 +180,68 @@ def generate_prompt(emotion, age, sex):
 Ты можешь говорить об эмоциях или их аналогах у Искусственного Интеллекта.
 Ты можешь давать человеку советы в соответствии с его эмоциональным статусом.
 Люди не могут отвечать тебе.
-'''
+"""
     return prompt
 
+
 def generate_text(prompt):
     state.need_generation = False
-    app.logger.info("start generating text from openai")
+    app.logger.info("\033[92m" + "start generating text from openai" + "\033[00m")
     response = client.chat.completions.create(
-                        model="gpt-3.5-turbo",
-                        temperature=1,
-                        max_tokens=800,
-                        messages=[
-                                {"role": "system", "content": "Ты — это арт объект выставки про взаимодействие машины и человека."},
-                                {"role": "user", "content": prompt},
-                                ])
+        model="gpt-3.5-turbo",
+        temperature=1,
+        max_tokens=800,
+        messages=[
+            {
+                "role": "system",
+                "content": "Ты — это арт объект выставки про взаимодействие машины и человека.",
+            },
+            {"role": "user", "content": prompt},
+        ],
+    )
     state.need_audio = True
-    app.logger.info("openai generation is done")
+    app.logger.info("\033[92m" + "openai generation is done" + "\033[00m")
     return response.choices[0].message.content
 
+
 def generate_audio(sample_text):
-    app.logger.info('starting audio generation')
+    state.number_audio += 1
+    app.logger.info(
+        "\033[93m"
+        + f"starting audio generation with name audio{state.number_audio}"
+        + "\033[00m"
+    )
     state.need_audio = False
     state.need_generation = False
     text = trim_text(sample_text)
-    audio_paths = model.save_wav(text=text,
-                                speaker=speaker,
-                                sample_rate=sample_rate,
-                                audio_path="static/audio.wav")
-    app.logger.info('generating audio is done')
+    audio_paths = model.save_wav(
+        text=text,
+        speaker=speaker,
+        sample_rate=sample_rate,
+        audio_path=f"static/audio{state.number_audio}.wav",
+    )
+    app.logger.info(
+        "\033[95m" + f"generating audio with path {audio_paths} is done" + "\033[00m"
+    )
     state.new_audio = True
+    return audio_paths.split("/")[-1]
+
 
 def trim_text(example_text):
     if len(example_text) >= 1000:
-        app.logger.info(f'TEXT IS TOO LONG {len(example_text)} - TRIM!')
+        app.logger.info(
+            "\033[91m {}\033[00m".format(
+                f"TEXT IS TOO LONG {len(example_text)} - TRIM!"
+            )
+        )
         for i in range(1000, 500, -1):
-            if example_text[i] in ['.', '?', '...']:
-                return example_text[:i+1]
+            if example_text[i] in [".", "?"]:  # a single character can never equal "..."
+                return example_text[: i + 1]
     # fall back to the untrimmed text when no sentence boundary is found
     return example_text
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     app.logger.setLevel("DEBUG")
-    app.logger.info('start app')
+    app.logger.info("start app")
     app.run(debug=True, host="0.0.0.0")
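
The audio flow is now per-file: /check_audio reports a generated filename such as audio1.wav, the catch-all "/<filename>" route serves it, and /delete_audio removes it after playback. A minimal client sketch of that lifecycle, assuming the server is reachable at http://localhost:5000 (Flask's default when app.run is given no port) and using the requests library; the 5-second cadence mirrors the polling interval in templates/index.html:

import time

import requests

BASE = "http://localhost:5000"  # assumed host/port; app.run above binds 0.0.0.0

def poll_once():
    # Ask whether a freshly generated file is ready (this call also lets the
    # server kick off TTS generation when it has pending text).
    status = requests.get(f"{BASE}/check_audio").json()
    if not status["newAudio"]:
        return
    filename = status["filename"]  # e.g. "audio1.wav"
    # Fetch the wav through the catch-all "/<filename>" route.
    wav = requests.get(f"{BASE}/{filename}")
    wav.raise_for_status()
    with open(filename, "wb") as f:
        f.write(wav.content)
    # Mirror index.html: ask the server to delete the file once consumed.
    requests.post(f"{BASE}/delete_audio", json={"filename": filename})

while True:
    poll_once()
    time.sleep(5)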

BIN
model_v4_ru.pt


BIN
static/audio.wav


+ 0 - 43
templates/audio.html

@@ -1,43 +0,0 @@
-<!DOCTYPE html>
-<html>
-
-<head>
-    <script src="https://code.jquery.com/jquery-3.7.0.min.js"></script>
-</head>
-
-<body>
-    <audio id="audio-player"></audio>
-    <img src="static/123.jpg">
-    <script>
-        $(document).ready(function () {
-            // Check the server for a new audio file
-            function checkForNewAudio() {
-                $.ajax({
-                    url: '/generate_audio',
-                    method: 'GET',
-                    success: function (response) {
-                        if (response.newAudio) {
-                            // If a new audio file is available, play it on the page
-                            playAudio(response.filename);
-                        }
-                    },
-                    error: function (error) {
-                        console.error('Error while checking for a new audio file:', error);
-                    }
-                });
-            }
-
-            // Play the given audio file
-            function playAudio(audioSrc) {
-                const audioPlayer = new Audio(audioSrc);
-                audioPlayer.play();
-            }
-
-            // Periodically check for a new audio file
-            setInterval(checkForNewAudio, 60000); // poll every 60 seconds
-        });
-
-    </script>
-</body>
-
-</html>

+ 48 - 16
templates/index.html

@@ -40,11 +40,11 @@
             //const dataInput = document.getElementById('data-input');
             //const canvas = $('#canvas').get(0)
 
-                var interpolated;
-                var need_generation = true;
-                var need_playing = true;
-                var text;
-                var prompt;
+            var interpolated;
+            var need_generation = true;
+            var need_playing = true;
+            var text;
+            var prompt;
 
             function splitTextIntoLines(text, wordsPerLine) {
                 const words = text.split(' ');
@@ -72,17 +72,31 @@
                     `platform: ${human.env.platform} | ` +
                     `agent ${human.env.agent}<br>` +
                     `need_generation ${need_generation}<br>` + // draw labels, boxes, lines, etc.
-                `prompt ${prompt}<br>` +
+                    `prompt ${prompt}<br>` +
                     `text: ${text}`;
             }
 
             async function playAudio(audioSrc) {
                 console.log('playing audio')
                 const audioPlayer = new Audio(audioSrc);
-                audioPlayer.addEventListener('ended', function () {
+                audioPlayer.addEventListener('ended', async function () {
                     need_generation = true;
                     need_playing = true;
+                    text = '';
                     console.log('playing done');
+                    // Ask the server to delete the file that just finished playing
+                    $.ajax({
+                        url: '/delete_audio',
+                        type: 'POST',
+                        contentType: 'application/json',
+                        data: JSON.stringify({ filename: audioSrc }),
+                        success: function (data) {
+                            console.log('Delete response:', data.message);
+                        },
+                        error: function (jqXHR, textStatus, errorThrown) {
+                            console.error('Error deleting the audio file:', textStatus, errorThrown);
+                        }
+                    });
                 });
                 audioPlayer.play();
             }
@@ -102,9 +116,12 @@
                             need_playing = false;
                             playAudio(response.filename);
                         }
+                        console.log("check audio done")
+                        console.log("text is " + text)
+                        console.log("filename is " + response.filename)
                     },
-                    error: function (error) {
-                        console.error('Error while checking for a new audio file:', error);
+                    error: function (jqXHR, textStatus, errorThrown) {
+                        console.error('Error while checking for a new audio file:', textStatus, errorThrown);
                     }
                 });
             }
@@ -116,11 +133,16 @@
                     data: { data: JSON.stringify(interpolated), state: need_generation },
                     success: function (response) {
                         console.log('face data sent!');
+                    },
+                    error: function (jqXHR, textStatus, errorThrown) {
+                        console.log("DATA WAS NOW SENT, ALARM!");
                     }
                 });
             };
 
-
+            let drawLoopIntervalId;
+            let sendDataIntervalId;
+            let checkForNewAudioIntervalId;
 
             async function main() { // main entry point
                 document.getElementById('log').innerHTML =
@@ -136,12 +158,22 @@
                 canvas.width = human.webcam.width; // set canvas resolution to input webcam native resolution
                 canvas.height = human.webcam.height;
                 canvas.onclick = async () => { // pause when clicked on screen and resume on next click
-                    if (human.webcam.paused) await human.webcam.play();
-                    else human.webcam.pause();
+                    if (human.webcam.paused) {
+                        await human.webcam.play();
+                        drawLoopIntervalId = setInterval(drawLoop, 30);
+                        sendDataIntervalId = setInterval(send_data, 1000);
+                        checkForNewAudioIntervalId = setInterval(checkForNewAudio, 5000);
+                    }
+                    else {
+                        human.webcam.pause();
+                        clearInterval(drawLoopIntervalId);
+                        clearInterval(sendDataIntervalId);
+                        clearInterval(checkForNewAudioIntervalId);
+                    }
                 };
-                await setInterval(drawLoop, 30); // start draw loop
-                await setInterval(send_data, 1000);
-                await setInterval(checkForNewAudio, 5000)
+                drawLoopIntervalId = setInterval(drawLoop, 30);
+                sendDataIntervalId = setInterval(send_data, 1000);
+                checkForNewAudioIntervalId = setInterval(checkForNewAudio, 5000);
             };
 
 
@@ -152,4 +184,4 @@
         </script>
 </body>
 
-</html>
+</html>
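
If the browser loop needs to be exercised without a webcam, the same form fields index.html posts once per second (data and state) can be replayed from Python. A rough smoke-test sketch: the field names and the face keys (emotion, gender, age, size) come from the send_data handler above, but the detection values and their exact shape are invented, since the real payload comes from the Human library:

import json

import requests

# Invented detection payload: only the keys send_data reads are included.
detection = {
    "face": [
        {
            "emotion": [{"score": 0.9, "emotion": "happy"}],
            "gender": "female",
            "age": 30,
            "size": [250, 250],  # size[0] > 200 keeps the counting branch active
        }
    ]
}

for _ in range(7):  # state.count has to exceed 5 before generation can trigger
    resp = requests.post(
        "http://localhost:5000/send_data",
        data={"data": json.dumps(detection), "state": "true"},
    )
    resp.raise_for_status()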