from flask import Flask, render_template, Response, request, jsonify
import json
import os
import torch
import openai
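
# The OpenAI API key is expected to be set here; the repository leaves it empty
# (it could just as well be read from an environment variable).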
openai.api_key = ""
device = torch.device('cpu')
torch.set_num_threads(4)
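
# Download the Silero TTS Russian v3.1 model on first run and load it via torch.package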
local_file = 'model.pt'
if not os.path.isfile(local_file):
    torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v3_1_ru.pt',
                                   local_file)
model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model")  # type: ignore
model.to(device)

sample_rate = 48000
speaker = 'xenia'
example_text = 'В недрах тундры выдры в г+етрах т+ырят в вёдра ядра к+едров.'
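
# Shared mutable state used to coordinate the routes: a frame counter, the face
# attributes accumulated across frames, and flags that drive text/audio generation.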
state = {}
state['count'] = 0
state['size'] = []
state['gender'] = []
state['emotion'] = []
state['age'] = []
state['prompt'] = ""
state['need_generation'] = True
state["new_audio"] = False
state["generated_text"] = ""
state["need_audio"] = False

app = Flask(__name__)
# app.logger.setLevel(logging.DEBUG)
app.logger.info('start logger')
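
# /send_data receives face-detection results from the frontend, accumulates
# age/gender/emotion over several frames, and, once enough frames with a large
# enough face have been seen, triggers prompt and text generation.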
@app.route('/send_data', methods=['POST'])
def send_data():
    # Get the payload from the request
    data = request.form['data']
    # Form values arrive as strings; convert to a real boolean
    # (assumes the client sends "true"/"false")
    need_generation = request.form['state']
    state['need_generation'] = need_generation.lower() == 'true'
    # Process the received detections
    detections = json.loads(data)
    if detections['face']:
        if state['count'] < 0 or state['new_audio']:
            state['count'] = 0
        if state['count'] > 5 and state["need_generation"]:
            state['count'] = 0
            emotion = max(set(state['emotion']), key=state['emotion'].count)
            sex = max(set(state['gender']), key=state['gender'].count)
            age = sum(state['age']) / len(state['age'])
            app.logger.info(f'{emotion=}, {sex=}, {age=}')
            state["prompt"] = generate_prompt(emotion, age, sex)
            state["generated_text"] = generate_text(state["prompt"])
        elif detections['face'][0]['size'][0] > 200:
            state['age'].append(detections['face'][0]['age'])
            state["gender"].append(detections['face'][0]['gender'])
            state["emotion"].append(detections['face'][0]['emotion'][0]['emotion'])
            state['count'] += 1
        else:
            state['count'] -= 1
    else:
        state['count'] -= 1
    # state["size"].append(detections['face'][0]['size'][0])
    # print(detections['face'][0])
    # print(detections['face'][0]['age'], detections['face'][0]['emotion'], detections['face'][0]['gender'])
    return data
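
# /generate_audio synthesizes the generated text to static/audio.wav with Silero TTS
# when the "need_audio" flag is set and reports the current flags back to the frontend.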
@app.route('/generate_audio', methods=["GET", "POST"])
def generate_audio():
    app.logger.info('checking need generation')
    if state["need_audio"]:
        app.logger.info('starting audio generation')
        audio_paths = model.save_wav(text=state['generated_text'],
                                     speaker=speaker,
                                     sample_rate=sample_rate,
                                     audio_path="static/audio.wav")
        app.logger.info('generating audio is done')
        state["new_audio"] = True
        state["need_generation"] = False
        state['need_audio'] = False
    else:
        state['new_audio'] = False

    app.logger.info(f'\n{state["need_audio"]=},\n{state["new_audio"]=},\n{state["need_generation"]=}')
    response = {
        'newAudio': state["new_audio"],
        'need_generation': state["need_generation"],
        'filename': "audio.wav",
        'text': state['generated_text']
    }
    return jsonify(response)
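
# Serve the most recently rendered wav file from the static folder.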
- @app.route("/audio.wav")
- def audio():
- # print("Requested path:", request.path)
- # print("File path:", os.path.join(app.static_folder, 'audio.wav'))
- return app.send_static_file('audio.wav')
- @app.route('/')
- def index():
- """Video streaming home page."""
- # return render_template('index.html')
- return render_template('index.html')
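
# Build the (Russian) prompt for the language model. Roughly translated it says:
# "You are an art object at an exhibition about human-machine interaction. A person
# showing emotion {emotion}, aged {age}, of sex {sex}, walks up to you. Your neural
# network recognized this emotion; give an unusual, conceptual reply. What do you say?"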
def generate_prompt(emotion, age, sex):
    app.logger.info('preload prompt')
    prompt = f'''Ты — это арт объект выставки про взаимодействие машины и человека. \
К тебе подходит человек и он показывает эмоцию {emotion}. \
Ему {age} лет. И это {sex}. \
Твоя нейросеть распознала эту эмоцию и теперь тебе нужно дать какой-то необычный концептуальный ответ. \
Что ты скажешь этому человеку?'''
    return prompt
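
# Ask gpt-3.5-turbo for the reply text. This uses the legacy ChatCompletion
# interface of the pre-1.0 openai package; newer SDK versions expose a different API.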
def generate_text(prompt):
    app.logger.info("start generating text from openai")
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=1,
        # max_tokens=1000,
        messages=[
            {"role": "system", "content": "Ты — это арт объект выставки про взаимодействие машины и человека."},
            {"role": "user", "content": prompt},
        ])
    state["need_generation"] = False
    state["need_audio"] = True
    app.logger.info("openai generation is done")
    return response['choices'][0]['message']['content']  # type: ignore

if __name__ == '__main__':
    app.logger.info('start app')
    app.run(debug=True, host="0.0.0.0")
    # To serve over HTTPS instead:
    # app.run(debug=True, host="0.0.0.0", ssl_context=("127.0.0.1.pem", "127.0.0.1-key.pem"))