# app.py
# (listing artifacts removed: file-size header and concatenated line numbers
#  from the original source export)
  1. from flask import Flask, render_template, Response, request, jsonify
  2. import json
  3. import os
  4. import torch
  5. import openai
  6. openai.api_key = ""
  7. device = torch.device('cpu')
  8. torch.set_num_threads(4)
  9. local_file = 'model.pt'
  10. if not os.path.isfile(local_file):
  11. torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v3_1_ru.pt',
  12. local_file)
  13. model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model") # type: ignore
  14. model.to(device)
  15. sample_rate = 48000
  16. speaker='xenia'
  17. example_text = 'В недрах тундры выдры в г+етрах т+ырят в вёдра ядра к+едров.'
  18. state = {}
  19. state['count'] = 0
  20. state['size'] = []
  21. state['gender'] = []
  22. state['emotion'] = []
  23. state['age'] = []
  24. state['prompt'] = ""
  25. state['need_generation'] = True
  26. state["new_audio"] = False
  27. state["generated_text"] = ""
  28. state["need_audio"] = False
  29. app = Flask(__name__)
  30. # app.logger.setLevel(logging.DEBUG)
  31. app.logger.info('start logger')
  32. @app.route('/send_data', methods=['POST'])
  33. def send_data():
  34. # Получаем данные из запроса
  35. data = request.form['data']
  36. need_generation = request.form['state']
  37. state['need_generation'] = need_generation
  38. # Обработка полученных данных
  39. detections = json.loads(data)
  40. if detections['face']:
  41. if state['count'] < 0 or state['new_audio']: state['count'] = 0
  42. if state['count'] > 5 and state["need_generation"]:
  43. state['count'] = 0
  44. emotion = max(set(state['emotion']), key=state['emotion'].count),
  45. sex = max(set(state['gender']), key=state['gender'].count),
  46. age = sum(state['age'])/len(state['age']),
  47. app.logger.info(f'{emotion=}, {sex=}, {age=}')
  48. state["prompt"] = generate_prompt(emotion, age, sex)
  49. state["generated_text"] = generate_text(state["prompt"])
  50. elif detections['face'][0]['size'][0] > 200:
  51. state['age'].append(detections['face'][0]['age'])
  52. state["gender"].append(detections['face'][0]['gender'])
  53. state["emotion"].append(detections['face'][0]['emotion'][0]['emotion'])
  54. state['count'] += 1
  55. else:
  56. state['count'] -= 1
  57. else:
  58. state['count'] -= 1
  59. # state["size"].append(detections['face'][0]['size'][0])
  60. # print(detections['face'][0])
  61. # print(detections['face'][0]['age'], detections['face'][0]['emotion'], detections['face'][0]['gender'])
  62. return data
  63. @app.route('/generate_audio', methods = ["GET", "POST"])
  64. def generate_audio():
  65. app.logger.info('checking need generation')
  66. if state["need_audio"]:
  67. app.logger.info('starting audio generation')
  68. audio_paths = model.save_wav(text=state['generated_text'],
  69. speaker=speaker,
  70. sample_rate=sample_rate,
  71. audio_path="static/audio.wav")
  72. app.logger.info('generating audio is done')
  73. state["new_audio"] = True
  74. state["need_generation"] = False
  75. state['need_audio'] = False
  76. else:
  77. state['new_audio'] = False
  78. app.logger.info(f'\n{state["need_audio"]=},\n{state["new_audio"]=},\n{state["need_generation"]=}')
  79. response = {
  80. 'newAudio': state["new_audio"],
  81. 'need_generation': state["need_generation"],
  82. 'filename': "audio.wav",
  83. 'text': state['generated_text']
  84. }
  85. return jsonify(response)
  86. @app.route("/audio.wav")
  87. def audio():
  88. # print("Requested path:", request.path)
  89. # print("File path:", os.path.join(app.static_folder, 'audio.wav'))
  90. return app.send_static_file('audio.wav')
  91. @app.route('/')
  92. def index():
  93. """Video streaming home page."""
  94. # return render_template('index.html')
  95. return render_template('index.html')
  96. def generate_prompt(emotion, age, sex):
  97. app.logger.info('preload prompt')
  98. prompt = f'''Ты — это арт объект выставки про взаимодействие машины и человека. \
  99. К тебе подходит человек и он показывает эмоцию {emotion}. \
  100. Ему {age} лет. И это {sex}. \
  101. Твоя нейросеть распознала эту эмоцию и теперь тебе нужно дать какой-то необычный концептуальный ответ. \
  102. Что ты скажешь этому человеку?'''
  103. return prompt
  104. def generate_text(prompt):
  105. app.logger.info("start generating text from openai")
  106. response = openai.ChatCompletion.create(
  107. model="gpt-3.5-turbo",
  108. temperature=1,
  109. # max_tokens=1000,
  110. messages=[
  111. {"role": "system", "content": "Ты — это арт объект выставки про взаимодействие машины и человека."},
  112. {"role": "user", "content": prompt},
  113. ])
  114. state["need_generation"] = False
  115. state["need_audio"] = True
  116. app.logger.info("openai generation is done")
  117. return response['choices'][0]['message']['content'] # type: ignore
  118. if __name__ == '__main__':
  119. app.logger.info('start app')
  120. app.run(debug=True, host="0.0.0.0")
  121. # ssl_context=("127.0.0.1.pem", "127.0.0.1-key.pem"))