まずは完成動画を掲載
現在はClientの入力をそのままFeeがエコーバックするだけになっている
動きとしては
Clientからブラウザで文字入力
http GETで文字送信
Flask-Pythonが受信して、Feeの返信を作成(今回はエコーバック)
PythonからCeVIO AIにFeeの返信文字を投げ、Wavファイルと口パク用の音素データ(Phoneme)を作成
作成完了したらGETコマンド返信
WebGL側でGETの返信を受け、WavファイルのPlay準備のため2秒Wait
その後Playと同時に音素データから口の開閉と形のパラメータを再生時間を見ながらタイミングを合わせてセット
という制御になっている
よってWav再生と口パクは同期をとっているわけではない(同時スタートさせているだけ)
また口パクは滑らかにするように、4Tap程度のFIRフィルタを作ってある
見た目だが、なぜかUnity Editor上で動かすとスムーズにならないのだが、WebGLでBuildした画像を見るとスムーズになっている(気がするだけ?)
とりあえずこれで、基本的なパーツはいったん完成と言えるだろう
最初に考えたシステムよりは、はるかにスマートになっていると思うので、
全体としての資料は別途まとめたい
Unity:InputFieldManager
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
using UnityEngine.Networking;
using TMPro;
using UnityEngine.UIElements;
public class InputFieldManager : MonoBehaviour
{
    // Chat UI controller: takes the text typed by the client, sends it to the
    // Flask server via HTTP GET, and publishes the parsed response
    // (sentences + phoneme table) for AudioPlayer / Fee_ParameterSet to use.
    TMP_InputField inputField;
    TMP_Text text;

    // Four-line scrolling chat log; text0 is the newest line.
    string text0, text1, text2, text3;

    string ClientSentence = "";
    string FeeSentence = "";
    string Phoneme = "";

    // Flat list of (phoneme, startTime) string pairs parsed from the server
    // response. AudioPlayer steps through it two entries at a time.
    public static string[] Phoneme_mat;

    // Pushes one line into the four-line chat window (newest on top).
    void TextWindowinput(string inputtext)
    {
        text3 = text2;
        text2 = text1;
        text1 = text0;
        text0 = inputtext;
        text.text = text0 + "\r\n" + text1 + "\r\n" + text2 + "\r\n" + text3;
    }

    void Start()
    {
        inputField = GameObject.Find("InputField (TMP)").GetComponent<TMP_InputField>();
        text = GameObject.Find("Text (TMP)").GetComponent<TMP_Text>();
        inputField.ActivateInputField();
        inputField.Select();
    }

    void Update()
    {
        // Keep the input field focused so the user can always type.
        if (!inputField.isFocused)
        {
            inputField.ActivateInputField();
            inputField.Select();
        }
        // Once wav playback has started, show both sentences in the chat log.
        if (AudioPlayer.is_Playback_play && FeeSentence != "")
        {
            TextWindowinput(ClientSentence);
            TextWindowinput(FeeSentence);
            ClientSentence = "";
            FeeSentence = "";
        }
    }

    // Hooked to the InputField's "On End Edit" event.
    public void OnEndEdit()
    {
        StartCoroutine(SendData(InputFieldManager_ID.ClientId, InputFieldManager_ID.ClientName, inputField.text));
        inputField.text = "";
    }

    // Returns the substring between `key` and the next `terminator` (or the end
    // of `response` when `terminator` is null/absent). Null when `key` is missing.
    static string ExtractField(string response, string key, string terminator)
    {
        int start = response.IndexOf(key);
        if (start < 0)
        {
            return null;
        }
        start += key.Length;
        int end = terminator == null ? response.Length : response.IndexOf(terminator, start);
        if (end < 0)
        {
            end = response.Length;
        }
        return response.Substring(start, end - start);
    }

    IEnumerator SendData(string id, string name, string sentence)
    {
        // Protocol (GET):
        //   request : http://...?SendData=ClientId=<id>,ClientName=<name>,ClientSentence=<text>
        //   response: ClientId=...,ClientSentence=...,FeeSentence=...,Phoneme=(p,t)(p,t)...
        // NOTE(review): older comments show (phoneme,start,end) triples, but the
        // server emits (phoneme,startTime) pairs — see CeVIO_phoneme_data_get.
        // If our ClientId matches the response's, we are the speaker; otherwise a listener.
        string url = "http://127.0.0.1:8080/";
        // `using` disposes the request (the original leaked the native handle).
        using (UnityWebRequest request = UnityWebRequest.Get(
            url + "?SendData=ClientId=" + id + ",ClientName=" + name + ",ClientSentence=" + sentence))
        {
            yield return request.SendWebRequest();
            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.Log(request.error);
                yield break;
            }
            Debug.Log("Data sent successfully!");
            string response = request.downloadHandler.text;
            Debug.Log(response);

            // Pull the three payload fields out of the response.
            string client = ExtractField(response, ",ClientSentence=", ",FeeSentence=");
            string fee = ExtractField(response, ",FeeSentence=", ",Phoneme=");
            string phoneme = ExtractField(response, ",Phoneme=", null);
            if (client == null || fee == null || phoneme == null)
            {
                // Malformed response: log and bail out instead of throwing from
                // an unguarded Substring as before.
                Debug.LogError("Unexpected response format: " + response);
                yield break;
            }
            ClientSentence = client;
            FeeSentence = fee;
            Phoneme = phoneme;

            // "(p,t)(p,t)" -> "p,t,p,t," -> flat string array of pairs.
            string Phoneme_t = Phoneme.Replace("(", "").Replace(")", ",");
            Phoneme_mat = Phoneme_t.Split(",");

            // Publish the phoneme table first, THEN let AudioPlayer start
            // (the original raised the flag before Phoneme_mat was filled).
            AudioPlayer.is_Playback_ready = true;
            Debug.Log(string.Join(" ", Phoneme_mat));
        }
    }
}
Unity:AudioPlayer
using System.Collections;
using System.Collections.Generic;
using JetBrains.Annotations;
using UnityEngine;
using UnityEngine.Networking;
public class AudioPlayer : MonoBehaviour
{
    // Downloads the generated wav from the Flask server, plays it, and steps
    // through InputFieldManager.Phoneme_mat to drive lip-sync timing.
    // Playback and lip-sync are started together, not hard-synchronized.
    public string url = "http://127.0.0.1:8080/Fee_voice/Fee_voice.wav";

    // Raised by InputFieldManager when the server response has been parsed.
    public static bool is_Playback_ready = false;
    // Raised here when the clip starts; read by InputFieldManager and Update().
    public static bool is_Playback_play = false;

    private float startTime;
    // Explicit state flag; replaces the old `startTime == 0` float-as-flag test,
    // which could misfire if Time.time were exactly 0.
    private bool lipSyncRunning = false;
    private int index = 0;
    // Playback-relative time (seconds) at which to advance to the next phoneme.
    float Phoneme_elapsed;
    int mat_size;

    void Update()
    {
        // Kick off download + playback once a response is ready.
        if (is_Playback_ready)
        {
            is_Playback_ready = false;
            StartCoroutine(Playback());
        }
        // Lip-sync only runs while the clip is flagged as playing.
        if (!is_Playback_play)
        {
            return;
        }
        if (!lipSyncRunning)
        {
            // First frame of playback: initialize the phoneme cursor.
            lipSyncRunning = true;
            startTime = Time.time;
            index = 0;
            // Phoneme_mat is flat (phoneme, startTime) pairs; [1] is the first timestamp.
            Phoneme_elapsed = ParseTime(InputFieldManager.Phoneme_mat[1]);
            Fee_ParameterSet.Mouth = "n";
            mat_size = InputFieldManager.Phoneme_mat.Length;
            Debug.Log(mat_size);
        }
        else
        {
            float elapsedTime = Time.time - startTime;
            if (elapsedTime >= Phoneme_elapsed)
            {
                if (mat_size - 1 > index + 2)
                {
                    // Advance one (phoneme, time) pair.
                    index += 2;
                    Phoneme_elapsed = ParseTime(InputFieldManager.Phoneme_mat[index + 1]);
                    Fee_ParameterSet.Mouth = InputFieldManager.Phoneme_mat[index];
                }
                else
                {
                    // End of the table: reset for the next utterance.
                    index = 0;
                    lipSyncRunning = false;
                    is_Playback_play = false;
                }
            }
        }
    }

    // The server writes timestamps with '.' decimals; parse culture-invariantly
    // so this also works on locales whose decimal separator is ','.
    static float ParseTime(string s)
    {
        return float.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
    }

    // Fetches the wav, waits 2 s for decode/buffering, then starts the audio
    // and flags Update() to begin lip-sync.
    IEnumerator Playback()
    {
        using (var uwr = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.WAV))
        {
            yield return uwr.SendWebRequest();
            if (uwr.result == UnityWebRequest.Result.ConnectionError || uwr.result == UnityWebRequest.Result.ProtocolError)
            {
                Debug.LogError(uwr.error);
            }
            else
            {
                var audioClip = DownloadHandlerAudioClip.GetContent(uwr);
                var audioSource = GetComponent<AudioSource>();
                audioSource.clip = audioClip;
                // Fixed grace period before Play, as described in the write-up.
                yield return new WaitForSeconds(2);
                is_Playback_play = true;
                Debug.Log("audioplayer.Play");
                audioSource.Play();
            }
        }
    }
}
Unity:Fee_ParameterSet
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Live2D.Cubism.Core;
using Live2D.Cubism.Framework;
using System.Net;
using System.Text;
using System.Threading;
using System;
using Unity.VisualScripting;
public class Fee_ParameterSet : MonoBehaviour
{
    // Live2D mouth control for Fee: maps the current phoneme (set by
    // AudioPlayer) to mouth form/open targets and smooths them with a short
    // moving-average (FIR) filter before writing the Cubism parameters.
    private CubismModel _model;

    // Cubism parameter indices used below (mouth form / mouth open).
    // NOTE(review): hard-coded for this particular model's parameter order —
    // confirm against the model if the Live2D asset changes.
    private const int MouthFormParamIndex = 14;
    private const int MouthOpenParamIndex = 15;

    // Target values per vowel slot: a, i, u, e, o, n/sil.
    // Hoisted to static readonly (they were reallocated every LateUpdate).
    static readonly float[] Mouth_form = { 1f, 1f, -1f, 0f, 0f, 1f };
    static readonly float[] Mouth_open = { 1f, 0.3f, 0.3f, 0.6f, 0.4f, 0f };
    // FIR coefficients: a 1 enables that delay tap (currently a 4-tap average).
    static readonly float[] k = { 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 };
    // static readonly float[] k = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    private void Start()
    {
        // Locate the Cubism model on this GameObject hierarchy.
        _model = this.FindCubismModel();
    }

    // Current phoneme, written by AudioPlayer each time the lip-sync advances.
    public static string Mouth = "N";

    // FIR delay lines; [0] is the newest sample.
    public float[] Mouth_form_delay = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    public float[] Mouth_open_delay = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    // Debug capture of filtered mouth-open values for one utterance.
    public string log = "";
    public bool log_flag = false;

    private void LateUpdate()
    {
        // Shift both delay lines by one sample (replaces 18 hand-written moves).
        for (int i = Mouth_form_delay.Length - 1; i > 0; i--)
        {
            Mouth_form_delay[i] = Mouth_form_delay[i - 1];
            Mouth_open_delay[i] = Mouth_open_delay[i - 1];
        }

        // Map the current phoneme onto a vowel slot. An unrecognized phoneme
        // leaves slot [0] as a duplicate of the previous sample, exactly like
        // the original if/else chain falling through.
        int vowel = -1;
        switch (Mouth)
        {
            case "a": case "A": vowel = 0; break;
            case "i": case "I": vowel = 1; break;
            case "u": case "U": vowel = 2; break;
            case "e": case "E": vowel = 3; break;
            case "o": case "O": vowel = 4; break;
            case "n": case "N": case "sil": vowel = 5; break;
        }
        if (vowel >= 0)
        {
            Mouth_form_delay[0] = Mouth_form[vowel];
            Mouth_open_delay[0] = Mouth_open[vowel];
        }

        // Moving average over the enabled taps.
        int filter_tap = 0;
        float Mouth_form_out = 0f;
        float Mouth_open_out = 0f;
        for (int i = 0; i < k.Length; i++)
        {
            if (k[i] == 1)
            {
                filter_tap++;
            }
            Mouth_form_out += Mouth_form_delay[i] * k[i];
            Mouth_open_out += Mouth_open_delay[i] * k[i];
        }
        Mouth_form_out /= filter_tap;
        Mouth_open_out /= filter_tap;

        // Write the smoothed values into the Cubism model.
        var parameter = _model.Parameters[MouthFormParamIndex];
        parameter.BlendToValue(CubismParameterBlendMode.Override, Mouth_form_out);
        parameter = _model.Parameters[MouthOpenParamIndex];
        parameter.BlendToValue(CubismParameterBlendMode.Override, Mouth_open_out);

        // Debug coefficient pickup: collect filtered open values while the
        // mouth is away from rest, dump them once it returns to rest.
        bool atRest = Mouth_form_delay[0] == Mouth_form[5] && Mouth_open_delay[0] == Mouth_open[5];
        if (!atRest)
        {
            log += "," + Mouth_open_out;
            log_flag = true;
        }
        else if (log_flag)
        {
            Debug.Log(log);
            log = "";
            log_flag = false;
        }
    }
}
Python:test.py
from flask import Flask, send_from_directory,request
from flask_cors import CORS
import CeVIO_control
import time
app = Flask(__name__)
# Allow cross-origin requests from any origin (the WebGL client is served
# from a different origin than this Flask server).
CORS(app, resources={r"/*": {"origins": "*"}})
# NOTE(review): these module-level strings are never read — both route
# handlers assign local variables of the same names.
recievedata = ""
senddata = ""
# GET受信設定と返信
# 通信フォーマット
# ブラウザより送信(GET) http:// .... ?ClientID=hide,ClientName=英夫,ClientSentence=今日はいい天気だねえ
# ブラウザへの返信 ClientID=hide,ClientSentence=英夫:今日はいい天気だねえ,FeeSentence=Fee:ですね,Phoneme=(sil,0.0)(d,0.005)(e,0.07)(s,0.165)(U,0.215)(n,0.27)(e,0.34)(sil,0.5700000000000001) ※Phonemeは(音素,開始時刻)のペア形式(CeVIO_phoneme_data_getの出力に合わせた)
# 自分のClientIDと返信のClientIDが一致していたら、自分が話者、そうでないときは聞き役
@app.route('/')
def get():
    """Handle one chat turn from the Unity/browser client.

    Request (GET):
        ?SendData=ClientId=<id>,ClientName=<name>,ClientSentence=<text>
    Response (plain text):
        ClientId=...,ClientName=...,ClientSentence=<name> : <text>,
        FeeSentence=Fee : <reply>,Phoneme=(phoneme,startTime)(...)...
    A client whose ClientId matches the response is the speaker; others listen.
    """
    recievedata = request.args.get('SendData')
    print(recievedata)
    # Split "ClientId=..,ClientName=..,ClientSentence=.." into its 3 fields.
    # maxsplit=2 keeps commas inside the sentence intact, matching the
    # original find/slice parsing on well-formed input.
    fields = recievedata.split(',', 2) if recievedata else []
    if len(fields) != 3:
        # Malformed request: reply 400 instead of crashing with a 500.
        return 'Error: malformed SendData', 400
    ClientId = fields[0][len('ClientId='):]
    ClientName = fields[1][len('ClientName='):]
    ClientSentence = fields[2][len('ClientSentence='):]
    print(ClientId, ClientName, ClientSentence)
    # Fee's reply logic goes here (currently a plain echo of the client text).
    FeeSentence = ClientSentence
    # Drive CeVIO AI: (re)initialize, fetch phoneme timing, then render the wav.
    CeVIO_control.CeVIO_init()
    Phoneme = CeVIO_control.CeVIO_phoneme_data_get(FeeSentence)
    print(Phoneme)
    senddata = 'ClientId=' + ClientId \
        + ',ClientName=' + ClientName \
        + ',ClientSentence=' + ClientName + ' : ' + ClientSentence \
        + ',FeeSentence=' + 'Fee : ' + FeeSentence \
        + ',Phoneme=' + Phoneme
    # Write the wav last; the client fetches it after receiving this response.
    CeVIO_control.CeVIO_wav_file_output(FeeSentence)
    return senddata
# Expose the generated wav files over HTTP for the Unity client.
@app.route("/Fee_voice/<path:filename>")
def play(filename):
    """Serve a file from the local Fee_voice directory (the rendered voice wav)."""
    return send_from_directory("Fee_voice", filename)
if __name__ == '__main__':
    # host 0.0.0.0 listens on all interfaces so browsers on other machines
    # can reach the server; debug=True enables Flask's reloader/debugger.
    app.run(debug=True, host="0.0.0.0", port=8080)
# Note: CeVIO AI must already be running before starting this Python script.
Python:CeVIO_control.py
import win32com.client

# Late-bound COM handles to CeVIO AI's remote-control service and talker.
# Created at import time; CeVIO AI is expected to be installed on this machine.
service_control = win32com.client.Dispatch("CeVIO.Talk.RemoteService2.ServiceControl2V40")
talker2 = win32com.client.Dispatch("CeVIO.Talk.RemoteService2.Talker2V40")
def CeVIO_init():
    """Attach to CeVIO AI and configure the cast, volume, speed and emotion."""
    # Start (or attach to) the CeVIO host process.
    service_control.StartHost(False)
    # List the available casts (debug aid).
    string_array = talker2.AvailableCasts
    print([string_array.At(i) for i in range(string_array.Length)])
    # Select the voice cast.
    talker2.Cast = "夏色花梨"
    # Output volume setting.
    talker2.Volume = 5
    # Speaking speed setting.
    # NOTE(review): lowercase 'speed' relies on COM late binding resolving the
    # property name case-insensitively — confirm this actually takes effect.
    talker2.speed = 43
    # List the emotion components (debug aid).
    component_array = talker2.Components
    print([component_array.At(i).Name for i in range(component_array.Length)])
    # Default mood: full "joy", everything else zero.
    CeVIO_emotion_set(100, 0, 0, 0, 0)
    return
def CeVIO_emotion_set(ureshii, futsuu, ikari, kanashimi, ochitsuki):
    """Set the five CeVIO emotion components (joy, normal, anger, sadness, calm)."""
    components = talker2.Components
    # Assign each component by its Japanese display name.
    emotion_values = (
        ("嬉しい", ureshii),
        ("普通", futsuu),
        ("怒り", ikari),
        ("哀しみ", kanashimi),
        ("落ち着き", ochitsuki),
    )
    for component_name, value in emotion_values:
        components.ByName(component_name).Value = value
# Write Fee's reply out as a wav file.
def CeVIO_wav_file_output(text):
    """Render `text` with the configured cast and save it as Fee_voice.wav.

    The Flask server serves this file back to Unity at /Fee_voice/Fee_voice.wav.
    """
    # Raw string for the Windows path: the original non-raw literal only worked
    # because \S and \F happen not to be escape sequences, and it raises a
    # SyntaxWarning on modern Python. The byte value is unchanged.
    talker2.OutputWaveToFile(text, r"C:\Snack_Virtual_2\Flask_Python\Fee_voice\Fee_voice.wav")
    print("Fee voice : " + text)
    return
def CeVIO_phoneme_data_get(text):
    """Return phoneme timing for `text` as a "(phoneme,startTime)(...)..." string.

    NOTE(review): comments elsewhere show (phoneme,start,end) triples, but this
    emits only (phoneme, StartTime) pairs; the Unity consumer steps through the
    flattened list two entries at a time, which matches pairs.
    """
    phoneme_data_array = talker2.GetPhonemes(text)
    pieces = []
    for idx in range(phoneme_data_array.Length):
        entry = phoneme_data_array.At(idx)
        pieces.append('(' + entry.Phoneme + ',' + str(entry.StartTime) + ')')
    return ''.join(pieces)
# if __name__ == '__main__':
# CeVIO_init()
# CeVIO_phoneme_data_get("ですね")
それにしてもC#とPythonを同時にやっていると頭がぐちゃぐちゃになる
文法とか頭が大混乱なので、なんでもかんでもBingでコード化してもらった
物凄い威力だ
Comments