[OPENAI_API_KEY]
tomoi@DESKTOP-NHQLQU4:~/who5_chat_poc/templates$ export OPENAI_API_KEY="sk-proj-...redacted..."
[tree]
tomoi@DESKTOP-NHQLQU4:~$ tree who5_chat_poc/
who5_chat_poc/
├── app.py
├── requirements.txt
├── templates
│   └── index.html
└── who5_poc.sqlite3
[venv setup]
tomoi@DESKTOP-NHQLQU4:~/who5_chat_poc$ python3 -m venv venv
tomoi@DESKTOP-NHQLQU4:~/who5_chat_poc$ source venv/bin/activate
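Dependencies are installed inside the venv before the first run (this step is omitted from the transcript above):
(venv) tomoi@DESKTOP-NHQLQU4:~/who5_chat_poc$ pip install -r requirements.txt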
[python app.py]
(venv) tomoi@DESKTOP-NHQLQU4:~/who5_chat_poc$ python app.py
* Serving Flask app 'app'
* Debug mode: on
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on http://127.0.0.1:5000
Press CTRL+C to quit
* Restarting with stat
* Debugger is active!
* Debugger PIN: 793-572-591
Launches at http://localhost:5000/
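[smoke test: /api/infer]
A quick check from a second terminal while the server above is running; a minimal sketch using httpx (already in requirements.txt), with an arbitrary example sentence as input.
import httpx

payload = {"text": "ここ1週間はよく眠れて、散歩にも3回行きました。気分は落ち着いています。"}
r = httpx.post("http://127.0.0.1:5000/api/infer", json=payload, timeout=60.0)
r.raise_for_status()
res = r.json()
print(res["who5_score_0_100"], res["outing_per_week_est"], res["confidence_0_1"])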
=================
[who5_chat_poc/requirements.txt]
flask==3.0.3
openai>=1.40.0
httpx==0.27.2
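Note: httpx is pinned below 0.28 presumably because httpx 0.28 removed the deprecated proxies argument, which breaks older openai 1.x clients; 0.27.2 stays compatible with the full openai>=1.40.0 range.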
[who5_chat_poc/app.py]
import os
import json
import uuid
import sqlite3
import tempfile
from pathlib import Path
from datetime import datetime
from typing import Any, Dict
from flask import Flask, request, jsonify, render_template, send_file, after_this_request
from openai import OpenAI
APP_TITLE = "WHO-5 会話推定 PoC"
DB_PATH = os.path.join(os.path.dirname(__file__), "who5_poc.sqlite3")
# Note: this is not a medical diagnosis (positioned as a PoC)
DISCLAIMER = (
"本機能は医療的な診断・判定を目的としない推定(目安)です。"
"異常が疑われる場合は医療機関等へ相談してください。"
)
# ---------------------------
# Voice Q&A (the browser asks questions; the user answers by voice)
# ---------------------------
INTERVIEW_QUESTIONS = [
"ここ1週間の生活を一言でいうと、どんな感じですか?",
"この1週間で、楽しいと感じたことはありましたか?あれば何ですか?",
"この1週間、落ち着いてリラックスできた時間はどのくらいありましたか?",
"この1週間、活動的に動けた日はどのくらいありましたか?",
"睡眠はどうですか?寝つき、途中で目が覚める、起床時の爽快感を教えてください。",
"この1週間、興味を持って取り組めたことはありましたか?",
"外出は週に何回くらいですか?買い物や散歩でも構いません。",
"人と会ったり話したりはどのくらいありましたか?",
]
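# Rough intent of the questions above (an assumed mapping; nothing in the code enforces it):
# Q2→item1 (cheerful mood), Q3→item2 (calm/relaxed), Q4→item3 (active),
# Q5→item4 (restful sleep), Q6→item5 (interest); Q7-Q8 feed the outing/social estimate.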
# Session state (kept in memory, since this is a PoC)
# SESSIONS[session_id] = {"i": 0, "qa": [{"q":..., "a":...}, ...]}
SESSIONS: Dict[str, Dict[str, Any]] = {}
# ---------------------------
# Instructions for the LLM (emit JSON only)
# ---------------------------
SYSTEM_PROMPT = """\
あなたは「会話からWHO-5相当の指標を推定する」アシスタント。
次のルールに厳密に従うこと。
# 目的
ユーザーの自由記述(会話)から、WHO-5の5項目に対応する状態を推定して数値化する。
併せて外出頻度(週あたり)を推定する。
# 出力形式(厳守)
必ず JSON だけを出力する(前後に説明文やコードブロックを付けない)。
JSONスキーマは以下とする:
{
"who5": {
"item1": 0-5,
"item2": 0-5,
"item3": 0-5,
"item4": 0-5,
"item5": 0-5
},
"who5_total_0_25": 0-25,
"who5_score_0_100": 0-100,
"outing_per_week_est": 0以上の数(整数推奨),
"confidence_0_1": 0.0-1.0,
"evidence": {
"who5_item1": "根拠(会話の要点を短く)",
"who5_item2": "...",
"who5_item3": "...",
"who5_item4": "...",
"who5_item5": "...",
"outing": "..."
},
"notes": "不確実性や追加で聞くべき最小質問がある場合は短く書く。なければ空文字。"
}
# WHO-5の項目(対応づけの参考)
item1: 明るく楽しい気分
item2: 落ち着いたリラックス
item3: 意欲的・活動的
item4: よく休めて爽快に目覚め
item5: 日常生活で興味のあることが多い
# 採点ルール(0-5)
0: 全くない / ほぼ該当しない
1: まれに
2: ときどき
3: しばしば
4: ほとんどいつも
5: いつも
# 計算
who5_total_0_25 = item1+...+item5
who5_score_0_100 = who5_total_0_25 * 4
# 注意
- 会話が短く推定が困難なら confidence を下げ、notes に「最小の追加質問」を最大2個だけ書く。
- 推定不能でも JSON 形式は必ず守る。
"""
app = Flask(__name__)
client = OpenAI()
def db_init() -> None:
with sqlite3.connect(DB_PATH) as con:
con.execute(
"""
CREATE TABLE IF NOT EXISTS logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at TEXT NOT NULL,
user_text TEXT NOT NULL,
model TEXT NOT NULL,
result_json TEXT NOT NULL
)
"""
)
con.commit()
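# The accumulated logs can be inspected directly with the sqlite3 CLI, e.g.:
#   sqlite3 who5_poc.sqlite3 "SELECT created_at, model FROM logs ORDER BY id DESC LIMIT 5;"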
def clamp_int(x: Any, lo: int, hi: int) -> int:
try:
v = int(round(float(x)))
except Exception:
return lo
return max(lo, min(hi, v))
def clamp_float(x: Any, lo: float, hi: float) -> float:
try:
v = float(x)
except Exception:
return lo
return max(lo, min(hi, v))
def normalize_result(obj: Dict[str, Any]) -> Dict[str, Any]:
who5 = obj.get("who5", {}) or {}
item1 = clamp_int(who5.get("item1", 0), 0, 5)
item2 = clamp_int(who5.get("item2", 0), 0, 5)
item3 = clamp_int(who5.get("item3", 0), 0, 5)
item4 = clamp_int(who5.get("item4", 0), 0, 5)
item5 = clamp_int(who5.get("item5", 0), 0, 5)
total = item1 + item2 + item3 + item4 + item5
score = total * 4
outing = clamp_int(obj.get("outing_per_week_est", 0), 0, 50)
conf = clamp_float(obj.get("confidence_0_1", 0.3), 0.0, 1.0)
evidence = obj.get("evidence", {}) or {}
notes = obj.get("notes", "")
return {
"who5": {"item1": item1, "item2": item2, "item3": item3, "item4": item4, "item5": item5},
"who5_total_0_25": total,
"who5_score_0_100": score,
"outing_per_week_est": outing,
"confidence_0_1": conf,
"evidence": {
"who5_item1": str(evidence.get("who5_item1", ""))[:120],
"who5_item2": str(evidence.get("who5_item2", ""))[:120],
"who5_item3": str(evidence.get("who5_item3", ""))[:120],
"who5_item4": str(evidence.get("who5_item4", ""))[:120],
"who5_item5": str(evidence.get("who5_item5", ""))[:120],
"outing": str(evidence.get("outing", ""))[:120],
},
"notes": str(notes)[:200],
"disclaimer": DISCLAIMER,
}
def infer_from_text(user_text: str, model: str = "gpt-4.1-mini") -> Dict[str, Any]:
resp = client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_text},
],
temperature=0.2,
)
text = (resp.choices[0].message.content or "").strip()
try:
obj = json.loads(text)
except Exception:
        # Extract the JSON portion if the model wrapped it in extra text
        start = text.find("{")
        end = text.rfind("}")
        obj = None
        if start != -1 and end > start:
            try:
                obj = json.loads(text[start : end + 1])
            except Exception:
                obj = None
        if obj is None:
            obj = {
                "who5": {"item1": 0, "item2": 0, "item3": 0, "item4": 0, "item5": 0},
                "outing_per_week_est": 0,
                "confidence_0_1": 0.1,
                "evidence": {},
                "notes": "出力がJSONとして取得できませんでした。入力をもう少し具体的にしてください。",
            }
return normalize_result(obj)
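# Note: on models/SDKs that support JSON mode, passing
# response_format={"type": "json_object"} to chat.completions.create would make the
# extraction fallback above largely unnecessary; it is omitted to tolerate older SDKs.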
def make_spoken_summary(result: Dict[str, Any]) -> str:
score = result.get("who5_score_0_100", "-")
outing = result.get("outing_per_week_est", "-")
conf = result.get("confidence_0_1", "-")
notes = (result.get("notes") or "").strip()
msg = f"推定結果です。WHO-5スコアは{score}点、外出頻度は週{outing}回程度、信頼度は{conf}です。"
if notes:
msg += f"補足として、{notes}"
return msg
@app.get("/")
def index():
return render_template("index.html", app_title=APP_TITLE)
@app.post("/api/infer")
def api_infer():
payload = request.get_json(force=True)
user_text = (payload.get("text") or "").strip()
if not user_text:
return jsonify({"error": "text is empty"}), 400
model = (payload.get("model") or "gpt-4.1-mini").strip()
result = infer_from_text(user_text, model=model)
with sqlite3.connect(DB_PATH) as con:
con.execute(
"INSERT INTO logs(created_at, user_text, model, result_json) VALUES (?,?,?,?)",
(datetime.now().isoformat(timespec="seconds"), user_text, model, json.dumps(result, ensure_ascii=False)),
)
con.commit()
return jsonify(result)
# ---------------------------
# Voice Q&A: start a session
# ---------------------------
@app.post("/api/session/start")
def api_session_start():
sid = str(uuid.uuid4())
SESSIONS[sid] = {"i": 0, "qa": []}
first_q = INTERVIEW_QUESTIONS[0]
return jsonify({"session_id": sid, "question": first_q, "done": False})
# ---------------------------
# Voice Q&A: take a voice answer, then return the next question or the final estimate
# ---------------------------
@app.post("/api/session/answer_audio")
def api_session_answer_audio():
sid = request.form.get("session_id", "").strip()
if not sid or sid not in SESSIONS:
return jsonify({"error": "invalid session_id"}), 400
if "audio" not in request.files:
return jsonify({"error": "audio file is missing"}), 400
sess = SESSIONS[sid]
idx = int(sess.get("i", 0))
if idx >= len(INTERVIEW_QUESTIONS):
return jsonify({"error": "session already finished"}), 400
f = request.files["audio"]
    suffix = Path(f.filename or "").suffix or ".webm"
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
f.save(tmp.name)
tmp_path = tmp.name
try:
with open(tmp_path, "rb") as audio_fp:
tr = client.audio.transcriptions.create(model="whisper-1", file=audio_fp)
answer_text = (tr.text or "").strip()
finally:
try:
os.remove(tmp_path)
except Exception:
pass
q = INTERVIEW_QUESTIONS[idx]
sess["qa"].append({"q": q, "a": answer_text})
sess["i"] = idx + 1
    # A next question remains
if sess["i"] < len(INTERVIEW_QUESTIONS):
next_q = INTERVIEW_QUESTIONS[sess["i"]]
return jsonify({"session_id": sid, "transcript": answer_text, "question": next_q, "done": False})
    # Final estimate (combine the full Q&A)
joined = "\n".join([f"Q: {x['q']}\nA: {x['a']}" for x in sess["qa"]]).strip()
result = infer_from_text(joined)
with sqlite3.connect(DB_PATH) as con:
con.execute(
"INSERT INTO logs(created_at, user_text, model, result_json) VALUES (?,?,?,?)",
(datetime.now().isoformat(timespec="seconds"), joined, "interview+infer", json.dumps(result, ensure_ascii=False)),
)
con.commit()
spoken = make_spoken_summary(result)
return jsonify({"session_id": sid, "transcript": answer_text, "done": True, "result": result, "spoken": spoken})
# ---------------------------
# One-shot: audio → transcription
# ---------------------------
@app.post("/api/transcribe")
def api_transcribe():
if "audio" not in request.files:
return jsonify({"error": "audio file is missing"}), 400
f = request.files["audio"]
if not f.filename:
return jsonify({"error": "filename is empty"}), 400
suffix = Path(f.filename).suffix or ".webm"
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
f.save(tmp.name)
tmp_path = tmp.name
try:
with open(tmp_path, "rb") as audio_fp:
tr = client.audio.transcriptions.create(model="whisper-1", file=audio_fp)
text = (tr.text or "").strip()
return jsonify({"text": text})
finally:
try:
os.remove(tmp_path)
except Exception:
pass
# ---------------------------
# TTS: text → mp3 (stable version)
# ---------------------------
@app.post("/api/tts")
def api_tts():
payload = request.get_json(force=True)
text = (payload.get("text") or "").strip()
if not text:
return jsonify({"error": "text is empty"}), 400
    # Create a temp file and delete it after the response has been sent
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
out_path = tmp.name
@after_this_request
def _cleanup(response):
try:
os.remove(out_path)
except Exception:
pass
return response
try:
        # Absorb SDK version differences:
        # 1) newer SDKs accept response_format="mp3"
        # 2) older SDKs raise TypeError on the unknown kwarg → retry without it
        #    (mp3 is usually the default)
        try:
            speech = client.audio.speech.create(
                model="tts-1",
                voice="alloy",
                input=text,
                response_format="mp3",
            )
except TypeError:
speech = client.audio.speech.create(
model="tts-1",
voice="alloy",
input=text,
)
        # Absorb return-type differences across SDK versions and write the audio to a file
if hasattr(speech, "stream_to_file"):
speech.stream_to_file(out_path)
else:
data = speech.read() if hasattr(speech, "read") else speech
with open(out_path, "wb") as f:
f.write(data)
return send_file(out_path, mimetype="audio/mpeg", as_attachment=False)
except Exception as e:
return jsonify({"error": f"TTS failed: {type(e).__name__}: {e}"}), 500
# ---------------------------
# One-shot: recording → estimate (free-form speech)
# ---------------------------
@app.post("/api/infer_audio")
def api_infer_audio():
if "audio" not in request.files:
return jsonify({"error": "audio file is missing"}), 400
f = request.files["audio"]
    suffix = Path(f.filename or "").suffix or ".webm"
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
f.save(tmp.name)
tmp_path = tmp.name
try:
with open(tmp_path, "rb") as audio_fp:
tr = client.audio.transcriptions.create(model="whisper-1", file=audio_fp)
user_text = (tr.text or "").strip()
if not user_text:
return jsonify({"error": "transcription is empty"}), 400
result = infer_from_text(user_text)
with sqlite3.connect(DB_PATH) as con:
con.execute(
"INSERT INTO logs(created_at, user_text, model, result_json) VALUES (?,?,?,?)",
(datetime.now().isoformat(timespec="seconds"), user_text, "whisper-1+infer", json.dumps(result, ensure_ascii=False)),
)
con.commit()
spoken = make_spoken_summary(result)
return jsonify({"transcript": user_text, "result": result, "spoken": spoken})
finally:
try:
os.remove(tmp_path)
except Exception:
pass
if __name__ == "__main__":
db_init()
app.run(host="127.0.0.1", port=5000, debug=True)
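[sanity check: normalize_result clamping]
A small sketch showing that out-of-range or malformed model output is clamped rather than rejected. Run it from the project root with OPENAI_API_KEY set, since importing app instantiates the OpenAI client.
from app import normalize_result

r = normalize_result({
    "who5": {"item1": 9, "item2": -3, "item3": "4", "item4": None, "item5": 2.6},
    "outing_per_week_est": 120,
    "confidence_0_1": "0.8",
})
print(r["who5"])  # {'item1': 5, 'item2': 0, 'item3': 4, 'item4': 0, 'item5': 3}
print(r["who5_total_0_25"], r["who5_score_0_100"])  # 12 48
print(r["outing_per_week_est"], r["confidence_0_1"])  # 50 0.8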
[who5_chat_poc/templates/index.html]
<!doctype html>
<html lang="ja">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>{{ app_title }}</title>
<style>
body { font-family: system-ui, -apple-system, Segoe UI, Roboto, "Hiragino Kaku Gothic ProN", "Noto Sans JP", sans-serif; margin: 24px; }
textarea { width: 100%; height: 140px; font-size: 14px; }
button { padding: 10px 14px; font-size: 14px; }
.row { display:flex; gap:16px; align-items:flex-start; margin-top:16px; }
.card { border:1px solid #ddd; border-radius:10px; padding:14px; flex:1; }
.muted { color:#666; font-size: 12px; }
pre { white-space: pre-wrap; word-break: break-word; background:#fafafa; border:1px solid #eee; padding:10px; border-radius:8px; margin: 6px 0; }
.score { font-size: 34px; font-weight: 700; }
.grid { display:grid; grid-template-columns: 1fr 1fr; gap:10px; }
.pill { display:inline-block; padding:2px 8px; border:1px solid #ddd; border-radius:999px; font-size:12px; color:#444; background:#fff; }
</style>
</head>
<body>
<h1>{{ app_title }}</h1>
<p class="muted">
目的:自由記述から WHO-5 相当(目安)と外出頻度を推定します(医療診断ではありません)。
</p>
<label>日常の様子を、文章で入力してください(例:最近の1日の過ごし方/睡眠/外出/人との会話など)</label>
<textarea id="text"></textarea>
<!-- 音声Q&A(ブラウザが質問を読み上げ、回答を録音) -->
<div style="margin-top:10px; display:flex; gap:10px; align-items:center; flex-wrap:wrap;">
<button id="startInterview">音声Q&A開始</button>
<span class="pill" id="modePill">通常</span>
<span class="muted" id="qStatus"></span>
</div>
<div class="muted" style="margin-top:10px;">現在の質問</div>
<pre id="currentQuestion"></pre>
<!-- 録音UI(通常モードでもQ&Aモードでも共通で使う) -->
<div style="margin-top:10px; display:flex; gap:10px; align-items:center;">
<button id="rec">録音開始</button>
<button id="stop" disabled>録音停止</button>
<span class="muted" id="recStatus"></span>
</div>
<div class="muted" style="margin-top:10px;">音声入力(文字起こし)</div>
<pre id="transcriptBox"></pre>
<div style="margin-top:10px; display:flex; gap:10px; align-items:center;">
<button id="speak" disabled>結果を読み上げ</button>
<span class="muted" id="speakStatus"></span>
</div>
<!-- テキストで推定(従来機能) -->
<div style="margin-top:10px; display:flex; gap:10px; align-items:center;">
<button id="run">推定する</button>
<span class="muted" id="status"></span>
</div>
<div class="row">
<div class="card">
<div class="muted">WHO-5 スコア(0?100)</div>
<div class="score" id="who5score">-</div>
<div class="grid" style="margin-top:10px;">
<div>
<div class="muted">合計(0?25)</div>
<div id="who5total">-</div>
</div>
<div>
<div class="muted">外出頻度(週)</div>
<div id="outing">-</div>
</div>
<div>
<div class="muted">信頼度(0?1)</div>
<div id="conf">-</div>
</div>
<div>
<div class="muted">注意</div>
<div id="notes">-</div>
</div>
</div>
<div class="muted" style="margin-top:10px;">根拠(要約)</div>
<pre id="evidence"></pre>
</div>
<div class="card">
<div class="muted">生JSON</div>
<pre id="raw"></pre>
</div>
</div>
<script>
const runBtn = document.getElementById("run");
const statusEl = document.getElementById("status");
function fmt(x) {
if (x === null || x === undefined) return "-";
return String(x);
}
function renderResult(obj) {
document.getElementById("who5score").textContent = fmt(obj.who5_score_0_100);
document.getElementById("who5total").textContent = fmt(obj.who5_total_0_25);
document.getElementById("outing").textContent = fmt(obj.outing_per_week_est);
document.getElementById("conf").textContent = fmt(obj.confidence_0_1);
document.getElementById("notes").textContent = obj.notes ? obj.notes : "-";
const ev = obj.evidence || {};
const lines = [
`item1: ${ev.who5_item1 || ""}`,
`item2: ${ev.who5_item2 || ""}`,
`item3: ${ev.who5_item3 || ""}`,
`item4: ${ev.who5_item4 || ""}`,
`item5: ${ev.who5_item5 || ""}`,
`outing: ${ev.outing || ""}`,
"",
`disclaimer: ${obj.disclaimer || ""}`
];
document.getElementById("evidence").textContent = lines.join("\n");
document.getElementById("raw").textContent = JSON.stringify(obj, null, 2);
}
// ----------------------------
// Text-based estimation (original flow)
// ----------------------------
runBtn.addEventListener("click", async () => {
const text = document.getElementById("text").value.trim();
if (!text) { alert("入力が空です"); return; }
statusEl.textContent = "推定中…";
runBtn.disabled = true;
try {
const res = await fetch("/api/infer", {
method: "POST",
headers: {"Content-Type":"application/json"},
body: JSON.stringify({text})
});
const obj = await res.json();
if (!res.ok) throw new Error(obj.error || "error");
renderResult(obj);
statusEl.textContent = "完了";
} catch (e) {
statusEl.textContent = "失敗";
alert(String(e));
} finally {
runBtn.disabled = false;
}
});
// ----------------------------
// Recording & TTS (shared by normal mode and Q&A mode)
// ----------------------------
let mediaRecorder = null;
let recordedChunks = [];
let lastSpokenText = "";
const recBtn = document.getElementById("rec");
const stopBtn = document.getElementById("stop");
const recStatus = document.getElementById("recStatus");
const transcriptBox = document.getElementById("transcriptBox");
const speakBtn = document.getElementById("speak");
const speakStatus = document.getElementById("speakStatus");
// Q&A UI
const startInterviewBtn = document.getElementById("startInterview");
const qStatus = document.getElementById("qStatus");
const currentQuestion = document.getElementById("currentQuestion");
const modePill = document.getElementById("modePill");
// Q&A state
let interviewSessionId = "";
let currentQuestionText = "";
// Important: re-entrancy guard flags
let interviewBusy = false; // true from session start until the question read-aloud finishes
let ttsBusy = false; // true while TTS audio is playing
function setModePill() {
modePill.textContent = interviewSessionId ? "音声Q&A" : "通常";
}
// Generate speech (mp3) and play it (no overlapping playback)
async function ttsPlay(text) {
  // If something is already playing, wait for it to finish
while (ttsBusy) {
await new Promise(r => setTimeout(r, 80));
}
ttsBusy = true;
try {
const res = await fetch("/api/tts", {
method: "POST",
headers: {"Content-Type":"application/json"},
body: JSON.stringify({ text })
});
if (!res.ok) {
let msg = "";
const ct = res.headers.get("content-type") || "";
if (ct.includes("application/json")) {
const obj = await res.json().catch(() => ({}));
msg = obj.error || JSON.stringify(obj);
} else {
msg = await res.text().catch(() => "");
}
throw new Error(msg || `tts error (${res.status})`);
}
const blob = await res.blob();
const url = URL.createObjectURL(blob);
const audio = new Audio(url);
await audio.play();
await new Promise(resolve => audio.onended = resolve);
try { URL.revokeObjectURL(url); } catch (_) {}
} finally {
ttsBusy = false;
}
}
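// Note: audio.play() is subject to browser autoplay policies. Here it always follows a
// button click so it normally succeeds; if the gesture is too stale the returned promise
// rejects, and the error surfaces through the callers' try/catch.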
// Start the Q&A (prevent double starts; recording is locked while the question is read aloud)
async function startInterview() {
  if (interviewBusy) return; // prevent double start
  if (interviewSessionId) return; // ignore if a session is already running
  interviewBusy = true;
  startInterviewBtn.disabled = true; // prevent repeated clicks
  recBtn.disabled = true; // no recording until the read-aloud finishes
  stopBtn.disabled = true;
try {
qStatus.textContent = "開始中…";
currentQuestion.textContent = "";
currentQuestionText = "";
const res = await fetch("/api/session/start", { method: "POST" });
const obj = await res.json().catch(() => ({}));
if (!res.ok) throw new Error(obj.error || "start failed");
interviewSessionId = obj.session_id || "";
currentQuestionText = obj.question || "";
currentQuestion.textContent = currentQuestionText;
setModePill();
qStatus.textContent = "質問読み上げ中…";
await ttsPlay(currentQuestionText);
qStatus.textContent = "回答を録音してください(録音開始→録音停止)";
} catch (e) {
qStatus.textContent = "失敗";
alert(String(e));
interviewSessionId = "";
setModePill();
} finally {
interviewBusy = false;
startInterviewBtn.disabled = false;
recBtn.disabled = false;
}
}
startInterviewBtn.addEventListener("click", async () => {
try {
await startInterview();
} catch (e) {
    // handled inside startInterview, so this branch is rarely reached
qStatus.textContent = "失敗";
alert(String(e));
}
});
// Normal mode: recording → estimation
async function postAudioInfer(blob) {
const fd = new FormData();
fd.append("audio", blob, "recording.webm");
const res = await fetch("/api/infer_audio", { method: "POST", body: fd });
const obj = await res.json().catch(() => ({}));
if (!res.ok) throw new Error(obj.error || "audio api error");
return obj;
}
// Q&A mode: recording → submit the answer
async function postAudioAnswer(blob, sessionId) {
const fd = new FormData();
fd.append("audio", blob, "recording.webm");
fd.append("session_id", sessionId);
const res = await fetch("/api/session/answer_audio", { method: "POST", body: fd });
const obj = await res.json().catch(() => ({}));
if (!res.ok) throw new Error(obj.error || "answer_audio error");
return obj;
}
// Start recording
recBtn.addEventListener("click", async () => {
  // Do not record while TTS is playing (keeps the question audio out of the answer)
if (ttsBusy) {
alert("読み上げ中です。読み上げが終わってから録音してください。");
return;
}
recordedChunks = [];
transcriptBox.textContent = "";
recStatus.textContent = "マイク取得中…";
let stream;
try {
stream = await navigator.mediaDevices.getUserMedia({ audio: true });
} catch (e) {
recStatus.textContent = "失敗";
alert("マイクが取得できませんでした。ブラウザの権限を確認してください。");
return;
}
try {
mediaRecorder = new MediaRecorder(stream);
} catch (e) {
recStatus.textContent = "失敗";
alert("MediaRecorderが利用できません。別ブラウザで試してください。");
try { stream.getTracks().forEach(t => t.stop()); } catch (_) {}
return;
}
mediaRecorder.ondataavailable = (e) => {
if (e.data && e.data.size > 0) recordedChunks.push(e.data);
};
mediaRecorder.onstart = () => {
recStatus.textContent = "録音中…";
recBtn.disabled = true;
stopBtn.disabled = false;
    // While recording, also lock the Q&A start button (just in case)
startInterviewBtn.disabled = true;
};
mediaRecorder.onstop = async () => {
recStatus.textContent = "送信中…";
stopBtn.disabled = true;
const blob = new Blob(recordedChunks, { type: "audio/webm" });
try {
      // ----------------------------
      // In Q&A mode post to answer_audio; otherwise to infer_audio
      // ----------------------------
if (interviewSessionId) {
const obj = await postAudioAnswer(blob, interviewSessionId);
transcriptBox.textContent = obj.transcript || "";
        // Mirror the transcript into the textarea (optional)
const textArea = document.getElementById("text");
textArea.value = obj.transcript || textArea.value;
if (obj.done) {
          // Final result
const r = obj.result;
renderResult(r);
lastSpokenText = obj.spoken || "";
speakBtn.disabled = !lastSpokenText;
currentQuestion.textContent = "終了しました。";
qStatus.textContent = "終了(必要なら『結果を読み上げ』)";
          // Leave Q&A mode
interviewSessionId = "";
setModePill();
recStatus.textContent = "完了";
return;
} else {
          // Next question
currentQuestionText = obj.question || "";
currentQuestion.textContent = currentQuestionText;
qStatus.textContent = "質問読み上げ中…";
          recBtn.disabled = true; // no recording while the question is read aloud
stopBtn.disabled = true;
await ttsPlay(currentQuestionText);
          recBtn.disabled = false; // recording allowed again once the read-aloud ends
qStatus.textContent = "回答を録音してください(録音開始→録音停止)";
recStatus.textContent = "完了";
          return; // do not fall through to the normal-mode path
}
}
      // ----------------------------
      // Normal mode: recording → estimation
      // ----------------------------
const obj = await postAudioInfer(blob);
transcriptBox.textContent = obj.transcript || "";
lastSpokenText = obj.spoken || "";
      // Mirror the transcript into the textarea (optional)
const textArea = document.getElementById("text");
textArea.value = obj.transcript || textArea.value;
      // Estimation result
const r = obj.result;
renderResult(r);
speakBtn.disabled = !lastSpokenText;
recStatus.textContent = "完了";
} catch (e) {
recStatus.textContent = "失敗";
alert(String(e));
} finally {
      // Stop the media stream
try { mediaRecorder.stream.getTracks().forEach(t => t.stop()); } catch (_) {}
recBtn.disabled = false;
startInterviewBtn.disabled = false;
}
};
mediaRecorder.start();
});
// Stop recording
stopBtn.addEventListener("click", () => {
if (mediaRecorder && mediaRecorder.state !== "inactive") {
mediaRecorder.stop();
}
});
// Read the result aloud
speakBtn.addEventListener("click", async () => {
if (!lastSpokenText) return;
speakStatus.textContent = "音声生成中…";
speakBtn.disabled = true;
  recBtn.disabled = true; // no recording during read-aloud
try {
await ttsPlay(lastSpokenText);
speakStatus.textContent = "完了";
} catch (e) {
speakStatus.textContent = "失敗";
alert(String(e));
} finally {
speakBtn.disabled = false;
recBtn.disabled = false;
}
});
// Initial state
setModePill();
qStatus.textContent = "音声Q&Aは『音声Q&A開始』から開始します。";
currentQuestion.textContent = "(未開始)";
</script>
</body>
</html>
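[driving the voice Q&A from a script]
The same session endpoints the page uses can be exercised headlessly. A sketch with httpx, assuming the server is running and answer.webm is any short local recording (the server transcribes it with whisper-1); here the same file answers every question.
import httpx

BASE = "http://127.0.0.1:5000"
sid = httpx.post(f"{BASE}/api/session/start").json()["session_id"]
obj = {"done": False}
while not obj["done"]:
    with open("answer.webm", "rb") as fp:
        r = httpx.post(
            f"{BASE}/api/session/answer_audio",
            data={"session_id": sid},
            files={"audio": ("answer.webm", fp, "audio/webm")},
            timeout=120.0,
        )
    r.raise_for_status()
    obj = r.json()
print(obj["result"]["who5_score_0_100"], obj["spoken"])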