import { execFile, execSync } from 'child_process'
import { randomUUID } from 'crypto'
import { writeFileSync, unlinkSync, existsSync, mkdirSync } from 'fs'
import { extname, join } from 'path'
import { promisify } from 'util'
import refreshTokenHelper from "../../utils/refreshTokenHelper"
import getTokenHelper from "../../utils/getTokenHelper"

/**
 * Queued server-side voice transcription using whisper.cpp (local, German).
 *
 * POST /api/mobile/transcribe
 * Body: multipart form — 'audio' file + 'requestId' field
 *
 * Saves audio to local temp file, converts to WAV, transcribes with Whisper,
 * appends transcript to the request's Summary field.
 * Returns immediately with { queued: true } — transcription runs async.
 *
 * Note: success and failure are reported through the `status` field of the
 * returned JSON object; this handler does not set the HTTP status code itself.
 */

/** Promise-based execFile: runs a binary WITHOUT a shell and without blocking the event loop. */
const execFileAsync = promisify(execFile)

/** Maximum run time for the ffmpeg conversion step. */
const FFMPEG_TIMEOUT_MS = 30000
/** Maximum run time for the whisper.cpp transcription step. */
const WHISPER_TIMEOUT_MS = 120000
/** Upper bound for captured stdout/stderr of the child processes. */
const MAX_OUTPUT_BYTES = 16 * 1024 * 1024
/** Accepted request identifiers: letters, digits and dashes only (no path or query characters). */
const REQUEST_ID_PATTERN = /^[A-Za-z0-9-]{1,64}$/
/** File extensions that are safe to reuse from the client-supplied filename. */
const SAFE_EXTENSION_PATTERN = /^\.[a-z0-9]{1,8}$/
/** Extension used when the upload has no usable extension. */
const DEFAULT_AUDIO_EXTENSION = '.webm'

/** Everything the background job needs; captured before the HTTP response is sent. */
interface TranscribeContext {
  token: string
  idempBaseUrl: string
  requestId: string
  audioPath: string
}

/** Subset of the request record read by this module. */
interface RequestRecord {
  Summary?: string | null
}

/**
 * Turns an unknown thrown value into a log-friendly string.
 * Prefers the child process's stderr (set by execFile on failure) over the generic message.
 */
const describeError = (err: unknown): string => {
  if (err instanceof Error) {
    const stderr = (err as Error & { stderr?: unknown }).stderr
    if (typeof stderr === 'string' && stderr.trim()) return stderr.trim()
    return err.message
  }
  return String(err)
}

/**
 * Extracts the spoken text from whisper.cpp console output.
 * Only lines like "[00:00:00.000 --> 00:00:05.000] text here" are considered.
 *
 * @param output raw stdout of the whisper binary
 * @returns all segment texts joined by single spaces, or '' when there is none
 */
const parseWhisperOutput = (output: string): string =>
  output
    .split(/\r?\n/) // tolerate CRLF so the end-of-line anchor below still matches
    .filter(line => line.includes('-->'))
    .map(line => line.match(/\]\s*(.*)$/)?.[1]?.trim() ?? '')
    .filter(Boolean)
    .join(' ')
    .trim()

/** Deletes a temp file if it exists. Best-effort: cleanup problems must not fail the job. */
const removeIfPresent = (path: string): void => {
  try {
    if (existsSync(path)) unlinkSync(path)
  } catch {
    // Intentionally ignored — a leftover temp file is harmless compared to a failed job.
  }
}

/**
 * Background job: converts the uploaded audio to 16 kHz mono WAV, transcribes it
 * with whisper.cpp and appends the transcript to the request's summary.
 *
 * Every failure is caught and logged, so the returned promise does not reject.
 * Both temp files (upload and WAV) are removed afterwards, whatever the outcome.
 *
 * @param ctx token, API base URL, request id and path of the saved upload
 */
const processTranscription = async (ctx: TranscribeContext): Promise<void> => {
  // Append instead of replacing the extension: the WAV path can then never equal
  // the input path (which happened for '.wav' uploads and extension-less names).
  const wavPath = `${ctx.audioPath}.wav`

  try {
    // Step 1: Convert to WAV 16kHz mono using ffmpeg.
    // Arguments are passed as an array (no shell), so the path is never interpreted.
    console.log(`[Transcribe] Converting audio: ${ctx.audioPath}`)
    try {
      await execFileAsync(
        'ffmpeg',
        ['-nostdin', '-loglevel', 'error', '-i', ctx.audioPath, '-ar', '16000', '-ac', '1', '-y', wavPath],
        { timeout: FFMPEG_TIMEOUT_MS, maxBuffer: MAX_OUTPUT_BYTES, encoding: 'utf-8' }
      )
    } catch (e: unknown) {
      console.error('[Transcribe] ffmpeg conversion failed:', describeError(e))
      throw new Error('Audio conversion failed')
    }

    // Step 2: Transcribe with whisper.cpp
    // Use persistent path outside node_modules (survives npm install / deploys)
    // Fallback to node_modules path for dev environments
    const persistentDir = '/opt/whisper-cpp'
    const nodeModulesDir = join(process.cwd(), 'node_modules/whisper-node/lib/whisper.cpp')
    const whisperBin = existsSync(join(persistentDir, 'main'))
      ? join(persistentDir, 'main')
      : join(nodeModulesDir, 'main')
    const modelFile = existsSync(join(persistentDir, 'ggml-base.bin'))
      ? join(persistentDir, 'ggml-base.bin')
      : join(nodeModulesDir, 'models/ggml-base.bin')

    if (!existsSync(whisperBin)) {
      throw new Error('Whisper binary not found at: ' + whisperBin)
    }
    if (!existsSync(modelFile)) {
      throw new Error('Whisper model not found at: ' + modelFile)
    }

    console.log(`[Transcribe] Running whisper on: ${wavPath}`)
    let whisperOutput: string
    try {
      const result = await execFileAsync(
        whisperBin,
        ['-m', modelFile, '-l', 'de', '-f', wavPath],
        { timeout: WHISPER_TIMEOUT_MS, maxBuffer: MAX_OUTPUT_BYTES, encoding: 'utf-8' }
      )
      whisperOutput = result.stdout
    } catch (e: unknown) {
      console.error('[Transcribe] Whisper execution failed:', describeError(e))
      throw new Error('Whisper execution failed')
    }

    const fullText = parseWhisperOutput(whisperOutput)
    if (!fullText) {
      console.log('[Transcribe] No speech detected')
      return
    }

    console.log(`[Transcribe] Result for request ${ctx.requestId}: "${fullText}"`)

    // Step 3: Fetch current request and append transcript to summary
    const headers = {
      'Content-Type': 'application/json',
      'Accept': 'application/json',
      'Authorization': `Bearer ${ctx.token}`
    }
    // Encode the id so it can only ever address a single path segment.
    const requestUrl = `${ctx.idempBaseUrl}/models/r_request/${encodeURIComponent(ctx.requestId)}`

    const currentReq = await $fetch<RequestRecord>(requestUrl, { headers })
    const currentSummary = currentReq?.Summary || ''
    const updatedSummary = currentSummary + `\n[VOICE: ${fullText}]`

    // NOTE(review): the record is read via `Summary` but written via `summary` —
    // confirm the backend treats property names case-insensitively.
    await $fetch(requestUrl, {
      method: 'PUT',
      headers,
      body: { summary: updatedSummary, tableName: 'R_Request' }
    })

    console.log(`[Transcribe] Successfully saved transcript for request ${ctx.requestId}`)
  } catch (err: unknown) {
    console.error(
      `[Transcribe] Error for request ${ctx.requestId}:`,
      err instanceof Error ? err.message : err
    )
  } finally {
    // Cleanup temp files
    removeIfPresent(ctx.audioPath)
    removeIfPresent(wavPath)
  }
}

/**
 * POST handler: validates the upload, stores it in a temp file and starts the
 * background transcription without waiting for it.
 *
 * @returns `{ status: 200, queued: true, message }` once the job is started,
 *          `{ status: 400, message }` for missing/invalid input,
 *          `{ status: 401, message }` when no API token can be obtained
 */
export default defineEventHandler(async (event) => {
  // Read multipart form data
  const formData = await readMultipartFormData(event)
  if (!formData) {
    return { status: 400, message: 'Multipart form data required' }
  }

  let requestId = ''
  let audioBuffer: Buffer | null = null
  let audioFilename = 'audio.webm'

  for (const part of formData) {
    if (part.name === 'requestId') {
      requestId = part.data.toString().trim()
    } else if (part.name === 'audio') {
      audioBuffer = part.data
      audioFilename = part.filename || 'audio.webm'
    }
  }

  if (!requestId || !audioBuffer || audioBuffer.length === 0) {
    return { status: 400, message: 'requestId and audio file are required' }
  }

  // The id ends up in a backend URL — reject anything that could change the target path.
  if (!REQUEST_ID_PATTERN.test(requestId)) {
    return { status: 400, message: 'requestId is invalid' }
  }

  // Get token for iDempiere API calls
  let token: string
  try {
    token = await getTokenHelper(event)
  } catch {
    try {
      token = await refreshTokenHelper(event)
    } catch {
      return { status: 401, message: 'Authentication failed' }
    }
  }

  // Save audio to temp file.
  // The client-supplied filename is untrusted: only its extension is kept (and only
  // when it looks harmless), the rest of the name is generated here. This prevents
  // path traversal and makes concurrent uploads collision-free.
  const config = useRuntimeConfig()
  const tmpDir = join(process.cwd(), '.tmp-transcribe')
  mkdirSync(tmpDir, { recursive: true })

  const requestedExt = extname(audioFilename).toLowerCase()
  const safeExt = SAFE_EXTENSION_PATTERN.test(requestedExt) ? requestedExt : DEFAULT_AUDIO_EXTENSION
  const audioPath = join(tmpDir, `${Date.now()}-${randomUUID()}${safeExt}`)
  writeFileSync(audioPath, audioBuffer)

  console.log(`[Transcribe] Queued: request=${requestId}, file=${audioPath} (${audioBuffer.length} bytes)`)

  const ctx: TranscribeContext = {
    token,
    idempBaseUrl: config.api.url,
    requestId,
    audioPath
  }

  // Fire and forget — don't block the response.
  // processTranscription awaits its child processes, so control returns here right away.
  void processTranscription(ctx).catch(err => {
    console.error('[Transcribe] Background processing failed:', err)
  })

  return { status: 200, queued: true, message: 'Transcription queued' }
})