import { z } from "zod";
import mammoth from "mammoth";
import pdfParse from "pdf-parse";

/** Upload limit used when CONTEXT_MAX_UPLOAD_BYTES is unset or invalid. */
const DEFAULT_MAX_UPLOAD_BYTES = 8 * 1024 * 1024;

/** Maximum accepted upload size in bytes, overridable via CONTEXT_MAX_UPLOAD_BYTES. */
const MAX_UPLOAD_BYTES = (() => {
  // Floor before checking positivity so a fractional value below 1 cannot
  // produce a limit of 0, which would reject every file.
  const parsed = Math.floor(Number(process.env.CONTEXT_MAX_UPLOAD_BYTES));
  if (Number.isFinite(parsed) && parsed > 0) {
    return parsed;
  }
  return DEFAULT_MAX_UPLOAD_BYTES;
})();

/** How long a context document stays valid after creation (24 hours). */
const CONTEXT_TTL_MS = 24 * 60 * 60 * 1000;

/** Maximum number of document characters embedded in the text prompt. */
const MAX_PROMPT_CHARS = 15000;

const DOCX_MIME_TYPE =
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

const acceptedMimeTypes = new Set([
  "text/plain",
  "text/markdown",
  "text/x-markdown",
  "application/pdf",
  DOCX_MIME_TYPE,
  "image/png",
  "image/jpeg",
  "image/webp"
]);

/** MIME type assumed for a file when only its (lower-cased) extension is usable. */
const mimeByExtension: Record<string, string> = {
  ".txt": "text/plain",
  ".md": "text/markdown",
  ".markdown": "text/markdown",
  ".pdf": "application/pdf",
  ".docx": DOCX_MIME_TYPE,
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".webp": "image/webp"
};

// Same nine extensions as the keys of the mapping above; derived so the two
// lists cannot drift apart.
const acceptedExtensions = new Set(Object.keys(mimeByExtension));

/** Shape of an analysed context document as returned by analyzeUploadedContext. */
export const contextDocumentSchema = z.object({
  id: z.string().min(1),
  source_type: z.enum(["text", "pdf", "docx", "image"]),
  file_name: z.string().min(1),
  mime_type: z.string().min(1),
  size_bytes: z.number().int().positive(),
  extracted_text: z.string(),
  extracted_text_preview: z.string(),
  summary: z.string().min(1),
  highlights: z.array(z.string()).default([]),
  created_at: z.string().min(1),
  expires_at: z.string().min(1),
  used_openai: z.boolean().default(false)
});

export type ContextDocument = z.infer<typeof contextDocumentSchema>;

/** Result of analysing a document, whether through OpenAI or the local fallback. */
type ContextAnalysis = {
  summary: string;
  highlights: string[];
  extractedText: string;
  /** True only when the values above come from a successful OpenAI response. */
  usedOpenAI: boolean;
};

/**
 * Normalises line endings to "\n", collapses runs of three or more newlines
 * into a single blank line, and trims the result.
 */
function normalizeText(raw: string): string {
  return raw
    // Treat CRLF as ONE line break; replacing "\r" alone would turn every
    // Windows line ending into a blank line.
    .replace(/\r\n?/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}

/**
 * Returns the substring spanning the first "{" to the last "}" of `content`.
 *
 * @throws Error when no such brace pair exists.
 */
function extractJsonObject(content: string): string {
  const start = content.indexOf("{");
  const end = content.lastIndexOf("}");
  if (start === -1 || end === -1 || end <= start) {
    throw new Error("OpenAI response did not contain a JSON object.");
  }
  return content.slice(start, end + 1);
}

/**
 * Builds up to three French highlight strings from `text` using regular
 * expressions only: ISO-dated deadlines, one effort estimate, and one
 * deliverable/objective line.
 */
function buildRegexHighlights(text: string): string[] {
  const highlights: string[] = [];

  const deadlineMatches = text.match(
    /(?:deadline|date\s+limite|livraison|avant)\s*[:\-]?\s*(\d{4}-\d{2}-\d{2})/gi
  );
  if (deadlineMatches?.length) {
    highlights.push(`Deadlines detectees: ${deadlineMatches.slice(0, 3).join(" | ")}`);
  }

  // Units are listed longest-first and closed with \b: otherwise "h" wins over
  // "heures" (yielding "10 h" for "10 heures par semaine") and unrelated words
  // such as "3 hommes" match.
  const effortMatch = text.match(
    /(\d{1,3})\s*(heures?|h|minutes?|min|jours?)\b\s*(?:par\s*semaine|au\s*total)?/i
  );
  if (effortMatch) {
    // The trailing \s* may capture whitespace when the optional suffix is absent.
    highlights.push(`Charge detectee: ${effortMatch[0].trim()}`);
  }

  // "t[a\u00e2]che" accepts both the unaccented and the accented French spelling.
  const deliverablesMatch = text.match(
    /(?:livrable|t[a\u00e2]che|objectif|milestone)s?\s*[:\-]?\s*(.+)/i
  );
  if (deliverablesMatch?.[1]) {
    highlights.push(`Element cle: ${deliverablesMatch[1].slice(0, 120)}`);
  }

  return highlights.slice(0, 5);
}

/** Maps a MIME type to the schema's source_type; anything unrecognised is "text". */
function inferSourceType(mimeType: string): "text" | "pdf" | "docx" | "image" {
  if (mimeType === "application/pdf") return "pdf";
  if (mimeType === DOCX_MIME_TYPE) return "docx";
  if (mimeType.startsWith("image/")) return "image";
  return "text";
}

/** Returns the lower-cased extension including the dot, or "" when there is none. */
function getFileExtension(fileName: string): string {
  const idx = fileName.lastIndexOf(".");
  if (idx === -1) return "";
  return fileName.slice(idx).toLowerCase();
}

/**
 * Picks the MIME type used to process `file`.
 *
 * The browser-provided type wins only when it is one this module can handle.
 * Otherwise the extension mapping is used, so that, for example, a ".docx"
 * reported with an unrecognised type is not accepted by extension and then
 * decoded as plain text. When neither yields a known type, the provided type
 * (or "application/octet-stream") is returned for the caller to reject.
 */
function resolveEffectiveMimeType(file: File): string {
  const rawType = typeof file.type === "string" ? file.type : "";
  // Drop any parameters such as ";charset=utf-8" before comparing.
  const provided = (rawType.split(";")[0] ?? "").trim().toLowerCase();
  if (acceptedMimeTypes.has(provided)) {
    return provided;
  }
  const fromExtension = mimeByExtension[getFileExtension(file.name)];
  if (fromExtension !== undefined) {
    return fromExtension;
  }
  return provided || "application/octet-stream";
}

/** True when either the effective MIME type or the file extension is accepted. */
function isSupportedFile(file: File, effectiveMimeType: string): boolean {
  if (acceptedMimeTypes.has(effectiveMimeType)) {
    return true;
  }
  return acceptedExtensions.has(getFileExtension(file.name));
}

/** Formats a byte limit for the French user-facing error message. */
function formatByteLimit(bytes: number): string {
  const render = (value: number): string =>
    Number.isInteger(value) ? String(value) : value.toFixed(1);
  if (bytes >= 1024 * 1024) return `${render(bytes / (1024 * 1024))} Mo`;
  if (bytes >= 1024) return `${render(bytes / 1024)} Ko`;
  return `${bytes} octets`;
}

/**
 * Reads `file` and extracts its text according to `mimeType`.
 *
 * Images produce no text; they are returned as a base64 data URL instead.
 * PDFs go through pdf-parse, DOCX through mammoth, and everything else is
 * decoded as UTF-8 (non-fatal).
 */
async function extractTextFromFile(
  file: File,
  mimeType: string
): Promise<{ text: string; imageDataUrl?: string }> {
  const bytes = Buffer.from(await file.arrayBuffer());

  if (mimeType.startsWith("image/")) {
    return {
      text: "",
      imageDataUrl: `data:${mimeType};base64,${bytes.toString("base64")}`
    };
  }

  if (mimeType === "application/pdf") {
    const parsed = await pdfParse(bytes);
    return { text: normalizeText(parsed.text || "") };
  }

  if (mimeType === DOCX_MIME_TYPE) {
    const result = await mammoth.extractRawText({ buffer: bytes });
    return { text: normalizeText(result.value || "") };
  }

  const decoded = new TextDecoder("utf-8", { fatal: false }).decode(bytes);
  return { text: normalizeText(decoded) };
}

/**
 * Summarises a document through the OpenAI chat completions endpoint.
 *
 * Without OPENAI_API_KEY no request is made and a regex-based local result is
 * returned with `usedOpenAI: false`.
 *
 * @throws Error when the HTTP call fails, the response has no content, or the
 *   content does not contain parseable JSON.
 */
async function summarizeWithOpenAI(input: {
  text: string;
  imageDataUrl?: string;
  fileName: string;
  mimeType: string;
}): Promise<ContextAnalysis> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    const fallbackText = input.text || "Document image sans OCR local.";
    return {
      summary: "Contexte importe. Resume local genere sans IA.",
      highlights: buildRegexHighlights(fallbackText),
      extractedText: fallbackText,
      usedOpenAI: false
    };
  }

  const model = process.env.OPENAI_CONTEXT_MODEL || "gpt-4o-mini";
  const trimmedText = input.text.slice(0, MAX_PROMPT_CHARS);

  // Images are sent as a multi-part message; other documents as a plain prompt.
  const userContent = input.imageDataUrl
    ? [
        {
          type: "text",
          text: "Analyse this project/planning document image and return strict JSON with: summary (short French), highlights (array max 5), extracted_text (French transcription if readable). Focus on deadlines, workload, cadence hints, priorities."
        },
        { type: "image_url", image_url: { url: input.imageDataUrl } }
      ]
    : `Analyse ce document de planning et retourne JSON strict avec: summary (court), highlights (max 5), extracted_text (texte utile condense). Fichier: ${input.fileName} (${input.mimeType}).\n\nDocument:\n${trimmedText}`;

  const response = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      model,
      temperature: 0.2,
      response_format: { type: "json_object" },
      messages: [
        {
          role: "system",
          content:
            "You are a planning context extractor. Return strict JSON only with keys: summary, highlights, extracted_text. Keep highlights concise and actionable."
        },
        { role: "user", content: userContent }
      ]
    })
  });

  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`OpenAI context analysis failed (${response.status}): ${detail}`);
  }

  const payload = (await response.json()) as {
    choices?: Array<{ message?: { content?: string | null } }>;
  };
  const rawContent = payload.choices?.[0]?.message?.content;
  if (!rawContent || typeof rawContent !== "string") {
    throw new Error("OpenAI context analysis returned empty content.");
  }

  // Every field of the model output is treated as unknown and narrowed below.
  const decoded: unknown = JSON.parse(extractJsonObject(rawContent));
  const fields: Record<string, unknown> =
    typeof decoded === "object" && decoded !== null
      ? (decoded as Record<string, unknown>)
      : {};

  const rawSummary = fields.summary;
  const summary =
    typeof rawSummary === "string" && rawSummary.trim().length
      ? rawSummary.trim()
      : "Contexte importe et analyse.";

  const rawHighlights = fields.highlights;
  const highlights = Array.isArray(rawHighlights)
    ? rawHighlights
        .filter((item): item is string => typeof item === "string")
        .map((item) => item.trim())
        .filter((item) => item.length > 0)
        .slice(0, 5)
    : [];

  const rawExtracted = fields.extracted_text;
  const extractedText =
    typeof rawExtracted === "string" && rawExtracted.trim().length
      ? normalizeText(rawExtracted)
      : input.text;

  return {
    summary,
    highlights: highlights.length ? highlights : buildRegexHighlights(extractedText),
    extractedText,
    usedOpenAI: true
  };
}

/**
 * Validates an uploaded file, extracts its text, summarises it, and returns a
 * schema-validated context document that expires 24 hours after creation.
 *
 * A failure of the OpenAI analysis is not fatal: a warning is logged and a
 * regex-based local summary is used, with `used_openai` set to false.
 *
 * @param file The uploaded file.
 * @returns The parsed ContextDocument.
 * @throws Error when the file type is unsupported or the size is zero or over
 *   the configured limit. Errors from text extraction or schema validation
 *   also propagate.
 */
export async function analyzeUploadedContext(file: File): Promise<ContextDocument> {
  const mimeType = resolveEffectiveMimeType(file);
  if (!isSupportedFile(file, mimeType)) {
    throw new Error(
      "Type de fichier non supporte. Utilisez TXT, MD, PDF, DOCX, JPG, PNG ou WEBP."
    );
  }
  if (file.size <= 0 || file.size > MAX_UPLOAD_BYTES) {
    // Report the configured limit, not a hard-coded one.
    throw new Error(`Fichier invalide (taille max ${formatByteLimit(MAX_UPLOAD_BYTES)}).`);
  }

  const sourceType = inferSourceType(mimeType);
  const extracted = await extractTextFromFile(file, mimeType);
  const fallbackText = extracted.text || "Document image importe.";

  let analyzed: ContextAnalysis;
  try {
    analyzed = await summarizeWithOpenAI({
      text: fallbackText,
      imageDataUrl: extracted.imageDataUrl,
      fileName: file.name,
      mimeType
    });
  } catch (error: unknown) {
    // Best-effort by design: keep the upload usable, but leave a trace.
    console.warn(
      "OpenAI context analysis failed; falling back to local summary.",
      error instanceof Error ? error.message : error
    );
    analyzed = {
      summary: "Contexte importe. Resume local utilise.",
      highlights: buildRegexHighlights(fallbackText),
      extractedText: fallbackText,
      usedOpenAI: false
    };
  }

  const now = new Date();
  const expires = new Date(now.getTime() + CONTEXT_TTL_MS);

  return contextDocumentSchema.parse({
    id: `ctx_${now.getTime()}_${Math.random().toString(36).slice(2, 8)}`,
    source_type: sourceType,
    file_name: file.name,
    mime_type: mimeType,
    size_bytes: file.size,
    extracted_text: analyzed.extractedText,
    extracted_text_preview: analyzed.extractedText.slice(0, 4000),
    summary: analyzed.summary,
    highlights: analyzed.highlights,
    created_at: now.toISOString(),
    expires_at: expires.toISOString(),
    // Reflects what actually happened, not merely whether a key is configured.
    used_openai: analyzed.usedOpenAI
  });
}