fix: harden analysis queue scheduling

This commit is contained in:
MythEclipse
2026-05-14 19:39:25 +07:00
parent f14e893cb7
commit 243a18ecad
2 changed files with 90 additions and 28 deletions

View File

@@ -3,6 +3,7 @@ import { createChildLogger } from "../logger";
import { retryWithBackoff } from "../retry"; import { retryWithBackoff } from "../retry";
import { import {
getConversationContextBefore, getConversationContextBefore,
getMessageById,
getPendingConversationKeys, getPendingConversationKeys,
getPendingMessagesByConversation, getPendingMessagesByConversation,
updateMessageAIAnalysis, updateMessageAIAnalysis,
@@ -15,13 +16,17 @@ const logger = createChildLogger("ai-analyzer");
// Debounce state per conversation key // Debounce state per conversation key
const conversationDebounceTimers = new Map<string, NodeJS.Timeout>(); const conversationDebounceTimers = new Map<string, NodeJS.Timeout>();
const conversationPendingBatches = new Map<string, Set<string>>(); // Track conversations currently being processed
const conversationProcessing = new Set<string>();
// Track conversations in error cooldown (failed recently)
const conversationErrorCooldown = new Map<string, number>();
let activeRequests = 0; let activeRequests = 0;
let lastError: string | null = null; let lastError: string | null = null;
const MAX_ACTIVE_REQUESTS = 1; const MAX_ACTIVE_REQUESTS = 1;
const DEBOUNCE_MS = 1500; const DEBOUNCE_MS = 1500;
const RECOVERY_INTERVAL_MS = 15000; const RECOVERY_INTERVAL_MS = 15000;
const ERROR_COOLDOWN_MS = 30000;
const MAX_CONTEXT_TOKENS = 8000; const MAX_CONTEXT_TOKENS = 8000;
const MAX_BATCH_SIZE = 25; const MAX_BATCH_SIZE = 25;
@@ -68,6 +73,7 @@ async function processBatch(
if (messages.length === 0) return; if (messages.length === 0) return;
activeRequests++; activeRequests++;
conversationProcessing.add(conversationKey);
try { try {
// Get context before the first message // Get context before the first message
const firstMessage = messages[0]; const firstMessage = messages[0];
@@ -115,6 +121,9 @@ async function processBatch(
(globalThis as any).broadcastMessageAnalyzed?.(row); (globalThis as any).broadcastMessageAnalyzed?.(row);
} }
// Clear error cooldown on success
conversationErrorCooldown.delete(conversationKey);
logger.info( logger.info(
{ conversationKey, count: messages.length }, { conversationKey, count: messages.length },
"Batch analysis complete", "Batch analysis complete",
@@ -142,8 +151,15 @@ async function processBatch(
(globalThis as any).broadcastMessageAnalyzed?.(row); (globalThis as any).broadcastMessageAnalyzed?.(row);
} }
} }
// Set error cooldown for this conversation
conversationErrorCooldown.set(
conversationKey,
Date.now() + ERROR_COOLDOWN_MS,
);
} finally { } finally {
activeRequests--; activeRequests--;
conversationProcessing.delete(conversationKey);
} }
} }
@@ -151,6 +167,25 @@ async function processBatch(
* Debounced analysis trigger for a conversation * Debounced analysis trigger for a conversation
*/ */
function scheduleConversationAnalysis(conversationKey: string): void { function scheduleConversationAnalysis(conversationKey: string): void {
// Skip if already processing
if (conversationProcessing.has(conversationKey)) {
logger.debug(
{ conversationKey },
"Conversation already processing, skipping schedule",
);
return;
}
// Skip if in error cooldown
const cooldownUntil = conversationErrorCooldown.get(conversationKey);
if (cooldownUntil && Date.now() < cooldownUntil) {
logger.debug(
{ conversationKey, cooldownMs: cooldownUntil - Date.now() },
"Conversation in error cooldown, skipping schedule",
);
return;
}
// Clear existing timer // Clear existing timer
const existingTimer = conversationDebounceTimers.get(conversationKey); const existingTimer = conversationDebounceTimers.get(conversationKey);
if (existingTimer) { if (existingTimer) {
@@ -161,9 +196,14 @@ function scheduleConversationAnalysis(conversationKey: string): void {
const timer = setTimeout(async () => { const timer = setTimeout(async () => {
conversationDebounceTimers.delete(conversationKey); conversationDebounceTimers.delete(conversationKey);
// Wait for active requests to complete // If activeRequests >= MAX_ACTIVE_REQUESTS, requeue instead of waiting
while (activeRequests >= MAX_ACTIVE_REQUESTS) { if (activeRequests >= MAX_ACTIVE_REQUESTS) {
await new Promise((resolve) => setTimeout(resolve, 100)); logger.debug(
{ conversationKey, activeRequests },
"Max active requests reached, requeuing conversation",
);
scheduleConversationAnalysis(conversationKey);
return;
} }
// Get pending messages for this conversation // Get pending messages for this conversation
@@ -175,9 +215,6 @@ function scheduleConversationAnalysis(conversationKey: string): void {
if (messages.length > 0) { if (messages.length > 0) {
await processBatch(conversationKey, messages); await processBatch(conversationKey, messages);
} }
// Clear pending batch
conversationPendingBatches.delete(conversationKey);
}, DEBOUNCE_MS); }, DEBOUNCE_MS);
conversationDebounceTimers.set(conversationKey, timer); conversationDebounceTimers.set(conversationKey, timer);
@@ -186,13 +223,31 @@ function scheduleConversationAnalysis(conversationKey: string): void {
/** /**
* Queues a message for analysis (debounced by conversation) * Queues a message for analysis (debounced by conversation)
*/ */
export function queueMessageAnalysis(messageId: string): void { export async function queueMessageAnalysis(messageId: string): Promise<void> {
if (!config.AI_ANALYSIS_ENABLED) return; if (!config.AI_ANALYSIS_ENABLED) return;
logger.debug({ messageId }, "Queueing message for analysis"); logger.debug({ messageId }, "Queueing message for analysis");
// Note: We don't have the message here, so we'll rely on recovery interval try {
// to pick it up from the database // Look up the message to get its conversation key
const message = await getMessageById(messageId);
if (!message) {
logger.warn({ messageId }, "Message not found for analysis queue");
return;
}
// Schedule its conversation for analysis
const conversationKey = getConversationKey(message);
queueConversationAnalysis(conversationKey);
} catch (error) {
logger.error(
{
messageId,
error: error instanceof Error ? error.message : String(error),
},
"Failed to queue message for analysis",
);
}
} }
/** /**
@@ -203,11 +258,6 @@ export function queueConversationAnalysis(conversationKey: string): void {
logger.debug({ conversationKey }, "Queueing conversation for analysis"); logger.debug({ conversationKey }, "Queueing conversation for analysis");
// Track pending batch
if (!conversationPendingBatches.has(conversationKey)) {
conversationPendingBatches.set(conversationKey, new Set());
}
// Schedule debounced analysis // Schedule debounced analysis
scheduleConversationAnalysis(conversationKey); scheduleConversationAnalysis(conversationKey);
} }
@@ -240,15 +290,28 @@ export function startPendingAIAnalysisWorker(): void {
const conversationKeys = await getPendingConversationKeys(100); const conversationKeys = await getPendingConversationKeys(100);
for (const key of conversationKeys) { for (const key of conversationKeys) {
// Only schedule if not already scheduled // Skip if already scheduled
if (!conversationDebounceTimers.has(key)) { if (conversationDebounceTimers.has(key)) {
continue;
}
// Skip if currently processing
if (conversationProcessing.has(key)) {
continue;
}
// Skip if in error cooldown
const cooldownUntil = conversationErrorCooldown.get(key);
if (cooldownUntil && Date.now() < cooldownUntil) {
continue;
}
logger.debug( logger.debug(
{ conversationKey: key }, { conversationKey: key },
"Recovering pending conversation", "Recovering pending conversation",
); );
scheduleConversationAnalysis(key); scheduleConversationAnalysis(key);
} }
}
} catch (error) { } catch (error) {
logger.error({ error }, "Pending AI analysis recovery worker failed"); logger.error({ error }, "Pending AI analysis recovery worker failed");
} }

View File

@@ -509,17 +509,16 @@ export async function getPendingMessagesByConversation(
const db = getDatabase() as any; const db = getDatabase() as any;
// conversationKey is either thread_id or channel_id // conversationKey is either thread_id or channel_id
const isThreadId = conversationKey.startsWith("t"); // Query both to safely handle the key
const condition = isThreadId
? eq(messagesTable.thread_id, conversationKey)
: eq(messagesTable.channel_id, conversationKey);
const rows = await db const rows = await db
.select() .select()
.from(messagesTable) .from(messagesTable)
.where( .where(
and( and(
condition, or(
eq(messagesTable.thread_id, conversationKey),
eq(messagesTable.channel_id, conversationKey),
),
eq(messagesTable.ai_status, "pending"), eq(messagesTable.ai_status, "pending"),
isNull(messagesTable.deleted_at), isNull(messagesTable.deleted_at),
), ),