fix: harden llm moderation parsing

2026-05-14 19:16:46 +07:00
parent 81253e4ffe
commit 65ab5ecb32
2 changed files with 291 additions and 12 deletions
--- a/src/moderation/llmModerationClient.ts
+++ b/src/moderation/llmModerationClient.ts
@@ -1,7 +1,7 @@
 import type { AnalysisResult, MessageRecord } from "./types";
 import { config } from "../config";
 import { createChildLogger } from "../logger";
 import { retryWithBackoff } from "../retry";
 import { config } from "../config";
 import type { AnalysisResult, MessageRecord } from "./types";
 const log = createChildLogger("llmModerationClient");
@@ -20,23 +20,40 @@ interface RawModerationResponse {
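 /**
 * Parses an LLM moderation response and validates it against the target IDs.
 * Extracts a JSON object from surrounding text, validates its structure, and transforms it to AnalysisResult[].
 * Scans from the first '{' and attempts JSON.parse at each candidate closing brace, stopping at the first
 * candidate that parses. Only the first '{' is ever used as the start of a candidate.
 */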
 export function parseModerationResponse(
  content: string,
  targetIds: string[],
 ): AnalysisResult[] {
-  // Extract JSON object from surrounding text
+  // Find first opening brace
-  const jsonMatch = content.match(/\{[\s\S]*\}/);
+  const startIdx = content.indexOf("{");
-  if (!jsonMatch) {
+  if (startIdx === -1) {
    throw new Error("No JSON object found in response");
  }
  // Scan from start and try parsing at each closing brace
  let parsed: unknown;
-  try {
+  let lastError: Error | null = null;
-    parsed = JSON.parse(jsonMatch[0]);
+
-  } catch (error) {
+  for (let i = startIdx + 1; i < content.length; i++) {
    if (content[i] === "}") {
      const candidate = content.substring(startIdx, i + 1);
      try {
        parsed = JSON.parse(candidate);
        // Successfully parsed, break out
        break;
      } catch (error) {
        // Store error and continue scanning
        lastError = error instanceof Error ? error : new Error(String(error));
        continue;
      }
    }
  }
  if (!parsed) {
    throw new Error(
-      `Failed to parse JSON: ${error instanceof Error ? error.message : String(error)}`,
+      `Failed to parse JSON: ${lastError?.message || "No valid JSON object found"}`,
    );
  }
@@ -67,6 +84,10 @@ export function parseModerationResponse(
      throw new Error(`Unknown message_id: ${message_id}`);
    }
    if (foundIds.has(message_id)) {
      throw new Error(`Duplicate message_id in results: ${message_id}`);
    }
    foundIds.add(message_id);
    // Validate status
@@ -77,7 +98,10 @@ export function parseModerationResponse(
      );
    }
-    // Validate and clamp score
+    // Validate score: reject null/undefined/non-finite before coercion
    if (score === null || score === undefined) {
      throw new Error("Invalid score: must not be null or undefined");
    }
    let numScore = Number(score);
    if (!Number.isFinite(numScore)) {
      throw new Error(`Invalid score: ${score}. Must be a finite number`);
--- a/tests/moderation/llmModerationClient.test.ts
+++ b/tests/moderation/llmModerationClient.test.ts
@@ -1,7 +1,17 @@
-import { describe, expect, it } from "vitest";
+import { describe, expect, it, vi, beforeEach } from "vitest";
-import { parseModerationResponse } from "../../src/moderation/llmModerationClient";
+import {
  parseModerationResponse,
  runModerationAnalysis,
 } from "../../src/moderation/llmModerationClient";
 // Replace the retry module for this test file: the stubbed retryWithBackoff invokes the
 // wrapped function once and returns whatever it returns, with no retry or backoff logic.
 vi.mock("../../src/retry", () => ({
  retryWithBackoff: vi.fn((fn) => fn()),
 }));
 describe("parseModerationResponse", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });
  it("parses valid keyed results", () => {
    const result = parseModerationResponse(
      JSON.stringify({
@@ -53,4 +63,249 @@ describe("parseModerationResponse", () => {
      ),
    ).toThrow(/unknown/i);
  });
  // Prose before and after the JSON object must not stop the parser from locating and decoding it.
  it("handles surrounding text around JSON", () => {
    // The literal below is whitespace-sensitive input; keep it exactly as written.
    const content = `Some preamble text here.
    {
      "results": [
        {
          "message_id": "m1",
          "status": "clean",
          "flags": [],
          "score": 0.1,
          "analysis": "OK"
        }
      ]
    }
    Some trailing text here.`;
    const expectedIds = ["m1"];

    const analyses = parseModerationResponse(content, expectedIds);

    expect(analyses).toHaveLength(1);
    expect(analyses[0].messageId).toBe("m1");
  });
  // A result entry carrying an extra nested object (`metadata`) must still parse and keep its score.
  it("handles nested fields in results", () => {
    const entryWithExtras = {
      message_id: "m1",
      status: "warn",
      flags: ["spam", "abuse"],
      score: 0.85,
      analysis: "Multiple violations detected",
      metadata: { nested: "field", count: 5 },
    };
    const payload = JSON.stringify({ results: [entryWithExtras] });

    const analyses = parseModerationResponse(payload, ["m1"]);

    expect(analyses).toHaveLength(1);
    expect(analyses[0].score).toBe(0.85);
  });
  // An explicit null score must be refused; the thrown message mentions "null or undefined".
  it("rejects null score", () => {
    const payload = JSON.stringify({
      results: [
        { message_id: "m1", status: "clean", flags: [], score: null, analysis: "OK" },
      ],
    });
    const attempt = () => parseModerationResponse(payload, ["m1"]);

    expect(attempt).toThrow(/null or undefined/i);
  });
  // A result entry with no `score` key at all must be refused with the same "null or undefined" error.
  it("rejects undefined score", () => {
    const entryWithoutScore = {
      message_id: "m1",
      status: "clean",
      flags: [],
      analysis: "OK",
    };
    const payload = JSON.stringify({ results: [entryWithoutScore] });
    const attempt = () => parseModerationResponse(payload, ["m1"]);

    expect(attempt).toThrow(/null or undefined/i);
  });
  // Two result entries sharing one message_id must make the parser throw a "duplicate" error.
  it("rejects duplicate message_id", () => {
    const firstEntry = {
      message_id: "m1",
      status: "clean",
      flags: [],
      score: 0.1,
      analysis: "OK",
    };
    const repeatedEntry = {
      message_id: "m1",
      status: "warn",
      flags: ["spam"],
      score: 0.5,
      analysis: "Duplicate",
    };
    const payload = JSON.stringify({ results: [firstEntry, repeatedEntry] });
    const attempt = () => parseModerationResponse(payload, ["m1"]);

    expect(attempt).toThrow(/duplicate/i);
  });
  // A status string the parser does not recognise must produce an "invalid status" error.
  it("rejects invalid status", () => {
    const entryWithBadStatus = {
      message_id: "m1",
      status: "invalid_status",
      flags: [],
      score: 0.5,
      analysis: "OK",
    };
    const payload = JSON.stringify({ results: [entryWithBadStatus] });
    const attempt = () => parseModerationResponse(payload, ["m1"]);

    expect(attempt).toThrow(/invalid status/i);
  });
  // A score above the upper bound (1.5) is expected to come back as exactly 1.
  it("clamps score to 0-1 range", () => {
    const overRangeEntry = {
      message_id: "m1",
      status: "clean",
      flags: [],
      score: 1.5,
      analysis: "OK",
    };
    const payload = JSON.stringify({ results: [overRangeEntry] });

    const analyses = parseModerationResponse(payload, ["m1"]);

    expect(analyses[0].score).toBe(1);
  });
  // A score below the lower bound (-0.5) is expected to come back as exactly 0.
  it("clamps negative score to 0", () => {
    const underRangeEntry = {
      message_id: "m1",
      status: "clean",
      flags: [],
      score: -0.5,
      analysis: "OK",
    };
    const payload = JSON.stringify({ results: [underRangeEntry] });

    const analyses = parseModerationResponse(payload, ["m1"]);

    expect(analyses[0].score).toBe(0);
  });
 });
 // Exercises runModerationAnalysis with the global fetch replaced by a vitest mock, covering one
 // successful round trip and three failure shapes (HTTP error, missing choices, missing content).
 // NOTE(review): global.fetch is overwritten in every test and never restored inside this suite;
 // confirm that cleanup happens elsewhere or that leaking the stub is acceptable.
 describe("runModerationAnalysis", () => {
  // Builds a fresh argument object per call so no test shares object state with another.
  const buildRequest = (): Parameters<typeof runModerationAnalysis>[0] => ({
    targets: [{ id: "m1", username: "user1", content: "hello" }],
    contextText: "test context",
  });

  // Installs a fetch mock that resolves to the given response-like object.
  const stubFetch = (response: unknown) => {
    global.fetch = vi.fn().mockResolvedValue(response);
  };

  it("parses successful response from LLM", async () => {
    const moderationJson = JSON.stringify({
      results: [
        {
          message_id: "m1",
          status: "clean",
          flags: [],
          score: 0.1,
          analysis: "OK",
        },
      ],
    });
    const mockResponse = {
      choices: [{ message: { content: moderationJson } }],
    };
    stubFetch({
      ok: true,
      json: async () => mockResponse,
    });

    const outcome = await runModerationAnalysis(buildRequest());

    expect(outcome.results).toHaveLength(1);
    expect(outcome.results[0].messageId).toBe("m1");
    // The raw field is expected to carry the decoded HTTP body unchanged.
    expect(outcome.raw).toEqual(mockResponse);
  });

  it("throws on non-ok HTTP response", async () => {
    stubFetch({
      ok: false,
      status: 500,
      text: async () => "Internal Server Error",
    });

    const pending = runModerationAnalysis(buildRequest());

    await expect(pending).rejects.toThrow(/LLM API error 500/);
  });

  it("throws on missing choices in response", async () => {
    // A body with no `choices` key at all.
    stubFetch({
      ok: true,
      json: async () => ({}),
    });

    const pending = runModerationAnalysis(buildRequest());

    await expect(pending).rejects.toThrow(/Invalid LLM response structure/);
  });

  it("throws on missing content in message", async () => {
    // A choice whose message object has no `content` field.
    const bodyWithoutContent = { choices: [{ message: {} }] };
    stubFetch({
      ok: true,
      json: async () => bodyWithoutContent,
    });

    const pending = runModerationAnalysis(buildRequest());

    await expect(pending).rejects.toThrow(/No content in LLM response/);
  });
 });