fix: harden llm moderation parsing

This commit is contained in:
MythEclipse
2026-05-14 19:16:46 +07:00
parent 81253e4ffe
commit 65ab5ecb32
2 changed files with 291 additions and 12 deletions

View File

@@ -1,7 +1,7 @@
import type { AnalysisResult, MessageRecord } from "./types";
import { config } from "../config";
import { createChildLogger } from "../logger"; import { createChildLogger } from "../logger";
import { retryWithBackoff } from "../retry"; import { retryWithBackoff } from "../retry";
import { config } from "../config";
import type { AnalysisResult, MessageRecord } from "./types";
const log = createChildLogger("llmModerationClient"); const log = createChildLogger("llmModerationClient");
@@ -20,23 +20,40 @@ interface RawModerationResponse {
/** /**
* Parses LLM moderation response and validates against target IDs. * Parses LLM moderation response and validates against target IDs.
* Extracts JSON from surrounding text, validates structure, and transforms to AnalysisResult[]. * Extracts JSON from surrounding text, validates structure, and transforms to AnalysisResult[].
* Scans from first '{' and attempts JSON.parse at each candidate closing brace.
*/ */
export function parseModerationResponse( export function parseModerationResponse(
content: string, content: string,
targetIds: string[], targetIds: string[],
): AnalysisResult[] { ): AnalysisResult[] {
// Extract JSON object from surrounding text // Find first opening brace
const jsonMatch = content.match(/\{[\s\S]*\}/); const startIdx = content.indexOf("{");
if (!jsonMatch) { if (startIdx === -1) {
throw new Error("No JSON object found in response"); throw new Error("No JSON object found in response");
} }
// Scan from start and try parsing at each closing brace
let parsed: unknown; let parsed: unknown;
try { let lastError: Error | null = null;
parsed = JSON.parse(jsonMatch[0]);
} catch (error) { for (let i = startIdx + 1; i < content.length; i++) {
if (content[i] === "}") {
const candidate = content.substring(startIdx, i + 1);
try {
parsed = JSON.parse(candidate);
// Successfully parsed, break out
break;
} catch (error) {
// Store error and continue scanning
lastError = error instanceof Error ? error : new Error(String(error));
continue;
}
}
}
if (!parsed) {
throw new Error( throw new Error(
`Failed to parse JSON: ${error instanceof Error ? error.message : String(error)}`, `Failed to parse JSON: ${lastError?.message || "No valid JSON object found"}`,
); );
} }
@@ -67,6 +84,10 @@ export function parseModerationResponse(
throw new Error(`Unknown message_id: ${message_id}`); throw new Error(`Unknown message_id: ${message_id}`);
} }
if (foundIds.has(message_id)) {
throw new Error(`Duplicate message_id in results: ${message_id}`);
}
foundIds.add(message_id); foundIds.add(message_id);
// Validate status // Validate status
@@ -77,7 +98,10 @@ export function parseModerationResponse(
); );
} }
// Validate and clamp score // Validate score: reject null/undefined/non-finite before coercion
if (score === null || score === undefined) {
throw new Error("Invalid score: must not be null or undefined");
}
let numScore = Number(score); let numScore = Number(score);
if (!Number.isFinite(numScore)) { if (!Number.isFinite(numScore)) {
throw new Error(`Invalid score: ${score}. Must be a finite number`); throw new Error(`Invalid score: ${score}. Must be a finite number`);

View File

@@ -1,7 +1,17 @@
import { describe, expect, it } from "vitest"; import { describe, expect, it, vi, beforeEach } from "vitest";
import { parseModerationResponse } from "../../src/moderation/llmModerationClient"; import {
parseModerationResponse,
runModerationAnalysis,
} from "../../src/moderation/llmModerationClient";
// Replace the retry module with a pass-through: the mocked retryWithBackoff
// invokes the wrapped function once and returns its result directly, so the
// tests below observe a single attempt.
vi.mock("../../src/retry", () => ({
retryWithBackoff: vi.fn((fn) => fn()),
}));
describe("parseModerationResponse", () => { describe("parseModerationResponse", () => {
// Clear recorded mock state before every test in this suite so that call
// history from one case cannot influence assertions in another.
beforeEach(() => {
vi.clearAllMocks();
});
it("parses valid keyed results", () => { it("parses valid keyed results", () => {
const result = parseModerationResponse( const result = parseModerationResponse(
JSON.stringify({ JSON.stringify({
@@ -53,4 +63,249 @@ describe("parseModerationResponse", () => {
), ),
).toThrow(/unknown/i); ).toThrow(/unknown/i);
}); });
it("handles surrounding text around JSON", () => {
  // The JSON document is sandwiched between free-form text on both sides;
  // the parser is expected to locate and decode just the object.
  const wrappedPayload = `Some preamble text here.
{
"results": [
{
"message_id": "m1",
"status": "clean",
"flags": [],
"score": 0.1,
"analysis": "OK"
}
]
}
Some trailing text here.`;
  const knownIds = ["m1"];

  const parsed = parseModerationResponse(wrappedPayload, knownIds);

  expect(parsed).toHaveLength(1);
  expect(parsed[0].messageId).toBe("m1");
});
it("handles nested fields in results", () => {
  // A result entry carrying an extra nested object alongside the usual fields.
  const entry = {
    message_id: "m1",
    status: "warn",
    flags: ["spam", "abuse"],
    score: 0.85,
    analysis: "Multiple violations detected",
    metadata: {
      nested: "field",
      count: 5,
    },
  };
  const serialized = JSON.stringify({ results: [entry] });

  const parsed = parseModerationResponse(serialized, ["m1"]);

  expect(parsed).toHaveLength(1);
  expect(parsed[0].score).toBe(0.85);
});
it("rejects null score", () => {
  // An explicit null score must be refused.
  const serialized = JSON.stringify({
    results: [
      {
        message_id: "m1",
        status: "clean",
        flags: [],
        score: null,
        analysis: "OK",
      },
    ],
  });
  const attempt = () => parseModerationResponse(serialized, ["m1"]);

  expect(attempt).toThrow(/null or undefined/i);
});
it("rejects undefined score", () => {
  // The score key is omitted entirely, so the parser sees it as undefined.
  const entryWithoutScore = {
    message_id: "m1",
    status: "clean",
    flags: [],
    analysis: "OK",
  };
  const serialized = JSON.stringify({ results: [entryWithoutScore] });
  const attempt = () => parseModerationResponse(serialized, ["m1"]);

  expect(attempt).toThrow(/null or undefined/i);
});
it("rejects duplicate message_id", () => {
  // Two entries share the same message_id; the second one must trigger an error.
  const firstEntry = {
    message_id: "m1",
    status: "clean",
    flags: [],
    score: 0.1,
    analysis: "OK",
  };
  const repeatedEntry = {
    message_id: "m1",
    status: "warn",
    flags: ["spam"],
    score: 0.5,
    analysis: "Duplicate",
  };
  const serialized = JSON.stringify({ results: [firstEntry, repeatedEntry] });
  const attempt = () => parseModerationResponse(serialized, ["m1"]);

  expect(attempt).toThrow(/duplicate/i);
});
it("rejects invalid status", () => {
  // The status value here is one this test expects the parser to refuse.
  const serialized = JSON.stringify({
    results: [
      {
        message_id: "m1",
        status: "invalid_status",
        flags: [],
        score: 0.5,
        analysis: "OK",
      },
    ],
  });
  const attempt = () => parseModerationResponse(serialized, ["m1"]);

  expect(attempt).toThrow(/invalid status/i);
});
it("clamps score to 0-1 range", () => {
  // A score above the upper bound should come back as exactly 1.
  const overLimitEntry = {
    message_id: "m1",
    status: "clean",
    flags: [],
    score: 1.5,
    analysis: "OK",
  };
  const serialized = JSON.stringify({ results: [overLimitEntry] });

  const parsed = parseModerationResponse(serialized, ["m1"]);

  expect(parsed[0].score).toBe(1);
});
it("clamps negative score to 0", () => {
  // A score below the lower bound should come back as exactly 0.
  const negativeEntry = {
    message_id: "m1",
    status: "clean",
    flags: [],
    score: -0.5,
    analysis: "OK",
  };
  const serialized = JSON.stringify({ results: [negativeEntry] });

  const parsed = parseModerationResponse(serialized, ["m1"]);

  expect(parsed[0].score).toBe(0);
});
});
describe("runModerationAnalysis", () => {
it("parses successful response from LLM", async () => {
  // Canned API body: choices[0].message.content holds the serialized
  // moderation JSON that the client is expected to parse.
  const mockResponse = {
    choices: [
      {
        message: {
          content: JSON.stringify({
            results: [
              {
                message_id: "m1",
                status: "clean",
                flags: [],
                score: 0.1,
                analysis: "OK",
              },
            ],
          }),
        },
      },
    ],
  };

  // Install the fake fetch for this test only; restoring it in `finally`
  // keeps the mock from leaking into later tests even when an assertion fails.
  const originalFetch = global.fetch;
  global.fetch = vi.fn().mockResolvedValue({
    ok: true,
    json: async () => mockResponse,
  });
  try {
    const result = await runModerationAnalysis({
      targets: [{ id: "m1", username: "user1", content: "hello" }],
      contextText: "test context",
    });

    expect(result.results).toHaveLength(1);
    expect(result.results[0].messageId).toBe("m1");
    expect(result.raw).toEqual(mockResponse);
  } finally {
    global.fetch = originalFetch;
  }
});
it("throws on non-ok HTTP response", async () => {
  // Install a fake fetch that reports an HTTP 500; restoring the original in
  // `finally` keeps the mock from leaking into later tests.
  const originalFetch = global.fetch;
  global.fetch = vi.fn().mockResolvedValue({
    ok: false,
    status: 500,
    text: async () => "Internal Server Error",
  });
  try {
    await expect(
      runModerationAnalysis({
        targets: [{ id: "m1", username: "user1", content: "hello" }],
        contextText: "test context",
      }),
    ).rejects.toThrow(/LLM API error 500/);
  } finally {
    global.fetch = originalFetch;
  }
});
it("throws on missing choices in response", async () => {
  // Install a fake fetch whose body is an empty object (no `choices` key);
  // restoring the original in `finally` keeps the mock from leaking into
  // later tests.
  const originalFetch = global.fetch;
  global.fetch = vi.fn().mockResolvedValue({
    ok: true,
    json: async () => ({}),
  });
  try {
    await expect(
      runModerationAnalysis({
        targets: [{ id: "m1", username: "user1", content: "hello" }],
        contextText: "test context",
      }),
    ).rejects.toThrow(/Invalid LLM response structure/);
  } finally {
    global.fetch = originalFetch;
  }
});
it("throws on missing content in message", async () => {
  // Install a fake fetch whose single choice has a message with no `content`;
  // restoring the original in `finally` keeps the mock from leaking into
  // later tests.
  const originalFetch = global.fetch;
  global.fetch = vi.fn().mockResolvedValue({
    ok: true,
    json: async () => ({
      choices: [{ message: {} }],
    }),
  });
  try {
    await expect(
      runModerationAnalysis({
        targets: [{ id: "m1", username: "user1", content: "hello" }],
        contextText: "test context",
      }),
    ).rejects.toThrow(/No content in LLM response/);
  } finally {
    global.fetch = originalFetch;
  }
});
}); });