Skip to content

Commit

Permalink
Merge pull request #955 from mendableai/rafa/fix-default-on-schema-ll…
Browse files Browse the repository at this point in the history
…m-extract

fixed optional+default bug on llm schema
  • Loading branch information
nickscamara authored Dec 27, 2024
2 parents 0421f81 + 2c233bd commit eba5fda
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 6 deletions.
8 changes: 4 additions & 4 deletions apps/api/requests.http
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ content-type: application/json
"urls": ["firecrawl.dev"],
"prompt": "What is the title, description and main product of the page?",
"schema": {
"title": "string",
"description": "string",
"mainProduct": "string"
"title": { "type": "string" },
"description": { "type": "string" },
"mainProduct": { "type": "string" }
}
}
}
33 changes: 33 additions & 0 deletions apps/api/src/scraper/scrapeURL/transformers/llmExtract.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { removeDefaultProperty } from "./llmExtract";

// Unit tests for removeDefaultProperty: each case feeds a literal value through
// the function and compares the result against the expected structure.
describe("removeDefaultProperty", () => {
  it("should remove the default property from a simple object", () => {
    const stripped = removeDefaultProperty({ default: "test", test: "test" });
    expect(stripped).toEqual({ test: "test" });
  });

  it("should remove the default property from a nested object", () => {
    // `default` appears both at the root and one level down.
    const stripped = removeDefaultProperty({
      default: "test",
      nested: { default: "nestedTest", test: "nestedTest" },
    });
    expect(stripped).toEqual({ nested: { test: "nestedTest" } });
  });

  it("should remove the default property from an array of objects", () => {
    // Every element of an array-valued property must be cleaned individually.
    const stripped = removeDefaultProperty({
      array: [
        { default: "test1", test: "test1" },
        { default: "test2", test: "test2" },
      ],
    });
    expect(stripped).toEqual({ array: [{ test: "test1" }, { test: "test2" }] });
  });

  it("should handle objects without a default property", () => {
    const untouched = removeDefaultProperty({ test: "test" });
    expect(untouched).toEqual({ test: "test" });
  });

  it("should handle null and non-object inputs", () => {
    // Non-object values are expected to come back as-is.
    expect(removeDefaultProperty(null)).toBeNull();
    expect(removeDefaultProperty("string")).toBe("string");
    expect(removeDefaultProperty(123)).toBe(123);
  });
});
26 changes: 24 additions & 2 deletions apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ export async function generateOpenAICompletions(
}

let schema = options.schema;
if (schema) {
schema = removeDefaultProperty(schema);
}

if (schema && schema.type === "array") {
schema = {
type: "object",
Expand All @@ -134,10 +138,12 @@ export async function generateOpenAICompletions(
schema = {
type: "object",
properties: Object.fromEntries(
Object.entries(schema).map(([key, value]) => [key, { type: value }]),
Object.entries(schema).map(([key, value]) => {
return [key, removeDefaultProperty(value)];
})
),
required: Object.keys(schema),
additionalProperties: false,
additionalProperties: false
};
}

Expand Down Expand Up @@ -232,3 +238,19 @@ export async function performLLMExtract(

return document;
}

/**
 * Returns a copy of `schema` in which every key named `default` has been
 * removed, at every nesting depth.
 *
 * - Non-object values (strings, numbers, `undefined`, ...) and `null` are
 *   returned unchanged.
 * - Arrays are returned as new arrays whose elements have been processed
 *   recursively, so array inputs keep their array shape.
 * - Objects are returned as new plain objects without their `default` key and
 *   with every remaining value processed recursively.
 *
 * The input is never mutated; only freshly created containers are written to.
 *
 * Note: the removal is purely name-based, so a key called `default` is dropped
 * wherever it occurs, including a user-defined field of that name inside a
 * `properties` map.
 *
 * @param schema - Any value, typically a JSON-schema-like object.
 * @returns The value with all `default` keys stripped.
 */
export function removeDefaultProperty(schema: any): any {
  if (typeof schema !== 'object' || schema === null) return schema;

  // Arrays must be handled before destructuring: spreading an array into an
  // object literal would turn it into `{ "0": ..., "1": ... }`.
  if (Array.isArray(schema)) {
    return schema.map((item: any) => removeDefaultProperty(item));
  }

  // Shallow copy without `default`; nested values are replaced below.
  const { default: _, ...rest } = schema;

  for (const key of Object.keys(rest)) {
    // The recursive call is a no-op for primitives and null.
    rest[key] = removeDefaultProperty(rest[key]);
  }

  return rest;
}

0 comments on commit eba5fda

Please sign in to comment.