-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add @acusti/parsing with asJSON util
- Loading branch information
Showing
9 changed files
with
267 additions
and
1 deletion.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# @acusti/parsing | ||
|
||
[![latest version](https://img.shields.io/npm/v/@acusti/parsing?style=for-the-badge)](https://www.npmjs.com/package/@acusti/parsing) | ||
[![maintenance status](https://img.shields.io/npms-io/maintenance-score/@acusti/parsing?style=for-the-badge)](https://npms.io/search?q=%40acusti%2Fparsing) | ||
[![bundle size](https://img.shields.io/bundlephobia/minzip/@acusti/parsing?style=for-the-badge)](https://bundlephobia.com/package/@acusti/parsing) | ||
[![downloads per month](https://img.shields.io/npm/dm/@acusti/parsing?style=for-the-badge)](https://www.npmjs.com/package/@acusti/parsing) | ||
|
||
`@acusti/parsing` exports `asJSON`, a function that takes a string and | ||
attempts to parse it as JSON, returning the resulting JS value, or `null` | ||
if the string defeated all attempts at parsing it. This is especially | ||
useful for generative AI when you prompt an LLM to generate a response in | ||
JSON, because most models are unable to consistently generate valid JSON, | ||
and even when they do, will often have a pre- or post-amble as a part of | ||
the response. | ||
|
||
## Usage | ||
|
||
``` | ||
npm install @acusti/parsing | ||
# or | ||
yarn add @acusti/parsing | ||
``` | ||
|
||
Import `asJSON` (it’s a named export) and pass a string to it: | ||
|
||
```js | ||
import { asJSON } from '@acusti/parsing'; | ||
|
||
asJSON(`Here is the JSON output for the "About Us" page based on the provided props: | ||
{ | ||
"heading": "Our Story", | ||
"subheading": "A Passion for Sourdough" | ||
} | ||
`); | ||
/* results in: | ||
{ | ||
heading: 'Our Story', | ||
subheading: 'A Passion for Sourdough', | ||
} | ||
*/ | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
{ | ||
"name": "@acusti/parsing", | ||
"version": "0.1.0", | ||
"type": "module", | ||
"sideEffects": false, | ||
"exports": "./dist/index.js", | ||
"main": "./dist/index.js", | ||
"types": "./dist/index.d.ts", | ||
"files": [ | ||
"dist", | ||
"src" | ||
], | ||
"description": "Loosely parse a string as JSON with numerous affordances for syntax errors", | ||
"keywords": [ | ||
"parse", | ||
"parsing", | ||
"json", | ||
"llm", | ||
"genai", | ||
"generative-ai" | ||
], | ||
"scripts": { | ||
"test": "vitest" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/acusti/uikit.git", | ||
"directory": "packages/parsing" | ||
}, | ||
"author": "andrew patton <[email protected]> (https://www.acusti.ca)", | ||
"license": "Unlicense", | ||
"bugs": { | ||
"url": "https://github.com/acusti/uikit/issues" | ||
}, | ||
"homepage": "https://github.com/acusti/uikit/tree/main/packages/parsing#readme", | ||
"devDependencies": { | ||
"typescript": "^5.3.3", | ||
"vitest": "^1.1.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import { describe, expect, it } from 'vitest'; | ||
|
||
import { asJSON } from './as-json.js'; | ||
|
||
// Fixture: a well-formed JSON object preceded by a conversational preamble.
// The leading `\` keeps the template literal from starting with a newline.
const aboutUsOutput = `\
Here is the JSON output for the "About Us" page based on the provided props:
{
"heading": "Our Story",
"subheading": "A Passion for Sourdough"
}
`;

// Fixture: the model starts a flat props object, then derails into a
// malformed, unbalanced array of team members.
const meetTheTeamOutput = `\
Here is the JSON output for the "Meet the Team" page:
{
"callToAction": "Learn More",
"heading": "Meet the Team",
"subheading": "Our bakery is built on the foundation of passionate individuals who are dedicated to creating the best sourdough bread in North Lake Tahoe. Meet the team behind Masa Madre."
[
"teamMembers": [
{
"name": "Jenny Lee",
"role": "Head Baker",
"description": "Jenny is the mastermind behind Masa Madre's delicious sourdough bread. With over 10 years of experience in the baking industry, she brings a wealth of knowledge and expertise to the table. Jenny's passion for sourdough bread is evident in every loaf she creates, and her dedication to using only the finest ingredients has earned her a loyal following of customers."
},
{
"name": "Tommy Thompson",
"role": "Baker",
"description": "Tommy is the muscle behind Masa Madre's bakery. With a background in culinary arts, he brings a creative touch to every loaf he bakes. Tommy's attention to detail and commitment to quality has made him an invaluable member of the team."
},
{
"name": "Emily Chen",
"role": "Marketing Manager",
"description": "Emily is the marketing genius behind Masa Madre's success. With a background in advertising and a passion for food, she has helped to create a strong brand identity for the bakery. Emily's creativity and attention to detail have been instrumental in building a loyal customer base."
]
}
`;

describe('@acusti/parsing', () => {
    describe('asJSON', () => {
        it('should convert a LLM response string to a props object', () => {
            const expectedProps = {
                heading: 'Our Story',
                subheading: 'A Passion for Sourdough',
            };

            const parsedProps = asJSON(aboutUsOutput);

            expect(parsedProps).toEqual(expectedProps);
        });

        it('should strip invalid JSON when the LLM response goes off the rails', () => {
            // Only the flat string props that precede the broken array survive.
            const expectedProps = {
                callToAction: 'Learn More',
                heading: 'Meet the Team',
                subheading:
                    'Our bakery is built on the foundation of passionate individuals who are dedicated to creating the best sourdough bread in North Lake Tahoe. Meet the team behind Masa Madre.',
            };

            const parsedProps = asJSON(meetTheTeamOutput);

            expect(parsedProps).toEqual(expectedProps);
        });
    });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// Adapted from https://github.com/langchain-ai/langchainjs/blob/215dd52/langchain-core/src/output_parsers/json.ts#L58 | ||
// MIT License | ||
/**
 * Parses `text` as JSON, repairing common truncation damage when a strict
 * parse fails: raw newlines, carriage returns, and tabs inside string
 * literals are escaped, an unterminated string is closed, and any objects or
 * arrays still open at the end of the input are closed in reverse order.
 *
 * @param text - The candidate JSON text.
 * @returns The parsed value, or `null` when the text cannot be repaired
 * (which is indistinguishable from successfully parsing the literal `null`).
 */
const parsePartialJSON = (text: string): ReturnValue | null => {
    // Callers from untyped JavaScript can still pass undefined/null.
    if (text == null) return null;

    // Attempt to parse the string as-is.
    const strict = parseStrict(text);
    if (strict !== null) return strict;

    let repaired = '';
    // Closing characters owed for every structure that is currently open.
    const closers: string[] = [];
    let isInsideString = false;
    let isEscaped = false;

    // Process each character in the string one at a time.
    for (const char of text) {
        let output = char;
        if (isInsideString) {
            if (isEscaped) {
                // This character is the target of the preceding backslash.
                isEscaped = false;
            } else if (char === '\\') {
                isEscaped = true;
            } else if (char === '"') {
                isInsideString = false;
            } else {
                // JSON forbids raw control characters inside string literals.
                output = escapeControlCharacter(char);
            }
        } else if (char === '"') {
            isInsideString = true;
        } else if (char === '{') {
            closers.push('}');
        } else if (char === '[') {
            closers.push(']');
        } else if (char === '}' || char === ']') {
            // Mismatched closing character; the input is malformed.
            if (closers[closers.length - 1] !== char) return null;
            closers.pop();
        }
        repaired += output;
    }

    if (isInsideString) {
        // A dangling backslash would escape the quote we are about to append.
        if (isEscaped) repaired = repaired.slice(0, -1);
        repaired += '"';
    }

    // Close any remaining open structures in the reverse order they were opened.
    repaired += closers.reverse().join('');

    return parseStrict(repaired);
};

/**
 * Strictly parses `text` as JSON.
 *
 * `JSON.parse` is typed as returning `any`; the result is narrowed here so
 * that no `any` escapes into the rest of the module.
 *
 * @returns The parsed value, or `null` if `text` is not valid JSON.
 */
function parseStrict(text: string): ReturnValue | null {
    try {
        return JSON.parse(text) as ReturnValue | null;
    } catch {
        return null;
    }
}

/** Returns the JSON escape sequence for a raw newline, carriage return, or tab; any other character is returned unchanged. */
function escapeControlCharacter(char: string): string {
    switch (char) {
        case '\n':
            return '\\n';
        case '\r':
            return '\\r';
        case '\t':
            return '\\t';
        default:
            return char;
    }
}

/**
 * Returns the index of the first character of `text` that lies outside any
 * double-quoted string literal and satisfies `isMatch`, or -1 if there is none.
 * `isMatch` is invoked, in order, for every character outside a string.
 */
function findOutsideStrings(text: string, isMatch: (char: string) => boolean): number {
    let isInsideString = false;
    let isEscaped = false;
    for (let index = 0; index < text.length; index += 1) {
        const char = text.charAt(index);
        if (isInsideString) {
            if (isEscaped) isEscaped = false;
            else if (char === '\\') isEscaped = true;
            else if (char === '"') isInsideString = false;
        } else if (char === '"') {
            isInsideString = true;
        } else if (isMatch(char)) {
            return index;
        }
    }
    return -1;
}

type ReturnValue = string | boolean | number | Record<string, unknown> | Array<unknown>;

/**
 * Loosely parses `result` (typically an LLM response) as JSON, tolerating a
 * pre- or post-amble around the payload and truncated or malformed output.
 *
 * Strategies are tried in order, returning the first that yields a value:
 * 1. the whole input, strictly, so already-valid JSON is returned untouched;
 * 2. with no `{` in the input, the whole input with truncation repair;
 * 3. the first balanced `{ … }` object, with repair;
 * 4. the flat props prefix: everything from the first `{` up to the first
 *    `}` (kept) or `[` (dropped) outside a string literal, with repair. This
 *    salvages leading string props when a later nested value is malformed.
 *
 * @param result - The text to parse.
 * @returns The parsed value, or `null` if every strategy failed.
 *
 * @example
 * asJSON('Here you go: {"heading": "Our Story"} Enjoy!');
 * // => { heading: 'Our Story' }
 */
export function asJSON(result: string): ReturnValue | null {
    // Callers from untyped JavaScript can still pass a non-string.
    if (typeof result !== 'string') return null;

    const whole = parseStrict(result);
    if (whole !== null) return whole;

    const startJSONIndex = result.indexOf('{');
    if (startJSONIndex === -1) return parsePartialJSON(result);

    // Drop the preamble; every later index is relative to the opening brace.
    const candidate = result.slice(startJSONIndex);

    // Find the bracket that returns nesting depth to zero. Bracket kinds are
    // not matched against each other here; the parser rejects mismatches.
    let depth = 0;
    const balancedEndIndex = findOutsideStrings(candidate, (char) => {
        if (char === '{' || char === '[') depth += 1;
        else if (char === '}' || char === ']') depth -= 1;
        return depth === 0;
    });
    if (balancedEndIndex !== -1) {
        const balanced = parsePartialJSON(candidate.slice(0, balancedEndIndex + 1));
        if (balanced !== null) return balanced;
    }

    // candidate[0] is '{', so the first hit is always past the opening brace.
    const cutIndex = findOutsideStrings(candidate, (char) => char === '}' || char === '[');
    if (cutIndex === -1) return parsePartialJSON(candidate);

    const flatEndIndex = candidate.charAt(cutIndex) === '}' ? cutIndex + 1 : cutIndex;
    return parsePartialJSON(candidate.slice(0, flatEndIndex));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
export { asJSON } from './as-json.js'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{ | ||
"extends": "../../tsconfig.json", | ||
"compilerOptions": { | ||
"outDir": "dist", | ||
"rootDir": "src" | ||
}, | ||
"references": [] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters