Skip to content

Commit

Permalink
Move digest definitions into a separate file
Browse files Browse the repository at this point in the history
  • Loading branch information
jacksonh committed Mar 18, 2024
1 parent 544d197 commit 7111dca
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 90 deletions.
153 changes: 108 additions & 45 deletions packages/api/src/jobs/build_digest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,33 @@ import showdown from 'showdown'
import { parsedContentToLibraryItem, savePage } from '../services/save_page'
import { generateSlug } from '../utils/helpers'
import { PageType } from '../generated/graphql'
import * as stream from 'stream'

import { Storage } from '@google-cloud/storage'
import { readStringFromStorage } from '../utils/uploads'

/**
 * Payload carried by a build-digest job.
 */
export interface BuildDigestJobData {
// ID of the user the digest is built for; buildDigest passes it to the
// candidate and preference library searches.
userId: string
}

// Job name used when a build-digest job is added to the queue.
export const BUILD_DIGEST_JOB_NAME = 'build-digest-job'

/**
 * One library search that contributes items to a digest. Each selector is
 * turned into a single searchLibraryItems call.
 */
interface Selector {
// Search query string, passed as `query` to searchLibraryItems.
query: string
// Passed as the `size` of the search request.
count: number
// Not read by the code in this file's visible portion; presumably a
// human-readable explanation of the selector — TODO confirm.
reason: string
}

/**
 * Describes how a digest is built. Instances are obtained by parsing a JSON
 * file read from storage (see fetchDigestDefinition); the parsed value is
 * cast to this type.
 */
interface DigestDefinition {
// NOTE(review): not read in the visible code; presumably an identifier for
// the definition — confirm.
name: string
// Searches whose results form the "recent preferences" list.
preferenceSelectors: Selector[]
// Searches whose results form the candidate list (deduplicated by title).
candidateSelectors: Selector[]
// NOTE(review): not read in the visible code; meaning unknown from here.
fastMatchAttributes: string[]
// Prompt template text used by getSelection.
selectionPrompt: string
// Prompt template text used by createDigestArticleContent.
assemblePrompt: string
// NOTE(review): not read in the visible code; presumably introduction text
// variants — confirm.
introductionCopy: string[]
}

interface SelectionResultItem {
id: string
title: string
Expand All @@ -36,22 +56,22 @@ interface SelectedLibraryItem {
url: string
}

const createPreferencesList = async (
userId: string
): Promise<LibraryItem[]> => {
const recentPreferences = await searchLibraryItems(
{
from: 0,
size: 21,
includePending: false,
includeDeleted: false,
includeContent: false,
useFolders: false,
query: `is:read OR has:highlights sort:saved-desc`,
},
userId
)
return recentPreferences.libraryItems
/**
 * Runtime check that a parsed JSON value carries the fields the digest
 * builder actually reads: both selector lists and both prompt templates.
 * Fields that are not read by the builder are deliberately not checked so
 * that older definition files keep loading.
 */
const isDigestDefinition = (value: unknown): value is DigestDefinition => {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  const candidate = value as Record<string, unknown>
  return (
    Array.isArray(candidate.preferenceSelectors) &&
    Array.isArray(candidate.candidateSelectors) &&
    typeof candidate.selectionPrompt === 'string' &&
    typeof candidate.assemblePrompt === 'string'
  )
}

/**
 * Loads the digest definition JSON from the upload bucket.
 *
 * @returns the parsed definition, or `undefined` when the file cannot be
 *   read, is not valid JSON, or lacks the fields the builder needs. Never
 *   throws for storage/parse failures; the failure is logged instead.
 */
const fetchDigestDefinition = async (): Promise<
  DigestDefinition | undefined
> => {
  const bucketName = env.fileUpload.gcsUploadBucket
  const fileName = `digest-builders/simple-001.json`

  try {
    const str = await readStringFromStorage(bucketName, fileName)
    // JSON.parse returns untyped data; validate instead of casting so a
    // malformed file yields `undefined` here rather than a crash later when
    // the selector lists are iterated.
    const parsed: unknown = JSON.parse(str)
    if (!isDigestDefinition(parsed)) {
      logger.error(`digest definition is malformed`, { fileName })
      return undefined
    }
    return parsed
  } catch (err) {
    // A missing definition aborts the whole digest job, so report it as an
    // error rather than at info level.
    logger.error(`unable to fetch digest definition`, { error: err })
  }

  return undefined
}

function removeDuplicateTitles(items: LibraryItem[]): LibraryItem[] {
Expand All @@ -68,20 +88,50 @@ function removeDuplicateTitles(items: LibraryItem[]): LibraryItem[] {
return uniqueItems
}

const createCandidatesList = async (userId: string): Promise<LibraryItem[]> => {
const candidates = await searchLibraryItems(
{
from: 0,
size: 100,
includePending: false,
includeDeleted: false,
includeContent: false,
useFolders: false,
query: `is:unread sort:saved-desc`,
},
userId
)
return removeDuplicateTitles(candidates.libraryItems)
/**
 * Collects the user's "recent preference" items by running every preference
 * selector of the digest definition, one search at a time, in definition
 * order.
 *
 * @param digestDefinition definition whose `preferenceSelectors` are run
 * @param userId user whose library is searched
 * @returns all items returned by the searches, concatenated in selector order
 */
const createPreferencesList = async (
  digestDefinition: DigestDefinition,
  userId: string
): Promise<LibraryItem[]> => {
  const collected: LibraryItem[] = []

  for (const { query, count } of digestDefinition.preferenceSelectors) {
    const searchArgs = {
      from: 0,
      size: count,
      includePending: false,
      includeDeleted: false,
      includeContent: false,
      useFolders: false,
      query,
    }
    const { libraryItems } = await searchLibraryItems(searchArgs, userId)
    collected.push(...libraryItems)
  }

  return collected
}

/**
 * Builds the list of digest candidates: runs each candidate selector of the
 * digest definition sequentially, gathers the results, and drops items with
 * duplicate titles.
 *
 * @param digestDefinition definition whose `candidateSelectors` are run
 * @param userId user whose library is searched
 * @returns the gathered items after removeDuplicateTitles
 */
const createCandidatesList = async (
  digestDefinition: DigestDefinition,
  userId: string
): Promise<LibraryItem[]> => {
  const gathered: LibraryItem[] = []

  for (const { query, count: size } of digestDefinition.candidateSelectors) {
    const searchResult = await searchLibraryItems(
      {
        from: 0,
        size,
        includePending: false,
        includeDeleted: false,
        includeContent: false,
        useFolders: false,
        query,
      },
      userId
    )
    gathered.push(...searchResult.libraryItems)
  }

  return removeDuplicateTitles(gathered)
}

const isSelectedLibraryItem = (
Expand All @@ -92,15 +142,12 @@ const isSelectedLibraryItem = (

const getSelection = async (
llm: OpenAI,
digestDefinition: DigestDefinition,
candidates: LibraryItem[],
recentPreferences: LibraryItem[]
): Promise<SelectedLibraryItem[]> => {
if (!process.env.DIGEST_SELECTION_PROMPT) {
return []
}

const selectionTemplate = PromptTemplate.fromTemplate(
process.env.DIGEST_SELECTION_PROMPT
digestDefinition.selectionPrompt
)
const selectionChain = selectionTemplate.pipe(llm)
const selectionResult = await selectionChain.invoke(
Expand Down Expand Up @@ -149,14 +196,12 @@ const getSelection = async (

const createDigestArticleContent = async (
llm: OpenAI,
digestDefinition: DigestDefinition,
candidates: LibraryItem[],
selection: SelectedLibraryItem[]
): Promise<string | undefined> => {
if (!process.env.DIGEST_INTRODUCTION_PROMPT) {
return undefined
}
const introductionTemplate = PromptTemplate.fromTemplate(
process.env.DIGEST_INTRODUCTION_PROMPT
digestDefinition.assemblePrompt
)
const introductionChain = introductionTemplate.pipe(llm)
const introductionResult = await introductionChain.invoke({
Expand All @@ -180,8 +225,20 @@ export const buildDigest = async (jobData: BuildDigestJobData) => {
console.log(
'[digest]: ********************************* building daily digest ***********************************'
)
const candidates = await createCandidatesList(jobData.userId)
const recentPreferences = await createPreferencesList(jobData.userId)
const digestDefinition = await fetchDigestDefinition()
if (!digestDefinition) {
logger.warn('[digest] no digest definition found')
return
}

const candidates = await createCandidatesList(
digestDefinition,
jobData.userId
)
const recentPreferences = await createPreferencesList(
digestDefinition,
jobData.userId
)

console.log(
'[digest]: preferences: ',
Expand All @@ -199,9 +256,15 @@ export const buildDigest = async (jobData: BuildDigestJobData) => {
},
})

const selection = await getSelection(llm, candidates, recentPreferences)
const selection = await getSelection(
llm,
digestDefinition,
candidates,
recentPreferences
)
const articleHTML = await createDigestArticleContent(
llm,
digestDefinition,
candidates,
selection
)
Expand All @@ -211,12 +274,12 @@ export const buildDigest = async (jobData: BuildDigestJobData) => {
document: articleHTML,
pageInfo: {},
}
const formattedDate = new Date().toLocaleTimeString('en-US', {

const formattedDate = new Intl.DateTimeFormat('en-US', {
weekday: 'long',
year: 'numeric',
month: 'long',
day: 'numeric',
})
}).format(new Date())

const title = `Your Omnivore Daily Digest for ${formattedDate}`
const originalURL = `https://omnivore.app/me/digest?q=${uuid()}`
Expand Down
37 changes: 1 addition & 36 deletions packages/api/src/jobs/process-youtube-video.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import * as stream from 'stream'
import { Storage } from '@google-cloud/storage'
import { stringToHash } from '../utils/helpers'
import { FeatureName, findFeatureByName } from '../services/features'
import { readStringFromStorage } from '../utils/uploads'

export interface ProcessYouTubeVideoJobData {
userId: string
Expand Down Expand Up @@ -220,42 +221,6 @@ export const addTranscriptPlaceholdReadableContent = async (
return updatedContent.parsedContent?.content
}

/**
 * Reads a file from a storage bucket and returns its contents as a string.
 *
 * @param bucketName bucket that holds the file
 * @param fileName path of the file inside the bucket
 * @returns the file contents converted with `toString()`
 * @throws Error when the file does not exist; any storage error is logged
 *   and rethrown
 */
async function readStringFromStorage(
  bucketName: string,
  fileName: string
): Promise<string> {
  try {
    // Use the service-account key file when one is configured, otherwise
    // fall back to the client's default credentials.
    const keyFilename = env.fileUpload?.gcsUploadSAKeyFilePath
    const gcs = keyFilename ? new Storage({ keyFilename }) : new Storage()
    const fileRef = gcs.bucket(bucketName).file(fileName)

    const [exists] = await fileRef.exists()
    if (!exists) {
      throw new Error(
        `File '${fileName}' does not exist in bucket '${bucketName}'.`
      )
    }

    // Download the file contents as a string
    const [contents] = await fileRef.download()
    const fileContent = contents.toString()

    console.log(`File '${fileName}' downloaded successfully as string.`)
    return fileContent
  } catch (error) {
    console.error('Error downloading file:', error)
    throw error
  }
}

const writeStringToStorage = async (
bucketName: string,
fileName: string,
Expand Down
18 changes: 9 additions & 9 deletions packages/api/src/queue-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,15 +159,15 @@ const setupCronJobs = async () => {
)

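// TEMP: for testing locally. NOTE(review): the enqueue below is active (not commented out) and uses a hard-coded userId; disable or guard it before deploying.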
// await queue.add(
// BUILD_DIGEST_JOB_NAME,
// {
// userId: 'a03a7396-909b-11ed-9075-c3f3cf07eed9',
// },
// {
// priority: 1,
// }
// )
await queue.add(
BUILD_DIGEST_JOB_NAME,
{
userId: 'a03a7396-909b-11ed-9075-c3f3cf07eed9',
},
{
priority: 1,
}
)
}

const main = async () => {
Expand Down
35 changes: 35 additions & 0 deletions packages/api/src/utils/uploads.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,38 @@ export const uploadToBucket = async (
export const createGCSFile = (filename: string): File => {
return storage.bucket(bucketName).file(filename)
}

/**
 * Downloads a file from a storage bucket and returns it as a string.
 *
 * @param bucketName bucket that holds the file
 * @param fileName path of the file inside the bucket
 * @returns the downloaded contents converted with `toString()`
 * @throws Error when the file does not exist; every failure is logged to the
 *   console and rethrown to the caller
 */
export const readStringFromStorage = async (
  bucketName: string,
  fileName: string
): Promise<string> => {
  try {
    // A service-account key file is used when configured; otherwise the
    // client is created with its defaults.
    const keyFilename = env.fileUpload?.gcsUploadSAKeyFilePath
    const client = keyFilename ? new Storage({ keyFilename }) : new Storage()
    const target = client.bucket(bucketName).file(fileName)

    const [found] = await target.exists()
    if (!found) {
      throw new Error(
        `File '${fileName}' does not exist in bucket '${bucketName}'.`
      )
    }

    const [buffer] = await target.download()
    const text = buffer.toString()

    console.log(`File '${fileName}' downloaded successfully as string.`)
    return text
  } catch (error) {
    console.error('Error downloading file:', error)
    throw error
  }
}

0 comments on commit 7111dca

Please sign in to comment.