/// Returns the 1-based number of the line that contains `byte_offset` in `s`.
///
/// The result is one plus the number of `\n` bytes located strictly before
/// `byte_offset`, so offset `0` is always on line 1 — even when the text
/// starts with a newline. Offsets past the end of `s` are treated as the end
/// of the text, and an offset that falls inside a multi-byte character is
/// accepted without panicking.
fn line_number_from_byte_offset(s: &str, byte_offset: usize) -> i64 {
    // In UTF-8 the byte 0x0A never occurs inside a multi-byte sequence, so
    // counting raw bytes is equivalent to counting '\n' characters and does
    // not require `byte_offset` to be a char boundary.
    let newlines_before = s
        .bytes()
        .take(byte_offset)
        .filter(|&b| b == b'\n')
        .count();

    // Lossless cast: a `str` holds at most `isize::MAX` bytes, so the count
    // (and the count plus one) always fits in an `i64`.
    newlines_before as i64 + 1
}
async fn collect(&self, language: &str, segments: &Segments) -> Vec { let quota_threshold_for_snippets_from_code_search = 256; - let mut max_snippets_chars_in_prompt = 768; + let mut max_snippets_chars_in_prompt = 1024; let mut snippets: Vec = vec![]; if let Some((count_characters, snippets_from_segments)) = diff --git a/ee/tabby-ui/app/files/components/chat-side-bar.tsx b/ee/tabby-ui/app/files/components/chat-side-bar.tsx index f4afee67c5a..77c1f945195 100644 --- a/ee/tabby-ui/app/files/components/chat-side-bar.tsx +++ b/ee/tabby-ui/app/files/components/chat-side-bar.tsx @@ -2,6 +2,8 @@ import React from 'react' import { useStore } from '@/lib/hooks/use-store' import { useChatStore } from '@/lib/stores/chat-store' +import fetcher from '@/lib/tabby/fetcher' +import { ISearchHit, SearchReponse } from '@/lib/types' import { cn } from '@/lib/utils' import { Button } from '@/components/ui/button' import { IconClose } from '@/components/ui/icons' @@ -22,7 +24,7 @@ export const ChatSideBar: React.FC = ({ const activeChatId = useStore(useChatStore, state => state.activeChatId) const iframeRef = React.useRef(null) - const getPrompt = ({ + const getPrompt = async ({ action, code, language, @@ -30,6 +32,7 @@ export const ChatSideBar: React.FC = ({ lineFrom, lineTo }: QuickActionEventPayload) => { + const contextPrompt = await buildContextPrompt(language, code, path) let builtInPrompt = '' switch (action) { case 'explain': @@ -47,18 +50,22 @@ export const ChatSideBar: React.FC = ({ const codeBlockMeta = `${ language ?? 
/**
 * Builds a prompt prefix containing code snippets related to the selection.
 *
 * Splits `code` into word tokens, queries `/v1beta/search` for documents of
 * the same language whose body matches any token, and formats up to three
 * hits that score above 30 and do not come from the selection's own file.
 *
 * @param language Language of the selection; no lookup is done when missing.
 * @param code Selected source text; selections shorter than 128 characters
 *   are skipped.
 * @param path `<repo>/<file path>` of the selection; no lookup is done when
 *   missing.
 * @returns The context prompt, or `''` when there is nothing to add.
 */
async function buildContextPrompt(
  language: string | undefined,
  code: string,
  path: string | undefined
): Promise<string> {
  // Always return a string: the caller interpolates the result directly into
  // the final prompt.
  if (!language || !path) {
    return ''
  }

  if (code.length < 128) {
    return ''
  }

  // The first path segment is the repository name; the remainder is compared
  // against each hit's `filepath`.
  const [repo, ...rest] = path.split('/')
  const filepath = rest.join('/')

  const tokens = code.split(/[^\w]/).filter(x => x)
  if (!tokens.length) {
    // Without tokens the body clause would be an empty `()` group.
    return ''
  }

  // FIXME(jueliang): restrict query with `git_url` of `repo`.
  const languageQuery = buildLanguageQuery(language)
  const bodyQuery = tokens.map(x => `body:${x}`).join(' OR ')
  const query = `${languageQuery} AND (${bodyQuery})`

  const queryParam = `q=${encodeURIComponent(query)}&limit=20`

  const data: SearchReponse = await fetcher(`/v1beta/search?${queryParam}`, {
    responseFormat: 'json'
  })
  // `filter` always returns an array, so the fallback has to guard the
  // response itself rather than the filter result.
  const snippets = (data?.hits ?? []).filter(
    x => x.score > 30 && filepath !== x.doc.filepath
  )
  return formatContextPrompt(repo, language, snippets.slice(0, 3))
}

/**
 * Renders search hits as fenced reference blocks preceded by a short header.
 *
 * Each block carries `is_reference=1`, the `<repo>/<filepath>` of the hit and
 * the line range the snippet covers, derived from `start_line` and the number
 * of lines in the snippet body.
 *
 * @returns The formatted prompt, or `''` when `snippets` is empty.
 */
function formatContextPrompt(
  repo: string,
  language: string,
  snippets: ISearchHit[]
) {
  if (!snippets.length) {
    return ''
  }

  let prompt = 'Given following relevant code snippets:\n\n'
  for (const { doc } of snippets) {
    // A trailing line terminator ends the last line rather than starting a
    // new one, so drop it before counting lines.
    const numLines = doc.body
      .replace(/(\r\n|\r|\n)$/, '')
      .split(/\r\n|\r|\n/).length
    const fromLine = doc.start_line
    const toLine = fromLine + numLines - 1
    prompt += `\`\`\`${language} is_reference=1 path=${repo}/${doc.filepath} line_from=${fromLine} line_to=${toLine}\n${doc.body}\n\`\`\`\n`
  }

  return prompt
}

/**
 * Builds the `language:` clause of a search query.
 *
 * JavaScript, JSX, TypeScript and TSX are all queried under the single
 * `javascript-typescript` tag; any other language is used as given.
 */
function buildLanguageQuery(language: string) {
  const jsFamily = ['javascript', 'jsx', 'typescript', 'tsx']
  const queryLanguage = jsFamily.includes(language)
    ? 'javascript-typescript'
    : language

  return `language:${queryLanguage}`
}
string + filepath: string + git_url: string + language: string + name: string + kind: string + } +} + +type SearchReponse = { + hits: Array + num_hits: number +} \ No newline at end of file diff --git a/ee/tabby-ui/lib/types/chat.ts b/ee/tabby-ui/lib/types/chat.ts index acb097210e4..c9ce8b7ce3b 100644 --- a/ee/tabby-ui/lib/types/chat.ts +++ b/ee/tabby-ui/lib/types/chat.ts @@ -12,18 +12,19 @@ export interface Chat extends Record { export type ISearchHit = { id: number - doc?: { - body?: string - name?: string - filepath?: string - git_url?: string - kind?: string - language?: string + score: number + doc: { + body: string + filepath: string + git_url: string + language: string + start_line: number } } + export type SearchReponse = { - hits?: Array - num_hits?: number + hits: Array + num_hits: number } export type MessageActionType = 'edit' | 'delete' | 'regenerate'