use 127.0.0.1 #968

Merged: 5 commits, Dec 27, 2024

This PR replaces localhost with 127.0.0.1 in the default local endpoints; presumably this avoids cases where localhost resolves to the IPv6 address ::1 while a local server listens only on IPv4. It also introduces listModels and pullModel capability flags for providers.
docs/public/schemas/llms.json (9 additions, 0 deletions)

@@ -47,6 +47,15 @@
             "type": "boolean",
             "description": "Indicates if tools are supported"
         },
+        "listModels": {
+            "type": "boolean",
+            "default": true,
+            "description": "Indicates if listing models is supported"
+        },
+        "pullModel": {
+            "type": "boolean",
+            "description": "Indicates if pulling models is supported"
+        },
         "openaiCompatibility": {
             "type": "string",
             "description": "Uses OpenAI API compatibility layer documentation URL"
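For illustration, a provider entry exercising the new flags might look like this (a hypothetical entry: the id and values are invented, only the field names come from the schema above):

```ts
// Hypothetical llms.json provider entry using the new capability flags.
// listModels defaults to true, so it only needs to be stated when false.
const exampleProvider = {
    id: "my_local_server", // invented provider id
    detail: "Example OpenAI-compatible local server",
    listModels: false, // server has no /models endpoint
    pullModel: true, // server can pull models on demand
}
```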
packages/cli/src/info.ts (11 additions, 2 deletions)

@@ -18,6 +18,7 @@ import {
     ModelConnectionInfo,
     resolveModelConnectionInfo,
 } from "../../core/src/models"
+import { deleteEmptyValues } from "../../core/src/util"
 import { CORE_VERSION } from "../../core/src/version"
 import { YAMLStringify } from "../../core/src/yaml"
 import { buildProject } from "./build"
@@ -64,11 +65,19 @@ export async function envInfo(
             if (models) {
                 const lm = await resolveLanguageModel(modelProvider.id)
                 if (lm.listModels) {
-                    const ms = await lm.listModels(conn)
+                    const ms = await lm.listModels(conn, {})
                     if (ms?.length) conn.models = ms
                 }
             }
-            res.providers.push(conn)
+            res.providers.push(
+                deleteEmptyValues({
+                    provider: conn.provider,
+                    source: conn.source,
+                    base: conn.base,
+                    type: conn.type,
+                    models: conn.models,
+                })
+            )
         }
     } catch (e) {
         if (error)
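The push now reports only the fields that carry values. A minimal sketch of what deleteEmptyValues is assumed to do (the real helper lives in core/src/util and may differ in detail):

```ts
// Sketch only: drop keys whose values are empty before reporting.
function deleteEmptyValuesSketch<T extends Record<string, unknown>>(
    obj: T
): Partial<T> {
    return Object.fromEntries(
        Object.entries(obj).filter(
            ([, v]) =>
                v !== undefined &&
                v !== null &&
                v !== "" &&
                !(Array.isArray(v) && v.length === 0)
        )
    ) as Partial<T>
}

// { provider: "ollama", base: "http://127.0.0.1:11434/v1", models: undefined }
// -> { provider: "ollama", base: "http://127.0.0.1:11434/v1" }
```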
packages/core/src/aici.ts (6 additions, 1 deletion)

@@ -15,6 +15,8 @@ import {
     ChatCompletionContentPartText,
     ChatCompletionResponse,
 } from "./chattypes"
+import { TraceOptions } from "./trace"
+import { CancellationOptions } from "./cancellation"
 
 /**
  * Renders an AICI node into a string representation.
@@ -404,7 +406,10 @@ const AICIChatCompletion: ChatCompletionHandler = async (
  * @param cfg - The configuration for the language model.
  * @returns A list of language model information.
  */
-async function listModels(cfg: LanguageModelConfiguration) {
+async function listModels(
+    cfg: LanguageModelConfiguration,
+    options?: TraceOptions & CancellationOptions
+) {
     const { token, base, version } = cfg
     const url = `${base}/proxy/info`
     const fetch = await createFetch()
packages/core/src/chat.ts (2 additions, 1 deletion)

@@ -127,7 +127,8 @@ export interface LanguageModelInfo {
 }
 
 export type ListModelsFunction = (
-    cfg: LanguageModelConfiguration
+    cfg: LanguageModelConfiguration,
+    options: TraceOptions & CancellationOptions
 ) => Promise<LanguageModelInfo[]>
 
 export type PullModelFunction = (
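Every provider implementation now receives trace and cancellation options. A minimal conforming implementation might look like the following sketch (the /models endpoint shape is an assumption, and it presumes LanguageModelInfo carries at least an id; real implementations such as those in openai.ts and ollama.ts thread the options into createFetch):

```ts
// Sketch of a ListModelsFunction under the new signature.
const listModelsSketch: ListModelsFunction = async (cfg, options) => {
    // a real implementation would pass options.trace and
    // options.cancellationToken through to its fetch helper
    const res = await fetch(`${cfg.base}/models`, { method: "GET" })
    const { data } = (await res.json()) as { data: { id: string }[] }
    return data.map((m) => ({ id: m.id }))
}
```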
packages/core/src/constants.ts (9 additions, 6 deletions)

@@ -108,19 +108,20 @@ export const MARKDOWN_PROMPT_FENCE = "`````"
 
 export const OPENAI_API_BASE = "https://api.openai.com/v1"
 export const OLLAMA_DEFAUT_PORT = 11434
-export const OLLAMA_API_BASE = "http://localhost:11434/v1"
-export const LLAMAFILE_API_BASE = "http://localhost:8080/v1"
-export const LOCALAI_API_BASE = "http://localhost:8080/v1"
-export const LITELLM_API_BASE = "http://localhost:4000"
+export const OLLAMA_API_BASE = "http://127.0.0.1:11434/v1"
+export const LLAMAFILE_API_BASE = "http://127.0.0.1:8080/v1"
+export const LOCALAI_API_BASE = "http://127.0.0.1:8080/v1"
+export const LITELLM_API_BASE = "http://127.0.0.1:4000"
+export const LMSTUDIO_API_BASE = "http://127.0.0.1:1234/v1"
+export const JAN_API_BASE = "http://127.0.0.1:1337/v1"
 
 export const ANTHROPIC_API_BASE = "https://api.anthropic.com"
 export const HUGGINGFACE_API_BASE = "https://api-inference.huggingface.co/v1"
 export const GOOGLE_API_BASE =
     "https://generativelanguage.googleapis.com/v1beta/openai/"
 export const ALIBABA_BASE =
     "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
 export const MISTRAL_API_BASE = "https://api.mistral.ai/v1"
-export const LMSTUDIO_API_BASE = "http://localhost:1234/v1"
-export const JAN_API_BASE = "http://localhost:1337/v1"
 
 export const PROMPTFOO_CACHE_PATH = ".genaiscript/cache/tests"
 export const PROMPTFOO_CONFIG_DIR = ".genaiscript/config/tests"
@@ -190,6 +191,8 @@ export const MODEL_PROVIDERS = Object.freeze<
         topP?: boolean
         prediction?: boolean
         bearerToken?: boolean
+        listModels?: boolean
+        pullModel?: boolean
         aliases?: Record<string, string>
     }[]
 >(CONFIGURATION_DATA.providers)
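The new optional flags surface in the typed provider table; reading them might look like this (a sketch, with flag values taken from the llms.json changes below):

```ts
// Sketch: querying capability flags from the provider table.
const ollama = MODEL_PROVIDERS.find((p) => p.id === "ollama")
console.log(ollama?.pullModel) // true, per llms.json
console.log(ollama?.listModels) // undefined, which callers treat as "not false"
```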
packages/core/src/llms.json (12 additions, 1 deletion)

@@ -17,11 +17,13 @@
         {
             "id": "azure",
             "detail": "Azure OpenAI deployment",
+            "listModels": false,
             "bearerToken": false
         },
         {
             "id": "azure_serverless",
             "detail": "Azure AI OpenAI (serverless deployments)",
+            "listModels": false,
             "bearerToken": false,
             "aliases": {
                 "large": "gpt-4o",
@@ -34,6 +36,7 @@
         {
             "id": "azure_serverless_models",
             "detail": "Azure AI Models (serverless deployments, not OpenAI)",
+            "listModels": false,
             "prediction": false,
             "bearerToken": true
         },
@@ -43,6 +46,7 @@
             "logprobs": false,
             "topLogprobs": false,
             "prediction": false,
+            "listModels": false,
             "aliases": {
                 "large": "claude-3-5-sonnet-latest",
                 "small": "claude-3-5-haiku-latest",
@@ -70,6 +74,7 @@
             "openaiCompatibility": "https://ai.google.dev/gemini-api/docs/openai",
             "prediction": false,
             "bearerToken": true,
+            "listModels": false,
             "aliases": {
                 "large": "gemini-1.5-flash-latest",
                 "small": "gemini-1.5-flash-latest",
@@ -84,6 +89,7 @@
             "id": "huggingface",
             "detail": "Hugging Face models",
             "prediction": false,
+            "listModels": false,
             "aliases": {
                 "large": "Qwen/Qwen2.5-72B-Instruct",
                 "small": "Qwen/Qwen2.5-Coder-32B-Instruct",
@@ -110,6 +116,7 @@
             "openaiCompatibility": "https://www.alibabacloud.com/help/en/model-studio/developer-reference/compatibility-of-openai-with-dashscope",
             "tools": false,
             "prediction": false,
+            "listModels": false,
             "bearerToken": true,
             "aliases": {
                 "large": "qwen-max",
@@ -125,6 +132,7 @@
             "topLogprobs": false,
             "limitations": "Smaller context windows, and rate limiting",
             "prediction": false,
+            "listModels": false,
             "bearerToken": true,
             "aliases": {
                 "large": "gpt-4o",
@@ -145,6 +153,7 @@
             "detail": "Ollama local model",
             "logitBias": false,
             "openaiCompatibility": "https://github.com/ollama/ollama/blob/main/docs/openai.md",
+            "pullModel": true,
             "prediction": false
         },
         {
@@ -155,7 +164,9 @@
         {
             "id": "jan",
             "detail": "Jan local server",
-            "prediction": false
+            "prediction": false,
+            "listModels": true,
+            "top_p": false
         },
         {
             "id": "llamafile",
packages/core/src/lm.ts (9 additions, 2 deletions)

@@ -7,12 +7,14 @@ import {
     MODEL_PROVIDER_ANTHROPIC,
     MODEL_PROVIDER_ANTHROPIC_BEDROCK,
     MODEL_PROVIDER_CLIENT,
+    MODEL_PROVIDER_JAN,
     MODEL_PROVIDER_OLLAMA,
     MODEL_PROVIDER_TRANSFORMERS,
+    MODEL_PROVIDERS,
 } from "./constants"
 import { host } from "./host"
 import { OllamaModel } from "./ollama"
-import { OpenAIModel } from "./openai"
+import { LocalOpenAICompatibleModel } from "./openai"
 import { TransformersModel } from "./transformers"
 
 export function resolveLanguageModel(provider: string): LanguageModel {
@@ -27,5 +29,10 @@ export function resolveLanguageModel(provider: string): LanguageModel {
     if (provider === MODEL_PROVIDER_ANTHROPIC_BEDROCK)
         return AnthropicBedrockModel
     if (provider === MODEL_PROVIDER_TRANSFORMERS) return TransformersModel
-    return OpenAIModel
+
+    const features = MODEL_PROVIDERS.find((p) => p.id === provider)
+    return LocalOpenAICompatibleModel(provider, {
+        listModels: features?.listModels !== false,
+        pullModel: features?.pullModel,
+    })
 }
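With this fallthrough, any provider without a dedicated implementation gets an OpenAI-compatible model whose capabilities are gated by the llms.json flags. A sketch of the observable effect (provider ids taken from llms.json above):

```ts
// "jan" sets listModels: true and no pullModel -> can list, cannot pull.
const jan = resolveLanguageModel("jan")
console.log(typeof jan.listModels) // "function"
console.log(typeof jan.pullModel) // "undefined"

// "azure" sets listModels: false -> no listModels function is exposed.
const azure = resolveLanguageModel("azure")
console.log(typeof azure.listModels) // "undefined"
```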
packages/core/src/ollama.ts (5 additions, 2 deletions)

@@ -8,6 +8,8 @@ import { OpenAIChatCompletion } from "./openai"
 import { LanguageModelConfiguration } from "./host"
 import { host } from "./host"
 import { logError, logVerbose } from "./util"
+import { TraceOptions } from "./trace"
+import { CancellationOptions } from "./cancellation"
 
 /**
  * Lists available models for the Ollama language model configuration.
@@ -17,10 +19,11 @@
  * @returns A promise that resolves to an array of LanguageModelInfo objects.
  */
 async function listModels(
-    cfg: LanguageModelConfiguration
+    cfg: LanguageModelConfiguration,
+    options: TraceOptions & CancellationOptions
 ): Promise<LanguageModelInfo[]> {
     // Create a fetch instance to make HTTP requests
-    const fetch = await createFetch({ retries: 0 })
+    const fetch = await createFetch({ retries: 0, ...options })
     // Fetch the list of models from the remote API
     const res = await fetch(cfg.base.replace("/v1", "/api/tags"), {
         method: "GET",
packages/core/src/openai.ts (78 additions, 10 deletions)

@@ -1,5 +1,6 @@
 import {
     deleteUndefinedValues,
+    logError,
     logVerbose,
     normalizeInt,
     trimTrailingSlash,
@@ -18,9 +19,14 @@
     TOOL_URL,
 } from "./constants"
 import { estimateTokens } from "./tokens"
-import { ChatCompletionHandler, LanguageModel, LanguageModelInfo } from "./chat"
+import {
+    ChatCompletionHandler,
+    LanguageModel,
+    LanguageModelInfo,
+    PullModelFunction,
+} from "./chat"
 import { RequestError, errorMessage, serializeError } from "./error"
-import { createFetch, traceFetchPost } from "./fetch"
+import { createFetch, iterateBody, traceFetchPost } from "./fetch"
 import { parseModelIdentifier } from "./models"
 import { JSON5TryParse } from "./json5"
 import {
@@ -39,9 +45,10 @@
     ChatCompletionTokenLogprob,
 } from "./chattypes"
 import { resolveTokenEncoder } from "./encoders"
-import { toSignal } from "./cancellation"
+import { CancellationOptions, toSignal } from "./cancellation"
 import { INITryParse } from "./ini"
 import { serializeChunkChoiceToLogProbs } from "./logprob"
+import { TraceOptions } from "./trace"
 
 export function getConfigHeaders(cfg: LanguageModelConfiguration) {
     let { token, type, base, provider } = cfg
@@ -420,9 +427,10 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
 }
 
 async function listModels(
-    cfg: LanguageModelConfiguration
+    cfg: LanguageModelConfiguration,
+    options: TraceOptions & CancellationOptions
 ): Promise<LanguageModelInfo[]> {
-    const fetch = await createFetch({ retries: 0 })
+    const fetch = await createFetch({ retries: 0, ...(options || {}) })
     const res = await fetch(cfg.base + "/models", {
         method: "GET",
         headers: {
@@ -449,8 +457,68 @@
     )
 }
 
-export const OpenAIModel = Object.freeze<LanguageModel>({
-    completer: OpenAIChatCompletion,
-    id: MODEL_PROVIDER_OPENAI,
-    listModels,
-})
+const pullModel: PullModelFunction = async (modelId, options) => {
+    const { trace, cancellationToken } = options || {}
+    const { provider, model } = parseModelIdentifier(modelId)
+    const fetch = await createFetch({ retries: 0, ...options })
+    const conn = await host.getLanguageModelConfiguration(modelId, {
+        token: true,
+        cancellationToken,
+        trace,
+    })
+    try {
+        // test if model is present
+        const resTags = await fetch(`${conn.base}/models`, {
+            retries: 0,
+            method: "GET",
+            headers: {
+                "User-Agent": TOOL_ID,
+                "Content-Type": "application/json",
+            },
+        })
+        if (resTags.ok) {
+            const { data: models }: { data: { id: string }[] } =
+                await resTags.json()
+            if (models.find((m) => m.id === model)) return { ok: true }
+        }
+
+        // pull
+        logVerbose(`${provider}: pull ${model}`)
+        const resPull = await fetch(`${conn.base}/models/pull`, {
+            method: "POST",
+            headers: {
+                "User-Agent": TOOL_ID,
+                "Content-Type": "application/json",
+            },
+            body: JSON.stringify({ model }),
+        })
+        if (!resPull.ok) {
+            logError(`${provider}: failed to pull model ${model}`)
+            logVerbose(resPull.statusText)
+            return { ok: false, status: resPull.status }
+        }
+        for await (const chunk of iterateBody(resPull, { cancellationToken }))
+            process.stderr.write(".")
+        process.stderr.write("\n")
+        return { ok: true }
+    } catch (e) {
+        logError(e)
+        trace?.error(e)
+        return { ok: false, error: serializeError(e) }
+    }
+}
+
+export function LocalOpenAICompatibleModel(
+    providerId: string,
+    options: { listModels?: boolean; pullModel?: boolean }
+) {
+    return Object.freeze<LanguageModel>(
+        deleteUndefinedValues({
+            completer: OpenAIChatCompletion,
+            id: providerId,
+            listModels: options?.listModels ? listModels : undefined,
+            pullModel: options?.pullModel ? pullModel : undefined,
+        })
+    )
+}
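The pull flow first checks GET /models for the model, then streams POST /models/pull, writing a dot per response chunk. A hypothetical caller might look like this (the "lmstudio" provider id and the "provider:model" identifier format are assumptions for illustration):

```ts
// Sketch: constructing a provider with both capabilities enabled.
const lm = LocalOpenAICompatibleModel("lmstudio", {
    listModels: true,
    pullModel: true, // assumes the server exposes POST /models/pull
})
const pulled = await lm.pullModel?.("lmstudio:some-model", {})
if (!pulled?.ok) {
    console.error("pull failed", pulled?.status ?? pulled?.error)
}
```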
packages/vscode/src/servermanager.ts (1 addition, 1 deletion)

@@ -83,7 +83,7 @@ export class TerminalServerManager implements ServerManager {
     private async startClient(): Promise<WebSocketClient> {
         assert(!this._client)
         this._port = await findRandomOpenPort()
-        const url = `http://localhost:${this._port}?api-key=${encodeURIComponent(this.state.sessionApiKey)}`
+        const url = `http://127.0.0.1:${this._port}?api-key=${encodeURIComponent(this.state.sessionApiKey)}`
         logInfo(`client url: ${url}`)
         const client = (this._client = new WebSocketClient(url))
         client.chatRequest = createChatModelRunner(this.state)