-
Notifications
You must be signed in to change notification settings - Fork 0
/
config-sample.yml
64 lines (57 loc) · 2.33 KB
/
config-sample.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# This is a sample config file.
# Rename this file to `config.yml` and modify its contents.
# Database connection settings.
db:
  driver: sqlite
  connectionString: "./data.db"  # Set this to ":memory:" to avoid creating a file
# Address and port of the HTTP server (the search results page below is served here).
http:
  listen: localhost
  port: 8080
resultsPage:
  # Enables the default search results page at localhost:8080
  # (the address specified in the `http` block above).
  enabled: true
  # Inject an HTML fragment at the bottom of the <head> element on the search results page.
  # The literal block scalar (`|`) keeps the fragment's line breaks as written.
  customHTML: |
    <style type="text/css">
      h1 {
        color: green;
      }
    </style>
# List of sites to crawl and index; each list item configures one site.
sources:
  # Internally identify the site as `brendan`. All API requests will have to reference this ID.
  - id: brendan
    # Start crawling at this URL:
    url: https://www.bswanson.dev
    # Only allow crawling on these domains. Must include the domain of the base URL.
    allowedDomains:
      - "www.bswanson.dev"
    # The maximum number of links the crawler will follow away from the base URL.
    maxDepth: 100
    # The number of requests **per minute** that the crawler will make to your site.
    # This number is used to start a scheduled task, so don't set it too high
    # if you want to conserve CPU cycles.
    speed: 30
    refresh:
      # Set `enabled` to `true` to recrawl old content after a certain number of days.
      enabled: true
      # The minimum amount of time between refreshes, **in days**.
      # In this example, pages are recrawled weekly.
      minAge: 7
    # The maximum amount of text content to index per page, in characters.
    sizeLimit: 200000  # Content will be truncated after 200,000 characters
    embeddings:
      # Set `enabled` to `true` to send page content to the embeddings API configured below.
      enabled: true
      # The maximum number of requests per minute to the embeddings API.
      # This number is used to start a scheduled task, so don't set it too high
      # if you want to conserve CPU cycles.
      speed: 30
      # The maximum number of chunks to include in a single embeddings API request.
      batchSize: 64
      # Use OpenAI's embedding model:
      # openaiBaseUrl: https://api.openai.com/v1/
      # model: text-embedding-3-small
      # dimensions: 1536
      # apiKey: sk-*************************************
      # You can also use any OpenAI-compatible API, like a local Ollama server:
      openaiBaseUrl: http://localhost:11434/v1/
      model: bge-m3
      dimensions: 1024
      # Size of each chunk and the overlap between consecutive chunks.
      # NOTE(review): the unit (characters vs. tokens) is not stated in this file; confirm.
      chunkSize: 200
      chunkOverlap: 30  # 15% overlap