feat: implement RAG indexing worker with vector database schema and document embedding support
Build and Release / release (push) Successful in 1m24s
Build and Release / release (push) Successful in 1m24s
This commit is contained in:
+18
-5
@@ -19,13 +19,26 @@ import (
|
||||
|
||||
func runStatistics(ctx context.Context, repo repositories.StatisticRepository) {
|
||||
log.Info().Msg("Running daily statistics...")
|
||||
today := time.Now().Truncate(24 * time.Hour)
|
||||
_, err := repo.Upsert(ctx, today)
|
||||
|
||||
loc, err := time.LoadLocation("Asia/Ho_Chi_Minh")
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("Failed to upsert system statistics")
|
||||
} else {
|
||||
log.Info().Msg("Successfully updated daily statistics and cleared cache")
|
||||
log.Warn().Err(err).Msg("Failed to load Asia/Ho_Chi_Minh timezone, falling back to fixed UTC+7")
|
||||
loc = time.FixedZone("ICT", 7*3600)
|
||||
}
|
||||
|
||||
now := time.Now().In(loc)
|
||||
today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC)
|
||||
|
||||
// Upsert stats for today and the two preceding days so that timezone offsets or delayed runs do not leave gaps in the daily records
|
||||
for i := 0; i < 3; i++ {
|
||||
date := today.AddDate(0, 0, -i)
|
||||
log.Info().Str("date", date.Format("2006-01-02")).Msg("Upserting system statistics")
|
||||
_, err = repo.Upsert(ctx, date)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("date", date.Format("2006-01-02")).Msg("Failed to upsert system statistics")
|
||||
}
|
||||
}
|
||||
log.Info().Msg("Successfully updated daily statistics and cleared cache")
|
||||
}
|
||||
|
||||
func runBackup(ctx context.Context, s3 storage.Storage, dbURI string) {
|
||||
|
||||
+25
-41
@@ -53,33 +53,24 @@ func processRagTask(ctx context.Context, ragRepo repositories.RagRepository, rag
|
||||
var vectors [][]float32
|
||||
var err error
|
||||
|
||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||
if attempt > 0 {
|
||||
delay := baseRetryDelay * time.Duration(math.Pow(2, float64(attempt-1)))
|
||||
log.Warn().
|
||||
Str("worker", workerName).
|
||||
Str("wiki_id", wiki.ID).
|
||||
Int("attempt", attempt).
|
||||
Dur("delay", delay).
|
||||
Msg("Retrying wiki embedding")
|
||||
time.Sleep(delay)
|
||||
}
|
||||
|
||||
for attempt := 0; ; attempt++ {
|
||||
chunks, vectors, err = ragUtils.PrepareChunks(ctx, cleanText)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
|
||||
delay := baseRetryDelay * time.Duration(math.Pow(2, float64(attempt)))
|
||||
if delay > 2*time.Minute {
|
||||
delay = 2 * time.Minute
|
||||
}
|
||||
|
||||
log.Error().Err(err).
|
||||
Str("worker", workerName).
|
||||
Str("wiki_id", wiki.ID).
|
||||
Int("attempt", attempt).
|
||||
Msg("Failed to prepare wiki chunks")
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("worker", workerName).Str("wiki_id", wiki.ID).Msg("Giving up on wiki after max retries")
|
||||
continue
|
||||
Int("attempt", attempt+1).
|
||||
Dur("retry_delay", delay).
|
||||
Msg("Failed to prepare wiki chunks, retrying...")
|
||||
time.Sleep(delay)
|
||||
}
|
||||
|
||||
_ = ragRepo.DeleteBySourceIDs(ctx, "wiki", []string{wiki.ID})
|
||||
@@ -106,33 +97,24 @@ func processRagTask(ctx context.Context, ragRepo repositories.RagRepository, rag
|
||||
var vectors [][]float32
|
||||
var err error
|
||||
|
||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||
if attempt > 0 {
|
||||
delay := baseRetryDelay * time.Duration(math.Pow(2, float64(attempt-1)))
|
||||
log.Warn().
|
||||
Str("worker", workerName).
|
||||
Str("entity_id", entity.ID).
|
||||
Int("attempt", attempt).
|
||||
Dur("delay", delay).
|
||||
Msg("Retrying entity embedding")
|
||||
time.Sleep(delay)
|
||||
}
|
||||
|
||||
for attempt := 0; ; attempt++ {
|
||||
chunks, vectors, err = ragUtils.PrepareChunks(ctx, cleanText)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
|
||||
delay := baseRetryDelay * time.Duration(math.Pow(2, float64(attempt)))
|
||||
if delay > 2*time.Minute {
|
||||
delay = 2 * time.Minute
|
||||
}
|
||||
|
||||
log.Error().Err(err).
|
||||
Str("worker", workerName).
|
||||
Str("entity_id", entity.ID).
|
||||
Int("attempt", attempt).
|
||||
Msg("Failed to prepare entity chunks")
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("worker", workerName).Str("entity_id", entity.ID).Msg("Giving up on entity after max retries")
|
||||
continue
|
||||
Int("attempt", attempt+1).
|
||||
Dur("retry_delay", delay).
|
||||
Msg("Failed to prepare entity chunks, retrying...")
|
||||
time.Sleep(delay)
|
||||
}
|
||||
|
||||
_ = ragRepo.DeleteBySourceIDs(ctx, "entity", []string{entity.ID})
|
||||
@@ -253,9 +235,11 @@ func main() {
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for i := 1; i <= workerCount; i++ {
|
||||
wg.Go(func() {
|
||||
runSingleWorker(ctx, rdb, i, ragRepo, ragUtils)
|
||||
})
|
||||
wg.Add(1)
|
||||
go func(workerID int) {
|
||||
defer wg.Done()
|
||||
runSingleWorker(ctx, rdb, workerID, ragRepo, ragUtils)
|
||||
}(i)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
Reference in New Issue
Block a user