Spaces:
Sleeping
Sleeping
Initial codes commit
Browse files
- db/initializer.py +5 -0
- modules/corpus.py +1 -1
db/initializer.py
CHANGED
|
@@ -26,6 +26,11 @@ def initialize_dbs():
|
|
| 26 |
corpus.prepare_corpus()
|
| 27 |
# 2) 인덱스/ID 매핑 메모리 로드
|
| 28 |
_load_index_in_memory()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def force_update():
|
| 31 |
_load_index_in_memory()
|
|
|
|
| 26 |
corpus.prepare_corpus()
|
| 27 |
# 2) 인덱스/ID 매핑 메모리 로드
|
| 28 |
_load_index_in_memory()
|
| 29 |
+
# 3) Arrow 캐시 생성
|
| 30 |
+
datasets = corpus._get_datasets()
|
| 31 |
+
for _subset, ds in datasets.items():
|
| 32 |
+
# dummy 호출로 캐시 생성
|
| 33 |
+
_ = ds.filter(lambda r: False)
|
| 34 |
|
| 35 |
def force_update():
|
| 36 |
_load_index_in_memory()
|
modules/corpus.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# rag/modules/
|
| 2 |
from typing import List, Dict, Any
|
| 3 |
from datasets import load_dataset, DatasetDict, Dataset
|
| 4 |
from config import HF_CORPUS_REPO, HF_CORPUS_SUBSET, HF_CORPUS_SPLIT, MARKER_DIR, CORPUS_READY_MARK
|
|
|
|
| 1 |
+
# rag/modules/corpus.py
|
| 2 |
from typing import List, Dict, Any
|
| 3 |
from datasets import load_dataset, DatasetDict, Dataset
|
| 4 |
from config import HF_CORPUS_REPO, HF_CORPUS_SUBSET, HF_CORPUS_SPLIT, MARKER_DIR, CORPUS_READY_MARK
|