Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feat] Support Knowledge-based Retriever #348

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
66 changes: 66 additions & 0 deletions configs/datasets/FewCLUE_chid/FewCLUE_chid_knowledge_gen_0a29a2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import KnowledgeRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CHIDDataset_V2
from opencompass.utils.text_postprocessors import first_capital_postprocess

# Reader settings: the passage text plus the seven lettered options (A-G) are
# exposed as prompt inputs, and "answer" is the column holding the label.
chid_knowledge_reader_cfg = {
    "input_columns": ["content", "A", "B", "C", "D", "E", "F", "G"],
    "output_column": "answer",
}

# Inference settings for the knowledge-augmented CHID generation task.
chid_knowledge_infer_cfg = {
    # Template that wraps the text substituted for {knowledge}.
    "ice_template": {
        "type": PromptTemplate,
        "template": '以下是参考内容:{knowledge},结合上述参考内容,考虑接下来的问题:',
    },
    # Single HUMAN turn. The prompt starts with the declared ice_token
    # ('</E>') -- presumably the slot where the rendered ice_template is
    # inserted; confirm against KnowledgeRetriever.
    "prompt_template": {
        "type": PromptTemplate,
        "template": {
            "round": [
                {
                    "role": "HUMAN",
                    "prompt": "</E>{content}\n请选择______处所填的词\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nF. {F}\nG. {G}\n请从“A”,“B”,“C”,“D”,“E”,“F”,“G”中进行选择。答:",
                },
            ],
        },
        "ice_token": '</E>',
    },
    # Retriever configured with two knowledge text files and the option
    # columns A-G as retrieve_keys.
    "retriever": {
        "type": KnowledgeRetriever,
        "knowledge_docs": [
            './data/knowledge/chengyu-01-of-02.txt',
            './data/knowledge/chengyu-02-of-02.txt',
        ],
        "retrieve_keys": ['A', 'B', 'C', 'D', 'E', 'F', 'G'],
        "ice_eos_token": '\n',
    },
    "inferencer": {"type": GenInferencer},
}

# Evaluation settings: predictions are taken from the "BOT" role, passed
# through first_capital_postprocess, and scored with AccEvaluator.
chid_knowledge_eval_cfg = {
    "evaluator": {"type": AccEvaluator},
    "pred_role": "BOT",
    "pred_postprocessor": {"type": first_capital_postprocess},
}

# One dataset entry per split; both entries share the same reader, inference
# and evaluation settings and differ only in abbreviation and data path.
chid_knowledge_datasets = [
    dict(
        abbr=split_abbr,
        type=CHIDDataset_V2,
        path=split_path,
        reader_cfg=chid_knowledge_reader_cfg,
        infer_cfg=chid_knowledge_infer_cfg,
        eval_cfg=chid_knowledge_eval_cfg,
    )
    for split_abbr, split_path in (
        ("chid-dev", "./data/FewCLUE/chid/dev_few_all.json"),
        ("chid-test", "./data/FewCLUE/chid/test_public.json"),
    )
]
9 changes: 9 additions & 0 deletions configs/eval_demo_knowledge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from mmengine.config import read_base

# Base configs must be imported inside the read_base() context; the imports
# therefore have to be indented under the `with` statement.
with read_base():
    from .datasets.FewCLUE_chid.FewCLUE_chid_knowledge_gen_0a29a2 import chid_knowledge_datasets
    from .models.hf_opt_125m import opt125m
    from .models.hf_opt_350m import opt350m

# Run every knowledge-augmented CHID dataset entry against both imported
# model configs.
datasets = [*chid_knowledge_datasets]
models = [opt125m, opt350m]
1 change: 1 addition & 0 deletions opencompass/openicl/icl_retriever/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .icl_topk_retriever import TopkRetriever # noqa
from .icl_votek_retriever import VotekRetriever # noqa
from .icl_zero_retriever import ZeroRetriever # noqa
from .icl_knowledge_retriever import KnowledgeRetriever  # noqa