| | """Loading datasets and evaluators.""" |
| |
|
| | from typing import Any, Dict, List, Optional, Sequence, Type, Union |
| |
|
| | from langchain_core.language_models import BaseLanguageModel |
| |
|
| | from langchain.chains.base import Chain |
| | from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChain |
| | from langchain.evaluation.comparison import PairwiseStringEvalChain |
| | from langchain.evaluation.comparison.eval_chain import LabeledPairwiseStringEvalChain |
| | from langchain.evaluation.criteria.eval_chain import ( |
| | CriteriaEvalChain, |
| | LabeledCriteriaEvalChain, |
| | ) |
| | from langchain.evaluation.embedding_distance.base import ( |
| | EmbeddingDistanceEvalChain, |
| | PairwiseEmbeddingDistanceEvalChain, |
| | ) |
| | from langchain.evaluation.exact_match.base import ExactMatchStringEvaluator |
| | from langchain.evaluation.parsing.base import ( |
| | JsonEqualityEvaluator, |
| | JsonValidityEvaluator, |
| | ) |
| | from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator |
| | from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator |
| | from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain |
| | from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator |
| | from langchain.evaluation.schema import EvaluatorType, LLMEvalChain, StringEvaluator |
| | from langchain.evaluation.scoring.eval_chain import ( |
| | LabeledScoreStringEvalChain, |
| | ScoreStringEvalChain, |
| | ) |
| | from langchain.evaluation.string_distance.base import ( |
| | PairwiseStringDistanceEvalChain, |
| | StringDistanceEvalChain, |
| | ) |
| |
|
| |
|
def load_dataset(uri: str) -> List[Dict]:
    """Load a dataset from the `LangChainDatasets on HuggingFace <https://huggingface.co/LangChainDatasets>`_.

    Only the ``"train"`` split of the dataset is returned.

    Args:
        uri: The uri of the dataset to load. It is appended to the
            ``LangChainDatasets/`` namespace.

    Returns:
        A list of dictionaries, each representing a row in the dataset.

    Raises:
        ImportError: If the optional ``datasets`` package is not installed.

    **Prerequisites**

    .. code-block:: shell

        pip install datasets

    Examples
    --------
    .. code-block:: python

        from langchain.evaluation import load_dataset
        ds = load_dataset("llm-math")
    """
    try:
        # Imported lazily so that ``datasets`` stays an optional dependency.
        # Aliased so the import does not shadow this function's own name.
        from datasets import load_dataset as _hf_load_dataset
    except ImportError as e:
        # Chain the original failure so the real import problem stays visible.
        raise ImportError(
            "load_dataset requires the `datasets` package."
            " Please install with `pip install datasets`"
        ) from e

    dataset = _hf_load_dataset(f"LangChainDatasets/{uri}")
    return list(dataset["train"])
| |
|
| |
|
# Any class an evaluator type may resolve to.
_EvaluatorClass = Union[Type[LLMEvalChain], Type[Chain], Type[StringEvaluator]]

# Registry used by ``load_evaluator``: evaluator type -> implementing class.
# Keep the insertion order stable; the key list is echoed verbatim in the
# "Valid types are" error message.
_EVALUATOR_MAP: Dict[EvaluatorType, _EvaluatorClass] = {
    EvaluatorType.QA: QAEvalChain,
    EvaluatorType.COT_QA: CotQAEvalChain,
    EvaluatorType.CONTEXT_QA: ContextQAEvalChain,
    EvaluatorType.PAIRWISE_STRING: PairwiseStringEvalChain,
    EvaluatorType.SCORE_STRING: ScoreStringEvalChain,
    EvaluatorType.LABELED_PAIRWISE_STRING: LabeledPairwiseStringEvalChain,
    EvaluatorType.LABELED_SCORE_STRING: LabeledScoreStringEvalChain,
    EvaluatorType.AGENT_TRAJECTORY: TrajectoryEvalChain,
    EvaluatorType.CRITERIA: CriteriaEvalChain,
    EvaluatorType.LABELED_CRITERIA: LabeledCriteriaEvalChain,
    EvaluatorType.STRING_DISTANCE: StringDistanceEvalChain,
    EvaluatorType.PAIRWISE_STRING_DISTANCE: PairwiseStringDistanceEvalChain,
    EvaluatorType.EMBEDDING_DISTANCE: EmbeddingDistanceEvalChain,
    EvaluatorType.PAIRWISE_EMBEDDING_DISTANCE: PairwiseEmbeddingDistanceEvalChain,
    EvaluatorType.JSON_VALIDITY: JsonValidityEvaluator,
    EvaluatorType.JSON_EQUALITY: JsonEqualityEvaluator,
    EvaluatorType.JSON_EDIT_DISTANCE: JsonEditDistanceEvaluator,
    EvaluatorType.JSON_SCHEMA_VALIDATION: JsonSchemaEvaluator,
    EvaluatorType.REGEX_MATCH: RegexMatchStringEvaluator,
    EvaluatorType.EXACT_MATCH: ExactMatchStringEvaluator,
}
| |
|
| |
|
def load_evaluator(
    evaluator: EvaluatorType,
    *,
    llm: Optional[BaseLanguageModel] = None,
    **kwargs: Any,
) -> Union[Chain, StringEvaluator]:
    """Load the evaluator registered for the requested evaluator type.

    Parameters
    ----------
    evaluator : EvaluatorType
        The type of evaluator to load. Must be a key of ``_EVALUATOR_MAP``.
    llm : BaseLanguageModel, optional
        The language model to use for evaluation, by default None. Only used
        by evaluators that subclass ``LLMEvalChain``. When omitted for such an
        evaluator, a default ``ChatOpenAI`` ``gpt-4`` model (``seed=42``,
        ``temperature=0``) is created.
    **kwargs : Any
        Additional keyword arguments to pass to the evaluator.

    Returns
    -------
    Union[Chain, StringEvaluator]
        The loaded evaluator.

    Raises
    ------
    ValueError
        If ``evaluator`` is not a known evaluator type, or if an LLM-based
        evaluator is requested without ``llm`` and the default model cannot
        be created.

    Examples
    --------
    >>> from langchain.evaluation import load_evaluator, EvaluatorType
    >>> evaluator = load_evaluator(EvaluatorType.QA)
    """
    if evaluator not in _EVALUATOR_MAP:
        raise ValueError(
            f"Unknown evaluator type: {evaluator}"
            f"\nValid types are: {list(_EVALUATOR_MAP.keys())}"
        )
    evaluator_cls = _EVALUATOR_MAP[evaluator]

    # Evaluators that are not LLM chains are built straight from the kwargs.
    if not issubclass(evaluator_cls, LLMEvalChain):
        return evaluator_cls(**kwargs)

    # Only build the default model when the caller did not supply one, so an
    # explicit ``llm`` works even without any OpenAI integration installed.
    if llm is None:
        try:
            try:
                from langchain_openai import ChatOpenAI
            except ImportError:
                try:
                    from langchain_community.chat_models.openai import ChatOpenAI
                except ImportError:
                    raise ImportError(
                        "Could not import langchain_openai or fallback onto "
                        "langchain_community. Please install langchain_openai "
                        "or specify a language model explicitly. "
                        "It's recommended to install langchain_openai AND "
                        "specify a language model explicitly."
                    )

            llm = ChatOpenAI(  # type: ignore[call-arg]
                model="gpt-4", seed=42, temperature=0
            )
        except Exception as e:
            # Any failure here (missing package, bad credentials, ...) is
            # reported uniformly, with the underlying error chained as cause.
            raise ValueError(
                f"Evaluation with the {evaluator_cls} requires a "
                "language model to function."
                " Failed to create the default 'gpt-4' model."
                " Please manually provide an evaluation LLM"
                " or check your openai credentials."
            ) from e
    return evaluator_cls.from_llm(llm=llm, **kwargs)
| |
|
| |
|
def load_evaluators(
    evaluators: Sequence[EvaluatorType],
    *,
    llm: Optional[BaseLanguageModel] = None,
    config: Optional[dict] = None,
    **kwargs: Any,
) -> List[Union[Chain, StringEvaluator]]:
    """Load one evaluator per requested evaluator type.

    Each entry of ``evaluators`` is passed to ``load_evaluator`` together with
    the shared ``llm`` and keyword arguments.

    Parameters
    ----------
    evaluators : Sequence[EvaluatorType]
        The evaluator types to load, in the order the results are returned.
    llm : BaseLanguageModel, optional
        The language model forwarded to ``load_evaluator`` for every entry,
        by default None.
    config : dict, optional
        A dictionary mapping evaluator types to additional keyword arguments
        for that type only, by default None. On a key clash these take
        precedence over ``**kwargs``.
    **kwargs : Any
        Additional keyword arguments to pass to all evaluators.

    Returns
    -------
    List[Union[Chain, StringEvaluator]]
        The loaded evaluators.

    Examples
    --------
    >>> from langchain.evaluation import load_evaluators, EvaluatorType
    >>> evaluators = [EvaluatorType.QA, EvaluatorType.CRITERIA]
    >>> loaded_evaluators = load_evaluators(evaluators, criteria="helpfulness")
    """
    # A missing or empty config means no per-type overrides.
    overrides = config or {}
    return [
        # Later unpacking wins, so per-type settings override the shared kwargs.
        load_evaluator(kind, llm=llm, **{**kwargs, **overrides.get(kind, {})})
        for kind in evaluators
    ]
| |
|