# Source: precis/scripts/evaluate.py (commit b813321) — header below is the real file.
#!/usr/bin/env python3
"""CLI evaluation script for Précis."""
import argparse
import logging
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.config import ModelConfig, DataConfig
from src.model import load_tokenizer
from src.tuning.data import create_dummy_data
from transformers import AutoModelForCausalLM
from peft import PeftModel
# Configure root logging once at import time (INFO level, timestamped format),
# then grab a module-scoped logger for all script output.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
def parse_args():
    """Build the CLI parser for the evaluation run and parse sys.argv.

    Returns:
        argparse.Namespace with ``checkpoint`` (required path),
        ``num_samples`` (default 5) and ``max_new_tokens`` (default 256).
    """
    parser = argparse.ArgumentParser(description="Evaluate Précis model")
    # Flag specs kept in one table so defaults/help stay easy to scan.
    option_specs = (
        ("--checkpoint", {"type": str, "required": True, "help": "Path to model checkpoint"}),
        ("--num-samples", {"type": int, "default": 5, "help": "Number of samples to evaluate"}),
        ("--max-new-tokens", {"type": int, "default": 256, "help": "Max tokens to generate"}),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
def main():
    """Load a checkpoint and log sampled generations for dummy evaluation data.

    For each dummy sample: format the prompt, generate a continuation, and log
    the input, the generated summary, and the reference summary.
    """
    args = parse_args()
    config = ModelConfig()
    data_config = DataConfig()

    logger.info("Loading checkpoint from %s", args.checkpoint)
    tokenizer = load_tokenizer(config)
    model = AutoModelForCausalLM.from_pretrained(
        args.checkpoint,
        device_map="auto",
        trust_remote_code=True,
    )
    # Disable dropout etc. so sampling isn't perturbed by train-mode layers.
    model.eval()

    # NOTE(review): PeftModel is imported at the top of this file but never
    # used. If args.checkpoint can be a PEFT adapter directory
    # (adapter_config.json), it should be loaded onto the base model via
    # PeftModel.from_pretrained instead — confirm intended checkpoint format.

    # Some tokenizers define no pad token; fall back to EOS so generate()
    # does not receive pad_token_id=None.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    # Generate on dummy samples.
    samples = create_dummy_data(args.num_samples)
    for i, sample in enumerate(samples):
        prompt = data_config.format_prompt(sample["text"])
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=args.max_new_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=pad_id,
        )
        # Slice off the prompt by *token* count, not len(prompt) characters:
        # decoding with skip_special_tokens can change the prompt's decoded
        # string length, so the old character slice could drop or keep the
        # wrong text. Decode only the newly generated tokens instead.
        prompt_len = inputs["input_ids"].shape[1]
        summary = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        logger.info("\n=== Sample %d ===", i + 1)
        logger.info("Input: %s...", sample["text"][:100])
        logger.info("Generated: %s", summary)
        logger.info("Reference: %s", sample["summary"])
# Script entry point: run the evaluation only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()