"""
Test Suite for VQA Enhancements
Tests LLM Reasoning Engine and Conversational VQA features
"""
import os
import sys
# Put the directory containing this file at the front of the import path so
# that modules living next to it can be imported by the tests below no matter
# which working directory the script is launched from.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def _run_reasoning_case(service, title, object_name, facts, question,
                        *, detailed=True, require_confidence=False):
    """Run one reasoning query, print its result and assert it is usable.

    Args:
        service: Object exposing ``reason_with_facts(object_name=, facts=,
            question=)``; the returned mapping must contain ``answer`` and
            ``confidence`` (and ``reasoning_chain`` / ``status`` when
            ``detailed`` is true).
        title: Human-readable label printed before the case runs.
        object_name: Name of the object the question is about.
        facts: Mapping of fact categories to lists of strings.
        question: Natural-language question passed to the service.
        detailed: When true, also print the reasoning chain and status.
        require_confidence: When true, additionally require a positive
            confidence value.

    Raises:
        AssertionError: If the answer is empty, or if ``require_confidence``
            is set and the confidence is not positive.
    """
    print(f"📝 {title}")
    result = service.reason_with_facts(
        object_name=object_name,
        facts=facts,
        question=question,
    )
    print(f"   Answer: {result['answer']}")
    if detailed:
        print("   Reasoning Chain:")
        for i, step in enumerate(result['reasoning_chain'], 1):
            print(f"      {i}. {step}")
    print(f"   Confidence: {result['confidence']}")
    if detailed:
        print(f"   Status: {result['status']}")
    # Every case must produce an answer before it may be reported as passed.
    assert result['answer'], "Answer should not be empty"
    if require_confidence:
        assert result['confidence'] > 0, "Confidence should be positive"
    print("   ✓ Test passed\n")


def test_llm_reasoning():
    """Test LLM Reasoning Service.

    Obtains the service via ``get_llm_reasoning_service`` and runs three
    fact-based questions (candle, ice cream, apple) through
    ``reason_with_facts``, checking that each yields a non-empty answer.

    Returns:
        bool: True when all cases pass. False when the service raises
        ``ValueError`` while starting up (reported as "skipped") or when any
        other error or assertion failure occurs (reported as "failed", with
        the traceback printed).
    """
    print("=" * 80)
    print("🧪 TEST 1: LLM Reasoning Engine")
    print("=" * 80)
    try:
        try:
            from llm_reasoning_service import get_llm_reasoning_service
            service = get_llm_reasoning_service()
        except ValueError as e:
            # Only a ValueError raised while starting the service is treated
            # as "not configured"; one raised by a reasoning call below is a
            # genuine failure and must not be reported as a skip.
            print(f"⚠️  LLM Reasoning tests skipped: {e}")
            print("   (Set GROQ_API_KEY to run these tests)\n")
            return False
        print("✓ LLM Reasoning Service initialized\n")

        _run_reasoning_case(
            service,
            "Test Case 1.1: Can a candle melt?",
            object_name="candle",
            facts={
                "materials": ["wax", "wick"],
                "categories": ["light source", "household item"],
            },
            question="Can this melt?",
            require_confidence=True,
        )
        _run_reasoning_case(
            service,
            "Test Case 1.2: Would ice cream survive in the desert?",
            object_name="ice cream",
            facts={
                "materials": ["milk", "sugar", "cream"],
                "categories": ["frozen dessert", "food"],
                "properties": ["cold", "frozen"],
            },
            question="Would this survive in the desert?",
        )
        _run_reasoning_case(
            service,
            "Test Case 1.3: Is an apple edible?",
            object_name="apple",
            facts={
                "categories": ["fruit", "food"],
                "properties": ["nutritious", "healthy"],
            },
            question="Is this edible?",
            detailed=False,
        )

        print("✅ LLM Reasoning Engine: ALL TESTS PASSED\n")
        return True
    except Exception as e:
        print(f"❌ LLM Reasoning tests failed: {e}\n")
        # Show where the failure happened, as the other suites in this file do.
        import traceback
        traceback.print_exc()
        return False
def test_conversation_manager():
    """Test Conversation Manager.

    Walks a three-turn conversation about one image through a
    ``ConversationManager`` and checks pronoun resolution, the context
    returned for a follow-up question, the stored history, and session
    deletion.

    Returns:
        bool: True when every check passes; False when anything raises
        (the error message and traceback are printed).
    """
    print("=" * 80)
    print("🧪 TEST 2: Conversation Manager")
    print("=" * 80)
    try:
        from conversation_manager import ConversationManager
        manager = ConversationManager(session_timeout_minutes=30)
        print("✓ Conversation Manager initialized\n")

        print("📝 Test Case 2.1: Multi-turn conversation")
        session_id = manager.create_session("test_image.jpg")
        print(f"   Created session: {session_id}")

        # Turn 1 establishes "apple" as the object later pronouns refer to.
        manager.add_turn(
            session_id=session_id,
            question="What is this?",
            answer="apple",
            objects_detected=["apple"]
        )
        print("   Turn 1: 'What is this?' → 'apple'")

        session = manager.get_session(session_id)
        question_2 = "Is it healthy?"
        resolved_2 = manager.resolve_references(question_2, session)
        print(f"   Turn 2: '{question_2}' → Resolved: '{resolved_2}'")
        assert "apple" in resolved_2.lower(), \
            "Pronoun 'it' should be resolved to 'apple'"
        manager.add_turn(
            session_id=session_id,
            question=question_2,
            answer="Yes, apples are healthy",
            objects_detected=["apple"]
        )

        question_3 = "What color is it?"
        resolved_3 = manager.resolve_references(question_3, session)
        print(f"   Turn 3: '{question_3}' → Resolved: '{resolved_3}'")
        assert "apple" in resolved_3.lower(), \
            "Pronoun 'it' should still resolve to 'apple'"
        # Record turn 3 as well: the checks below expect three stored turns
        # and the next question to be turn 4, which only holds once this turn
        # has been added.
        # NOTE(review): assumes turn_number is 1 + number of recorded turns;
        # confirm against ConversationManager.
        manager.add_turn(
            session_id=session_id,
            question=question_3,
            answer="red",
            objects_detected=["apple"]
        )
        print("   ✓ Pronoun resolution working\n")

        print("📝 Test Case 2.2: Context retrieval")
        context = manager.get_context_for_question(session_id, "Another question")
        print(f"   Turn number: {context['turn_number']}")
        print(f"   Previous objects: {context['previous_objects']}")
        print(f"   Has context: {context['has_context']}")
        assert context['turn_number'] == 4, "Should be on turn 4"
        assert context['has_context'], "Should have context"
        assert "apple" in context['previous_objects'], "Should remember apple"
        print("   ✓ Context tracking working\n")

        print("📝 Test Case 2.3: Conversation history")
        history = manager.get_history(session_id)
        print(f"   Total turns: {len(history)}")
        for i, turn in enumerate(history, 1):
            print(f"   Turn {i}: Q: {turn['question']} | A: {turn['answer']}")
        assert len(history) == 3, "Should have 3 turns"
        print("   ✓ History retrieval working\n")

        print("📝 Test Case 2.4: Session deletion")
        deleted = manager.delete_session(session_id)
        assert deleted, "Session should be deleted"
        session_after = manager.get_session(session_id)
        assert session_after is None, "Session should not exist after deletion"
        print("   ✓ Session deletion working\n")

        print("✅ Conversation Manager: ALL TESTS PASSED\n")
        return True
    except Exception as e:
        print(f"❌ Conversation Manager tests failed: {e}\n")
        import traceback
        traceback.print_exc()
        return False
def test_integration():
    """Smoke-test that the enhanced VQA components load together.

    Constructs ``SemanticNeurosymbolicVQA`` on the CPU and reports whether its
    ``llm_enabled`` flag is set, then checks that ``ProductionEnsembleVQA``
    can be imported.

    Returns:
        bool: True when both steps complete; False when anything raises
        (the error message and traceback are printed).
    """
    rule = "=" * 80
    print(rule)
    print("🧪 TEST 3: Integration Tests")
    print(rule)

    try:
        from semantic_neurosymbolic_vqa import SemanticNeurosymbolicVQA

        print("📝 Test Case 3.1: Semantic VQA with LLM reasoning")
        vqa = SemanticNeurosymbolicVQA(device='cpu')
        llm_report = (
            "   ✓ LLM reasoning integrated into Semantic VQA"
            if vqa.llm_enabled
            else "   ⚠️  LLM reasoning not available (fallback mode)"
        )
        print(llm_report)
        print()

        print("📝 Test Case 3.2: Ensemble VQA with conversation support")
        # Imported only to prove the module loads; the class is not used here.
        from ensemble_vqa_app import ProductionEnsembleVQA
        print("   ✓ Ensemble VQA imports successfully")
        print("   (Full test requires model checkpoints)\n")

        print("✅ Integration: TESTS PASSED\n")
    except Exception as err:
        print(f"❌ Integration tests failed: {err}\n")
        import traceback
        traceback.print_exc()
        return False
    return True
def run_all_tests():
    """Run every suite in order and print a pass/fail summary.

    Returns:
        bool: True only when all suites reported success.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("🚀 VQA ENHANCEMENT TEST SUITE")
    print(rule)
    print()

    # Suites run in this order; each callable returns a truthy value on success.
    suites = (
        ("LLM Reasoning", test_llm_reasoning),
        ("Conversation Manager", test_conversation_manager),
        ("Integration", test_integration),
    )
    results = [(label, run_suite()) for label, run_suite in suites]

    print(rule)
    print("📊 TEST SUMMARY")
    print(rule)
    passed_count = 0
    for label, ok in results:
        if ok:
            passed_count += 1
            verdict = "✅ PASSED"
        else:
            verdict = "❌ FAILED"
        print(f"{label}: {verdict}")

    suite_count = len(results)
    print()
    print(f"Total: {passed_count}/{suite_count} test suites passed")
    print(rule)
    return passed_count == suite_count
if __name__ == "__main__":
    # Mirror the overall outcome in the process exit status:
    # 0 when every suite passed, 1 otherwise.
    exit_code = 0 if run_all_tests() else 1
    sys.exit(exit_code)