""" Test Suite for VQA Enhancements Tests LLM Reasoning Engine and Conversational VQA features """ import os import sys sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) def test_llm_reasoning(): """Test LLM Reasoning Service""" print("=" * 80) print("๐Ÿงช TEST 1: LLM Reasoning Engine") print("=" * 80) try: from llm_reasoning_service import get_llm_reasoning_service service = get_llm_reasoning_service() print("โœ“ LLM Reasoning Service initialized\n") print("๐Ÿ“ Test Case 1.1: Can a candle melt?") result = service.reason_with_facts( object_name="candle", facts={ "materials": ["wax", "wick"], "categories": ["light source", "household item"] }, question="Can this melt?" ) print(f" Answer: {result['answer']}") print(f" Reasoning Chain:") for i, step in enumerate(result['reasoning_chain'], 1): print(f" {i}. {step}") print(f" Confidence: {result['confidence']}") print(f" Status: {result['status']}") assert result['answer'], "Answer should not be empty" assert result['confidence'] > 0, "Confidence should be positive" print(" โœ“ Test passed\n") print("๐Ÿ“ Test Case 1.2: Would ice cream survive in the desert?") result = service.reason_with_facts( object_name="ice cream", facts={ "materials": ["milk", "sugar", "cream"], "categories": ["frozen dessert", "food"], "properties": ["cold", "frozen"] }, question="Would this survive in the desert?" ) print(f" Answer: {result['answer']}") print(f" Reasoning Chain:") for i, step in enumerate(result['reasoning_chain'], 1): print(f" {i}. {step}") print(f" Confidence: {result['confidence']}") print(f" Status: {result['status']}") assert result['answer'], "Answer should not be empty" print(" โœ“ Test passed\n") print("๐Ÿ“ Test Case 1.3: Is an apple edible?") result = service.reason_with_facts( object_name="apple", facts={ "categories": ["fruit", "food"], "properties": ["nutritious", "healthy"] }, question="Is this edible?" ) print(f" Answer: {result['answer']}") print(f" Confidence: {result['confidence']}") print(" โœ“ Test passed\n") print("โœ… LLM Reasoning Engine: ALL TESTS PASSED\n") return True except ValueError as e: print(f"โš ๏ธ LLM Reasoning tests skipped: {e}") print(" (Set GROQ_API_KEY to run these tests)\n") return False except Exception as e: print(f"โŒ LLM Reasoning tests failed: {e}\n") return False def test_conversation_manager(): """Test Conversation Manager""" print("=" * 80) print("๐Ÿงช TEST 2: Conversation Manager") print("=" * 80) try: from conversation_manager import ConversationManager manager = ConversationManager(session_timeout_minutes=30) print("โœ“ Conversation Manager initialized\n") print("๐Ÿ“ Test Case 2.1: Multi-turn conversation") session_id = manager.create_session("test_image.jpg") print(f" Created session: {session_id}") manager.add_turn( session_id=session_id, question="What is this?", answer="apple", objects_detected=["apple"] ) print(" Turn 1: 'What is this?' โ†’ 'apple'") session = manager.get_session(session_id) question_2 = "Is it healthy?" resolved_2 = manager.resolve_references(question_2, session) print(f" Turn 2: '{question_2}' โ†’ Resolved: '{resolved_2}'") assert "apple" in resolved_2.lower() or resolved_2 == "Is apple healthy?", \ "Pronoun 'it' should be resolved to 'apple'" manager.add_turn( session_id=session_id, question=question_2, answer="Yes, apples are healthy", objects_detected=["apple"] ) question_3 = "What color is it?" resolved_3 = manager.resolve_references(question_3, session) print(f" Turn 3: '{question_3}' โ†’ Resolved: '{resolved_3}'") assert "apple" in resolved_3.lower(), \ "Pronoun 'it' should still resolve to 'apple'" print(" โœ“ Pronoun resolution working\n") print("๐Ÿ“ Test Case 2.2: Context retrieval") context = manager.get_context_for_question(session_id, "Another question") print(f" Turn number: {context['turn_number']}") print(f" Previous objects: {context['previous_objects']}") print(f" Has context: {context['has_context']}") assert context['turn_number'] == 4, "Should be on turn 4" assert context['has_context'], "Should have context" assert "apple" in context['previous_objects'], "Should remember apple" print(" โœ“ Context tracking working\n") print("๐Ÿ“ Test Case 2.3: Conversation history") history = manager.get_history(session_id) print(f" Total turns: {len(history)}") for i, turn in enumerate(history, 1): print(f" Turn {i}: Q: {turn['question']} | A: {turn['answer']}") assert len(history) == 3, "Should have 3 turns" print(" โœ“ History retrieval working\n") print("๐Ÿ“ Test Case 2.4: Session deletion") deleted = manager.delete_session(session_id) assert deleted, "Session should be deleted" session_after = manager.get_session(session_id) assert session_after is None, "Session should not exist after deletion" print(" โœ“ Session deletion working\n") print("โœ… Conversation Manager: ALL TESTS PASSED\n") return True except Exception as e: print(f"โŒ Conversation Manager tests failed: {e}\n") import traceback traceback.print_exc() return False def test_integration(): """Test integration of features""" print("=" * 80) print("๐Ÿงช TEST 3: Integration Tests") print("=" * 80) try: from semantic_neurosymbolic_vqa import SemanticNeurosymbolicVQA print("๐Ÿ“ Test Case 3.1: Semantic VQA with LLM reasoning") vqa = SemanticNeurosymbolicVQA(device='cpu') if vqa.llm_enabled: print(" โœ“ LLM reasoning integrated into Semantic VQA") else: print(" โš ๏ธ LLM reasoning not available (fallback mode)") print() print("๐Ÿ“ Test Case 3.2: Ensemble VQA with conversation support") from ensemble_vqa_app import ProductionEnsembleVQA print(" โœ“ Ensemble VQA imports successfully") print(" (Full test requires model checkpoints)\n") print("โœ… Integration: TESTS PASSED\n") return True except Exception as e: print(f"โŒ Integration tests failed: {e}\n") import traceback traceback.print_exc() return False def run_all_tests(): """Run all test suites""" print("\n" + "=" * 80) print("๐Ÿš€ VQA ENHANCEMENT TEST SUITE") print("=" * 80) print() results = [] results.append(("LLM Reasoning", test_llm_reasoning())) results.append(("Conversation Manager", test_conversation_manager())) results.append(("Integration", test_integration())) print("=" * 80) print("๐Ÿ“Š TEST SUMMARY") print("=" * 80) for name, passed in results: status = "โœ… PASSED" if passed else "โŒ FAILED" print(f"{name}: {status}") total_passed = sum(1 for _, passed in results if passed) total_tests = len(results) print() print(f"Total: {total_passed}/{total_tests} test suites passed") print("=" * 80) return all(passed for _, passed in results) if __name__ == "__main__": success = run_all_tests() sys.exit(0 if success else 1)