Spaces:

DaCrow13
/

Hopcroft-Skill-Classification

Sleeping

Hopcroft-Skill-Classification / Makefile

DaCrow13

Deploy to HF Spaces (Clean)

225af6a 4 months ago

7.1 kB

	#################################################################################
	# GLOBALS #
	#################################################################################

	# Project-wide settings. Every value is a plain literal, so immediate (:=)
	# assignment produces exactly the same values as deferred (=) assignment.
	# PROJECT_NAME and PYTHON_VERSION are not referenced by any rule in the
	# visible part of this file.
	PROJECT_NAME := Hopcroft
	PYTHON_VERSION := 3.10
	# Command used by the rules that launch Python; can be overridden on the
	# command line, e.g. `make data PYTHON_INTERPRETER=python3`.
	PYTHON_INTERPRETER := python

	#################################################################################
	# COMMANDS #
	#################################################################################

	## Install Python dependencies
	# Upgrades pip itself first, then installs everything listed in
	# requirements.txt (resolved relative to the directory make runs in).
	.PHONY: requirements
	requirements:
		${PYTHON_INTERPRETER} -m pip install --upgrade pip
		${PYTHON_INTERPRETER} -m pip install --requirement requirements.txt

	## Delete all compiled Python files
	# Step 1 removes every *.pyc / *.pyo file below the current directory.
	# Step 2 removes every __pycache__ directory together with anything still
	# inside it. `find -delete` can only remove empty directories, so a cache
	# directory holding any file other than *.pyc / *.pyo would make that form
	# fail. `-prune` stops find from descending into a directory that is about
	# to be removed.
	.PHONY: clean
	clean:
		find . -type f -name "*.py[co]" -delete
		find . -type d -name "__pycache__" -prune -exec rm -rf {} +

	## Lint using ruff
	# Check-only: first verifies formatting (`--check` reports without
	# rewriting), then runs the lint rules. The explicit `.` is ruff's default
	# path when none is given.
	.PHONY: lint
	lint:
		ruff format --check .
		ruff check .

	## Format source code with ruff
	# Applies ruff's automatic lint fixes first, then reformats. The explicit
	# `.` is ruff's default path when none is given.
	.PHONY: format
	format:
		ruff check --fix .
		ruff format .

	#################################################################################
	# PROJECT RULES #
	#################################################################################

	## Download dataset from Hugging Face
	# Runs the package's `dataset` module as a script (python -m).
	.PHONY: data
	data:
		${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.dataset

	## Extract features from raw data
	# Runs the package's `features` module as a script (python -m).
	.PHONY: features
	features:
		${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.features

	#################################################################################
	# TRAINING RULES #
	#################################################################################

	## Train Random Forest baseline with TF-IDF features (cleaned data)
	# Runs the `modeling.train` module as a script with the single argument
	# `baseline`.
	# NOTE(review): presumably `baseline` defaults to TF-IDF features on the
	# cleaned data -- confirm in modeling/train.py.
	.PHONY: train-baseline-tfidf
	train-baseline-tfidf:
		${PYTHON_INTERPRETER} \
			-m hopcroft_skill_classification_tool_competition.modeling.train \
			baseline

	## Train Random Forest baseline with Embedding features (cleaned data)
	# Inline `python -c` program: imports run_baseline_train from modeling.train
	# and calls it with feature_type='embedding' and use_cleaned=True.
	.PHONY: train-baseline-embeddings
	train-baseline-embeddings:
		${PYTHON_INTERPRETER} -c \
			"from hopcroft_skill_classification_tool_competition.modeling.train import run_baseline_train; run_baseline_train(feature_type='embedding', use_cleaned=True)"

	## Train Random Forest with SMOTE and TF-IDF features (cleaned data)
	# Inline `python -c` program: loads X, Y through load_data(feature_type='tfidf',
	# use_cleaned=True) and passes them to run_smote_experiment with the same
	# feature type.
	.PHONY: train-smote-tfidf
	train-smote-tfidf:
		${PYTHON_INTERPRETER} -c \
			"from hopcroft_skill_classification_tool_competition.modeling.train import run_smote_experiment, load_data; X, Y = load_data(feature_type='tfidf', use_cleaned=True); run_smote_experiment(X, Y, feature_type='tfidf')"

	## Train Random Forest with SMOTE and Embedding features (cleaned data)
	# Inline `python -c` program: loads X, Y through
	# load_data(feature_type='embedding', use_cleaned=True) and passes them to
	# run_smote_experiment with the same feature type.
	.PHONY: train-smote-embeddings
	train-smote-embeddings:
		${PYTHON_INTERPRETER} -c \
			"from hopcroft_skill_classification_tool_competition.modeling.train import run_smote_experiment, load_data; X, Y = load_data(feature_type='embedding', use_cleaned=True); run_smote_experiment(X, Y, feature_type='embedding')"

	#################################################################################
	# TESTING RULES #
	#################################################################################

	## Run all unit tests
	# Collects from tests/unit/ and keeps only tests carrying the `unit` marker.
	.PHONY: test-unit
	test-unit:
		pytest tests/unit/ --verbose -m unit

	## Run all integration tests
	# Collects from tests/integration/ and keeps only tests carrying the
	# `integration` marker.
	.PHONY: test-integration
	test-integration:
		pytest tests/integration/ --verbose -m integration

	## Run all system tests
	# Collects from tests/system/ and keeps only tests carrying the `system`
	# marker.
	.PHONY: test-system
	test-system:
		pytest tests/system/ --verbose -m system

	## Run all tests (unit, integration, system)
	# Runs everything under tests/ except the behavioral and deepchecks
	# directories, which have their own targets (test-behavioral,
	# validate-deepchecks).
	.PHONY: test-all
	test-all:
		pytest tests/ --verbose \
			--ignore=tests/behavioral \
			--ignore=tests/deepchecks

	## Run tests with coverage report
	# Measures coverage of the hopcroft_skill_classification_tool_competition
	# package and emits both an HTML report and a terminal summary. Unlike
	# test-all, no test directories are ignored here.
	.PHONY: test-coverage
	test-coverage:
		pytest tests/ \
			--cov=hopcroft_skill_classification_tool_competition \
			--cov-report=html \
			--cov-report=term

	## Run fast tests only (exclude slow tests)
	# Same selection as test-all, minus every test carrying the `slow` marker.
	.PHONY: test-fast
	test-fast:
		pytest tests/ --verbose -m "not slow" \
			--ignore=tests/behavioral \
			--ignore=tests/deepchecks

	## Run behavioral tests
	# Runs tests/behavioral/ but skips test_model_training.py.
	.PHONY: test-behavioral
	test-behavioral:
		pytest tests/behavioral/ --verbose \
			--ignore=tests/behavioral/test_model_training.py

	## Run Great Expectations validation
	# Runs the `tests.test_gx` module inside the package as a script.
	# NOTE(review): the other test targets use the top-level tests/ directory;
	# confirm that hopcroft_skill_classification_tool_competition/tests/test_gx.py
	# is the intended location.
	.PHONY: validate-gx
	validate-gx:
		${PYTHON_INTERPRETER} -m hopcroft_skill_classification_tool_competition.tests.test_gx

	## Run Deepchecks validation
	# Executes the driver script tests/deepchecks/run_all_deepchecks.py directly
	# (by file path, not as a module).
	.PHONY: validate-deepchecks
	validate-deepchecks:
		${PYTHON_INTERPRETER} tests/deepchecks/run_all_deepchecks.py

	## Run all validation and tests
	# Aggregate target: it has no recipe of its own and only pulls in the four
	# prerequisites listed below.
	.PHONY: test-complete
	test-complete: \
		test-all \
		validate-gx \
		validate-deepchecks \
		test-behavioral

	#################################################################################
	# Self Documenting Commands #
	#################################################################################

	# Running plain `make` prints the rule overview.
	.DEFAULT_GOAL := help

	# Python program, written as one logical line via the trailing "; \"
	# continuations. It reads makefile text on stdin, finds every line that
	# starts with a double hash followed by a space, pairs it with the next rule
	# name made only of letters, '_' and '-', and prints "name  description".
	# The name column is sized to the longest rule name plus two spaces (never
	# below 25), so a long name can no longer run straight into its description.
	define PRINT_HELP_PYSCRIPT
	import re, sys; \
	lines = '\n'.join([line for line in sys.stdin]); \
	matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
	width = max([25] + [len(name) + 2 for _, name in matches]); \
	print('Available rules:\n'); \
	print('\n'.join(['{:{}}{}'.format(name, width, desc) for desc, name in matches]))
	endef
	export PRINT_HELP_PYSCRIPT

	# Declared phony so that a file or directory named `help` cannot make this
	# target look up to date. The makefile itself is fed to the script on stdin.
	.PHONY: help
	help:
		@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)

	################################################################################
	# API COMMANDS #
	################################################################################

	## Run API in development mode
	# Starts the app with `fastapi dev`. The entry-point path is held in a
	# target-specific variable so it is spelled out once.
	.PHONY: api-dev
	api-dev: api_entrypoint := hopcroft_skill_classification_tool_competition/main.py
	api-dev:
		fastapi dev $(api_entrypoint)

	## Run API in production mode
	# Starts the app with `fastapi run`. The entry-point path is held in a
	# target-specific variable so it is spelled out once.
	.PHONY: api-run
	api-run: api_entrypoint := hopcroft_skill_classification_tool_competition/main.py
	api-run:
		fastapi run $(api_entrypoint)

	## Test API health check (requires running API)
	# Sends GET /health to the API on 127.0.0.1:8000.
	# --fail makes curl exit non-zero when the server answers with an HTTP error
	# status (>= 400); without it curl exits 0 on such responses and this
	# "test" could never fail on an unhealthy API.
	.PHONY: test-api-health
	test-api-health:
		@echo "Testing API health endpoint..."
		curl --fail -X GET "http://127.0.0.1:8000/health"

	## Test API POST /predict (requires running API)
	# Posts a fixed sample JSON payload (issue_text + repo_name) to /predict on
	# 127.0.0.1:8000.
	# --fail makes curl exit non-zero when the server answers with an HTTP error
	# status (>= 400), so a rejected or failing prediction fails the target.
	.PHONY: test-api-predict
	test-api-predict:
		@echo "Testing prediction endpoint..."
		curl --fail -X POST "http://127.0.0.1:8000/predict" \
			-H "Content-Type: application/json" \
			-d '{"issue_text": "Fix critical bug in authentication and login flow with OAuth2", "repo_name": "my-repo"}'

	## Test API GET /predictions (requires running API)
	# Requests /predictions with the query parameter limit=5 from the API on
	# 127.0.0.1:8000.
	# --fail makes curl exit non-zero when the server answers with an HTTP error
	# status (>= 400), so a failing endpoint fails the target.
	.PHONY: test-api-list
	test-api-list:
		@echo "Testing list predictions endpoint..."
		curl --fail "http://127.0.0.1:8000/predictions?limit=5"

	## Test API GET /predictions/{run_id} (requires running API and valid run_id)
	# Requires RUN_ID on the command line: make test-api-get-prediction RUN_ID=<id>.
	# The shell guard aborts with exit status 1 before any request is made when
	# RUN_ID is empty.
	# --fail makes curl exit non-zero when the server answers with an HTTP error
	# status (>= 400), e.g. for a run id the server rejects.
	.PHONY: test-api-get-prediction
	test-api-get-prediction:
		@echo "Testing get specific prediction endpoint..."
		@echo "Usage: make test-api-get-prediction RUN_ID=<your_run_id>"
		@if [ -z "$(RUN_ID)" ]; then echo "Error: RUN_ID not set. Example: make test-api-get-prediction RUN_ID=abc123"; exit 1; fi
		curl --fail "http://127.0.0.1:8000/predictions/$(RUN_ID)"

	## Run all API tests (requires running API)
	# Aggregate target: runs the health, predict and list checks, then prints a
	# closing message. test-api-get-prediction is not included because it needs
	# a RUN_ID.
	# printf is used for the message because `echo "\n..."` is not portable:
	# depending on which shell /bin/sh is, echo prints either a blank line or a
	# literal backslash-n.
	.PHONY: test-api-all
	test-api-all: test-api-health test-api-predict test-api-list
		@printf '\n All API tests completed!\n'