Spaces:
Paused
Paused
import logging
import os

# NOTE: these environment overrides MUST be set before importing
# `transformers` / `huggingface_hub`, which read them at import time.
# 1. Use the mirror to bypass connection issues.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
# 2. Increase timeouts so downloads don't fail on slow handshakes.
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "300"
os.environ["HF_HUB_ETAG_TIMEOUT"] = "300"

from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer  # noqa: E402

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ModelDownloader")
def _cache_model(label, model_id, model_cls):
    """Download and cache the tokenizer and weights for one model.

    Args:
        label: Human-readable name used in log messages.
        model_id: Hugging Face Hub repository id.
        model_cls: Auto-model class whose ``from_pretrained`` fetches the weights.

    Returns:
        True if both tokenizer and weights were cached, False on any error.
    """
    try:
        AutoTokenizer.from_pretrained(model_id)
        model_cls.from_pretrained(model_id)
    except Exception:
        # Broad catch is deliberate: one failed download must not abort the
        # others. logger.exception preserves the full traceback in the log.
        logger.exception("❌ %s Failed", label)
        return False
    logger.info("✅ %s Cached!", label)
    return True


def download_everything():
    """Pre-download all required models into the local Hugging Face cache.

    Network I/O only; each model is attempted independently so one failure
    does not block the others. Interrupted downloads resume on re-run
    because the Hub cache keeps partial files.
    """
    logger.info("🚀 STARTING ROBUST DOWNLOAD...")

    # 1. CodeBERT (encoder-only model).
    logger.info("📦 Downloading CodeBERT (Microsoft)...")
    _cache_model("CodeBERT", "microsoft/codebert-base", AutoModel)

    # 2. Qwen coder model — the heavy (~3GB) download that was failing.
    logger.info("🧠 Downloading Qwen 1.5B (This is ~3GB, please wait)...")
    qwen_ok = _cache_model(
        "Qwen 1.5B", "Qwen/Qwen2.5-Coder-1.5B-Instruct", AutoModelForCausalLM
    )
    if not qwen_ok:
        # NOTE(review): original indentation was lost in extraction; the TIP
        # text ("If it fails, just run this script again") reads as
        # failure-path advice, so it is logged only on failure here.
        logger.info(
            "💡 TIP: If it fails, just run this script again. It will resume where it left off."
        )


if __name__ == "__main__":
    download_everything()