Text Generation
Transformers
PyTorch
codegen
Files changed (1)
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
+ # Install required packages before running (use this magic in a notebook cell, or drop the leading % in a terminal):
+ # %pip install azure-ai-ml azure-identity --upgrade --quiet
+
+ import os
+ import time
+ from azure.ai.ml import MLClient
+ from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment
+ from azure.identity import DefaultAzureCredential
+
+ # Set environment variables (replace with your values)
+ # Follow setup steps at: https://huggingface.co/docs/microsoft-azure/guides/configure-azure-ml-microsoft-foundry
+ os.environ["SUBSCRIPTION_ID"] = "<YOUR_SUBSCRIPTION_ID>"
+ os.environ["RESOURCE_GROUP"] = "<YOUR_RESOURCE_GROUP>"
+ os.environ["WORKSPACE_NAME"] = "<YOUR_WORKSPACE_NAME>"
+
+ # Generate unique names for endpoint and deployment
+ timestamp = str(int(time.time()))
+ os.environ["ENDPOINT_NAME"] = f"hf-ep-{timestamp}"
+ os.environ["DEPLOYMENT_NAME"] = f"hf-deploy-{timestamp}"
+
+ # Create Azure ML Client for Microsoft Foundry (classic)
+ client = MLClient(
+     credential=DefaultAzureCredential(),
+     subscription_id=os.getenv("SUBSCRIPTION_ID"),
+     resource_group_name=os.getenv("RESOURCE_GROUP"),
+     workspace_name=os.getenv("WORKSPACE_NAME"),
+ )
+
+ # Build model URI for the Hugging Face Azure registry
+ model_uri = "azureml://registries/HuggingFace/models/salesforce-codegen-350m-multi/labels/latest"
+
+ # Create endpoint and deployment
+ endpoint = ManagedOnlineEndpoint(name=os.getenv("ENDPOINT_NAME"))
+
+ deployment = ManagedOnlineDeployment(
+     name=os.getenv("DEPLOYMENT_NAME"),
+     endpoint_name=os.getenv("ENDPOINT_NAME"),
+     model=model_uri,
+     # Check https://huggingface.co/docs/microsoft-azure/foundry/hardware to see the available instances
+     instance_type="Standard_NC40ads_H100_v5",
+     instance_count=1,
+ )
+
+ # Deploy endpoint and deployment (this may take 10-15 minutes)
+ client.begin_create_or_update(endpoint).wait()
+ client.online_deployments.begin_create_or_update(deployment).wait()
+
+ print(f"Endpoint '{os.getenv('ENDPOINT_NAME')}' deployed successfully!")
+ print("You can now send requests to your endpoint via Microsoft Foundry or Azure Machine Learning.")
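Once the deployment is live, a quick way to sanity-check it is to invoke the endpoint with the same MLClient used above. The sketch below is not part of app.py: it assumes the SUBSCRIPTION_ID, RESOURCE_GROUP, WORKSPACE_NAME, ENDPOINT_NAME, and DEPLOYMENT_NAME environment variables are still set, and the request payload (an "inputs" field carrying the prompt) is an assumption based on the usual text-generation schema for Hugging Face models on Azure ML; adjust it to whatever your endpoint's scoring script actually expects.

    import json
    import os

    from azure.ai.ml import MLClient
    from azure.identity import DefaultAzureCredential

    # Reconnect to the same workspace used for the deployment.
    client = MLClient(
        credential=DefaultAzureCredential(),
        subscription_id=os.getenv("SUBSCRIPTION_ID"),
        resource_group_name=os.getenv("RESOURCE_GROUP"),
        workspace_name=os.getenv("WORKSPACE_NAME"),
    )

    # Assumed payload shape for a text-generation model; check your endpoint's expected schema.
    with open("sample_request.json", "w") as f:
        json.dump({"inputs": "def fibonacci(n):"}, f)

    # Send the request to the deployment created by app.py.
    response = client.online_endpoints.invoke(
        endpoint_name=os.getenv("ENDPOINT_NAME"),
        deployment_name=os.getenv("DEPLOYMENT_NAME"),
        request_file="sample_request.json",
    )
    print(response)

    # A managed online endpoint bills for its compute while it exists; delete it when you are done.
    # client.online_endpoints.begin_delete(name=os.getenv("ENDPOINT_NAME")).wait()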