AzureOpenAI API Examples

You can also use an OpenAI model deployed in Azure AI. To do so, you will need to provide a few pieces of information, all of which are available from the Azure AI playground:

[1]:
# Number of seconds passed to the time.sleep() calls later in this notebook,
# which run between the cells that call the model. 0 means no pause.
# NOTE(review): presumably raised to stay under API rate limits — confirm.
call_delay_secs = 0
[2]:
import os

# If using DefaultAzureCredential below
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Each setting is read from an environment variable; if the variable is
# unset, the placeholder string is used as a reminder of what is missing.

# Name of the deployed model, such as 'gpt-4' or 'gpt-3.5-turbo'
model = os.environ.get("AZUREAI_OPENAI_CHAT_MODEL", "Please set the model")

# Deployment URL, as shown in the Azure AI playground ('view code').
# It ends with 'openai.azure.com'
azure_endpoint = os.environ.get(
    "AZUREAI_OPENAI_CHAT_ENDPOINT",
    "Please set the endpoint",
)

# Deployment name, as specified in the Azure portal
azure_deployment = os.environ.get(
    "AZUREAI_OPENAI_CHAT_DEPLOYMENT_NAME",
    "Please set the deployment name",
)

# Deployed API version, such as 2024-02-15-preview
azure_api_version = os.environ.get(
    "AZUREAI_OPENAI_CHAT_API_VERSION",
    "Please set the API version",
)

# Option 1 - API key: set the environment variable to the API key from the
# Azure AI playground, then uncomment the next line:
# api_key=os.getenv("AZUREAI_CHAT_KEY", "Please set API key")

# Option 2 - Entra authentication: build a bearer-token provider from
# DefaultAzureCredential for the Cognitive Services scope
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default",
)

We can now construct the guidance model object:

[3]:
from guidance import models, gen
from guidance.models import create_azure_openai_model

# Build the guidance model object from the settings read from the
# environment earlier in this notebook.
azureai_model = create_azure_openai_model(
    model_name=model,
    azure_deployment=azure_deployment,
    azure_endpoint=azure_endpoint,
    api_version=azure_api_version,
    # For authentication, use either the API key (this also requires
    # uncommenting the api_key assignment in the configuration cell):
    # api_key=api_key
    # or the Entra token provider, as done here:
    azure_ad_token_provider=token_provider,
)

We can use the model as before:

[4]:
from guidance import system, user, assistant

# Build up a chat turn by turn. The result of each `+` / `+=` is kept in `lm`.
# NOTE(review): system()/user()/assistant() are guidance context managers;
# presumably they tag the text added inside them with that chat role — confirm
# against the guidance documentation.
with system():
    # Start from the model object so that azureai_model itself is not rebound
    lm = azureai_model + "You are a helpful assistant."

with user():
    lm += "What is the meaning of life?"

with assistant():
    # Ask the model for a completion; presumably captured under the name "response"
    lm += gen("response")
[5]:
import time

# Pause for call_delay_secs seconds before continuing with the notebook
time.sleep(call_delay_secs)

Azure OpenAI (AOAI) models also support constrained generation using a JSON schema:

[11]:
import json

from guidance import json as gen_json

# One colour component: an integer from 0 to 255
_colour_component = {"type": "integer", "minimum": 0, "maximum": 255}

# The fields of a cat description
_cat_properties = {
    # Name must be at least four characters long
    "name": {"type": "string", "minLength": 4},
    # Age is a whole number from 0 to 20
    "age": {"type": "integer", "minimum": 0, "maximum": 20},
    # Colour is an array of exactly three components
    # (presumably red, green, blue - the schema itself does not say)
    "colour": {
        "type": "array",
        "items": _colour_component,
        "minItems": 3,
        "maxItems": 3,
    },
}

# JSON schema for a cat: every field above is required and no others are allowed
cat_schema = {
    "type": "object",
    "properties": _cat_properties,
    "required": list(_cat_properties),
    "additionalProperties": False,
}

# Set up the conversation: a system prompt followed by the user's request
with system():
    lm = azureai_model + "You are an expert in the ancient lore of cats"

with user():
    lm += "Create a simple description of a cat in JSON, including the name, age & colour"

with assistant():
    # Generate the reply constrained by cat_schema; the generated text is
    # read back below under the key "my_cat_text"
    lm += gen_json(schema=cat_schema, name="my_cat_text", temperature=1.0)


# Parse the captured JSON text into a Python object
my_cat = json.loads(lm["my_cat_text"])

# Pretty-print the parsed result with 4-space indentation
print(json.dumps(my_cat, indent=4))
{
    "name": "Whiskers",
    "age": 3,
    "colour": [
        255,
        200,
        150
    ]
}
[7]:
# Pause for call_delay_secs seconds after the JSON generation cell
time.sleep(call_delay_secs)
[ ]: