Then use the token in the following code:

import os
import sys
import openai

api_token = os.environ.get('LEPTON_API_TOKEN')
openai.base_url = "https://octopus-v2.lepton.run/api/v1/"
openai.api_key = api_token

# List available models
print("==== Available models ====")
models = openai.models.list()
print(models)

model = "octopus-v2"

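# Stream a text completion; the prompt follows Octopus-v2's function-calling format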
completion = openai.completions.create(
    model=model,
    prompt="Below is the query from the users, please call the correct function and generate the parameters to call the function.\n\nQuery: Take a selfie for me with front camera \n\nResponse:",
    max_tokens=256,
    stream=True,
)

print(f"==== Model: {model} ====")
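# Print streamed tokens as they arrive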
for chunk in completion:
    content = chunk.choices[0].text
    if content:
        sys.stdout.write(content)
        sys.stdout.flush()
sys.stdout.write("\n")

The rate limit for the Model APIs is 10 requests per minute across all models under the Basic Plan. For pricing details, check out the pricing page. If you need a higher rate limit with an SLA or a dedicated deployment, please contact us.
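If your client occasionally exceeds the 10 requests per minute limit, a simple retry with exponential backoff keeps things running. The sketch below is a minimal example, assuming the openai Python SDK (v1+), which raises openai.RateLimitError on HTTP 429 responses; the helper name, retry count, and delays are illustrative and not part of the Lepton API.

import time
import openai

def create_with_backoff(max_retries=5, initial_delay=2.0, **kwargs):
    # Hypothetical helper: retries openai.completions.create when the
    # rate limit is hit, doubling the wait between attempts.
    delay = initial_delay
    for attempt in range(max_retries):
        try:
            return openai.completions.create(**kwargs)
        except openai.RateLimitError:
            if attempt == max_retries - 1:
                raise  # give up after the final attempt
            time.sleep(delay)
            delay *= 2  # exponential backoff

# Usage: pass the same arguments you would pass to openai.completions.create
# completion = create_with_backoff(model="octopus-v2", prompt="...", max_tokens=256)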