Skill

Generate Images with Grok and OpenAI

Name: Generate Images with Grok and OpenAI
Availability: OnlineOnly
Author: CAMEL-AI

Image Gen Toolkit is a Python class toolkit that generates images using Grok and OpenAI models, enabling AI assistants to create visual content.

Get skill

Works with OpenAI

CAMEL-AI

Maintainer?

Spark score

out of 100

Updated today

Version 0.2.90

Models

gpt 4o

Add to Favorites

Why it matters

Use image generation models from Grok and OpenAI to create high-quality images for your projects. This toolkit simplifies the image generation process, allowing you to produce visuals efficiently.

Outcomes

What it gets done

Generate images using Grok models

Generate images using OpenAI models

Integrate image generation into your applications

Install

Add it to your toolbox

Run in your project directory:

curl -fsSL https://spark.entire.vc/get/camel-imagegenerationtoolkit | bash

Capabilities

What this skill does

Generate images

Creates images from text prompts or templates.

Overview

Image Gen Toolkit

What it does

Image Gen Toolkit is a Python class toolkit that provides AI assistants with image generation capabilities using Grok and OpenAI models. It offers a unified interface for creating visual content programmatically, abstracting the complexity of working with multiple image generation APIs. The toolkit is part of the CAMEL framework and enables developers to add image creation functionality to their AI agents.

How it connects

Use this toolkit when building AI assistants or agents that need to generate images on demand from text prompts, such as creating illustrations, visual responses, or custom graphics within automated workflows. It is particularly useful when you need to support both Grok and OpenAI image generation providers through a single, consistent interface.

Source code

import base64
import os
from io import BytesIO
from typing import ClassVar, List, Literal, Optional, Tuple, Union

from openai import OpenAI
from PIL import Image

from camel.logger import get_logger
from camel.toolkits import FunctionTool
from camel.toolkits.base import BaseToolkit
from camel.utils import MCPServer, api_keys_required

# Module-level logger, created by passing this module's ``__name__`` to
# ``get_logger``.
logger = get_logger(__name__)

@MCPServer()
class ImageGenToolkit(BaseToolkit):
r"""A class toolkit for image generation using Grok and OpenAI models.

The model is chosen at construction time and must be listed in either
``GROK_MODELS`` or ``OPENAI_MODELS``. Requests for both providers are sent
through an ``openai.OpenAI`` client, and ``generate_image`` is the single
function exposed to agents via ``get_tools``.
"""

# Model names treated as Grok models (requests carry only prompt/model/n).
GROK_MODELS: ClassVar[List[str]] = [
    "grok-2-image",
    "grok-2-image-latest",
    "grok-2-image-1212",
]
# Model names treated as OpenAI models (model-specific options are added).
OPENAI_MODELS: ClassVar[List[str]] = [
    "gpt-image-1",
    "dall-e-3",
    "dall-e-2",
]

def __init__(
    self,
    model: Optional[
        Literal[
            "gpt-image-1",
            "dall-e-3",
            "dall-e-2",
            "grok-2-image",
            "grok-2-image-latest",
            "grok-2-image-1212",
        ]
    ] = "dall-e-3",
    timeout: Optional[float] = None,
    api_key: Optional[str] = None,
    url: Optional[str] = None,
    size: Optional[
        Literal[
            "256x256",
            "512x512",
            "1024x1024",
            "1536x1024",
            "1024x1536",
            "1792x1024",
            "1024x1792",
            "auto",
        ]
    ] = "1024x1024",
    quality: Optional[
        Literal["auto", "low", "medium", "high", "standard", "hd"]
    ] = "standard",
    response_format: Optional[Literal["url", "b64_json"]] = "b64_json",
    background: Optional[
        Literal["transparent", "opaque", "auto"]
    ] = "auto",
    style: Optional[Literal["vivid", "natural"]] = None,
    working_directory: Optional[str] = "image_save",
):
    r"""Initializes a new instance of the ImageGenToolkit class.

    Generation options are fixed in the constructor (rather than exposed
    per call) to prevent the agent from making invalid API calls with
    model-specific parameters. For example, the ``style`` argument is only
    supported by ``dall-e-3``.

    Args:
        api_key (Optional[str]): The API key for authenticating
            with the image model service. When omitted, the
            ``XAI_API_KEY`` (Grok models) or ``OPENAI_API_KEY`` (OpenAI
            models) environment variable is used. (default: :obj:`None`)
        url (Optional[str]): The url to the image model service. For Grok
            models it falls back to ``https://api.x.ai/v1`` when not
            given; for OpenAI models it falls back to the
            ``OPENAI_API_BASE_URL`` environment variable.
            (default: :obj:`None`)
        model (Optional[str]): The model to use.
            (default: :obj:`"dall-e-3"`)
        timeout (Optional[float]): The timeout value for API requests
            in seconds. If None, no timeout is applied.
            (default: :obj:`None`)
        size (Optional[Literal["256x256", "512x512", "1024x1024",
            "1536x1024", "1024x1536", "1792x1024", "1024x1792",
            "auto"]]):
            The size of the image to generate.
            (default: :obj:`"1024x1024"`)
        quality (Optional[Literal["auto", "low", "medium", "high",
            "standard", "hd"]]): The quality of the image to
            generate. Different models support different values.
            (default: :obj:`"standard"`)
        response_format (Optional[Literal["url", "b64_json"]]):
            The format of the response. (default: :obj:`"b64_json"`)
        background (Optional[Literal["transparent", "opaque", "auto"]]):
            The background of the image. (default: :obj:`"auto"`)
        style (Optional[Literal["vivid", "natural"]]): The style of the
            image. (default: :obj:`None`)
        working_directory (Optional[str]): The path to save the generated
            image. (default: :obj:`"image_save"`)

    Raises:
        ValueError: If ``model`` is not listed in ``GROK_MODELS`` or
            ``OPENAI_MODELS``.
    """
    super().__init__(timeout=timeout)
    supported_models = self.GROK_MODELS + self.OPENAI_MODELS
    if model not in supported_models:
        available_models = sorted(supported_models)
        raise ValueError(
            f"Unsupported model: {model}. "
            f"Supported models are: {available_models}"
        )

    if model in self.OPENAI_MODELS:
        api_key, base_url = self.get_openai_credentials(url, api_key)
    else:
        # Default url for Grok models. Only applied when the caller did
        # not pass one, so an explicit endpoint (e.g. a proxy) is honored.
        url = url or "https://api.x.ai/v1"
        api_key, base_url = self.get_grok_credentials(url, api_key)

    self.client = OpenAI(api_key=api_key, base_url=base_url)
    self.model = model
    self.size = size
    self.quality = quality
    self.response_format = response_format
    self.background = background
    self.style = style
    # A None/empty working directory falls back to the default folder.
    self.working_directory: str = working_directory or "image_save"

def base64_to_image(self, base64_string: str) -> Optional[Image.Image]:
    r"""Decode a base64 string and open the decoded bytes as a PIL image.

    Args:
        base64_string (str): The base64 encoded string of the image.

    Returns:
        Optional[Image.Image]: The opened PIL Image object, or None if
            decoding or opening fails (the failure is logged).
    """
    try:
        # Decode to raw bytes, then let PIL read them from an in-memory
        # buffer.
        raw_bytes = base64.b64decode(base64_string)
        return Image.open(BytesIO(raw_bytes))
    except Exception as e:
        logger.error(
            f"An error occurred while converting base64 to image: {e}"
        )
    return None

def _build_base_params(self, prompt: str, n: Optional[int] = None) -> dict:
    r"""Build base parameters dict for Image Model API calls.

    Only parameters that the configured model accepts are included, and
    options that are None are left out. A configured ``quality`` value that
    the selected model does not accept (for instance the constructor
    default ``"standard"`` combined with ``gpt-image-1``) is omitted, with
    a warning, so the API applies its own default instead of rejecting the
    request.

    Args:
        prompt (str): The text prompt for the image operation.
        n (Optional[int]): The number of images to generate. Left out of
            the request when None. (default: :obj:`None`)

    Returns:
        dict: Parameters dictionary with non-None values.
    """
    params: dict = {"prompt": prompt, "model": self.model}

    # basic parameters supported by all models
    if n is not None:
        params["n"] = n

    # Grok requests carry only prompt, model and n.
    if self.model in self.GROK_MODELS:
        return params

    if self.size is not None:
        params["size"] = self.size

    # Quality values accepted per model; dall-e-2 takes no quality option.
    accepted_quality = {
        "dall-e-3": ("standard", "hd"),
        "gpt-image-1": ("auto", "low", "medium", "high"),
    }.get(self.model)
    if accepted_quality is not None and self.quality is not None:
        if self.quality in accepted_quality:
            params["quality"] = self.quality
        else:
            logger.warning(
                f"Quality '{self.quality}' is not supported by model "
                f"'{self.model}' (supported: {list(accepted_quality)}); "
                "omitting it so the API default is used."
            )

    # response_format: dall-e-2 and dall-e-3 only (gpt-image-1 seems to
    # default to b64_json and is not sent the option).
    if self.model in ("dall-e-2", "dall-e-3"):
        if self.response_format is not None:
            params["response_format"] = self.response_format

    # style: dall-e-3 only.
    if self.model == "dall-e-3" and self.style is not None:
        params["style"] = self.style

    # background: gpt-image-1 only.
    if self.model == "gpt-image-1" and self.background is not None:
        params["background"] = self.background

    return params

def _handle_api_response(
    self,
    response,
    image_name: Union[str, List[str]],
    operation: str,
) -> str:
    r"""Handle API response from image operations.

    Each returned item is either reported by URL or, when it carries
    base64 data, decoded and written into ``self.working_directory``.
    Existing files are never overwritten.

    Args:
        response: The response object from image model API. Its ``data``
            attribute is read as the list of returned images.
        image_name (Union[str, List[str]]): Name(s) for the saved image
            file(s). If str, the same name is used for all images (will
            cause error for multiple images saved to disk). If list, must
            have exactly the same length as the number of images
            generated.
        operation (str): Operation type for success message ("generated").

    Returns:
        str: Success message with image path/URL or error message. When
            only some of several images could be processed, the message
            states how many succeeded and lists every item's outcome.
    """
    source = "Grok" if self.model in self.GROK_MODELS else "OpenAI"
    if not response.data:
        error_msg = f"No image data returned from {source} API."
        logger.error(error_msg)
        return error_msg

    total = len(response.data)

    # Validate image_name parameter
    if isinstance(image_name, list):
        if len(image_name) != total:
            error_msg = (
                f"Error: Number of image names"
                f" ({len(image_name)}) does not match number of "
                f"images generated({total})"
            )
            logger.error(error_msg)
            return error_msg
        image_names = image_name
    else:
        # If string, use same name for all images
        image_names = [image_name] * total

    results = []  # one entry per image, in response order
    succeeded = 0  # entries in ``results`` that are real successes

    for index, (image_data, name) in enumerate(
        zip(response.data, image_names), start=1
    ):
        image_url = getattr(image_data, "url", None)
        image_b64 = getattr(image_data, "b64_json", None)

        if image_url:
            results.append(f"Image URL: {image_url}")
            succeeded += 1
            continue

        if not image_b64:
            error_msg = (
                f"No valid image data (URL or base64) found in image {index}"
            )
            logger.error(error_msg)
            results.append(error_msg)
            continue

        # Save the image from base64
        image_bytes = base64.b64decode(image_b64)
        os.makedirs(self.working_directory, exist_ok=True)
        # NOTE(review): the name is joined as-is; if names can come from an
        # untrusted agent, consider rejecting path separators / "..".
        image_path = os.path.join(self.working_directory, f"{name}")

        try:
            # "xb" creates the file exclusively, so an existing file is
            # detected atomically instead of via a separate exists() check.
            with open(image_path, "xb") as f:
                f.write(image_bytes)
        except FileExistsError:
            error_msg = (
                f"Error: File '{image_path}' already exists. "
                "Please use a different image_name."
            )
            logger.error(error_msg)
            return error_msg
        except Exception as e:
            error_msg = f"Error saving image to '{image_path}': {e!s}"
            logger.error(error_msg)
            return error_msg

        results.append(f"Image saved to {image_path}")
        succeeded += 1

    # Nothing usable came back: report a failure, never a success message.
    if succeeded == 0:
        error_msg = "No valid image data found in any response"
        logger.error(error_msg)
        return error_msg

    if total == 1:
        return f"Image {operation} successfully. {results[0]}"

    if succeeded == total:
        header = f"{total} images {operation} successfully:\n"
    else:
        header = f"{succeeded} of {total} images {operation} successfully:\n"
    return header + "\n".join(
        f"  {i}. {result}" for i, result in enumerate(results, start=1)
    )

def generate_image(
    self,
    prompt: str,
    image_name: Union[str, List[str]] = "image.png",
    n: int = 1,
) -> str:
    r"""Generate an image using image models.
    The generated image will be saved locally (for ``b64_json`` response
    formats) or an image URL will be returned (for ``url`` response
    formats).

    Args:
        prompt (str): The text prompt to generate the image.
        image_name (Union[str, List[str]]): The name(s) of the image(s) to
            save. The image name must end with `.png`. If str: same name
            used for all images (causes error if n > 1). If list: must
            match the number of images being generated (n parameter).
            (default: :obj:`"image.png"`)
        n (int): The number of images to generate. (default: :obj:`1`)

    Returns:
        str: A success message containing the saved image path(s) or the
            image URL(s), or an error message describing what went wrong.
    """
    # A name list that cannot match the request is rejected before the API
    # is called; otherwise the mismatch would only surface after the
    # images had already been generated.
    if isinstance(image_name, list) and len(image_name) != n:
        error_msg = (
            f"Error: Number of image names ({len(image_name)}) does not "
            f"match number of images requested ({n})"
        )
        logger.error(error_msg)
        return error_msg

    try:
        params = self._build_base_params(prompt, n)
        response = self.client.images.generate(**params)
        return self._handle_api_response(response, image_name, "generated")
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        error_msg = f"An error occurred while generating image: {e}"
        logger.error(error_msg)
        return error_msg

@api_keys_required([("api_key", "XAI_API_KEY")])
def get_grok_credentials(self, url, api_key) -> Tuple[str, str]:  # type: ignore[return-value]
    r"""Resolve the API key and base URL used for Grok models.

    Args:
        url (str): The base URL for the Grok API; returned unchanged.
        api_key (str): The API key for the Grok API. When empty or None,
            the ``XAI_API_KEY`` environment variable is used instead.

    Returns:
        tuple: (api_key, base_url)
    """
    # An explicitly passed key takes precedence over the environment.
    resolved_key = api_key if api_key else os.getenv("XAI_API_KEY")
    return resolved_key, url

@api_keys_required([("api_key", "OPENAI_API_KEY")])
def get_openai_credentials(self, url, api_key) -> Tuple[str, str | None]:  # type: ignore[return-value]
    r"""Resolve the API key and base URL used for OpenAI models.

    Args:
        url (str): The base URL for the OpenAI API. When empty or None,
            the ``OPENAI_API_BASE_URL`` environment variable is used
            instead (which may itself be unset).
        api_key (str): The API key for the OpenAI API. When empty or
            None, the ``OPENAI_API_KEY`` environment variable is used
            instead.

    Returns:
        Tuple[str, str | None]: (api_key, base_url)
    """
    # Explicit arguments take precedence over the environment.
    resolved_key = api_key if api_key else os.getenv("OPENAI_API_KEY")
    resolved_url = url if url else os.getenv("OPENAI_API_BASE_URL")
    return resolved_key, resolved_url

def get_tools(self) -> List[FunctionTool]:
    r"""Returns the toolkit's functions wrapped as FunctionTool objects.

    Returns:
        List[FunctionTool]: A single-element list wrapping
            ``generate_image``.
    """
    image_tool = FunctionTool(self.generate_image)
    return [image_tool]

# Backward compatibility alias

OpenAIImageToolkit = ImageGenToolkit

Discussion