Exercise: NER with tool calling

Task: Create a small script that uses tool calling (also known as function calling) to extract the following named entities from a given text: City, State, Person.

Instructions:

Define an OpenAI tool with a function named named_entity_recognition.
Choose an appropriate output format, for example: {"named_entities": [{"entity": "Mike", "label": "Person"}, {"entity": "Münster", "label": "City"}]}
Define a matching prompt for the system role and provide the text input in the user role.
Extract the result.

# prerequisites
import os
from llm_utils.client import get_openai_client

# Model name passed both to the client factory and to every completion request.
MODEL = "gpt-4o"

# The config file location comes from the CONFIG_PATH environment variable
# (None when the variable is not set).
client = get_openai_client(model=MODEL, config_path=os.environ.get("CONFIG_PATH"))

# here goes your code

Show solution

# Tool (function-calling) schema passed to the chat completions request.
# It declares one function, named_entity_recognition, whose single argument
# "named_entities" is a list of {"entity": ..., "label": ...} objects.
tools = [
    {
        "type": "function",
        "function": {
            "name": "named_entity_recognition",
            "description": "Extract the named entities from the given text.",
            "parameters": {
                "type": "object",
                "properties": {
                    "named_entities": {
                        "type": "array",
                        "description": "A list of all extracted named entities in form of dictionaries containing the entity name and the label",
                        "items": {
                            "type": "object",
                            "properties": {
                                "entity": {
                                    "type": "string",
                                    "description": "The named entity as it appears in the text",
                                },
                                "label": {
                                    "type": "string",
                                    # Restrict the label to the three categories the
                                    # exercise asks for, so the schema itself rules
                                    # out any other label.
                                    "enum": ["City", "State", "Person"],
                                    "description": "The category of the named entity",
                                },
                            },
                            "required": ["entity", "label"],
                        },
                    },
                },
                "required": ["named_entities"],
            },
        },
    }
]

# define the prompts: the system message carries the extraction instructions,
# the user message carries the text to analyse
system_prompt = "Extract all named entities from the provided text. Possible labels are 'City', 'State' or 'Person'. If no named entities are contained in the text, do not make assumptions and return nothing."
user_text = "Leonard Hoffstaedter lives in Pasadena, CA."
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_text},
]

# tool_choice names the function explicitly so the answer arrives as a call to
# named_entity_recognition rather than as free text
forced_tool = {"type": "function", "function": {"name": "named_entity_recognition"}}
response = client.chat.completions.create(
    model=MODEL,
    messages=messages,
    tools=tools,
    tool_choice=forced_tool,
)
response

ChatCompletion(id='chatcmpl-99ALw7LjaBzZ63s5CMt9wDGn3aWhM', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_1aw75NLIUiEpdYMztdXRDZEh', function=Function(arguments='{\n"named_entities": [\n  {\n    "entity": "Leonard Hoffstaedter",\n    "label": "Person"\n  },\n  {\n    "entity": "Pasadena",\n    "label": "City"\n  },\n  {\n    "entity": "CA",\n    "label": "State"\n  }\n]\n}', name='named_entity_recognition'), type='function')]), content_filter_results={})], created=1711971776, model='gpt-4', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=68, prompt_tokens=142, total_tokens=210), prompt_filter_results=[{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}])

# retrieve the result
import json

# the arguments of the first tool call arrive as a JSON string; decode them
tool_call = response.choices[0].message.tool_calls[0]
result = json.loads(tool_call.function.arguments)

# print one "entity: label" line per extracted entity
for item in result["named_entities"]:
    print(item["entity"], item["label"], sep=": ")

Leonard Hoffstaedter: Person
Pasadena: City
CA: State