Exercise: Embedding similarity

Task: Use the OpenAI embeddings API to compute the similarity between two given words or phrases.


Show solution
import numpy as np

def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """Return the cosine similarity of two 1-D vectors.

    Args:
        vec1: First vector. Any 1-D array-like of numbers is accepted
            (e.g. a plain list of floats); it is converted to a NumPy array.
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms, as a builtin ``float``. If either vector has zero
        norm the division is 0/0 and the result is ``nan``.
    """
    # Convert once so lists and arrays are handled the same way.
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    # float() turns the NumPy scalar into the builtin type the signature promises.
    return float(np.dot(a, b) / norm_product)
import os

from llm_utils.client import get_openai_client, OpenAIModels

# Embedding model identifier, read from the project's model enum.
MODEL = OpenAIModels.EMBED.value

# get the OpenAI client
# NOTE(review): in the original text this call was left unclosed (a syntax
# error) and its argument list is not visible. It is closed here without
# arguments; confirm against `llm_utils.client.get_openai_client` whether
# credentials or an endpoint (e.g. read via `os`) must be passed explicitly.
client = get_openai_client()

# create the embeddings
word_1 = "king"
word_2 = "queen"

# One request per word; the embedding vector is taken from the first item of
# each response's `data`.
response_1 = client.embeddings.create(input=word_1, model=MODEL)
embedding_1 = response_1.data[0].embedding
response_2 = client.embeddings.create(input=word_2, model=MODEL)
embedding_2 = response_2.data[0].embedding

# calculate the cosine similarity (a similarity score, not a distance)
dist_12 = cosine_similarity(embedding_1, embedding_2)
print(f"Cosine similarity between {word_1} and {word_2}: {round(dist_12, 3)}.")
Cosine similarity between king and queen: 0.915.
# Embed a third word and compare it with the first one.
word_3 = "pawn"
response_3 = client.embeddings.create(model=MODEL, input=word_3)
embedding_3 = response_3.data[0].embedding

# Cosine similarity between the embeddings of word_1 and word_3.
dist_13 = cosine_similarity(embedding_1, embedding_3)
rounded_13 = round(dist_13, 3)
print(f"Cosine similarity between {word_1} and {word_3}: {rounded_13}.")
Cosine similarity between king and pawn: 0.829.

Task: Use the OpenAI embeddings API and simple embedding arithmetic (for example, adding two embedding vectors) to add context to a word before comparing it with other words.


Show solution
words = ["python", "snake", "javascript", "reptile"]

# Request embeddings for all words in a single call.
response = client.embeddings.create(model=MODEL, input=words)
embeddings = [item.embedding for item in response.data]

# Plain pairwise similarities of the first word with the second and third.
sim_0_1 = cosine_similarity(embeddings[0], embeddings[1])
sim_0_2 = cosine_similarity(embeddings[0], embeddings[2])

# Add the fourth word's vector to the first word's vector, then compare the
# sum with the second word.
combined = np.array(embeddings[0]) + np.array(embeddings[3])
sim_combined_1 = cosine_similarity(combined, embeddings[1])

print(f"Similarity between '{words[0]}' and '{words[1]}': {round(sim_0_1, 3)}.")
print(f"Similarity between '{words[0]}' and '{words[2]}': {round(sim_0_2, 3)}.")
print(f"Similarity between '{words[0]} + {words[3]}' and '{words[1]}': {round(sim_combined_1, 3)}.")
Similarity between 'python' and 'snake': 0.841.
Similarity between 'python' and 'javascript': 0.85.
Similarity between 'python + reptile' and 'snake': 0.894.
Back to top