import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import OneHotEncoder
# Example corpus used for both the word-level and character-level encodings.
texts = ["hello world", "machine learning is fun"]

# Word-level encoding: fit the tokenizer to build the word -> index
# vocabulary, then encode each text as one binary row of the matrix.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
vocabulary = tokenizer.word_index
one_hot = tokenizer.texts_to_matrix(texts, mode='binary')

# Character-level encoding: join the texts, remove spaces, and keep the
# unique characters in sorted order so the row order is deterministic.
characters = sorted(set("".join(texts).replace(" ", "")))
encoder = OneHotEncoder(sparse_output=False)
# reshape(-1, 1) turns the flat character list into a single-column 2-D
# array (one character per row) before it is passed to the encoder.
char_one_hot = encoder.fit_transform(np.array(characters).reshape(-1, 1))

# Plot heatmap for one-hot encoding of characters: rows are labelled with the
# input characters, columns with the categories learned by the encoder.
plt.figure(figsize=(8, 5))
sns.heatmap(
    char_one_hot,
    annot=True,
    fmt=".1f",
    cmap="Blues",
    xticklabels=encoder.categories_[0],
    yticklabels=characters,
)
plt.title("One-Hot Encoding for Characters")
plt.xlabel("Character")
plt.ylabel("Position")
plt.show()