In this notebook, I analyzed whether tweets classified as hate speech tend to have negative sentiment. First, I trained a classifier for each dataset on top of a pre-trained BERT model. Then, I performed cross-inferencing to analyze whether hate speech tends to carry negative sentiment.
!pip install transformers -q
import random
import pandas as pd
import numpy as np
import re
import pickle
from itertools import product
import matplotlib.pyplot as plt
from string import punctuation
# Fix the NumPy RNG so the train/test shuffles later in the notebook are reproducible.
np.random.seed(seed=42)
# Sentiment dataset: semicolon-separated CSV (columns used later: "Tweets", "Label").
sentiment_dataset = pd.read_csv("sentiment.csv", sep=';', encoding="utf-8", engine="python")
# Hate speech dataset: tab-separated file (columns used later: "Tweet", "Label").
hate_speech_dataset = pd.read_csv("hatespeech.txt", sep='\t', engine="python")
# Display both dataframes for a quick sanity check (notebook cell output).
sentiment_dataset
hate_speech_dataset
Before feeding the data into the model, we first inspect the characteristics of a few rows.
# Show a few randomly chosen tweets so we can eyeball the raw text.
def print_tweet(tweets, num_of_print):
    """Print `num_of_print` randomly selected entries from `tweets`."""
    for _ in range(num_of_print):
        picked = random.randint(0, len(tweets) - 1)
        print(f"---{picked}: ")
        print(tweets[picked])
# Print 5 random raw tweets from each dataset to inspect their characteristics.
print("Dataset from Hate Speech:")
print_tweet(hate_speech_dataset["Tweet"], 5)
print("\n\n")
print("Dataset from Sentiment:")
print_tweet(sentiment_dataset["Tweets"], 5)
From the tweets above, we can see some recurring characteristics of the text (mentions, hashtags, retweet markers, URLs, and non-ASCII symbols).
To make the model better, I removed these characteristics from the text. I also removed most punctuation — except the exclamation mark, comma, dash, dot, and question mark — because those might influence the BERT computation. I also collapsed repeated symbols that appear in many tweets, and removed newlines and the multiple spaces left over after removing these entities.
# Preprocessing functions.
# Each takes a raw tweet string and returns a cleaned string; `cleaning_tweet`
# below chains them in a fixed order. Written as `def`s rather than lambdas
# assigned to names (PEP 8, E731) so tracebacks and docs carry real names.

def replace_new_line(tweet):
    """Replace every newline character with a single space."""
    return re.sub(r"\n", " ", tweet)

def remove_multiple_spaces(tweet):
    """Collapse runs of two or more whitespace characters into one space."""
    return re.sub(r"\s\s+", " ", tweet)

def remove_entities(tweet):
    """Strip @mentions and #hashtags (plus an optional trailing colon)."""
    return re.sub(r"[@#](\S+):?", "", tweet)

def remove_RT_word(tweet):
    """Drop the standalone retweet marker 'RT' / 'rt'."""
    return re.sub(r"\bRT\b|\brt\b", "", tweet)

def remove_URL(tweet):
    """Remove http/https URLs."""
    return re.sub(r"https?\S+", "", tweet)

def remove_multiple_symbols(symbol, tweet):
    """Collapse runs of a repeated punctuation `symbol` into a single one."""
    return re.sub(f"[{symbol}][{symbol}]+", f"{symbol}", tweet)

def remove_punctuation(tweet):
    """Remove punctuation except ! , - . ? (kept as possible sentiment cues)."""
    return re.sub(r"[$%&'()*+/:;<=>[\]^_`{|}~]", "", tweet)

def remove_non_unicode(tweet):
    """Remove non-ASCII characters (emoji, exotic symbols)."""
    return re.sub(r'[^\x00-\x7F]+', '', tweet)

def remove_undetected_unicode(tweet):
    """Remove leftover backslash-escaped fragments such as '\\x..' sequences."""
    return re.sub(r'\\\S+', '', tweet)

def lowercase_tweet(tweet):
    """Lowercase the whole tweet."""
    return str.lower(tweet)
def cleaning_tweet(tweet):
    """Run the full preprocessing pipeline over one raw tweet string.

    Order matters: entities/URLs are stripped from the raw text first,
    then the text is lowercased and de-punctuated, repeated punctuation
    is collapsed, and finally whitespace is normalised and trimmed.
    """
    pipeline = (
        remove_entities,
        remove_RT_word,
        remove_URL,
        lowercase_tweet,
        remove_punctuation,
        remove_non_unicode,
        remove_undetected_unicode,
    )
    cleaned = tweet
    for step in pipeline:
        cleaned = step(cleaned)
    # Collapse repeats of the punctuation marks we deliberately kept.
    for mark in ('!', ',', '-', '.', '?'):
        cleaned = remove_multiple_symbols(mark, cleaned)
    cleaned = remove_multiple_spaces(replace_new_line(cleaned))
    return cleaned.strip()
def before_after_tweet(tweets, num_of_print):
    """Print `num_of_print` random tweets alongside their cleaned version."""
    for _ in range(num_of_print):
        picked = random.randint(0, len(tweets) - 1)
        print("*" * 10)
        print(f"---{picked}: ")
        raw = tweets[picked]
        print(f"Before: {raw}")
        print(f"After: {cleaning_tweet(raw)}")
# Spot-check the cleaning pipeline on a handful of random rows from each dataset.
print("Dataset from Hate Speech:")
before_after_tweet(hate_speech_dataset["Tweet"], 5)
print("\n\n")
print("Dataset from Sentiment:")
before_after_tweet(sentiment_dataset["Tweets"], 5)
From the printout above, we can see that the text looks much better after preprocessing. Even though there are some misleading artifacts, such as a dot joining two words ("word.word") or stray punctuation ("anak muda alay.,"), overall the text has a neat representation.
# Apply the cleaning pipeline to every row; keep the result in a new column.
hate_speech_dataset["preprocessed_tweet"] = hate_speech_dataset["Tweet"].apply(cleaning_tweet)
sentiment_dataset["preprocessed_tweet"] = sentiment_dataset["Tweets"].apply(cleaning_tweet)
Classification steps referenced from https://www.tensorflow.org/tutorials/text/classify_text_with_bert
Both classifiers share the same approach: each tweet is fed through BERT and the `pooler_output` vector is used as the input feature for a small dense network. The ordered lists of layers for the hate speech and sentiment models are defined in `build_model` and `build_model_2` below.
from keras.layers import Input, Dropout, Dense
from keras import Sequential
from transformers import BertTokenizer, TFBertModel
import tensorflow as tf
def build_bert_model(model_name='cahya/bert-base-indonesian-522M'):
    """Load a pre-trained BERT encoder and its tokenizer.

    Args:
        model_name: Hugging Face model id. Defaults to the Indonesian
            BERT checkpoint used throughout this notebook; parameterized
            so other checkpoints can be swapped in without editing the body.

    Returns:
        (model, tokenizer): a TFBertModel and its matching BertTokenizer.
    """
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = TFBertModel.from_pretrained(model_name)
    return model, tokenizer
def get_pooled_output_from_text(text, model, tokenizer):
    """Encode `text` with BERT and return its flattened pooled output.

    The tokenizer produces TensorFlow tensors, the model's 'pooler_output'
    entry (one vector per input; 768-dim, matching the classifier heads
    below) is extracted, and the batch dimension is flattened away.
    """
    tokens = tokenizer(text, return_tensors='tf')
    bert_out = model(tokens)
    return tf.reshape(bert_out['pooler_output'], [-1])
def build_model():
    """Classifier head for the hate speech task.

    A small MLP over the 768-dim BERT pooled output, ending in a single
    sigmoid unit for binary classification.
    """
    classifier = Sequential([
        Dense(128, activation="relu", input_shape=(768, )),
        Dropout(0.4),
        Dense(256, activation="relu"),
        Dropout(0.5),
        Dense(1, activation="sigmoid"),
    ])
    return classifier
def build_model_2():
    """Classifier head for the sentiment task.

    Deeper than `build_model`; NOTE(review): the first Dense layer has no
    activation (a linear projection of the 768-dim input) — presumably
    intentional, worth confirming.
    """
    classifier = Sequential([
        Dense(64, input_shape=(768, )),
        Dense(256, activation="relu"),
        Dropout(0.2),
        Dense(512, activation="relu"),
        Dropout(0.4),
        Dense(1, activation="sigmoid"),
    ])
    return classifier
# Load the BERT encoder/tokenizer once; reused for both datasets below.
bert_model, bert_tokenizer = build_bert_model()
%%time
pooling_output_hate_speech = [
get_pooled_output_from_text(tweet, bert_model, bert_tokenizer) for tweet in hate_speech_dataset["preprocessed_tweet"].to_list()
]
pooling_output_hate_speech = np.array(pooling_output_hate_speech)
# Save to pickle since we took a time to get pooling output
pickle.dump(pooling_output_hate_speech, open("hate_speech_pooling_out", "wb"))
hate_speech_label_list = hate_speech_dataset["Label"].to_list()
hate_speech_label = np.array([1 if label == "HS" else 0 for label in hate_speech_label_list])
model_for_hate_speech = build_model()
hs_indexes = np.arange(len(pooling_output_hate_speech))
np.random.shuffle(hs_indexes)
TEST_LEN = int(0.2 * len(pooling_output_hate_speech))
hs_train_idx = hs_indexes[:-TEST_LEN]
hs_test_idx = hs_indexes[-TEST_LEN:]
hate_speech_X_train, hate_speech_y_train = pooling_output_hate_speech[hs_train_idx], hate_speech_label[hs_train_idx]
hate_speech_X_test, hate_speech_y_test = pooling_output_hate_speech[hs_test_idx], hate_speech_label[hs_test_idx]
model_for_hate_speech.compile(optimizer="adam", loss=tf.keras.losses.BinaryCrossentropy(), metrics=["accuracy"])
hs_result = model_for_hate_speech.fit(hate_speech_X_train, hate_speech_y_train, epochs=50, validation_data=(hate_speech_X_test, hate_speech_y_test))
plt.figure(figsize=(12, 8))
plt.plot(hs_result.epoch, hs_result.history['accuracy'], label="Train")
plt.plot(hs_result.epoch, hs_result.history['val_accuracy'], label="Validation")
plt.title("Hate Speech: Accuracy")
plt.legend()
plt.show()
For the hate speech model, we can see it did a good job, because the train and validation accuracy curves stay close together.
%%time
pooling_output_sentiment = [
get_pooled_output_from_text(tweet, bert_model, bert_tokenizer) for tweet in sentiment_dataset["preprocessed_tweet"].to_list()
]
pooling_output_sentiment = np.array(pooling_output_sentiment)
# Save to pickle since we took a time to get pooling output
pickle.dump(pooling_output_sentiment, open("sentiment_pooling_out", "wb"))
model_for_sentiment = build_model_2()
sentiment_list = sentiment_dataset["Label"].to_list()
sentiment_label = np.array([1 if label == "positif" else 0 for label in sentiment_list])
s_indexes = np.arange(len(pooling_output_sentiment))
np.random.shuffle(s_indexes)
TEST_LEN = int(0.2 * len(pooling_output_sentiment))
s_train_idx = s_indexes[:-TEST_LEN]
s_test_idx = s_indexes[-TEST_LEN:]
sentiment_X_train, sentiment_y_train = pooling_output_sentiment[s_train_idx], sentiment_label[s_train_idx]
sentiment_X_test, sentiment_y_test = pooling_output_sentiment[s_test_idx], sentiment_label[s_test_idx]
model_for_sentiment.compile(optimizer="adam", loss=tf.keras.losses.BinaryCrossentropy(), metrics=["accuracy"])
s_result = model_for_sentiment.fit(sentiment_X_train, sentiment_y_train, epochs=50, validation_data=(sentiment_X_test, sentiment_y_test))
plt.figure(figsize=(12, 8))
plt.plot(s_result.epoch, s_result.history['accuracy'], label="Train")
plt.plot(s_result.epoch, s_result.history['val_accuracy'], label="Validation")
plt.title("Sentiment: Accuracy")
plt.legend()
plt.show()
The sentiment model is nearly as good as the hate speech model. Overall training accuracy is around 85%–90%, while validation accuracy is nearly 80%.
For cross-inferencing, I evaluated each model on data from the other task: the hate speech model on the sentiment dataset and vice versa. The result is depicted using a confusion-matrix-like plot.
# For labelling
# Row/column labels for the comparison matrix plotted below.
label_hate_speech = ["not_hs", "hs"]
label_sentiment = ["negative", "positive"]
# Cross-inferecing for sentiment dataset (Test sentiment dataset to hate speech model)
# NOTE(review): despite the markdown above, both cross-inference passes use
# the *train* splits (sentiment_X_train / hate_speech_X_train) — confirm intent.
# Threshold sigmoid outputs at 0.5 to get 0/1 predictions.
ci_sentiment = (model_for_hate_speech.predict(sentiment_X_train) > 0.5).astype("int32")
ci_sentiment = ci_sentiment.reshape(-1)
# Cross-inferecing for hate speech dataset (Test hate speech dataset to sentiment model)
ci_hate_speech = (model_for_sentiment.predict(hate_speech_X_train) > 0.5).astype("int32")
ci_hate_speech = ci_hate_speech.reshape(-1)
# 2x2 joint count matrix: rows = hate speech label, cols = sentiment label.
comparison_matrix = np.zeros((2, 2), dtype="int32")
# Note for comparison_matrix:
# (0, 0) -> not hate speech and negative sentiment
# (1, 0) -> hate speech and negative sentiment
# (0, 1) -> not hate speech and positive sentiment
# (1, 1) -> hate speech and positive sentiment
# Calculate from Hate Speech dataset and its cross-inferencing result
# (true hate speech label vs predicted sentiment).
for hs, s in zip(hate_speech_y_train, ci_hate_speech):
    comparison_matrix[hs, s] += 1
# Calculate from Sentiment dataset and its cross-inferencing result
# (predicted hate speech label vs true sentiment).
for hs, s in zip(ci_sentiment, sentiment_y_train):
    comparison_matrix[hs, s] += 1
# Render the comparison matrix as a heatmap with counts overlaid,
# confusion-matrix style.
plt.figure(figsize=(8,8))
plt.imshow(comparison_matrix, interpolation="nearest", cmap=plt.cm.Greens)
plt.title("Comparison Matrix")
plt.colorbar()
tick_marks = np.arange(2)
plt.xticks(tick_marks, label_sentiment, rotation=45)
plt.yticks(tick_marks, label_hate_speech)
# Use white text on dark (high-count) cells so the numbers stay readable.
threshold = comparison_matrix.max() / 2
for i, j in product(range(comparison_matrix.shape[0]), range(comparison_matrix.shape[1])):
    plt.text(j, i, format(comparison_matrix[i, j], 'd'),
        horizontalalignment="center", color="white" if comparison_matrix[i, j] > threshold else "black")
plt.tight_layout()
plt.ylabel("Hate Speech")
plt.xlabel("Sentiment")
plt.show()
The step-by-step guide for creating the visualization above is from https://towardsdatascience.com/a-simple-cnn-multi-image-classifier-31c463324fa.
The plot above shows that hate speech does not necessarily tend to have negative sentiment. As we can see, "hate speech and negative sentiment" has a value of 333, which is not greater than "not hate speech but negative sentiment", i.e. 490. However, we can see that non-hate-speech tweets tend to have positive sentiment. Also, we can conclude that these tweets are overall mostly not hate speech, because the number of non-hate-speech tweets is much larger than that of hate speech tweets.