Understanding the Naïve Bayes Algorithm



Table of contents:

  1. Introduction
    1.1 Overview
    1.2 The Bayes Theorem
    1.3 The Naive Assumption
    1.4 Decision Boundaries
    1.5 Different Classifiers
  2. Implementing Naive Bayes with Sklearn
  3. Behind the scenes
    3.1 Gaussian Naive Bayes (For numeric features)
    3.2 Multinomial classifier (For text classification)
  4. References

Prerequisites: Basic understanding of Python & Scikit-learn library

1. Introduction

1.1 Overview

Naive Bayes is a supervised classification algorithm which belongs to the family of "probabilistic classifiers". As the name suggests, it uses Bayes' theorem at its core, together with a 'naive' assumption.

The algorithm is widely used for simple classification problems and works well with text data in bag-of-words form. In fact, it was most popularized by its use as a spam email classifier at Google. To this day, it is widely used as a benchmark model by data scientists in hackathons on Kaggle.

Before we get into the crux of the algorithm, it's important to know what Bayes' rule is.

1.2 The Bayes Theorem

You can get a great intuitive sense of conditional probability from the lectures of Prof. John Tsitsiklis (MIT OpenCourseWare).

"You know something about the world, and based on what you know, you setup a probability model and you write down probabilities about the different outcomes. Then someone gives you some new information, which changes your beliefs and thus changes the probabilities of your outcomes."

In a simple sense, Bayes' theorem talks about the 'new' probabilities of an experiment given that an event has occurred.

Let's say you have a sample space in which the probabilities of the different events are as shown below: two overlapping regions $A$ and $B$, with $P(A \cap B) = 0.3$ and the part of $B$ outside $A$ carrying probability $0.2$.

Now let's say someone tells you that B has occurred. The area outside the circle that represents B is now meaningless, since the new sample space is B. This means we need to revise the probabilities that were assigned to the regions inside B.

Now someone asks you: what is the probability that A occurs, given that B has occurred? Since B has occurred and the new sample space is B, the area that was initially represented by $P(A \cap B)$ now represents $P(A \mid B)$, but the probability has to be recalculated.

What is the probability of this new section? Well, we can use simple ratios to solve this.

$$ P (A \mid B) = {P(A \cap B) \over P(B)} = {0.3 \over (0.3+0.2)} = 0.6$$

Now, the whole exercise we did above can also be done for when A has occurred, meaning that by symmetry we can show -

$$ P (B \mid A) = {P(A \cap B) \over P(A)}$$

Using the two symmetric equations and replacing $P(A \cap B)$, we can write the famous equation -

$$ P (A \mid B) = {P (B \mid A) \times P(A) \over P(B)}$$
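To make this concrete, here is a quick numeric check of the formula using the probabilities from the example above ($P(A \cap B) = 0.3$, $P(B) = 0.5$), together with an assumed $P(A) = 0.4$ purely for illustration:

```python
# Probabilities from the example above
p_a_and_b = 0.3        # P(A ∩ B)
p_b = 0.3 + 0.2        # P(B)
p_a = 0.4              # P(A) -- assumed here purely for illustration

# Conditional probabilities from the ratio definition
p_a_given_b = p_a_and_b / p_b   # 0.6
p_b_given_a = p_a_and_b / p_a   # 0.75

# Bayes' theorem recovers P(A|B) from P(B|A), P(A) and P(B)
assert abs(p_a_given_b - p_b_given_a * p_a / p_b) < 1e-12
print(p_a_given_b)  # 0.6
```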

1.3 The Naive Assumption

Now, let's see an 'expanded' version of this definition which is more relevant to data science. We want to know the probability of $y$ given that $x_1, x_2, ... , x_n$ have occurred (where the $x_i$ are different features and $y$ is the class label we want to predict). This is given by -

$$P(y \mid x_1, x_2, ... , x_n) = {P(x_1, x_2, ... , x_n \mid y) \times P(y) \over P(x_1, x_2, ... , x_n)}$$

Here comes the naive assumption of conditional independence. The assumption is that all the features $x_1, x_2, ..., x_n$ are independent of each other given the class, i.e. $P(x_i, x_j \mid y) = P(x_i \mid y).P(x_j \mid y)$. Applying this to the numerator of the above equation we get -

$$P(y \mid x_1, x_2, ... , x_n) = {P(x_1 \mid y).P(x_2 \mid y)... P(x_n \mid y).P(y) \over P(x_1, x_2, ... , x_n)}$$

Using proper convention -

$$P(y \mid x_1, x_2, ... , x_n) = {P(y)\prod_{i=1}^{n} P(x_i \mid y) \over P(x_1, x_2, ... , x_n)}$$

Since the denominator does not depend on $y$ (it is constant for a given input), we can write the equation as -

$$P(y \mid x_1, x_2, ... , x_n) \propto P(y)\prod_{i=1}^{n} P(x_i \mid y)$$

In order to turn this into a classifier, we need to pick the class with the maximum probability. This can be expressed as -

$$\hat{y} = \arg\max_{y}\Big(P(y)\prod_{i=1}^{n} P(x_i \mid y)\Big)$$

This last step is known as the Maximum A Posteriori (MAP) decision rule.
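As a tiny worked example of this rule, suppose we have two classes with made-up priors and per-feature likelihoods; the predicted class is simply the one with the larger product:

```python
import numpy as np

# Made-up priors P(y) and likelihoods P(x_i | y) for two classes and three features
priors = np.array([0.6, 0.4])               # P(y=0), P(y=1)
likelihoods = np.array([[0.2, 0.5, 0.1],    # P(x_i | y=0)
                        [0.4, 0.3, 0.3]])   # P(x_i | y=1)

# Posterior-proportional scores: P(y) * prod_i P(x_i | y)
scores = priors * likelihoods.prod(axis=1)
print(scores)           # [0.006  0.0144]
print(scores.argmax())  # 1 -> class 1 is the MAP prediction
```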

1.4 Decision Boundaries

When working with different algorithms, it's important to understand where a particular algorithm will work and where it may not. For this, I have always found a two-dimensional example of decision boundaries quite useful from an intuitive point of view, since the core 'nature' of the classifier carries over to higher-dimensional spaces as well. The objective is to separate the blue points from the red points; the decision boundary, along with the classifier's confidence, is plotted.

Notice below (image from the sklearn documentation) that the Naive Bayes classifier is capable of fitting smooth, continuous decision boundaries, but fails when the data requires a high-degree polynomial boundary.
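If you want to reproduce a plot like that yourself, a minimal sketch (using sklearn's make_moons toy data, chosen here just for illustration) could look like this:

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.naive_bayes import GaussianNB

# Two-dimensional toy data so the decision boundary can be visualised
X, y = make_moons(n_samples=200, noise=0.3, random_state=0)
clf = GaussianNB().fit(X, y)

# Evaluate the predicted probability of class 1 over a grid covering the data
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 200))
zz = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1].reshape(xx.shape)

plt.contourf(xx, yy, zz, cmap='RdBu', alpha=0.6)                # confidence shading
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='RdBu', edgecolor='k')  # blue vs red points
plt.show()
```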

1.5 Different Classifiers

Now, when you actually plan on implementing this, you will notice there are multiple classifiers available for a Naive Bayes model. The three classifiers behave in almost the same way, with minor differences in their assumptions. The above equation calculates $P(x_i \mid y)$ in a straightforward way for features with discrete values, where you can simply count the number of times a discrete value occurs together with the specific class label $y$. But what if the features are continuous variables? Then you need to 'assume' the nature of their distribution, and in that case we can use classifiers such as GaussianNB.

Simply put, by choosing different classifiers, you get to choose the assumptions regarding the nature of distributions of $P(x_i \mid y)$. More about these classifiers in later sections.

2. Implementing Naive Bayes with Sklearn

Let's do a quick implementation of Naive Bayes. This is how you would actually implement it when working with data; the focus will be on the algorithm rather than data preprocessing for now. For this example, I will use the iris data, which is available as part of the sklearn datasets API.
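A minimal version of that workflow (the particular train/test split, metric and sample point below are my choices, not fixed by the dataset) might look like this:

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the iris data from the sklearn datasets API
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit a Gaussian Naive Bayes model and evaluate it on held-out data
model = GaussianNB()
model.fit(X_train, y_train)
print(accuracy_score(y_test, model.predict(X_test)))

# Predict a single versicolor-like sample; this is expected to return 1
print(model.predict([[6.0, 2.9, 4.5, 1.5]]))
```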

The value 1 corresponds to versicolor, as seen by the y_labels (0 represents setosa). This is a straightforward sklearn workflow and should be quite familiar if you have implemented any model with sklearn before.

3. Behind the scenes

In this section we are going to implement the different classifiers from scratch, to show what is actually happening behind the scenes. Also, since I am not a big fan of for loops, I will give a vectorized implementation of the model wherever possible.

3.1 Gaussian Naive Bayes

When your features are continuous in nature, you assume that they are conditionally independent and that the data associated with each class is normally distributed. To calculate the $P(x_i \mid y)$ values, we segment the data by class, compute the mean and variance, and then derive the probability density as below.

$$P(x_i \mid y) = {1 \over \sqrt{2 \pi \sigma^2_y}}exp \Bigg(-{(x_i - \mu_y)^2 \over 2 \sigma^2_y} \Bigg)$$

The first step is to calculate the summary statistics. For the Gaussian classifier, we need the class-wise means and standard deviations, along with the class priors, which are simply the class probabilities in the entire dataset. With these three, we can predict which class a particular sample belongs to. The following diagram shows the vectorized computation of the Gaussian classifier in a graphical representation.
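In code, a vectorized version of that computation might look roughly like the sketch below (not sklearn's internals, just the three matrices and the MAP rule in log space):

```python
import numpy as np
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
classes = np.unique(y)

# Summary statistics: class priors, class-wise means and class-wise standard deviations
priors = np.array([np.mean(y == c) for c in classes])          # shape (n_classes,)
means  = np.array([X[y == c].mean(axis=0) for c in classes])   # shape (n_classes, n_features)
stds   = np.array([X[y == c].std(axis=0) for c in classes])    # shape (n_classes, n_features)

def predict(samples):
    # Gaussian log-likelihood of every sample under every class, via broadcasting:
    # (n_samples, 1, n_features) against (n_classes, n_features)
    x = samples[:, None, :]
    log_lik = -0.5 * np.log(2 * np.pi * stds ** 2) - (x - means) ** 2 / (2 * stds ** 2)
    # Sum the log-likelihoods over features and add the log prior (MAP in log space)
    log_post = log_lik.sum(axis=2) + np.log(priors)
    return log_post.argmax(axis=1)

print(predict(X[:1]))  # [0]
```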

A predicted value of 0 here corresponds to the first class, 'setosa'.

A few interesting notes based on this calculation -

  1. The prediction here is made directly using the priors matrix, the class-wise mean matrix and the class-wise standard deviation matrix.
  2. Training the algorithm on new data simply means updating these three matrices and re-running the calculation to predict the labels.

3.2 Multinomial classifier

The multinomial classifier is a bit more complex, but it is the usual choice for text-based data. Here you assume that your data is multinomially distributed. The multinomial distribution is a generalization of the binomial, Bernoulli and categorical distributions, depending on the number of trials (n) and the number of possible outcomes (k). If there are only 2 possible outcomes (k = 2) and multiple trials (n > 1), the multinomial distribution reduces to a binomial distribution.
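You can see this reduction directly with scipy: for k = 2 outcomes the multinomial pmf matches the binomial pmf (the particular n, p and k below are arbitrary):

```python
from scipy.stats import binom, multinomial

n, p, k = 10, 0.3, 4   # 10 trials, success probability 0.3, 4 successes

# A multinomial over two outcomes (success, failure) vs. the plain binomial
print(multinomial.pmf([k, n - k], n=n, p=[p, 1 - p]))  # ≈ 0.2001
print(binom.pmf(k, n=n, p=p))                          # same value
```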

Let's say you have a few sentences with labels -

Let's say you are asked to classify the sentence "Great smartphone". What you are trying to do is find the probability of the words "Great" and "Smartphone" occurring with the label Positive/Negative. Based on the product of the probabilities of the two words (remember the 'naive' assumption), you calculate which label has the higher product of probabilities and predict that as the corresponding label. This is done by simply calculating $N_w / N$, where $N_w$ is the number of times the word occurs with the label, and $N$ is the total number of words that occur with the given label.

Here comes the issue with doing this: if a word isn't present in the training data (like "Smartphone" in this case), the product of probabilities becomes 0. How do we solve this? We use a technique called additive smoothing (Laplace smoothing), which adds a small constant to both the numerator and the denominator. The smoothed estimate of $P(x_i \mid y)$, i.e. the probability $\hat\theta_i$ of word $i$ appearing in a sample belonging to class $y$, is -

$$ \hat\theta_i = {N_w + \alpha \over N + \alpha d} $$

Here $\alpha$ is set to 1 for Laplace smoothing, while $\alpha < 1$ gives Lidstone smoothing. $N_w$ is the number of times the word occurs with that label, $N$ is the total count of all words occurring with that label, and $d$ is the size of the vocabulary.
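In code, the smoothed estimate is a one-liner; the per-word counts below are hypothetical, just to show the effect on unseen words:

```python
import numpy as np

alpha = 1.0                                # alpha = 1 -> Laplace smoothing
word_counts = np.array([3, 0, 2, 1, 0])    # N_w: hypothetical counts of each vocabulary word within one class
N = word_counts.sum()                      # total word count for that class
d = len(word_counts)                       # vocabulary size

theta = (word_counts + alpha) / (N + alpha * d)
print(theta)         # unseen words get a small non-zero probability instead of 0
print(theta.sum())   # 1.0 -- still a valid probability distribution
```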

Note that for the implementation I will only show the case where we use a count vectorizer, but to improve the model it is important to leverage techniques such as stopword removal, lemmatization, TF-IDF vectorization, etc. We will start by creating a small text dataset with 2 labels.
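A minimal sketch of that setup (the sentences and labels below are made up for illustration) could be:

```python
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# A tiny, made-up dataset: 1 = positive, 0 = negative
texts = ["great device, loved it",
         "amazing device and battery",
         "terrible experience, would not buy",
         "worst experience ever"]
labels = [1, 1, 0, 0]

# Bag-of-words counts, then a multinomial model with Laplace smoothing (alpha=1 by default)
vectorizer = CountVectorizer()
X_counts = vectorizer.fit_transform(texts)
clf = MultinomialNB().fit(X_counts, labels)

print(clf.predict(vectorizer.transform(["great smartphone"])))  # expected: [1] (positive)
```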

The 1 here represents the 'positive' label.

Quite clearly, the first improvement that comes to mind is to replace the count vectorizer with a TF-IDF vectorizer: words that are not very representative of a label get lower weights, while words that actually represent a label get higher weights. In this case, words like 'device' and 'experience' each contribute to the positive and negative labels respectively.
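Swapping in TF-IDF is then a one-line change; a pipeline keeps things tidy (a sketch reusing the toy texts and labels from above):

```python
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Same model as before, but TF-IDF down-weights words that are common across both labels
tfidf_model = make_pipeline(TfidfVectorizer(), MultinomialNB())
tfidf_model.fit(texts, labels)   # texts and labels from the toy dataset above
print(tfidf_model.predict(["great smartphone"]))
```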

Similarly, removing stopwords and lemmatizing words will have even more impact on the accuracy of the model.

4. References