DM Practical06

The document contains Python code for implementing a Random Forest classifier using decision trees. It includes classes for DecisionTree and RandomForest, with methods for fitting the model, making predictions, and calculating information gain and entropy. The example usage demonstrates training the Random Forest on a sample dataset and making predictions.
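For reference, the entropy used in the listing below is H(y) = -Σ p_c · log2(p_c) over the class probabilities p_c, and a split's information gain is the parent entropy minus the size-weighted entropies of its two children. A minimal sketch that checks these numbers on the sample labels from the example (an illustrative addition, not part of the assignment code):

import numpy as np

y = np.array([0, 0, 0, 1, 1, 1])  # the example labels used below

def entropy(labels):
    # H = -sum(p * log2(p)) over the nonzero class probabilities.
    probs = np.bincount(labels) / len(labels)
    probs = probs[probs > 0]
    return -np.sum(probs * np.log2(probs))

print(entropy(y))  # 1.0: a 50/50 class mix carries one full bit

# Splitting the example data on feature 0 at threshold 2 puts the first
# three samples on the left and the rest on the right. Both children are
# pure (entropy 0), so the gain equals the full parent entropy.
left, right = y[:3], y[3:]
p = len(left) / len(y)
print(entropy(y) - (p * entropy(left) + (1 - p) * entropy(right)))  # 1.0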

Assignment No: 06
Assignment Name: Classification using Random Forest
Roll No: 05
import numpy as np
from collections import Counter


class DecisionTree:
    def fit(self, X, y):
        self.tree = self._grow_tree(X, y)

    def _grow_tree(self, X, y):
        # Pure node: every sample has the same label, so return it as a leaf.
        if len(set(y)) == 1:
            return y[0]

        n_samples, n_features = X.shape
        best_feature, best_threshold = self._best_split(X, y, n_features)

        # No useful split was found: return the majority label as a leaf.
        if best_feature is None:
            return Counter(y).most_common(1)[0][0]

        # Partition the data on the best split and grow each subtree.
        left_indices = X[:, best_feature] < best_threshold
        right_indices = X[:, best_feature] >= best_threshold
        left_child = self._grow_tree(X[left_indices], y[left_indices])
        right_child = self._grow_tree(X[right_indices], y[right_indices])
        return (best_feature, best_threshold, left_child, right_child)

    def _best_split(self, X, y, n_features):
        best_gain = -1
        best_feature, best_threshold = None, None

        for feature in range(n_features):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                left_y = y[X[:, feature] < threshold]
                right_y = y[X[:, feature] >= threshold]
                if len(left_y) == 0 or len(right_y) == 0:
                    continue  # skip splits that leave one side empty
                gain = self._information_gain(y, left_y, right_y)

                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold

    def _information_gain(self, y, left_y, right_y):
        # Parent entropy minus the size-weighted entropies of the children.
        p = len(left_y) / len(y)
        return self._entropy(y) - (p * self._entropy(left_y) + (1 - p) * self._entropy(right_y))

    def _entropy(self, y):
        probabilities = np.bincount(y) / len(y)
        # The 1e-6 term keeps log2 finite when a class probability is zero.
        return -np.sum(probabilities * np.log2(probabilities + 1e-6))

    def predict(self, X):
        return np.array([self._predict(sample) for sample in X])

    def _predict(self, sample):
        # Walk from the root until reaching a leaf (a bare label, not a tuple).
        node = self.tree
        while isinstance(node, tuple):
            feature, threshold, left_child, right_child = node
            if sample[feature] < threshold:
                node = left_child
            else:
                node = right_child
        return node


class RandomForest:
    def __init__(self, n_trees=5):
        self.n_trees = n_trees
        self.trees = []

    def fit(self, X, y):
        # Fit each tree on a bootstrap sample (rows drawn with replacement).
        for _ in range(self.n_trees):
            indices = np.random.choice(len(y), len(y), replace=True)
            X_sample = X[indices]
            y_sample = y[indices]
            tree = DecisionTree()
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)

    def predict(self, X):
        # Majority vote across the trees; each column holds one sample's votes.
        tree_predictions = np.array([tree.predict(X) for tree in self.trees])
        return [Counter(tree_preds).most_common(1)[0][0]
                for tree_preds in tree_predictions.T]


# Example usage
if __name__ == "__main__":
    # Sample dataset
    X = np.array([[1, 2], [1, 4], [1, 0],
                  [2, 2], [2, 4], [2, 0]])
    y = np.array([0, 0, 0, 1, 1, 1])  # Binary labels

    # Create and train the Random Forest
    rf = RandomForest(n_trees=5)
    rf.fit(X, y)

    # Make predictions
    predictions = rf.predict(X)
    print("Predictions:", predictions)

OUTPUT:
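The dataset is perfectly separable on feature 0 (value 1 means class 0, value 2 means class 1), so each bootstrapped tree splits on it and the majority vote reproduces the training labels. A typical run prints the line below (NumPy 2.x may render the integers as np.int64(0) and so on):

Predictions: [0, 0, 0, 1, 1, 1]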
