import numpy as np
 
class LogisticRegression:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
 
    def _sigmoid(self, z):
        # Clip to avoid overflow/underflow
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))
 
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
 
        for _ in range(self.n_iters):
            # Linear model z = X.w + b
            linear_model = np.dot(X, self.weights) + self.bias
            # Apply sigmoid
            y_predicted_prob = self._sigmoid(linear_model)
 
            # gradients
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted_prob - y))
            db = (1 / n_samples) * np.sum(y_predicted_prob - y)
 
            # Update params
            self.weights -= self.lr * dw
            self.bias -= self.lr * db
 
    def predict_proba(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted_prob = self._sigmoid(linear_model)
        return y_predicted_prob
 
    def predict(self, X, threshold=0.5):
        # Vectorized thresholding of the predicted probabilities
        return (self.predict_proba(X) > threshold).astype(int)
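The gradients used in fit are those of the average binary cross-entropy loss, which the class itself never computes. As a rough sanity check you could monitor that loss during or after training with a small helper along these lines (the binary_cross_entropy name and the eps clipping constant are illustrative choices, not part of the class above):

def binary_cross_entropy(y_true, y_prob, eps=1e-12):
    # Clip probabilities so log(0) never occurs (eps is an assumed constant)
    y_prob = np.clip(y_prob, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))

Calling it as binary_cross_entropy(y_train, model.predict_proba(X_train)) after fitting would report the final training loss; it should decrease if the learning rate and iteration count are reasonable.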
 
 
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
 
 
seed = 123
X, y = make_classification(n_samples=1000, n_features=3, n_redundant=0, n_informative=3, random_state=seed,
                           n_clusters_per_class=1)
 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
 
# Standardize features (important for gradient-descent convergence)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
 
model = LogisticRegression(learning_rate=0.1, n_iters=100)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
 
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy
 
print(f"LR classification accuracy: {accuracy(y_test, predictions)}")