In [2]:
# %matplotlib inline  # uncomment if needed: Colab already enables it
import numpy as np
import matplotlib.pyplot as plt
In [4]:
def generate_demo_data(n_per_class: int = 100, seed: int = 42):
    rng = np.random.default_rng(seed)
    blue = rng.normal(loc=[-2, -2], scale=1.0, size=(n_per_class, 2))
    red = rng.normal(loc=[2, 2], scale=1.0, size=(n_per_class, 2))
    X = np.vstack((blue, red))
    y = np.hstack((np.zeros(n_per_class), np.ones(n_per_class)))  # blue → 0, red → 1
    return X, y
# --- call the function ---
X, y = generate_demo_data()
print(f"{X.shape[0]} points loaded; X.shape = {X.shape}, y.shape = {y.shape}")
200 points loaded; X.shape = (200, 2), y.shape = (200,)
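Because generate_demo_data seeds its own default_rng, two calls with the same seed return identical arrays. A minimal sanity-check sketch:
In [ ]:
# Sketch: the generator is deterministic for a fixed seed.
X_a, y_a = generate_demo_data(seed=42)
X_b, y_b = generate_demo_data(seed=42)
assert np.array_equal(X_a, X_b) and np.array_equal(y_a, y_b)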
In [6]:
def train_test_split(X, y, test_ratio=0.2, seed=42):
    rng = np.random.default_rng(seed)
    idx = rng.permutation(len(X))
    test_size = int(len(X) * test_ratio)
    test_idx, train_idx = idx[:test_size], idx[test_size:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_ratio=0.2)
print(f"Train : {len(X_train)} – Test : {len(X_test)}")
Train : 160 – Test : 40
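Note that this homemade split shuffles but does not stratify; with 100 points per class that is usually fine, but it is worth verifying that both classes survive in roughly equal proportions. A quick sketch:
In [ ]:
# Sketch: check the split covers all points and stays roughly balanced.
assert len(X_train) + len(X_test) == len(X)
print("class-1 ratio – train:", y_train.mean(), "test:", y_test.mean())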
In [8]:
# Quick visualization of the training set
plt.figure(figsize=(5, 5))
plt.scatter(
    X_train[:, 0], X_train[:, 1],
    c=np.where(y_train == 0, "blue", "red"),
    edgecolor="k", alpha=0.7
)
plt.title("Training scatter – quick look")
plt.axis("equal")
plt.show()
In [9]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def predict_proba(X, w, b):
    return sigmoid(X @ w + b)

def compute_loss(y_true, y_pred):
    # Binary cross-entropy; clipping avoids log(0).
    eps = 1e-15
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

def compute_gradients(X, y_true, y_pred):
    # Gradient of the cross-entropy: dw = (1/m) X^T (y_pred - y), db = mean error.
    m = len(y_true)
    error = y_pred - y_true
    dw = (1 / m) * X.T @ error
    db = (1 / m) * np.sum(error)
    return dw, db
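These closed-form gradients are easy to get wrong by a sign or a factor of m, so it is worth comparing them once against a central finite difference of the loss. A minimal sketch (the probe point, step h, and tolerance are arbitrary choices):
In [ ]:
# Sketch: compare dw[j] with (L(w + h*e_j) - L(w - h*e_j)) / (2h).
w0, b0, h = np.array([0.3, -0.2]), 0.1, 1e-6
dw, db = compute_gradients(X_train, y_train, predict_proba(X_train, w0, b0))
for j in range(len(w0)):
    wp, wm = w0.copy(), w0.copy()
    wp[j] += h
    wm[j] -= h
    num = (compute_loss(y_train, predict_proba(X_train, wp, b0))
           - compute_loss(y_train, predict_proba(X_train, wm, b0))) / (2 * h)
    assert abs(num - dw[j]) < 1e-6, (j, num, dw[j])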
In [10]:
def train_logreg(X, y, lr=0.1, n_iter=1000):
    w = np.zeros(X.shape[1])
    b = 0.0
    losses = []
    for i in range(n_iter):
        y_hat = predict_proba(X, w, b)
        loss = compute_loss(y, y_hat)
        losses.append(loss)
        dw, db = compute_gradients(X, y, y_hat)
        w -= lr * dw
        b -= lr * db
        if (i + 1) % 100 == 0:
            print(f"it {i+1:4d}/{n_iter}, loss = {loss:.4f}")
    return w, b, np.array(losses)
w_opt, b_opt, history = train_logreg(X_train, y_train, lr=0.1, n_iter=1000)
it  100/1000, loss = 0.0337
it  200/1000, loss = 0.0241
it  300/1000, loss = 0.0207
it  400/1000, loss = 0.0190
it  500/1000, loss = 0.0179
it  600/1000, loss = 0.0172
it  700/1000, loss = 0.0166
it  800/1000, loss = 0.0162
it  900/1000, loss = 0.0159
it 1000/1000, loss = 0.0156
In [11]:
# test-set predictions
proba_test = predict_proba(X_test, w_opt, b_opt)
y_pred_test = (proba_test >= 0.5).astype(int)
accuracy = np.mean(y_pred_test == y_test)
print(f"Test accuracy = {accuracy*100:.1f}%")
# --- figures ---
plt.figure(figsize=(12, 4))

# 1) loss curve
plt.subplot(1, 3, 1)
plt.plot(history)
plt.xlabel("Iteration"); plt.ylabel("Loss"); plt.title("Loss curve")
plt.grid(True, linestyle="--", linewidth=0.5)

# 2) train (true labels)
plt.subplot(1, 3, 2)
plt.scatter(X_train[:, 0], X_train[:, 1],
            c=np.where(y_train == 0, "blue", "red"),
            edgecolor="k", alpha=0.7)
plt.title("Training set (true labels)")
plt.axis("equal")

# 3) test (predicted labels + errors)
plt.subplot(1, 3, 3)
colors_pred = np.where(y_pred_test == 0, "blue", "red")
plt.scatter(X_test[:, 0], X_test[:, 1],
            c=colors_pred, edgecolor="k", alpha=0.9, label="test (predicted)")
mis = y_pred_test != y_test
if np.any(mis):
    plt.scatter(X_test[mis, 0], X_test[mis, 1],
                marker="x", s=80, c="k", linewidths=2, label="error")
plt.title("Test set (predictions)")
plt.legend(frameon=False)
plt.axis("equal")

plt.tight_layout()
plt.show()
Test accuracy = 100.0%
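Because the model is linear, its decision boundary is the set of points where X @ w + b = 0, i.e. where the predicted probability crosses 0.5. A short sketch to draw that line over the training data (the x-range is picked by eye for this dataset):
In [ ]:
# Sketch: w1*x1 + w2*x2 + b = 0  =>  x2 = -(w1*x1 + b) / w2 (w2 != 0 here).
xs = np.linspace(-5, 5, 100)
ys = -(w_opt[0] * xs + b_opt) / w_opt[1]
plt.figure(figsize=(5, 5))
plt.scatter(X_train[:, 0], X_train[:, 1],
            c=np.where(y_train == 0, "blue", "red"),
            edgecolor="k", alpha=0.7)
plt.plot(xs, ys, "k--", label="decision boundary")
plt.axis("equal")
plt.legend(frameon=False)
plt.show()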
In [12]:
def classify(points):
    """
    points : array-like, shape (n_samples, 2)
    returns: ndarray of dtype '<U4' ("blue" or "red")
    """
    proba = predict_proba(np.asarray(points), w_opt, b_opt)
    return np.where(proba >= 0.5, "red", "blue")

# Quick example
sample_pts = np.array([[0, 0], [-3, -1], [3, 2]])
print(sample_pts, "→", classify(sample_pts))
[[ 0  0]
 [-3 -1]
 [ 3  2]] → ['blue' 'blue' 'red']