Discover the power of neural networks in analyzing tabular data through our comprehensive tutorial. In this video, we dive into the world of predictive modeling using neural networks, enabling you to unlock valuable insights and make accurate predictions from structured data.
We introduce the concept of neural networks and their ability to handle complex relationships and patterns in tabular data.
Using Python and the popular deep learning library fastai, we guide you through the step-by-step process of building and training a neural network model for predictive modeling. You'll learn how to preprocess the data, handle missing values, normalize features, and split the dataset into training and validation sets.
Through a practical example on a simulated dataset, we demonstrate how neural networks can be applied to tasks such as customer credit risk assessment. You'll see firsthand how neural networks can uncover hidden patterns and make accurate predictions from tabular data.
# code below
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from fastai.tabular.all import *  # brings in TabularPandas, Learner, Module, Categorify, etc.
# Simulated data: every feature is drawn independently of the default flag,
# so there is no real signal to learn; the point here is the mechanics.
np.random.seed(0)
n_samples = 100000
age = np.random.rand(n_samples)  # continuous feature in [0, 1)
gender = np.random.choice([0, 1], size=n_samples)
occupation = np.random.choice([0, 1, 2], size=n_samples)
zip_code = np.random.choice([0, 1, 2, 3], size=n_samples)
default_flag = np.random.choice([0, 1], size=n_samples, p=[0.8, 0.2])  # ~20% defaults
data = pd.DataFrame({
    'age': age,
    'gender': gender,
    'occupation': occupation,
    'zip_code': zip_code,
    'default_flag': default_flag
})
data['default_flag'] = data['default_flag'].astype(float)  # BCE loss expects float targets
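# Optional sanity check (an addition for illustration, not part of the original
# script): peek at the table and confirm the ~80/20 class balance
print(data.head())
print(data['default_flag'].value_counts(normalize=True))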
# Data preprocessing: encode categories, fill missing values, normalize continuous features
procs = [Categorify, FillMissing, Normalize]
splits = RandomSplitter(valid_pct=0.2)(range_of(data))
to = TabularPandas(data, procs=procs,
                   cat_names=['gender', 'occupation', 'zip_code'],
                   cont_names=['age'], y_names='default_flag', splits=splits)
dls = to.dataloaders(bs=64)
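# Optional: inspect one processed batch to confirm the procs did their job
# (integer category codes plus a normalized 'age' column)
dls.show_batch(max_n=5)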
# fastai's Module is an nn.Module that calls super().__init__() for you
class CollabNN(Module):
    def __init__(self, emb_szs, n_cont, layers, out_sz, y_range=None):
        # One embedding table per categorical variable
        self.embeds = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in emb_szs])
        self.emb_drop = nn.Dropout(0.4)
        # Total width of the concatenated embeddings
        n_emb = sum(nf for _, nf in emb_szs)
        self.bn_cont = nn.BatchNorm1d(n_emb + n_cont)
        layer_sizes = [n_emb + n_cont] + layers
        self.layers = nn.ModuleList([nn.Linear(layer_sizes[i], layer_sizes[i+1])
                                     for i in range(len(layer_sizes)-1)])
        self.fc = nn.Linear(layers[-1], out_sz)
        self.y_range = y_range

    def forward(self, x_cat, x_cont):
        # Embed each categorical variable, then apply dropout
        emb_gender = self.embeds[0](x_cat[:, 0])
        emb_occupation = self.embeds[1](x_cat[:, 1])
        emb_zip_code = self.embeds[2](x_cat[:, 2])
        x = self.emb_drop(torch.cat([emb_gender, emb_occupation, emb_zip_code], 1))
        x = torch.cat([x, x_cont], 1)  # combine categorical and continuous features
        x = self.bn_cont(x)
        for layer in self.layers:
            x = F.relu(layer(x))
        x = self.fc(x)
        # Optionally squash the output into y_range; only use this together with a
        # loss that expects probabilities (e.g. nn.BCELoss), not BCEWithLogitsLoss
        if self.y_range is not None:
            x = torch.sigmoid(x) * (self.y_range[1] - self.y_range[0]) + self.y_range[0]
        return x
n_act = 100  # embedding width for each categorical variable
# Vocabulary sizes after Categorify; each includes an extra '#na#' category:
# gender 2+1, occupation 3+1, zip_code 4+1
emb_szs = [(3, n_act), (4, n_act), (5, n_act)]
n_cont = 1          # number of continuous variables (age)
layers = [100, 50]  # sizes of the hidden layers
out_sz = 1          # a single logit for binary classification
y_range = None      # keep raw logits; BCEWithLogitsLoss applies the sigmoid itself
model = CollabNN(emb_szs, n_cont, layers, out_sz, y_range)
# accuracy_multi thresholds sigmoid(logit) at 0.5; plain accuracy would argmax
# over a single logit and always predict 0
learn = Learner(dls, model, loss_func=nn.BCEWithLogitsLoss(), metrics=accuracy_multi)
learn.fit_one_cycle(1, 1e-3)
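# Aside (a sketch for comparison, not the approach built above): fastai's
# built-in tabular_learner assembles a similar embeddings-plus-MLP model in
# one line, inferring embedding sizes from the data automatically
learn_builtin = tabular_learner(dls, layers=[100, 50], n_out=1,
                                loss_func=nn.BCEWithLogitsLoss(), metrics=accuracy_multi)
learn_builtin.fit_one_cycle(1, 1e-3)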
# Switch the model to evaluation mode (disables dropout)
learn.model.eval()
# Build an inference dataloader over the full dataset
test_dl = learn.dls.test_dl(data)
# Iterate over the dataloader and collect predictions
predictions = []
with torch.no_grad():
    for batch in test_dl:
        x_cat, x_cont = batch[:2]  # categorical codes and continuous features
        pred = learn.model(x_cat, x_cont)    # forward pass -> raw logits
        pred_prob = torch.sigmoid(pred)      # logits -> probabilities
        pred_label = torch.round(pred_prob)  # threshold at 0.5 -> 0/1 labels
        predictions.extend(pred_label.squeeze(1).tolist())
# Convert the predictions to a NumPy array and inspect the distinct labels
predictions = np.array(predictions)
unique_labels = np.unique(predictions)
print(unique_labels)
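# Extra check (an addition, not in the original script): test_dl preserves row
# order, so predictions line up with data. Because the labels were simulated
# independently of the features, the model should settle on the majority
# class, giving accuracy near the 0.8 class prior.
true_labels = data['default_flag'].to_numpy()
print('Accuracy vs. simulated labels:', (predictions == true_labels).mean())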