Spaces:
Runtime error
Runtime error
| #Import libraries | |
| #pip install pandas torch transformers datasets scikit-learn | |
| import torch | |
# Choose the compute device in priority order: CUDA GPU, Apple MPS, then CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else "cpu"
)
print('Using device:', device)

# Extra diagnostics are only meaningful (and only available) on CUDA.
if device.type == 'cuda':
    print("Device name: ", torch.cuda.get_device_name(0))
    print("Device properties:", torch.cuda.get_device_properties(0))
    print('Memory Usage:')
    # Byte counts are reported in GiB, rounded to one decimal place.
    print('Allocated:', round(torch.cuda.memory_allocated(0) / 1024 ** 3, 1), 'GB')
    print('Cached: ', round(torch.cuda.memory_reserved(0) / 1024 ** 3, 1), 'GB')
| #ncf_model.to(device) | |
| #Load dataset | |
| import pandas as pd | |
| from datasets import load_dataset | |
| import numpy as np | |
# NOTE: trust_remote_code=True lets the dataset repository's loading script run
# on this machine. Both configurations come from the same repository, so both
# calls need the flag; without it the second load prompts for confirmation or
# fails in a non-interactive environment.
review_dataset = load_dataset(
    "McAuley-Lab/Amazon-Reviews-2023",
    "raw_review_All_Beauty",
    trust_remote_code=True,
)
# User reviews dataframe (this configuration exposes its rows under the 'full' split).
reviews_df = pd.DataFrame(review_dataset['full'])

# Map user_id and parent_asin to contiguous integer indices, in order of first
# appearance, so they can be used as embedding row numbers.
user_map = {user: idx for idx, user in enumerate(reviews_df["user_id"].unique())}
item_map = {asin: idx for idx, asin in enumerate(reviews_df["parent_asin"].unique())}

meta_dataset = load_dataset(
    "McAuley-Lab/Amazon-Reviews-2023",
    "raw_meta_All_Beauty",
    trust_remote_code=True,
)
# Item metadata dataframe (also read from the 'full' split).
meta_df = pd.DataFrame(meta_dataset['full'])
| #Split data | |
| from sklearn.model_selection import train_test_split | |
# Attach the integer ids used by the embedding layers to every review row.
reviews_df["user_idx"] = reviews_df["user_id"].map(user_map)
reviews_df["item_idx"] = reviews_df["parent_asin"].map(item_map)

# Hold out 20% of the reviews for evaluation; the fixed seed keeps the split reproducible.
train, test = train_test_split(
    reviews_df,
    test_size=0.2,
    random_state=42,
)
| #NCF model | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
class NCF(nn.Module):
    """Neural collaborative filtering model.

    A user embedding and an item embedding are concatenated, passed through
    an MLP of ``Linear -> ReLU`` layers, regularised with dropout, and
    projected to a single sigmoid-activated score in ``[0, 1]``.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_dim: Width of each embedding vector.
        hidden_dims: Output widths of the MLP layers, in order. May be empty,
            in which case the concatenated embeddings feed the output layer
            directly.
        dropout_rate: Dropout probability applied to the MLP output.
    """

    def __init__(self, num_users, num_items, embedding_dim=32, hidden_dims=(64, 32), dropout_rate=0.5):
        super().__init__()
        # The default is an immutable tuple so it cannot be shared and mutated
        # across instances; any iterable of ints is still accepted.
        hidden_dims = list(hidden_dims)

        # Embedding layers
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        # Neural layers: input_dim tracks the width flowing into the next layer.
        input_dim = embedding_dim * 2
        layers = []
        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            input_dim = hidden_dim
        self.mlp = nn.Sequential(*layers)

        # Final prediction layer. Using input_dim (not hidden_dims[-1]) keeps
        # this valid when hidden_dims is empty.
        self.output = nn.Linear(input_dim, 1)
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, user_idx, item_idx):
        """Score user/item pairs.

        Args:
            user_idx: Integer tensor of user indices.
            item_idx: Integer tensor of item indices, same shape as ``user_idx``.

        Returns:
            Tensor shaped like the inputs with a trailing dimension of size 1,
            holding sigmoid scores.
        """
        # Embeddings
        user_emb = self.user_embedding(user_idx)
        item_emb = self.item_embedding(item_idx)

        # Concatenate and pass through MLP
        x = torch.cat([user_emb, item_emb], dim=-1)
        x = self.mlp(x)
        x = self.dropout(x)

        # Prediction
        return torch.sigmoid(self.output(x))
| #prepare dataloader | |
| from torch.utils.data import Dataset, DataLoader | |
class ReviewsDataset(Dataset):
    """Map-style dataset exposing (user index, item index, rating) samples.

    ``data`` must support ``data[column].values`` for the columns
    ``"user_idx"``, ``"item_idx"`` and ``"rating"``.
    """

    def __init__(self, data):
        # Keep the raw column arrays; tensors are created lazily per sample.
        self.user_idx, self.item_idx, self.rating = (
            data[column].values for column in ("user_idx", "item_idx", "rating")
        )

    def __len__(self):
        """Return the number of samples (one per rating)."""
        return len(self.rating)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of scalar tensors."""
        user = torch.tensor(self.user_idx[idx], dtype=torch.long)
        item = torch.tensor(self.item_idx[idx], dtype=torch.long)
        score = torch.tensor(self.rating[idx], dtype=torch.float)
        return {"user_idx": user, "item_idx": item, "rating": score}
| # Create DataLoaders | |
| train_dataset = ReviewsDataset(train) | |
| test_dataset = ReviewsDataset(test) | |
| train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True) | |
| test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False) | |
| #train | |
# Per-epoch history filled in by train_model / evaluate_model.
# NOTE: despite their names, the "*_acc" lists hold mean absolute error (MAE)
# on the 0-1 normalised rating scale; the keys are kept because other code
# reads them.
results = {
    "train_loss": [],
    "train_acc": [],
    "test_loss": [],
    "test_acc": [],
}


def _select_device():
    """Return the best available device: CUDA, then Apple MPS, then CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    mps = getattr(torch.backends, "mps", None)  # absent on older torch builds
    if mps is not None and mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")


def _model_device(model):
    """Return the device holding the model's parameters (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device("cpu")


def _run_batches(model, loader, device, criterion, optimizer=None):
    """Run one pass over ``loader`` and return (sum of batch losses, sum of |error|).

    Ratings are divided by 5 to match the model's sigmoid output range. When
    ``optimizer`` is given, a gradient step is taken for every batch.
    """
    total_loss = 0.0
    total_abs_error = 0.0
    for batch in loader:
        user_idx = batch["user_idx"].to(device)
        item_idx = batch["item_idx"].to(device)
        targets = batch["rating"].to(device) / 5.0  # Normalize ratings

        if optimizer is not None:
            optimizer.zero_grad()
        # reshape(-1) keeps a 1-D tensor even for a final batch of size 1,
        # where a bare squeeze() would collapse it to a 0-dim scalar.
        predictions = model(user_idx, item_idx).reshape(-1)
        loss = criterion(predictions, targets)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        total_loss += loss.item()
        total_abs_error += torch.abs(predictions - targets).sum().item()
    return total_loss, total_abs_error


def train_model(model, train_loader, test_loader, epochs=10, lr=0.001, lr_decay_step=5, lr_decay_gamma=0.1):
    """Train ``model`` with MSE loss and evaluate it after every epoch.

    The model is moved to the best available device (CUDA, MPS or CPU) rather
    than unconditionally to CUDA, so training also works on CPU-only hosts.
    Per-epoch metrics are printed and appended to the module-level ``results``.

    Args:
        model: Module called as ``model(user_idx, item_idx)``.
        train_loader: Loader yielding dict batches with "user_idx",
            "item_idx" and "rating" tensors; must expose ``.dataset``.
        test_loader: Loader of the same form used for evaluation.
        epochs: Number of passes over ``train_loader``.
        lr: Initial Adam learning rate.
        lr_decay_step: Epochs between learning-rate decays.
        lr_decay_gamma: Multiplicative decay factor.
    """
    device = _select_device()
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_decay_step, gamma=lr_decay_gamma)

    for epoch in range(epochs):
        model.train()
        train_loss, train_abs_error = _run_batches(model, train_loader, device, criterion, optimizer)

        # Loss is averaged per batch, MAE per sample; max(..., 1) avoids a
        # ZeroDivisionError on an empty loader.
        avg_train_loss = train_loss / max(len(train_loader), 1)
        avg_train_mae = train_abs_error / max(len(train_loader.dataset), 1)
        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Train MAE: {avg_train_mae:.4f}")
        results["train_loss"].append(avg_train_loss)
        results["train_acc"].append(avg_train_mae)

        scheduler.step()
        evaluate_model(model, test_loader)


def evaluate_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and record loss/MAE in ``results``.

    Batches are moved to whichever device the model's parameters live on, so
    this works regardless of where the model was trained.
    """
    device = _model_device(model)
    model.eval()
    with torch.no_grad():
        test_loss, test_abs_error = _run_batches(model, test_loader, device, nn.MSELoss())

    avg_test_loss = test_loss / max(len(test_loader), 1)
    avg_test_mae = test_abs_error / max(len(test_loader.dataset), 1)
    print(f"Test Loss: {avg_test_loss:.4f}, Test MAE: {avg_test_mae:.4f}")
    results["test_loss"].append(avg_test_loss)
    results["test_acc"].append(avg_test_mae)
# Embedding table sizes come directly from the id -> index maps.
num_users, num_items = len(user_map), len(item_map)

# Build the recommender, then fit it on the training split.
ncf_model = NCF(
    num_users=num_users,
    num_items=num_items,
    embedding_dim=32,
    hidden_dims=[64, 32],
)
train_model(ncf_model, train_loader, test_loader, epochs=10, lr=1e-4)
| #Plot | |
| import matplotlib.pyplot as plt | |
| # Plot loss curves of a model | |
def plot_loss_curves(results):
    """Plot training/test loss and MAE curves side by side.

    Args:
        results: Dict with per-epoch lists under "train_loss", "test_loss",
            "train_acc" and "test_acc". The "*_acc" lists hold mean absolute
            error (that is what the training loop stores under those keys),
            so the second panel is labelled MAE rather than accuracy.
    """
    loss = results["train_loss"]
    test_loss = results["test_loss"]
    train_mae = results["train_acc"]
    test_mae = results["test_acc"]

    # 1-based epoch numbers, matching the "Epoch N" lines printed during training.
    epochs = range(1, len(results["train_loss"]) + 1)

    plt.figure(figsize=(15, 7))

    # Plot loss
    plt.subplot(1, 2, 1)
    plt.plot(epochs, loss, label="train_loss")
    plt.plot(epochs, test_loss, label="test_loss")
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.legend()

    # Plot mean absolute error
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_mae, label="train_mae")
    plt.plot(epochs, test_mae, label="test_mae")
    plt.title("MAE")
    plt.xlabel("Epochs")
    plt.legend()


plot_loss_curves(results)
| #Recommendations | |
# Example: pick one known reviewer and resolve their embedding index.
user_id = "AHZM3GVSTF4MCGO67QFLXCNIXSIQ"
user_index = user_map[user_id]  # raises KeyError if this id is not in the reviews
def recommend(model, user_idx, item_indices, k=10, item_lookup=None):
    """Return the identifiers of the ``k`` highest-scoring candidate items for a user.

    Args:
        model: Module called as ``model(user_tensor, item_tensor)`` that
            returns one score per pair.
        user_idx: Integer index of the user.
        item_indices: Candidate item indices to score.
        k: Maximum number of items to return; clamped to the number of
            candidates.
        item_lookup: Optional sequence mapping an item index to its
            identifier. Defaults to the keys of the module-level ``item_map``
            in insertion order.

    Returns:
        List of item identifiers, best first. Empty when there are no
        candidates or ``k`` is not positive.
    """
    item_indices = list(item_indices)
    k = min(k, len(item_indices))
    if k <= 0:
        return []

    # Build the index -> identifier table once, not once per result.
    if item_lookup is None:
        item_lookup = list(item_map)

    # Run on whichever device the model already lives on rather than assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    user_tensor = torch.full((len(item_indices),), user_idx, dtype=torch.long, device=device)
    item_tensor = torch.tensor(item_indices, dtype=torch.long, device=device)
    with torch.no_grad():
        # reshape(-1) keeps the scores 1-D even with a single candidate.
        predictions = model(user_tensor, item_tensor).reshape(-1)

    # topk returns positions within `item_indices`; translate each position to
    # its item index before looking up the identifier.
    top_positions = torch.topk(predictions, k=k).indices.cpu().tolist()
    return [item_lookup[item_indices[position]] for position in top_positions]
# Score every item in the catalogue for the example user and print the top picks.
item_indices = [*range(len(item_map))]
recommendations = recommend(ncf_model, user_index, item_indices)
print("Recommended items:", recommendations)
| # import matplotlib.pyplot as plt | |
| # from PIL import Image | |
| # import requests | |
| # from io import BytesIO | |
| # def fetch_item_images_from_df(asins, meta_df): | |
| # items_with_images = [] | |
| # for asin in asins: | |
| # row = meta_df[meta_df["parent_asin"] == asin] | |
| # if not row.empty: | |
| # images = row["images"].iloc[0] | |
| # if images: # Check if images are available | |
| # #print(images["large"][0]) | |
| # items_with_images.append((asin, images["large"][0], row["title"])) | |
| # return items_with_images | |
| # def display_items(title, items): | |
| # print(items) | |
| # plt.figure(figsize=(15, 5)) | |
| # plt.suptitle(title, fontsize=16) | |
| # for idx, (asin, image_urls, title) in enumerate(items): | |
| # if image_urls: # Only display if images are available | |
| # try: | |
| # response = requests.get(image_urls) | |
| # img = Image.open(BytesIO(response.content)) | |
| # plt.subplot(1, len(items), idx + 1) | |
| # plt.imshow(img) | |
| # plt.axis("off") | |
| # plt.title(title) | |
| # except Exception as e: | |
| # print(f"Could not fetch image for ASIN {asin}: {e}") | |
| # plt.tight_layout() | |
| # plt.show() | |
| # # Fetch ASINs for bought items | |
| # #user_id = "user_1" | |
| # bought_asins = reviews_df[reviews_df["user_id"] == user_id]["asin"].tolist() | |
| # # Fetch images for recommended items | |
| # recommended_asins = recommend(ncf_model, user_index, list(range(len(item_map)))) | |
| # bought_items = fetch_item_images_from_df(bought_asins, meta_df) | |
| # recommended_items = fetch_item_images_from_df(recommended_asins, meta_df) | |
| # # Display images | |
| # display_items("User Bought Items", bought_items) | |
| # display_items("Recommended Items", recommended_items) | |
| import gradio as gr | |
| import torch | |
| from PIL import Image | |
| import requests | |
| from io import BytesIO | |
def _first_large_image(images):
    """Return the first entry under ``images["large"]``, or None if there is none."""
    if not images:
        return None
    try:
        large = images["large"]
    except (KeyError, TypeError, IndexError):
        return None
    if large is None or len(large) == 0:
        return None
    return large[0] or None


# Function to fetch item images from the DataFrame
def fetch_item_images_from_df(asins, meta_df):
    """Look up a display image and title for each ASIN.

    Args:
        asins: Iterable of ``parent_asin`` values, in the order to display.
        meta_df: DataFrame with "parent_asin", "images" and "title" columns;
            each "images" value is expected to be a mapping with a "large"
            sequence of image URLs.

    Returns:
        List of ``[image_url, title]`` pairs, in the order of ``asins``.
        ASINs that are missing from ``meta_df`` or have no usable large image
        are skipped.
    """
    wanted = list(asins)
    if not wanted:
        return []

    # Filter the metadata once instead of scanning the whole frame per ASIN;
    # setdefault keeps the first row for each ASIN.
    subset = meta_df[meta_df["parent_asin"].isin(wanted)]
    first_rows = {}
    for asin, images, title in zip(subset["parent_asin"], subset["images"], subset["title"]):
        first_rows.setdefault(asin, (images, title))

    items_with_images = []
    for asin in wanted:
        entry = first_rows.get(asin)
        if entry is None:
            continue
        images, title = entry
        image_url = _first_large_image(images)
        if image_url:  # Skip items without a usable image
            items_with_images.append([image_url, title])
    return items_with_images
# Function to recommend and fetch images for bought and recommended items
def recommend_and_display(user_id):
    """Return gallery entries for a user's reviewed items and their recommendations.

    Returns a ``(bought_items, recommended_items)`` pair; both lists are empty
    when ``user_id`` is not present in ``user_map``.
    """
    user_index = user_map.get(user_id)
    if user_index is None:
        # Unknown user: nothing to show in either gallery.
        return [], []

    # Items this user has reviewed.
    reviewed_by_user = reviews_df["user_id"] == user_id
    bought_asins = reviews_df.loc[reviewed_by_user, "parent_asin"].tolist()
    bought_items = fetch_item_images_from_df(bought_asins, meta_df)

    # Score the full catalogue and keep the top picks.
    all_item_indices = list(range(len(item_map)))
    recommended_asins = recommend(ncf_model, user_index, all_item_indices)
    recommended_items = fetch_item_images_from_df(recommended_asins, meta_df)

    return bought_items, recommended_items
# Gradio callback: one gallery payload per output component
def gradio_interface(user_id):
    """Return ``(bought, recommended)`` gallery entries for ``user_id``."""
    bought_gallery, recommended_gallery = recommend_and_display(user_id)
    return (bought_gallery, recommended_gallery)
# Gradio UI: one text input feeding two image galleries.
_user_id_box = gr.Textbox(label="Enter User ID")
_galleries = [
    gr.Gallery(label="Bought Items"),
    gr.Gallery(label="Recommended Items"),
]

interface = gr.Interface(
    fn=gradio_interface,
    inputs=_user_id_box,
    outputs=_galleries,
    title="Amazon Recommender",
    description="Enter a User ID to see images of bought and recommended items.",
    live=True,  # re-run the callback as the input changes
)

# Start the app and request a public share link.
interface.launch(share=True)