import datetime
import importlib.util
import math
import pickle
import sys
from typing import Any, Optional, Tuple, TypedDict

import pandas as pd
import torch
from huggingface_hub import hf_hub_download
|
|
|
|
class ProcessedSynapse(TypedDict):
    """
    Data container class representing the data shape of the synapse coming into `run_inference`

    Every key is declared with an `Optional` value type, so consumers must be
    prepared for any individual value to be `None`.
    """

    # Identifiers
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]

    # Location
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]

    # Listing attributes
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]
    latitude: Optional[float]
    longitude: Optional[float]
    property_type: Optional[str]
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
|
|
|
|
class MLBaseModelDriver:
    """
    Driver that loads the base model and runs inference with it.

    This class must do two things
    1) The constructor must load the model
    2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
       of float, str representing the predicted sale price and the predicted sale date.
    """

    def __init__(self):
        self.model, self.label_encoder, self.scaler = self.load_model()

    def load_model(self) -> Tuple[Any, Any, Any]:
        """
        load the model and model parameters
        :return: model, label encoder, and scaler
        """
        print("Loading model...")
        model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
        model_class = self._import_model_class(model_class_file)

        # input_dim=4 matches the four feature columns built in `_preprocess_input`
        # (beds, sqft, property_type, price).
        model = model_class(input_dim=4)
        # map_location="cpu" lets a checkpoint that was saved from a GPU load on a
        # CPU-only host; the freshly constructed model lives on the CPU anyway.
        # NOTE(review): weights_only=False allows arbitrary unpickling of the
        # downloaded checkpoint. Only safe while the repository is trusted;
        # consider weights_only=True if the file is a plain state dict.
        state_dict = torch.load(model_file, map_location="cpu", weights_only=False)
        model.load_state_dict(state_dict)
        model.eval()

        # NOTE(review): pickle.load executes code embedded in the file; these
        # artifacts come from a remote repository and must be trusted.
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)

        with open(label_encoders_file, 'rb') as f:
            label_encoders = pickle.load(f)

        print("Model Loaded.")
        return model, label_encoders, scaler

    def _download_model_files(self) -> Tuple[str, str, str, str]:
        """
        download files from hugging face
        :return: local paths of the model weights, scaler, label encoder and model class file (in that order)
        """
        model_path = "Nickel5HF/NextPlace"

        model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
        scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
        label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
        model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")

        return model_file, scaler_file, label_encoders_file, model_class_file

    def _import_model_class(self, model_class_file):
        """
        import the module at `model_class_file` and return its `MLBaseModel` class
        :param model_class_file: file path to the model class
        :return: the `MLBaseModel` class object (not an instance)
        :raises ImportError: if no import spec/loader can be built for the file
        :raises AttributeError: if the module does not define `MLBaseModel`
        """
        module_name = "MLBaseModel"
        spec = importlib.util.spec_from_file_location(module_name, model_class_file)
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not load module '{module_name}' from {model_class_file}")

        model_module = importlib.util.module_from_spec(spec)
        # Register before executing so the module can be found by name while its
        # own body runs (and later by pickle when resolving classes).
        # NOTE(review): this executes downloaded Python code; the source
        # repository must be trusted.
        sys.modules[module_name] = model_module
        try:
            spec.loader.exec_module(model_module)
        except Exception:
            # Do not leave a half-initialised module registered.
            sys.modules.pop(module_name, None)
            raise

        if hasattr(model_module, "MLBaseModel"):
            return model_module.MLBaseModel
        raise AttributeError("The module does not contain a class named 'MLBaseModel'")

    def run_inference(self, input_data: "ProcessedSynapse") -> Tuple[float, str]:
        """
        run inference using the MLBaseModel
        :param input_data: synapse from the validator
        :return: the predicted sale price and the predicted sale date formatted as YYYY-MM-DD
        """
        input_tensor = self._preprocess_input(input_data)

        with torch.no_grad():
            prediction = self.model(input_tensor)

        # The first output row holds exactly two values: price and days on market.
        predicted_sale_price, predicted_days_on_market = prediction[0].tolist()
        predicted_days_on_market = math.floor(predicted_days_on_market)

        # `days_on_market` may be absent or None; the date predictor handles None.
        predicted_sale_date = self._sale_date_predictor(
            input_data.get('days_on_market'), predicted_days_on_market
        )

        return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")

    def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date:
        """
        convert predicted days on market to a sale date
        :param days_on_market: number of days this home has been on the market (None is treated as 0)
        :param predicted_days_on_market: the predicted number of days for this home on the market
        :return: the predicted sale date; tomorrow when the prediction has already elapsed
        """
        if days_on_market is None:
            # Unknown time on market: count the prediction from today.
            days_on_market = 0

        today = datetime.date.today()
        if days_on_market < predicted_days_on_market:
            days_until_sale = predicted_days_on_market - days_on_market
            return today + datetime.timedelta(days=days_until_sale)
        # The home has already been listed at least as long as predicted.
        return today + datetime.timedelta(days=1)

    def _preprocess_input(self, data: "ProcessedSynapse") -> torch.Tensor:
        """
        preprocess the input for inference
        :param data: synapse from the validator
        :return: float32 tensor with one row and the columns beds, sqft, property_type, price
        """
        df = pd.DataFrame([data])
        default_beds = 3
        default_sqft = 1500.0
        default_property_type = '6'

        # Coerce to numeric before filling so the column never stays object-dtype,
        # which would make `X.values` an object array that torch cannot convert.
        df['beds'] = pd.to_numeric(df['beds'], errors='coerce').fillna(default_beds)
        df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
        df['property_type'] = df['property_type'].fillna(default_property_type)
        df['property_type'] = df['property_type'].astype(int)

        # The scaler is applied to sqft and price together, in this column order.
        df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']])

        X = df[['beds', 'sqft', 'property_type', 'price']]
        input_tensor = torch.tensor(X.values, dtype=torch.float32)
        return input_tensor
|
|