import datetime
import importlib.util
import math
import pickle
import sys
from typing import Any, Optional, Tuple, TypedDict

import pandas as pd
import torch
from huggingface_hub import hf_hub_download
| |
|
| |
|
| | """ |
| | Data container class representing the data shape of the synapse coming into `run_inference` |
| | """ |
| |
|
| |
|
class ProcessedSynapse(TypedDict):
    """Typed dictionary describing the synapse payload handed to `run_inference`.

    Every field is declared `Optional`, so a value may be `None`; consumers
    should be prepared to substitute defaults.
    """

    # Identifiers
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]

    # Address
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]

    # Listing attributes
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]

    # Coordinates
    latitude: Optional[float]
    longitude: Optional[float]

    # Remaining listing details
    property_type: Optional[str]
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
| |
|
| |
|
| | """ |
| | This class must do two things: |
| | 1) The constructor must load the model. |
| | 2) The class must implement a method called `run_inference` that takes the input data and returns a |
| | (float, str) tuple holding the predicted sale price and the predicted sale date. |
| | """ |
| |
|
| |
|
class MLBaseModelDriver:
    """
    Driver that downloads the base model artifacts from Hugging Face, loads them,
    and exposes `run_inference` to predict a sale price and a sale date.
    """

    def __init__(self):
        # NOTE(review): `label_encoder` is loaded and stored but not read by any
        # method of this class; confirm whether external code relies on it.
        self.model, self.label_encoder, self.scaler = self.load_model()

    def load_model(self) -> Tuple[Any, Any, Any]:
        """
        load the model and model parameters
        :return: model, label encoder, and scaler
        """
        print("Loading model...")
        model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
        model_class = self._import_model_class(model_class_file)

        # input_dim=4 matches the four feature columns built in `_preprocess_input`.
        model = model_class(input_dim=4)
        # SECURITY: weights_only=False allows arbitrary objects to be unpickled from
        # the downloaded checkpoint. Only safe if the Hugging Face repo is trusted.
        # TODO(review): switch to weights_only=True if the checkpoint is a plain
        # tensor state dict.
        state_dict = torch.load(model_file, weights_only=False)
        model.load_state_dict(state_dict)
        model.eval()

        # SECURITY: pickle.load executes code embedded in the file; these files are
        # downloaded, so they must come from a trusted repository.
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)

        with open(label_encoders_file, 'rb') as f:
            label_encoders = pickle.load(f)

        print("Model Loaded.")
        return model, label_encoders, scaler

    def _download_model_files(self) -> Tuple[str, str, str, str]:
        """
        download files from hugging face
        :return: local paths of the model weights, scaler, label encoder, and model class file
        """
        model_path = "ckoozzzu/NextPlace"

        model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
        scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
        label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
        model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")

        return model_file, scaler_file, label_encoders_file, model_class_file

    def _import_model_class(self, model_class_file):
        """
        import the module at `model_class_file` and return its `MLBaseModel` class
        :param model_class_file: file path to the model class
        :return: the `MLBaseModel` class object (not an instance)
        :raises ImportError: if no import spec can be built for the file
        :raises AttributeError: if the module does not define `MLBaseModel`
        """
        module_name = "MLBaseModel"
        spec = importlib.util.spec_from_file_location(module_name, model_class_file)
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not create an import spec for '{model_class_file}'")

        model_module = importlib.util.module_from_spec(spec)
        # Register before executing so the module can be found by name while it runs.
        sys.modules[module_name] = model_module
        try:
            # SECURITY: this executes downloaded Python source; the repo must be trusted.
            spec.loader.exec_module(model_module)
        except BaseException:
            # Do not leave a half-initialised module registered after a failed import.
            sys.modules.pop(module_name, None)
            raise

        if hasattr(model_module, "MLBaseModel"):
            return model_module.MLBaseModel
        raise AttributeError("The module does not contain a class named 'MLBaseModel'")

    def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
        """
        run inference using the MLBaseModel
        :param input_data: synapse from the validator
        :return: the predicted sale price and the predicted sale date formatted as YYYY-MM-DD
        """
        input_tensor = self._preprocess_input(input_data)

        with torch.no_grad():
            prediction = self.model(input_tensor)

        # The first row of the output is unpacked as (sale price, days on market).
        predicted_sale_price, predicted_days_on_market = prediction[0].numpy()
        predicted_days_on_market = math.floor(predicted_days_on_market)
        predicted_sale_date = self._sale_date_predictor(
            input_data.get('days_on_market'), predicted_days_on_market
        )

        return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")

    def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date:
        """
        convert predicted days on market to a sale date
        :param days_on_market: number of days this home has been on the market; `None` is treated as 0
        :param predicted_days_on_market: the predicted number of days for this home on the market
        :return: today plus the remaining days, or tomorrow when the prediction has already elapsed
        """
        today = datetime.date.today()
        # The synapse declares this field Optional; comparing None with an int would raise.
        elapsed_days = 0 if days_on_market is None else days_on_market
        days_until_sale = predicted_days_on_market - elapsed_days
        if days_until_sale > 0:
            return today + datetime.timedelta(days=days_until_sale)
        # Listing has already been on the market at least as long as predicted.
        return today + datetime.timedelta(days=1)

    def _preprocess_input(self, data: ProcessedSynapse) -> torch.Tensor:
        """
        preprocess the input for inference
        :param data: synapse from the validator
        :return: float32 tensor with one row holding beds, sqft, property_type, price
        """
        df = pd.DataFrame([data])
        default_beds = 3
        default_sqft = 1500.0
        default_property_type = '6'

        # Coerce to numeric first: a None/str value otherwise leaves an object-dtype
        # column, which torch.tensor cannot convert.
        df['beds'] = pd.to_numeric(df['beds'], errors='coerce').fillna(default_beds)
        df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
        df['property_type'] = df['property_type'].fillna(default_property_type).astype(int)

        # The scaler is applied to sqft and price together, in this column order.
        df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']])

        features = df[['beds', 'sqft', 'property_type', 'price']]
        return torch.tensor(features.to_numpy(dtype=float), dtype=torch.float32)
| |
|