feat: upload model

Browse files

Files changed (4) hide show

README.md +10 -0
config.json +27 -0
model.py +707 -0
model.safetensors +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Code: [More Information Needed]
+- Paper: [More Information Needed]
+- Docs: [More Information Needed]

config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "activation": "gelu",
+  "affine": false,
+  "attn_dropout": 0.0,
+  "c_in": 1,
+  "c_out": null,
+  "classification": true,
+  "d_ff": 2048,
+  "d_model": 512,
+  "decomposition": false,
+  "dropout": 0.05,
+  "individual": false,
+  "kernel_size": 25,
+  "n_heads": 8,
+  "n_layers": 2,
+  "norm": "BatchNorm",
+  "padding_patch": true,
+  "patch_len": 16,
+  "pre_norm": false,
+  "pred_dim": 2,
+  "res_attention": true,
+  "revin": true,
+  "seq_len": 82,
+  "store_attn": false,
+  "stride": 8,
+  "subtract_last": false
+}

model.py ADDED Viewed

	@@ -0,0 +1,707 @@

+import warnings
+from typing import Optional
+import torch
+from torch import nn
+from torch.nn import Module
+import torch.nn.functional as F
+from torch import Tensor
+from huggingface_hub import PyTorchModelHubMixin
+warnings.filterwarnings("ignore", category=UserWarning)
+class Transpose(Module):
+    def __init__(self, *dims, contiguous=False):
+        super(Transpose, self).__init__()
+        self.dims, self.contiguous = dims, contiguous
+    def forward(self, x):
+        if self.contiguous:
+            return x.transpose(*self.dims).contiguous()
+        else:
+            return x.transpose(*self.dims)
+    def __repr__(self):
+        if self.contiguous:
+            return f"{self.__class__.__name__}(dims={', '.join([str(d) for d in self.dims])}).contiguous()"
+        else:
+            return (
+                f"{self.__class__.__name__}({', '.join([str(d) for d in self.dims])})"
+            )
+pytorch_acts = [
+    nn.ELU,
+    nn.LeakyReLU,
+    nn.PReLU,
+    nn.ReLU,
+    nn.ReLU6,
+    nn.SELU,
+    nn.CELU,
+    nn.GELU,
+    nn.Sigmoid,
+    nn.Softplus,
+    nn.Tanh,
+    nn.Softmax,
+]
+pytorch_act_names = [a.__name__.lower() for a in pytorch_acts]
+def get_act_fn(act, **act_kwargs):
+    if act is None:
+        return
+    elif isinstance(act, nn.Module):
+        return act
+    elif callable(act):
+        return act(**act_kwargs)
+    idx = pytorch_act_names.index(act.lower())
+    return pytorch_acts[idx](**act_kwargs)
+class RevIN(nn.Module):
+    def __init__(
+        self,
+        c_in: int,
+        affine: bool = True,
+        subtract_last: bool = False,
+        dim: int = 2,
+        eps: float = 1e-5,
+    ):
+        super().__init__()
+        self.c_in, self.affine, self.subtract_last, self.dim, self.eps = (
+            c_in,
+            affine,
+            subtract_last,
+            dim,
+            eps,
+        )
+        if self.affine:
+            self.weight = nn.Parameter(torch.ones(1, c_in, 1))
+            self.bias = nn.Parameter(torch.zeros(1, c_in, 1))
+    def forward(self, x: Tensor, mode: Tensor):
+        if mode:
+            return self.normalize(x)
+        else:
+            return self.denormalize(x)
+    def normalize(self, x):
+        if self.subtract_last:
+            self.sub = x[..., -1].unsqueeze(-1).detach()
+        else:
+            self.sub = torch.mean(x, dim=-1, keepdim=True).detach()
+        self.std = (
+            torch.std(x, dim=-1, keepdim=True, unbiased=False).detach() + self.eps
+        )
+        if self.affine:
+            x = x.sub(self.sub)
+            x = x.div(self.std)
+            x = x.mul(self.weight)
+            x = x.add(self.bias)
+            return x
+        else:
+            x = x.sub(self.sub)
+            x = x.div(self.std)
+            return x
+    def denormalize(self, x):
+        if self.affine:
+            x = x.sub(self.bias)
+            x = x.div(self.weight)
+            x = x.mul(self.std)
+            x = x.add(self.sub)
+            return x
+        else:
+            x = x.mul(self.std)
+            x = x.add(self.sub)
+            return x
+class MovingAverage(nn.Module):
+    def __init__(
+        self,
+        kernel_size: int,
+    ):
+        super().__init__()
+        padding_left = (kernel_size - 1) // 2
+        padding_right = kernel_size - padding_left - 1
+        self.padding = torch.nn.ReplicationPad1d((padding_left, padding_right))
+        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=1)
+    def forward(self, x: Tensor):
+        return self.avg(self.padding(x))
+class SeriesDecomposition(nn.Module):
+    def __init__(
+        self,
+        kernel_size: int,  # the size of the window
+    ):
+        super().__init__()
+        self.moving_avg = MovingAverage(kernel_size)
+    def forward(self, x: Tensor):
+        moving_mean = self.moving_avg(x)
+        residual = x - moving_mean
+        return residual, moving_mean
+class _ScaledDotProductAttention(nn.Module):
+    def __init__(self, d_model, n_heads, attn_dropout=0.0, res_attention=False):
+        super().__init__()
+        self.attn_dropout = nn.Dropout(attn_dropout)
+        self.res_attention = res_attention
+        head_dim = d_model // n_heads
+        self.scale = nn.Parameter(torch.tensor(head_dim**-0.5), requires_grad=False)
+    def forward(self, q: Tensor, k: Tensor, v: Tensor, prev: Optional[Tensor] = None):
+        attn_scores = torch.matmul(q, k) * self.scale
+        if prev is not None:
+            attn_scores = attn_scores + prev
+        attn_weights = F.softmax(attn_scores, dim=-1)
+        attn_weights = self.attn_dropout(attn_weights)
+        output = torch.matmul(attn_weights, v)
+        if self.res_attention:
+            return output, attn_weights, attn_scores
+        else:
+            return output, attn_weights
+class _MultiheadAttention(nn.Module):
+    def __init__(
+        self,
+        d_model,
+        n_heads,
+        d_k=None,
+        d_v=None,
+        res_attention=False,
+        attn_dropout=0.0,
+        proj_dropout=0.0,
+        qkv_bias=True,
+    ):
+        "Multi Head Attention Layer"
+        super().__init__()
+        d_k = d_v = d_model // n_heads
+        self.n_heads, self.d_k, self.d_v = n_heads, d_k, d_v
+        self.W_Q = nn.Linear(d_model, d_k * n_heads, bias=qkv_bias)
+        self.W_K = nn.Linear(d_model, d_k * n_heads, bias=qkv_bias)
+        self.W_V = nn.Linear(d_model, d_v * n_heads, bias=qkv_bias)
+        # Scaled Dot-Product Attention (multiple heads)
+        self.res_attention = res_attention
+        self.sdp_attn = _ScaledDotProductAttention(
+            d_model,
+            n_heads,
+            attn_dropout=attn_dropout,
+            res_attention=self.res_attention,
+        )
+        # Poject output
+        self.to_out = nn.Sequential(
+            nn.Linear(n_heads * d_v, d_model), nn.Dropout(proj_dropout)
+        )
+    def forward(
+        self,
+        Q: Tensor,
+        K: Optional[Tensor] = None,
+        V: Optional[Tensor] = None,
+        prev: Optional[Tensor] = None,
+    ):
+        bs = Q.size(0)
+        if K is None:
+            K = Q
+        if V is None:
+            V = Q
+        # Linear (+ split in multiple heads)
+        q_s = (
+            self.W_Q(Q).view(bs, -1, self.n_heads, self.d_k).transpose(1, 2)
+        )  # q_s: [bs x n_heads x max_q_len x d_k]
+        k_s = (
+            self.W_K(K).view(bs, -1, self.n_heads, self.d_k).permute(0, 2, 3, 1)
+        )  # k_s: [bs x n_heads x d_k x q_len] - transpose(1,2) + transpose(2,3)
+        v_s = (
+            self.W_V(V).view(bs, -1, self.n_heads, self.d_v).transpose(1, 2)
+        )  # v_s: [bs x n_heads x q_len x d_v]
+        # Apply Scaled Dot-Product Attention (multiple heads)
+        if self.res_attention:
+            output, attn_weights, attn_scores = self.sdp_attn(q_s, k_s, v_s, prev=prev)
+        else:
+            output, attn_weights = self.sdp_attn(q_s, k_s, v_s)
+        # output: [bs x n_heads x q_len x d_v], attn: [bs x n_heads x q_len x q_len], scores: [bs x n_heads x max_q_len x q_len]
+        # back to the original inputs dimensions
+        output = (
+            output.transpose(1, 2).contiguous().view(bs, -1, self.n_heads * self.d_v)
+        )  # output: [bs x q_len x n_heads * d_v]
+        output = self.to_out(output)
+        if self.res_attention:
+            return output, attn_weights, attn_scores
+        else:
+            return output, attn_weights
+class Flatten_Head(nn.Module):
+    def __init__(self, individual, n_vars, nf, pred_dim):
+        super().__init__()
+        if isinstance(pred_dim, (tuple, list)):
+            pred_dim = pred_dim[-1]
+        self.individual = individual
+        self.n = n_vars if individual else 1
+        self.nf, self.pred_dim = nf, pred_dim
+        if individual:
+            self.layers = nn.ModuleList()
+            for i in range(self.n):
+                self.layers.append(
+                    nn.Sequential(nn.Flatten(start_dim=-2), nn.Linear(nf, pred_dim))
+                )
+        else:
+            self.layer = nn.Sequential(
+                nn.Flatten(start_dim=-2), nn.Linear(nf, pred_dim)
+            )
+    def forward(self, x: Tensor):
+        """
+        Args:
+            x: [bs x nvars x d_model x n_patch]
+            output: [bs x nvars x pred_dim]
+        """
+        if self.individual:
+            x_out = []
+            for i, layer in enumerate(self.layers):
+                x_out.append(layer(x[:, i]))
+            x = torch.stack(x_out, dim=1)
+            return x
+        else:
+            return self.layer(x)
+class _TSTiEncoderLayer(nn.Module):
+    """One encoder layer: multi-head self-attention followed by a position-wise
+    feed-forward network, each with a residual connection and a normalization.
+    """
+    def __init__(
+        self,
+        q_len,
+        d_model,
+        n_heads,
+        d_k=None,
+        d_v=None,
+        d_ff=256,
+        store_attn=False,
+        norm="BatchNorm",
+        attn_dropout=0,
+        dropout=0.0,
+        bias=True,
+        activation="gelu",
+        res_attention=False,
+        pre_norm=False,
+    ):
+        """
+        Args:
+            q_len: accepted but not used inside this layer.
+            d_model: feature width; must be divisible by ``n_heads``.
+            n_heads: number of attention heads.
+            d_k, d_v: per-head widths; default to ``d_model // n_heads``.
+            d_ff: hidden width of the feed-forward network.
+            store_attn: keep the last attention weights on ``self.attn``.
+            norm: any value containing "batch" (case-insensitive) selects
+                BatchNorm1d over the feature dimension; anything else
+                selects LayerNorm.
+            attn_dropout: dropout passed to the attention module.
+            dropout: dropout used for the attention output projection, the
+                feed-forward network and both residual branches.
+            bias: whether the feed-forward linear layers have a bias.
+            activation: activation spec resolved through ``get_act_fn``.
+            res_attention: pass attention scores from layer to layer.
+            pre_norm: normalize before each sub-layer instead of after.
+        """
+        super().__init__()
+        assert (
+            not d_model % n_heads
+        ), f"d_model ({d_model}) must be divisible by n_heads ({n_heads})"
+        d_k = d_model // n_heads if d_k is None else d_k
+        d_v = d_model // n_heads if d_v is None else d_v
+        # Multi-Head attention
+        self.res_attention = res_attention
+        self.self_attn = _MultiheadAttention(
+            d_model,
+            n_heads,
+            d_k,
+            d_v,
+            attn_dropout=attn_dropout,
+            proj_dropout=dropout,
+            res_attention=res_attention,
+        )
+        # Add & Norm
+        self.dropout_attn = nn.Dropout(dropout)
+        if "batch" in norm.lower():
+            # BatchNorm1d normalizes dim 1, so d_model is moved there and back.
+            self.norm_attn = nn.Sequential(
+                Transpose(1, 2), nn.BatchNorm1d(d_model), Transpose(1, 2)
+            )
+        else:
+            self.norm_attn = nn.LayerNorm(d_model)
+        # Position-wise Feed-Forward
+        self.ff = nn.Sequential(
+            nn.Linear(d_model, d_ff, bias=bias),
+            get_act_fn(activation),
+            nn.Dropout(dropout),
+            nn.Linear(d_ff, d_model, bias=bias),
+        )
+        # Add & Norm
+        self.dropout_ffn = nn.Dropout(dropout)
+        if "batch" in norm.lower():
+            self.norm_ffn = nn.Sequential(
+                Transpose(1, 2), nn.BatchNorm1d(d_model), Transpose(1, 2)
+            )
+        else:
+            self.norm_ffn = nn.LayerNorm(d_model)
+        self.pre_norm = pre_norm
+        self.store_attn = store_attn
+    def forward(self, src: Tensor, prev: Optional[Tensor] = None):
+        """
+        Args:
+            src: [bs x q_len x d_model]
+            prev: attention scores from the previous layer; only forwarded
+                to the attention module when ``res_attention`` is set.
+        Returns:
+            The transformed ``src``; when ``res_attention`` is set, a tuple
+            ``(src, scores)`` with this layer's attention scores.
+        """
+        # Multi-Head attention sublayer
+        if self.pre_norm:
+            # src is overwritten, so the residual below adds the normalized input.
+            src = self.norm_attn(src)
+        ## Multi-Head attention
+        if self.res_attention:
+            src2, attn, scores = self.self_attn(src, src, src, prev)
+        else:
+            src2, attn = self.self_attn(src, src, src)
+        if self.store_attn:
+            self.attn = attn
+        ## Add & Norm
+        src = src + self.dropout_attn(
+            src2
+        )  # Add: residual connection with residual dropout
+        if not self.pre_norm:
+            src = self.norm_attn(src)
+        # Feed-forward sublayer
+        if self.pre_norm:
+            src = self.norm_ffn(src)
+        ## Position-wise Feed-Forward
+        src2 = self.ff(src)
+        ## Add & Norm
+        src = src + self.dropout_ffn(
+            src2
+        )  # Add: residual connection with residual dropout
+        if not self.pre_norm:
+            src = self.norm_ffn(src)
+        if self.res_attention:
+            return src, scores
+        else:
+            return src
+class _TSTiEncoder(nn.Module):  # i means channel-independent
+    """Channel-independent patch encoder.
+    Each variable's patches are projected to ``d_model``, given a learned
+    positional embedding, and passed through a stack of encoder layers with
+    the variables folded into the batch dimension.
+    """
+    def __init__(
+        self,
+        c_in,
+        patch_num,
+        patch_len,
+        n_layers=3,
+        d_model=128,
+        n_heads=16,
+        d_k=None,
+        d_v=None,
+        d_ff=256,
+        norm="BatchNorm",
+        attn_dropout=0.0,
+        dropout=0.0,
+        act="gelu",
+        store_attn=False,
+        res_attention=True,
+        pre_norm=False,
+    ):
+        """
+        Args:
+            c_in: accepted but not used inside this encoder.
+            patch_num: number of patches per series (the attention length).
+            patch_len: number of time steps in each patch.
+            n_layers: number of stacked encoder layers.
+            Remaining arguments are forwarded to every ``_TSTiEncoderLayer``.
+        """
+        super().__init__()
+        self.patch_num = patch_num
+        self.patch_len = patch_len
+        # Input encoding
+        q_len = patch_num
+        self.W_P = nn.Linear(
+            patch_len, d_model
+        )  # Eq 1: projection of feature vectors onto a d-dim vector space
+        self.seq_len = q_len
+        # Positional encoding
+        # Learned embedding, one row per patch, initialized uniformly in [-0.02, 0.02].
+        W_pos = torch.empty((q_len, d_model))
+        nn.init.uniform_(W_pos, -0.02, 0.02)
+        self.W_pos = nn.Parameter(W_pos)
+        # Residual dropout
+        self.dropout = nn.Dropout(dropout)
+        # Encoder
+        self.layers = nn.ModuleList(
+            [
+                _TSTiEncoderLayer(
+                    q_len,
+                    d_model,
+                    n_heads=n_heads,
+                    d_k=d_k,
+                    d_v=d_v,
+                    d_ff=d_ff,
+                    norm=norm,
+                    attn_dropout=attn_dropout,
+                    dropout=dropout,
+                    activation=act,
+                    res_attention=res_attention,
+                    pre_norm=pre_norm,
+                    store_attn=store_attn,
+                )
+                for i in range(n_layers)
+            ]
+        )
+        self.res_attention = res_attention
+    def forward(self, x: Tensor):
+        """
+        Args:
+            x: [bs x nvars x patch_len x patch_num]
+        Returns:
+            Tensor of shape [bs x nvars x d_model x patch_num].
+        """
+        n_vars = x.shape[1]
+        # Input encoding
+        x = x.permute(0, 1, 3, 2)  # x: [bs x nvars x patch_num x patch_len]
+        x = self.W_P(x)  # x: [bs x nvars x patch_num x d_model]
+        # Fold variables into the batch so each one is encoded independently.
+        x = torch.reshape(
+            x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])
+        )  # x: [bs * nvars x patch_num x d_model]
+        x = self.dropout(x + self.W_pos)  # x: [bs * nvars x patch_num x d_model]
+        # Encoder
+        if self.res_attention:
+            # Each layer receives the attention scores produced by the previous one.
+            scores = None
+            for mod in self.layers:
+                x, scores = mod(x, prev=scores)
+        else:
+            for mod in self.layers:
+                x = mod(x)
+        x = torch.reshape(
+            x, (-1, n_vars, x.shape[-2], x.shape[-1])
+        )  # x: [bs x nvars x patch_num x d_model]
+        x = x.permute(0, 1, 3, 2)  # x: [bs x nvars x d_model x patch_num]
+        return x
+class _PatchTST_backbone(nn.Module):
+    def __init__(
+        self,
+        c_in,
+        seq_len,
+        pred_dim,
+        patch_len,
+        stride,
+        n_layers=3,
+        d_model=128,
+        n_heads=16,
+        d_k=None,
+        d_v=None,
+        d_ff=256,
+        norm="BatchNorm",
+        attn_dropout=0.0,
+        dropout=0.0,
+        act="gelu",
+        res_attention=True,
+        pre_norm=False,
+        store_attn=False,
+        padding_patch=True,
+        individual=False,
+        revin=True,
+        affine=True,
+        subtract_last=False,
+    ):
+        super().__init__()
+        self.revin = revin
+        self.revin_layer = RevIN(c_in, affine=affine, subtract_last=subtract_last)
+        self.patch_len = patch_len
+        self.stride = stride
+        self.padding_patch = padding_patch
+        patch_num = int((seq_len - patch_len) / stride + 1) + 1
+        self.patch_num = patch_num
+        self.padding_patch_layer = nn.ReplicationPad1d((stride, 0))
+        self.unfold = nn.Unfold(kernel_size=(1, patch_len), stride=stride)
+        self.patch_len = patch_len
+        self.backbone = _TSTiEncoder(
+            c_in,
+            patch_num=patch_num,
+            patch_len=patch_len,
+            n_layers=n_layers,
+            d_model=d_model,
+            n_heads=n_heads,
+            d_k=d_k,
+            d_v=d_v,
+            d_ff=d_ff,
+            attn_dropout=attn_dropout,
+            dropout=dropout,
+            act=act,
+            res_attention=res_attention,
+            pre_norm=pre_norm,
+            store_attn=store_attn,
+        )
+        # Head
+        self.head_nf = d_model * patch_num
+        self.n_vars = c_in
+        self.individual = individual
+        self.head = Flatten_Head(self.individual, self.n_vars, self.head_nf, pred_dim)
+    def forward(self, z: Tensor):
+        """
+        Args:
+            z: [bs x c_in x seq_len]
+        """
+        if self.revin:
+            z = self.revin_layer(z, torch.tensor(True, dtype=torch.bool))
+        z = self.padding_patch_layer(z)
+        b, c, s = z.size()
+        z = z.reshape(-1, 1, 1, s)
+        z = self.unfold(z)
+        z = z.permute(0, 2, 1).reshape(b, c, -1, self.patch_len).permute(0, 1, 3, 2)
+        z = self.backbone(z)
+        z = self.head(z)
+        if self.revin:
+            z = self.revin_layer(z, torch.tensor(False, dtype=torch.bool))
+        return z
+class PatchTST(nn.Module, PyTorchModelHubMixin):
+    def __init__(
+        self,
+        c_in,
+        c_out,
+        seq_len,
+        pred_dim=None,
+        n_layers=2,
+        n_heads=8,
+        d_model=512,
+        d_ff=2048,
+        dropout=0.05,
+        attn_dropout=0.0,
+        patch_len=16,
+        stride=8,
+        padding_patch=True,
+        revin=True,
+        affine=False,
+        individual=False,
+        subtract_last=False,
+        decomposition=False,
+        kernel_size=25,
+        activation="gelu",
+        norm="BatchNorm",
+        pre_norm=False,
+        res_attention=True,
+        store_attn=False,
+        classification=False,
+    ):
+        super().__init__()
+        if pred_dim is None:
+            pred_dim = seq_len
+        self.decomposition = decomposition
+        if self.decomposition:
+            self.decomp_module = SeriesDecomposition(kernel_size)
+            self.model_trend = _PatchTST_backbone(
+                c_in=c_in,
+                seq_len=seq_len,
+                pred_dim=pred_dim,
+                patch_len=patch_len,
+                stride=stride,
+                n_layers=n_layers,
+                d_model=d_model,
+                n_heads=n_heads,
+                d_ff=d_ff,
+                norm=norm,
+                attn_dropout=attn_dropout,
+                dropout=dropout,
+                act=activation,
+                res_attention=res_attention,
+                pre_norm=pre_norm,
+                store_attn=store_attn,
+                padding_patch=padding_patch,
+                individual=individual,
+                revin=revin,
+                affine=affine,
+                subtract_last=subtract_last,
+            )
+            self.model_res = _PatchTST_backbone(
+                c_in=c_in,
+                seq_len=seq_len,
+                pred_dim=pred_dim,
+                patch_len=patch_len,
+                stride=stride,
+                n_layers=n_layers,
+                d_model=d_model,
+                n_heads=n_heads,
+                d_ff=d_ff,
+                norm=norm,
+                attn_dropout=attn_dropout,
+                dropout=dropout,
+                act=activation,
+                res_attention=res_attention,
+                pre_norm=pre_norm,
+                store_attn=store_attn,
+                padding_patch=padding_patch,
+                individual=individual,
+                revin=revin,
+                affine=affine,
+                subtract_last=subtract_last,
+            )
+            self.patch_num = self.model_trend.patch_num
+        else:
+            self.model = _PatchTST_backbone(
+                c_in=c_in,
+                seq_len=seq_len,
+                pred_dim=pred_dim,
+                patch_len=patch_len,
+                stride=stride,
+                n_layers=n_layers,
+                d_model=d_model,
+                n_heads=n_heads,
+                d_ff=d_ff,
+                norm=norm,
+                attn_dropout=attn_dropout,
+                dropout=dropout,
+                act=activation,
+                res_attention=res_attention,
+                pre_norm=pre_norm,
+                store_attn=store_attn,
+                padding_patch=padding_patch,
+                individual=individual,
+                revin=revin,
+                affine=affine,
+                subtract_last=subtract_last,
+            )
+            self.patch_num = self.model.patch_num
+        self.classification = classification
+    def forward(self, x):
+        if self.decomposition:
+            res_init, trend_init = self.decomp_module(x)
+            res = self.model_res(res_init)
+            trend = self.model_trend(trend_init)
+            x = res + trend
+        else:
+            x = self.model(x)
+        if self.classification:
+            x = x.squeeze(-2)
+        return x

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36ca37c96811a3cd2f528d01f9203be172cc9d9ea38562d6cb30baf682c1f332
+size 25337280