VisionLanguageGroup commited on
Commit
4ce5a27
Β·
1 Parent(s): 02e04fb
Files changed (31) hide show
  1. _utils/load_track_data.py +1 -1
  2. _utils/track_args.py +1 -96
  3. inference_track.py +1 -1
  4. models/tra_post_model/{trackastra/data/wrfeat.py β†’ data.py} +0 -348
  5. models/tra_post_model/{trackastra/model/model.py β†’ model.py} +280 -12
  6. models/tra_post_model/trackastra/data/__init__.py +0 -18
  7. models/tra_post_model/trackastra/data/_check_ctc.py +0 -114
  8. models/tra_post_model/trackastra/data/_compute_overlap.py +0 -209
  9. models/tra_post_model/trackastra/data/augmentations.py +0 -557
  10. models/tra_post_model/trackastra/data/data.py +0 -1509
  11. models/tra_post_model/trackastra/data/distributed.py +0 -316
  12. models/tra_post_model/trackastra/data/example_data.py +0 -48
  13. models/tra_post_model/trackastra/data/features.py +0 -148
  14. models/tra_post_model/trackastra/data/matching.py +0 -251
  15. models/tra_post_model/trackastra/data/utils.py +0 -232
  16. models/tra_post_model/trackastra/model/__init__.py +0 -4
  17. models/tra_post_model/trackastra/model/model_api.py +0 -338
  18. models/tra_post_model/trackastra/model/model_parts.py +0 -287
  19. models/tra_post_model/trackastra/model/model_sd.py +0 -338
  20. models/tra_post_model/trackastra/model/predict.py +0 -188
  21. models/tra_post_model/trackastra/model/pretrained.json +0 -81
  22. models/tra_post_model/trackastra/model/pretrained.py +0 -90
  23. models/tra_post_model/trackastra/model/rope.py +0 -94
  24. models/tra_post_model/trackastra/utils/__init__.py +0 -14
  25. models/tra_post_model/{trackastra/tracking β†’ tracking}/__init__.py +0 -3
  26. models/tra_post_model/{trackastra/tracking β†’ tracking}/ilp.py +0 -0
  27. models/tra_post_model/{trackastra/tracking β†’ tracking}/track_graph.py +0 -0
  28. models/tra_post_model/{trackastra/tracking β†’ tracking}/tracking.py +1 -9
  29. models/tra_post_model/{trackastra/tracking β†’ tracking}/utils.py +73 -103
  30. models/tra_post_model/{trackastra/utils/utils.py β†’ utils.py} +0 -338
  31. tracking_one.py +6 -8
_utils/load_track_data.py CHANGED
@@ -9,7 +9,7 @@ import skimage.io as io
9
  import torchvision.transforms as T
10
  import cv2
11
  from tqdm import tqdm
12
- from models.tra_post_model.trackastra.utils import normalize_01, normalize
13
  IMG_SIZE = 512
14
 
15
  def _load_tiffs(folder: Path, dtype=None):
 
9
  import torchvision.transforms as T
10
  import cv2
11
  from tqdm import tqdm
12
+ from models.tra_post_model.utils import normalize_01, normalize
13
  IMG_SIZE = 512
14
 
15
  def _load_tiffs(folder: Path, dtype=None):
_utils/track_args.py CHANGED
@@ -14,49 +14,14 @@ def parse_train_args():
14
  is_config_file=True,
15
  help="config file path",
16
  )
17
- parser.add_argument("--device", type=str, choices=["cuda", "cpu"], default="cuda")
18
- parser.add_argument("-o", "--outdir", type=str, default="runs")
19
- parser.add_argument("--name", type=str, help="Name to append to timestamp")
20
- parser.add_argument("--timestamp", type=bool, default=True)
21
- parser.add_argument(
22
- "-m",
23
- "--model",
24
- type=str,
25
- default="",
26
- help="load this model at start (e.g. to continue training)",
27
- )
28
- parser.add_argument(
29
- "--ndim", type=int, default=2, help="number of spatial dimensions"
30
- )
31
  parser.add_argument("-d", "--d_model", type=int, default=256)
32
  parser.add_argument("-w", "--window", type=int, default=10)
33
- parser.add_argument("--epochs", type=int, default=100)
34
- parser.add_argument("--warmup_epochs", type=int, default=10)
35
- parser.add_argument(
36
- "--detection_folders",
37
- type=str,
38
- nargs="+",
39
- default=["TRA"],
40
- help=(
41
- "Subfolders to search for detections. Defaults to `TRA`, which corresponds"
42
- " to using only the GT."
43
- ),
44
- )
45
- parser.add_argument("--downscale_temporal", type=int, default=1)
46
- parser.add_argument("--downscale_spatial", type=int, default=1)
47
  parser.add_argument("--spatial_pos_cutoff", type=int, default=256)
48
- parser.add_argument("--from_subfolder", action="store_true")
49
- # parser.add_argument("--train_samples", type=int, default=50000)
50
  parser.add_argument("--num_encoder_layers", type=int, default=6)
51
  parser.add_argument("--num_decoder_layers", type=int, default=6)
52
  parser.add_argument("--pos_embed_per_dim", type=int, default=32)
53
  parser.add_argument("--feat_embed_per_dim", type=int, default=8)
54
  parser.add_argument("--dropout", type=float, default=0.00)
55
- parser.add_argument("--num_workers", type=int, default=4)
56
- parser.add_argument("--batch_size", type=int, default=1)
57
- parser.add_argument("--max_tokens", type=int, default=None)
58
- parser.add_argument("--delta_cutoff", type=int, default=2)
59
- parser.add_argument("--lr", type=float, default=1e-4)
60
  parser.add_argument(
61
  "--attn_positional_bias",
62
  type=str,
@@ -65,72 +30,12 @@ def parse_train_args():
65
  )
66
  parser.add_argument("--attn_positional_bias_n_spatial", type=int, default=16)
67
  parser.add_argument("--attn_dist_mode", default="v0")
68
- parser.add_argument("--mixedp", type=bool, default=True)
69
- parser.add_argument("--dry", action="store_true")
70
- parser.add_argument("--profile", action="store_true")
71
- parser.add_argument(
72
- "--features",
73
- type=str,
74
- choices=[
75
- "none",
76
- "regionprops",
77
- "regionprops2",
78
- "patch",
79
- "patch_regionprops",
80
- "wrfeat",
81
- ],
82
- default="wrfeat",
83
- )
84
  parser.add_argument(
85
  "--causal_norm",
86
  type=str,
87
  choices=["none", "linear", "softmax", "quiet_softmax"],
88
  default="quiet_softmax",
89
  )
90
- parser.add_argument("--div_upweight", type=float, default=2)
91
-
92
- parser.add_argument("--augment", type=int, default=3)
93
- parser.add_argument("--tracking_frequency", type=int, default=-1)
94
-
95
- parser.add_argument("--sanity_dist", action="store_true")
96
- parser.add_argument("--preallocate", type=bool, default=False)
97
- parser.add_argument("--only_prechecks", action="store_true")
98
- parser.add_argument(
99
- "--compress", type=bool, default=True, help="compress dataset"
100
- )
101
-
102
-
103
- parser.add_argument("--seed", type=int, default=None)
104
- parser.add_argument(
105
- "--logger",
106
- type=str,
107
- default="tensorboard",
108
- choices=["tensorboard", "wandb", "none"],
109
- )
110
- parser.add_argument("--wandb_project", type=str, default="trackastra")
111
- parser.add_argument(
112
- "--crop_size",
113
- type=int,
114
- # required=True,
115
- nargs="+",
116
- default=None,
117
- help="random crop size for augmentation",
118
- )
119
- parser.add_argument(
120
- "--weight_by_ndivs",
121
- type=bool,
122
- default=True,
123
- help="Oversample windows that contain divisions",
124
- )
125
- parser.add_argument(
126
- "--weight_by_dataset",
127
- type=bool,
128
- default=False,
129
- help=(
130
- "Inversely weight datasets by number of samples (to counter dataset size"
131
- " imbalance)"
132
- ),
133
- )
134
 
135
  args, unknown_args = parser.parse_known_args()
136
 
@@ -154,4 +59,4 @@ def parse_train_args():
154
  # "--train_samples must be > 0, full dataset pass not supported."
155
  # )
156
 
157
- return args
 
14
  is_config_file=True,
15
  help="config file path",
16
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  parser.add_argument("-d", "--d_model", type=int, default=256)
18
  parser.add_argument("-w", "--window", type=int, default=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  parser.add_argument("--spatial_pos_cutoff", type=int, default=256)
 
 
20
  parser.add_argument("--num_encoder_layers", type=int, default=6)
21
  parser.add_argument("--num_decoder_layers", type=int, default=6)
22
  parser.add_argument("--pos_embed_per_dim", type=int, default=32)
23
  parser.add_argument("--feat_embed_per_dim", type=int, default=8)
24
  parser.add_argument("--dropout", type=float, default=0.00)
 
 
 
 
 
25
  parser.add_argument(
26
  "--attn_positional_bias",
27
  type=str,
 
30
  )
31
  parser.add_argument("--attn_positional_bias_n_spatial", type=int, default=16)
32
  parser.add_argument("--attn_dist_mode", default="v0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  parser.add_argument(
34
  "--causal_norm",
35
  type=str,
36
  choices=["none", "linear", "softmax", "quiet_softmax"],
37
  default="quiet_softmax",
38
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  args, unknown_args = parser.parse_known_args()
41
 
 
59
  # "--train_samples must be > 0, full dataset pass not supported."
60
  # )
61
 
62
+ return args
inference_track.py CHANGED
@@ -7,7 +7,7 @@ from pathlib import Path
7
  from tqdm import tqdm
8
  from huggingface_hub import hf_hub_download
9
  from tracking_one import TrackingModule
10
- from models.tra_post_model.trackastra.tracking import graph_to_ctc
11
 
12
  MODEL = None
13
  DEVICE = torch.device("cpu")
 
7
  from tqdm import tqdm
8
  from huggingface_hub import hf_hub_download
9
  from tracking_one import TrackingModule
10
+ from models.tra_post_model.tracking import graph_to_ctc
11
 
12
  MODEL = None
13
  DEVICE = torch.device("cpu")
models/tra_post_model/{trackastra/data/wrfeat.py β†’ data.py} RENAMED
@@ -17,11 +17,6 @@ from skimage.measure import regionprops, regionprops_table
17
  from tqdm import tqdm
18
  from typing import Tuple, Optional, Sequence, Union, List
19
  import typing
20
-
21
- try:
22
- from .utils import load_tiff_timeseries
23
- except:
24
- from utils import load_tiff_timeseries
25
  import torch
26
  logger = logging.getLogger(__name__)
27
 
@@ -42,36 +37,6 @@ _PROPERTIES = {
42
  }
43
 
44
 
45
- def _filter_points(
46
- points: np.ndarray, shape: Tuple[int], origin: Optional[Tuple[int]] = None
47
- ) -> np.ndarray:
48
- """Returns indices of points that are inside the shape extent and given origin."""
49
- ndim = points.shape[-1]
50
- if origin is None:
51
- origin = (0,) * ndim
52
-
53
- idx = tuple(
54
- np.logical_and(points[:, i] >= origin[i], points[:, i] < origin[i] + shape[i])
55
- for i in range(ndim)
56
- )
57
- idx = np.where(np.all(idx, axis=0))[0]
58
- return idx
59
-
60
-
61
- def _border_dist(mask: np.ndarray, cutoff: float = 5):
62
- """Returns distance to border normalized to 0 (at least cutoff away) and 1 (at border)."""
63
- border = np.zeros_like(mask)
64
-
65
- # only apply to last two dimensions
66
- ss = tuple(
67
- slice(None) if i < mask.ndim - 2 else slice(1, -1)
68
- for i, s in enumerate(mask.shape)
69
- )
70
- border[ss] = 1
71
- dist = 1 - np.minimum(edt(border) / cutoff, 1)
72
- return tuple(r.intensity_max for r in regionprops(mask, intensity_image=dist))
73
-
74
-
75
  def _border_dist_fast(mask: np.ndarray, cutoff: float = 5):
76
  cutoff = int(cutoff)
77
  border = np.ones(mask.shape, dtype=np.float32)
@@ -241,268 +206,6 @@ class WRFeatures:
241
  )
242
 
243
 
244
- # augmentations
245
-
246
-
247
- class WRRandomCrop:
248
- """windowed region random crop augmentation."""
249
-
250
- def __init__(
251
- self,
252
- crop_size: Optional[Union[int, Tuple[int]]] = None,
253
- ndim: int = 2,
254
- ) -> None:
255
- """crop_size: tuple of int
256
- can be tuple of length 1 (all dimensions)
257
- of length ndim (y,x,...)
258
- of length 2*ndim (y1,y2, x1,x2, ...).
259
- """
260
- if isinstance(crop_size, int):
261
- crop_size = (crop_size,) * 2 * ndim
262
- elif isinstance(crop_size, Iterable):
263
- pass
264
- else:
265
- raise ValueError(f"{crop_size} has to be int or tuple of int")
266
-
267
- if len(crop_size) == 1:
268
- crop_size = (crop_size[0],) * 2 * ndim
269
- elif len(crop_size) == ndim:
270
- crop_size = tuple(itertools.chain(*tuple((c, c) for c in crop_size)))
271
- elif len(crop_size) == 2 * ndim:
272
- pass
273
- else:
274
- raise ValueError(f"crop_size has to be of length 1, {ndim}, or {2 * ndim}")
275
-
276
- crop_size = np.array(crop_size)
277
- self._ndim = ndim
278
- self._crop_bounds = crop_size[::2], crop_size[1::2]
279
- self._rng = np.random.RandomState()
280
-
281
- def __call__(self, features: WRFeatures):
282
- crop_size = self._rng.randint(self._crop_bounds[0], self._crop_bounds[1] + 1)
283
- points = features.coords
284
-
285
- if len(points) == 0:
286
- print("No points given, cannot ensure inside points")
287
- return features
288
-
289
- # sample point and corner relative to it
290
-
291
- _idx = np.random.randint(len(points))
292
- corner = (
293
- points[_idx]
294
- - crop_size
295
- + 1
296
- + self._rng.randint(crop_size // 4, 3 * crop_size // 4)
297
- )
298
-
299
- idx = _filter_points(points, shape=crop_size, origin=corner)
300
-
301
- return (
302
- WRFeatures(
303
- coords=points[idx],
304
- labels=features.labels[idx],
305
- timepoints=features.timepoints[idx],
306
- features=OrderedDict((k, v[idx]) for k, v in features.features.items()),
307
- ),
308
- idx,
309
- )
310
-
311
-
312
- class WRBaseAugmentation:
313
- def __init__(self, p: float = 0.5) -> None:
314
- self._p = p
315
- self._rng = np.random.RandomState()
316
-
317
- def __call__(self, features: WRFeatures):
318
- if self._rng.rand() > self._p or len(features) == 0:
319
- return features
320
- return self._augment(features)
321
-
322
- def _augment(self, features: WRFeatures):
323
- raise NotImplementedError()
324
-
325
-
326
- class WRRandomFlip(WRBaseAugmentation):
327
- def _augment(self, features: WRFeatures):
328
- ndim = features.ndim
329
- flip = self._rng.randint(0, 2, features.ndim)
330
- points = features.coords.copy()
331
- for i, f in enumerate(flip):
332
- if f == 1:
333
- points[:, ndim - i - 1] *= -1
334
- return WRFeatures(
335
- coords=points,
336
- labels=features.labels,
337
- timepoints=features.timepoints,
338
- features=features.features,
339
- )
340
-
341
-
342
- def _scale_matrix(sz: float, sy: float, sx: float):
343
- return np.diag([sz, sy, sx])
344
-
345
-
346
- # def _scale_matrix(sy: float, sx: float):
347
- # return np.array([[1, 0, 0], [0, sy, 0], [0, 0, sx]])
348
-
349
-
350
- def _shear_matrix(shy: float, shx: float):
351
- return np.array([[1, 0, 0], [0, 1 + shx * shy, shy], [0, shx, 1]])
352
-
353
-
354
- def _rotation_matrix(theta: float):
355
- return np.array([
356
- [1, 0, 0],
357
- [0, np.cos(theta), -np.sin(theta)],
358
- [0, np.sin(theta), np.cos(theta)],
359
- ])
360
-
361
-
362
- def _transform_affine(k: str, v: np.ndarray, M: np.ndarray):
363
- ndim = len(M)
364
- if k == "area":
365
- v = np.linalg.det(M) * v
366
- elif k == "equivalent_diameter_area":
367
- v = np.linalg.det(M) ** (1 / len(M)) * v
368
-
369
- elif k == "inertia_tensor":
370
- # v' = M * v * M^T
371
- v = v.reshape(-1, ndim, ndim)
372
- # v * M^T
373
- v = np.einsum("ijk, mk -> ijm", v, M)
374
- # M * v
375
- v = np.einsum("ij, kjm -> kim", M, v)
376
- v = v.reshape(-1, ndim * ndim)
377
- elif k in (
378
- "intensity_mean",
379
- "intensity_std",
380
- "intensity_max",
381
- "intensity_min",
382
- "border_dist",
383
- ):
384
- pass
385
- else:
386
- raise ValueError(f"Don't know how to affinely transform {k}")
387
- return v
388
-
389
-
390
- class WRRandomAffine(WRBaseAugmentation):
391
- def __init__(
392
- self,
393
- degrees: float = 10,
394
- scale: float = (0.9, 1.1),
395
- shear: float = (0.1, 0.1),
396
- p: float = 0.5,
397
- ):
398
- super().__init__(p)
399
- self.degrees = degrees if degrees is not None else 0
400
- self.scale = scale if scale is not None else (1, 1)
401
- self.shear = shear if shear is not None else (0, 0)
402
-
403
- def _augment(self, features: WRFeatures):
404
- degrees = self._rng.uniform(-self.degrees, self.degrees) / 180 * np.pi
405
- scale = self._rng.uniform(*self.scale, 3)
406
- shy = self._rng.uniform(-self.shear[0], self.shear[0])
407
- shx = self._rng.uniform(-self.shear[1], self.shear[1])
408
-
409
- self._M = (
410
- _rotation_matrix(degrees) @ _scale_matrix(*scale) @ _shear_matrix(shy, shx)
411
- )
412
-
413
- # M is by default 3D , we need to remove the last dimension for 2D
414
- self._M = self._M[-features.ndim :, -features.ndim :]
415
- points = features.coords @ self._M.T
416
-
417
- feats = OrderedDict(
418
- (k, _transform_affine(k, v, self._M)) for k, v in features.features.items()
419
- )
420
-
421
- return WRFeatures(
422
- coords=points,
423
- labels=features.labels,
424
- timepoints=features.timepoints,
425
- features=feats,
426
- )
427
-
428
-
429
- class WRRandomBrightness(WRBaseAugmentation):
430
- def __init__(
431
- self,
432
- scale: Tuple[float] = (0.5, 2.0),
433
- shift: Tuple[float] = (-0.1, 0.1),
434
- p: float = 0.5,
435
- ):
436
- super().__init__(p)
437
- self.scale = scale
438
- self.shift = shift
439
-
440
- def _augment(self, features: WRFeatures):
441
- scale = self._rng.uniform(*self.scale)
442
- shift = self._rng.uniform(*self.shift)
443
-
444
- key_vals = []
445
-
446
- for k, v in features.features.items():
447
- if "intensity" in k:
448
- v = v * scale + shift
449
- key_vals.append((k, v))
450
- feats = OrderedDict(key_vals)
451
- return WRFeatures(
452
- coords=features.coords,
453
- labels=features.labels,
454
- timepoints=features.timepoints,
455
- features=feats,
456
- )
457
-
458
-
459
- class WRRandomOffset(WRBaseAugmentation):
460
- def __init__(self, offset: float = (-3, 3), p: float = 0.5):
461
- super().__init__(p)
462
- self.offset = offset
463
-
464
- def _augment(self, features: WRFeatures):
465
- offset = self._rng.uniform(*self.offset, features.coords.shape)
466
- coords = features.coords + offset
467
- return WRFeatures(
468
- coords=coords,
469
- labels=features.labels,
470
- timepoints=features.timepoints,
471
- features=features.features,
472
- )
473
-
474
-
475
- class WRRandomMovement(WRBaseAugmentation):
476
- """random global linear shift."""
477
-
478
- def __init__(self, offset: float = (-10, 10), p: float = 0.5):
479
- super().__init__(p)
480
- self.offset = offset
481
-
482
- def _augment(self, features: WRFeatures):
483
- base_offset = self._rng.uniform(*self.offset, features.coords.shape[-1])
484
- tmin = features.timepoints.min()
485
- offset = (features.timepoints[:, None] - tmin) * base_offset[None]
486
- coords = features.coords + offset
487
-
488
- return WRFeatures(
489
- coords=coords,
490
- labels=features.labels,
491
- timepoints=features.timepoints,
492
- features=features.features,
493
- )
494
-
495
-
496
- class WRAugmentationPipeline:
497
- def __init__(self, augmentations: Sequence[WRBaseAugmentation]):
498
- self.augmentations = augmentations
499
-
500
- def __call__(self, feats: WRFeatures):
501
- for aug in self.augmentations:
502
- feats = aug(feats)
503
- return feats
504
-
505
-
506
  def get_features(
507
  detections: np.ndarray,
508
  imgs: Optional[np.ndarray] = None,
@@ -561,36 +264,6 @@ def _check_dimensions(x: np.ndarray, ndim: int):
561
  return x
562
 
563
 
564
- def build_windows(
565
- features: List[WRFeatures], window_size: int, progbar_class=tqdm
566
- ) -> List[dict]:
567
- windows = []
568
- for t1, t2 in progbar_class(
569
- zip(range(0, len(features)), range(window_size, len(features) + 1)),
570
- total=len(features) - window_size + 1,
571
- desc="Building windows",
572
- ):
573
- feat = WRFeatures.concat(features[t1:t2])
574
-
575
- labels = feat.labels
576
- timepoints = feat.timepoints
577
- coords = feat.coords
578
-
579
- if len(feat) == 0:
580
- coords = np.zeros((0, feat.ndim), dtype=int)
581
-
582
- w = dict(
583
- coords=coords,
584
- t1=t1,
585
- labels=labels,
586
- timepoints=timepoints,
587
- features=feat.features_stacked,
588
- )
589
- windows.append(w)
590
-
591
- logger.debug(f"Built {len(windows)} track windows.\n")
592
- return windows
593
-
594
  def build_windows_sd(
595
  features: List[WRFeatures], imgs_enc, imgs_stable, boxes, imgs, masks, window_size: int, progbar_class=tqdm
596
  ) -> List[dict]:
@@ -632,24 +305,3 @@ def build_windows_sd(
632
  logger.debug(f"Built {len(windows)} track windows.\n")
633
  return windows
634
 
635
- if __name__ == "__main__":
636
- imgs = load_tiff_timeseries(
637
- # "/scratch0/data/celltracking/ctc_2024/Fluo-C3DL-MDA231/train/01",
638
- "/scratch0/data/celltracking/ctc_2024/Fluo-N2DL-HeLa/train/01",
639
- )
640
- masks = load_tiff_timeseries(
641
- # "/scratch0/data/celltracking/ctc_2024/Fluo-C3DL-MDA231/train/01_GT/TRA",
642
- "/scratch0/data/celltracking/ctc_2024/Fluo-N2DL-HeLa/train/01_GT/TRA",
643
- dtype=int,
644
- )
645
-
646
- features = get_features(detections=masks, imgs=imgs, ndim=3)
647
- windows = build_windows(features, window_size=4)
648
-
649
-
650
- # if __name__ == "__main__":
651
- # y = np.zeros((1, 100, 100), np.uint8)
652
- # y[:, 20:40, 20:60] = 1
653
- # x = y + np.random.normal(0, 0.1, y.shape)
654
-
655
- # f = WRFeatures.from_mask_img(y, x, properties=("intensity_mean", "area"))
 
17
  from tqdm import tqdm
18
  from typing import Tuple, Optional, Sequence, Union, List
19
  import typing
 
 
 
 
 
20
  import torch
21
  logger = logging.getLogger(__name__)
22
 
 
37
  }
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def _border_dist_fast(mask: np.ndarray, cutoff: float = 5):
41
  cutoff = int(cutoff)
42
  border = np.ones(mask.shape, dtype=np.float32)
 
206
  )
207
 
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  def get_features(
210
  detections: np.ndarray,
211
  imgs: Optional[np.ndarray] = None,
 
264
  return x
265
 
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  def build_windows_sd(
268
  features: List[WRFeatures], imgs_enc, imgs_stable, boxes, imgs, masks, window_size: int, progbar_class=tqdm
269
  ) -> List[dict]:
 
305
  logger.debug(f"Built {len(windows)} track windows.\n")
306
  return windows
307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/{trackastra/model/model.py β†’ model.py} RENAMED
@@ -1,32 +1,300 @@
1
  """Transformer class."""
2
 
3
  import logging
 
4
  from collections import OrderedDict
5
  from pathlib import Path
6
- from typing import Literal
7
 
8
  import torch
 
9
 
10
- # from torch_geometric.nn import GATv2Conv
11
  import yaml
12
  from torch import nn
13
 
14
  import sys, os
15
- sys.path.append(os.path.join(os.getcwd(), "External_Repos", "trackastra"))
16
 
17
- # NoPositionalEncoding,
18
- from ..utils import blockwise_causal_norm
19
 
20
- from .model_parts import (
21
- FeedForward,
22
- PositionalEncoding,
23
- RelativePositionalAttention,
24
- )
25
-
26
- # from memory_profiler import profile
27
  logger = logging.getLogger(__name__)
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  class EncoderLayer(nn.Module):
31
  def __init__(
32
  self,
 
1
  """Transformer class."""
2
 
3
  import logging
4
+ import math
5
  from collections import OrderedDict
6
  from pathlib import Path
7
+ from typing import Literal, Tuple
8
 
9
  import torch
10
+ import torch.nn.functional as F
11
 
 
12
  import yaml
13
  from torch import nn
14
 
15
  import sys, os
 
16
 
17
+ from .utils import blockwise_causal_norm
 
18
 
 
 
 
 
 
 
 
19
  logger = logging.getLogger(__name__)
20
 
21
 
22
+ def _pos_embed_fourier1d_init(
23
+ cutoff: float = 256, n: int = 32, cutoff_start: float = 1
24
+ ):
25
+ return (
26
+ torch.exp(torch.linspace(-math.log(cutoff_start), -math.log(cutoff), n))
27
+ .unsqueeze(0)
28
+ .unsqueeze(0)
29
+ )
30
+
31
+
32
+ def _rope_pos_embed_fourier1d_init(cutoff: float = 128, n: int = 32):
33
+ # Maximum initial frequency is 1
34
+ return torch.exp(torch.linspace(0, -math.log(cutoff), n)).unsqueeze(0).unsqueeze(0)
35
+
36
+
37
+ def _rotate_half(x: torch.Tensor) -> torch.Tensor:
38
+ """Rotate pairs of scalars as 2d vectors by pi/2."""
39
+ x = x.unflatten(-1, (-1, 2))
40
+ x1, x2 = x.unbind(dim=-1)
41
+ return torch.stack((-x2, x1), dim=-1).flatten(start_dim=-2)
42
+
43
+
44
+ class RotaryPositionalEncoding(nn.Module):
45
+ def __init__(self, cutoffs: Tuple[float] = (256,), n_pos: Tuple[int] = (32,)):
46
+ super().__init__()
47
+ assert len(cutoffs) == len(n_pos)
48
+ if not all(n % 2 == 0 for n in n_pos):
49
+ raise ValueError("n_pos must be even")
50
+
51
+ self._n_dim = len(cutoffs)
52
+ self.freqs = nn.ParameterList([
53
+ nn.Parameter(_rope_pos_embed_fourier1d_init(cutoff, n // 2))
54
+ for cutoff, n in zip(cutoffs, n_pos)
55
+ ])
56
+
57
+ def get_co_si(self, coords: torch.Tensor):
58
+ _B, _N, D = coords.shape
59
+ assert D == len(self.freqs)
60
+ co = torch.cat(
61
+ tuple(
62
+ torch.cos(0.5 * math.pi * x.unsqueeze(-1) * freq) / math.sqrt(len(freq))
63
+ for x, freq in zip(coords.moveaxis(-1, 0), self.freqs)
64
+ ),
65
+ axis=-1,
66
+ )
67
+ si = torch.cat(
68
+ tuple(
69
+ torch.sin(0.5 * math.pi * x.unsqueeze(-1) * freq) / math.sqrt(len(freq))
70
+ for x, freq in zip(coords.moveaxis(-1, 0), self.freqs)
71
+ ),
72
+ axis=-1,
73
+ )
74
+ return co, si
75
+
76
+ def forward(self, q: torch.Tensor, k: torch.Tensor, coords: torch.Tensor):
77
+ _B, _N, D = coords.shape
78
+ _B, _H, _N, _C = q.shape
79
+
80
+ if D != self._n_dim:
81
+ raise ValueError(f"coords must have {self._n_dim} dimensions, got {D}")
82
+
83
+ co, si = self.get_co_si(coords)
84
+ co = co.unsqueeze(1).repeat_interleave(2, dim=-1)
85
+ si = si.unsqueeze(1).repeat_interleave(2, dim=-1)
86
+ q2 = q * co + _rotate_half(q) * si
87
+ k2 = k * co + _rotate_half(k) * si
88
+ return q2, k2
89
+
90
+
91
+ class FeedForward(nn.Module):
92
+ def __init__(self, d_model, expand: float = 2, bias: bool = True):
93
+ super().__init__()
94
+ self.fc1 = nn.Linear(d_model, int(d_model * expand))
95
+ self.fc2 = nn.Linear(int(d_model * expand), d_model, bias=bias)
96
+ self.act = nn.GELU()
97
+
98
+ def forward(self, x):
99
+ return self.fc2(self.act(self.fc1(x)))
100
+
101
+
102
+ class PositionalEncoding(nn.Module):
103
+ def __init__(
104
+ self,
105
+ cutoffs: Tuple[float] = (256,),
106
+ n_pos: Tuple[int] = (32,),
107
+ cutoffs_start=None,
108
+ ):
109
+ super().__init__()
110
+ if cutoffs_start is None:
111
+ cutoffs_start = (1,) * len(cutoffs)
112
+
113
+ assert len(cutoffs) == len(n_pos)
114
+ self.freqs = nn.ParameterList([
115
+ nn.Parameter(_pos_embed_fourier1d_init(cutoff, n // 2))
116
+ for cutoff, n, cutoff_start in zip(cutoffs, n_pos, cutoffs_start)
117
+ ])
118
+
119
+ def forward(self, coords: torch.Tensor):
120
+ _B, _N, D = coords.shape
121
+ assert D == len(self.freqs)
122
+ embed = torch.cat(
123
+ tuple(
124
+ torch.cat(
125
+ (
126
+ torch.sin(0.5 * math.pi * x.unsqueeze(-1) * freq),
127
+ torch.cos(0.5 * math.pi * x.unsqueeze(-1) * freq),
128
+ ),
129
+ axis=-1,
130
+ )
131
+ / math.sqrt(len(freq))
132
+ for x, freq in zip(coords.moveaxis(-1, 0), self.freqs)
133
+ ),
134
+ axis=-1,
135
+ )
136
+ return embed
137
+
138
+
139
+ def _bin_init_exp(cutoff: float, n: int):
140
+ return torch.exp(torch.linspace(0, math.log(cutoff + 1), n))
141
+
142
+
143
+ def _bin_init_linear(cutoff: float, n: int):
144
+ return torch.linspace(-cutoff, cutoff, n)
145
+
146
+
147
+ class RelativePositionalBias(nn.Module):
148
+ def __init__(
149
+ self,
150
+ n_head: int,
151
+ cutoff_spatial: float,
152
+ cutoff_temporal: float,
153
+ n_spatial: int = 32,
154
+ n_temporal: int = 16,
155
+ ):
156
+ super().__init__()
157
+ self._spatial_bins = _bin_init_exp(cutoff_spatial, n_spatial)
158
+ self._temporal_bins = _bin_init_linear(cutoff_temporal, 2 * n_temporal + 1)
159
+ self.register_buffer("spatial_bins", self._spatial_bins)
160
+ self.register_buffer("temporal_bins", self._temporal_bins)
161
+ self.n_spatial = n_spatial
162
+ self.n_head = n_head
163
+ self.bias = nn.Parameter(
164
+ -0.5 + torch.rand((2 * n_temporal + 1) * n_spatial, n_head)
165
+ )
166
+
167
+ def forward(self, coords: torch.Tensor):
168
+ _B, _N, _D = coords.shape
169
+ t = coords[..., 0]
170
+ yx = coords[..., 1:]
171
+ temporal_dist = t.unsqueeze(-1) - t.unsqueeze(-2)
172
+ spatial_dist = torch.cdist(yx, yx)
173
+
174
+ spatial_idx = torch.bucketize(spatial_dist, self.spatial_bins)
175
+ torch.clamp_(spatial_idx, max=len(self.spatial_bins) - 1)
176
+ temporal_idx = torch.bucketize(temporal_dist, self.temporal_bins)
177
+ torch.clamp_(temporal_idx, max=len(self.temporal_bins) - 1)
178
+
179
+ idx = spatial_idx.flatten() + temporal_idx.flatten() * self.n_spatial
180
+ bias = self.bias.index_select(0, idx).view((*spatial_idx.shape, self.n_head))
181
+ bias = bias.transpose(-1, 1)
182
+ return bias
183
+
184
+
185
+ class RelativePositionalAttention(nn.Module):
186
+ def __init__(
187
+ self,
188
+ coord_dim: int,
189
+ embed_dim: int,
190
+ n_head: int,
191
+ cutoff_spatial: float = 256,
192
+ cutoff_temporal: float = 16,
193
+ n_spatial: int = 32,
194
+ n_temporal: int = 16,
195
+ dropout: float = 0.0,
196
+ mode: Literal["bias", "rope", "none"] = "bias",
197
+ attn_dist_mode: str = "v0",
198
+ ):
199
+ super().__init__()
200
+
201
+ if not embed_dim % (2 * n_head) == 0:
202
+ raise ValueError(
203
+ f"embed_dim {embed_dim} must be divisible by 2 times n_head {2 * n_head}"
204
+ )
205
+
206
+ self.q_pro = nn.Linear(embed_dim, embed_dim, bias=True)
207
+ self.k_pro = nn.Linear(embed_dim, embed_dim, bias=True)
208
+ self.v_pro = nn.Linear(embed_dim, embed_dim, bias=True)
209
+ self.proj = nn.Linear(embed_dim, embed_dim)
210
+ self.dropout = dropout
211
+ self.n_head = n_head
212
+ self.embed_dim = embed_dim
213
+ self.cutoff_spatial = cutoff_spatial
214
+ self.attn_dist_mode = attn_dist_mode
215
+
216
+ if mode == "bias" or mode is True:
217
+ self.pos_bias = RelativePositionalBias(
218
+ n_head=n_head,
219
+ cutoff_spatial=cutoff_spatial,
220
+ cutoff_temporal=cutoff_temporal,
221
+ n_spatial=n_spatial,
222
+ n_temporal=n_temporal,
223
+ )
224
+ elif mode == "rope":
225
+ n_split = 2 * (embed_dim // (2 * (coord_dim + 1) * n_head))
226
+ self.rot_pos_enc = RotaryPositionalEncoding(
227
+ cutoffs=((cutoff_temporal,) + (cutoff_spatial,) * coord_dim),
228
+ n_pos=(embed_dim // n_head - coord_dim * n_split,)
229
+ + (n_split,) * coord_dim,
230
+ )
231
+ elif mode == "none":
232
+ pass
233
+ elif mode is None or mode is False:
234
+ logger.warning(
235
+ "attn_positional_bias is not set (None or False), no positional bias."
236
+ )
237
+ else:
238
+ raise ValueError(f"Unknown mode {mode}")
239
+
240
+ self._mode = mode
241
+
242
+ def forward(
243
+ self,
244
+ query: torch.Tensor,
245
+ key: torch.Tensor,
246
+ value: torch.Tensor,
247
+ coords: torch.Tensor,
248
+ padding_mask: torch.Tensor = None,
249
+ ):
250
+ B, N, D = query.size()
251
+ q = self.q_pro(query)
252
+ k = self.k_pro(key)
253
+ v = self.v_pro(value)
254
+ k = k.view(B, N, self.n_head, D // self.n_head).transpose(1, 2)
255
+ q = q.view(B, N, self.n_head, D // self.n_head).transpose(1, 2)
256
+ v = v.view(B, N, self.n_head, D // self.n_head).transpose(1, 2)
257
+
258
+ attn_mask = torch.zeros(
259
+ (B, self.n_head, N, N), device=query.device, dtype=q.dtype
260
+ )
261
+ attn_ignore_val = -1e3
262
+
263
+ yx = coords[..., 1:]
264
+ spatial_dist = torch.cdist(yx, yx)
265
+ spatial_mask = (spatial_dist > self.cutoff_spatial).unsqueeze(1)
266
+ attn_mask.masked_fill_(spatial_mask, attn_ignore_val)
267
+
268
+ if coords is not None:
269
+ if self._mode == "bias":
270
+ attn_mask = attn_mask + self.pos_bias(coords)
271
+ elif self._mode == "rope":
272
+ q, k = self.rot_pos_enc(q, k, coords)
273
+
274
+ if self.attn_dist_mode == "v0":
275
+ dist = torch.cdist(coords, coords, p=2)
276
+ attn_mask += torch.exp(-0.1 * dist.unsqueeze(1))
277
+ elif self.attn_dist_mode == "v1":
278
+ attn_mask += torch.exp(
279
+ -5 * spatial_dist.unsqueeze(1) / self.cutoff_spatial
280
+ )
281
+ else:
282
+ raise ValueError(f"Unknown attn_dist_mode {self.attn_dist_mode}")
283
+
284
+ if padding_mask is not None:
285
+ ignore_mask = torch.logical_or(
286
+ padding_mask.unsqueeze(1), padding_mask.unsqueeze(2)
287
+ ).unsqueeze(1)
288
+ attn_mask.masked_fill_(ignore_mask, attn_ignore_val)
289
+
290
+ y = F.scaled_dot_product_attention(
291
+ q, k, v, attn_mask=attn_mask, dropout_p=self.dropout if self.training else 0
292
+ )
293
+ y = y.transpose(1, 2).contiguous().view(B, N, D)
294
+ y = self.proj(y)
295
+ return y
296
+
297
+
298
  class EncoderLayer(nn.Module):
299
  def __init__(
300
  self,
models/tra_post_model/trackastra/data/__init__.py DELETED
@@ -1,18 +0,0 @@
1
- # ruff: noqa: F401
2
-
3
- from .augmentations import AugmentationPipeline, RandomCrop
4
- from .data import (
5
- CTCData,
6
- _ctc_lineages,
7
- # load_ctc_data_from_subfolders,
8
- collate_sequence_padding,
9
- extract_features_regionprops,
10
- )
11
- from .distributed import (
12
- BalancedBatchSampler,
13
- # BalancedDataModule,
14
- BalancedDistributedSampler,
15
- )
16
- from .example_data import example_data_bacteria, example_data_fluo_3d, example_data_hela, data_hela
17
- from .utils import filter_track_df, load_tiff_timeseries, load_tracklet_links
18
- from .wrfeat import WRFeatures, build_windows, get_features, build_windows_sd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/_check_ctc.py DELETED
@@ -1,114 +0,0 @@
1
- import logging
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from skimage.measure import label, regionprops_table
6
-
7
- logger = logging.getLogger(__name__)
8
-
9
-
10
- # from https://github.com/Janelia-Trackathon-2023/traccuracy/blob/main/src/traccuracy/loaders/_ctc.py
11
- def _check_ctc(tracks: pd.DataFrame, detections: pd.DataFrame, masks: np.ndarray):
12
- """Sanity checks for valid CTC format.
13
-
14
- Hard checks (throws exception):
15
- - Tracklet IDs in tracks file must be unique and positive
16
- - Parent tracklet IDs must exist in the tracks file
17
- - Intertracklet edges must be directed forward in time.
18
- - In each time point, the set of segmentation IDs present in the detections must equal the set
19
- of tracklet IDs in the tracks file that overlap this time point.
20
-
21
- Soft checks (prints warning):
22
- - No duplicate tracklet IDs (non-connected pixels with same ID) in a single timepoint.
23
-
24
- Args:
25
- tracks (pd.DataFrame): Tracks in CTC format with columns Cell_ID, Start, End, Parent_ID.
26
- detections (pd.DataFrame): Detections extracted from masks, containing columns
27
- segmentation_id, t.
28
- masks (np.ndarray): Set of masks with time in the first axis.
29
-
30
- Raises:
31
- ValueError: If any of the hard checks fail.
32
- """
33
- logger.debug("Running CTC format checks")
34
- tracks = tracks.copy()
35
- tracks.columns = ["Cell_ID", "Start", "End", "Parent_ID"]
36
- if tracks["Cell_ID"].min() < 1:
37
- raise ValueError("Cell_IDs in tracks file must be positive integers.")
38
- if len(tracks["Cell_ID"]) < len(tracks["Cell_ID"].unique()):
39
- raise ValueError("Cell_IDs in tracks file must be unique integers.")
40
-
41
- for _, row in tracks.iterrows():
42
- if row["Parent_ID"] != 0:
43
- if row["Parent_ID"] not in tracks["Cell_ID"].values:
44
- raise ValueError(
45
- f"Parent_ID {row['Parent_ID']} is not present in tracks."
46
- )
47
- parent_end = tracks[tracks["Cell_ID"] == row["Parent_ID"]]["End"].iloc[0]
48
- if parent_end >= row["Start"]:
49
- raise ValueError(
50
- f"Invalid tracklet connection: Daughter tracklet with ID {row['Cell_ID']} "
51
- f"starts at t={row['Start']}, "
52
- f"but parent tracklet with ID {row['Parent_ID']} only ends at t={parent_end}."
53
- )
54
-
55
- for t in range(tracks["Start"].min(), tracks["End"].max()):
56
- track_ids = set(
57
- tracks[(tracks["Start"] <= t) & (tracks["End"] >= t)]["Cell_ID"]
58
- )
59
- det_ids = set(detections[(detections["t"] == t)]["segmentation_id"])
60
- if not track_ids.issubset(det_ids):
61
- raise ValueError(f"Missing IDs in masks at t={t}: {track_ids - det_ids}")
62
- if not det_ids.issubset(track_ids):
63
- raise ValueError(
64
- f"IDs {det_ids - track_ids} at t={t} not represented in tracks file."
65
- )
66
-
67
- for t, frame in enumerate(masks):
68
- _, n_components = label(frame, return_num=True)
69
- n_labels = len(detections[detections["t"] == t])
70
- if n_labels < n_components:
71
- logger.warning(f"{n_components - n_labels} non-connected masks at t={t}.")
72
-
73
-
74
- def _get_node_attributes(masks):
75
- """Calculates x,y,z,t,label for each detection in a movie.
76
-
77
- Args:
78
- masks (np.ndarray): Set of masks with time in the first axis
79
-
80
- Returns:
81
- pd.DataFrame: Dataframe with one detection per row. Columns
82
- segmentation_id, x, y, z, t
83
- """
84
- data_df = pd.concat([
85
- _detections_from_image(masks, idx) for idx in range(masks.shape[0])
86
- ]).reset_index(drop=True)
87
- data_df = data_df.rename(
88
- columns={
89
- "label": "segmentation_id",
90
- "centroid-2": "z",
91
- "centroid-1": "y",
92
- "centroid-0": "x",
93
- }
94
- )
95
- data_df["segmentation_id"] = data_df["segmentation_id"].astype(int)
96
- data_df["t"] = data_df["t"].astype(int)
97
- return data_df
98
-
99
-
100
- def _detections_from_image(stack, idx):
101
- """Return the unique track label, centroid and time for each track vertex.
102
-
103
- Args:
104
- stack (np.ndarray): Stack of masks
105
- idx (int): Index of the image to calculate the centroids and track labels
106
-
107
- Returns:
108
- pd.DataFrame: The dataframe of track data for one time step (specified by idx)
109
- """
110
- props = regionprops_table(
111
- np.asarray(stack[idx, ...]), properties=("label", "centroid")
112
- )
113
- props["t"] = np.full(props["label"].shape, idx)
114
- return pd.DataFrame(props)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/_compute_overlap.py DELETED
@@ -1,209 +0,0 @@
1
- """Adapted from Fast R-CNN
2
- Written by Sergey Karayev
3
- Licensed under The MIT License
4
- Copyright (c) 2015 Microsoft.
5
- """
6
-
7
- import numpy as np
8
- from skimage.measure import regionprops
9
-
10
-
11
- def _union_slice(a: tuple[slice], b: tuple[slice]):
12
- """Returns the union of slice tuples a and b."""
13
- starts = tuple(min(_a.start, _b.start) for _a, _b in zip(a, b))
14
- stops = tuple(max(_a.stop, _b.stop) for _a, _b in zip(a, b))
15
- return tuple(slice(start, stop) for start, stop in zip(starts, stops))
16
-
17
-
18
- def get_labels_with_overlap(gt_frame, res_frame):
19
- """Get all labels IDs in gt_frame and res_frame whose bounding boxes
20
- overlap.
21
-
22
- Args:
23
- gt_frame (np.ndarray): ground truth segmentation for a single frame
24
- res_frame (np.ndarray): result segmentation for a given frame
25
-
26
- Returns:
27
- overlapping_gt_labels: List[int], labels of gt boxes that overlap with res boxes
28
- overlapping_res_labels: List[int], labels of res boxes that overlap with gt boxes
29
- intersections_over_gt: List[float], list of (intersection gt vs res) / (gt area)
30
- """
31
- gt_frame = gt_frame.astype(np.uint16, copy=False)
32
- res_frame = res_frame.astype(np.uint16, copy=False)
33
- gt_props = regionprops(gt_frame)
34
- gt_boxes = [np.array(gt_prop.bbox) for gt_prop in gt_props]
35
- gt_boxes = np.array(gt_boxes).astype(np.float64)
36
- gt_box_labels = np.asarray(
37
- [int(gt_prop.label) for gt_prop in gt_props], dtype=np.uint16
38
- )
39
-
40
- res_props = regionprops(res_frame)
41
- res_boxes = [np.array(res_prop.bbox) for res_prop in res_props]
42
- res_boxes = np.array(res_boxes).astype(np.float64)
43
- res_box_labels = np.asarray(
44
- [int(res_prop.label) for res_prop in res_props], dtype=np.uint16
45
- )
46
- if len(gt_props) == 0 or len(res_props) == 0:
47
- return [], [], []
48
-
49
- if gt_frame.ndim == 3:
50
- overlaps = compute_overlap_3D(gt_boxes, res_boxes)
51
- else:
52
- overlaps = compute_overlap(
53
- gt_boxes, res_boxes
54
- ) # has the form [gt_bbox, res_bbox]
55
-
56
- # Find the bboxes that have overlap at all (ind_ corresponds to box number - starting at 0)
57
- ind_gt, ind_res = np.nonzero(overlaps)
58
- ind_gt = np.asarray(ind_gt, dtype=np.uint16)
59
- ind_res = np.asarray(ind_res, dtype=np.uint16)
60
- overlapping_gt_labels = gt_box_labels[ind_gt]
61
- overlapping_res_labels = res_box_labels[ind_res]
62
-
63
- intersections_over_gt = []
64
- for i, j in zip(ind_gt, ind_res):
65
- sslice = _union_slice(gt_props[i].slice, res_props[j].slice)
66
- gt_mask = gt_frame[sslice] == gt_box_labels[i]
67
- res_mask = res_frame[sslice] == res_box_labels[j]
68
- area_inter = np.count_nonzero(np.logical_and(gt_mask, res_mask))
69
- area_gt = np.count_nonzero(gt_mask)
70
- intersections_over_gt.append(area_inter / area_gt)
71
-
72
- return overlapping_gt_labels, overlapping_res_labels, intersections_over_gt
73
-
74
-
75
- def compute_overlap(boxes: np.ndarray, query_boxes: np.ndarray) -> np.ndarray:
76
- """Args:
77
- a: (N, 4) ndarray of float
78
- b: (K, 4) ndarray of float.
79
-
80
- Returns:
81
- overlaps: (N, K) ndarray of overlap between boxes and query_boxes
82
- """
83
- N = boxes.shape[0]
84
- K = query_boxes.shape[0]
85
- overlaps = np.zeros((N, K), dtype=np.float64)
86
- for k in range(K):
87
- box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (
88
- query_boxes[k, 3] - query_boxes[k, 1] + 1
89
- )
90
- for n in range(N):
91
- iw = (
92
- min(boxes[n, 2], query_boxes[k, 2])
93
- - max(boxes[n, 0], query_boxes[k, 0])
94
- + 1
95
- )
96
- if iw > 0:
97
- ih = (
98
- min(boxes[n, 3], query_boxes[k, 3])
99
- - max(boxes[n, 1], query_boxes[k, 1])
100
- + 1
101
- )
102
- if ih > 0:
103
- ua = np.float64(
104
- (boxes[n, 2] - boxes[n, 0] + 1)
105
- * (boxes[n, 3] - boxes[n, 1] + 1)
106
- + box_area
107
- - iw * ih
108
- )
109
- overlaps[n, k] = iw * ih / ua
110
- return overlaps
111
-
112
-
113
- def compute_overlap_3D(boxes: np.ndarray, query_boxes: np.ndarray) -> np.ndarray:
114
- """Args:
115
- a: (N, 6) ndarray of float
116
- b: (K, 6) ndarray of float.
117
-
118
- Returns:
119
- overlaps: (N, K) ndarray of overlap between boxes and query_boxes
120
- """
121
- N = boxes.shape[0]
122
- K = query_boxes.shape[0]
123
- overlaps = np.zeros((N, K), dtype=np.float64)
124
- for k in range(K):
125
- box_volume = (
126
- (query_boxes[k, 3] - query_boxes[k, 0] + 1)
127
- * (query_boxes[k, 4] - query_boxes[k, 1] + 1)
128
- * (query_boxes[k, 5] - query_boxes[k, 2] + 1)
129
- )
130
- for n in range(N):
131
- id_ = (
132
- min(boxes[n, 3], query_boxes[k, 3])
133
- - max(boxes[n, 0], query_boxes[k, 0])
134
- + 1
135
- )
136
- if id_ > 0:
137
- iw = (
138
- min(boxes[n, 4], query_boxes[k, 4])
139
- - max(boxes[n, 1], query_boxes[k, 1])
140
- + 1
141
- )
142
- if iw > 0:
143
- ih = (
144
- min(boxes[n, 5], query_boxes[k, 5])
145
- - max(boxes[n, 2], query_boxes[k, 2])
146
- + 1
147
- )
148
- if ih > 0:
149
- ua = np.float64(
150
- (boxes[n, 3] - boxes[n, 0] + 1)
151
- * (boxes[n, 4] - boxes[n, 1] + 1)
152
- * (boxes[n, 5] - boxes[n, 2] + 1)
153
- + box_volume
154
- - iw * ih * id_
155
- )
156
- overlaps[n, k] = iw * ih * id_ / ua
157
- return overlaps
158
-
159
-
160
- try:
161
- import numba
162
- except ImportError:
163
- import os
164
- import warnings
165
-
166
- if not os.getenv("NO_JIT_WARNING", False):
167
- warnings.warn(
168
- "Numba not installed, falling back to slower numpy implementation. "
169
- "Install numba for a significant speedup. Set the environment "
170
- "variable NO_JIT_WARNING=1 to disable this warning.",
171
- stacklevel=2,
172
- )
173
- else:
174
- # compute_overlap 2d and 3d have the same signature
175
- signature = [
176
- "f8[:,::1](f8[:,::1], f8[:,::1])",
177
- numba.types.Array(numba.float64, 2, "C", readonly=True)(
178
- numba.types.Array(numba.float64, 2, "C", readonly=True),
179
- numba.types.Array(numba.float64, 2, "C", readonly=True),
180
- ),
181
- ]
182
-
183
- # variables that appear in the body of each function
184
- common_locals = {
185
- "N": numba.uint64,
186
- "K": numba.uint64,
187
- "overlaps": numba.types.Array(numba.float64, 2, "C"),
188
- "iw": numba.float64,
189
- "ih": numba.float64,
190
- "ua": numba.float64,
191
- "n": numba.uint64,
192
- "k": numba.uint64,
193
- }
194
-
195
- compute_overlap = numba.njit(
196
- signature,
197
- locals={**common_locals, "box_area": numba.float64},
198
- fastmath=True,
199
- nogil=True,
200
- boundscheck=False,
201
- )(compute_overlap)
202
-
203
- compute_overlap_3D = numba.njit(
204
- signature,
205
- locals={**common_locals, "id_": numba.float64, "box_volume": numba.float64},
206
- fastmath=True,
207
- nogil=True,
208
- boundscheck=False,
209
- )(compute_overlap_3D)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/augmentations.py DELETED
@@ -1,557 +0,0 @@
1
- """#TODO: dont convert to numpy and back to torch."""
2
-
3
- from collections.abc import Iterable, Sequence
4
- from itertools import chain
5
- from typing import Any
6
-
7
- import kornia.augmentation as K
8
- import numpy as np
9
- import torch
10
- from kornia.augmentation import random_generator as rg
11
- from kornia.augmentation.utils import _range_bound
12
- from kornia.constants import DataKey, Resample
13
- from typing import Optional, Tuple, Sequence, Dict, Union
14
-
15
- def default_augmenter(coords: np.ndarray):
16
- # TODO parametrize magnitude of different augmentations
17
- ndim = coords.shape[1]
18
-
19
- assert coords.ndim == 2 and ndim in (2, 3)
20
-
21
- # first remove offset
22
- center = coords.mean(axis=0, keepdims=True)
23
-
24
- coords = coords - center
25
-
26
- # apply random flip
27
- coords *= 2 * np.random.randint(0, 2, (1, ndim)) - 1
28
-
29
- # apply rotation along the last two dimensions
30
- phi = np.random.uniform(0, 2 * np.pi)
31
- coords = _rotate(coords, phi, center=None)
32
-
33
- if ndim == 3:
34
- # rotate along the first two dimensions too
35
- phi2, phi3 = np.random.uniform(0, 2 * np.pi, 2)
36
- coords = _rotate(coords, phi2, rot_axis=(0, 1), center=None)
37
- coords = _rotate(coords, phi3, rot_axis=(0, 2), center=None)
38
-
39
- coords += center
40
-
41
- # translation
42
- trans = 128 * np.random.uniform(-1, 1, (1, ndim))
43
- coords += trans
44
-
45
- # elastic
46
- coords += 1.5 * np.random.normal(0, 1, coords.shape)
47
-
48
- return coords
49
-
50
-
51
- def _rotate(
52
- coords: np.ndarray, phi: float, rot_axis=(-2, -1), center: Optional[Tuple] = None
53
- ):
54
- """Rotation along the last two dimensions of coords[..,:-2:]."""
55
- ndim = coords.shape[1]
56
- assert coords.ndim == 2 and ndim in (2, 3)
57
-
58
- if center is None:
59
- center = (0,) * ndim
60
-
61
- assert len(center) == ndim
62
-
63
- center = np.asarray(center)
64
- co, si = np.cos(phi), np.sin(phi)
65
- Rot = np.eye(ndim)
66
- Rot[np.ix_(rot_axis, rot_axis)] = np.array(((co, -si), (si, co)))
67
- x = coords - center
68
- x = x @ Rot.T
69
- x += center
70
- return x
71
-
72
-
73
- def _filter_points(
74
- points: np.ndarray, shape: tuple, origin: Optional[Tuple] = None
75
- ) -> np.ndarray:
76
- """Returns indices of points that are inside the shape extent and given origin."""
77
- ndim = points.shape[-1]
78
- if origin is None:
79
- origin = (0,) * ndim
80
-
81
- idx = tuple(
82
- np.logical_and(points[:, i] >= origin[i], points[:, i] < origin[i] + shape[i])
83
- for i in range(ndim)
84
- )
85
- idx = np.where(np.all(idx, axis=0))[0]
86
- return idx
87
-
88
-
89
- class ConcatAffine(K.RandomAffine):
90
- """Concatenate multiple affine transformations without intermediates."""
91
-
92
- def __init__(self, affines: Sequence[K.RandomAffine]):
93
- super().__init__(degrees=0)
94
- self._affines = affines
95
- if not all([a.same_on_batch for a in affines]):
96
- raise ValueError("all affines must have same_on_batch=True")
97
-
98
- def merge_params(self, params: Sequence[Dict[str, torch.Tensor]]):
99
- """Merge params from affines."""
100
- out = params[0].copy()
101
-
102
- def _torchmax(x, dim):
103
- return torch.max(x, dim=dim).values
104
-
105
- ops = {
106
- "translations": torch.sum,
107
- "center": torch.mean,
108
- "scale": torch.prod,
109
- "shear_x": torch.sum,
110
- "shear_y": torch.sum,
111
- "angle": torch.sum,
112
- "batch_prob": _torchmax,
113
- }
114
- for k, v in params[0].items():
115
- ps = [p[k] for p in params if len(p[k]) > 0]
116
- if len(ps) > 0 and k in ops:
117
- v_new = torch.stack(ps, dim=0).float()
118
- v_new = ops[k](v_new, dim=0)
119
- v_new = v_new.to(v.dtype)
120
- else:
121
- v_new = v
122
- out[k] = v_new
123
-
124
- return out
125
-
126
- def forward_parameters(
127
- self, batch_shape: Tuple[int, ...]
128
- ) -> Dict[str, torch.Tensor]:
129
- params = tuple(a.forward_parameters(batch_shape) for a in self._affines)
130
- # print(params)
131
- return self.merge_params(params)
132
-
133
-
134
- # custom augmentations
135
- class RandomIntensityScaleShift(K.IntensityAugmentationBase2D):
136
- r"""Apply a random scale and shift to the image intensity.
137
-
138
- Args:
139
- p: probability of applying the transformation.
140
- scale: the scale factor to apply
141
- shift: the offset to apply
142
- clip_output: if true clip output
143
- same_on_batch: apply the same transformation across the batch.
144
- keepdim: whether to keep the output shape the same as input (True) or broadcast it
145
- to the batch form (False).
146
- Shape:
147
- - Input: :math:`(C, H, W)` or :math:`(B, C, H, W)`, Optional: :math:`(B, 3, 3)`
148
- - Output: :math:`(B, C, H, W)`
149
-
150
- .. note::
151
- This function internally uses :func:`kornia.enhance.adjust_brightness`
152
-
153
- """
154
-
155
- def __init__(
156
- self,
157
- scale: Tuple[float, float] = (0.5, 2.0),
158
- shift: Tuple[float, float] = (-0.1, 0.1),
159
- clip_output: bool = True,
160
- same_on_batch: bool = False,
161
- p: float = 1.0,
162
- keepdim: bool = False,
163
- ) -> None:
164
- super().__init__(p=p, same_on_batch=same_on_batch, keepdim=keepdim)
165
- self.scale = _range_bound(
166
- scale, "scale", center=0, bounds=(-float("inf"), float("inf"))
167
- )
168
- self.shift = _range_bound(
169
- shift, "shift", center=0, bounds=(-float("inf"), float("inf"))
170
- )
171
- self._param_generator = rg.PlainUniformGenerator(
172
- (self.scale, "scale_factor", None, None),
173
- (self.shift, "shift_factor", None, None),
174
- )
175
-
176
- self.clip_output = clip_output
177
-
178
- def apply_transform(
179
- self,
180
- input: torch.Tensor,
181
- params: Dict[str, torch.Tensor],
182
- flags: Dict[str, Any],
183
- transform: Optional[torch.Tensor] = None,
184
- ) -> torch.Tensor:
185
- scale_factor = params["scale_factor"].to(input)
186
- shift_factor = params["shift_factor"].to(input)
187
- scale_factor = scale_factor.view(len(scale_factor), 1, 1, 1)
188
- shift_factor = shift_factor.view(len(scale_factor), 1, 1, 1)
189
- img_adjust = input * scale_factor + shift_factor
190
- if self.clip_output:
191
- img_adjust = img_adjust.clamp(min=0.0, max=1.0)
192
- return img_adjust
193
-
194
-
195
- class RandomTemporalAffine(K.RandomAffine):
196
- r"""Apply a random 2D affine transformation to a batch of images while
197
- varying the transformation across the time dimension from 0 to 1.
198
-
199
- Same args/kwargs as K.RandomAffine
200
-
201
- """
202
-
203
- def __init__(self, *args, **kwargs) -> None:
204
- super().__init__(*args, same_on_batch=True, **kwargs)
205
-
206
- def forward_parameters(
207
- self, batch_shape: Tuple[int, ...]
208
- ) -> Dict[str, torch.Tensor]:
209
- params = super().forward_parameters(batch_shape)
210
- factor = torch.linspace(0, 1, batch_shape[0]).to(params["translations"])
211
- for key in ["translations", "center", "angle", "shear_x", "shear_y"]:
212
- v = params[key]
213
- if len(v) > 0:
214
- params[key] = v * factor.view(*((-1,) + (1,) * len(v.shape[1:])))
215
-
216
- for key in [
217
- "scale",
218
- ]:
219
- v = params[key]
220
- if len(v) > 0:
221
- params[key] = 1 + (v - 1) * factor.view(
222
- *((-1,) + (1,) * len(v.shape[1:]))
223
- )
224
- return params
225
-
226
- # def compute_transformation(self, input: torch.Tensor,
227
- # params: Dict[str, torch.Tensor],
228
- # flags: Dict[str, Any]) -> torch.Tensor:
229
- # factor = torch.linspace(0, 1, input.shape[0]).to(input)
230
- # for key in ["translations", "center", "angle", "shear_x", "shear_y"]:
231
- # v = params[key]
232
- # params[key] = v * factor.view(*((-1,)+(1,)*len(v.shape[1:])))
233
-
234
- # for key in ["scale", ]:
235
- # v = params[key]
236
- # params[key] = 1 + (v-1) * factor.view(*((-1,)+(1,)*len(v.shape[1:])))
237
-
238
- # return super().compute_transformation(input, params, flags)
239
-
240
-
241
- class BasicPipeline:
242
- """transforms img, mask, and points.
243
-
244
- Only supports 2D transformations for now (any 3D object will preserve its z coordinates/dimensions)
245
- """
246
-
247
- def __init__(self, augs: tuple, filter_points: bool = True):
248
- self.data_keys = ("input", "mask", "keypoints")
249
- self.pipeline = K.AugmentationSequential(
250
- *augs,
251
- # disable align_corners to not trigger lots of warnings from kornia
252
- extra_args={
253
- DataKey.MASK: {"resample": Resample.NEAREST, "align_corners": False}
254
- },
255
- data_keys=self.data_keys,
256
- )
257
- self.filter_points = filter_points
258
-
259
- def __call__(
260
- self,
261
- img: np.ndarray,
262
- mask: np.ndarray,
263
- points: np.ndarray,
264
- timepoints: np.ndarray,
265
- ):
266
- ndim = img.ndim - 1
267
- assert (
268
- ndim in (2, 3)
269
- and points.ndim == 2
270
- and points.shape[-1] == ndim
271
- and timepoints.ndim == 1
272
- and img.shape == mask.shape
273
- )
274
-
275
- x = torch.from_numpy(img).float()
276
- y = torch.from_numpy(mask.astype(np.int64)).float()
277
-
278
- # if 2D add dummy channel
279
- if ndim == 2:
280
- x = x.unsqueeze(1)
281
- y = y.unsqueeze(1)
282
- p = points[..., [1, 0]]
283
- # if 3D we use z as channel (i.e. fix augs across z)
284
- elif ndim == 3:
285
- p = points[..., [2, 1]]
286
-
287
- # flip as kornia expects xy and not yx
288
- p = torch.from_numpy(p).unsqueeze(0).float()
289
- # add batch by duplicating to make kornia happy
290
- p = p.expand(len(x), -1, -1)
291
- # create a mask to know which timepoint the points belong to
292
- ts = torch.from_numpy(timepoints).long()
293
- n_points = p.shape[1]
294
- if n_points > 0:
295
- x, y, p = self.pipeline(x, y, p)
296
- else:
297
- # dummy keypoints
298
- x, y = self.pipeline(x, y, torch.zeros((len(x), 1, 2)))[:2]
299
-
300
- # remove batch
301
- p = p[ts, torch.arange(n_points)]
302
- # flip back
303
- p = p[..., [1, 0]]
304
-
305
- # remove channel
306
- if ndim == 2:
307
- x = x.squeeze(1)
308
- y = y.squeeze(1)
309
-
310
- x = x.numpy()
311
- y = y.numpy().astype(np.uint16)
312
- # p = p.squeeze(0).numpy()
313
- p = p.numpy()
314
- # add back z coordinates
315
- if ndim == 3:
316
- p = np.concatenate([points[..., 0:1], p], axis=-1)
317
- ts = ts.numpy()
318
- # remove points outside of img/mask
319
-
320
- if self.filter_points:
321
- idx = _filter_points(p, shape=x.shape[-ndim:])
322
-
323
- else:
324
- idx = np.arange(len(p), dtype=int)
325
-
326
- p = p[idx]
327
- return (x, y, p), idx
328
-
329
-
330
- class RandomCrop:
331
- def __init__(
332
- self,
333
- crop_size: Optional[Union[int, Tuple[int]]] = None,
334
- ndim: int = 2,
335
- ensure_inside_points: bool = False,
336
- use_padding: bool = True,
337
- padding_mode="constant",
338
- ) -> None:
339
- """crop_size: tuple of int
340
- can be tuple of length 1 (all dimensions)
341
- of length ndim (y,x,...)
342
- of length 2*ndim (y1,y2, x1,x2, ...).
343
- """
344
- if isinstance(crop_size, int):
345
- crop_size = (crop_size,) * 2 * ndim
346
- elif isinstance(crop_size, Iterable):
347
- pass
348
- else:
349
- raise ValueError(f"{crop_size} has to be int or tuple of int")
350
-
351
- if len(crop_size) == 1:
352
- crop_size = (crop_size[0],) * 2 * ndim
353
- elif len(crop_size) == ndim:
354
- crop_size = tuple(chain(*tuple((c, c) for c in crop_size)))
355
- elif len(crop_size) == 2 * ndim:
356
- pass
357
- else:
358
- raise ValueError(f"crop_size has to be of length 1, {ndim}, or {2 * ndim}")
359
-
360
- crop_size = np.array(crop_size)
361
- self._ndim = ndim
362
- self._crop_bounds = crop_size[::2], crop_size[1::2]
363
- self._use_padding = use_padding
364
- self._ensure_inside_points = ensure_inside_points
365
- self._rng = np.random.RandomState()
366
- self._padding_mode = padding_mode
367
-
368
- def crop_img(self, img: np.ndarray, corner: np.ndarray, crop_size: np.ndarray):
369
- if not img.ndim == self._ndim + 1:
370
- raise ValueError(
371
- f"img has to be 1 (time) + {self._ndim} spatial dimensions"
372
- )
373
-
374
- pad_left = np.maximum(0, -corner)
375
- pad_right = np.maximum(
376
- 0, corner + crop_size - np.array(img.shape[-self._ndim :])
377
- )
378
-
379
- img = np.pad(
380
- img,
381
- ((0, 0), *tuple(np.stack((pad_left, pad_right)).T)),
382
- mode=self._padding_mode,
383
- )
384
- slices = (
385
- slice(None),
386
- *tuple(slice(c, c + s) for c, s in zip(corner + pad_left, crop_size)),
387
- )
388
- return img[slices]
389
-
390
- def crop_points(
391
- self, points: np.ndarray, corner: np.ndarray, crop_size: np.ndarray
392
- ):
393
- idx = _filter_points(points, shape=crop_size, origin=corner)
394
- return points[idx] - corner, idx
395
-
396
- def __call__(self, img: np.ndarray, mask: np.ndarray, points: np.ndarray):
397
- assert (
398
- img.ndim == self._ndim + 1
399
- and points.ndim == 2
400
- and points.shape[-1] == self._ndim
401
- and img.shape == mask.shape
402
- )
403
-
404
- points = points.astype(int)
405
-
406
- crop_size = self._rng.randint(self._crop_bounds[0], self._crop_bounds[1] + 1)
407
- # print(f'{crop_size=}')
408
-
409
- if self._ensure_inside_points:
410
- if len(points) == 0:
411
- print("No points given, cannot ensure inside points")
412
- return (img, mask, points), np.zeros((0,), int)
413
-
414
- # sample point and corner relative to it
415
-
416
- _idx = np.random.randint(len(points))
417
- corner = (
418
- points[_idx]
419
- - crop_size
420
- + 1
421
- + self._rng.randint(crop_size // 4, 3 * crop_size // 4)
422
- )
423
- else:
424
- corner = self._rng.randint(
425
- 0, np.maximum(1, np.array(img.shape[-self._ndim :]) - crop_size)
426
- )
427
-
428
- if not self._use_padding:
429
- corner = np.maximum(0, corner)
430
- crop_size = np.minimum(
431
- crop_size, np.array(img.shape[-self._ndim :]) - corner
432
- )
433
-
434
- img = self.crop_img(img, corner, crop_size)
435
- mask = self.crop_img(mask, corner, crop_size)
436
- points, idx = self.crop_points(points, corner, crop_size)
437
-
438
- return (img, mask, points), idx
439
-
440
-
441
- class AugmentationPipeline(BasicPipeline):
442
- """transforms img, mask, and points."""
443
-
444
- def __init__(self, p=0.5, filter_points=True, level=1):
445
- if level == 1:
446
- augs = [
447
- # Augmentations for all images in a window
448
- K.RandomHorizontalFlip(p=0.5, same_on_batch=True),
449
- K.RandomVerticalFlip(p=0.5, same_on_batch=True),
450
- K.RandomAffine(
451
- degrees=180,
452
- shear=(-10, 10, -10, 10), # x_min, x_max, y_min, y_max
453
- translate=(0.05, 0.05),
454
- scale=(0.8, 1.2), # x_min, x_max, y_min, y_max
455
- p=p,
456
- same_on_batch=True,
457
- ),
458
- K.RandomBrightness(
459
- (0.5, 1.5), clip_output=False, p=p, same_on_batch=True
460
- ),
461
- K.RandomGaussianNoise(mean=0.0, std=0.03, p=p, same_on_batch=False),
462
- ]
463
- elif level == 2:
464
- # Crafted for DeepCell crop size 256
465
- augs = [
466
- # Augmentations for all images in a window
467
- K.RandomHorizontalFlip(p=0.5, same_on_batch=True),
468
- K.RandomVerticalFlip(p=0.5, same_on_batch=True),
469
- K.RandomAffine(
470
- degrees=180,
471
- shear=(-5, 5, -5, 5), # x_min, x_max, y_min, y_max
472
- translate=(0.03, 0.03),
473
- scale=(0.8, 1.2), # isotropic
474
- p=p,
475
- same_on_batch=True,
476
- ),
477
- # Anisotropic scaling
478
- K.RandomAffine(
479
- degrees=0,
480
- scale=(0.9, 1.1, 0.9, 1.1), # x_min, x_max, y_min, y_max
481
- p=p,
482
- same_on_batch=True,
483
- ),
484
- # Independet augmentations for each image in window
485
- K.RandomAffine(
486
- degrees=3,
487
- shear=(-2, 2, -2, 2), # x_min, x_max, y_min, y_max
488
- translate=(0.04, 0.04),
489
- p=p,
490
- same_on_batch=False,
491
- ),
492
- # not implemented for points in kornia 0.7.0
493
- # K.RandomElasticTransform(alpha=50, sigma=5, p=p, same_on_batch=False),
494
- # Intensity-based augmentations
495
- K.RandomBrightness(
496
- (0.5, 1.5), clip_output=False, p=p, same_on_batch=True
497
- ),
498
- K.RandomGaussianNoise(mean=0.0, std=0.03, p=p, same_on_batch=False),
499
- ]
500
- elif level == 3:
501
- # Crafted for DeepCell crop size 256
502
- augs = [
503
- # Augmentations for all images in a window
504
- K.RandomHorizontalFlip(p=0.5, same_on_batch=True),
505
- K.RandomVerticalFlip(p=0.5, same_on_batch=True),
506
- ConcatAffine([
507
- K.RandomAffine(
508
- degrees=180,
509
- shear=(-5, 5, -5, 5), # x_min, x_max, y_min, y_max
510
- translate=(0.03, 0.03),
511
- scale=(0.8, 1.2), # isotropic
512
- p=p,
513
- same_on_batch=True,
514
- ),
515
- # Anisotropic scaling
516
- K.RandomAffine(
517
- degrees=0,
518
- scale=(0.9, 1.1, 0.9, 1.1), # x_min, x_max, y_min, y_max
519
- p=p,
520
- same_on_batch=True,
521
- ),
522
- ]),
523
- RandomTemporalAffine(
524
- degrees=10,
525
- translate=(0.05, 0.05),
526
- p=p,
527
- # same_on_batch=True,
528
- ),
529
- # Independet augmentations for each image in window
530
- K.RandomAffine(
531
- degrees=2,
532
- shear=(-2, 2, -2, 2), # x_min, x_max, y_min, y_max
533
- translate=(0.01, 0.01),
534
- p=0.5 * p,
535
- same_on_batch=False,
536
- ),
537
- # Intensity-based augmentations
538
- RandomIntensityScaleShift(
539
- (0.5, 2.0), (-0.1, 0.1), clip_output=False, p=p, same_on_batch=True
540
- ),
541
- K.RandomGaussianNoise(mean=0.0, std=0.03, p=p, same_on_batch=False),
542
- ]
543
- elif level == 4:
544
- # debug
545
- augs = [
546
- K.RandomAffine(
547
- degrees=30,
548
- shear=(-0, 0, -0, 0), # x_min, x_max, y_min, y_max
549
- translate=(0.0, 0.0),
550
- p=1,
551
- same_on_batch=True,
552
- ),
553
- ]
554
- else:
555
- raise ValueError(f"level {level} not supported")
556
-
557
- super().__init__(augs, filter_points)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/data.py DELETED
@@ -1,1509 +0,0 @@
1
- import logging
2
- # from collections.abc import Sequence
3
- from pathlib import Path
4
- from timeit import default_timer
5
- from typing import Literal
6
-
7
- import joblib
8
- import lz4.frame
9
- import networkx as nx
10
- import numpy as np
11
- import pandas as pd
12
- import tifffile
13
- import torch
14
- from numba import njit
15
- from scipy import ndimage as ndi
16
- from scipy.spatial.distance import cdist
17
- from skimage.measure import regionprops
18
- from skimage.segmentation import relabel_sequential
19
- from torch.utils.data import Dataset
20
- from tqdm import tqdm
21
-
22
- from . import wrfeat
23
- from ._check_ctc import _check_ctc, _get_node_attributes
24
- from .augmentations import (
25
- AugmentationPipeline,
26
- RandomCrop,
27
- default_augmenter,
28
- )
29
- from .features import (
30
- _PROPERTIES,
31
- extract_features_patch,
32
- extract_features_regionprops,
33
- )
34
- from .matching import matching
35
-
36
- from typing import List, Optional, Union, Tuple, Sequence
37
-
38
- # from ..utils import blockwise_sum, normalize
39
- from ..utils import blockwise_sum, normalize
40
-
41
- logger = logging.getLogger(__name__)
42
- logger.setLevel(logging.INFO)
43
-
44
-
45
- def _filter_track_df(df, start_frame, end_frame, downscale):
46
- """Only keep tracklets that are present in the given time interval."""
47
- # only retain cells in interval
48
- df = df[(df.t2 >= start_frame) & (df.t1 < end_frame)]
49
-
50
- # shift start and end of each cell
51
- df.t1 = df.t1 - start_frame
52
- df.t2 = df.t2 - start_frame
53
- # set start/end to min/max
54
- df.t1 = df.t1.clip(0, end_frame - start_frame - 1)
55
- df.t2 = df.t2.clip(0, end_frame - start_frame - 1)
56
- # set all parents to 0 that are not in the interval
57
- df.loc[~df.parent.isin(df.label), "parent"] = 0
58
-
59
- if downscale > 1:
60
- if start_frame % downscale != 0:
61
- raise ValueError("start_frame must be a multiple of downscale")
62
-
63
- logger.info(f"Temporal downscaling of tracklet links by {downscale}")
64
-
65
- # remove tracklets that have been fully deleted by temporal downsampling
66
-
67
- mask = (
68
- # (df["t2"] - df["t1"] < downscale - 1)
69
- (df["t1"] % downscale != 0)
70
- & (df["t2"] % downscale != 0)
71
- & (df["t1"] // downscale == df["t2"] // downscale)
72
- )
73
- logger.info(
74
- f"Remove {mask.sum()} tracklets that are fully deleted by downsampling"
75
- )
76
- logger.debug(f"Remove {df[mask]}")
77
-
78
- df = df[~mask]
79
- # set parent to 0 if it has been deleted
80
- df.loc[~df.parent.isin(df.label), "parent"] = 0
81
-
82
- df["t2"] = (df["t2"] / float(downscale)).apply(np.floor).astype(int)
83
- df["t1"] = (df["t1"] / float(downscale)).apply(np.ceil).astype(int)
84
-
85
- # Correct for edge case of single frame tracklet
86
- assert np.all(df["t1"] == np.minimum(df["t1"], df["t2"]))
87
-
88
- return df
89
-
90
-
91
- class _CompressedArray:
92
- """a simple class to compress and decompress a numpy arrays using lz4."""
93
-
94
- # dont compress float types
95
- def __init__(self, data):
96
- self._data = lz4.frame.compress(data)
97
- self._dtype = data.dtype.type
98
- self._shape = data.shape
99
-
100
- def decompress(self):
101
- s = lz4.frame.decompress(self._data)
102
- data = np.frombuffer(s, dtype=self._dtype).reshape(self._shape)
103
- return data
104
-
105
-
106
- def debug_function(f):
107
- def wrapper(*args, **kwargs):
108
- try:
109
- batch = f(*args, **kwargs)
110
- except Exception as e:
111
- logger.error(f"Error in {f.__name__}: {e}")
112
- return None
113
- logger.info(f"XXXX {len(batch['coords'])}")
114
- return batch
115
-
116
- return wrapper
117
-
118
-
119
- class CTCData(Dataset):
120
- def __init__(
121
- self,
122
- root: str = "",
123
- ndim: int = 2,
124
- use_gt: bool = True,
125
- detection_folders: List[str] = ["TRA"],
126
- window_size: int = 10,
127
- max_tokens: Optional[int] = None,
128
- slice_pct: tuple = (0.0, 1.0),
129
- downscale_spatial: int = 1,
130
- downscale_temporal: int = 1,
131
- augment: int = 0,
132
- features: Literal[
133
- "none",
134
- "regionprops",
135
- "regionprops2",
136
- "patch",
137
- "patch_regionprops",
138
- "wrfeat",
139
- ] = "wrfeat",
140
- sanity_dist: bool = False,
141
- crop_size: Optional[tuple] = None,
142
- return_dense: bool = False,
143
- compress: bool = False,
144
- **kwargs,
145
- ) -> None:
146
- """_summary_.
147
-
148
- Args:
149
- root (str):
150
- Folder containing the CTC TRA folder.
151
- ndim (int):
152
- Number of dimensions of the data. Defaults to 2d
153
- (if ndim=3 and data is two dimensional, it will be cast to 3D)
154
- detection_folders:
155
- List of relative paths to folder with detections.
156
- Defaults to ["TRA"], which uses the ground truth detections.
157
- window_size (int):
158
- Window size for transformer.
159
- slice_pct (tuple):
160
- Slice the dataset by percentages (from, to).
161
- augment (int):
162
- if 0, no data augmentation. if > 0, defines level of data augmentation.
163
- features (str):
164
- Types of features to use.
165
- sanity_dist (bool):
166
- Use euclidian distance instead of the association matrix as a target.
167
- crop_size (tuple):
168
- Size of the crops to use for augmentation. If None, no cropping is used.
169
- return_dense (bool):
170
- Return dense masks and images in the data samples.
171
- compress (bool):
172
- Compress elements/remove img if not needed to save memory for large datasets
173
- """
174
- super().__init__()
175
-
176
- self.root = Path(root)
177
- self.name = self.root.name
178
- self.use_gt = use_gt
179
- self.slice_pct = slice_pct
180
- if not 0 <= slice_pct[0] < slice_pct[1] <= 1:
181
- raise ValueError(f"Invalid slice_pct {slice_pct}")
182
- self.downscale_spatial = downscale_spatial
183
- self.downscale_temporal = downscale_temporal
184
- self.detection_folders = detection_folders
185
- self.ndim = ndim
186
- self.features = features
187
-
188
- if features not in ("none", "wrfeat") and features not in _PROPERTIES[ndim]:
189
- raise ValueError(
190
- f"'{features}' not one of the supported {ndim}D features"
191
- f" {tuple(_PROPERTIES[ndim].keys())}"
192
- )
193
-
194
- logger.info(f"ROOT (config): \t{self.root}")
195
- self.root, self.gt_tra_folder = self._guess_root_and_gt_tra_folder(self.root)
196
- logger.info(f"ROOT (guessed): \t{self.root}")
197
- logger.info(f"GT TRA (guessed):\t{self.gt_tra_folder}")
198
- if self.use_gt:
199
- self.gt_mask_folder = self._guess_mask_folder(self.root, self.gt_tra_folder)
200
- else:
201
- logger.info("Using dummy masks as GT")
202
- self.gt_mask_folder = self._guess_det_folder(
203
- self.root, self.detection_folders[0]
204
- )
205
- logger.info(f"GT MASK (guessed):\t{self.gt_mask_folder}")
206
-
207
- # dont load image data if not needed
208
- if features in ("none",):
209
- self.img_folder = None
210
- else:
211
- self.img_folder = self._guess_img_folder(self.root)
212
- logger.info(f"IMG (guessed):\t{self.img_folder}")
213
-
214
- self.feat_dim, self.augmenter, self.cropper = self._setup_features_augs(
215
- ndim, features, augment, crop_size
216
- )
217
-
218
- if window_size <= 1:
219
- raise ValueError("window must be >1")
220
- self.window_size = window_size
221
- self.max_tokens = max_tokens
222
-
223
- self.slice_pct = slice_pct
224
- self.sanity_dist = sanity_dist
225
- self.return_dense = return_dense
226
- self.compress = compress
227
- self.start_frame = 0
228
- self.end_frame = None
229
-
230
- start = default_timer()
231
-
232
- if self.features == "wrfeat":
233
- self.windows = self._load_wrfeat()
234
- else:
235
- self.windows = self._load()
236
-
237
- self.n_divs = self._get_ndivs(self.windows)
238
-
239
- if len(self.windows) > 0:
240
- self.ndim = self.windows[0]["coords"].shape[1]
241
- self.n_objects = tuple(len(t["coords"]) for t in self.windows)
242
- logger.info(
243
- f"Found {np.sum(self.n_objects)} objects in {len(self.windows)} track"
244
- f" windows from {self.root} ({default_timer() - start:.1f}s)\n"
245
- )
246
- else:
247
- self.n_objects = 0
248
- logger.warning(f"Could not load any tracks from {self.root}")
249
-
250
- if self.compress:
251
- self._compress_data()
252
-
253
- # def from_ctc
254
-
255
- @classmethod
256
- def from_arrays(cls, imgs: np.ndarray, masks: np.ndarray, train_args: dict):
257
- self = cls(**train_args)
258
- # for key, value in train_args.items():
259
- # setattr(self, key, value)
260
-
261
- # self.use_gt = use_gt
262
- # self.slice_pct = slice_pct
263
- # if not 0 <= slice_pct[0] < slice_pct[1] <= 1:
264
- # raise ValueError(f"Invalid slice_pct {slice_pct}")
265
- # self.downscale_spatial = downscale_spatial
266
- # self.downscale_temporal = downscale_temporal
267
- # self.detection_folders = detection_folders
268
- # self.ndim = ndim
269
- # self.features = features
270
-
271
- # if features not in ("none", "wrfeat") and features not in _PROPERTIES[ndim]:
272
- # raise ValueError(
273
- # f"'{features}' not one of the supported {ndim}D features {tuple(_PROPERTIES[ndim].keys())}"
274
- # )
275
-
276
- # logger.info(f"ROOT (config): {self.root}")
277
- # self.root, self.gt_tra_folder = self._guess_root_and_gt_tra_folder(self.root)
278
- # logger.info(f"ROOT: \t{self.root}")
279
- # logger.info(f"GT TRA:\t{self.gt_tra_folder}")
280
- # if self.use_gt:
281
- # self.gt_mask_folder = self._guess_mask_folder(self.root, self.gt_tra_folder)
282
- # else:
283
- # logger.info("Using dummy masks as GT")
284
- # self.gt_mask_folder = self._guess_det_folder(
285
- # self.root, self.detection_folders[0]
286
- # )
287
- # logger.info(f"GT MASK:\t{self.gt_mask_folder}")
288
-
289
- # dont load image data if not needed
290
- # if features in ("none",):
291
- # self.img_folder = None
292
- # else:
293
- # self.img_folder = self._guess_img_folder(self.root)
294
- # logger.info(f"IMG:\t\t{self.img_folder}")
295
-
296
- self.feat_dim, self.augmenter, self.cropper = self._setup_features_augs(
297
- self.ndim, self.features, self.augment, self.crop_size
298
- )
299
-
300
- start = default_timer()
301
-
302
- if self.features == "wrfeat":
303
- self.windows = self._load_wrfeat()
304
- else:
305
- self.windows = self._load()
306
-
307
- self.n_divs = self._get_ndivs(self.windows)
308
-
309
- if len(self.windows) > 0:
310
- self.ndim = self.windows[0]["coords"].shape[1]
311
- self.n_objects = tuple(len(t["coords"]) for t in self.windows)
312
- logger.info(
313
- f"Found {np.sum(self.n_objects)} objects in {len(self.windows)} track"
314
- f" windows from {self.root} ({default_timer() - start:.1f}s)\n"
315
- )
316
- else:
317
- self.n_objects = 0
318
- logger.warning(f"Could not load any tracks from {self.root}")
319
-
320
- if self.compress:
321
- self._compress_data()
322
-
323
- def _get_ndivs(self, windows):
324
- n_divs = []
325
- for w in tqdm(windows, desc="Counting divisions", leave=False):
326
- _n = (
327
- (
328
- blockwise_sum(
329
- torch.from_numpy(w["assoc_matrix"]).float(),
330
- torch.from_numpy(w["timepoints"]).long(),
331
- ).max(dim=0)[0]
332
- == 2
333
- )
334
- .sum()
335
- .item()
336
- )
337
- n_divs.append(_n)
338
- return n_divs
339
-
340
- def _setup_features_augs(
341
- self, ndim: int, features: str, augment: int, crop_size: Tuple[int]
342
- ):
343
- if self.features == "wrfeat":
344
- return self._setup_features_augs_wrfeat(ndim, features, augment, crop_size)
345
-
346
- cropper = (
347
- RandomCrop(
348
- crop_size=crop_size,
349
- ndim=ndim,
350
- use_padding=False,
351
- ensure_inside_points=True,
352
- )
353
- if crop_size is not None
354
- else None
355
- )
356
-
357
- # Hack
358
- if self.features == "none":
359
- return 0, default_augmenter, cropper
360
-
361
- if ndim == 2:
362
- augmenter = AugmentationPipeline(p=0.8, level=augment) if augment else None
363
- feat_dim = {
364
- "none": 0,
365
- "regionprops": 7,
366
- "regionprops2": 6,
367
- "patch": 256,
368
- "patch_regionprops": 256 + 5,
369
- }[features]
370
- elif ndim == 3:
371
- augmenter = AugmentationPipeline(p=0.8, level=augment) if augment else None
372
- feat_dim = {
373
- "none": 0,
374
- "regionprops2": 11,
375
- "patch_regionprops": 256 + 8,
376
- }[features]
377
-
378
- return feat_dim, augmenter, cropper
379
-
380
- def _compress_data(self):
381
- # compress masks and assoc_matrix
382
- logger.info("Compressing masks and assoc_matrix to save memory")
383
- for w in self.windows:
384
- w["mask"] = _CompressedArray(w["mask"])
385
- # dont compress full imgs (as needed for patch features)
386
- w["img"] = _CompressedArray(w["img"])
387
- w["assoc_matrix"] = _CompressedArray(w["assoc_matrix"])
388
- self.gt_masks = _CompressedArray(self.gt_masks)
389
- self.det_masks = {k: _CompressedArray(v) for k, v in self.det_masks.items()}
390
- # dont compress full imgs (as needed for patch features)
391
- self.imgs = _CompressedArray(self.imgs)
392
-
393
- def _guess_root_and_gt_tra_folder(self, inp: Path):
394
- """Guesses the root and the ground truth folder from a given input path.
395
-
396
- Args:
397
- inp (Path): _description_
398
-
399
- Returns:
400
- Path: root folder,
401
- """
402
- if inp.name == "TRA":
403
- # 01_GT/TRA --> 01, 01_GT/TRA
404
- root = inp.parent.parent / inp.parent.name.split("_")[0]
405
- return root, inp
406
- elif "ERR_SEG" in inp.name:
407
- # 01_ERR_SEG --> 01, 01_GT/TRA. We know that the data is in CTC folder format
408
- num = inp.name.split("_")[0]
409
- return inp.parent / num, inp.parent / f"{num}_GT" / "TRA"
410
- else:
411
- ctc_tra = Path(f"{inp}_GT") / "TRA"
412
- tra = ctc_tra if ctc_tra.exists() else inp / "TRA"
413
- # 01 --> 01, 01_GT/TRA or 01/TRA
414
- return inp, tra
415
-
416
- def _guess_img_folder(self, root: Path):
417
- """Guesses the image folder corresponding to a root."""
418
- if (root / "img").exists():
419
- return root / "img"
420
- else:
421
- return root
422
-
423
- def _guess_mask_folder(self, root: Path, gt_tra: Path):
424
- """Guesses the mask folder corresponding to a root.
425
-
426
- In CTC format, we use silver truth segmentation masks.
427
- """
428
- f = None
429
- # first try CTC format
430
- if gt_tra.parent.name.endswith("_GT"):
431
- # We use the silver truth segmentation masks
432
- f = root / str(gt_tra.parent.name).replace("_GT", "_ST") / "SEG"
433
- # try our simpler 'img' format
434
- if f is None or not f.exists():
435
- f = gt_tra
436
- if not f.exists():
437
- raise ValueError(f"Could not find mask folder for {root}")
438
- return f
439
-
440
- @classmethod
441
- def _guess_det_folder(cls, root: Path, suffix: str):
442
- """Checks for the annoying CTC format with dataset numbering as part of folder names."""
443
- guesses = (
444
- (root / suffix),
445
- Path(f"{root}_{suffix}"),
446
- Path(f"{root}_GT") / suffix,
447
- )
448
- for path in guesses:
449
- if path.exists():
450
- return path
451
-
452
- logger.warning(f"Skipping non-existing detection folder {root / suffix}")
453
- return None
454
-
455
- def __len__(self):
456
- return len(self.windows)
457
-
458
- def _load_gt(self):
459
- logger.info("Loading ground truth")
460
- self.start_frame = int(
461
- len(list(self.gt_mask_folder.glob("*.tif"))) * self.slice_pct[0]
462
- )
463
- self.end_frame = int(
464
- len(list(self.gt_mask_folder.glob("*.tif"))) * self.slice_pct[1]
465
- )
466
-
467
- masks = self._load_tiffs(self.gt_mask_folder, dtype=np.int32)
468
- masks = self._correct_gt_with_st(self.gt_mask_folder, masks, dtype=np.int32)
469
-
470
- if self.use_gt:
471
- track_df = self._load_tracklet_links(self.gt_tra_folder)
472
- track_df = _filter_track_df(
473
- track_df, self.start_frame, self.end_frame, self.downscale_temporal
474
- )
475
- else:
476
- # create dummy track dataframe
477
- logger.info("Using dummy track dataframe")
478
- track_df = self._build_tracklets_without_gt(masks)
479
-
480
- _check_ctc(track_df, _get_node_attributes(masks), masks)
481
-
482
- # Build ground truth lineage graph
483
- self.gt_labels, self.gt_timepoints, self.gt_graph = _ctc_lineages(
484
- track_df, masks
485
- )
486
-
487
- return masks, track_df
488
-
489
- def _correct_gt_with_st(
490
- self, folder: Path, x: np.ndarray, dtype: Optional[str] = None
491
- ):
492
- if str(folder).endswith("_GT/TRA"):
493
- st_path = (
494
- tuple(folder.parents)[1]
495
- / folder.parent.stem.replace("_GT", "_ST")
496
- / "SEG"
497
- )
498
- if not st_path.exists():
499
- logger.debug("No _ST folder found, skipping correction")
500
- else:
501
- logger.info(f"ST MASK:\t\t{st_path} for correcting with ST masks")
502
- st_masks = self._load_tiffs(st_path, dtype)
503
- x = np.maximum(x, st_masks)
504
-
505
- return x
506
-
507
- def _load_tiffs(self, folder: Path, dtype=None):
508
- assert isinstance(self.downscale_temporal, int)
509
- logger.debug(f"Loading tiffs from {folder} as {dtype}")
510
- logger.debug(
511
- f"Temporal downscaling of {folder.name} by {self.downscale_temporal}"
512
- )
513
- x = np.stack([
514
- tifffile.imread(f).astype(dtype)
515
- for f in tqdm(
516
- sorted(folder.glob("*.tif"))[
517
- self.start_frame : self.end_frame : self.downscale_temporal
518
- ],
519
- leave=False,
520
- desc=f"Loading [{self.start_frame}:{self.end_frame}]",
521
- )
522
- ])
523
-
524
- # T, (Z), Y, X
525
- assert isinstance(self.downscale_spatial, int)
526
- if self.downscale_spatial > 1 or self.downscale_temporal > 1:
527
- # TODO make safe for label arrays
528
- logger.debug(
529
- f"Spatial downscaling of {folder.name} by {self.downscale_spatial}"
530
- )
531
- slices = (
532
- slice(None),
533
- *tuple(
534
- slice(None, None, self.downscale_spatial) for _ in range(x.ndim - 1)
535
- ),
536
- )
537
- x = x[slices]
538
-
539
- logger.debug(f"Loaded array of shape {x.shape} from {folder}")
540
- return x
541
-
542
- def _masks2properties(self, masks):
543
- """Turn label masks into lists of properties, sorted (ascending) by time and label id.
544
-
545
- Args:
546
- masks (np.ndarray): T, (Z), H, W
547
-
548
- Returns:
549
- labels: List of labels
550
- ts: List of timepoints
551
- coords: List of coordinates
552
- """
553
- # Get coordinates, timepoints, and labels of detections
554
- labels = []
555
- ts = []
556
- coords = []
557
- properties_by_time = dict()
558
- assert len(self.imgs) == len(masks)
559
- for _t, frame in tqdm(
560
- enumerate(masks),
561
- # total=len(detections),
562
- leave=False,
563
- desc="Loading masks and properties",
564
- ):
565
- regions = regionprops(frame)
566
- t_labels = []
567
- t_ts = []
568
- t_coords = []
569
- for _r in regions:
570
- t_labels.append(_r.label)
571
- t_ts.append(_t)
572
- centroid = np.array(_r.centroid).astype(int)
573
- t_coords.append(centroid)
574
-
575
- properties_by_time[_t] = dict(coords=t_coords, labels=t_labels)
576
- labels.extend(t_labels)
577
- ts.extend(t_ts)
578
- coords.extend(t_coords)
579
-
580
- labels = np.array(labels, dtype=int)
581
- ts = np.array(ts, dtype=int)
582
- coords = np.array(coords, dtype=int)
583
-
584
- return labels, ts, coords, properties_by_time
585
-
586
- def _load_tracklet_links(self, folder: Path) -> pd.DataFrame:
587
- df = pd.read_csv(
588
- folder / "man_track.txt",
589
- delimiter=" ",
590
- names=["label", "t1", "t2", "parent"],
591
- dtype=int,
592
- )
593
- n_dets = (df.t2 - df.t1 + 1).sum()
594
- logger.debug(f"{folder} has {n_dets} detections")
595
-
596
- n_divs = (df[df.parent != 0]["parent"].value_counts() == 2).sum()
597
- logger.debug(f"{folder} has {n_divs} divisions")
598
- return df
599
-
600
- def _build_tracklets_without_gt(self, masks):
601
- """Create a dataframe with tracklets from masks."""
602
- rows = []
603
- for t, m in enumerate(masks):
604
- for c in np.unique(m[m > 0]):
605
- rows.append([c, t, t, 0])
606
- df = pd.DataFrame(rows, columns=["label", "t1", "t2", "parent"])
607
- return df
608
-
609
- def _check_dimensions(self, x: np.ndarray):
610
- if self.ndim == 2 and not x.ndim == 3:
611
- raise ValueError(f"Expected 2D data, got {x.ndim - 1}D data")
612
- elif self.ndim == 3:
613
- # if ndim=3 and data is two dimensional, it will be cast to 3D
614
- if x.ndim == 3:
615
- x = np.expand_dims(x, axis=1)
616
- elif x.ndim == 4:
617
- pass
618
- else:
619
- raise ValueError(f"Expected 3D data, got {x.ndim - 1}D data")
620
- return x
621
-
622
- def _load(self):
623
- # Load ground truth
624
- logger.info("Loading ground truth")
625
- self.gt_masks, self.gt_track_df = self._load_gt()
626
-
627
- self.gt_masks = self._check_dimensions(self.gt_masks)
628
-
629
- # Load images
630
- if self.img_folder is None:
631
- self.imgs = np.zeros_like(self.gt_masks)
632
- else:
633
- logger.info("Loading images")
634
- imgs = self._load_tiffs(self.img_folder, dtype=np.float32)
635
- self.imgs = np.stack([
636
- normalize(_x) for _x in tqdm(imgs, desc="Normalizing", leave=False)
637
- ])
638
- self.imgs = self._check_dimensions(self.imgs)
639
- if self.compress:
640
- # prepare images to be compressed later (e.g. removing non masked parts for regionprops features)
641
- self.imgs = np.stack([
642
- _compress_img_mask_preproc(im, mask, self.features)
643
- for im, mask in zip(self.imgs, self.gt_masks)
644
- ])
645
-
646
- assert len(self.gt_masks) == len(self.imgs)
647
-
648
- # Load each of the detection folders and create data samples with a sliding window
649
- windows = []
650
- self.properties_by_time = dict()
651
- self.det_masks = dict()
652
- for _f in self.detection_folders:
653
- det_folder = self.root / _f
654
-
655
- if det_folder == self.gt_mask_folder:
656
- det_masks = self.gt_masks
657
- logger.info("DET MASK:\tUsing GT masks")
658
- (
659
- det_labels,
660
- det_ts,
661
- det_coords,
662
- det_properties_by_time,
663
- ) = self._masks2properties(det_masks)
664
-
665
- det_gt_matching = {
666
- t: {_l: _l for _l in det_properties_by_time[t]["labels"]}
667
- for t in range(len(det_masks))
668
- }
669
- else:
670
- det_folder = self._guess_det_folder(root=self.root, suffix=_f)
671
- if det_folder is None:
672
- continue
673
-
674
- logger.info(f"DET MASK:\t{det_folder}")
675
- det_masks = self._load_tiffs(det_folder, dtype=np.int32)
676
- det_masks = self._correct_gt_with_st(
677
- det_folder, det_masks, dtype=np.int32
678
- )
679
- det_masks = self._check_dimensions(det_masks)
680
- (
681
- det_labels,
682
- det_ts,
683
- det_coords,
684
- det_properties_by_time,
685
- ) = self._masks2properties(det_masks)
686
-
687
- # FIXME matching can be slow for big images
688
- # raise NotImplementedError("Matching not implemented for 3d version")
689
- det_gt_matching = {
690
- t: {
691
- _d: _gt
692
- for _gt, _d in matching(
693
- self.gt_masks[t],
694
- det_masks[t],
695
- threshold=0.3,
696
- max_distance=16,
697
- )
698
- }
699
- for t in tqdm(range(len(det_masks)), leave=False, desc="Matching")
700
- }
701
-
702
- self.properties_by_time[_f] = det_properties_by_time
703
- self.det_masks[_f] = det_masks
704
- _w = self._build_windows(
705
- det_folder,
706
- det_masks,
707
- det_labels,
708
- det_ts,
709
- det_coords,
710
- det_gt_matching,
711
- )
712
-
713
- windows.extend(_w)
714
-
715
- return windows
716
-
717
- def _build_windows(
718
- self,
719
- det_folder,
720
- det_masks,
721
- labels,
722
- ts,
723
- coords,
724
- matching,
725
- ):
726
- """_summary_.
727
-
728
- Args:
729
- det_folder (_type_): _description_
730
- det_masks (_type_): _description_
731
- labels (_type_): _description_
732
- ts (_type_): _description_
733
- coords (_type_): _description_
734
- matching (_type_): _description_
735
-
736
- Raises:
737
- ValueError: _description_
738
- ValueError: _description_
739
-
740
- Returns:
741
- _type_: _description_
742
- """
743
- window_size = self.window_size
744
- windows = []
745
-
746
- # Creates the data samples with a sliding window
747
- masks = self.gt_masks
748
- for t1, t2 in tqdm(
749
- zip(range(0, len(masks)), range(window_size, len(masks) + 1)),
750
- total=len(masks) - window_size + 1,
751
- leave=False,
752
- desc="Building windows",
753
- ):
754
- idx = (ts >= t1) & (ts < t2)
755
- _ts = ts[idx]
756
- _labels = labels[idx]
757
- _coords = coords[idx]
758
-
759
- # Use GT
760
- # _labels = self.gt_labels[idx]
761
- # _ts = self.gt_timepoints[idx]
762
-
763
- if len(_labels) == 0:
764
- # raise ValueError(f"No detections in sample {det_folder}:{t1}")
765
- A = np.zeros((0, 0), dtype=bool)
766
- _coords = np.zeros((0, masks.ndim - 1), dtype=int)
767
- else:
768
- if len(np.unique(_ts)) == 1:
769
- logger.debug(
770
- "Only detections from a single timepoint in sample"
771
- f" {det_folder}:{t1}"
772
- )
773
-
774
- # build matrix from incomplete labels, but full lineage graph. If a label is missing, I should skip over it.
775
- A = _ctc_assoc_matrix(
776
- _labels,
777
- _ts,
778
- self.gt_graph,
779
- matching,
780
- )
781
-
782
- if self.sanity_dist:
783
- # # Sanity check: Can the model learn the euclidian distances?
784
- # c = coords - coords.mean(axis=0, keepdims=True)
785
- # c /= c.std(axis=0, keepdims=True)
786
- # A = np.einsum('id,jd',c,c)
787
- # A = 1 / (1 + np.exp(-A))
788
- A = np.exp(-0.01 * cdist(_coords, _coords))
789
-
790
- w = dict(
791
- coords=_coords,
792
- # TODO imgs and masks are unaltered here
793
- t1=t1,
794
- img=self.imgs[t1:t2],
795
- mask=det_masks[t1:t2],
796
- assoc_matrix=A,
797
- labels=_labels,
798
- timepoints=_ts,
799
- )
800
-
801
- windows.append(w)
802
-
803
- logger.debug(f"Built {len(windows)} track windows from {det_folder}.\n")
804
- return windows
805
-
806
- def __getitem__(self, n: int, return_dense=None):
807
- # if not set, use default
808
- if self.features == "wrfeat":
809
- return self._getitem_wrfeat(n, return_dense)
810
-
811
- if return_dense is None:
812
- return_dense = self.return_dense
813
-
814
- track = self.windows[n]
815
- coords = track["coords"]
816
- assoc_matrix = track["assoc_matrix"]
817
- labels = track["labels"]
818
- img = track["img"]
819
- mask = track["mask"]
820
- timepoints = track["timepoints"]
821
- min_time = track["t1"]
822
-
823
- if isinstance(mask, _CompressedArray):
824
- mask = mask.decompress()
825
- if isinstance(img, _CompressedArray):
826
- img = img.decompress()
827
- if isinstance(assoc_matrix, _CompressedArray):
828
- assoc_matrix = assoc_matrix.decompress()
829
-
830
- # cropping
831
- if self.cropper is not None:
832
- (img2, mask2, coords2), idx = self.cropper(img, mask, coords)
833
- cropped_timepoints = timepoints[idx]
834
-
835
- # at least one detection in each timepoint to accept the crop
836
- if len(np.unique(cropped_timepoints)) == self.window_size:
837
- # at least two total detections to accept the crop
838
- # if len(idx) >= 2:
839
- img, mask, coords = img2, mask2, coords2
840
- labels = labels[idx]
841
- timepoints = timepoints[idx]
842
- assoc_matrix = assoc_matrix[idx][:, idx]
843
- else:
844
- logger.debug("disable cropping as no trajectories would be left")
845
-
846
- if self.features == "none":
847
- if self.augmenter is not None:
848
- coords = self.augmenter(coords)
849
- # Empty features
850
- features = np.zeros((len(coords), 0))
851
-
852
- elif self.features in ("regionprops", "regionprops2"):
853
- if self.augmenter is not None:
854
- (img2, mask2, coords2), idx = self.augmenter(
855
- img, mask, coords, timepoints - min_time
856
- )
857
- if len(idx) > 0:
858
- img, mask, coords = img2, mask2, coords2
859
- labels = labels[idx]
860
- timepoints = timepoints[idx]
861
- assoc_matrix = assoc_matrix[idx][:, idx]
862
- mask = mask.astype(int)
863
- else:
864
- logger.debug(
865
- "disable augmentation as no trajectories would be left"
866
- )
867
-
868
- features = tuple(
869
- extract_features_regionprops(
870
- m, im, labels[timepoints == i + min_time], properties=self.features
871
- )
872
- for i, (m, im) in enumerate(zip(mask, img))
873
- )
874
- features = np.concatenate(features, axis=0)
875
- # features = np.zeros((len(coords), self.feat_dim))
876
-
877
- elif self.features == "patch":
878
- if self.augmenter is not None:
879
- (img2, mask2, coords2), idx = self.augmenter(
880
- img, mask, coords, timepoints - min_time
881
- )
882
- if len(idx) > 0:
883
- img, mask, coords = img2, mask2, coords2
884
- labels = labels[idx]
885
- timepoints = timepoints[idx]
886
- assoc_matrix = assoc_matrix[idx][:, idx]
887
- mask = mask.astype(int)
888
- else:
889
- print("disable augmentation as no trajectories would be left")
890
-
891
- features = tuple(
892
- extract_features_patch(
893
- m,
894
- im,
895
- coords[timepoints == min_time + i],
896
- labels[timepoints == min_time + i],
897
- )
898
- for i, (m, im) in enumerate(zip(mask, img))
899
- )
900
- features = np.concatenate(features, axis=0)
901
- elif self.features == "patch_regionprops":
902
- if self.augmenter is not None:
903
- (img2, mask2, coords2), idx = self.augmenter(
904
- img, mask, coords, timepoints - min_time
905
- )
906
- if len(idx) > 0:
907
- img, mask, coords = img2, mask2, coords2
908
- labels = labels[idx]
909
- timepoints = timepoints[idx]
910
- assoc_matrix = assoc_matrix[idx][:, idx]
911
- mask = mask.astype(int)
912
- else:
913
- print("disable augmentation as no trajectories would be left")
914
-
915
- features1 = tuple(
916
- extract_features_patch(
917
- m,
918
- im,
919
- coords[timepoints == min_time + i],
920
- labels[timepoints == min_time + i],
921
- )
922
- for i, (m, im) in enumerate(zip(mask, img))
923
- )
924
- features2 = tuple(
925
- extract_features_regionprops(
926
- m,
927
- im,
928
- labels[timepoints == i + min_time],
929
- properties=self.features,
930
- )
931
- for i, (m, im) in enumerate(zip(mask, img))
932
- )
933
-
934
- features = tuple(
935
- np.concatenate((f1, f2), axis=-1)
936
- for f1, f2 in zip(features1, features2)
937
- )
938
-
939
- features = np.concatenate(features, axis=0)
940
-
941
- # remove temporal offset and add timepoints to coords
942
- relative_timepoints = timepoints - track["t1"]
943
- coords = np.concatenate((relative_timepoints[:, None], coords), axis=-1)
944
-
945
- if self.max_tokens and len(timepoints) > self.max_tokens:
946
- time_incs = np.where(timepoints - np.roll(timepoints, 1))[0]
947
- n_elems = time_incs[np.searchsorted(time_incs, self.max_tokens) - 1]
948
- timepoints = timepoints[:n_elems]
949
- labels = labels[:n_elems]
950
- coords = coords[:n_elems]
951
- features = features[:n_elems]
952
- assoc_matrix = assoc_matrix[:n_elems, :n_elems]
953
- logger.info(
954
- f"Clipped window of size {timepoints[n_elems - 1] - timepoints.min()}"
955
- )
956
-
957
- coords0 = torch.from_numpy(coords).float()
958
- features = torch.from_numpy(features).float()
959
- assoc_matrix = torch.from_numpy(assoc_matrix.copy()).float()
960
- labels = torch.from_numpy(labels).long()
961
- timepoints = torch.from_numpy(timepoints).long()
962
-
963
- if self.augmenter is not None:
964
- coords = coords0.clone()
965
- coords[:, 1:] += torch.randint(0, 256, (1, self.ndim))
966
- else:
967
- coords = coords0.clone()
968
- res = dict(
969
- features=features,
970
- coords0=coords0,
971
- coords=coords,
972
- assoc_matrix=assoc_matrix,
973
- timepoints=timepoints,
974
- labels=labels,
975
- )
976
-
977
- if return_dense:
978
- if all([x is not None for x in img]):
979
- img = torch.from_numpy(img).float()
980
- res["img"] = img
981
-
982
- mask = torch.from_numpy(mask.astype(int)).long()
983
- res["mask"] = mask
984
-
985
- return res
986
-
987
- # wrfeat functions...
988
- # TODO: refactor this as a subclass or make everything a class factory. *very* hacky this way
989
-
990
- def _setup_features_augs_wrfeat(
991
- self, ndim: int, features: str, augment: int, crop_size: Tuple[int]
992
- ):
993
- # FIXME: hardcoded
994
- feat_dim = 7 if ndim == 2 else 12
995
- if augment == 1:
996
- augmenter = wrfeat.WRAugmentationPipeline([
997
- wrfeat.WRRandomFlip(p=0.5),
998
- wrfeat.WRRandomAffine(
999
- p=0.8, degrees=180, scale=(0.5, 2), shear=(0.1, 0.1)
1000
- ),
1001
- # wrfeat.WRRandomBrightness(p=0.8, factor=(0.5, 2.0)),
1002
- # wrfeat.WRRandomOffset(p=0.8, offset=(-3, 3)),
1003
- ])
1004
- elif augment == 2:
1005
- augmenter = wrfeat.WRAugmentationPipeline([
1006
- wrfeat.WRRandomFlip(p=0.5),
1007
- wrfeat.WRRandomAffine(
1008
- p=0.8, degrees=180, scale=(0.5, 2), shear=(0.1, 0.1)
1009
- ),
1010
- wrfeat.WRRandomBrightness(p=0.8),
1011
- wrfeat.WRRandomOffset(p=0.8, offset=(-3, 3)),
1012
- ])
1013
- elif augment == 3:
1014
- augmenter = wrfeat.WRAugmentationPipeline([
1015
- wrfeat.WRRandomFlip(p=0.5),
1016
- wrfeat.WRRandomAffine(
1017
- p=0.8, degrees=180, scale=(0.5, 2), shear=(0.1, 0.1)
1018
- ),
1019
- wrfeat.WRRandomBrightness(p=0.8),
1020
- wrfeat.WRRandomMovement(offset=(-10, 10), p=0.3),
1021
- wrfeat.WRRandomOffset(p=0.8, offset=(-3, 3)),
1022
- ])
1023
- else:
1024
- augmenter = None
1025
-
1026
- cropper = (
1027
- wrfeat.WRRandomCrop(
1028
- crop_size=crop_size,
1029
- ndim=ndim,
1030
- )
1031
- if crop_size is not None
1032
- else None
1033
- )
1034
- return feat_dim, augmenter, cropper
1035
-
1036
- def _load_wrfeat(self):
1037
- # Load ground truth
1038
- self.gt_masks, self.gt_track_df = self._load_gt()
1039
- self.gt_masks = self._check_dimensions(self.gt_masks)
1040
-
1041
- # Load images
1042
- if self.img_folder is None:
1043
- if self.gt_masks is not None:
1044
- self.imgs = np.zeros_like(self.gt_masks)
1045
- else:
1046
- raise NotImplementedError("No images and no GT masks")
1047
- else:
1048
- logger.info("Loading images")
1049
- imgs = self._load_tiffs(self.img_folder, dtype=np.float32)
1050
- self.imgs = np.stack([
1051
- normalize(_x) for _x in tqdm(imgs, desc="Normalizing", leave=False)
1052
- ])
1053
- self.imgs = self._check_dimensions(self.imgs)
1054
- if self.compress:
1055
- # prepare images to be compressed later (e.g. removing non masked parts for regionprops features)
1056
- self.imgs = np.stack([
1057
- _compress_img_mask_preproc(im, mask, self.features)
1058
- for im, mask in zip(self.imgs, self.gt_masks)
1059
- ])
1060
-
1061
- assert len(self.gt_masks) == len(self.imgs)
1062
-
1063
- # Load each of the detection folders and create data samples with a sliding window
1064
- windows = []
1065
- self.properties_by_time = dict()
1066
- self.det_masks = dict()
1067
- logger.info("Loading detections")
1068
- for _f in self.detection_folders:
1069
- det_folder = self.root / _f
1070
-
1071
- if det_folder == self.gt_mask_folder:
1072
- det_masks = self.gt_masks
1073
- logger.info("DET MASK:\tUsing GT masks")
1074
- # identity matching
1075
- det_gt_matching = {
1076
- t: {_l: _l for _l in set(np.unique(d)) - {0}}
1077
- for t, d in enumerate(det_masks)
1078
- }
1079
- else:
1080
- det_folder = self._guess_det_folder(root=self.root, suffix=_f)
1081
- if det_folder is None:
1082
- continue
1083
- logger.info(f"DET MASK (guessed):\t{det_folder}")
1084
- det_masks = self._load_tiffs(det_folder, dtype=np.int32)
1085
- det_masks = self._correct_gt_with_st(
1086
- det_folder, det_masks, dtype=np.int32
1087
- )
1088
- det_masks = self._check_dimensions(det_masks)
1089
- # FIXME matching can be slow for big images
1090
- # raise NotImplementedError("Matching not implemented for 3d version")
1091
- det_gt_matching = {
1092
- t: {
1093
- _d: _gt
1094
- for _gt, _d in matching(
1095
- self.gt_masks[t],
1096
- det_masks[t],
1097
- threshold=0.3,
1098
- max_distance=16,
1099
- )
1100
- }
1101
- for t in tqdm(range(len(det_masks)), leave=False, desc="Matching")
1102
- }
1103
-
1104
- self.det_masks[_f] = det_masks
1105
-
1106
- # build features
1107
-
1108
- features = joblib.Parallel(n_jobs=8)(
1109
- joblib.delayed(wrfeat.WRFeatures.from_mask_img)(
1110
- mask=mask[None], img=img[None], t_start=t
1111
- )
1112
- for t, (mask, img) in enumerate(zip(det_masks, self.imgs))
1113
- )
1114
-
1115
- properties_by_time = dict()
1116
- for _t, _feats in enumerate(features):
1117
- properties_by_time[_t] = dict(
1118
- coords=_feats.coords, labels=_feats.labels
1119
- )
1120
- self.properties_by_time[_f] = properties_by_time
1121
-
1122
- _w = self._build_windows_wrfeat(
1123
- features,
1124
- det_masks,
1125
- det_gt_matching,
1126
- )
1127
-
1128
- windows.extend(_w)
1129
-
1130
- return windows
1131
-
1132
- def _build_windows_wrfeat(
1133
- self,
1134
- features: Sequence[wrfeat.WRFeatures],
1135
- det_masks: np.ndarray,
1136
- matching: Tuple[dict],
1137
- ):
1138
- assert len(self.imgs) == len(det_masks)
1139
-
1140
- window_size = self.window_size
1141
- windows = []
1142
-
1143
- # Creates the data samples with a sliding window
1144
- for t1, t2 in tqdm(
1145
- zip(range(0, len(det_masks)), range(window_size, len(det_masks) + 1)),
1146
- total=len(det_masks) - window_size + 1,
1147
- leave=False,
1148
- desc="Building windows",
1149
- ):
1150
- img = self.imgs[t1:t2]
1151
- mask = det_masks[t1:t2]
1152
- feat = wrfeat.WRFeatures.concat(features[t1:t2])
1153
-
1154
- labels = feat.labels
1155
- timepoints = feat.timepoints
1156
- coords = feat.coords
1157
-
1158
- if len(feat) == 0:
1159
- A = np.zeros((0, 0), dtype=bool)
1160
- coords = np.zeros((0, feat.ndim), dtype=int)
1161
- else:
1162
- # build matrix from incomplete labels, but full lineage graph. If a label is missing, I should skip over it.
1163
- A = _ctc_assoc_matrix(
1164
- labels,
1165
- timepoints,
1166
- self.gt_graph,
1167
- matching,
1168
- )
1169
- w = dict(
1170
- coords=coords,
1171
- # TODO imgs and masks are unaltered here
1172
- t1=t1,
1173
- img=img,
1174
- mask=mask,
1175
- assoc_matrix=A,
1176
- labels=labels,
1177
- timepoints=timepoints,
1178
- wrfeat=feat,
1179
- )
1180
- windows.append(w)
1181
-
1182
- logger.debug(f"Built {len(windows)} track windows.\n")
1183
- return windows
1184
-
1185
- def _getitem_wrfeat(self, n: int, return_dense=None):
1186
- # if not set, use default
1187
-
1188
- if return_dense is None:
1189
- return_dense = self.return_dense
1190
-
1191
- track = self.windows[n]
1192
- # coords = track["coords"]
1193
- assoc_matrix = track["assoc_matrix"]
1194
- labels = track["labels"]
1195
- img = track["img"]
1196
- mask = track["mask"]
1197
- timepoints = track["timepoints"]
1198
- # track["t1"]
1199
- feat = track["wrfeat"]
1200
-
1201
- if return_dense and isinstance(mask, _CompressedArray):
1202
- mask = mask.decompress()
1203
- if return_dense and isinstance(img, _CompressedArray):
1204
- img = img.decompress()
1205
- if isinstance(assoc_matrix, _CompressedArray):
1206
- assoc_matrix = assoc_matrix.decompress()
1207
-
1208
- # cropping
1209
- if self.cropper is not None:
1210
- # Use only if there is at least one timepoint per detection
1211
- cropped_feat, cropped_idx = self.cropper(feat)
1212
- cropped_timepoints = timepoints[cropped_idx]
1213
- if len(np.unique(cropped_timepoints)) == self.window_size:
1214
- idx = cropped_idx
1215
- feat = cropped_feat
1216
- labels = labels[idx]
1217
- timepoints = timepoints[idx]
1218
- assoc_matrix = assoc_matrix[idx][:, idx]
1219
- else:
1220
- logger.debug("Skipping cropping")
1221
-
1222
- if self.augmenter is not None:
1223
- feat = self.augmenter(feat)
1224
-
1225
- coords0 = np.concatenate((feat.timepoints[:, None], feat.coords), axis=-1)
1226
- coords0 = torch.from_numpy(coords0).float()
1227
- assoc_matrix = torch.from_numpy(assoc_matrix.astype(np.float32))
1228
- features = torch.from_numpy(feat.features_stacked).float()
1229
- labels = torch.from_numpy(feat.labels).long()
1230
- timepoints = torch.from_numpy(feat.timepoints).long()
1231
-
1232
- if self.max_tokens and len(timepoints) > self.max_tokens:
1233
- time_incs = np.where(timepoints - np.roll(timepoints, 1))[0]
1234
- n_elems = time_incs[np.searchsorted(time_incs, self.max_tokens) - 1]
1235
- timepoints = timepoints[:n_elems]
1236
- labels = labels[:n_elems]
1237
- coords0 = coords0[:n_elems]
1238
- features = features[:n_elems]
1239
- assoc_matrix = assoc_matrix[:n_elems, :n_elems]
1240
- logger.debug(
1241
- f"Clipped window of size {timepoints[n_elems - 1] - timepoints.min()}"
1242
- )
1243
-
1244
- if self.augmenter is not None:
1245
- coords = coords0.clone()
1246
- coords[:, 1:] += torch.randint(0, 512, (1, self.ndim))
1247
- else:
1248
- coords = coords0.clone()
1249
- res = dict(
1250
- features=features,
1251
- coords0=coords0,
1252
- coords=coords,
1253
- assoc_matrix=assoc_matrix,
1254
- timepoints=timepoints,
1255
- labels=labels,
1256
- )
1257
-
1258
- if return_dense:
1259
- if all([x is not None for x in img]):
1260
- img = torch.from_numpy(img).float()
1261
- res["img"] = img
1262
-
1263
- mask = torch.from_numpy(mask.astype(int)).long()
1264
- res["mask"] = mask
1265
-
1266
- return res
1267
-
1268
-
1269
- def _ctc_lineages(df, masks, t1=0, t2=None):
1270
- """From a ctc dataframe, create a digraph that contains all sublineages
1271
- between t1 and t2 (exclusive t2).
1272
-
1273
- Args:
1274
- df: pd.DataFrame with columns `label`, `t1`, `t2`, `parent` (man_track.txt)
1275
- masks: List of masks. If t1 is not 0, then the masks are assumed to be already cropped accordingly.
1276
- t1: Start timepoint
1277
- t2: End timepoint (exclusive). If None, then t2 is set to len(masks)
1278
-
1279
- Returns:
1280
- labels: List of label ids extracted from the masks, ordered by timepoint.
1281
- ts: List of corresponding timepoints
1282
- graph: The digraph of the lineages between t1 and t2.
1283
- """
1284
- if t1 > 0:
1285
- assert t2 is not None
1286
- assert t2 - t1 == len(masks)
1287
- if t2 is None:
1288
- t2 = len(masks)
1289
-
1290
- graph = nx.DiGraph()
1291
- labels = []
1292
- ts = []
1293
-
1294
- # get all objects that are present in the time interval
1295
- df_sub = df[(df.t1 < t2) & (df.t2 >= t1)]
1296
-
1297
- # Correct offset
1298
- df_sub.loc[:, "t1"] -= t1
1299
- df_sub.loc[:, "t2"] -= t1
1300
-
1301
- # all_labels = df_sub.label.unique()
1302
- # TODO speed up by precalculating unique values once
1303
- # in_masks = set(np.where(np.bincount(np.stack(masks[t1:t2]).ravel()))[0]) - {0}
1304
- # all_labels = [l for l in all_labels if l in in_masks]
1305
- all_labels = set()
1306
-
1307
- for t in tqdm(
1308
- range(0, t2 - t1), desc="Building and checking lineage graph", leave=False
1309
- ):
1310
- # get all entities at timepoint
1311
- obs = df_sub[(df_sub.t1 <= t) & (df_sub.t2 >= t)]
1312
- in_t = set(np.where(np.bincount(masks[t].ravel()))[0]) - {0}
1313
- all_labels.update(in_t)
1314
- for row in obs.itertuples():
1315
- label, t1, t2, parent = row.label, row.t1, row.t2, row.parent
1316
- if label not in in_t:
1317
- continue
1318
-
1319
- labels.append(label)
1320
- ts.append(t)
1321
-
1322
- # add label as node if not already in graph
1323
- if not graph.has_node(label):
1324
- graph.add_node(label)
1325
-
1326
- # Parents have been added in previous timepoints
1327
- if parent in all_labels:
1328
- if not graph.has_node(parent):
1329
- graph.add_node(parent)
1330
- graph.add_edge(parent, label)
1331
-
1332
- labels = np.array(labels)
1333
- ts = np.array(ts)
1334
- return labels, ts, graph
1335
-
1336
-
1337
- @njit
1338
- def _assoc(A: np.ndarray, labels: np.ndarray, family: np.ndarray):
1339
- """For each detection, associate with all detections that are."""
1340
- for i in range(len(labels)):
1341
- for j in range(len(labels)):
1342
- A[i, j] = family[i, labels[j]]
1343
-
1344
-
1345
- def _ctc_assoc_matrix(detections, ts, graph, matching):
1346
- """Create the association matrix for a list of labels and a tracklet parent -> childrend graph.
1347
-
1348
- Each detection is associated with all its ancestors and descendants, but not its siblings and their offspring.
1349
-
1350
- Args:
1351
- detections: list of integer labels, ordered by timepoint
1352
- ts: list of timepoints corresponding to the detections
1353
- graph: networkx DiGraph with each ground truth tracklet id (spanning n timepoints) as a single node
1354
- and parent -> children relationships as edges.
1355
- matching: for each timepoint, a dictionary that maps from detection id to gt tracklet id
1356
- """
1357
- assert 0 not in graph
1358
- matched_gt = []
1359
- for i, (label, t) in enumerate(zip(detections, ts)):
1360
- gt_tracklet_id = matching[t].get(label, 0)
1361
- matched_gt.append(gt_tracklet_id)
1362
- matched_gt = np.array(matched_gt, dtype=int)
1363
- # Now we have the subset of gt nodes that is matched to any detection in the current window
1364
-
1365
- # relabel to reduce the size of lookup matrices
1366
- # offset 0 not allowed in skimage, which makes this very annoying
1367
- relabeled_gt, fwd_map, _inv_map = relabel_sequential(matched_gt, offset=1)
1368
- # dict is faster than arraymap
1369
- fwd_map = dict(zip(fwd_map.in_values, fwd_map.out_values))
1370
- # inv_map = dict(zip(inv_map.in_values, inv_map.out_values))
1371
-
1372
- # the family relationships for each ground truth detection,
1373
- # Maps from local detection number (0-indexed) to global gt tracklet id (1-indexed)
1374
- family = np.zeros((len(detections), len(relabeled_gt) + 1), bool)
1375
-
1376
- # Connects each tracklet id with its children and parent tracklets (according to man_track.txt)
1377
- for i, (label, t) in enumerate(zip(detections, ts)):
1378
- # Get the original label corresponding to the graph
1379
- gt_tracklet_id = matching[t].get(label, None)
1380
- if gt_tracklet_id is not None:
1381
- ancestors = []
1382
- descendants = []
1383
- # This iterates recursively through the graph
1384
- for n in nx.descendants(graph, gt_tracklet_id):
1385
- if n in fwd_map:
1386
- descendants.append(fwd_map[n])
1387
- for n in nx.ancestors(graph, gt_tracklet_id):
1388
- if n in fwd_map:
1389
- ancestors.append(fwd_map[n])
1390
-
1391
- family[i, np.array([fwd_map[gt_tracklet_id], *ancestors, *descendants])] = (
1392
- True
1393
- )
1394
- else:
1395
- pass
1396
- # Now we match to nothing, so even the matrix diagonal will not be filled.
1397
-
1398
- # This assures that matching to 0 is always false
1399
- assert family[:, 0].sum() == 0
1400
-
1401
- # Create the detection-to-detection association matrix
1402
- A = np.zeros((len(detections), len(detections)), dtype=bool)
1403
-
1404
- _assoc(A, relabeled_gt, family)
1405
-
1406
- return A
1407
-
1408
-
1409
- def sigmoid(x):
1410
- return 1 / (1 + np.exp(-x))
1411
-
1412
-
1413
- def _compress_img_mask_preproc(img, mask, features):
1414
- """Remove certain img pixels if not needed to save memory for large datasets."""
1415
- # dont change anything if we need patch values
1416
- if features in ("patch", "patch_regionprops"):
1417
- # clear img pixels outside of patch_mask of size 16x16
1418
- patch_width = 16 # TOD: hardcoded: change this if needed
1419
- coords = tuple(np.array(r.centroid).astype(int) for r in regionprops(mask))
1420
- img2 = np.zeros_like(img)
1421
- if len(coords) > 0:
1422
- coords = np.stack(coords)
1423
- coords = np.clip(coords, 0, np.array(mask.shape)[None] - 1)
1424
- patch_mask = np.zeros_like(img, dtype=bool)
1425
- patch_mask[tuple(coords.T)] = True
1426
- # retain 3*patch_width+1 around center to be safe...
1427
- patch_mask = ndi.maximum_filter(patch_mask, 3 * patch_width + 1)
1428
- img2[patch_mask] = img[patch_mask]
1429
-
1430
- else:
1431
- # otherwise set img value inside masks to mean
1432
- # FIXME: change when using other intensity based regionprops
1433
- img2 = np.zeros_like(img)
1434
- for reg in regionprops(mask, intensity_image=img):
1435
- m = mask[reg.slice] == reg.label
1436
- img2[reg.slice][m] = reg.mean_intensity
1437
- return img2
1438
-
1439
-
1440
- def pad_tensor(x, n_max: int, dim=0, value=0):
1441
- n = x.shape[dim]
1442
- if n_max < n:
1443
- raise ValueError(f"pad_tensor: n_max={n_max} must be larger than n={n} !")
1444
- pad_shape = list(x.shape)
1445
- pad_shape[dim] = n_max - n
1446
- # pad = torch.full(pad_shape, fill_value=value, dtype=x.dtype).to(x.device)
1447
- pad = torch.full(pad_shape, fill_value=value, dtype=x.dtype)
1448
- return torch.cat((x, pad), dim=dim)
1449
-
1450
-
1451
- def collate_sequence_padding(batch):
1452
- """Collate function that pads all sequences to the same length."""
1453
- lens = tuple(len(x["coords"]) for x in batch)
1454
- n_max_len = max(lens)
1455
- # print(tuple(len(x["coords"]) for x in batch))
1456
- # print(tuple(len(x["features"]) for x in batch))
1457
- # print(batch[0].keys())
1458
- tuple(batch[0].keys())
1459
- normal_keys = {
1460
- "coords": 0,
1461
- "features": 0,
1462
- "labels": 0, # Not needed, remove for speed.
1463
- "timepoints": -1, # There are real timepoints with t=0. -1 for distinction from that.
1464
- }
1465
- n_pads = tuple(n_max_len - s for s in lens)
1466
- batch_new = dict(
1467
- (
1468
- k,
1469
- torch.stack(
1470
- [pad_tensor(x[k], n_max=n_max_len, value=v) for x in batch], dim=0
1471
- ),
1472
- )
1473
- for k, v in normal_keys.items()
1474
- )
1475
- batch_new["assoc_matrix"] = torch.stack(
1476
- [
1477
- pad_tensor(
1478
- pad_tensor(x["assoc_matrix"], n_max_len, dim=0), n_max_len, dim=1
1479
- )
1480
- for x in batch
1481
- ],
1482
- dim=0,
1483
- )
1484
-
1485
- # add boolean mask that signifies whether tokens are padded or not (such that they can be ignored later)
1486
- pad_mask = torch.zeros((len(batch), n_max_len), dtype=torch.bool)
1487
- for i, n_pad in enumerate(n_pads):
1488
- pad_mask[i, n_max_len - n_pad :] = True
1489
-
1490
- batch_new["padding_mask"] = pad_mask.bool()
1491
- return batch_new
1492
-
1493
-
1494
- if __name__ == "__main__":
1495
- dummy_data = CTCData(
1496
- root="../../scripts/data/synthetic_cells/01",
1497
- ndim=2,
1498
- detection_folders=["TRA"],
1499
- window_size=4,
1500
- max_tokens=None,
1501
- augment=3,
1502
- features="none",
1503
- downscale_temporal=1,
1504
- downscale_spatial=1,
1505
- sanity_dist=False,
1506
- crop_size=(256, 256),
1507
- )
1508
-
1509
- x = dummy_data[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/distributed.py DELETED
@@ -1,316 +0,0 @@
1
- """Data loading and sampling utils for distributed training."""
2
-
3
- import hashlib
4
- import json
5
- import logging
6
- import pickle
7
- # from collections.abc import Iterable
8
- from copy import deepcopy
9
- from pathlib import Path
10
- from timeit import default_timer
11
-
12
- import numpy as np
13
- import torch
14
- # from lightning import LightningDataModule
15
- from torch.utils.data import (
16
- BatchSampler,
17
- ConcatDataset,
18
- DataLoader,
19
- Dataset,
20
- DistributedSampler,
21
- )
22
- from typing import Optional, Iterable
23
- from .data import CTCData
24
-
25
- logger = logging.getLogger(__name__)
26
- logger.setLevel(logging.INFO)
27
-
28
-
29
- def cache_class(cachedir=None):
30
- """A simple file cache for CTCData."""
31
-
32
- def make_hashable(obj):
33
- if isinstance(obj, tuple | list):
34
- return tuple(make_hashable(e) for e in obj)
35
- elif isinstance(obj, Path):
36
- return obj.as_posix()
37
- elif isinstance(obj, dict):
38
- return tuple(sorted((k, make_hashable(v)) for k, v in obj.items()))
39
- else:
40
- return obj
41
-
42
- def hash_args_kwargs(*args, **kwargs):
43
- hashable_args = tuple(make_hashable(arg) for arg in args)
44
- hashable_kwargs = make_hashable(kwargs)
45
- combined_serialized = json.dumps(
46
- [hashable_args, hashable_kwargs], sort_keys=True
47
- )
48
- hash_obj = hashlib.sha256(combined_serialized.encode())
49
- return hash_obj.hexdigest()
50
-
51
- if cachedir is None:
52
- return CTCData
53
- else:
54
- cachedir = Path(cachedir)
55
-
56
- def _wrapped(*args, **kwargs):
57
- h = hash_args_kwargs(*args, **kwargs)
58
- cachedir.mkdir(exist_ok=True, parents=True)
59
- cache_file = cachedir / f"{h}.pkl"
60
- if cache_file.exists():
61
- logger.info(f"Loading cached dataset from {cache_file}")
62
- with open(cache_file, "rb") as f:
63
- return pickle.load(f)
64
- else:
65
- c = CTCData(*args, **kwargs)
66
- logger.info(f"Saving cached dataset to {cache_file}")
67
- pickle.dump(c, open(cache_file, "wb"))
68
- return c
69
-
70
- return _wrapped
71
-
72
-
73
- class BalancedBatchSampler(BatchSampler):
74
- """samples batch indices such that the number of objects in each batch is balanced
75
- (so to reduce the number of paddings in the batch).
76
-
77
-
78
- """
79
-
80
- def __init__(
81
- self,
82
- dataset: torch.utils.data.Dataset,
83
- batch_size: int,
84
- n_pool: int = 10,
85
- num_samples: Optional[int] = None,
86
- weight_by_ndivs: bool = False,
87
- weight_by_dataset: bool = False,
88
- drop_last: bool = False,
89
- ):
90
- """Setting n_pool =1 will result in a regular random batch sampler.
91
-
92
- weight_by_ndivs: if True, the probability of sampling an element is proportional to the number of divisions
93
- weight_by_dataset: if True, the probability of sampling an element is inversely proportional to the length of the dataset
94
- """
95
- if isinstance(dataset, CTCData):
96
- self.n_objects = dataset.n_objects
97
- self.n_divs = np.array(dataset.n_divs)
98
- self.n_sizes = np.ones(len(dataset)) * len(dataset)
99
- elif isinstance(dataset, ConcatDataset):
100
- self.n_objects = tuple(n for d in dataset.datasets for n in d.n_objects)
101
- self.n_divs = np.array(tuple(n for d in dataset.datasets for n in d.n_divs))
102
- self.n_sizes = np.array(
103
- tuple(len(d) for d in dataset.datasets for _ in range(len(d)))
104
- )
105
- else:
106
- raise NotImplementedError(
107
- f"BalancedBatchSampler: Unknown dataset type {type(dataset)}"
108
- )
109
- assert len(self.n_objects) == len(self.n_divs) == len(self.n_sizes)
110
-
111
- self.batch_size = batch_size
112
- self.n_pool = n_pool
113
- self.drop_last = drop_last
114
- self.num_samples = num_samples
115
- self.weight_by_ndivs = weight_by_ndivs
116
- self.weight_by_dataset = weight_by_dataset
117
- logger.debug(f"{weight_by_ndivs=}")
118
- logger.debug(f"{weight_by_dataset=}")
119
-
120
- def get_probs(self, idx):
121
- idx = np.array(idx)
122
- if self.weight_by_ndivs:
123
- probs = 1 + np.sqrt(self.n_divs[idx])
124
- else:
125
- probs = np.ones(len(idx))
126
- if self.weight_by_dataset:
127
- probs = probs / (self.n_sizes[idx] + 1e-6)
128
-
129
- probs = probs / (probs.sum() + 1e-10)
130
- return probs
131
-
132
- def sample_batches(self, idx: Iterable[int]):
133
- # we will split the indices into pools of size n_pool
134
- num_samples = self.num_samples if self.num_samples is not None else len(idx)
135
- # sample from the indices with replacement and given probabilites
136
- idx = np.random.choice(idx, num_samples, replace=True, p=self.get_probs(idx))
137
-
138
- n_pool = min(
139
- self.n_pool * self.batch_size,
140
- (len(idx) // self.batch_size) * self.batch_size,
141
- )
142
-
143
- batches = []
144
- for i in range(0, len(idx), n_pool):
145
- # the indices in the pool are sorted by their number of objects
146
- idx_pool = idx[i : i + n_pool]
147
- idx_pool = sorted(idx_pool, key=lambda i: self.n_objects[i])
148
-
149
- # such that we can create batches where each element has a similar number of objects
150
- jj = np.arange(0, len(idx_pool), self.batch_size)
151
- np.random.shuffle(jj)
152
-
153
- for j in jj:
154
- # dont drop_last, as this leads to a lot of lightning problems....
155
- # if j + self.batch_size > len(idx_pool): # assume drop_last=True
156
- # continue
157
- batch = idx_pool[j : j + self.batch_size]
158
- batches.append(batch)
159
- return batches
160
-
161
- def __iter__(self):
162
- idx = np.arange(len(self.n_objects))
163
- batches = self.sample_batches(idx)
164
- return iter(batches)
165
-
166
- def __len__(self):
167
- if self.num_samples is not None:
168
- return self.num_samples // self.batch_size
169
- else:
170
- return len(self.n_objects) // self.batch_size
171
-
172
-
173
- class BalancedDistributedSampler(DistributedSampler):
174
- def __init__(
175
- self,
176
- dataset: Dataset,
177
- batch_size: int,
178
- n_pool: int,
179
- num_samples: int,
180
- weight_by_ndivs: bool = False,
181
- weight_by_dataset: bool = False,
182
- *args,
183
- **kwargs,
184
- ) -> None:
185
- super().__init__(dataset=dataset, *args, drop_last=True, **kwargs)
186
- self._balanced_batch_sampler = BalancedBatchSampler(
187
- dataset,
188
- batch_size=batch_size,
189
- n_pool=n_pool,
190
- num_samples=max(1, num_samples // self.num_replicas),
191
- weight_by_ndivs=weight_by_ndivs,
192
- weight_by_dataset=weight_by_dataset,
193
- )
194
-
195
- def __len__(self) -> int:
196
- if self.num_samples is not None:
197
- return self._balanced_batch_sampler.num_samples
198
- else:
199
- return super().__len__()
200
-
201
- def __iter__(self):
202
- indices = list(super().__iter__())
203
- batches = self._balanced_batch_sampler.sample_batches(indices)
204
- for batch in batches:
205
- yield from batch
206
-
207
-
208
- # class BalancedDataModule(LightningDataModule):
209
- # def __init__(
210
- # self,
211
- # input_train: list,
212
- # input_val: list,
213
- # cachedir: str,
214
- # augment: int,
215
- # distributed: bool,
216
- # dataset_kwargs: dict,
217
- # sampler_kwargs: dict,
218
- # loader_kwargs: dict,
219
- # ):
220
- # super().__init__()
221
- # self.input_train = input_train
222
- # self.input_val = input_val
223
- # self.cachedir = cachedir
224
- # self.augment = augment
225
- # self.distributed = distributed
226
- # self.dataset_kwargs = dataset_kwargs
227
- # self.sampler_kwargs = sampler_kwargs
228
- # self.loader_kwargs = loader_kwargs
229
-
230
- # def prepare_data(self):
231
- # """Loads and caches the datasets if not already done.
232
-
233
- # Running on the main CPU process.
234
- # """
235
- # CTCData = cache_class(self.cachedir)
236
- # datasets = dict()
237
- # for split, inps in zip(
238
- # ("train", "val"),
239
- # (self.input_train, self.input_val),
240
- # ):
241
- # logger.info(f"Loading {split.upper()} data")
242
- # start = default_timer()
243
- # datasets[split] = torch.utils.data.ConcatDataset(
244
- # CTCData(
245
- # root=Path(inp),
246
- # augment=self.augment if split == "train" else 0,
247
- # **self.dataset_kwargs,
248
- # )
249
- # for inp in inps
250
- # )
251
- # logger.info(
252
- # f"Loaded {len(datasets[split])} {split.upper()} samples (in"
253
- # f" {(default_timer() - start):.1f} s)\n\n"
254
- # )
255
-
256
- # del datasets
257
-
258
- # def setup(self, stage: str):
259
- # CTCData = cache_class(self.cachedir)
260
- # self.datasets = dict()
261
- # for split, inps in zip(
262
- # ("train", "val"),
263
- # (self.input_train, self.input_val),
264
- # ):
265
- # logger.info(f"Loading {split.upper()} data")
266
- # start = default_timer()
267
- # self.datasets[split] = torch.utils.data.ConcatDataset(
268
- # CTCData(
269
- # root=Path(inp),
270
- # augment=self.augment if split == "train" else 0,
271
- # **self.dataset_kwargs,
272
- # )
273
- # for inp in inps
274
- # )
275
- # logger.info(
276
- # f"Loaded {len(self.datasets[split])} {split.upper()} samples (in"
277
- # f" {(default_timer() - start):.1f} s)\n\n"
278
- # )
279
-
280
- # def train_dataloader(self):
281
- # loader_kwargs = self.loader_kwargs.copy()
282
- # if self.distributed:
283
- # sampler = BalancedDistributedSampler(
284
- # self.datasets["train"],
285
- # **self.sampler_kwargs,
286
- # )
287
- # batch_sampler = None
288
- # else:
289
- # sampler = None
290
- # batch_sampler = BalancedBatchSampler(
291
- # self.datasets["train"],
292
- # **self.sampler_kwargs,
293
- # )
294
- # if not loader_kwargs["batch_size"] == batch_sampler.batch_size:
295
- # raise ValueError(
296
- # f"Batch size in loader_kwargs ({loader_kwargs['batch_size']}) and sampler_kwargs ({batch_sampler.batch_size}) must match"
297
- # )
298
- # del loader_kwargs["batch_size"]
299
-
300
- # loader = DataLoader(
301
- # self.datasets["train"],
302
- # sampler=sampler,
303
- # batch_sampler=batch_sampler,
304
- # **loader_kwargs,
305
- # )
306
- # return loader
307
-
308
- # def val_dataloader(self):
309
- # val_loader_kwargs = deepcopy(self.loader_kwargs)
310
- # val_loader_kwargs["persistent_workers"] = False
311
- # val_loader_kwargs["num_workers"] = 1
312
- # return DataLoader(
313
- # self.datasets["val"],
314
- # shuffle=False,
315
- # **val_loader_kwargs,
316
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/example_data.py DELETED
@@ -1,48 +0,0 @@
1
- from pathlib import Path
2
-
3
- import tifffile
4
-
5
- root = Path(__file__).parent / "resources"
6
-
7
-
8
- def example_data_bacteria():
9
- """Bacteria images and masks from.
10
-
11
- Van Vliet et al. Spatially Correlated Gene Expression in Bacterial Groups: The Role of Lineage History, Spatial Gradients, and Cell-Cell Interactions (2018)
12
- https://doi.org/10.1016/j.cels.2018.03.009
13
-
14
- subset of timelapse trpL/150310-11
15
- """
16
- img = tifffile.imread(root / "trpL_150310-11_img.tif")
17
- mask = tifffile.imread(root / "trpL_150310-11_mask.tif")
18
- return img, mask
19
-
20
-
21
- def example_data_hela():
22
- """Hela data from the cell tracking challenge.
23
-
24
- Neumann et al. Phenotypic profiling of the human genome by time-lapse microscopy reveals cell division genes (2010)
25
-
26
- subset of Fluo-N2DL-HeLa/train/02
27
- """
28
- img = tifffile.imread(root / "Fluo_Hela_02_img.tif")
29
- mask = tifffile.imread(root / "Fluo_Hela_02_ERR_SEG.tif")
30
- print(img.shape, mask.shape)
31
- return img, mask
32
-
33
-
34
- def example_data_fluo_3d():
35
- """Fluo-N3DH-CHO data from the cell tracking challenge.
36
-
37
- Dzyubachyk et al. Advanced Level-Set-Based Cell Tracking in Time-Lapse Fluorescence Microscopy (2010)
38
-
39
- subset of Fluo-N3DH-CHO/train/02
40
- """
41
- img = tifffile.imread(root / "Fluo-N3DH-CHO_02_img.tif")
42
- mask = tifffile.imread(root / "Fluo-N3DH-CHO_02_ERR_SEG.tif")
43
- return img, mask
44
-
45
- def data_hela():
46
- img = tifffile.imread("02_imgs.tif")
47
- mask = tifffile.imread("02_masks.tif")
48
- return img, mask
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/features.py DELETED
@@ -1,148 +0,0 @@
1
- import itertools
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from skimage.measure import regionprops_table
6
-
7
- # the property keys that are supported for 2 and 3 dim
8
-
9
- _PROPERTIES = {
10
- 2: {
11
- # FIXME: The only image regionprop possible now (when compressing) is mean_intensity,
12
- # since we store a mask with the mean intensity of each detection as the image.
13
- "regionprops": (
14
- "label",
15
- "area",
16
- "intensity_mean",
17
- "eccentricity",
18
- "solidity",
19
- "inertia_tensor",
20
- ),
21
- # faster
22
- "regionprops2": (
23
- "label",
24
- "area",
25
- "intensity_mean",
26
- "inertia_tensor",
27
- ),
28
- "patch_regionprops": (
29
- "label",
30
- "area",
31
- "intensity_mean",
32
- "inertia_tensor",
33
- ),
34
- },
35
- 3: {
36
- "regionprops2": (
37
- "label",
38
- "area",
39
- "intensity_mean",
40
- "inertia_tensor",
41
- ),
42
- "patch_regionprops": (
43
- "label",
44
- "area",
45
- "intensity_mean",
46
- "inertia_tensor",
47
- ),
48
- },
49
- }
50
-
51
-
52
- def extract_features_regionprops(
53
- mask: np.ndarray,
54
- img: np.ndarray,
55
- labels: np.ndarray,
56
- properties="regionprops2",
57
- ):
58
- ndim = mask.ndim
59
- assert ndim in (2, 3)
60
- assert mask.shape == img.shape
61
-
62
- prop_dict = _PROPERTIES[ndim]
63
- if properties not in prop_dict:
64
- raise ValueError(f"properties must be one of {prop_dict.keys()}")
65
- properties_tuple = prop_dict[properties]
66
-
67
- assert properties_tuple[0] == "label"
68
-
69
- labels = np.asarray(labels)
70
-
71
- # remove mask labels that are not present
72
- # not needed, remove for speed
73
- # mask[~np.isin(mask, labels)] = 0
74
-
75
- df = pd.DataFrame(
76
- regionprops_table(mask, intensity_image=img, properties=properties_tuple)
77
- )
78
- assert df.columns[0] == "label"
79
- assert df.columns[1] == "area"
80
-
81
- # the bnumber of inertia tensor columns depends on the dimensionality
82
- n_cols_inertia = ndim**2
83
- assert np.all(["inertia_tensor" in col for col in df.columns[-n_cols_inertia:]])
84
-
85
- # Hack for backwards compatibility
86
- if properties in ("regionprops", "patch_regionprops"):
87
- # Nice for conceptual clarity, but does not matter for speed
88
- # drop upper triangular part of symmetric inertia tensor
89
- for i, j in itertools.product(range(ndim), repeat=2):
90
- if i > j:
91
- df.drop(f"inertia_tensor-{i}-{j}", axis=1, inplace=True)
92
-
93
- table = df.to_numpy()
94
- table[:, 1] *= 0.001
95
- table[:, -n_cols_inertia:] *= 0.01
96
- # reorder according to labels
97
- features = np.zeros((len(labels), len(df.columns) - 1))
98
-
99
- # faster than iterating over pandas dataframe
100
- for row in table:
101
- # old version with tuple indexing, slow.
102
- # n = labels.index(int(row.label))
103
- # features[n] = row.to_numpy()[1:]
104
-
105
- # Only process regions present in the labels
106
- n = np.where(labels == int(row[0]))[0]
107
- if len(n) > 0:
108
- # Remove label column (0)!
109
- features[n[0]] = row[1:]
110
-
111
- return features
112
-
113
-
114
- def extract_features_patch(
115
- mask: np.ndarray,
116
- img: np.ndarray,
117
- coords: np.ndarray,
118
- labels: np.ndarray,
119
- width_patch: int = 16,
120
- ):
121
- """16x16 Image patch around detection."""
122
- ndim = mask.ndim
123
- assert ndim in (2, 3) and mask.shape == img.shape
124
- if len(coords) == 0:
125
- return np.zeros((0, width_patch * width_patch))
126
-
127
- pads = (width_patch // 2,) * ndim
128
-
129
- img = np.pad(
130
- img,
131
- tuple((p, p) for p in pads),
132
- mode="constant",
133
- )
134
-
135
- coords = coords.astype(int) + np.array(pads)
136
-
137
- ss = tuple(
138
- tuple(slice(_c - width_patch // 2, _c + width_patch // 2) for _c in c)
139
- for c in coords
140
- )
141
- fs = tuple(img[_s] for _s in ss)
142
-
143
- # max project along z if 3D
144
- if ndim == 3:
145
- fs = tuple(f.max(0) for f in fs)
146
-
147
- features = np.stack([f.flatten() for f in fs])
148
- return features
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/matching.py DELETED
@@ -1,251 +0,0 @@
1
- # Adapted from https://github.com/stardist/stardist/blob/master/stardist/matching.py
2
-
3
- import numpy as np
4
- from numba import jit
5
- from scipy.optimize import linear_sum_assignment
6
- from scipy.spatial.distance import cdist
7
- from skimage.measure import regionprops
8
-
9
- matching_criteria = dict()
10
-
11
-
12
- def label_are_sequential(y):
13
- """Returns true if y has only sequential labels from 1..."""
14
- labels = np.unique(y)
15
- return (set(labels) - {0}) == set(range(1, 1 + labels.max()))
16
-
17
-
18
- def is_array_of_integers(y):
19
- return isinstance(y, np.ndarray) and np.issubdtype(y.dtype, np.integer)
20
-
21
-
22
- def _check_label_array(y, name=None, check_sequential=False):
23
- err = ValueError(
24
- "{label} must be an array of {integers}.".format(
25
- label="labels" if name is None else name,
26
- integers=("sequential " if check_sequential else "")
27
- + "non-negative integers",
28
- )
29
- )
30
-
31
- if not is_array_of_integers(y):
32
- raise err
33
- if len(y) == 0:
34
- return True
35
- if check_sequential and not label_are_sequential(y):
36
- raise err
37
- else:
38
- if not y.min() >= 0:
39
- raise err
40
- return True
41
-
42
-
43
- def label_overlap(x, y, check=True):
44
- if check:
45
- _check_label_array(x, "x", True)
46
- _check_label_array(y, "y", True)
47
- if not x.shape == y.shape:
48
- raise ValueError("x and y must have the same shape")
49
- return _label_overlap(x, y)
50
-
51
-
52
- @jit(nopython=True)
53
- def _label_overlap(x, y):
54
- x = x.ravel()
55
- y = y.ravel()
56
- overlap = np.zeros((1 + x.max(), 1 + y.max()), dtype=np.uint32)
57
- for i in range(len(x)):
58
- overlap[x[i], y[i]] += 1
59
- return overlap[1:, 1:]
60
-
61
-
62
- def _safe_divide(x, y, eps=1e-10):
63
- """Computes a safe divide which returns 0 if y is zero."""
64
- if np.isscalar(x) and np.isscalar(y):
65
- return x / y if np.abs(y) > eps else 0.0
66
- else:
67
- out = np.zeros(np.broadcast(x, y).shape, np.float32)
68
- np.divide(x, y, out=out, where=np.abs(y) > eps)
69
- return out
70
-
71
-
72
- def intersection_over_union(overlap):
73
- _check_label_array(overlap, "overlap")
74
- if np.sum(overlap) == 0:
75
- return overlap
76
- n_pixels_pred = np.sum(overlap, axis=0, keepdims=True)
77
- n_pixels_true = np.sum(overlap, axis=1, keepdims=True)
78
- return _safe_divide(overlap, (n_pixels_pred + n_pixels_true - overlap))
79
-
80
-
81
- def dist_score(y_true, y_pred, max_distance: int = 10):
82
- """Compute distance score between centroids of regions in y_true and y_pred
83
- and returns a score matrix of shape (n_true, n_pred) with values in [0,1]
84
- where
85
- distance >= max_distance -> score = 0
86
- distance = 0 -> score = 1.
87
- """
88
- c_true = np.stack([r.centroid for r in regionprops(y_true)], axis=0)
89
- c_pred = np.stack([r.centroid for r in regionprops(y_pred)], axis=0)
90
- dist = np.minimum(cdist(c_true, c_pred), max_distance)
91
- score = 1 - dist / max_distance
92
- return score
93
-
94
-
95
- # copied from scikit-image master for now (remove when part of a release)
96
- def relabel_sequential(label_field, offset=1):
97
- """Relabel arbitrary labels to {`offset`, ... `offset` + number_of_labels}.
98
-
99
- This function also returns the forward map (mapping the original labels to
100
- the reduced labels) and the inverse map (mapping the reduced labels back
101
- to the original ones).
102
-
103
- Parameters
104
- ----------
105
- label_field : numpy array of int, arbitrary shape
106
- An array of labels, which must be non-negative integers.
107
- offset : int, optional
108
- The return labels will start at `offset`, which should be
109
- strictly positive.
110
-
111
- Returns:
112
- -------
113
- relabeled : numpy array of int, same shape as `label_field`
114
- The input label field with labels mapped to
115
- {offset, ..., number_of_labels + offset - 1}.
116
- The data type will be the same as `label_field`, except when
117
- offset + number_of_labels causes overflow of the current data type.
118
- forward_map : numpy array of int, shape ``(label_field.max() + 1,)``
119
- The map from the original label space to the returned label
120
- space. Can be used to re-apply the same mapping. See examples
121
- for usage. The data type will be the same as `relabeled`.
122
- inverse_map : 1D numpy array of int, of length offset + number of labels
123
- The map from the new label space to the original space. This
124
- can be used to reconstruct the original label field from the
125
- relabeled one. The data type will be the same as `relabeled`.
126
-
127
- Notes:
128
- -----
129
- The label 0 is assumed to denote the background and is never remapped.
130
-
131
- The forward map can be extremely big for some inputs, since its
132
- length is given by the maximum of the label field. However, in most
133
- situations, ``label_field.max()`` is much smaller than
134
- ``label_field.size``, and in these cases the forward map is
135
- guaranteed to be smaller than either the input or output images.
136
-
137
- Examples:
138
- --------
139
- >>> from skimage.segmentation import relabel_sequential
140
- >>> label_field = np.array([1, 1, 5, 5, 8, 99, 42])
141
- >>> relab, fw, inv = relabel_sequential(label_field)
142
- >>> relab
143
- array([1, 1, 2, 2, 3, 5, 4])
144
- >>> fw
145
- array([0, 1, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
146
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,
147
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
148
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
149
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5])
150
- >>> inv
151
- array([ 0, 1, 5, 8, 42, 99])
152
- >>> (fw[label_field] == relab).all()
153
- True
154
- >>> (inv[relab] == label_field).all()
155
- True
156
- >>> relab, fw, inv = relabel_sequential(label_field, offset=5)
157
- >>> relab
158
- array([5, 5, 6, 6, 7, 9, 8])
159
- """
160
- offset = int(offset)
161
- if offset <= 0:
162
- raise ValueError("Offset must be strictly positive.")
163
- if np.min(label_field) < 0:
164
- raise ValueError("Cannot relabel array that contains negative values.")
165
- max_label = int(label_field.max()) # Ensure max_label is an integer
166
- if not np.issubdtype(label_field.dtype, np.integer):
167
- new_type = np.min_scalar_type(max_label)
168
- label_field = label_field.astype(new_type)
169
- labels = np.unique(label_field)
170
- labels0 = labels[labels != 0]
171
- new_max_label = offset - 1 + len(labels0)
172
- new_labels0 = np.arange(offset, new_max_label + 1)
173
- output_type = label_field.dtype
174
- required_type = np.min_scalar_type(new_max_label)
175
- if np.dtype(required_type).itemsize > np.dtype(label_field.dtype).itemsize:
176
- output_type = required_type
177
- forward_map = np.zeros(max_label + 1, dtype=output_type)
178
- forward_map[labels0] = new_labels0
179
- inverse_map = np.zeros(new_max_label + 1, dtype=output_type)
180
- inverse_map[offset:] = labels0
181
- relabeled = forward_map[label_field]
182
- return relabeled, forward_map, inverse_map
183
-
184
-
185
- def matching(y_true, y_pred, threshold=0.5, max_distance: int = 16):
186
- """Computes IoU and distance score between all pairs of regions in y_true and y_pred.
187
-
188
- returns the true/pred matching based on the higher of the two scores for each pair of regions
189
-
190
- Parameters
191
- ----------
192
- y_true: ndarray
193
- ground truth label image (integer valued)
194
- y_pred: ndarray
195
- predicted label image (integer valued)
196
- threshold: float
197
- threshold for matching criterion (default 0.5)
198
- max_distance: int
199
- maximum distance between centroids of regions in y_true and y_pred (default 16)
200
-
201
- Returns:
202
- -------
203
- gt_pred: tuple
204
- tuple of all matched region label pairs in y_true and y_pred
205
-
206
-
207
- """
208
- y_true, y_pred = y_true.astype(np.int32), y_pred.astype(np.int32)
209
- _check_label_array(y_true, "y_true")
210
- _check_label_array(y_pred, "y_pred")
211
- if not y_true.shape == y_pred.shape:
212
- raise ValueError(
213
- f"y_true ({y_true.shape}) and y_pred ({y_pred.shape}) have different shapes"
214
- )
215
- if threshold is None:
216
- threshold = 0
217
-
218
- threshold = float(threshold) if np.isscalar(threshold) else map(float, threshold)
219
-
220
- y_true, _, map_rev_true = relabel_sequential(y_true)
221
- y_pred, _, map_rev_pred = relabel_sequential(y_pred)
222
-
223
- overlap = label_overlap(y_true, y_pred, check=False)
224
-
225
- scores_iou = intersection_over_union(overlap)
226
- scores_dist = dist_score(y_true, y_pred, max_distance)
227
- scores = np.maximum(scores_iou, scores_dist)
228
-
229
- assert 0 <= np.min(scores) <= np.max(scores) <= 1
230
-
231
- n_true, n_pred = scores.shape
232
- n_matched = min(n_true, n_pred)
233
-
234
- # not_trivial = n_matched > 0 and np.any(scores >= thr)
235
- not_trivial = n_matched > 0
236
- if not_trivial:
237
- # compute optimal matching with scores as tie-breaker
238
- costs = -(scores >= threshold).astype(float) - scores / (2 * n_matched)
239
- true_ind, pred_ind = linear_sum_assignment(costs)
240
- assert n_matched == len(true_ind) == len(pred_ind)
241
- match_ok = scores[true_ind, pred_ind] >= threshold
242
- true_ind = true_ind[match_ok]
243
- pred_ind = pred_ind[match_ok]
244
- matched = tuple(
245
- (int(map_rev_true[i]), int(map_rev_pred[j]))
246
- for i, j in zip(1 + true_ind, 1 + pred_ind)
247
- )
248
- else:
249
- matched = ()
250
-
251
- return matched
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/data/utils.py DELETED
@@ -1,232 +0,0 @@
1
- import logging
2
- import sys
3
- from pathlib import Path
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
- # from .data import CTCData
9
- import tifffile
10
- from tqdm import tqdm
11
- from typing import Optional, Union, Tuple
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
-
16
- def load_tiff_timeseries(
17
- dir: Path,
18
- dtype: Optional[Union[str, type]] = None,
19
- downscale: Optional[Tuple[int, ...]] = None,
20
- start_frame: int = 0,
21
- end_frame: Optional[int] = None,
22
- ) -> np.ndarray:
23
- """Loads a folder of `.tif` or `.tiff` files into a numpy array.
24
- Each file is interpreted as a frame of a time series.
25
-
26
- Args:
27
- folder:
28
- dtype:
29
- downscale: One int for each dimension of the data. Avoids memory overhead.
30
- start_frame: The first frame to load.
31
- end_frame: The last frame to load.
32
-
33
- Returns:
34
- np.ndarray: The loaded data.
35
- """
36
- # TODO make safe for label arrays
37
- logger.debug(f"Loading tiffs from {dir} as {dtype}")
38
- files = sorted(list(dir.glob("*.tif")) + list(dir.glob("*.tiff")))[
39
- start_frame:end_frame
40
- ]
41
- shape = tifffile.imread(files[0]).shape
42
- if downscale:
43
- assert len(downscale) == len(shape)
44
- else:
45
- downscale = (1,) * len(shape)
46
-
47
- files = files[:: downscale[0]]
48
-
49
- x = []
50
- for f in tqdm(
51
- files,
52
- leave=False,
53
- desc=f"Loading [{start_frame}:{end_frame}:{downscale[0]}]",
54
- ):
55
- _x = tifffile.imread(f)
56
- if dtype:
57
- _x = _x.astype(dtype)
58
- assert _x.shape == shape
59
- slices = tuple(slice(None, None, d) for d in downscale[1:])
60
- _x = _x[slices]
61
- x.append(_x)
62
-
63
- x = np.stack(x)
64
- logger.debug(f"Loaded array of shape {x.shape} from {dir}")
65
- return x
66
-
67
-
68
- def load_tracklet_links(folder: Path) -> pd.DataFrame:
69
- candidates = [
70
- folder / "man_track.txt",
71
- folder / "res_track.txt",
72
- ]
73
- for c in candidates:
74
- if c.exists():
75
- path = c
76
- break
77
- else:
78
- raise FileNotFoundError(f"Could not find tracklet links in {folder}")
79
-
80
- df = pd.read_csv(
81
- path,
82
- delimiter=" ",
83
- names=["label", "t1", "t2", "parent"],
84
- dtype=int,
85
- )
86
- # Remove invalid tracks with t2 > t1
87
- df = df[df.t1 <= df.t2]
88
-
89
- n_dets = (df.t2 - df.t1 + 1).sum()
90
- logger.debug(f"{folder} has {n_dets} detections")
91
-
92
- n_divs = (df[df.parent != 0]["parent"].value_counts() == 2).sum()
93
- logger.debug(f"{folder} has {n_divs} divisions")
94
- return df
95
-
96
-
97
- def filter_track_df(
98
- df: pd.DataFrame,
99
- start_frame: int = 0,
100
- end_frame: int = sys.maxsize,
101
- downscale: int = 1,
102
- ) -> pd.DataFrame:
103
- """Only keep tracklets that are present in the given time interval."""
104
- df.columns = ["label", "t1", "t2", "parent"]
105
- # only retain cells in interval
106
- df = df[(df.t2 >= start_frame) & (df.t1 < end_frame)]
107
-
108
- # shift start and end of each cell
109
- df.t1 = df.t1 - start_frame
110
- df.t2 = df.t2 - start_frame
111
- # set start/end to min/max
112
- df.t1 = df.t1.clip(0, end_frame - start_frame - 1)
113
- df.t2 = df.t2.clip(0, end_frame - start_frame - 1)
114
- # set all parents to 0 that are not in the interval
115
- df.loc[~df.parent.isin(df.label), "parent"] = 0
116
-
117
- if downscale > 1:
118
- if start_frame % downscale != 0:
119
- raise ValueError("start_frame must be a multiple of downscale")
120
-
121
- logger.debug(f"Temporal downscaling of tracklet links by {downscale}")
122
-
123
- # remove tracklets that have been fully deleted by temporal downsampling
124
-
125
- mask = (
126
- # (df["t2"] - df["t1"] < downscale - 1)
127
- (df["t1"] % downscale != 0)
128
- & (df["t2"] % downscale != 0)
129
- & (df["t1"] // downscale == df["t2"] // downscale)
130
- )
131
- logger.debug(
132
- f"Remove {mask.sum()} tracklets that are fully deleted by downsampling"
133
- )
134
- logger.debug(f"Remove {df[mask]}")
135
-
136
- df = df[~mask]
137
- # set parent to 0 if it has been deleted
138
- df.loc[~df.parent.isin(df.label), "parent"] = 0
139
-
140
- df["t2"] = (df["t2"] / float(downscale)).apply(np.floor).astype(int)
141
- df["t1"] = (df["t1"] / float(downscale)).apply(np.ceil).astype(int)
142
-
143
- # Correct for edge case of single frame tracklet
144
- assert np.all(df["t1"] == np.minimum(df["t1"], df["t2"]))
145
-
146
- return df
147
-
148
-
149
- # TODO fix
150
- # def dataset_to_ctc(dataset: CTCData, path, start: int = 0, stop: int | None = None):
151
- # """save dataset to ctc format for debugging purposes"""
152
- # out = Path(path)
153
- # print(f"Saving dataset to {out}")
154
- # out_img = out / "img"
155
- # out_img.mkdir(exist_ok=True, parents=True)
156
- # out_mask = out / "TRA"
157
- # out_mask.mkdir(exist_ok=True, parents=True)
158
- # if stop is None:
159
- # stop = len(self)
160
- # lines = []
161
- # masks, imgs = [], []
162
- # t_offset = 0
163
- # max_mask = 0
164
- # n_lines = 0
165
- # all_coords = []
166
- # for i in tqdm(range(start, stop)):
167
- # d = dataset.__getitem__(i, return_dense=True)
168
- # mask = d["mask"].numpy()
169
- # mask[mask > 0] += max_mask
170
- # max_mask = max(max_mask, mask.max())
171
- # masks.extend(mask)
172
- # imgs.extend(d["img"].numpy())
173
- # # add vertices
174
- # coords = d["coords0"].numpy()
175
- # ts, coords = coords[:, 0].astype(int), coords[:, 1:]
176
- # A = d["assoc_matrix"].numpy()
177
- # t_unique = sorted(np.unique(ts))
178
- # for t1, t2 in zip(t_unique[:-1], t_unique[1:]):
179
- # A_sub = A[ts == t1][:, ts == t2]
180
- # for i, a in enumerate(A_sub):
181
-
182
- # v1 = coords[ts == t1][i]
183
- # for j in np.where(a > 0)[0]:
184
- # v2 = coords[ts == t2][j]
185
- # # lines.append(
186
- # # {
187
- # # "index": n_lines,
188
- # # "shape-type": "line",
189
- # # "vertex-index": 0,
190
- # # "axis-0": t2 + t_offset,
191
- # # "axis-1": v1[0],
192
- # # "axis-2": v1[1],
193
- # # }
194
- # # )
195
- # # lines.append(
196
- # # {
197
- # # "index": n_lines,
198
- # # "shape-type": "line",
199
- # # "vertex-index": 1,
200
- # # "axis-0": t2 + t_offset,
201
- # # "axis-1": v2[0],
202
- # # "axis-2": v2[1],
203
- # # }
204
- # # )
205
- # lines.append([n_lines, "line", 0, t2 + t_offset] + v1.tolist())
206
- # lines.append([n_lines, "line", 1, t2 + t_offset] + v2.tolist())
207
- # n_lines += 1
208
-
209
- # c = d["coords0"].numpy()
210
- # c[:, 0] += t_offset
211
- # all_coords.extend(c)
212
- # t_offset += len(mask)
213
-
214
- # ax_cols = [f"axis-{i}" for i in range(dataset.ndim + 1)]
215
- # df = pd.DataFrame(lines, columns=["index", "shape-type", "vertex-index"] + ax_cols)
216
- # df.to_csv(out / "lines.csv", index=False)
217
-
218
- # df_c = pd.DataFrame(all_coords, columns=ax_cols)
219
- # df_c.to_csv(out / "coords.csv", index=False)
220
-
221
- # for i, m in enumerate(imgs):
222
- # # tifffile.imwrite(out_img/f'img_{i:04d}.tif', m)
223
- # if dataset.ndim == 2:
224
- # imageio.imwrite(
225
- # out_img / f"img_{i:04d}.jpg",
226
- # np.clip(20 + 100 * m, 0, 255).astype(np.uint8),
227
- # )
228
-
229
- # for i, m in enumerate(masks):
230
- # tifffile.imwrite(out_mask / f"mask_{i:04d}.tif", m, compression="zstd")
231
-
232
- # return d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/model/__init__.py DELETED
@@ -1,4 +0,0 @@
1
- # ruff: noqa: F401
2
-
3
- from .model import TrackingTransformer
4
- from .model_api import Trackastra
 
 
 
 
 
models/tra_post_model/trackastra/model/model_api.py DELETED
@@ -1,338 +0,0 @@
1
- import logging
2
- import os
3
- from pathlib import Path
4
- from typing import Literal, Union, Optional, Tuple
5
-
6
- import dask.array as da
7
- import numpy as np
8
- import tifffile
9
- import torch
10
- import yaml
11
- from tqdm import tqdm
12
-
13
- from ..data import build_windows, get_features, load_tiff_timeseries
14
- from ..tracking import TrackGraph, build_graph, track_greedy
15
- from ..utils import normalize
16
- from .model import TrackingTransformer
17
- from .predict import predict_windows
18
- from .pretrained import download_pretrained
19
-
20
- logging.basicConfig(level=logging.INFO)
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class Trackastra:
25
- """A transformer-based tracking model for time-lapse data.
26
-
27
- Trackastra links segmented objects across time frames by predicting
28
- associations with a transformer model trained on diverse time-lapse videos.
29
-
30
- The model takes as input:
31
- - A sequence of images of shape (T,(Z),Y,X)
32
- - Corresponding instance segmentation masks of shape (T,(Z),Y,X)
33
-
34
- It supports multiple tracking modes:
35
- - greedy_nodiv: Fast greedy linking without division
36
- - greedy: Fast greedy linking with division
37
- - ilp: Integer Linear Programming based linking (more accurate but slower)
38
-
39
- Examples:
40
- >>> # Load example data
41
- >>> from trackastra.data import example_data_bacteria
42
- >>> imgs, masks = example_data_bacteria()
43
- >>>
44
- >>> # Load pretrained model and track
45
- >>> model = Trackastra.from_pretrained("general_2d", device="cuda")
46
- >>> track_graph = model.track(imgs, masks, mode="greedy")
47
- """
48
-
49
- def __init__(
50
- self,
51
- transformer: TrackingTransformer,
52
- train_args: dict,
53
- device: Literal["cuda", "mps", "cpu", "automatic", None] = None,
54
- ):
55
- """Initialize Trackastra model.
56
-
57
- Args:
58
- transformer: The underlying transformer model.
59
- train_args: Training configuration arguments.
60
- device: Device to run model on ("cuda", "mps", "cpu", "automatic" or None).
61
- """
62
- if device == "cuda":
63
- if torch.cuda.is_available():
64
- self.device = "cuda"
65
- else:
66
- logger.info("Cuda not available, falling back to cpu.")
67
- self.device = "cpu"
68
- elif device == "mps":
69
- if (
70
- torch.backends.mps.is_available()
71
- and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK") is not None
72
- and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK") != "0"
73
- ):
74
- self.device = "mps"
75
- else:
76
- logger.info("Mps not available, falling back to cpu.")
77
- self.device = "cpu"
78
- elif device == "cpu":
79
- self.device = "cpu"
80
- elif device == "automatic" or device is None:
81
- should_use_mps = (
82
- torch.backends.mps.is_available()
83
- and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK") is not None
84
- and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK") != "0"
85
- )
86
- self.device = (
87
- "cuda"
88
- if torch.cuda.is_available()
89
- else (
90
- "mps"
91
- if should_use_mps and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK")
92
- else "cpu"
93
- )
94
- )
95
- else:
96
- raise ValueError(f"Device {device} not recognized.")
97
-
98
- logger.info(f"Using device {self.device}")
99
-
100
- self.transformer = transformer.to(self.device)
101
- self.train_args = train_args
102
-
103
- @classmethod
104
- def from_folder(cls, dir: Union[Path, str], device: Optional[str] = None):
105
- """Load a Trackastra model from a local folder.
106
-
107
- Args:
108
- dir: Path to model folder containing:
109
- - model weights
110
- - train_config.yaml with training arguments
111
- device: Device to run model on.
112
-
113
- Returns:
114
- Trackastra model instance.
115
- """
116
- # Always load to cpu first
117
- transformer = TrackingTransformer.from_folder(
118
- Path(dir).expanduser(), map_location="cpu"
119
- )
120
- train_args = yaml.load(open(dir / "train_config.yaml"), Loader=yaml.FullLoader)
121
- return cls(transformer=transformer, train_args=train_args, device=device)
122
-
123
- @classmethod
124
- def from_pretrained(
125
- cls, name: str, device: Optional[str] = None, download_dir: Optional[Path] = None
126
- ):
127
- """Load a pretrained Trackastra model.
128
-
129
- Available pretrained models are described in detail in pretrained.json.
130
-
131
- Args:
132
- name: Name of pretrained model (e.g. "general_2d").
133
- device: Device to run model on ("cuda", "mps", "cpu", "automatic" or None).
134
- download_dir: Directory to download model to (defaults to ~/.cache/trackastra).
135
-
136
- Returns:
137
- Trackastra model instance.
138
- """
139
- folder = download_pretrained(name, download_dir)
140
- # download zip from github to location/name, then unzip
141
- return cls.from_folder(folder, device=device)
142
-
143
- def _predict(
144
- self,
145
- imgs: Union[np.ndarray, da.Array],
146
- masks: Union[np.ndarray, da.Array],
147
- edge_threshold: float = 0.05,
148
- n_workers: int = 0,
149
- normalize_imgs: bool = True,
150
- progbar_class=tqdm,
151
- ):
152
- logger.info("Predicting weights for candidate graph")
153
- if normalize_imgs:
154
- if isinstance(imgs, da.Array):
155
- imgs = imgs.map_blocks(normalize)
156
- else:
157
- imgs = normalize(imgs)
158
-
159
- self.transformer.eval()
160
-
161
- features = get_features(
162
- detections=masks,
163
- imgs=imgs,
164
- ndim=self.transformer.config["coord_dim"],
165
- n_workers=n_workers,
166
- progbar_class=progbar_class,
167
- )
168
- logger.info("Building windows")
169
- windows = build_windows(
170
- features,
171
- window_size=self.transformer.config["window"],
172
- progbar_class=progbar_class,
173
- )
174
-
175
- logger.info("Predicting windows")
176
- predictions = predict_windows(
177
- windows=windows,
178
- features=features,
179
- model=self.transformer,
180
- edge_threshold=edge_threshold,
181
- spatial_dim=masks.ndim - 1,
182
- progbar_class=progbar_class,
183
- )
184
-
185
- return predictions
186
-
187
- def _track_from_predictions(
188
- self,
189
- predictions,
190
- mode: Literal["greedy_nodiv", "greedy", "ilp"] = "greedy",
191
- use_distance: bool = False,
192
- max_distance: int = 256,
193
- max_neighbors: int = 10,
194
- delta_t: int = 1,
195
- **kwargs,
196
- ):
197
- logger.info("Running greedy tracker")
198
- nodes = predictions["nodes"]
199
- weights = predictions["weights"]
200
-
201
- candidate_graph = build_graph(
202
- nodes=nodes,
203
- weights=weights,
204
- use_distance=use_distance,
205
- max_distance=max_distance,
206
- max_neighbors=max_neighbors,
207
- delta_t=delta_t,
208
- )
209
- if mode == "greedy":
210
- return track_greedy(candidate_graph)
211
- elif mode == "greedy_nodiv":
212
- return track_greedy(candidate_graph, allow_divisions=False)
213
- elif mode == "ilp":
214
- from trackastra.tracking.ilp import track_ilp
215
-
216
- return track_ilp(candidate_graph, ilp_config="gt", **kwargs)
217
- else:
218
- raise ValueError(f"Tracking mode {mode} does not exist.")
219
-
220
- def track(
221
- self,
222
- imgs: Union[np.ndarray, da.Array],
223
- masks: Union[np.ndarray, da.Array],
224
- mode: Literal["greedy_nodiv", "greedy", "ilp"] = "greedy",
225
- normalize_imgs: bool = True,
226
- progbar_class=tqdm,
227
- n_workers: int = 0,
228
- **kwargs,
229
- ) -> TrackGraph:
230
- """Track objects across time frames.
231
-
232
- This method links segmented objects across time frames using the specified
233
- tracking mode. No hyperparameters need to be chosen beyond the tracking mode.
234
-
235
- Args:
236
- imgs: Input images of shape (T,(Z),Y,X) (numpy or dask array)
237
- masks: Instance segmentation masks of shape (T,(Z),Y,X).
238
- mode: Tracking mode:
239
- - "greedy_nodiv": Fast greedy linking without division
240
- - "greedy": Fast greedy linking with division
241
- - "ilp": Integer Linear Programming based linking (more accurate but slower)
242
- progbar_class: Progress bar class to use.
243
- n_workers: Number of worker processes for feature extraction.
244
- normalize_imgs: Whether to normalize the images.
245
- **kwargs: Additional arguments passed to tracking algorithm.
246
-
247
- Returns:
248
- TrackGraph containing the tracking results.
249
- """
250
- if not imgs.shape == masks.shape:
251
- raise RuntimeError(
252
- f"Img shape {imgs.shape} and mask shape {masks.shape} do not match."
253
- )
254
-
255
- if not imgs.ndim == self.transformer.config["coord_dim"] + 1:
256
- raise RuntimeError(
257
- f"images should be a sequence of {self.transformer.config['coord_dim']}D images"
258
- )
259
-
260
- predictions = self._predict(
261
- imgs,
262
- masks,
263
- normalize_imgs=normalize_imgs,
264
- progbar_class=progbar_class,
265
- n_workers=n_workers,
266
- )
267
- track_graph = self._track_from_predictions(predictions, mode=mode, **kwargs)
268
- return track_graph
269
-
270
- def track_from_disk(
271
- self,
272
- imgs_path: Path,
273
- masks_path: Path,
274
- mode: Literal["greedy_nodiv", "greedy", "ilp"] = "greedy",
275
- normalize_imgs: bool = True,
276
- **kwargs,
277
- ) -> Tuple[TrackGraph, np.ndarray]:
278
- """Track objects directly from image and mask files on disk.
279
-
280
- This method supports both single tiff files and directories
281
-
282
- Args:
283
- imgs_path: Path to input images. Can be:
284
- - Directory containing numbered tiff files of shape (C),(Z),Y,X
285
- - Single tiff file with time series of shape T,(C),(Z),Y,X
286
- masks_path: Path to mask files. Can be:
287
- - Directory containing numbered tiff files of shape (Z),Y,X
288
- - Single tiff file with time series of shape T,(Z),Y,X
289
- mode: Tracking mode:
290
- - "greedy_nodiv": Fast greedy linking without division
291
- - "greedy": Fast greedy linking with division
292
- - "ilp": Integer Linear Programming based linking (more accurate but slower)
293
- normalize_imgs: Whether to normalize the images.
294
- **kwargs: Additional arguments passed to tracking algorithm.
295
-
296
- Returns:
297
- Tuple of (TrackGraph, tracked masks).
298
- """
299
- if not imgs_path.exists():
300
- raise FileNotFoundError(f"{imgs_path=} does not exist.")
301
- if not masks_path.exists():
302
- raise FileNotFoundError(f"{masks_path=} does not exist.")
303
-
304
- if imgs_path.is_dir():
305
- imgs = load_tiff_timeseries(imgs_path)
306
- else:
307
- imgs = tifffile.imread(imgs_path)
308
-
309
- if masks_path.is_dir():
310
- masks = load_tiff_timeseries(masks_path)
311
- else:
312
- masks = tifffile.imread(masks_path)
313
-
314
- if len(imgs) != len(masks):
315
- raise RuntimeError(
316
- f"#imgs and #masks do not match. Found {len(imgs)} images,"
317
- f" {len(masks)} masks."
318
- )
319
-
320
- if imgs.ndim - 1 == masks.ndim:
321
- if imgs[1] == 1:
322
- logger.info(
323
- "Found a channel dimension with a single channel. Removing dim."
324
- )
325
- masks = np.squeeze(masks, 1)
326
- else:
327
- raise RuntimeError(
328
- "Trackastra currently only supports single channel images."
329
- )
330
-
331
- if imgs.shape != masks.shape:
332
- raise RuntimeError(
333
- f"Img shape {imgs.shape} and mask shape {masks.shape} do not match."
334
- )
335
-
336
- return self.track(
337
- imgs, masks, mode, normalize_imgs=normalize_imgs, **kwargs
338
- ), masks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/model/model_parts.py DELETED
@@ -1,287 +0,0 @@
1
- """Transformer class."""
2
-
3
- import logging
4
- import math
5
- from typing import Literal
6
-
7
- import torch
8
- import torch.nn.functional as F
9
- from torch import nn
10
-
11
- from .rope import RotaryPositionalEncoding
12
-
13
- from typing import Tuple
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
- def _pos_embed_fourier1d_init(
19
- cutoff: float = 256, n: int = 32, cutoff_start: float = 1
20
- ):
21
- return (
22
- torch.exp(torch.linspace(-math.log(cutoff_start), -math.log(cutoff), n))
23
- .unsqueeze(0)
24
- .unsqueeze(0)
25
- )
26
-
27
-
28
- class FeedForward(nn.Module):
29
- def __init__(self, d_model, expand: float = 2, bias: bool = True):
30
- super().__init__()
31
- self.fc1 = nn.Linear(d_model, int(d_model * expand))
32
- self.fc2 = nn.Linear(int(d_model * expand), d_model, bias=bias)
33
- self.act = nn.GELU()
34
-
35
- def forward(self, x):
36
- return self.fc2(self.act(self.fc1(x)))
37
-
38
-
39
- class PositionalEncoding(nn.Module):
40
- def __init__(
41
- self,
42
- cutoffs: Tuple[float] = (256,),
43
- n_pos: Tuple[int] = (32,),
44
- cutoffs_start=None,
45
- ):
46
- """Positional encoding with given cutoff and number of frequencies for each dimension.
47
- number of dimension is inferred from the length of cutoffs and n_pos.
48
- """
49
- super().__init__()
50
- if cutoffs_start is None:
51
- cutoffs_start = (1,) * len(cutoffs)
52
-
53
- assert len(cutoffs) == len(n_pos)
54
- self.freqs = nn.ParameterList([
55
- nn.Parameter(_pos_embed_fourier1d_init(cutoff, n // 2))
56
- for cutoff, n, cutoff_start in zip(cutoffs, n_pos, cutoffs_start)
57
- ])
58
-
59
- def forward(self, coords: torch.Tensor):
60
- _B, _N, D = coords.shape
61
- assert D == len(self.freqs)
62
- embed = torch.cat(
63
- tuple(
64
- torch.cat(
65
- (
66
- torch.sin(0.5 * math.pi * x.unsqueeze(-1) * freq),
67
- torch.cos(0.5 * math.pi * x.unsqueeze(-1) * freq),
68
- ),
69
- axis=-1,
70
- )
71
- / math.sqrt(len(freq))
72
- for x, freq in zip(coords.moveaxis(-1, 0), self.freqs)
73
- ),
74
- axis=-1,
75
- )
76
-
77
- return embed
78
-
79
-
80
- class NoPositionalEncoding(nn.Module):
81
- def __init__(self, d):
82
- """One learnable input token that ignores positional information."""
83
- super().__init__()
84
- self.d = d
85
- # self.token = nn.Parameter(torch.randn(d))
86
-
87
- def forward(self, coords: torch.Tensor):
88
- B, N, _ = coords.shape
89
- return (
90
- # torch.ones((B, N, self.d), device=coords.device) * 0.1
91
- # torch.randn((1, 1, self.d), device=coords.device).expand(B, N, -1) * 0.01
92
- torch.randn((B, N, self.d), device=coords.device) * 0.01
93
- + torch.randn((1, 1, self.d), device=coords.device).expand(B, N, -1) * 0.1
94
- )
95
- # return self.token.view(1, 1, -1).expand(B, N, -1)
96
-
97
-
98
- def _bin_init_exp(cutoff: float, n: int):
99
- return torch.exp(torch.linspace(0, math.log(cutoff + 1), n))
100
-
101
-
102
- def _bin_init_linear(cutoff: float, n: int):
103
- return torch.linspace(-cutoff, cutoff, n)
104
-
105
-
106
- class RelativePositionalBias(nn.Module):
107
- def __init__(
108
- self,
109
- n_head: int,
110
- cutoff_spatial: float,
111
- cutoff_temporal: float,
112
- n_spatial: int = 32,
113
- n_temporal: int = 16,
114
- ):
115
- """Learnt relative positional bias to add to self-attention matrix.
116
-
117
- Spatial bins are exponentially spaced, temporal bins are linearly spaced.
118
-
119
- Args:
120
- n_head (int): Number of pos bias heads. Equal to number of attention heads
121
- cutoff_spatial (float): Maximum distance in space.
122
- cutoff_temporal (float): Maxium distance in time. Equal to window size of transformer.
123
- n_spatial (int, optional): Number of spatial bins.
124
- n_temporal (int, optional): Number of temporal bins in each direction. Should be equal to window size. Total = 2 * n_temporal + 1. Defaults to 16.
125
- """
126
- super().__init__()
127
- self._spatial_bins = _bin_init_exp(cutoff_spatial, n_spatial)
128
- self._temporal_bins = _bin_init_linear(cutoff_temporal, 2 * n_temporal + 1)
129
- self.register_buffer("spatial_bins", self._spatial_bins)
130
- self.register_buffer("temporal_bins", self._temporal_bins)
131
- self.n_spatial = n_spatial
132
- self.n_head = n_head
133
- self.bias = nn.Parameter(
134
- -0.5 + torch.rand((2 * n_temporal + 1) * n_spatial, n_head)
135
- )
136
-
137
- def forward(self, coords: torch.Tensor):
138
- _B, _N, _D = coords.shape
139
- t = coords[..., 0]
140
- yx = coords[..., 1:]
141
- temporal_dist = t.unsqueeze(-1) - t.unsqueeze(-2)
142
- spatial_dist = torch.cdist(yx, yx)
143
-
144
- spatial_idx = torch.bucketize(spatial_dist, self.spatial_bins)
145
- torch.clamp_(spatial_idx, max=len(self.spatial_bins) - 1)
146
- temporal_idx = torch.bucketize(temporal_dist, self.temporal_bins)
147
- torch.clamp_(temporal_idx, max=len(self.temporal_bins) - 1)
148
-
149
- # do some index gymnastics such that backward is not super slow
150
- # https://discuss.pytorch.org/t/how-to-select-multiple-indexes-over-multiple-dimensions-at-the-same-time/98532/2
151
- idx = spatial_idx.flatten() + temporal_idx.flatten() * self.n_spatial
152
- bias = self.bias.index_select(0, idx).view((*spatial_idx.shape, self.n_head))
153
- # -> B, nH, N, N
154
- bias = bias.transpose(-1, 1)
155
- return bias
156
-
157
-
158
- class RelativePositionalAttention(nn.Module):
159
- def __init__(
160
- self,
161
- coord_dim: int,
162
- embed_dim: int,
163
- n_head: int,
164
- cutoff_spatial: float = 256,
165
- cutoff_temporal: float = 16,
166
- n_spatial: int = 32,
167
- n_temporal: int = 16,
168
- dropout: float = 0.0,
169
- mode: Literal["bias", "rope", "none"] = "bias",
170
- attn_dist_mode: str = "v0",
171
- ):
172
- super().__init__()
173
-
174
- if not embed_dim % (2 * n_head) == 0:
175
- raise ValueError(
176
- f"embed_dim {embed_dim} must be divisible by 2 times n_head {2 * n_head}"
177
- )
178
-
179
- # qkv projection
180
- self.q_pro = nn.Linear(embed_dim, embed_dim, bias=True)
181
- self.k_pro = nn.Linear(embed_dim, embed_dim, bias=True)
182
- self.v_pro = nn.Linear(embed_dim, embed_dim, bias=True)
183
-
184
- # output projection
185
- self.proj = nn.Linear(embed_dim, embed_dim)
186
- # regularization
187
- self.dropout = dropout
188
- self.n_head = n_head
189
- self.embed_dim = embed_dim
190
- self.cutoff_spatial = cutoff_spatial
191
- self.attn_dist_mode = attn_dist_mode
192
-
193
- if mode == "bias" or mode is True:
194
- self.pos_bias = RelativePositionalBias(
195
- n_head=n_head,
196
- cutoff_spatial=cutoff_spatial,
197
- cutoff_temporal=cutoff_temporal,
198
- n_spatial=n_spatial,
199
- n_temporal=n_temporal,
200
- )
201
- elif mode == "rope":
202
- # each part needs to be divisible by 2
203
- n_split = 2 * (embed_dim // (2 * (coord_dim + 1) * n_head))
204
-
205
- self.rot_pos_enc = RotaryPositionalEncoding(
206
- cutoffs=((cutoff_temporal,) + (cutoff_spatial,) * coord_dim),
207
- n_pos=(embed_dim // n_head - coord_dim * n_split,)
208
- + (n_split,) * coord_dim,
209
- )
210
- elif mode == "none":
211
- pass
212
- elif mode is None or mode is False:
213
- logger.warning(
214
- "attn_positional_bias is not set (None or False), no positional bias."
215
- )
216
- pass
217
- else:
218
- raise ValueError(f"Unknown mode {mode}")
219
-
220
- self._mode = mode
221
-
222
- def forward(
223
- self,
224
- query: torch.Tensor,
225
- key: torch.Tensor,
226
- value: torch.Tensor,
227
- coords: torch.Tensor,
228
- padding_mask: torch.Tensor = None,
229
- ):
230
- B, N, D = query.size()
231
- q = self.q_pro(query) # (B, N, D)
232
- k = self.k_pro(key) # (B, N, D)
233
- v = self.v_pro(value) # (B, N, D)
234
- # (B, nh, N, hs)
235
- k = k.view(B, N, self.n_head, D // self.n_head).transpose(1, 2)
236
- q = q.view(B, N, self.n_head, D // self.n_head).transpose(1, 2)
237
- v = v.view(B, N, self.n_head, D // self.n_head).transpose(1, 2)
238
-
239
- attn_mask = torch.zeros(
240
- (B, self.n_head, N, N), device=query.device, dtype=q.dtype
241
- )
242
-
243
- # add negative value but not too large to keep mixed precision loss from becoming nan
244
- attn_ignore_val = -1e3
245
-
246
- # spatial cutoff
247
- yx = coords[..., 1:]
248
- spatial_dist = torch.cdist(yx, yx)
249
- spatial_mask = (spatial_dist > self.cutoff_spatial).unsqueeze(1)
250
- attn_mask.masked_fill_(spatial_mask, attn_ignore_val)
251
-
252
- # dont add positional bias to self-attention if coords is None
253
- if coords is not None:
254
- if self._mode == "bias":
255
- attn_mask = attn_mask + self.pos_bias(coords)
256
- elif self._mode == "rope":
257
- q, k = self.rot_pos_enc(q, k, coords)
258
- else:
259
- pass
260
-
261
- if self.attn_dist_mode == "v0":
262
- dist = torch.cdist(coords, coords, p=2)
263
- attn_mask += torch.exp(-0.1 * dist.unsqueeze(1))
264
- elif self.attn_dist_mode == "v1":
265
- attn_mask += torch.exp(
266
- -5 * spatial_dist.unsqueeze(1) / self.cutoff_spatial
267
- )
268
- else:
269
- raise ValueError(f"Unknown attn_dist_mode {self.attn_dist_mode}")
270
-
271
- # if given key_padding_mask = (B,N) then ignore those tokens (e.g. padding tokens)
272
- if padding_mask is not None:
273
- ignore_mask = torch.logical_or(
274
- padding_mask.unsqueeze(1), padding_mask.unsqueeze(2)
275
- ).unsqueeze(1)
276
- attn_mask.masked_fill_(ignore_mask, attn_ignore_val)
277
-
278
- # self.attn_mask = attn_mask.clone()
279
-
280
- y = F.scaled_dot_product_attention(
281
- q, k, v, attn_mask=attn_mask, dropout_p=self.dropout if self.training else 0
282
- )
283
-
284
- y = y.transpose(1, 2).contiguous().view(B, N, D)
285
- # output projection
286
- y = self.proj(y)
287
- return y
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/model/model_sd.py DELETED
@@ -1,338 +0,0 @@
1
- import logging
2
- import os
3
- from pathlib import Path
4
- from typing import Literal, Union, Optional, Tuple
5
-
6
- import dask.array as da
7
- import numpy as np
8
- import tifffile
9
- import torch
10
- import yaml
11
- from tqdm import tqdm
12
-
13
- from ..data import build_windows, get_features, load_tiff_timeseries
14
- from ..tracking import TrackGraph, build_graph, track_greedy
15
- from ..utils import normalize
16
- from .model import TrackingTransformer
17
- from .predict import predict_windows
18
- from .pretrained import download_pretrained
19
-
20
- logging.basicConfig(level=logging.INFO)
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class Trackastra:
25
- """A transformer-based tracking model for time-lapse data.
26
-
27
- Trackastra links segmented objects across time frames by predicting
28
- associations with a transformer model trained on diverse time-lapse videos.
29
-
30
- The model takes as input:
31
- - A sequence of images of shape (T,(Z),Y,X)
32
- - Corresponding instance segmentation masks of shape (T,(Z),Y,X)
33
-
34
- It supports multiple tracking modes:
35
- - greedy_nodiv: Fast greedy linking without division
36
- - greedy: Fast greedy linking with division
37
- - ilp: Integer Linear Programming based linking (more accurate but slower)
38
-
39
- Examples:
40
- >>> # Load example data
41
- >>> from trackastra.data import example_data_bacteria
42
- >>> imgs, masks = example_data_bacteria()
43
- >>>
44
- >>> # Load pretrained model and track
45
- >>> model = Trackastra.from_pretrained("general_2d", device="cuda")
46
- >>> track_graph = model.track(imgs, masks, mode="greedy")
47
- """
48
-
49
- def __init__(
50
- self,
51
- transformer: TrackingTransformer,
52
- train_args: dict,
53
- device: Literal["cuda", "mps", "cpu", "automatic", None] = None,
54
- ):
55
- """Initialize Trackastra model.
56
-
57
- Args:
58
- transformer: The underlying transformer model.
59
- train_args: Training configuration arguments.
60
- device: Device to run model on ("cuda", "mps", "cpu", "automatic" or None).
61
- """
62
- if device == "cuda":
63
- if torch.cuda.is_available():
64
- self.device = "cuda"
65
- else:
66
- logger.info("Cuda not available, falling back to cpu.")
67
- self.device = "cpu"
68
- elif device == "mps":
69
- if (
70
- torch.backends.mps.is_available()
71
- and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK") is not None
72
- and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK") != "0"
73
- ):
74
- self.device = "mps"
75
- else:
76
- logger.info("Mps not available, falling back to cpu.")
77
- self.device = "cpu"
78
- elif device == "cpu":
79
- self.device = "cpu"
80
- elif device == "automatic" or device is None:
81
- should_use_mps = (
82
- torch.backends.mps.is_available()
83
- and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK") is not None
84
- and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK") != "0"
85
- )
86
- self.device = (
87
- "cuda"
88
- if torch.cuda.is_available()
89
- else (
90
- "mps"
91
- if should_use_mps and os.getenv("PYTORCH_ENABLE_MPS_FALLBACK")
92
- else "cpu"
93
- )
94
- )
95
- else:
96
- raise ValueError(f"Device {device} not recognized.")
97
-
98
- logger.info(f"Using device {self.device}")
99
-
100
- self.transformer = transformer.to(self.device)
101
- self.train_args = train_args
102
-
103
- @classmethod
104
- def from_folder(cls, dir: Union[Path, str], device: Optional[str] = None):
105
- """Load a Trackastra model from a local folder.
106
-
107
- Args:
108
- dir: Path to model folder containing:
109
- - model weights
110
- - train_config.yaml with training arguments
111
- device: Device to run model on.
112
-
113
- Returns:
114
- Trackastra model instance.
115
- """
116
- # Always load to cpu first
117
- transformer = TrackingTransformer.from_folder(
118
- Path(dir).expanduser(), map_location="cpu"
119
- )
120
- train_args = yaml.load(open(dir / "train_config.yaml"), Loader=yaml.FullLoader)
121
- return cls(transformer=transformer, train_args=train_args, device=device)
122
-
123
- @classmethod
124
- def from_pretrained(
125
- cls, name: str, device: Optional[str] = None, download_dir: Optional[Path] = None
126
- ):
127
- """Load a pretrained Trackastra model.
128
-
129
- Available pretrained models are described in detail in pretrained.json.
130
-
131
- Args:
132
- name: Name of pretrained model (e.g. "general_2d").
133
- device: Device to run model on ("cuda", "mps", "cpu", "automatic" or None).
134
- download_dir: Directory to download model to (defaults to ~/.cache/trackastra).
135
-
136
- Returns:
137
- Trackastra model instance.
138
- """
139
- folder = download_pretrained(name, download_dir)
140
- # download zip from github to location/name, then unzip
141
- return cls.from_folder(folder, device=device)
142
-
143
- def _predict(
144
- self,
145
- imgs: Union[np.ndarray, da.Array],
146
- masks: Union[np.ndarray, da.Array],
147
- edge_threshold: float = 0.05,
148
- n_workers: int = 0,
149
- normalize_imgs: bool = True,
150
- progbar_class=tqdm,
151
- ):
152
- logger.info("Predicting weights for candidate graph")
153
- if normalize_imgs:
154
- if isinstance(imgs, da.Array):
155
- imgs = imgs.map_blocks(normalize)
156
- else:
157
- imgs = normalize(imgs)
158
-
159
- self.transformer.eval()
160
-
161
- features = get_features(
162
- detections=masks,
163
- imgs=imgs,
164
- ndim=self.transformer.config["coord_dim"],
165
- n_workers=n_workers,
166
- progbar_class=progbar_class,
167
- )
168
- logger.info("Building windows")
169
- windows = build_windows(
170
- features,
171
- window_size=self.transformer.config["window"],
172
- progbar_class=progbar_class,
173
- )
174
-
175
- logger.info("Predicting windows")
176
- predictions = predict_windows(
177
- windows=windows,
178
- features=features,
179
- model=self.transformer,
180
- edge_threshold=edge_threshold,
181
- spatial_dim=masks.ndim - 1,
182
- progbar_class=progbar_class,
183
- )
184
-
185
- return predictions
186
-
187
- def _track_from_predictions(
188
- self,
189
- predictions,
190
- mode: Literal["greedy_nodiv", "greedy", "ilp"] = "greedy",
191
- use_distance: bool = False,
192
- max_distance: int = 256,
193
- max_neighbors: int = 10,
194
- delta_t: int = 1,
195
- **kwargs,
196
- ):
197
- logger.info("Running greedy tracker")
198
- nodes = predictions["nodes"]
199
- weights = predictions["weights"]
200
-
201
- candidate_graph = build_graph(
202
- nodes=nodes,
203
- weights=weights,
204
- use_distance=use_distance,
205
- max_distance=max_distance,
206
- max_neighbors=max_neighbors,
207
- delta_t=delta_t,
208
- )
209
- if mode == "greedy":
210
- return track_greedy(candidate_graph)
211
- elif mode == "greedy_nodiv":
212
- return track_greedy(candidate_graph, allow_divisions=False)
213
- elif mode == "ilp":
214
- from trackastra.tracking.ilp import track_ilp
215
-
216
- return track_ilp(candidate_graph, ilp_config="gt", **kwargs)
217
- else:
218
- raise ValueError(f"Tracking mode {mode} does not exist.")
219
-
220
- def track(
221
- self,
222
- imgs: Union[np.ndarray, da.Array],
223
- masks: Union[np.ndarray, da.Array],
224
- mode: Literal["greedy_nodiv", "greedy", "ilp"] = "greedy",
225
- normalize_imgs: bool = True,
226
- progbar_class=tqdm,
227
- n_workers: int = 0,
228
- **kwargs,
229
- ) -> TrackGraph:
230
- """Track objects across time frames.
231
-
232
- This method links segmented objects across time frames using the specified
233
- tracking mode. No hyperparameters need to be chosen beyond the tracking mode.
234
-
235
- Args:
236
- imgs: Input images of shape (T,(Z),Y,X) (numpy or dask array)
237
- masks: Instance segmentation masks of shape (T,(Z),Y,X).
238
- mode: Tracking mode:
239
- - "greedy_nodiv": Fast greedy linking without division
240
- - "greedy": Fast greedy linking with division
241
- - "ilp": Integer Linear Programming based linking (more accurate but slower)
242
- progbar_class: Progress bar class to use.
243
- n_workers: Number of worker processes for feature extraction.
244
- normalize_imgs: Whether to normalize the images.
245
- **kwargs: Additional arguments passed to tracking algorithm.
246
-
247
- Returns:
248
- TrackGraph containing the tracking results.
249
- """
250
- if not imgs.shape == masks.shape:
251
- raise RuntimeError(
252
- f"Img shape {imgs.shape} and mask shape {masks.shape} do not match."
253
- )
254
-
255
- if not imgs.ndim == self.transformer.config["coord_dim"] + 1:
256
- raise RuntimeError(
257
- f"images should be a sequence of {self.transformer.config['coord_dim']}D images"
258
- )
259
-
260
- predictions = self._predict(
261
- imgs,
262
- masks,
263
- normalize_imgs=normalize_imgs,
264
- progbar_class=progbar_class,
265
- n_workers=n_workers,
266
- )
267
- track_graph = self._track_from_predictions(predictions, mode=mode, **kwargs)
268
- return track_graph
269
-
270
- def track_from_disk(
271
- self,
272
- imgs_path: Path,
273
- masks_path: Path,
274
- mode: Literal["greedy_nodiv", "greedy", "ilp"] = "greedy",
275
- normalize_imgs: bool = True,
276
- **kwargs,
277
- ) -> Tuple[TrackGraph, np.ndarray]:
278
- """Track objects directly from image and mask files on disk.
279
-
280
- This method supports both single tiff files and directories
281
-
282
- Args:
283
- imgs_path: Path to input images. Can be:
284
- - Directory containing numbered tiff files of shape (C),(Z),Y,X
285
- - Single tiff file with time series of shape T,(C),(Z),Y,X
286
- masks_path: Path to mask files. Can be:
287
- - Directory containing numbered tiff files of shape (Z),Y,X
288
- - Single tiff file with time series of shape T,(Z),Y,X
289
- mode: Tracking mode:
290
- - "greedy_nodiv": Fast greedy linking without division
291
- - "greedy": Fast greedy linking with division
292
- - "ilp": Integer Linear Programming based linking (more accurate but slower)
293
- normalize_imgs: Whether to normalize the images.
294
- **kwargs: Additional arguments passed to tracking algorithm.
295
-
296
- Returns:
297
- Tuple of (TrackGraph, tracked masks).
298
- """
299
- if not imgs_path.exists():
300
- raise FileNotFoundError(f"{imgs_path=} does not exist.")
301
- if not masks_path.exists():
302
- raise FileNotFoundError(f"{masks_path=} does not exist.")
303
-
304
- if imgs_path.is_dir():
305
- imgs = load_tiff_timeseries(imgs_path)
306
- else:
307
- imgs = tifffile.imread(imgs_path)
308
-
309
- if masks_path.is_dir():
310
- masks = load_tiff_timeseries(masks_path)
311
- else:
312
- masks = tifffile.imread(masks_path)
313
-
314
- if len(imgs) != len(masks):
315
- raise RuntimeError(
316
- f"#imgs and #masks do not match. Found {len(imgs)} images,"
317
- f" {len(masks)} masks."
318
- )
319
-
320
- if imgs.ndim - 1 == masks.ndim:
321
- if imgs[1] == 1:
322
- logger.info(
323
- "Found a channel dimension with a single channel. Removing dim."
324
- )
325
- masks = np.squeeze(masks, 1)
326
- else:
327
- raise RuntimeError(
328
- "Trackastra currently only supports single channel images."
329
- )
330
-
331
- if imgs.shape != masks.shape:
332
- raise RuntimeError(
333
- f"Img shape {imgs.shape} and mask shape {masks.shape} do not match."
334
- )
335
-
336
- return self.track(
337
- imgs, masks, mode, normalize_imgs=normalize_imgs, **kwargs
338
- ), masks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/model/predict.py DELETED
@@ -1,188 +0,0 @@
1
- import logging
2
- import warnings
3
-
4
- import numpy as np
5
- import torch
6
- from scipy.sparse import SparseEfficiencyWarning, csr_array
7
- from tqdm import tqdm
8
- from typing import List
9
-
10
- # TODO fix circular import
11
- # from .model import TrackingTransformer
12
- # from trackastra.data import WRFeatures
13
-
14
- warnings.simplefilter("ignore", SparseEfficiencyWarning)
15
-
16
- logger = logging.getLogger(__name__)
17
- logger.setLevel(logging.INFO)
18
-
19
-
20
- def predict(batch, model):
21
- """Predict association scores between objects in a batch.
22
-
23
- Args:
24
- batch: Dictionary containing:
25
- - features: Object features array
26
- - coords: Object coordinates array
27
- - timepoints: Time points array
28
- model: TrackingTransformer model to use for prediction.
29
-
30
- Returns:
31
- Array of association scores between objects.
32
- """
33
- feats = torch.from_numpy(batch["features"])
34
- coords = torch.from_numpy(batch["coords"])
35
- timepoints = torch.from_numpy(batch["timepoints"]).long()
36
- # Hack that assumes that all parameters of a model are on the same device
37
- device = next(model.parameters()).device
38
- feats = feats.unsqueeze(0).to(device)
39
- timepoints = timepoints.unsqueeze(0).to(device)
40
- coords = coords.unsqueeze(0).to(device)
41
-
42
- # Concat timepoints to coordinates
43
- coords = torch.cat((timepoints.unsqueeze(2).float(), coords), dim=2)
44
- with torch.no_grad():
45
- A = model(coords, features=feats)
46
- A = model.normalize_output(A, timepoints, coords)
47
-
48
- # # Spatially far entries should not influence the causal normalization
49
- # dist = torch.cdist(coords[0, :, 1:], coords[0, :, 1:])
50
- # invalid = dist > model.config["spatial_pos_cutoff"]
51
- # A[invalid] = -torch.inf
52
-
53
- A = A.squeeze(0).detach().cpu().numpy()
54
-
55
- return A
56
-
57
-
58
- def predict_windows(
59
- windows: List[dict],
60
- # features: list[WRFeatures],
61
- # model: TrackingTransformer,
62
- features: list,
63
- model,
64
- intra_window_weight: float = 0,
65
- delta_t: int = 1,
66
- edge_threshold: float = 0.05,
67
- spatial_dim: int = 3,
68
- progbar_class=tqdm,
69
- ) -> dict:
70
- """Predict associations between objects across sliding windows.
71
-
72
- This function processes a sequence of sliding windows to predict associations
73
- between objects across time frames. It handles:
74
- - Object tracking across time
75
- - Weight normalization across windows
76
- - Edge thresholding
77
- - Time-based filtering
78
-
79
- Args:
80
- windows: List of window dictionaries containing:
81
- - timepoints: Array of time points
82
- - labels: Array of object labels
83
- - features: Object features
84
- - coords: Object coordinates
85
- features: List of feature objects containing:
86
- - labels: Object labels
87
- - timepoints: Time points
88
- - coords: Object coordinates
89
- model: TrackingTransformer model to use for prediction.
90
- intra_window_weight: Weight factor for objects in middle of window. Defaults to 0.
91
- delta_t: Maximum time difference between objects to consider. Defaults to 1.
92
- edge_threshold: Minimum association score to consider. Defaults to 0.05.
93
- spatial_dim: Dimensionality of input masks. May be less than model.coord_dim.
94
- progbar_class: Progress bar class to use. Defaults to tqdm.
95
-
96
- Returns:
97
- Dictionary containing:
98
- - nodes: List of node properties (id, coords, time, label)
99
- - weights: Tuple of ((node_i, node_j), weight) pairs
100
- """
101
- # first get all objects/coords
102
- time_labels_to_id = dict()
103
- node_properties = list()
104
- max_id = np.sum([len(f.labels) for f in features])
105
-
106
- all_timepoints = np.concatenate([f.timepoints for f in features])
107
- all_labels = np.concatenate([f.labels for f in features])
108
- all_coords = np.concatenate([f.coords for f in features])
109
- all_coords = all_coords[:, -spatial_dim:]
110
-
111
- for i, (t, la, c) in enumerate(zip(all_timepoints, all_labels, all_coords)):
112
- time_labels_to_id[(t, la)] = i
113
- node_properties.append(
114
- dict(
115
- id=i,
116
- coords=tuple(c),
117
- time=t,
118
- # index=ix,
119
- label=la,
120
- )
121
- )
122
-
123
- # create assoc matrix between ids
124
- sp_weights, sp_accum = (
125
- csr_array((max_id, max_id), dtype=np.float32),
126
- csr_array((max_id, max_id), dtype=np.float32),
127
- )
128
-
129
- for t in progbar_class(
130
- range(len(windows)),
131
- desc="Computing associations",
132
- ):
133
- # This assumes that the samples in the dataset are ordered by time and start at 0.
134
- batch = windows[t]
135
- timepoints = batch["timepoints"]
136
- labels = batch["labels"]
137
-
138
- A = predict(batch, model)
139
-
140
- dt = timepoints[None, :] - timepoints[:, None]
141
- time_mask = np.logical_and(dt <= delta_t, dt > 0)
142
- A[~time_mask] = 0
143
- ii, jj = np.where(A >= edge_threshold)
144
-
145
- if len(ii) == 0:
146
- continue
147
-
148
- labels_ii = labels[ii]
149
- labels_jj = labels[jj]
150
- ts_ii = timepoints[ii]
151
- ts_jj = timepoints[jj]
152
- nodes_ii = np.array(
153
- tuple(time_labels_to_id[(t, lab)] for t, lab in zip(ts_ii, labels_ii))
154
- )
155
- nodes_jj = np.array(
156
- tuple(time_labels_to_id[(t, lab)] for t, lab in zip(ts_jj, labels_jj))
157
- )
158
-
159
- # weight middle parts higher
160
- t_middle = t + (model.config["window"] - 1) / 2
161
- ddt = timepoints[:, None] - t_middle * np.ones_like(dt)
162
- window_weight = np.exp(-intra_window_weight * ddt**2) # default is 1
163
- # window_weight = np.exp(4*A) # smooth max
164
- sp_weights[nodes_ii, nodes_jj] += window_weight[ii, jj] * A[ii, jj]
165
- sp_accum[nodes_ii, nodes_jj] += window_weight[ii, jj]
166
-
167
- sp_weights_coo = sp_weights.tocoo()
168
- sp_accum_coo = sp_accum.tocoo()
169
- assert np.allclose(sp_weights_coo.col, sp_accum_coo.col) and np.allclose(
170
- sp_weights_coo.row, sp_accum_coo.row
171
- )
172
-
173
- # Normalize weights by the number of times they were written from different sliding window positions
174
- weights = tuple(
175
- ((i, j), v / a)
176
- for i, j, v, a in zip(
177
- sp_weights_coo.row,
178
- sp_weights_coo.col,
179
- sp_weights_coo.data,
180
- sp_accum_coo.data,
181
- )
182
- )
183
-
184
- results = dict()
185
- results["nodes"] = node_properties
186
- results["weights"] = weights
187
-
188
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/model/pretrained.json DELETED
@@ -1,81 +0,0 @@
1
- {
2
- "general_2d": {
3
- "tags": ["cells, nuclei, bacteria, epithelial, yeast, particles"],
4
- "dimensionality": [2],
5
- "description": "For tracking fluorescent nuclei, bacteria (PhC), whole cells (BF, PhC, DIC), epithelial cells with fluorescent membrane, budding yeast cells (PhC), fluorescent particles, .",
6
- "url": "https://github.com/weigertlab/trackastra-models/releases/download/v0.3.0/general_2d.zip",
7
- "datasets": {
8
- "Subset of Cell Tracking Challenge 2d datasets": {
9
- "url": "https://celltrackingchallenge.net/2d-datasets/",
10
- "reference": "Maőka M, Ulman V, Delgado-Rodriguez P, Gómez-de-Mariscal E, NečasovÑ T, Guerrero Peña FA, Ren TI, Meyerowitz EM, Scherr T, Lâffler K, Mikut R. The Cell Tracking Challenge: 10 years of objective benchmarking. Nature Methods. 2023 Jul;20(7):1010-20."
11
- },
12
- "Bacteria van Vliet": {
13
- "url": "https://zenodo.org/records/268921",
14
- "reference": "van Vliet S, Winkler AR, Spriewald S, Stecher B, Ackermann M. Spatially correlated gene expression in bacterial groups: the role of lineage history, spatial gradients, and cell-cell interactions. Cell systems. 2018 Apr 25;6(4):496-507."
15
- },
16
- "Bacteria ObiWan-Microbi": {
17
- "url": "https://zenodo.org/records/7260137",
18
- "reference": "Seiffarth J, Scherr T, Wollenhaupt B, Neumann O, Scharr H, Kohlheyer D, Mikut R, NΓΆh K. ObiWan-Microbi: OMERO-based integrated workflow for annotating microbes in the cloud. SoftwareX. 2024 May 1;26:101638."
19
- },
20
- "Bacteria Persat": {
21
- "url": "https://www.p-lab.science",
22
- "reference": "Datasets kindly provided by Persat lab, EPFL."
23
- },
24
- "DeepCell": {
25
- "url": "https://datasets.deepcell.org/data",
26
- "reference": "Schwartz, M, Moen E, Miller G, Dougherty T, Borba E, Ding R, Graf W, Pao E, Van Valen D. Caliban: Accurate cell tracking and lineage construction in live-cell imaging experiments with deep learning. Biorxiv. 2023 Sept 13:803205."
27
- },
28
- "Ker phase contrast": {
29
- "url": "https://osf.io/ysaq2/",
30
- "reference": "Ker DF, Eom S, Sanami S, Bise R, Pascale C, Yin Z, Huh SI, Osuna-Highley E, Junkers SN, Helfrich CJ, Liang PY. Phase contrast time-lapse microscopy datasets with automated and manual cell tracking annotations. Scientific data. 2018 Nov 13;5(1):1-2."
31
- },
32
- "Epithelia benchmark": {
33
- "reference": "Funke J, Mais L, Champion A, Dye N, Kainmueller D. A benchmark for epithelial cell tracking. InProceedings of The European Conference on Computer Vision (ECCV) Workshops 2018 (pp. 0-0)."
34
- },
35
- "T Cells": {
36
- "url": "https://zenodo.org/records/5206119"
37
- },
38
- "Neisseria meningitidis bacterial growth": {
39
- "url": "https://zenodo.org/records/5419619"
40
- },
41
- "Synthetic nuclei": {
42
- "reference": "Weigert group live cell simulator."
43
- },
44
- "Synthetic particles": {
45
- "reference": "Weigert group particle simulator."
46
- },
47
- "Particle Tracking Challenge": {
48
- "url": "http://bioimageanalysis.org/track/#data",
49
- "reference": "Chenouard, N., Smal, I., De Chaumont, F., MaΕ‘ka, M., Sbalzarini, I. F., Gong, Y., ... & Meijering, E. (2014). Objective comparison of particle tracking methods. Nature methods, 11(3), 281-289."
50
- },
51
- "Yeast Cell-ACDC": {
52
- "url": "https://zenodo.org/records/6795124",
53
- "reference": "Padovani, F., MairhΓΆrmann, B., Falter-Braun, P., Lengefeld, J., & Schmoller, K. M. (2022). Segmentation, tracking and cell cycle analysis of live-cell imaging data with Cell-ACDC. BMC biology, 20(1), 174."
54
- },
55
- "DeepSea": {
56
- "url": "https://deepseas.org/datasets/",
57
- "reference": "Zargari, A., Lodewijk, G. A., Mashhadi, N., Cook, N., Neudorf, C. W., Araghbidikashani, K., ... & Shariati, S. A. (2023). DeepSea is an efficient deep-learning model for single-cell segmentation and tracking in time-lapse microscopy. Cell Reports Methods, 3(6)."
58
- },
59
- "Btrack" : {
60
- "url": "https://rdr.ucl.ac.uk/articles/dataset/Cell_tracking_reference_dataset/16595978",
61
- "reference": "Ulicna, K., Vallardi, G., Charras, G., & Lowe, A. R. (2021). Automated deep lineage tree analysis using a Bayesian single cell tracking approach. Frontiers in Computer Science, 3, 734559."
62
- },
63
- "E. coli in mother machine": {
64
- "url": "https://zenodo.org/records/11237127",
65
- "reference": "O’Connor, O. M., & Dunlop, M. J. (2024). Cell-TRACTR: A transformer-based model for end-to-end segmentation and tracking of cells. bioRxiv, 2024-07."
66
- }
67
- }
68
- },
69
- "ctc": {
70
- "tags": ["ctc", "Cell Tracking Challenge", "Cell Linking Benchmark"],
71
- "dimensionality": [2, 3],
72
- "description": "For tracking Cell Tracking Challenge datasets. This is the successor of the winning model of the ISBI 2024 CTC generalizable linking challenge.",
73
- "url": "https://github.com/weigertlab/trackastra-models/releases/download/v0.3.0/ctc.zip",
74
- "datasets": {
75
- "All Cell Tracking Challenge 2d+3d datasets with available GT and ERR_SEG": {
76
- "url": "https://celltrackingchallenge.net/3d-datasets/",
77
- "reference": "Maőka M, Ulman V, Delgado-Rodriguez P, Gómez-de-Mariscal E, NečasovÑ T, Guerrero Peña FA, Ren TI, Meyerowitz EM, Scherr T, Lâffler K, Mikut R. The Cell Tracking Challenge: 10 years of objective benchmarking. Nature Methods. 2023 Jul;20(7):1010-20."
78
- }
79
- }
80
- }
81
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/model/pretrained.py DELETED
@@ -1,90 +0,0 @@
1
- import logging
2
- import shutil
3
- import tempfile
4
- import zipfile
5
- try:
6
- from importlib.resources import files
7
- except:
8
- from importlib_resources import files
9
- from pathlib import Path
10
-
11
- import requests
12
- from tqdm import tqdm
13
- from typing import Optional
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
- _MODELS = {
18
- "ctc": "https://github.com/weigertlab/trackastra-models/releases/download/v0.3.0/ctc.zip",
19
- "general_2d": "https://github.com/weigertlab/trackastra-models/releases/download/v0.3.0/general_2d.zip",
20
- }
21
-
22
-
23
- def download_and_unzip(url: str, dst: Path):
24
- # TODO make safe and use tempfile lib
25
- if dst.exists():
26
- print(f"{dst} already downloaded, skipping.")
27
- return
28
-
29
- # get the name of the zipfile
30
- zip_base = Path(url.split("/")[-1])
31
-
32
- with tempfile.TemporaryDirectory() as tmp:
33
- tmp = Path(tmp)
34
- zip_file = tmp / zip_base
35
- # Download the zip file
36
- download(url, zip_file)
37
-
38
- # Unzip the file
39
- with zipfile.ZipFile(zip_file, "r") as zip_ref:
40
- zip_ref.extractall(tmp)
41
-
42
- shutil.move(tmp / zip_base.stem, dst)
43
-
44
-
45
- def download(url: str, fname: Path):
46
- resp = requests.get(url, stream=True)
47
- total = int(resp.headers.get("content-length", 0))
48
- # try:
49
- # with (open(str(fname), "wb") as file,
50
- # tqdm(
51
- # desc=str(fname),
52
- # total=total,
53
- # unit="iB",
54
- # unit_scale=True,
55
- # unit_divisor=1024,
56
- # ) as bar,):
57
- # for data in resp.iter_content(chunk_size=1024):
58
- # size = file.write(data)
59
- # bar.update(size)
60
- # except:
61
- with open(str(fname), "wb") as file, tqdm(
62
- desc=str(fname),
63
- total=total,
64
- unit="iB",
65
- unit_scale=True,
66
- unit_divisor=1024,
67
- ) as bar:
68
- for data in resp.iter_content(chunk_size=1024):
69
- size = file.write(data)
70
- bar.update(size)
71
-
72
-
73
- def download_pretrained(name: str, download_dir: Optional[Path] = None):
74
- # TODO make safe, introduce versioning
75
- if download_dir is None:
76
- download_dir = files("trackastra").joinpath(".models")
77
- else:
78
- download_dir = Path(download_dir)
79
-
80
- download_dir.mkdir(exist_ok=True, parents=True)
81
- try:
82
- url = _MODELS[name]
83
- except KeyError:
84
- raise ValueError(
85
- "Pretrained model `name` is not available. Choose from"
86
- f" {list(_MODELS.keys())}"
87
- )
88
- folder = download_dir / name
89
- download_and_unzip(url=url, dst=folder)
90
- return folder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/model/rope.py DELETED
@@ -1,94 +0,0 @@
1
- """Transformer class."""
2
-
3
- # from torch_geometric.nn import GATv2Conv
4
- import math
5
-
6
- import torch
7
- from torch import nn
8
- from typing import Tuple
9
-
10
-
11
- def _pos_embed_fourier1d_init(cutoff: float = 128, n: int = 32):
12
- # Maximum initial frequency is 1
13
- return torch.exp(torch.linspace(0, -math.log(cutoff), n)).unsqueeze(0).unsqueeze(0)
14
-
15
-
16
- # https://github.com/cvg/LightGlue/blob/b1cd942fc4a3a824b6aedff059d84f5c31c297f6/lightglue/lightglue.py#L51
17
- def _rotate_half(x: torch.Tensor) -> torch.Tensor:
18
- """Rotate pairs of scalars as 2d vectors by pi/2.
19
- Refer to eq 34 in https://arxiv.org/pdf/2104.09864.pdf.
20
- """
21
- x = x.unflatten(-1, (-1, 2))
22
- x1, x2 = x.unbind(dim=-1)
23
- return torch.stack((-x2, x1), dim=-1).flatten(start_dim=-2)
24
-
25
-
26
- class RotaryPositionalEncoding(nn.Module):
27
- def __init__(self, cutoffs: Tuple[float] = (256,), n_pos: Tuple[int] = (32,)):
28
- """Rotary positional encoding with given cutoff and number of frequencies for each dimension.
29
- number of dimension is inferred from the length of cutoffs and n_pos.
30
-
31
- see
32
- https://arxiv.org/pdf/2104.09864.pdf
33
- """
34
- super().__init__()
35
- assert len(cutoffs) == len(n_pos)
36
- if not all(n % 2 == 0 for n in n_pos):
37
- raise ValueError("n_pos must be even")
38
-
39
- self._n_dim = len(cutoffs)
40
- # theta in RoFormer https://arxiv.org/pdf/2104.09864.pdf
41
- self.freqs = nn.ParameterList([
42
- nn.Parameter(_pos_embed_fourier1d_init(cutoff, n // 2))
43
- for cutoff, n in zip(cutoffs, n_pos)
44
- ])
45
-
46
- def get_co_si(self, coords: torch.Tensor):
47
- _B, _N, D = coords.shape
48
- assert D == len(self.freqs)
49
- co = torch.cat(
50
- tuple(
51
- torch.cos(0.5 * math.pi * x.unsqueeze(-1) * freq) / math.sqrt(len(freq))
52
- for x, freq in zip(coords.moveaxis(-1, 0), self.freqs)
53
- ),
54
- axis=-1,
55
- )
56
- si = torch.cat(
57
- tuple(
58
- torch.sin(0.5 * math.pi * x.unsqueeze(-1) * freq) / math.sqrt(len(freq))
59
- for x, freq in zip(coords.moveaxis(-1, 0), self.freqs)
60
- ),
61
- axis=-1,
62
- )
63
-
64
- return co, si
65
-
66
- def forward(self, q: torch.Tensor, k: torch.Tensor, coords: torch.Tensor):
67
- _B, _N, D = coords.shape
68
- _B, _H, _N, _C = q.shape
69
-
70
- if not D == self._n_dim:
71
- raise ValueError(f"coords must have {self._n_dim} dimensions, got {D}")
72
-
73
- co, si = self.get_co_si(coords)
74
-
75
- co = co.unsqueeze(1).repeat_interleave(2, dim=-1)
76
- si = si.unsqueeze(1).repeat_interleave(2, dim=-1)
77
- q2 = q * co + _rotate_half(q) * si
78
- k2 = k * co + _rotate_half(k) * si
79
-
80
- return q2, k2
81
-
82
-
83
- if __name__ == "__main__":
84
- model = RotaryPositionalEncoding((256, 256), (32, 32))
85
-
86
- x = 100 * torch.rand(1, 17, 2)
87
- q = torch.rand(1, 4, 17, 64)
88
- k = torch.rand(1, 4, 17, 64)
89
-
90
- q1, k1 = model(q, k, x)
91
- A1 = q1[:, :, 0] @ k1[:, :, 0].transpose(-1, -2)
92
-
93
- q2, k2 = model(q, k, x + 10)
94
- A2 = q2[:, :, 0] @ k2[:, :, 0].transpose(-1, -2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/trackastra/utils/__init__.py DELETED
@@ -1,14 +0,0 @@
1
- # ruff: noqa: F401
2
-
3
- from .utils import (
4
- blockwise_causal_norm,
5
- blockwise_sum,
6
- normalize,
7
- normalize_01,
8
- preallocate_memory,
9
- random_label_cmap,
10
- render_label,
11
- seed,
12
- str2bool,
13
- str2path,
14
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tra_post_model/{trackastra/tracking β†’ tracking}/__init__.py RENAMED
@@ -7,9 +7,6 @@ from .tracking import (
7
  )
8
  from .utils import (
9
  ctc_to_graph,
10
- ctc_to_napari_tracks,
11
  graph_to_ctc,
12
  graph_to_edge_table,
13
- graph_to_napari_tracks,
14
- linear_chains,
15
  )
 
7
  )
8
  from .utils import (
9
  ctc_to_graph,
 
10
  graph_to_ctc,
11
  graph_to_edge_table,
 
 
12
  )
models/tra_post_model/{trackastra/tracking β†’ tracking}/ilp.py RENAMED
File without changes
models/tra_post_model/{trackastra/tracking β†’ tracking}/track_graph.py RENAMED
File without changes
models/tra_post_model/{trackastra/tracking β†’ tracking}/tracking.py RENAMED
@@ -9,8 +9,6 @@ from tqdm import tqdm
9
  from .track_graph import TrackGraph
10
  from typing import Optional, Tuple
11
 
12
- # from trackastra.tracking import graph_to_napari_tracks, graph_to_ctc
13
-
14
  logger = logging.getLogger(__name__)
15
 
16
 
@@ -43,9 +41,6 @@ def track_greedy(
43
 
44
  solution_graph = nx.DiGraph()
45
 
46
- # TODO bring back
47
- # if args.gt_as_dets:
48
- # solution_graph.add_nodes_from(candidate_graph.nodes(data=True))
49
 
50
  edges = candidate_graph.edges(data=True)
51
  edges = sorted(
@@ -75,12 +70,9 @@ def track_greedy(
75
  # otherwise add to solution
76
  copy_edge(edge, candidate_graph, solution_graph)
77
 
78
- # df, masks = graph_to_ctc(solution_graph, masks_original)
79
- # tracks, tracks_graph, _ = graph_to_napari_tracks(solution_graph)
80
 
81
  return solution_graph
82
- # TODO this should all be in a tracker class
83
- # return df, masks, solution_graph, tracks_graph, tracks, candidate_graph
84
 
85
 
86
  def build_graph(
 
9
  from .track_graph import TrackGraph
10
  from typing import Optional, Tuple
11
 
 
 
12
  logger = logging.getLogger(__name__)
13
 
14
 
 
41
 
42
  solution_graph = nx.DiGraph()
43
 
 
 
 
44
 
45
  edges = candidate_graph.edges(data=True)
46
  edges = sorted(
 
70
  # otherwise add to solution
71
  copy_edge(edge, candidate_graph, solution_graph)
72
 
 
 
73
 
74
  return solution_graph
75
+
 
76
 
77
 
78
  def build_graph(
models/tra_post_model/{trackastra/tracking β†’ tracking}/utils.py RENAMED
@@ -14,38 +14,8 @@ logger = logging.getLogger(__name__)
14
  logger.setLevel(logging.INFO)
15
 
16
 
17
- class FoundTracks(Exception):
18
- pass
19
-
20
-
21
- def ctc_to_napari_tracks(segmentation: np.ndarray, man_track: pd.DataFrame):
22
- """Convert tracks in CTC format to tracks in napari format.
23
-
24
- Args:
25
- segmentation: Dims time, spatial_0, ... , spatial_n
26
- man_track: columns id, start, end, parent
27
- """
28
- tracks = []
29
- for t, frame in tqdm(
30
- enumerate(segmentation),
31
- total=len(segmentation),
32
- leave=False,
33
- desc="Computing centroids",
34
- ):
35
- for r in regionprops(frame):
36
- tracks.append((r.label, t, *r.centroid))
37
-
38
- tracks_graph = {}
39
- for idx, _, _, parent in tqdm(
40
- man_track.to_numpy(),
41
- desc="Converting CTC to napari tracks",
42
- leave=False,
43
- ):
44
- if parent != 0:
45
- tracks_graph[idx] = [parent]
46
-
47
- return tracks, tracks_graph
48
-
49
 
50
  class CtcTracklet:
51
  def __init__(self, parent: int, nodes: List[int], start_frame: int) -> None:
@@ -125,77 +95,77 @@ def ctc_tracklets(G: nx.DiGraph, frame_attribute: str = "time") -> List[CtcTrack
125
  return tracklets
126
 
127
 
128
- def linear_chains(G: nx.DiGraph):
129
- """Find all linear chains in a tree/graph, i.e. paths that.
130
-
131
- i) either start/end at a node with out_degree>in_degree or and have no internal branches, or
132
- ii) consists of a single node or a single splitting node
133
-
134
- Note that each chain includes its start/end node, i.e. they can be appear in multiple chains.
135
- """
136
- # get all nodes with out_degree>in_degree (i.e. start of chain)
137
- nodes = tuple(n for n in G.nodes if G.out_degree[n] > G.in_degree[n])
138
- # single nodes are those that are not starting a linear chain
139
- # single_nodes = tuple(n for n in G.nodes if G.out_degree[n] == G.in_degree[n] == 0)
140
- single_nodes = tuple(
141
- n for n in G.nodes if G.in_degree[n] == 0 and G.out_degree[n] != 1
142
- )
143
-
144
- for ni in single_nodes:
145
- yield [ni]
146
-
147
- for ni in nodes:
148
- neighs = tuple(G.neighbors(ni))
149
- for child in neighs:
150
- path = [ni, child]
151
- while len(childs := tuple(G.neighbors(path[-1]))) == 1:
152
- path.append(childs[0])
153
- yield path
154
-
155
-
156
- def graph_to_napari_tracks(
157
- graph: nx.DiGraph,
158
- properties: List[str] = [],
159
- ):
160
- """Convert a track graph to napari tracks."""
161
- # each tracklet is a linear chain in the graph
162
- chains = tuple(linear_chains(graph))
163
-
164
- track_end_to_track_id = dict()
165
- labels = []
166
- for i, cs in enumerate(chains):
167
- label = i + 1
168
- labels.append(label)
169
- # if len(cs) == 1:
170
- # print(cs)
171
- # # Non-connected node
172
- # continue
173
- end = cs[-1]
174
- track_end_to_track_id[end] = label
175
-
176
- tracks = []
177
- tracks_graph = dict()
178
- tracks_props = {p: [] for p in properties}
179
-
180
- for label, cs in tqdm(zip(labels, chains), total=len(chains)):
181
- start = cs[0]
182
- if start in track_end_to_track_id and len(cs) > 1:
183
- tracks_graph[label] = track_end_to_track_id[start]
184
- nodes = cs[1:]
185
- else:
186
- nodes = cs
187
-
188
- for c in nodes:
189
- node = graph.nodes[c]
190
- t = node["time"]
191
- coord = node["coords"]
192
- tracks.append([label, t, *list(coord)])
193
-
194
- for p in properties:
195
- tracks_props[p].append(node[p])
196
-
197
- tracks = np.array(tracks)
198
- return tracks, tracks_graph, tracks_props
199
 
200
 
201
  def _check_ctc_df(df: pd.DataFrame, masks: np.ndarray):
 
14
  logger.setLevel(logging.INFO)
15
 
16
 
17
+ # class FoundTracks(Exception):
18
+ # pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  class CtcTracklet:
21
  def __init__(self, parent: int, nodes: List[int], start_frame: int) -> None:
 
95
  return tracklets
96
 
97
 
98
+ # def linear_chains(G: nx.DiGraph):
99
+ # """Find all linear chains in a tree/graph, i.e. paths that.
100
+
101
+ # i) either start/end at a node with out_degree>in_degree or and have no internal branches, or
102
+ # ii) consists of a single node or a single splitting node
103
+
104
+ # Note that each chain includes its start/end node, i.e. they can be appear in multiple chains.
105
+ # """
106
+ # # get all nodes with out_degree>in_degree (i.e. start of chain)
107
+ # nodes = tuple(n for n in G.nodes if G.out_degree[n] > G.in_degree[n])
108
+ # # single nodes are those that are not starting a linear chain
109
+ # # single_nodes = tuple(n for n in G.nodes if G.out_degree[n] == G.in_degree[n] == 0)
110
+ # single_nodes = tuple(
111
+ # n for n in G.nodes if G.in_degree[n] == 0 and G.out_degree[n] != 1
112
+ # )
113
+
114
+ # for ni in single_nodes:
115
+ # yield [ni]
116
+
117
+ # for ni in nodes:
118
+ # neighs = tuple(G.neighbors(ni))
119
+ # for child in neighs:
120
+ # path = [ni, child]
121
+ # while len(childs := tuple(G.neighbors(path[-1]))) == 1:
122
+ # path.append(childs[0])
123
+ # yield path
124
+
125
+
126
+ # def graph_to_napari_tracks(
127
+ # graph: nx.DiGraph,
128
+ # properties: List[str] = [],
129
+ # ):
130
+ # """Convert a track graph to napari tracks."""
131
+ # # each tracklet is a linear chain in the graph
132
+ # chains = tuple(linear_chains(graph))
133
+
134
+ # track_end_to_track_id = dict()
135
+ # labels = []
136
+ # for i, cs in enumerate(chains):
137
+ # label = i + 1
138
+ # labels.append(label)
139
+ # # if len(cs) == 1:
140
+ # # print(cs)
141
+ # # # Non-connected node
142
+ # # continue
143
+ # end = cs[-1]
144
+ # track_end_to_track_id[end] = label
145
+
146
+ # tracks = []
147
+ # tracks_graph = dict()
148
+ # tracks_props = {p: [] for p in properties}
149
+
150
+ # for label, cs in tqdm(zip(labels, chains), total=len(chains)):
151
+ # start = cs[0]
152
+ # if start in track_end_to_track_id and len(cs) > 1:
153
+ # tracks_graph[label] = track_end_to_track_id[start]
154
+ # nodes = cs[1:]
155
+ # else:
156
+ # nodes = cs
157
+
158
+ # for c in nodes:
159
+ # node = graph.nodes[c]
160
+ # t = node["time"]
161
+ # coord = node["coords"]
162
+ # tracks.append([label, t, *list(coord)])
163
+
164
+ # for p in properties:
165
+ # tracks_props[p].append(node[p])
166
+
167
+ # tracks = np.array(tracks)
168
+ # return tracks, tracks_graph, tracks_props
169
 
170
 
171
  def _check_ctc_df(df: pd.DataFrame, masks: np.ndarray):
models/tra_post_model/{trackastra/utils/utils.py β†’ utils.py} RENAMED
@@ -1,13 +1,6 @@
1
- import colorsys
2
- import itertools
3
  import logging
4
- import random
5
- import sys
6
- from pathlib import Path
7
- from timeit import default_timer
8
 
9
  import dask.array as da
10
- import matplotlib
11
  import numpy as np
12
  import torch
13
  from typing import Optional, Union
@@ -15,174 +8,6 @@ from typing import Optional, Union
15
  logger = logging.getLogger(__name__)
16
 
17
 
18
- def _single_color_integer_cmap(color=(0.3, 0.4, 0.5)):
19
- from matplotlib.colors import Colormap
20
-
21
- assert len(color) in (3, 4)
22
-
23
- class BinaryMap(Colormap):
24
- def __init__(self, color):
25
- self.color = np.array(color)
26
- if len(self.color) == 3:
27
- self.color = np.concatenate([self.color, [1]])
28
-
29
- def __call__(self, X, alpha=None, bytes=False):
30
- res = np.zeros((*X.shape, 4), np.float32)
31
- res[..., -1] = self.color[-1]
32
- res[X > 0] = np.expand_dims(self.color, 0)
33
- if bytes:
34
- return np.clip(256 * res, 0, 255).astype(np.uint8)
35
- else:
36
- return res
37
-
38
- return BinaryMap(color)
39
-
40
-
41
- def render_label(
42
- lbl,
43
- img=None,
44
- cmap=None,
45
- cmap_img="gray",
46
- alpha=0.5,
47
- alpha_boundary=None,
48
- normalize_img=True,
49
- ):
50
- """Renders a label image and optionally overlays it with another image. Used for generating simple output images to asses the label quality.
51
-
52
- Parameters
53
- ----------
54
- lbl: np.ndarray of dtype np.uint16
55
- The 2D label image
56
- img: np.ndarray
57
- The array to overlay the label image with (optional)
58
- cmap: string, tuple, or callable
59
- The label colormap. If given as rgb(a) only a single color is used, if None uses a random colormap
60
- cmap_img: string or callable
61
- The colormap of img (optional)
62
- alpha: float
63
- The alpha value of the overlay. Set alpha=1 to get fully opaque labels
64
- alpha_boundary: float
65
- The alpha value of the boundary (if None, use the same as for labels, i.e. no boundaries are visible)
66
- normalize_img: bool
67
- If True, normalizes the img (if given)
68
-
69
- Returns:
70
- -------
71
- img: np.ndarray
72
- the (m,n,4) RGBA image of the rendered label
73
-
74
- Example:
75
- -------
76
- from scipy.ndimage import label, zoom
77
- img = zoom(np.random.uniform(0,1,(16,16)),(8,8),order=3)
78
- lbl,_ = label(img>.8)
79
- u1 = render_label(lbl, img = img, alpha = .7)
80
- u2 = render_label(lbl, img = img, alpha = 0, alpha_boundary =.8)
81
- plt.subplot(1,2,1);plt.imshow(u1)
82
- plt.subplot(1,2,2);plt.imshow(u2)
83
-
84
- """
85
- from matplotlib import cm
86
- from skimage.segmentation import find_boundaries
87
-
88
- alpha = np.clip(alpha, 0, 1)
89
-
90
- if alpha_boundary is None:
91
- alpha_boundary = alpha
92
-
93
- if cmap is None:
94
- cmap = random_label_cmap()
95
- elif isinstance(cmap, tuple):
96
- cmap = _single_color_integer_cmap(cmap)
97
- else:
98
- pass
99
-
100
- cmap = cm.get_cmap(cmap) if isinstance(cmap, str) else cmap
101
- cmap_img = cm.get_cmap(cmap_img) if isinstance(cmap_img, str) else cmap_img
102
-
103
- # render image if given
104
- if img is None:
105
- im_img = np.zeros((*lbl.shape, 4), np.float32)
106
- im_img[..., -1] = 1
107
-
108
- else:
109
- assert lbl.shape[:2] == img.shape[:2]
110
- img = normalize(img) if normalize_img else img
111
- if img.ndim == 2:
112
- im_img = cmap_img(img)
113
- elif img.ndim == 3:
114
- im_img = img[..., :4]
115
- if img.shape[-1] < 4:
116
- im_img = np.concatenate(
117
- [img, np.ones(img.shape[:2] + (4 - img.shape[-1],))], axis=-1
118
- )
119
- else:
120
- raise ValueError("img should be 2 or 3 dimensional")
121
-
122
- # render label
123
- im_lbl = cmap(lbl)
124
-
125
- mask_lbl = lbl > 0
126
- mask_bound = np.bitwise_and(mask_lbl, find_boundaries(lbl, mode="thick"))
127
-
128
- # blend
129
- im = im_img.copy()
130
-
131
- im[mask_lbl] = alpha * im_lbl[mask_lbl] + (1 - alpha) * im_img[mask_lbl]
132
- im[mask_bound] = (
133
- alpha_boundary * im_lbl[mask_bound] + (1 - alpha_boundary) * im_img[mask_bound]
134
- )
135
-
136
- return im
137
-
138
-
139
- def random_label_cmap(n=2**16, h=(0, 1), lightness=(0.4, 1), s=(0.2, 0.8)):
140
- h, lightness, s = (
141
- np.random.uniform(*h, n),
142
- np.random.uniform(*lightness, n),
143
- np.random.uniform(*s, n),
144
- )
145
- cols = np.stack(
146
- [colorsys.hls_to_rgb(_h, _l, _s) for _h, _l, _s in zip(h, lightness, s)], axis=0
147
- )
148
- cols[0] = 0
149
- return matplotlib.colors.ListedColormap(cols)
150
-
151
-
152
- # @torch.jit.script
153
- def _blockwise_sum_with_bounds(A: torch.Tensor, bounds: torch.Tensor, dim: int = 0):
154
- A = A.transpose(dim, 0)
155
- cum = torch.cumsum(A, dim=0)
156
- cum = torch.cat((torch.zeros_like(cum[:1]), cum), dim=0)
157
- B = torch.zeros_like(A, device=A.device)
158
- for i, j in itertools.pairwise(bounds[:-1], bounds[1:]):
159
- B[i:j] = cum[j] - cum[i]
160
- B = B.transpose(0, dim)
161
- return B
162
-
163
-
164
- def _bounds_from_timepoints(timepoints: torch.Tensor):
165
- assert timepoints.ndim == 1
166
- bounds = torch.cat((
167
- torch.tensor([0], device=timepoints.device),
168
- # torch.nonzero faster than torch.where
169
- torch.nonzero(timepoints[1:] - timepoints[:-1], as_tuple=False)[:, 0] + 1,
170
- torch.tensor([len(timepoints)], device=timepoints.device),
171
- ))
172
- return bounds
173
-
174
-
175
- # def blockwise_sum(A: torch.Tensor, timepoints: torch.Tensor, dim: int = 0):
176
- # # get block boundaries
177
- # assert A.shape[dim] == len(timepoints)
178
-
179
- # bounds = _bounds_from_timepoints(timepoints)
180
-
181
- # # normalize within blocks
182
- # u = _blockwise_sum_with_bounds(A, bounds, dim=dim)
183
- # return u
184
-
185
-
186
  def blockwise_sum(
187
  A: torch.Tensor, timepoints: torch.Tensor, dim: int = 0, reduce: str = "sum"
188
  ):
@@ -270,11 +95,6 @@ def blockwise_causal_norm(
270
  else:
271
  raise NotImplementedError(f"Mode {mode} not implemented")
272
 
273
- # get block boundaries and normalize within blocks
274
- # bounds = _bounds_from_timepoints(timepoints)
275
- # u0_sum = _blockwise_sum_with_bounds(u0, bounds, dim=0) + eps
276
- # u1_sum = _blockwise_sum_with_bounds(u1, bounds, dim=1) + eps
277
-
278
  u0_sum = blockwise_sum(u0, timepoints, dim=0) + eps
279
  u1_sum = blockwise_sum(u1, timepoints, dim=1) + eps
280
 
@@ -296,12 +116,6 @@ def blockwise_causal_norm(
296
  return res
297
 
298
 
299
def normalize_tensor(x: torch.Tensor, dim: Optional[int] = None, eps: float = 1e-8):
    """Min-max normalize ``x`` to approximately [0, 1].

    Args:
        x: Input tensor.
        dim: Dimension(s) over which min/max are taken; all dims if None.
        eps: Small constant to avoid division by zero for constant input.

    Returns:
        Normalized tensor of the same shape as ``x``.
    """
    reduce_dims = tuple(range(x.ndim)) if dim is None else dim
    lo = torch.amin(x, dim=reduce_dims, keepdim=True)
    hi = torch.amax(x, dim=reduce_dims, keepdim=True)
    return (x - lo) / (hi - lo + eps)
305
 
306
 
307
  def normalize(x: Union[np.ndarray, da.Array], subsample: Optional[int] = 4):
@@ -340,155 +154,3 @@ def normalize_01(x: Union[np.ndarray, da.Array], subsample: Optional[int] = 4):
340
  x /= ma - mi + 1e-8
341
  return x
342
 
343
-
344
def batched(x, batch_size, device):
    """Tile tensor ``x`` along a new leading batch dimension and move it to ``device``.

    Uses ``expand`` (a view, no copy) before the device transfer.
    """
    views = x.unsqueeze(0).expand(batch_size, *((-1,) * x.ndim))
    return views.to(device)
346
-
347
-
348
def preallocate_memory(dataset, model_lightning, batch_size, max_tokens, device):
    """Run one maximal-size train step to preallocate memory up front.

    See https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#preallocate-memory-in-case-of-variable-input-length.

    Args:
        dataset: Dataset whose first sample provides the dtypes and feature
            dimensions for keys "features", "coords", "assoc_matrix",
            "timepoints".
        model_lightning: Lightning module exposing ``_common_step``.
        batch_size: Batch size of the dummy batch.
        max_tokens: Token length of the dummy batch. If None, nothing is
            done (automatic detection is not implemented).
        device: Target device for the dummy batch.
    """
    start = default_timer()

    if max_tokens is None:
        logger.warning(
            "Preallocating memory without specifying max_tokens not implemented."
        )
        return

    max_len = max_tokens
    # Only dtypes / trailing feature shapes are read from the sample; the
    # dummy batch itself is all zeros at the maximum token length.
    x = dataset[0]
    batch = dict(
        features=batched(
            torch.zeros(
                (max_len,) + x["features"].shape[1:], dtype=x["features"].dtype
            ),
            batch_size,
            device,
        ),
        coords=batched(
            torch.zeros((max_len,) + x["coords"].shape[1:], dtype=x["coords"].dtype),
            batch_size,
            device,
        ),
        assoc_matrix=batched(
            torch.zeros((max_len, max_len), dtype=x["assoc_matrix"].dtype),
            batch_size,
            device,
        ),
        timepoints=batched(
            torch.zeros(max_len, dtype=x["timepoints"].dtype), batch_size, device
        ),
        padding_mask=batched(torch.zeros(max_len, dtype=bool), batch_size, device),
    )

    # One full forward + backward allocates worst-case activation memory.
    loss = model_lightning._common_step(batch)["loss"]
    loss.backward()
    model_lightning.zero_grad()

    logger.info(
        # BUG FIX: report the actual dummy-batch length instead of the stale
        # hard-coded "22,952" from a previous experiment.
        f"Preallocated memory for largest training batch (length {max_len}) in"
        f" {default_timer() - start:.02f} s"
    )
    if device.type == "cuda":
        logger.info(
            "Memory allocated for model:"
            f" {torch.cuda.max_memory_allocated() / 1024**3:.02f} GB"
        )
-
435
-
436
def seed(s=None):
    """Seed random number generators.

    Seeds `random`, `numpy`, and (only if already imported) `torch`.
    Defaults to the unix timestamp of the function call.

    Args:
        s (``int``): Manual seed.

    Returns:
        The seed that was used.
    """
    if s is None:
        s = int(default_timer())

    # (name, seeding function) pairs, applied in a fixed order
    seeders = [("random", random.seed), ("numpy", np.random.seed)]
    if "torch" in sys.modules:
        seeders.append(("torch", torch.manual_seed))

    for name, fn in seeders:
        fn(s)
        logger.debug(f"Seed `{name}` rng with {s}.")

    return s
456
-
457
-
458
def str2bool(x: str) -> bool:
    """Cast string to boolean.

    Useful for parsing command line arguments.

    Raises:
        TypeError: If ``x`` is not a string.
        ValueError: If ``x`` is not a recognized boolean spelling.
    """
    if not isinstance(x, str):
        raise TypeError("String expected.")
    lowered = x.lower()
    if lowered in ("true", "t", "1"):
        return True
    if lowered in ("false", "f", "0"):
        return False
    raise ValueError(f"'{x}' does not seem to be boolean.")
471
-
472
-
473
def str2path(x: str) -> Path:
    """Cast string to a resolved absolute path.

    Useful for parsing command line arguments.

    Raises:
        TypeError: If ``x`` is not a string.
    """
    if not isinstance(x, str):
        raise TypeError("String expected.")
    return Path(x).expanduser().resolve()
482
-
483
-
484
if __name__ == "__main__":
    # Quick manual smoke test for the blockwise helpers.
    mat = torch.rand(50, 50)
    bounds = torch.tensor([0, 10, 20, mat.shape[0]])

    mat = torch.eye(50)

    summed = _blockwise_sum_with_bounds(mat, bounds)

    timepts = torch.repeat_interleave(torch.arange(5), 10)

    normed = blockwise_causal_norm(mat, timepts)
 
 
 
1
  import logging
 
 
 
 
2
 
3
  import dask.array as da
 
4
  import numpy as np
5
  import torch
6
  from typing import Optional, Union
 
8
  logger = logging.getLogger(__name__)
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def blockwise_sum(
12
  A: torch.Tensor, timepoints: torch.Tensor, dim: int = 0, reduce: str = "sum"
13
  ):
 
95
  else:
96
  raise NotImplementedError(f"Mode {mode} not implemented")
97
 
 
 
 
 
 
98
  u0_sum = blockwise_sum(u0, timepoints, dim=0) + eps
99
  u1_sum = blockwise_sum(u1, timepoints, dim=1) + eps
100
 
 
116
  return res
117
 
118
 
 
 
 
 
 
 
119
 
120
 
121
  def normalize(x: Union[np.ndarray, da.Array], subsample: Optional[int] = 4):
 
154
  x /= ma - mi + 1e-8
155
  return x
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tracking_one.py CHANGED
@@ -27,12 +27,12 @@ from _utils.load_models import load_stable_diffusion_model
27
  from models.model import Counting_with_SD_features_track as Counting
28
  from models.enc_model.loca import build_model as build_loca_model
29
  import time
30
- from models.tra_post_model.trackastra.model import TrackingTransformer
31
- from models.tra_post_model.trackastra.utils import (
32
  normalize,
33
  )
34
- from models.tra_post_model.trackastra.data import build_windows_sd, get_features
35
- from models.tra_post_model.trackastra.tracking import TrackGraph, build_graph, track_greedy
36
  from _utils.track_args import parse_train_args as get_track_args
37
  import torchvision.transforms as T
38
  from pathlib import Path
@@ -142,9 +142,7 @@ class TrackingModule(pl.LightningModule):
142
  # others
143
  self.placeholder_token = placeholder_token
144
  self.placeholder_token_id = placeholder_token_id
145
-
146
- # tracking model
147
- # fpath = Path("models/tra_post_model/trackastra/.models/general_2d/model.pt")
148
  fpath = Path("_utils/config.yaml")
149
  args_ = get_track_args()
150
 
@@ -935,7 +933,7 @@ class TrackingModule(pl.LightningModule):
935
  elif mode == "greedy_nodiv":
936
  return track_greedy(candidate_graph, allow_divisions=False)
937
  elif mode == "ilp":
938
- from models.tra_post_model.trackastra.tracking.ilp import track_ilp
939
 
940
  return track_ilp(candidate_graph, ilp_config="gt", **kwargs)
941
  else:
 
27
  from models.model import Counting_with_SD_features_track as Counting
28
  from models.enc_model.loca import build_model as build_loca_model
29
  import time
30
+ from models.tra_post_model.model import TrackingTransformer
31
+ from models.tra_post_model.utils import (
32
  normalize,
33
  )
34
+ from models.tra_post_model.data import build_windows_sd, get_features
35
+ from models.tra_post_model.tracking import TrackGraph, build_graph, track_greedy
36
  from _utils.track_args import parse_train_args as get_track_args
37
  import torchvision.transforms as T
38
  from pathlib import Path
 
142
  # others
143
  self.placeholder_token = placeholder_token
144
  self.placeholder_token_id = placeholder_token_id
145
+
 
 
146
  fpath = Path("_utils/config.yaml")
147
  args_ = get_track_args()
148
 
 
933
  elif mode == "greedy_nodiv":
934
  return track_greedy(candidate_graph, allow_divisions=False)
935
  elif mode == "ilp":
936
+ from models.tra_post_model.tracking.ilp import track_ilp
937
 
938
  return track_ilp(candidate_graph, ilp_config="gt", **kwargs)
939
  else: