| import numpy as np |
| import json |
| import os |
| from pynndescent import NNDescent |
|
|
| |
def hausdorff_d(curr_data, prev_data):
    """Return the mean nearest-neighbour distance from prev_data to curr_data.

    An approximate nearest-neighbour index (NNDescent, euclidean metric) is
    built over ``curr_data``; every row of ``prev_data`` is then queried for
    its single closest indexed point and the resulting distances are averaged.

    NOTE: despite the name, this is an average of directed nearest-neighbour
    distances, not the maximum used by the classical Hausdorff distance.

    Args:
        curr_data: array whose rows are indexed (must expose ``shape[0]``).
        prev_data: array whose rows are used as queries against the index.

    Returns:
        The mean of the 1-nearest-neighbour distances.
    """
    num_indexed = curr_data.shape[0]

    # Index-construction heuristics that grow with the number of indexed rows.
    tree_count = min(64, 5 + int(round(num_indexed ** 0.5 / 20.0)))
    iter_count = max(5, int(round(np.log2(num_indexed))))

    index = NNDescent(
        curr_data,
        n_neighbors=1,
        metric="euclidean",
        n_trees=tree_count,
        n_iters=iter_count,
        max_candidates=10,
        verbose=False,
    )

    # query() returns (indices, distances); only the distances are needed.
    nearest_dists = index.query(prev_data, k=1)[1]
    return nearest_dists.mean()
|
|
class Segmenter:
    """Split an epoch range into segments by accumulated representation drift.

    The epoch range ``[s, e]`` is sampled every ``p`` epochs.  For each pair of
    consecutive sampled epochs a distance between their training
    representations is computed, and consecutive intervals are greedily
    grouped (starting from the last interval) so that the summed distance of
    a group stays within ``threshold`` where possible.
    """

    def __init__(self, data_provider, threshold, range_s=None, range_e=None, range_p=None):
        """Store the provider, the threshold and the epoch range.

        Args:
            data_provider: object exposing ``train_representation(epoch)`` and,
                when no explicit range is given, attributes ``s``, ``e``, ``p``.
            threshold: maximum accumulated distance per segment.
            range_s: first epoch; when ``None`` the provider's ``s``/``e``/``p``
                are used and ``range_e``/``range_p`` are ignored.
            range_e: last epoch (used only when ``range_s`` is given).
            range_p: epoch period (used only when ``range_s`` is given).
        """
        self.data_provider = data_provider
        self.threshold = threshold
        if range_s is None:
            self.s = data_provider.s
            self.e = data_provider.e
            self.p = data_provider.p
        else:
            self.s = range_s
            self.e = range_e
            self.p = range_p

    def _cal_interval_dists(self):
        """Return one distance per complete interval of ``p`` epochs.

        Entry ``k`` is the distance between the representations at epochs
        ``s + k*p`` and ``s + (k+1)*p``.
        """
        interval_num = (self.e - self.s) // self.p
        dists = np.zeros(interval_num)
        # Iterate over interval indices rather than range(s, e, p): when
        # (e - s) is not a multiple of p the latter yields one extra epoch,
        # which would index past the end of ``dists``.
        for idx in range(interval_num):
            curr_epoch = self.s + idx * self.p
            next_data = self.data_provider.train_representation(curr_epoch + self.p)
            curr_data = self.data_provider.train_representation(curr_epoch)
            # Flatten every sample to a vector; both epochs are reshaped with
            # the sample count taken from the later epoch.
            l = next_data.shape[0]
            next_data = next_data.reshape(l, -1)
            curr_data = curr_data.reshape(l, -1)
            dists[idx] = hausdorff_d(curr_data=next_data, prev_data=curr_data)
        return dists

    def segment(self):
        """Group intervals into segments and return them as epoch pairs.

        Intervals are scanned from last to first while their distances are
        accumulated.  Whenever the running sum exceeds ``threshold`` the
        intervals gathered so far are closed as one segment and a new one is
        started at the current interval.

        Returns:
            A list of ``(start_epoch, end_epoch)`` tuples in ascending order;
            adjacent segments share their boundary epoch.  The list is also
            stored in ``self.segments``.
        """
        dists = self._cal_interval_dists()
        reversed_segs = []
        count = 0
        base = len(dists) - 1
        for i in range(len(dists) - 1, -1, -1):
            count = count + dists[i]
            if count > self.threshold:
                # When the very last interval alone exceeds the threshold
                # there is nothing after it to close; emitting (i + 1, base)
                # then would yield an empty (e, e) segment, so skip it.
                if i < base:
                    reversed_segs.append((i + 1, base))
                base = i
                count = dists[i]
        reversed_segs.append((0, base))
        # Segments were collected back-to-front; restore ascending order.
        dists_segs = reversed_segs[::-1]
        segs = [(self.s + i * self.p, self.s + (j + 1) * self.p) for i, j in dists_segs]
        self.segments = segs
        return segs

    def record_time(self, save_dir, file_name, t):
        """Store ``t`` (rounded to 3 decimals) under key "segmentation".

        The value is written to ``<save_dir>/<file_name>.json``.  If the file
        already exists its other entries are preserved.
        """
        save_file = os.path.join(save_dir, file_name + ".json")
        if os.path.exists(save_file):
            # Context manager guarantees the handle is closed even if the
            # existing file is not valid JSON.
            with open(save_file, "r") as f:
                evaluation = json.load(f)
        else:
            evaluation = dict()
        evaluation["segmentation"] = round(t, 3)
        with open(save_file, "w") as f:
            json.dump(evaluation, f)
|
|
|
|
class DenseALSegmenter(Segmenter):
    """Segmenter variant whose representations depend on an ``iteration``.

    The epoch range is fixed to ``1 .. epoch_num`` with a period of 1, and
    representations are fetched with
    ``data_provider.train_representation_lb(iteration, epoch)``.
    """

    def __init__(self, data_provider, threshold, epoch_num):
        """Initialise with epochs ``1 .. epoch_num`` sampled every epoch.

        Args:
            data_provider: object exposing
                ``train_representation_lb(iteration, epoch)``.
            threshold: maximum accumulated distance per segment.
            epoch_num: last epoch of the range.
        """
        super().__init__(data_provider, threshold, 1, epoch_num, 1)

    def _cal_interval_dists(self, iteration):
        """Return one distance per interval for the given ``iteration``.

        Entry ``k`` is the distance between the representations at epochs
        ``s + k*p`` and ``s + (k+1)*p``.
        """
        # presumably ``iteration`` is the active-learning round - confirm with caller
        interval_num = (self.e - self.s) // self.p
        dists = np.zeros(interval_num)
        # Index-driven loop keeps the number of iterations equal to the
        # number of slots allocated in ``dists``.
        for idx in range(interval_num):
            curr_epoch = self.s + idx * self.p
            next_data = self.data_provider.train_representation_lb(iteration, curr_epoch + self.p)
            curr_data = self.data_provider.train_representation_lb(iteration, curr_epoch)
            dists[idx] = hausdorff_d(curr_data=next_data, prev_data=curr_data)
        return dists

    def segment(self, iteration):
        """Group intervals of ``iteration`` into segments of epochs.

        Intervals are scanned from last to first while their distances are
        accumulated.  Whenever the running sum exceeds ``threshold`` the
        intervals gathered so far are closed as one segment and a new one is
        started at the current interval.

        Returns:
            A list of ``(start_epoch, end_epoch)`` tuples in ascending order;
            adjacent segments share their boundary epoch.  The list is also
            stored in ``self.segments``.
        """
        dists = self._cal_interval_dists(iteration)
        reversed_segs = []
        count = 0
        base = len(dists) - 1
        for i in range(len(dists) - 1, -1, -1):
            count = count + dists[i]
            if count > self.threshold:
                # When the very last interval alone exceeds the threshold
                # there is nothing after it to close; emitting (i + 1, base)
                # then would yield an empty (e, e) segment, so skip it.
                if i < base:
                    reversed_segs.append((i + 1, base))
                base = i
                count = dists[i]
        reversed_segs.append((0, base))
        # Segments were collected back-to-front; restore ascending order.
        dists_segs = reversed_segs[::-1]
        segs = [(self.s + i * self.p, self.s + (j + 1) * self.p) for i, j in dists_segs]
        # Record the latest result on the instance as well.
        self.segments = segs
        return segs
|
|
|
|
| |
| |
|
|
|
|
|
|