| import java.io.BufferedReader; |
| import java.io.BufferedWriter; |
| import java.io.IOException; |
| import java.nio.charset.StandardCharsets; |
| import java.nio.file.Files; |
| import java.nio.file.Path; |
| import java.nio.file.Paths; |
| import java.util.*; |
| import java.util.function.Consumer; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| public class LRMCseedsTUD_streamsafe { |
|
|
| public static void main(String[] args) throws Exception { |
| if (args.length < 2) { |
| System.err.println("Usage: java LRMCseedsTUD_streamsafe <input_file_or_dir> <output_file_or_dir> [alpha_kind] [epsilon]"); |
| return; |
| } |
|
|
| final Path inPath = Paths.get(args[0]); |
| final Path outPath = Paths.get(args[1]); |
| final AlphaKind alphaKind = (args.length >= 3 ? parseAlpha(args[2]) : AlphaKind.DIAM); |
| final double eps = (args.length >= 4 ? Double.parseDouble(args[3]) : 1e-6); |
|
|
| if (Files.isDirectory(inPath)) { |
| Files.createDirectories(outPath); |
| try (java.util.stream.Stream<Path> stream = Files.list(inPath)) { |
| stream.filter(Files::isRegularFile) |
| .filter(p -> { |
| String s = p.getFileName().toString().toLowerCase(Locale.ROOT); |
| return s.endsWith(".txt") || s.endsWith(".csv"); |
| }) |
| .forEach(p -> { |
| try { |
| GraphData Gi = loadEdgeList0Based(p); |
| String base = p.getFileName().toString().replaceFirst("\\.(txt|csv)$", ""); |
| Path outFile = outPath.resolve(base + ".json"); |
| runOnce(Gi, outFile, alphaKind, eps); |
| } catch (Exception e) { |
| throw new RuntimeException("Failed on " + p, e); |
| } |
| }); |
| } |
| } else { |
| GraphData G = loadEdgeList0Based(inPath); |
| System.out.printf(Locale.US, "# Loaded edge list: n=%d, m=%d%n", G.n, G.m); |
| runOnce(G, outPath, alphaKind, eps); |
| } |
| } |
|
|
| static void runOnce(GraphData G, Path outSeeds, AlphaKind alphaKind, double eps) throws Exception { |
| PeakTracker tracker = new PeakTracker(G, eps, alphaKind); |
| |
| clique2_ablations_parallel2.runLaplacianRMCStreaming(G.adj1Based, tracker); |
| tracker.writeJson(outSeeds); |
| System.out.println("# Done. wrote " + outSeeds.toAbsolutePath()); |
| } |
|
|
| |
| static final class PeakTracker implements Consumer<clique2_ablations_parallel2.SnapshotDTO> { |
| final GraphData G; |
| final double epsilon; |
| final AlphaKind alphaKind; |
|
|
| final boolean[] inC; |
| final Map<Integer, Integer> bestIdxByComp = new LinkedHashMap<>(); |
| final Map<Integer, Double> bestScoreByComp = new HashMap<>(); |
| final List<Rec> arrivals = new ArrayList<>(); |
| int idx = 0; |
|
|
| static final class Rec { |
| final int compId; |
| final int sid; |
| final double score; |
| final int[] nodes; |
| Rec(int compId, int sid, double score, int[] nodes) { |
| this.compId = compId; this.sid = sid; this.score = score; this.nodes = nodes; |
| } |
| } |
|
|
| PeakTracker(GraphData G, double epsilon, AlphaKind alphaKind) { |
| this.G = G; this.epsilon = epsilon; this.alphaKind = alphaKind; |
| this.inC = new boolean[G.n]; |
| } |
|
|
| @Override |
| public void accept(clique2_ablations_parallel2.SnapshotDTO s) { |
| final int[] nodes = s.nodes; |
| final int k = nodes.length; |
| if (k == 0) return; |
| for (int u : nodes) inC[u] = true; |
|
|
| final double Q = s.Q; |
| final double sc = k / (Q + epsilon); |
| final int compId = s.componentId; |
| final int sid = idx++; |
|
|
| if (!bestIdxByComp.containsKey(compId) || sc > bestScoreByComp.get(compId)) { |
| bestIdxByComp.put(compId, sid); |
| bestScoreByComp.put(compId, sc); |
| } |
| arrivals.add(new Rec(compId, sid, sc, Arrays.copyOf(nodes, nodes.length))); |
|
|
| for (int u : nodes) inC[u] = false; |
| } |
|
|
| void writeJson(Path outJson) throws IOException { |
| final int n = G.n; |
| boolean[] covered = new boolean[n]; |
|
|
| try (BufferedWriter w = Files.newBufferedWriter(outJson, StandardCharsets.UTF_8)) { |
| w.write("{\n"); |
| w.write("\"meta\":{"); |
| w.write("\"epsilon\":" + epsilon); |
| w.write(",\"alpha_kind\":\"" + (alphaKind == AlphaKind.DIAM ? "DIAM" : "INV_SQRT_LAMBDA2") + "\""); |
| w.write(",\"n\":" + G.n); |
| w.write(",\"m\":" + G.m); |
| w.write(",\"mode\":\"peaks_per_component+singletons(stream)\""); |
| w.write("},\n"); |
| w.write("\"clusters\":[\n"); |
|
|
| boolean first = true; |
| int nextClusterId = 0; |
|
|
| for (Rec r : arrivals) { |
| Integer best = bestIdxByComp.get(r.compId); |
| if (best != null && best == r.sid) { |
| if (!first) w.write(",\n"); |
| first = false; |
| w.write(" {\"cluster_id\":" + (nextClusterId++)); |
| w.write(",\"component_id\":" + r.compId); |
| w.write(",\"snapshot_id\":" + r.sid); |
| w.write(",\"score\":" + r.score); |
| w.write(",\"k_seed\":" + r.nodes.length); |
| w.write(",\"members\":" + intArrayToJson(r.nodes)); |
| w.write(",\"seed_nodes\":" + intArrayToJson(r.nodes)); |
| w.write("}"); |
| for (int u : r.nodes) covered[u] = true; |
| } |
| } |
|
|
| for (int u = 0; u < n; u++) { |
| if (!covered[u]) { |
| if (!first) w.write(",\n"); |
| first = false; |
| int[] singleton = new int[]{u}; |
| w.write(" {\"cluster_id\":" + (nextClusterId++)); |
| w.write(",\"component_id\":-1"); |
| w.write(",\"snapshot_id\":-1"); |
| w.write(",\"score\":0.0"); |
| w.write(",\"k_seed\":1"); |
| w.write(",\"members\":" + intArrayToJson(singleton)); |
| w.write(",\"seed_nodes\":" + intArrayToJson(singleton)); |
| w.write(",\"is_singleton\":true"); |
| w.write("}"); |
| } |
| } |
| w.write("\n]}"); |
| } |
| } |
| } |
|
|
| |
| static GraphData loadEdgeList0Based(Path edgesFile) throws IOException { |
| int[] deg = new int[1 << 12]; |
| int maxNode = -1; |
| long mUndir = 0; |
| try (BufferedReader br = Files.newBufferedReader(edgesFile, StandardCharsets.UTF_8)) { |
| String s; |
| while ((s = br.readLine()) != null) { |
| s = s.trim(); |
| if (s.isEmpty() || s.startsWith("#")) continue; |
| String[] tok = s.split("\\s+|,"); |
| if (tok.length < 2) continue; |
| int u = Integer.parseInt(tok[0]); |
| int v = Integer.parseInt(tok[1]); |
| if (u == v) continue; |
| int needed = Math.max(u, v) + 1; |
| if (needed > deg.length) { |
| int newLen = deg.length; |
| while (newLen < needed) newLen <<= 1; |
| deg = Arrays.copyOf(deg, newLen); |
| } |
| deg[u]++; deg[v]++; |
| if (u < v) mUndir++; |
| if (u > maxNode) maxNode = u; |
| if (v > maxNode) maxNode = v; |
| } |
| } |
| final int n = maxNode + 1; |
| @SuppressWarnings("unchecked") |
| List<Integer>[] adj1 = (List<Integer>[]) new List<?>[n + 1]; |
| for (int i = 1; i <= n; i++) adj1[i] = new ArrayList<>(deg[i - 1]); |
| try (BufferedReader br = Files.newBufferedReader(edgesFile, StandardCharsets.UTF_8)) { |
| String s; |
| while ((s = br.readLine()) != null) { |
| s = s.trim(); |
| if (s.isEmpty() || s.startsWith("#")) continue; |
| String[] tok = s.split("\\s+|,"); |
| if (tok.length < 2) continue; |
| int u = Integer.parseInt(tok[0]); |
| int v = Integer.parseInt(tok[1]); |
| if (u == v) continue; |
| adj1[u + 1].add(v + 1); |
| adj1[v + 1].add(u + 1); |
| } |
| } |
| GraphData G = new GraphData(); |
| G.n = n; G.m = mUndir; G.adj1Based = adj1; |
| G.labels = new int[n]; Arrays.fill(G.labels, -1); |
| G.labelNames = new String[0]; |
| return G; |
| } |
|
|
| static AlphaKind parseAlpha(String s) { |
| String t = s.trim().toUpperCase(Locale.ROOT); |
| if (t.startsWith("DIAM")) return AlphaKind.DIAM; |
| if (t.contains("LAMBDA")) return AlphaKind.INV_SQRT_LAMBDA2; |
| return AlphaKind.DIAM; |
| } |
|
|
| static String intArrayToJson(int[] arr) { |
| StringBuilder sb = new StringBuilder(); |
| sb.append('['); |
| for (int i = 0; i < arr.length; i++) { |
| if (i > 0) sb.append(','); |
| sb.append(arr[i]); |
| } |
| sb.append(']'); |
| return sb.toString(); |
| } |
|
|
| enum AlphaKind {DIAM, INV_SQRT_LAMBDA2} |
|
|
| static final class GraphData { |
| int n; |
| long m; |
| List<Integer>[] adj1Based; |
| int[] labels; |
| String[] labelNames; |
| } |
| } |
|
|
|
|