| import streamlit as st |
| from huggingface_hub import HfApi |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| from datetime import datetime |
| from concurrent.futures import ThreadPoolExecutor, as_completed |
| from functools import lru_cache |
| import time |
|
|
# Configure the page: browser-tab title and wide layout.
st.set_page_config(layout="wide", page_title="HF Contributions")

# Hub client used by the cached fetch helpers defined in this script.
api = HfApi()
|
|
|
|
| |
@lru_cache(maxsize=1000)
def cached_repo_info(repo_id, repo_type):
    """Return ``api.repo_info`` for one repository, memoized by ``lru_cache``.

    Args:
        repo_id: Repository identifier forwarded to the Hub client.
        repo_type: Repository type forwarded to the Hub client.

    Returns:
        Whatever ``api.repo_info`` returns for that repository. Up to 1000
        distinct ``(repo_id, repo_type)`` pairs are kept in the cache.
    """
    info = api.repo_info(repo_type=repo_type, repo_id=repo_id)
    return info
|
|
|
|
@lru_cache(maxsize=1000)
def cached_list_commits(repo_id, repo_type):
    """Return the repository's commits as a list, memoized by ``lru_cache``.

    Args:
        repo_id: Repository identifier forwarded to the Hub client.
        repo_type: Repository type forwarded to the Hub client.

    Returns:
        A list built from ``api.list_repo_commits``. Up to 1000 distinct
        ``(repo_id, repo_type)`` pairs are kept in the cache.
    """
    commit_iter = api.list_repo_commits(repo_type=repo_type, repo_id=repo_id)
    # Materialize so the cached value can be iterated more than once.
    return [*commit_iter]
|
|
|
|
@lru_cache(maxsize=100)
def cached_list_items(username, kind):
    """List the repositories of one kind authored by ``username`` (memoized).

    Args:
        username: Author passed to the Hub listing call.
        kind: ``"model"``, ``"dataset"`` or ``"space"``.

    Returns:
        A list of the entries yielded by the matching ``api.list_*`` call, or
        an empty list when ``kind`` is none of the three values above.
    """
    # Map each kind to the *name* of the client method so that only the
    # method actually needed is looked up on ``api``.
    method_name_by_kind = {
        "model": "list_models",
        "dataset": "list_datasets",
        "space": "list_spaces",
    }
    method_name = method_name_by_kind.get(kind)
    if method_name is None:
        return []
    lister = getattr(api, method_name)
    return list(lister(author=username))
|
|
|
|
| |
class RateLimiter:
    """Space out calls so that at most ``calls_per_second`` pass per second.

    ``wait()`` is called from thread-pool workers in this script, so the
    read / sleep / update sequence is guarded by a lock. Without it, several
    threads can read the same ``last_call`` value and all proceed at once.

    Attributes:
        calls_per_second: Maximum number of ``wait()`` returns per second.
        last_call: ``time.monotonic()`` reading taken when the most recent
            ``wait()`` returned (0 before the first call).
    """

    def __init__(self, calls_per_second=10):
        """Create a limiter.

        Args:
            calls_per_second: Positive number of permitted calls per second.

        Raises:
            ValueError: If ``calls_per_second`` is zero or negative.
        """
        # Function-scope import keeps this class self-contained.
        import threading

        if calls_per_second <= 0:
            raise ValueError("calls_per_second must be positive")
        self.calls_per_second = calls_per_second
        self.last_call = 0
        self._lock = threading.Lock()

    def wait(self):
        """Block until at least ``1 / calls_per_second`` s after the previous call."""
        min_interval = 1.0 / self.calls_per_second
        # The lock is held while sleeping on purpose: queued callers are then
        # released one interval apart instead of all together.
        with self._lock:
            # Monotonic clock: immune to wall-clock adjustments.
            remaining = min_interval - (time.monotonic() - self.last_call)
            if remaining > 0:
                time.sleep(remaining)
            self.last_call = time.monotonic()


# Single limiter shared by every fetch in this script.
rate_limiter = RateLimiter()
|
|
|
|
| |
def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
    """Collect the dates of one repository's activity within ``selected_year``.

    The repository's creation date and the date of every commit returned by
    ``cached_list_commits`` are considered; only those whose year equals
    ``selected_year`` are kept.

    Args:
        repo_id: Repository identifier.
        repo_type: Repository type forwarded to the cached Hub helpers.
        username: Unused; kept so existing call sites keep working.
        selected_year: Year (int) that a date must fall in to be kept.

    Returns:
        ``(commit_dates, commit_count)`` where ``commit_dates`` is a list of
        ``datetime.date`` and ``commit_count`` is its length. Private or gated
        repositories, and any failure while fetching, yield ``([], 0)``.
    """
    import logging  # function-scope import keeps the block self-contained

    def _to_naive_date(value):
        # Drop timezone info before taking the calendar date.
        return pd.to_datetime(value).tz_localize(None).date()

    try:
        rate_limiter.wait()

        repo_info = cached_repo_info(repo_id, repo_type)
        if repo_info.private or getattr(repo_info, "gated", False):
            # Same (list, int) shape as every other return path.
            return [], 0

        commit_dates = []

        # Repository creation is counted as an activity of its own.
        # NOTE(review): if the commit listing also contains the repository's
        # initial commit, the creation day is counted twice - confirm intent.
        created_on = _to_naive_date(repo_info.created_at)
        if created_on.year == selected_year:
            commit_dates.append(created_on)

        for commit in cached_list_commits(repo_id, repo_type):
            committed_on = _to_naive_date(commit.created_at)
            if committed_on.year == selected_year:
                commit_dates.append(committed_on)

        return commit_dates, len(commit_dates)
    except Exception:
        # Best effort: one unreadable repository must not abort the whole
        # scan, but the failure is logged instead of vanishing silently.
        logging.getLogger(__name__).warning(
            "Could not read commits for %s %s", repo_type, repo_id, exc_info=True
        )
        return [], 0
|
|
|
|
| |
def get_commit_events(username, kind=None, selected_year=None):
    """Gather commit dates for all of a user's repositories.

    Args:
        username: Hub user or organisation whose repositories are scanned.
        kind: ``"model"``, ``"dataset"`` or ``"space"`` to scan one kind only;
            any falsy value scans all three.
        selected_year: Year forwarded to ``fetch_commits_for_repo``.
            NOTE(review): with the default ``None`` no date can match the
            year comparison there, so the frame comes back empty.

    Returns:
        ``(df, items_with_type)``. ``df`` has a single ``date`` column with
        one row per collected commit date; days with several commits appear
        several times so per-day counts can be derived. ``items_with_type``
        is a list of ``(item, kind)`` pairs for every listed repository.
    """
    commit_dates = []
    items_with_type = []
    kinds = [kind] if kind else ["model", "dataset", "space"]

    for k in kinds:
        try:
            items = cached_list_items(username, k)
            items_with_type.extend((item, k) for item in items)
            repo_ids = [item.id for item in items]
            if not repo_ids:
                continue

            # One pool per kind, still capped at 5 concurrent fetches.
            with ThreadPoolExecutor(max_workers=5) as executor:
                futures = [
                    executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year)
                    for repo_id in repo_ids
                ]
                for future in as_completed(futures):
                    repo_commits, _ = future.result()
                    commit_dates.extend(repo_commits)
        except Exception as e:
            st.warning(f"Error fetching {k}s for {username}: {str(e)}")

    # Rows are intentionally not de-duplicated: collapsing equal dates would
    # cap every day's commit count at 1.
    df = pd.DataFrame(commit_dates, columns=["date"])
    return df, items_with_type
|
|
|
|
| |
def make_calendar_heatmap(df, title, year):
    """Draw a one-year contribution calendar in the Streamlit page.

    Rows are weekdays (Monday first) and columns are calendar weeks. Each row
    of ``df`` counts as one contribution on its ``date``; dates outside
    ``year`` are ignored. Cells of the first and last week that fall outside
    the year are left blank.

    Args:
        df: DataFrame with a ``date`` column. It is not modified.
        title: Plot title; its lower-cased form is used in the message shown
            when ``df`` is empty.
        year: Calendar year to draw.

    Returns:
        None. Output goes to the page through ``st.info`` or ``st.pyplot``.
    """
    from matplotlib.colors import BoundaryNorm, ListedColormap

    if df.empty:
        st.info(f"No {title.lower()} found for {year}.")
        return

    # Count contributions per day on a private copy so the caller's frame
    # does not gain a "count" column as a side effect.
    daily = df[["date"]].assign(count=1)
    daily["date"] = pd.to_datetime(daily["date"])
    daily = daily.groupby("date", as_index=False)["count"].sum()

    start = pd.Timestamp(f"{year}-01-01")
    end = pd.Timestamp(f"{year}-12-31")
    all_days = pd.date_range(start=start, end=end)

    # One row per day of the year; days without contributions get 0.
    calendar = pd.DataFrame({"date": all_days}).merge(daily, on="date", how="left")
    calendar["count"] = calendar["count"].fillna(0).astype(int)

    # Shift by the weekday of Jan 1 so every column is a Monday-to-Sunday
    # week; counting weeks from Jan 1 would mix two calendar weeks per column.
    first_weekday = start.dayofweek
    calendar["dow"] = calendar["date"].dt.dayofweek
    calendar["week"] = ((calendar["date"] - start).dt.days + first_weekday) // 7

    grid = calendar.pivot(index="dow", columns="week", values="count")
    outside_year = grid.isna()  # padding cells before Jan 1 / after Dec 31
    grid = grid.fillna(0).astype(int)

    month_starts = pd.date_range(start, end, freq="MS")
    month_labels = month_starts.strftime("%b")
    month_positions = [((day - start).days + first_weekday) // 7 for day in month_starts]

    # Five colour steps for 0, 1-2, 3-10, 11-30 and 31+ contributions.
    colors = ['#ebedf0', '#9be9a8', '#40c463', '#30a14e', '#216e39']
    bounds = [0, 1, 3, 11, 31, float('inf')]
    cmap = ListedColormap(colors)
    norm = BoundaryNorm(bounds, cmap.N)

    fig, ax = plt.subplots(figsize=(12, 1.2))
    try:
        sns.heatmap(
            grid,
            ax=ax,
            cmap=cmap,
            norm=norm,
            mask=outside_year,
            linewidths=0.5,
            linecolor="white",
            square=True,
            cbar=False,
            yticklabels=["M", "T", "W", "T", "F", "S", "S"],
        )

        ax.set_title(f"{title}", fontsize=12, pad=10)
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_xticks(month_positions)
        ax.set_xticklabels(month_labels, fontsize=8)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until closed; release it
        # so figures do not pile up across script reruns.
        plt.close(fig)
|
|
|
|
| |
with st.sidebar:
    st.title("👤 Contributor")

    # Preset accounts offered in the dropdown; the first one is preselected.
    username = st.selectbox(
        "Select or type a username",
        index=0,
        options=[
            "ritvik77",
            "facebook",
            "google",
            "stabilityai",
            "Salesforce",
            "tiiuae",
            "bigscience",
        ],
    )

    st.markdown(
        "<div style='text-align: center; margin: 10px 0;'>OR</div>",
        unsafe_allow_html=True,
    )

    # A non-blank free-text entry overrides the dropdown choice.
    custom = st.text_input("", placeholder="Enter custom username/org")
    cleaned = custom.strip()
    if cleaned:
        username = cleaned

    # Newest year first, going back to 2018.
    current_year = datetime.now().year
    year_options = [*range(current_year, 2017, -1)]
    selected_year = st.selectbox("🗓️ Year", options=year_options)
|
|
| |
st.title("🤗 Hugging Face Contributions")
if username:
    # Commit dates and commit totals for the selected year, keyed by repo kind.
    commits_by_type = {}
    commit_counts_by_type = {}

    with st.spinner("Fetching commit data..."):
        for kind in ["model", "dataset", "space"]:
            try:
                repo_ids = [item.id for item in cached_list_items(username, kind)]
                kind_dates = []

                if repo_ids:
                    # One pool per kind, capped at 5 concurrent fetches.
                    with ThreadPoolExecutor(max_workers=5) as executor:
                        futures = [
                            executor.submit(
                                fetch_commits_for_repo, repo_id, kind, username, selected_year
                            )
                            for repo_id in repo_ids
                        ]
                        for future in as_completed(futures):
                            repo_commits, _ = future.result()
                            kind_dates.extend(repo_commits)

                commits_by_type[kind] = kind_dates
                # Every collected date is one counted commit.
                commit_counts_by_type[kind] = len(kind_dates)
            except Exception as e:
                st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
                commits_by_type[kind] = []
                commit_counts_by_type[kind] = 0

    total_commits = sum(commit_counts_by_type.values())

    st.subheader(f"{username}'s Activity in {selected_year}")
    st.metric("Total Commits", total_commits)

    # Dates are passed on without de-duplication: the heatmap counts rows per
    # day, and collapsing equal dates would cap every day at 1 and leave the
    # darker colour steps unused.
    all_commits = [day for dates in commits_by_type.values() for day in dates]
    all_df = pd.DataFrame(all_commits, columns=["date"])
    make_calendar_heatmap(all_df, "All Commits", selected_year)

    col1, col2, col3 = st.columns(3)
    for col, kind, emoji, label in [
        (col1, "model", "🧠", "Models"),
        (col2, "dataset", "📦", "Datasets"),
        (col3, "space", "🚀", "Spaces"),
    ]:
        with col:
            try:
                total = len(cached_list_items(username, kind))
                commits = commits_by_type.get(kind, [])
                commit_count = commit_counts_by_type.get(kind, 0)
                df_kind = pd.DataFrame(commits, columns=["date"])
                st.metric(f"{emoji} {label}", total)
                st.metric(f"Commits in {selected_year}", commit_count)
                make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)
            except Exception as e:
                # Fall back to an empty column rather than failing the page.
                st.warning(f"Error processing {label}: {str(e)}")
                st.metric(f"{emoji} {label}", 0)
                st.metric(f"Commits in {selected_year}", 0)
                make_calendar_heatmap(pd.DataFrame(), f"{label} Commits", selected_year)