| import fsspec |
| import pyarrow.parquet as pq |
| import numpy as np |
| from PIL import Image |
| from io import BytesIO |
| from rasterio.io import MemoryFile |
| import matplotlib.pyplot as plt |
| import cartopy.crs as ccrs |
| import cartopy.io.img_tiles as cimgt |
| from matplotlib.patches import Rectangle |
| import math |
| from matplotlib.figure import Figure |
| from matplotlib.backends.backend_agg import FigureCanvasAgg |
|
|
|
|
def crop_center(img_array, cropx, cropy):
    """Return the central ``cropy`` x ``cropx`` window of an image array.

    Args:
        img_array: Array whose first two axes are (rows, columns). Any
            trailing axes (e.g. a channel axis) are passed through untouched,
            so both 2-D and 3-D arrays are accepted.
        cropx: Width of the window, in columns.
        cropy: Height of the window, in rows.

    Returns:
        A slice (view) of ``img_array``. If the requested window is larger
        than the image along an axis, the full extent of that axis is
        returned.
    """
    height, width = img_array.shape[:2]
    # Clamp at 0: a negative start would be interpreted by NumPy as an
    # offset from the end and silently yield a sliver from the far edge.
    startx = max(width // 2 - cropx // 2, 0)
    starty = max(height // 2 - cropy // 2, 0)
    return img_array[starty:starty + cropy, startx:startx + cropx]
|
|
def read_tif_bytes(tif_bytes):
    """Decode in-memory GeoTIFF bytes and return the pixel data.

    The bytes are opened through rasterio's ``MemoryFile`` with the GTiff
    driver; the array returned by ``read()`` has its singleton axes dropped
    via ``squeeze()`` before being returned.
    """
    with MemoryFile(tif_bytes) as memfile, memfile.open(driver='GTiff') as dataset:
        pixels = dataset.read()
    return pixels.squeeze()
|
|
def read_row_memory(row_dict, columns=("thumbnail",)):
    """Fetch selected columns of one record from a remote Parquet file.

    Args:
        row_dict: Mapping with at least ``'parquet_url'`` (location of the
            Parquet file, opened through fsspec) and ``'parquet_row'``.
        columns: Column names to read. A single name may be given as a
            plain string. Defaults to just the thumbnail.

    Returns:
        Dict mapping each requested column to its decoded value: the
        ``'thumbnail'`` column becomes a PIL image, every other column is
        decoded with ``read_tif_bytes``.
    """
    # Normalise to a fresh list: avoids a shared mutable default and keeps a
    # bare string from being iterated character by character.
    columns = [columns] if isinstance(columns, str) else list(columns)

    url = row_dict['parquet_url']
    row_idx = row_dict['parquet_row']

    # Read-ahead caching in 5 MiB blocks for the remote file.
    fs_options = {
        "cache_type": "readahead",
        "block_size": 5 * 1024 * 1024,
    }

    with fsspec.open(url, mode='rb', **fs_options) as f:
        with pq.ParquetFile(f) as pf:
            # NOTE(review): 'parquet_row' is used as a row-group index and
            # only the first row of that group is read below — presumably
            # each row group holds exactly one record; confirm.
            table = pf.read_row_group(row_idx, columns=columns)

    row_output = {}
    for col in columns:
        col_data = table[col][0].as_py()
        if col == 'thumbnail':
            row_output[col] = Image.open(BytesIO(col_data))
        else:
            row_output[col] = read_tif_bytes(col_data)

    return row_output
|
|
def _to_modelscope_url(url):
    """Rewrite a huggingface.co / hf-mirror.com URL to its modelscope.cn form."""
    for host in ('huggingface.co', 'hf-mirror.com'):
        if host in url:
            return (
                url.replace(f'https://{host}', 'https://modelscope.cn')
                .replace('resolve/main', 'resolve/master')
            )
    return url


def download_and_process_image(product_id, df_source=None, verbose=True):
    """Fetch the B04/B03/B02 bands for a product and build RGB previews.

    Args:
        product_id: Value looked up in the ``'product_id'`` column of
            ``df_source``; the first matching row is used.
        df_source: DataFrame holding the metadata rows. Each row is expected
            to carry a ``'parquet_url'`` entry (and whatever else
            ``read_row_memory`` needs).
        verbose: When true, progress and error messages are printed.

    Returns:
        ``(img_384, img_full)`` as PIL images, where ``img_384`` is the
        central 384x384 crop (or a resize when the image is smaller), or
        ``(None, None)`` on any failure.
    """
    if df_source is None:
        if verbose:
            print("❌ Error: No DataFrame provided.")
        return None, None

    row_subset = df_source[df_source['product_id'] == product_id]
    if len(row_subset) == 0:
        if verbose:
            print(f"❌ Error: Product ID {product_id} not found in DataFrame.")
        return None, None

    row_dict = row_subset.iloc[0].to_dict()

    # A missing cell can surface as NaN/None rather than an absent key;
    # treat anything that is not a non-empty string as missing instead of
    # crashing on the substring checks.
    url = row_dict.get('parquet_url')
    if not isinstance(url, str) or not url:
        if verbose:
            print("❌ Error: 'parquet_url' missing in metadata.")
        return None, None
    row_dict['parquet_url'] = _to_modelscope_url(url)

    if verbose:
        print(f"⬇️ Fetching data for {product_id} from {row_dict['parquet_url']}...")

    try:
        bands_data = read_row_memory(row_dict, columns=['B04', 'B03', 'B02'])

        if not all(b in bands_data for b in ['B04', 'B03', 'B02']):
            if verbose:
                print(f"❌ Error: Missing bands in fetched data for {product_id}")
            return None, None

        # Stack as (rows, cols, 3) in B04, B03, B02 order.
        rgb_img = np.stack(
            [bands_data['B04'], bands_data['B03'], bands_data['B02']], axis=-1
        )

        if verbose:
            print(f"Raw RGB stats: Min={rgb_img.min()}, Max={rgb_img.max()}, Mean={rgb_img.mean()}, Dtype={rgb_img.dtype}")

        # Divide by 10000, apply a 2.5x gain, clip to [0, 1], then map to
        # 8-bit. NOTE: inputs that are already 8-bit (max <= 255) are not
        # special-cased and go through the same scaling.
        rgb_norm = (2.5 * (rgb_img.astype(float) / 10000.0)).clip(0, 1)
        rgb_uint8 = (rgb_norm * 255).astype(np.uint8)

        if verbose:
            print(f"Processed RGB stats: Min={rgb_uint8.min()}, Max={rgb_uint8.max()}, Mean={rgb_uint8.mean()}")

        img_full = Image.fromarray(rgb_uint8)

        if rgb_uint8.shape[0] >= 384 and rgb_uint8.shape[1] >= 384:
            cropped_array = crop_center(rgb_uint8, 384, 384)
            img_384 = Image.fromarray(cropped_array)
        else:
            if verbose:
                print(f"⚠️ Image too small {rgb_uint8.shape}, resizing to 384x384.")
            img_384 = img_full.resize((384, 384))

        if verbose:
            print(f"✅ Successfully processed {product_id}")
        return img_384, img_full

    except Exception as e:
        # Best-effort boundary: any fetch/decode failure yields (None, None).
        if verbose:
            print(f"❌ Error processing {product_id}: {e}")
        import traceback
        traceback.print_exc()
        return None, None
|
|
| |
class EsriImagery(cimgt.GoogleTiles):
    """Cartopy tile source that pulls tiles from Esri's World_Imagery MapServer."""

    def _image_url(self, tile):
        """Return the tile URL for an ``(x, y, z)`` tile tuple (path order is z/y/x)."""
        col, row, zoom = tile
        return (
            'https://server.arcgisonline.com/ArcGIS/rest/services/'
            f'World_Imagery/MapServer/tile/{zoom}/{row}/{col}'
        )
|
|
| from PIL import Image, ImageDraw, ImageFont |
|
|
def get_placeholder_image(text="Image Unavailable", size=(384, 384)):
    """Create a light-grey placeholder image with a short message on it.

    Args:
        text: Message drawn in black, starting 20 px from the left edge at
            half the image height.
        size: ``(width, height)`` of the image in pixels.

    Returns:
        A new RGB PIL image.
    """
    img = Image.new('RGB', size, color=(200, 200, 200))
    d = ImageDraw.Draw(img)
    try:
        font = ImageFont.load_default()
    except Exception:
        # Best effort: let ImageDraw pick its own default. Narrowed from a
        # bare ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
        font = None

    d.text((20, size[1] // 2), text, fill=(0, 0, 0), font=font)
    return img
|
|
def get_esri_satellite_image(lat, lon, score=None, rank=None, query=None):
    """Render an Esri World Imagery map centred on ``(lat, lon)``.

    The figure is built with Matplotlib's object-oriented API (``Figure`` plus
    an Agg canvas) rather than pyplot. It shows zoom-14 tiles over a
    +/-0.05 degree window, a yellow '+' at the centre and a red rectangle
    around it.

    Args:
        lat: Latitude of the centre, in degrees.
        lon: Longitude of the centre, in degrees.
        score: Optional number shown in the title as ``Score: x.xxxx``.
        rank: Optional rank shown in the title.
        query: Optional text shown as the first title line.

    Returns:
        A PIL image of the rendered PNG, or a grey placeholder image if
        anything goes wrong while building the map.
    """
    try:
        tiles = EsriImagery()

        figure = Figure(figsize=(5, 5), dpi=100)
        FigureCanvasAgg(figure)  # attaches an Agg canvas to the figure
        axes = figure.add_subplot(1, 1, 1, projection=tiles.crs)

        half_span = 0.05
        bounds = [lon - half_span, lon + half_span, lat - half_span, lat + half_span]
        axes.set_extent(bounds, crs=ccrs.PlateCarree())

        axes.add_image(tiles, 14)

        axes.plot(
            lon,
            lat,
            marker='+',
            color='yellow',
            markersize=12,
            markeredgewidth=2,
            transform=ccrs.PlateCarree(),
        )

        # Footprint side length; presumably 384 pixels at 10 m each — confirm.
        side_m = 384 * 10

        # Metres -> degrees using 111320 m per degree, with the longitude
        # span widened by 1/cos(latitude).
        height_deg = side_m / 111320
        width_deg = side_m / (111320 * math.cos(math.radians(lat)))

        lower_left = (lon - width_deg / 2, lat - height_deg / 2)
        footprint = Rectangle(
            lower_left,
            width_deg,
            height_deg,
            linewidth=2,
            edgecolor='red',
            facecolor='none',
            transform=ccrs.PlateCarree(),
        )
        axes.add_patch(footprint)

        title_lines = []
        if query:
            title_lines.append(f"{query}")
        if rank is not None:
            title_lines.append(f"Rank {rank}")
        if score is not None:
            title_lines.append(f"Score: {score:.4f}")
        axes.set_title("\n".join(title_lines), fontsize=10)

        png_buffer = BytesIO()
        figure.savefig(png_buffer, format='png', bbox_inches='tight')
        png_buffer.seek(0)

        return Image.open(png_buffer)

    except Exception as e:
        error_msg = str(e)
        network_markers = (
            "Connection reset by peer",
            "Network is unreachable",
            "urlopen error",
        )
        if any(marker in error_msg for marker in network_markers):
            print(f"⚠️ Network warning: Could not fetch Esri satellite map for ({lat:.4f}, {lon:.4f}). Server might be offline.")
        else:
            print(f"Error generating Esri image for {lat}, {lon}: {e}")

        return get_placeholder_image(f"Map Unavailable\n({lat:.2f}, {lon:.2f})")
|
|
def get_esri_satellite_image_url(lat, lon, zoom=14):
    """Return the URL of the Esri World Imagery tile containing a location.

    The tile column/row are derived with the standard Web-Mercator
    ("slippy map") tiling formula, and the URL uses the same
    ``.../tile/{z}/{y}/{x}`` template as ``EsriImagery._image_url``.

    Args:
        lat: Latitude in degrees. Values beyond the Web-Mercator limit
            (about +/-85.0511) are clamped to it.
        lon: Longitude in degrees.
        zoom: Non-negative integer zoom level.

    Returns:
        The tile URL as a string, or ``None`` when the inputs are not
        finite numbers or ``zoom`` is negative.
    """
    try:
        lat = float(lat)
        lon = float(lon)
        zoom = int(zoom)
        if zoom < 0 or not (math.isfinite(lat) and math.isfinite(lon)):
            return None

        # Web Mercator is undefined at the poles; clamp to its valid range.
        max_lat = 85.05112878
        lat = min(max(lat, -max_lat), max_lat)

        tiles_per_axis = 2 ** zoom
        x = int((lon + 180.0) / 360.0 * tiles_per_axis)
        y = int(
            (1.0 - math.asinh(math.tan(math.radians(lat))) / math.pi)
            / 2.0
            * tiles_per_axis
        )
    except (TypeError, ValueError, OverflowError):
        return None

    # Keep edge cases (lon == 180, clamped latitudes) inside the tile grid.
    x = min(max(x, 0), tiles_per_axis - 1)
    y = min(max(y, 0), tiles_per_axis - 1)

    return (
        'https://server.arcgisonline.com/ArcGIS/rest/services/'
        f'World_Imagery/MapServer/tile/{zoom}/{y}/{x}'
    )
|
|