| from ..Node import ChunkNode |
| from typing import List, Dict |
|
|
def dict_to_chunknode(d: dict) -> ChunkNode:
    """
    Build a ChunkNode from a plain dictionary.

    Args:
        d (dict): Mapping whose items are passed to the ChunkNode
            constructor as keyword arguments.

    Returns:
        ChunkNode: The newly constructed node.
    """
    # Each key of ``d`` becomes a keyword argument of the constructor.
    node = ChunkNode(**d)
    return node
|
|
def extract_filename_from_chunk(chunk: ChunkNode) -> str:
    """
    Derive the file name from a chunk's id.

    The file name is everything in ``chunk.id`` that precedes the last
    underscore (presumably ids look like ``<file name>_<index>`` -- confirm
    against the code that creates them). An id without any underscore
    yields an empty string.

    Args:
        chunk (ChunkNode | dict): The chunk to inspect. A dict is first
            converted with ``dict_to_chunknode``.

    Returns:
        str: The part of the chunk id before its last underscore.
    """
    node = chunk if not isinstance(chunk, dict) else dict_to_chunknode(chunk)
    # rpartition splits on the LAST underscore; the head is the file name
    # (and is '' when the id contains no underscore at all).
    file_name, _, _ = node.id.rpartition('_')
    return file_name
| |
|
|
def order_chunks_by_order_in_file(chunks: List[ChunkNode]) -> list:
    """
    Return the chunks sorted ascending by their ``order_in_file`` value.

    Args:
        chunks (list): Chunks to sort; each item is either a ChunkNode or a
            dict, and dicts are converted with ``dict_to_chunknode``.

    Returns:
        list: A new list of chunks sorted by ``int(order_in_file)``. Chunks
            with equal order keep their relative input order.
    """

    def position(node):
        # order_in_file is coerced to int before comparing.
        return int(node.order_in_file)

    nodes = []
    for item in chunks:
        nodes.append(item if not isinstance(item, dict) else dict_to_chunknode(item))
    nodes.sort(key=position)
    return nodes
|
|
def organize_chunks_by_file_name(chunks: List[ChunkNode]) -> Dict[str, List[ChunkNode]]:
    """
    Group chunks by the file they belong to and sort each group.

    Args:
        chunks (list): Chunks to group; each item is either a ChunkNode or
            a dict, and dicts are converted with ``dict_to_chunknode``.

    Returns:
        dict: Maps each file name (as given by
            ``extract_filename_from_chunk``) to the list of its chunks,
            ordered by ``order_chunks_by_order_in_file``. Keys appear in the
            order the file names are first encountered in ``chunks``.
    """
    nodes = [c if not isinstance(c, dict) else dict_to_chunknode(c) for c in chunks]

    grouped = {}
    for node in nodes:
        # setdefault creates the per-file list the first time a name is seen.
        grouped.setdefault(extract_filename_from_chunk(node), []).append(node)

    return {
        name: order_chunks_by_order_in_file(group)
        for name, group in grouped.items()
    }
|
|
def join_organized_chunks(organized_chunks: Dict[str, List[ChunkNode]]) -> str:
    """
    Render chunks grouped by file as one newline-joined string.

    Each file contributes a three-line header (separator, ``File: <name>``,
    separator) followed by the content of its chunks in the order given.
    A ``[...]`` marker is inserted wherever chunks are missing: before the
    first chunk when its ``order_in_file`` is greater than 0, and between two
    consecutive chunks whose ``order_in_file`` values differ by more than 1.
    A file with no chunks still gets its header.

    Args:
        organized_chunks (dict): Maps file names to lists of chunks. Each
            chunk is either a ChunkNode or a dict (converted with
            ``dict_to_chunknode``). The lists are not re-sorted here.

    Returns:
        str: The joined text for all files.
    """
    separator = "=" * 48
    gap_marker = "\n[...]"
    lines = []

    for filename, raw_chunks in organized_chunks.items():
        lines.extend((separator, f"File: {filename}", separator))

        nodes = [c if not isinstance(c, dict) else dict_to_chunknode(c) for c in raw_chunks]

        previous_order = None
        for node in nodes:
            current_order = int(node.order_in_file)
            if previous_order is None:
                # First chunk of the file: anything before position 0 is missing.
                missing_before = current_order > 0
            else:
                # Later chunks: a jump of more than one position means a gap.
                missing_before = current_order - previous_order > 1
            if missing_before:
                lines.append(gap_marker)
            lines.append(node.content)
            previous_order = current_order

    return "\n".join(lines)
|
|