[ { "additions": 4, "author": "cjkindel", "author_association": "FIRST_TIME_CONTRIBUTOR", "body_excerpt": "# What does this PR do? `_can_set_attn_implementation` and `_can_set_experts_implementation` both do a direct subscript lookup into `sys.modules`: ```python class_module = sys.modules[cls.__module__] ``` If the module is not registered und\u2026", "changed_files": 1, "cluster_id": "cluster-44815-10", "cluster_ids": [ "cluster-44815-10" ], "cluster_role": "member", "comments_count": 0, "conversation_url": "https://github.com/huggingface/transformers/pull/44978", "created_at": "2026-03-24T21:01:11Z", "deletions": 4, "draft": false, "files_url": "https://github.com/huggingface/transformers/pull/44978/files", "html_url": "https://github.com/huggingface/transformers/pull/44978", "labels": [], "merged": false, "number": 44978, "review_comments_count": 0, "state": "open", "title": "fix: handle absent sys.modules entry in modeling_utils", "updated_at": "2026-03-24T21:12:19Z" }, { "additions": 2, "author": "hmellor", "author_association": "MEMBER", "body_excerpt": "- Adds a type hint to `ModernVBertForMaskedLM.__init__` - Removes `tie_word_embeddings` from `Qwen2VLTextConfig` (and therefore also `Qwen2_5_VLTextConfig`) because it's not valid for these models - Remove hack from `ColQwen2Config` (and t\u2026", "changed_files": 6, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, "conversation_url": "https://github.com/huggingface/transformers/pull/44976", "created_at": "2026-03-24T19:26:33Z", "deletions": 10, "draft": false, "files_url": "https://github.com/huggingface/transformers/pull/44976/files", "html_url": "https://github.com/huggingface/transformers/pull/44976", "labels": [], "merged": true, "number": 44976, "review_comments_count": 3, "state": "closed", "title": "Fix tie_word_embedding issues with `Qwen2VL`", "updated_at": "2026-03-24T20:55:15Z" }, { "additions": 6971, "author": "philippguevorguian", "author_association": "NONE", "body_excerpt": null, "changed_files": 20, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, "conversation_url": "https://github.com/huggingface/transformers/pull/44975", "created_at": "2026-03-24T17:12:31Z", "deletions": 2, "draft": false, "files_url": "https://github.com/huggingface/transformers/pull/44975/files", "html_url": "https://github.com/huggingface/transformers/pull/44975", "labels": [], "merged": false, "number": 44975, "review_comments_count": 0, "state": "closed", "title": "fix: rebase main; clean config reads, ImageProcessor backend, misc cleanup", "updated_at": "2026-03-24T17:13:42Z" }, { "additions": 799, "author": "3outeille", "author_association": "MEMBER", "body_excerpt": null, "changed_files": 6, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, "conversation_url": "https://github.com/huggingface/transformers/pull/44974", "created_at": "2026-03-24T16:13:25Z", "deletions": 82, "draft": false, "files_url": "https://github.com/huggingface/transformers/pull/44974/files", "html_url": "https://github.com/huggingface/transformers/pull/44974", "labels": [], "merged": false, "number": 44974, "review_comments_count": 0, "state": "open", "title": "Refactor core_model_loading to support FSDP shard-on-read loading", "updated_at": "2026-03-24T16:28:48Z" }, { "additions": 22, "author": "andylizf", "author_association": "FIRST_TIME_CONTRIBUTOR", "body_excerpt": "## What does this PR do? Adds `.item()` to `max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max()` in all vision attention modules that pass this value to `flash_attn_varlen_func`. ### Context On **released versions** (e.g. 4.52.4), using\u2026", "changed_files": 19, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 1, "conversation_url": "https://github.com/huggingface/transformers/pull/44973", "created_at": "2026-03-24T15:42:32Z", "deletions": 22, "draft": false, "files_url": "https://github.com/huggingface/transformers/pull/44973/files", "html_url": "https://github.com/huggingface/transformers/pull/44973", "labels": [], "merged": false, "number": 44973, "review_comments_count": 0, "state": "open", "title": "Fix max_seqlen type in vision attention for torch.compile + FA2", "updated_at": "2026-03-24T15:46:30Z" }, { "additions": 17, "author": "Abdennacer-Badaoui", "author_association": "MEMBER", "body_excerpt": "As per title. Updating Gemma3/Gemma3n expectations.", "changed_files": 3, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 2, "conversation_url": "https://github.com/huggingface/transformers/pull/44972", "created_at": "2026-03-24T15:11:50Z", "deletions": 12, "draft": false, "files_url": "https://github.com/huggingface/transformers/pull/44972/files", "html_url": "https://github.com/huggingface/transformers/pull/44972", "labels": [], "merged": true, "number": 44972, "review_comments_count": 10, "state": "closed", "title": "[AMD CI] Gemma3/Gemma3n Expectations", "updated_at": "2026-03-24T16:30:03Z" }, { "additions": 0, "author": "ArthurZucker", "author_association": "MEMBER", "body_excerpt": "# What does this PR do? Removed the tokenizer_class attr was never there to begin with, and kwargs are now supported. This was failing some test on vllm ci. Fixes https://buildkite.com/vllm/ci/builds/57601/steps/canvas?sid=019d1aec-aa5a-41\u2026", "changed_files": 4, "cluster_id": null, "cluster_ids": [], "cluster_role": null, "comments_count": 3, "conversation_url": "https://github.com/huggingface/transformers/pull/44971", "created_at": "2026-03-24T14:59:36Z", "deletions": 11, "draft": false, "files_url": "https://github.com/huggingface/transformers/pull/44971/files", "html_url": "https://github.com/huggingface/transformers/pull/44971", "labels": [], "merged": true, "number": 44971, "review_comments_count": 1, "state": "closed", "title": "[ `vllm x v5`] nit", "updated_at": "2026-03-24T17:40:05Z" }, { "additions": 20, "author": "IlyasMoutawwakil", "author_association": "MEMBER", "body_excerpt": "# What does this PR do?