| import torch |
| import torch.nn as nn |
| from transformers import PreTrainedModel |
| from .configuration_pixel import TopAIImageConfig |
|
|
class ResidualBlock(nn.Module):
    """Two 3x3 conv + BatchNorm stages wrapped in an identity skip connection.

    Channel count and spatial size are preserved, so the input can be added
    directly to the transformed features: ``out = x + f(x)``.
    """

    def __init__(self, channels):
        super().__init__()
        # Keep everything inside a single ``block`` Sequential so state_dict
        # keys stay identical to earlier checkpoints of this module.
        layers = [
            nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(channels),
            nn.ReLU(True),
            nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(channels),
        ]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``x`` plus its residual transformation."""
        residual = self.block(x)
        return x + residual
|
|
class TopAIImageGenerator(PreTrainedModel):
    """Decode a text embedding into an image via transposed-conv upsampling.

    A linear projection maps the embedding onto a 4x4 feature map with
    ``config.hidden_dim`` channels; the decoder then doubles the spatial
    size five times (4 -> 8 -> 16 -> 32 -> 64 -> 128) and emits
    ``config.image_channels`` channels squashed to [-1, 1] by Tanh.
    """

    config_class = TopAIImageConfig
    all_tied_weights_keys = {}

    def __init__(self, config):
        super().__init__(config)
        hidden = config.hidden_dim

        # Text embedding -> flattened 4x4 seed feature map.
        self.text_projection = nn.Linear(config.input_dim, 4 * 4 * hidden)

        # Preserve the exact child order of the original Sequential so the
        # numeric state_dict keys remain checkpoint-compatible.
        stages = [
            self._upsample(hidden, hidden),
            ResidualBlock(hidden),
            self._upsample(hidden, 256),
            ResidualBlock(256),
            self._upsample(256, 128),
            self._upsample(128, 64),
            nn.ConvTranspose2d(64, 32, 4, 2, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.Conv2d(32, config.image_channels, kernel_size=3, padding=1),
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*stages)

    def _upsample(self, in_channels, out_channels):
        """One 2x upsampling stage: ConvTranspose2d -> BatchNorm -> ReLU."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, 4, 2, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        )

    def forward(self, text_embeddings):
        """Map ``text_embeddings`` to generated image tensors in [-1, 1].

        NOTE(review): assumes the last dimension is ``config.input_dim``;
        any leading dimensions are flattened into the batch axis by the
        ``view`` below — confirm callers pass (batch, input_dim).
        """
        projected = self.text_projection(text_embeddings)
        seed = projected.view(-1, self.config.hidden_dim, 4, 4)
        return self.decoder(seed)