karamjotsingh commited on
Commit
093474c
·
verified ·
1 Parent(s): 8103ccd

Upload position_ids_debug.ipynb with huggingface_hub

Browse files
Files changed (1) hide show
  1. position_ids_debug.ipynb +69 -58
position_ids_debug.ipynb CHANGED
@@ -2,22 +2,22 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 3,
6
  "id": "6511a91c-ed20-41ff-befb-699bda1912a3",
7
  "metadata": {
8
  "execution": {
9
- "iopub.execute_input": "2026-03-25T05:56:03.439346Z",
10
- "iopub.status.busy": "2026-03-25T05:56:03.439077Z",
11
- "iopub.status.idle": "2026-03-25T05:56:12.533462Z",
12
- "shell.execute_reply": "2026-03-25T05:56:12.532731Z",
13
- "shell.execute_reply.started": "2026-03-25T05:56:03.439327Z"
14
  }
15
  },
16
  "outputs": [
17
  {
18
  "data": {
19
  "application/vnd.jupyter.widget-view+json": {
20
- "model_id": "4655d86aa4f24755a2a3a230298174ac",
21
  "version_major": 2,
22
  "version_minor": 0
23
  },
@@ -31,7 +31,7 @@
31
  {
32
  "data": {
33
  "application/vnd.jupyter.widget-view+json": {
34
- "model_id": "b7f679f3d8a64e01a015d59e7bc7f07e",
35
  "version_major": 2,
36
  "version_minor": 0
37
  },
@@ -45,7 +45,7 @@
45
  {
46
  "data": {
47
  "application/vnd.jupyter.widget-view+json": {
48
- "model_id": "1c0c55230de542548486cd47b87fa2a8",
49
  "version_major": 2,
50
  "version_minor": 0
51
  },
@@ -172,33 +172,15 @@
172
  },
173
  {
174
  "cell_type": "code",
175
- "execution_count": 5,
176
  "id": "504fa71b-42b4-4f53-8988-25fcfba38d13",
177
  "metadata": {
178
  "execution": {
179
- "iopub.execute_input": "2026-03-25T05:56:12.538139Z",
180
- "iopub.status.busy": "2026-03-25T05:56:12.537990Z",
181
- "iopub.status.idle": "2026-03-25T05:56:12.549445Z",
182
- "shell.execute_reply": "2026-03-25T05:56:12.548820Z",
183
- "shell.execute_reply.started": "2026-03-25T05:56:12.538125Z"
184
- }
185
- },
186
- "outputs": [],
187
- "source": [
188
- "cos = torch.load('cos.pt')"
189
- ]
190
- },
191
- {
192
- "cell_type": "code",
193
- "execution_count": 6,
194
- "id": "642d9dcf-e591-4d70-96af-b69bf955d9e1",
195
- "metadata": {
196
- "execution": {
197
- "iopub.execute_input": "2026-03-25T05:56:12.549936Z",
198
- "iopub.status.busy": "2026-03-25T05:56:12.549800Z",
199
- "iopub.status.idle": "2026-03-25T05:56:12.558625Z",
200
- "shell.execute_reply": "2026-03-25T05:56:12.557969Z",
201
- "shell.execute_reply.started": "2026-03-25T05:56:12.549924Z"
202
  }
203
  },
204
  "outputs": [
@@ -208,52 +190,81 @@
208
  "(torch.Size([1, 1, 94, 128]), torch.float16)"
209
  ]
210
  },
211
- "execution_count": 6,
212
  "metadata": {},
213
  "output_type": "execute_result"
214
  }
215
  ],
216
  "source": [
 
217
  "cos.shape, cos.dtype"
218
  ]
219
  },
220
  {
221
  "cell_type": "code",
222
- "execution_count": 14,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  "id": "f44460e3-58e9-4fd2-898a-06e8a00f9365",
224
  "metadata": {
225
  "execution": {
226
- "iopub.execute_input": "2026-03-25T06:04:09.103843Z",
227
- "iopub.status.busy": "2026-03-25T06:04:09.103513Z",
228
- "iopub.status.idle": "2026-03-25T06:04:09.110953Z",
229
- "shell.execute_reply": "2026-03-25T06:04:09.110385Z",
230
- "shell.execute_reply.started": "2026-03-25T06:04:09.103826Z"
231
  }
232
  },
233
  "outputs": [
234
  {
235
  "data": {
236
  "text/plain": [
237
- "tensor([[[0.5405, 0.6924, 0.7964, 0.8662, 0.9126, 0.9429, 0.9629, 0.9756,\n",
238
- " 0.9844, 0.9897, 0.9932, 0.9956, 0.9971, 0.9980, 0.9990, 0.9990,\n",
239
- " 0.9995, 0.9995, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
240
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
241
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
242
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
243
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
244
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
245
- " 0.5405, 0.6924, 0.7964, 0.8662, 0.9126, 0.9429, 0.9629, 0.9756,\n",
246
- " 0.9844, 0.9897, 0.9932, 0.9956, 0.9971, 0.9980, 0.9990, 0.9990,\n",
247
- " 0.9995, 0.9995, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
248
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
249
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
250
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
251
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n",
252
- " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000]]],\n",
253
- " device='cuda:0', dtype=torch.float16)"
 
 
 
 
 
 
 
 
 
 
254
  ]
255
  },
256
- "execution_count": 14,
257
  "metadata": {},
258
  "output_type": "execute_result"
259
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "id": "6511a91c-ed20-41ff-befb-699bda1912a3",
7
  "metadata": {
8
  "execution": {
9
+ "iopub.execute_input": "2026-03-25T06:38:11.914633Z",
10
+ "iopub.status.busy": "2026-03-25T06:38:11.914498Z",
11
+ "iopub.status.idle": "2026-03-25T06:38:26.481581Z",
12
+ "shell.execute_reply": "2026-03-25T06:38:26.480877Z",
13
+ "shell.execute_reply.started": "2026-03-25T06:38:11.914618Z"
14
  }
15
  },
16
  "outputs": [
17
  {
18
  "data": {
19
  "application/vnd.jupyter.widget-view+json": {
20
+ "model_id": "a70b6714abe946bfbd7f496bb0913fc8",
21
  "version_major": 2,
22
  "version_minor": 0
23
  },
 
31
  {
32
  "data": {
33
  "application/vnd.jupyter.widget-view+json": {
34
+ "model_id": "f9fe923a4e3a4de6bb7e948de7995ea9",
35
  "version_major": 2,
36
  "version_minor": 0
37
  },
 
45
  {
46
  "data": {
47
  "application/vnd.jupyter.widget-view+json": {
48
+ "model_id": "dde6f6e9831547b4a89e36084b117b56",
49
  "version_major": 2,
50
  "version_minor": 0
51
  },
 
172
  },
173
  {
174
  "cell_type": "code",
175
+ "execution_count": 2,
176
  "id": "504fa71b-42b4-4f53-8988-25fcfba38d13",
177
  "metadata": {
178
  "execution": {
179
+ "iopub.execute_input": "2026-03-25T06:38:26.483841Z",
180
+ "iopub.status.busy": "2026-03-25T06:38:26.483701Z",
181
+ "iopub.status.idle": "2026-03-25T06:38:26.488982Z",
182
+ "shell.execute_reply": "2026-03-25T06:38:26.488521Z",
183
+ "shell.execute_reply.started": "2026-03-25T06:38:26.483826Z"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  }
185
  },
186
  "outputs": [
 
190
  "(torch.Size([1, 1, 94, 128]), torch.float16)"
191
  ]
192
  },
193
+ "execution_count": 2,
194
  "metadata": {},
195
  "output_type": "execute_result"
196
  }
197
  ],
198
  "source": [
199
+ "cos = torch.load('cos.pt')\n",
200
  "cos.shape, cos.dtype"
201
  ]
202
  },
203
  {
204
  "cell_type": "code",
205
+ "execution_count": 10,
206
+ "id": "a874f8d4-efa9-4ba8-9e57-c019da0775bd",
207
+ "metadata": {
208
+ "execution": {
209
+ "iopub.execute_input": "2026-03-25T06:43:28.437717Z",
210
+ "iopub.status.busy": "2026-03-25T06:43:28.437502Z",
211
+ "iopub.status.idle": "2026-03-25T06:43:28.440474Z",
212
+ "shell.execute_reply": "2026-03-25T06:43:28.439711Z",
213
+ "shell.execute_reply.started": "2026-03-25T06:43:28.437702Z"
214
+ }
215
+ },
216
+ "outputs": [],
217
+ "source": [
218
+ "torch.set_printoptions(precision=10)"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": 12,
224
  "id": "f44460e3-58e9-4fd2-898a-06e8a00f9365",
225
  "metadata": {
226
  "execution": {
227
+ "iopub.execute_input": "2026-03-25T06:43:37.079158Z",
228
+ "iopub.status.busy": "2026-03-25T06:43:37.078953Z",
229
+ "iopub.status.idle": "2026-03-25T06:43:37.086609Z",
230
+ "shell.execute_reply": "2026-03-25T06:43:37.086029Z",
231
+ "shell.execute_reply.started": "2026-03-25T06:43:37.079144Z"
232
  }
233
  },
234
  "outputs": [
235
  {
236
  "data": {
237
  "text/plain": [
238
+ "tensor([[[0.5405273438, 0.6923828125, 0.7963867188, 0.8662109375, 0.9125976562,\n",
239
+ " 0.9428710938, 0.9628906250, 0.9755859375, 0.9843750000, 0.9897460938,\n",
240
+ " 0.9931640625, 0.9956054688, 0.9970703125, 0.9980468750, 0.9990234375,\n",
241
+ " 0.9990234375, 0.9995117188, 0.9995117188, 1.0000000000, 1.0000000000,\n",
242
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
243
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
244
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
245
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
246
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
247
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
248
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
249
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
250
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.5405273438,\n",
251
+ " 0.6923828125, 0.7963867188, 0.8662109375, 0.9125976562, 0.9428710938,\n",
252
+ " 0.9628906250, 0.9755859375, 0.9843750000, 0.9897460938, 0.9931640625,\n",
253
+ " 0.9956054688, 0.9970703125, 0.9980468750, 0.9990234375, 0.9990234375,\n",
254
+ " 0.9995117188, 0.9995117188, 1.0000000000, 1.0000000000, 1.0000000000,\n",
255
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
256
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
257
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
258
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
259
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
260
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
261
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
262
+ " 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000,\n",
263
+ " 1.0000000000, 1.0000000000, 1.0000000000]]], device='cuda:0',\n",
264
+ " dtype=torch.float16)"
265
  ]
266
  },
267
+ "execution_count": 12,
268
  "metadata": {},
269
  "output_type": "execute_result"
270
  }