File size: 4,764 Bytes
5f923cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// Copyright 2026 The ODML Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "runtime/executor/common_utils.h"

#include <cstdint>
#include <cstring>

#include "absl/status/status.h"  // from @com_google_absl
#include "absl/types/span.h"  // from @com_google_absl
#include "litert/cc/litert_macros.h"  // from @litert
#include "litert/cc/litert_tensor_buffer.h"  // from @litert
#include "runtime/util/status_macros.h"

namespace litert::lm {

// Expands `src_data` (with shape `src_shape`) into `dst_data` (with shape
// `dst_shape`), where the two shapes have the same rank and differ in exactly
// one dimension, along which the destination must be at least as large as the
// source. Source data is copied into the leading part of the expanded axis;
// the padding region is zero-filled. `element_size` is the size in bytes of
// one element.
//
// Returns InvalidArgumentError if the shapes differ in zero or more than one
// dimension, or if the destination is smaller than the source along the
// differing axis.
absl::Status ExpandBuffer(const uint8_t* src_data,
                          absl::Span<const int> src_shape, uint8_t* dst_data,
                          absl::Span<const int> dst_shape,
                          size_t element_size) {
  RET_CHECK_EQ(src_shape.size(), dst_shape.size());

  // Locate the single axis along which the destination is larger.
  int expansion_axis = -1;
  for (size_t i = 0; i < src_shape.size(); ++i) {
    if (src_shape[i] != dst_shape[i]) {
      if (expansion_axis != -1) {
        return absl::InvalidArgumentError(
            "Tensors differ in more than one dimension.");
      }
      if (dst_shape[i] < src_shape[i]) {
        return absl::InvalidArgumentError(
            "Destination tensor dimension is smaller than source along an "
            "axis.");
      }
      expansion_axis = static_cast<int>(i);
    }
  }
  if (expansion_axis == -1) {
    return absl::InvalidArgumentError("No expansion axis found.");
  }

  // Zero-fill the whole destination up front; the expanded (padding) region
  // simply stays zero after the copies below.
  int64_t dest_total_elements = 1;
  for (int dim : dst_shape) {
    dest_total_elements *= dim;
  }
  memset(dst_data, 0, dest_total_elements * element_size);

  // Elements in one "inner block": product of all dims after the expansion
  // axis.
  int64_t inner_block_size_in_elements = 1;
  for (size_t i = static_cast<size_t>(expansion_axis) + 1;
       i < src_shape.size(); ++i) {
    inner_block_size_in_elements *= src_shape[i];
  }

  // Number of outer blocks: product of all dims before the expansion axis.
  int64_t outer_block_count = 1;
  for (int i = 0; i < expansion_axis; ++i) {
    outer_block_count *= src_shape[i];
  }

  // Byte strides of one outer block in source and destination.
  const size_t src_outer_block_bytes = static_cast<size_t>(
      src_shape[expansion_axis] * inner_block_size_in_elements * element_size);
  const size_t dst_outer_block_bytes = static_cast<size_t>(
      dst_shape[expansion_axis] * inner_block_size_in_elements * element_size);

  // Within one outer block, the src_shape[expansion_axis] inner blocks are
  // contiguous in the source AND land contiguously at the start of the
  // corresponding destination outer block, so the whole outer block can be
  // copied with a single memcpy (instead of one memcpy per inner block).
  for (int64_t i = 0; i < outer_block_count; ++i) {
    memcpy(dst_data + i * dst_outer_block_bytes,
           src_data + i * src_outer_block_bytes, src_outer_block_bytes);
  }

  return absl::OkStatus();
}

// Copies `size` bytes from `src_buffer` (starting at byte `src_offset`) into
// `dst_buffer` (starting at byte `dst_offset`). A `size` of -1 means "copy
// everything from `src_offset` to the end of the source buffer". Both buffers
// are locked (read / write respectively) for the duration of the copy.
absl::Status CopyBuffer(const TensorBuffer& src_buffer,
                        TensorBuffer& dst_buffer, size_t src_offset,
                        size_t dst_offset, int64_t size) {
  LITERT_ASSIGN_OR_RETURN(auto src_buffer_size, src_buffer.PackedSize());
  LITERT_ASSIGN_OR_RETURN(auto dst_buffer_size, dst_buffer.PackedSize());
  // Validate offsets BEFORE computing the default size: if src_offset
  // exceeded the buffer size, `src_buffer_size - src_offset` would wrap in
  // unsigned arithmetic and yield a negative `size` that could slip past the
  // bounds checks below (e.g. size=10, offset=20 -> size=-10 ->
  // 20 + (-10) = 10 <= 10) and hand memcpy a huge size_t.
  LITERT_RETURN_IF_ERROR(src_offset <= src_buffer_size);
  LITERT_RETURN_IF_ERROR(dst_offset <= dst_buffer_size);
  if (size == -1) {
    size = src_buffer_size - src_offset;
  }
  // Reject negative sizes other than the -1 sentinel; a negative int64_t
  // converted to size_t in memcpy would be an enormous value.
  LITERT_RETURN_IF_ERROR(size >= 0);
  LITERT_RETURN_IF_ERROR(src_offset + size <= src_buffer_size);
  LITERT_RETURN_IF_ERROR(dst_offset + size <= dst_buffer_size);

  // TODO: b/452977992: For GPU, we could use a shader to copy the buffer. If we
  // were to do it this way for GPU, then it might make more sense just to keep
  // the copy on the host. Also for GPU, consider optionally keeping its buffer
  // copies in CPU memory to save on GPU memory.
  LITERT_ASSIGN_OR_RETURN(auto src_read_lock,
                          TensorBufferScopedLock::Create(
                              src_buffer, TensorBuffer::LockMode::kRead));
  LITERT_ASSIGN_OR_RETURN(auto dst_write_lock,
                          TensorBufferScopedLock::Create(
                              dst_buffer, TensorBuffer::LockMode::kWrite));

  memcpy(static_cast<char*>(dst_write_lock.second) + dst_offset,
         static_cast<const char*>(src_read_lock.second) + src_offset, size);
  return absl::OkStatus();
}

}  // namespace litert::lm