Spaces:

FINAL-Bench
/

LiteRT-LM

Running

App Files Files Community

LiteRT-LM / python /litert_lm_cli /main.py

SeaWolf-AI

Upload full LiteRT-LM codebase

5f923cd verified 3 days ago

raw

history blame contribute delete

16.2 kB

	# Copyright 2026 The ODML Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Main script for litert-lm binary."""

	import datetime
	import os
	import shutil
	import subprocess
	import sys

	import click

	import litert_lm
	from litert_lm_cli import help_formatter
	from litert_lm_cli import model
	from litert_lm_cli import venv_manager
	from litert_lm_cli import version


	@click.group(
	cls=help_formatter.ColorGroup,
	name="litert-lm",
	context_settings=dict(
	show_default=True,
	max_content_width=120,
	help_option_names=["-h", "--help"],
	),
	)
	@click.version_option(version=version.VERSION)
	def cli():
	"""CLI tool for LiteRT-LM models."""


	@cli.command(name="list")
	def list_models():
	"""Lists all imported LiteRT-LM models."""
	base_dir = model.get_converted_models_base_dir()
	click.echo(f"Listing models in: {base_dir}")

	models = sorted(model.Model.get_all_models(), key=lambda m: m.model_id)

	# Calculate dynamic width for ID column
	id_width = max([len(m.model_id) for m in models] + [len("ID"), 25]) + 2

	click.echo(
	click.style(f"{'ID':<{id_width}} {'SIZE':<15} {'MODIFIED'}", bold=True)
	)

	for model_item in models:
	path = model_item.model_path
	try:
	stat = os.stat(path)
	size_bytes = stat.st_size
	if size_bytes >= 1024 * 1024 * 1024:
	size_str = f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
	else:
	size_str = f"{size_bytes / (1024 * 1024):.1f} MB"
	modified_date = datetime.datetime.fromtimestamp(stat.st_mtime).strftime(
	"%Y-%m-%d %H:%M:%S"
	)
	except FileNotFoundError:
	size_str = "Unknown"
	modified_date = "Unknown"

	click.echo(
	f"{model_item.model_id:<{id_width}} {size_str:<15} {modified_date}"
	)


	def _download_from_huggingface(repo_id, filename, token):
	"""Downloads a file from HuggingFace Hub.

	Args:
	repo_id: The HuggingFace repository ID.
	filename: The filename to download.
	token: The HuggingFace API token.

	Returns:
	The local path to the downloaded file, or None if download failed.
	"""
	try:
	# pylint: disable=g-import-not-at-top
	from huggingface_hub import get_token
	from huggingface_hub import hf_hub_download
	except ImportError:
	click.echo(
	click.style(
	"Error: huggingface_hub is not installed. Please install it to"
	" download from HuggingFace.",
	fg="red",
	)
	)
	return None

	effective_token = token or get_token()

	click.echo(f"Downloading {filename} from {repo_id}...")
	try:
	return hf_hub_download(
	repo_id=repo_id,
	filename=filename,
	token=effective_token,
	)
	except Exception as e: # pylint: disable=broad-exception-caught
	click.echo(
	click.style(f"Error downloading from HuggingFace: {e}", fg="red")
	)
	if not effective_token:
	click.echo(
	click.style(
	"HuggingFace token not found. If this is a private or gated"
	" repository, you can provide the token via the"
	" --huggingface-token option, setting the"
	" HUGGING_FACE_HUB_TOKEN environment variable, or by running"
	" 'hf auth login'.",
	fg="yellow",
	)
	)
	return None


	def huggingface_options(f):
	"""Decorator for HuggingFace-related options."""
	f = click.option(
	"--huggingface-token",
	default=None,
	help=(
	"The HuggingFace API token to use when downloading from a access"
	" gated HuggingFace repository. This can also be set via the"
	" HUGGING_FACE_HUB_TOKEN or HF_TOKEN environment variables, or by"
	" running `hf auth login`."
	),
	)(f)
	f = click.option(
	"--from-huggingface-repo",
	default=None,
	help="The HuggingFace repository ID to download the model from, if set.",
	)(f)
	return f


	@cli.command(
	name="import",
	help="""Imports a model from a local path or HuggingFace hub.

	MODEL_FILE: The local path to the model file, or the path in the HuggingFace
	repo if --from-huggingface-repo is set.
	MODEL_REF: The ID to store the model as. Defaults to the filename of
	MODEL_FILE.

	\b
	Examples:
	# Import from a local path
	litert-lm import ./model.litertlm my-model

	# Import from a HuggingFace repository
	litert-lm import --from-huggingface-repo org/repo model.litertlm my-model

	# Import and use the default model ID
	litert-lm import ./model.litertlm""",
	)
	@huggingface_options
	@click.argument("model_file")
	@click.argument("model_ref", required=False)
	def import_model(
	from_huggingface_repo, huggingface_token, model_file, model_ref
	):
	"""Imports a model from a local path or HuggingFace hub.

	Args:
	from_huggingface_repo: The HuggingFace repository ID.
	huggingface_token: HuggingFace API token.
	model_file: The path in the repo (if from-huggingface-repo is set) or local
	path.
	model_ref: The reference ID to store the model as. Defaults to the filename
	of MODEL_FILE.
	"""
	effective_model_ref = model_ref or os.path.basename(model_file)

	if from_huggingface_repo:
	source = _download_from_huggingface(
	from_huggingface_repo, model_file, huggingface_token
	)
	if not source:
	return
	else:
	source = model_file
	if not os.path.exists(source):
	click.echo(click.style(f"Source file not found: {source}", fg="red"))
	return

	model_obj = model.Model.from_model_id(effective_model_ref)
	model_path = model_obj.model_path
	model_dir = os.path.dirname(model_path)

	os.makedirs(model_dir, exist_ok=True)

	shutil.copy(source, model_path)
	click.echo(
	click.style(f"Successfully imported model to {model_path}", fg="green")
	)
	click.echo(
	click.style(
	"You can now run the model with 'litert-lm run"
	f" {effective_model_ref}'",
	fg="green",
	)
	)


	@cli.command(help="Deletes a model from the local storage.")
	@click.argument("model_id")
	def delete(model_id):
	"""Deletes a model from the local storage.

	Args:
	model_id: The ID of the model to delete.
	"""
	model_obj = model.Model.from_model_id(model_id)
	model_dir = os.path.dirname(model_obj.model_path)
	if os.path.exists(model_dir) and model_dir.startswith(
	model.get_converted_models_base_dir()
	):
	shutil.rmtree(model_dir)
	click.echo(click.style(f"Deleted model: {model_id}", fg="green"))
	else:
	click.echo(click.style(f"Model not found: {model_id}", fg="red"))


	@cli.command(help="Renames a model.")
	@click.argument("old_model_id")
	@click.argument("new_model_id")
	def rename(old_model_id, new_model_id):
	"""Renames a model.

	Args:
	old_model_id: The current model ID.
	new_model_id: The new model ID.
	"""
	old_model = model.Model.from_model_id(old_model_id)
	if not old_model.exists():
	click.echo(click.style(f"Model not found: {old_model_id}", fg="red"))
	return

	new_model = model.Model.from_model_id(new_model_id)
	if new_model.exists():
	click.echo(
	click.style(f"Target model ID already exists: {new_model_id}", fg="red")
	)
	return

	old_dir = os.path.dirname(old_model.model_path)
	new_dir = os.path.dirname(new_model.model_path)

	os.makedirs(os.path.dirname(new_dir), exist_ok=True)
	shutil.move(old_dir, new_dir)
	click.echo(
	click.style(
	f'Renamed model "{old_model_id}" to "{new_model_id}"', fg="green"
	)
	)


	def parse_speculative_decoding(unused_ctx, unused_param, value):
	"""Click callback to parse speculative decoding mode strings into bool \| None.

	Args:
	unused_ctx: The click context.
	unused_param: The click parameter.
	value: The value to parse ("auto", "true", or "false").

	Returns:
	True for "true", False for "false", and None for "auto".
	"""
	if value is None:
	return None
	value_lower = value.lower()
	if value_lower == "auto":
	return None
	elif value_lower == "true":
	return True
	elif value_lower == "false":
	return False
	return value


	def common_inference_options(f):
	"""Decorator for common options shared across commands."""
	f = huggingface_options(f)
	f = click.option(
	"--verbose",
	is_flag=True,
	default=False,
	help="Whether to enable verbose logging.",
	)(f)
	f = click.option(
	"--enable-speculative-decoding",
	type=click.Choice(["auto", "true", "false"], case_sensitive=False),
	default="auto",
	callback=parse_speculative_decoding,
	help="""\b
	Speculative decoding mode ("auto", "true", "false").
	- auto: Automatically determine the speculative decoding behavior from the model metadata.
	- true: Force enable speculative decoding. It will throw an error if the model does not support it.
	- false: Force disable speculative decoding.
	""",
	)(f)
	f = click.option(
	"-b",
	"--backend",
	type=click.Choice(["cpu", "gpu"], case_sensitive=False),
	default="cpu",
	help="The backend to use.",
	)(f)
	return f


	@cli.command(
	help="""Benchmarks a LiteRT-LM model.

	\b
	Examples:
	# Benchmark using a model ID from 'litert-lm list'
	litert-lm benchmark my-model

	# Benchmark using a local path
	litert-lm benchmark ./model.litertlm

	# Benchmark directly from a HuggingFace repository
	litert-lm benchmark --from-huggingface-repo org/repo model.litertlm""",
	)
	@click.argument("model_reference")
	@click.option(
	"-p",
	"--prefill_tokens",
	default=256,
	type=int,
	help="The number of tokens to prefill.",
	)
	@click.option(
	"-d",
	"--decode_tokens",
	default=256,
	type=int,
	help="The number of tokens to decode.",
	)
	@common_inference_options
	def benchmark(
	model_reference: str,
	prefill_tokens: int = 256,
	decode_tokens: int = 256,
	backend: str = "cpu",
	android: bool = False,
	enable_speculative_decoding: bool \| None = None,
	verbose: bool = False,
	from_huggingface_repo: str \| None = None,
	huggingface_token: str \| None = None,
	):
	"""Benchmarks a LiteRT-LM model.

	Args:
	model_reference: A relative or absolute path to a .litertlm model file, or a
	model ID from `litert-lm list`. If from-huggingface-repo is set, this is
	the filename in the repository.
	prefill_tokens: The number of tokens to prefill.
	decode_tokens: The number of tokens to decode.
	backend: The backend to use (cpu or gpu).
	android: Run on Android via ADB.
	enable_speculative_decoding: Speculative decoding mode (True, False, or None
	for auto).
	verbose: Whether to enable verbose logging.
	from_huggingface_repo: The HuggingFace repository ID.
	huggingface_token: The HuggingFace API token.
	"""
	if verbose:
	litert_lm.set_min_log_severity(litert_lm.LogSeverity.VERBOSE)

	if from_huggingface_repo:
	model_path = _download_from_huggingface(
	from_huggingface_repo, model_reference, huggingface_token
	)
	if not model_path:
	return
	model_obj = model.Model.from_model_path(model_path)
	else:
	model_obj = model.Model.from_model_reference(model_reference)

	model_obj.benchmark(
	prefill_tokens=prefill_tokens,
	decode_tokens=decode_tokens,
	is_android=android,
	backend=backend,
	enable_speculative_decoding=enable_speculative_decoding,
	)


	@cli.command(
	help="""Runs a LiteRT-LM model interactively or with a single prompt.

	\b
	Examples:
	# Run interactively using a model ID from 'litert-lm list'
	litert-lm run my-model

	# Run with a single prompt using a local path
	litert-lm run ./model.litertlm --prompt "Hi there!"

	# Run directly from a HuggingFace repository
	litert-lm run --from-huggingface-repo org/repo model.litertlm""",
	)
	@click.argument("model_reference")
	@click.option(
	"--prompt", default=None, help="A single prompt to run once and exit."
	)
	@click.option(
	"--preset",
	type=click.Path(exists=True, dir_okay=False),
	default=None,
	help=(
	"Path to a Python file containing tool functions and system"
	" instructions."
	),
	)
	@click.option(
	"--no-template",
	is_flag=True,
	default=False,
	help=(
	"Interact with the model directly without applying prompt templates."
	" That means the input should include all control tokens for the model"
	" expected."
	),
	)
	@common_inference_options
	def run(
	model_reference,
	prompt=None,
	preset=None,
	backend="cpu",
	android=False,
	enable_speculative_decoding=None,
	verbose=False,
	no_template=False,
	from_huggingface_repo=None,
	huggingface_token=None,
	):
	r"""Runs a LiteRT-LM model interactively or with a single prompt.

	Args:
	model_reference: A relative or absolute path to a .litertlm model file, or a
	model ID from `litert-lm list`. If from-huggingface-repo is set, this is
	the filename in the repository.
	prompt: A single prompt to run once and exit.
	preset: Path to a Python file containing tool functions and system
	instructions.
	backend: The backend to use (cpu or gpu).
	android: Run on Android via ADB.
	enable_speculative_decoding: Speculative decoding mode (True, False, or None
	for auto).
	verbose: Whether to enable verbose logging.
	no_template: Interact with the model directly without applying prompt
	templates or stripping stop tokens.
	from_huggingface_repo: The HuggingFace repository ID.
	huggingface_token: The HuggingFace API token.
	"""
	# If the stdin is not connected to the terminal, e.g., piped or redirected
	# input, then handle the input as the one-shot prompt.
	#
	# # Redirected input:
	# $ litert-lm run < prompt.txt
	# $ litert-lm run --prompt="Explain this error log" < error.log
	#
	# # Piped input:
	# $ cat text.txt \| litert-lm run --prompt="Summarize the content."
	if not sys.stdin.isatty():
	piped_input = sys.stdin.read().strip()
	if piped_input:
	prompt = f"{prompt}\n\n{piped_input}" if prompt else piped_input
	elif not prompt:
	# If no prompt is provided and it's not a TTY, we can't be interactive.
	return

	if verbose:
	litert_lm.set_min_log_severity(litert_lm.LogSeverity.VERBOSE)

	if from_huggingface_repo:
	model_path = _download_from_huggingface(
	from_huggingface_repo, model_reference, huggingface_token
	)
	if not model_path:
	return
	model_obj = model.Model.from_model_path(model_path)
	else:
	model_obj = model.Model.from_model_reference(model_reference)
	if not model_obj.exists():
	# Only auto-convert if it looks like a HuggingFace repo ID (account/repo)
	# and is not a local path.
	parts = model_reference.split("/")
	if len(parts) == 2 and all(parts) and not os.path.exists(model_reference):
	click.echo(
	click.style(
	f"Model '{model_reference}' not found. Attempting to convert"
	f" from https://huggingface.co/{model_reference} ...",
	fg="yellow",
	)
	)
	convert.callback(source=model_reference)
	model_obj = model.Model.from_model_reference(model_reference)

	if not model_obj.exists():
	click.echo(
	click.style(
	f"Failed to find or convert model '{model_reference}'.",
	fg="red",
	)
	)
	return

	model_obj.run_interactive(
	prompt=prompt,
	is_android=android,
	backend=backend,
	preset=preset,
	enable_speculative_decoding=enable_speculative_decoding,
	no_template=no_template,
	)


	def main():
	litert_lm.set_min_log_severity(litert_lm.LogSeverity.ERROR)
	cli()


	if __name__ == "__main__":
	main()