Spaces:

lamhieu
/

lightweight-embeddings

Running

App Files Files Community

lamhieu committed on Jan 2

Commit

e8f9d10

1 Parent(s): fce0b1f

chore: initialize the project

Browse files

Files changed (13) hide show

.editorconfig +14 -0
.github/workflows/githubhfsync.yaml +26 -0
.gitignore +54 -0
.pylintrc +579 -0
.vscode/settings.json +6 -0
Dockerfile +34 -0
README.md +85 -0
app.py +1 -0
lightweight_embeddings/__init__.py +193 -0
lightweight_embeddings/router.py +296 -0
lightweight_embeddings/service.py +477 -0
pyproject.toml +14 -0
requirements.txt +9 -0

.editorconfig ADDED Viewed

	@@ -0,0 +1,14 @@

+# top-most EditorConfig file
+root = true
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+# Matches all files
+# Set default charset and indentation
+[*]
+charset = utf-8
+indent_style = space
+indent_size = 2

.github/workflows/githubhfsync.yaml ADDED Viewed

	@@ -0,0 +1,26 @@

+name: Sync Repository to HuggingFace Space
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:  # Enable manual trigger
+jobs:
+  sync-to-huggingface:
+    name: Sync code to HuggingFace Space
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0  # Fetch all history for all branches and tags
+          lfs: true      # Enable Git LFS support
+      - name: Push to HuggingFace Space
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          if ! git push https://lamhieu:$HF_TOKEN@huggingface.co/spaces/lamhieu/lightweight-embeddings main; then
+            echo "Failed to sync with HuggingFace Space"
+            exit 1
+          fi

.gitignore ADDED Viewed

	@@ -0,0 +1,54 @@

+# Python bytecode files
+# Ignore Python bytecode files
+*.pyc
+# Distribution packages
+# Ignore distribution packages
+/dist/*
+# Test and coverage reports
+# Ignore coverage and test result files
+.coverage
+.pytest_cache
+.mypy_cache
+# Log and temporary files
+# Ignore log files and temporary files
+*.log
+*.tmp
+tmp
+# System files
+# Ignore OS generated files
+.DS_Store
+# IDE and editor specific files
+# Ignore project-specific files from various IDEs and editors
+.idea/*
+.vscode/*
+.python-version
+# Generated documentation
+# Ignore generated documentation files
+/docs/site/*
+# Virtual environments
+# Ignore virtual environment directories
+.venv
+# Configuration files
+# Ignore configuration files
+.poetry.toml
+.env.local
+.env.development
+.env.test
+.env.production
+.env
+# Temporary files and directories for operations
+# Ignore Ops temporary files and directories
+.aider*
+# Credentials and secrets
+# Ignore credentials and secrets files
+.credentials

.pylintrc ADDED Viewed

	@@ -0,0 +1,579 @@

+[MAIN]
+# Specify a configuration file.
+#rcfile=
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+# Files or directories to be skipped. They should be base names, not
+# paths.
+ignore=CVS
+# Add files or directories matching the regex patterns to the ignore-list. The
+# regex matches against paths and can be in Posix or Windows format.
+ignore-paths=
+# Files or directories matching the regex patterns are skipped. The regex
+# matches against base names, not paths.
+ignore-patterns=^\.#
+# Pickle collected data for later comparisons.
+persistent=yes
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+    pylint.extensions.check_elif,
+    pylint.extensions.bad_builtin,
+    pylint.extensions.docparams,
+    pylint.extensions.for_any_all,
+    pylint.extensions.set_membership,
+    pylint.extensions.code_style,
+    pylint.extensions.overlapping_exceptions,
+    pylint.extensions.typing,
+    pylint.extensions.redefined_variable_type,
+    pylint.extensions.comparison_placement,
+    pylint.extensions.mccabe,
+# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
+# number of processors available to use.
+jobs=0
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loaded into the active Python interpreter and may
+# run arbitrary code
+extension-pkg-allow-list=
+# Minimum supported python version
+py-version = 3.7.2
+# Control the amount of potential inferred values when inferring a single
+# object. This can help the performance when dealing with large functions or
+# complex, nested conditions.
+limit-inference-results=100
+# Specify a score threshold to be exceeded before program exits with error.
+fail-under=10.0
+# Return non-zero exit code if any of these messages/categories are detected,
+# even if score is above --fail-under value. Syntax same as enable. Messages
+# specified are enabled, while categories only check already-enabled messages.
+fail-on=
+[MESSAGES CONTROL]
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
+# confidence=
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+enable=
+    use-symbolic-message-instead,
+    useless-suppression,
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then re-enable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W"
+disable=
+    attribute-defined-outside-init,
+    invalid-name,
+    missing-docstring,
+    protected-access,
+    too-few-public-methods,
+    # handled by black
+    format,
+    # We anticipate #3512 where it will become optional
+    fixme,
+    cyclic-import,
+    import-error,
+    #
+    unnecessary-pass,
+    unrecognized-option,
+    cell-var-from-loop,
+    no-member,
+    wrong-import-order,
+    raise-missing-from,
+    consider-using-f-string
+[REPORTS]
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html. You can also give a reporter class, eg
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+# Tells whether to display a full report or only the messages
+reports=no
+# Python expression which should return a score less than or equal to 10 (10 is the highest
+# score). You have access to the variables 'fatal', 'error', 'warning', 'refactor', 'convention'
+# and 'info', which contain the number of messages in each category, as
+# well as 'statement', which is the total number of statements analyzed. This
+# score is used by the global evaluation report (RP0004).
+evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details
+#msg-template=
+# Activate the evaluation score.
+score=yes
+[LOGGING]
+# Logging modules to check that the string format arguments are in logging
+# function parameter format
+logging-modules=logging
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+[MISCELLANEOUS]
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+# Regular expression of note tags to take in consideration.
+#notes-rgx=
+[SIMILARITIES]
+# Minimum lines number of a similarity.
+min-similarity-lines=6
+# Ignore comments when computing similarities.
+ignore-comments=yes
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+# Ignore imports when computing similarities.
+ignore-imports=yes
+# Signatures are removed from the similarity computation
+ignore-signatures=yes
+[VARIABLES]
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+# A regular expression matching the name of dummy variables (i.e. expectedly
+# not used).
+dummy-variables-rgx=_$|dummy
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,_cb
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables=yes
+# List of names allowed to shadow builtins
+allowed-redefined-builtins=
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore.
+ignored-argument-names=_.*
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
+[FORMAT]
+# Maximum number of characters on a single line.
+max-line-length=120
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+# Maximum number of lines in a module
+max-module-lines=1000
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+[BASIC]
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+# Good variable names regexes, separated by a comma. If names match any regex,
+# they will always be accepted
+good-names-rgxs=
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+# Bad variable names regexes, separated by a comma. If names match any regex,
+# they will always be refused
+bad-names-rgxs=
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+# Include a hint for the correct naming format with invalid-name
+include-naming-hint=no
+# Naming style matching correct function names.
+function-naming-style=snake_case
+# Regular expression matching correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+# Naming style matching correct variable names.
+variable-naming-style=snake_case
+# Regular expression matching correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+# Naming style matching correct constant names.
+const-naming-style=UPPER_CASE
+# Regular expression matching correct constant names
+const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+# Naming style matching correct attribute names.
+attr-naming-style=snake_case
+# Regular expression matching correct attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,}$
+# Naming style matching correct argument names.
+argument-naming-style=snake_case
+# Regular expression matching correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+# Naming style matching correct class attribute names.
+class-attribute-naming-style=any
+# Regular expression matching correct class attribute names
+class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
+# Naming style matching correct class constant names.
+class-const-naming-style=UPPER_CASE
+# Regular expression matching correct class constant names. Overrides class-
+# const-naming-style.
+#class-const-rgx=
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style=any
+# Regular expression matching correct inline iteration names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+# Naming style matching correct class names.
+class-naming-style=PascalCase
+# Regular expression matching correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+# Naming style matching correct module names.
+module-naming-style=snake_case
+# Regular expression matching correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+# Naming style matching correct method names.
+method-naming-style=snake_case
+# Regular expression matching correct method names
+method-rgx=[a-z_][a-z0-9_]{2,}$
+# Regular expression which can overwrite the naming style set by typevar-naming-style.
+#typevar-rgx=
+# Regular expression which should only match function or class names that do
+# not require a docstring. Use ^(?!__init__$)_ to also check __init__.
+no-docstring-rgx=__.*__
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+# List of decorators that define properties, such as abc.abstractproperty.
+property-classes=abc.abstractproperty
+[TYPECHECK]
+# Regex pattern to define which classes are considered mixins if ignore-mixin-
+# members is set to 'yes'
+mixin-class-rgx=.*MixIn
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis). It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=SQLObject, optparse.Values, thread._local, _thread._local
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=REQUEST,acl_users,aq_parent,argparse.Namespace
+# List of decorators that create context managers from functions, such as
+# contextlib.contextmanager.
+contextmanager-decorators=contextlib.contextmanager
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=yes
+# This flag controls whether pylint should warn about no-member and similar
+# checks whenever an opaque object is returned when inferring. The inference
+# can return multiple potential results while evaluating a Python object, but
+# some branches might not be evaluated, which results in partial inference. In
+# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+# The total number of similar names that should be taken in consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+[SPELLING]
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the python-enchant package.
+spelling-dict=
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:,pragma:,# noinspection
+# A path to a file that contains private dictionary; one word per line.
+spelling-private-dict-file=.pyenchant_pylint_custom_dict.txt
+# Tells whether to store unknown words to indicated private dictionary in
+# --spelling-private-dict-file option instead of raising a message.
+spelling-store-unknown-words=no
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=2
+[DESIGN]
+# Maximum number of arguments for function / method
+max-args=10
+# Maximum number of locals for function / method body
+max-locals=25
+# Maximum number of return / yield for function / method body
+max-returns=11
+# Maximum number of branch for function / method body
+max-branches=27
+# Maximum number of statements in function / method body
+max-statements=100
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+# List of qualified class names to ignore when counting class parents (see R0901).
+ignored-parents=
+# Maximum number of attributes for a class (see R0902).
+max-attributes=11
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=25
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+# List of regular expressions of class ancestor names to
+# ignore when counting public methods (see R0903).
+exclude-too-few-public-methods=
+max-complexity=10
+[CLASSES]
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp,__post_init__
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,_fields,_replace,_source,_make
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods=no
+[IMPORTS]
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,TERMIOS,Bastion,rexec
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant
+# Couples of modules and preferred modules, separated by a comma.
+preferred-modules=
+[EXCEPTIONS]
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception"
+overgeneral-exceptions=Exception
+[TYPING]
+# Set to ``no`` if the app / library does **NOT** need to support runtime
+# introspection of type annotations. If you use type annotations
+# **exclusively** for type checking of an application, you're probably fine.
+# For libraries, evaluate if some users want to access the type hints at
+# runtime first, e.g., through ``typing.get_type_hints``. Applies to Python
+# versions 3.7 - 3.9
+runtime-typing = no
+[DEPRECATED_BUILTINS]
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=map,input
+[REFACTORING]
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=5
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit,argparse.parse_error
+[STRING]
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=no
+# This flag controls whether the implicit-str-concat should generate a warning
+# on implicit string concatenation in sequences defined over several lines.
+check-str-concat-over-line-jumps=no
+[CODE_STYLE]
+# Max line length for which to still emit suggestions. Used to prevent optional
+# suggestions which would get split by a code formatter (e.g., black). Will
+# default to the setting for ``max-line-length``.
+#max-line-length-suggestions=
+W0107:unnecessary-pass

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "python.languageServer": "Pylance",
+  "python.analysis.typeCheckingMode": "basic",
+  "python.analysis.diagnosticSeverityOverrides": {},
+  "python.analysis.typeshedPaths": [".venv/Lib/site-packages"]
+}

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+# Use Python 3.10.9 as the base image for consistent runtime environment
+FROM python:3.10.9
+# Add metadata labels
+LABEL maintainer="lamhieu.vk@gmail.com"
+LABEL description="Lightweight embeddings service using FastAPI and Hugging Face Transformers"
+LABEL version="1.0"
+# Setup non-root user for security
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+  PATH=/home/user/.local/bin:$PATH
+# Set working directory for all subsequent commands
+WORKDIR $HOME/app
+# Copy only requirements.txt first so the dependency layer below is rebuilt
+# only when requirements.txt itself changes (leverages the Docker layer cache)
+COPY --chown=user requirements.txt .
+# Install Python dependencies
+# --no-cache-dir reduces image size
+# --upgrade ensures latest compatible versions
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy the remaining application files after the install step, so that
+# source-only changes do not invalidate the cached dependency layer
+COPY --chown=user . .
+# Expose service port
+EXPOSE 8000
+# Launch FastAPI application using uvicorn server
+# --host 0.0.0.0: Listen on all network interfaces
+# --port 8000: Run on port 8000
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

README.md ADDED Viewed

	@@ -0,0 +1,85 @@

+---
+title: Lightweight Embeddings
+emoji: 🌍
+colorFrom: green
+colorTo: green
+sdk: docker
+app_file: app.py
+---
+# 🌍 LightweightEmbeddings: Multilingual, Fast, and Lightweight
+**LightweightEmbeddings** is a high-performance framework designed for generating embeddings from **text** or **image-text inputs** across multiple languages. Engineered for efficiency and adaptability, it offers a perfect balance between speed and accuracy, making it ideal for **real-time applications** and **resource-constrained environments**.
+## ✨ Key Features
+- **Multilingual Support**: Seamlessly process text in 100+ languages for truly global applications.
+- **Text and Image Embeddings**: Generate embeddings from text or image-text pairs using state-of-the-art models.
+- **Optimized for Speed**: Built with lightweight transformer models and efficient backends to ensure rapid inference, even on low-resource systems.
+- **Flexibility**: Supports multiple transformer models for diverse use cases:
+  - Text models: `multilingual-e5-small`, `paraphrase-multilingual-MiniLM-L12-v2`, `bge-m3`
+  - Image model: `google/siglip-base-patch16-256-multilingual`
+- **Dockerized**: Deploy anywhere with ease using Docker, making it production-ready out of the box.
+- **Interactive API**: Comes with a **Gradio-powered playground** and detailed REST API documentation.
+## 🚀 Use Cases
+- **Search and Ranking**: Generate embeddings for advanced similarity-based ranking in search engines.
+- **Recommendation Systems**: Use embeddings for personalized recommendations based on user input or preferences.
+- **Multimodal Applications**: Combine text and image embeddings to power tasks like product catalog indexing, content moderation, or multimodal retrieval.
+- **Language Understanding**: Enable semantic text analysis, summarization, or classification in multiple languages.
+## 🛠️ Getting Started
+### 1. Clone the Repository
+```bash
+git clone https://github.com/lh0x00/lightweight-embeddings.git
+cd lightweight-embeddings
+```
+### 2. Build and Run with Docker
+Make sure Docker is installed and running on your machine.
+```bash
+docker build -t lightweight-embeddings .
+docker run -p 8000:8000 lightweight-embeddings
+```
+The API will now be accessible at `http://localhost:8000`.
+## 📖 API Overview
+### Endpoints
+- **`/v1/embeddings`**: Generate text or image embeddings using the model of your choice.
+- **`/v1/rank`**: Rank candidate inputs based on similarity to a query.
+### Interactive Docs
+- Visit the [Swagger UI](http://localhost:8000/docs) for detailed, interactive documentation.
+- Explore additional resources with [ReDoc](http://localhost:8000/redoc).
+## 🔬 Playground
+### Embeddings Playground
+- Test text and image embedding generation in the browser with a user-friendly **Gradio interface**.
+- Simply visit `http://localhost:8000` after starting the server to access the playground.
+## 🌐 Resources
+- **Documentation**: [Explore full documentation](https://lamhieu-lightweight-embeddings.hf.space/docs)
+- **Hugging Face Space**: [Try the live demo](https://huggingface.co/spaces/lamhieu/lightweight-embeddings)
+- **GitHub Repository**: [View source code](https://github.com/lh0x00/lightweight-embeddings)
+## 💡 Why LightweightEmbeddings?
+1. **Performance-Oriented**: Delivers rapid results without compromising on quality, ideal for real-world deployment.
+2. **Highly Adaptable**: Works in diverse environments, from cloud clusters to local devices.
+3. **Developer-Friendly**: Intuitive API design with robust documentation and an integrated playground for experimentation.
+## 👥 Contributors
+- **lamhieu** – Creator and Maintainer ([GitHub](https://github.com/lh0x00))
+Contributions are welcome! Check out the [contribution guidelines](https://github.com/lh0x00/lightweight-embeddings/blob/main/CONTRIBUTING.md).
+## 📜 License
+This project is licensed under the **MIT License**. See the [LICENSE](https://github.com/lh0x00/lightweight-embeddings/blob/main/LICENSE) file for details.

app.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from lightweight_embeddings import app

lightweight_embeddings/__init__.py ADDED Viewed

	@@ -0,0 +1,193 @@

+# filename: __init__.py
+"""
+LightweightEmbeddings - FastAPI Application Entry Point
+This application provides text and image embeddings using multiple text models and one image model.
+Supported text model IDs:
+- "multilingual-e5-small"
+- "paraphrase-multilingual-MiniLM-L12-v2"
+- "bge-m3"
+Supported image model ID:
+- "google/siglip-base-patch16-256-multilingual"
+"""
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import gradio as gr
+import requests
+import json
+from gradio.routes import mount_gradio_app
+# Application metadata (also passed to the FastAPI constructor below)
+__version__ = "1.0.0"
+__author__ = "lamhieu"
+__description__ = "Fast, lightweight, multilingual embeddings solution."
+# URL that call_embeddings_api() posts to; it points at this service's own /v1/embeddings route (change host/port if needed)
+EMBEDDINGS_API_URL = "http://localhost:8000/v1/embeddings"
+# Initialize FastAPI application with Swagger UI at /docs and ReDoc at /redoc
+app = FastAPI(
+    title="Lightweight Embeddings API",
+    description=__description__,
+    version=__version__,
+    docs_url="/docs",
+    redoc_url="/redoc",
+)
+# Configure CORS: every origin, method and header is allowed
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Adjust if needed for specific domains
+    allow_credentials=True,  # NOTE(review): wildcard origins together with credentials is very permissive - confirm this is intended
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Include your existing router (which provides /v1/embeddings, /v1/rank, etc.); it is imported here, after `app` has been configured
+from .router import router
+app.include_router(router, prefix="/v1")
+def call_embeddings_api(user_input: str, selected_model: str) -> str:
+    """
+    Send a request to the /v1/embeddings endpoint with the given model and input.
+    Return a pretty-printed JSON response or an error message.
+
+    Args:
+        user_input: Value sent as the "input" field of the JSON payload.
+        selected_model: Value sent as the "model" field of the JSON payload.
+
+    Returns:
+        The response body re-serialized as JSON with a 2-space indent when the
+        API answers with status 200 and a parseable JSON body. Otherwise a
+        human-readable string starting with "❌" that describes the network
+        failure, the non-200 status (with the raw response text), or the JSON
+        parsing failure.
+    """
+    payload = {
+        "model": selected_model,
+        "input": user_input,
+    }
+    headers = {"Content-Type": "application/json"}
+    try:
+        response = requests.post(
+            EMBEDDINGS_API_URL, json=payload, headers=headers, timeout=20
+        )
+    except requests.exceptions.RequestException as e:  # request failures are reported as text, not raised
+        return f"❌ Network Error: {str(e)}"
+    if response.status_code != 200:
+        # Any status other than 200 is treated as an error; include the raw body for debugging
+        return f"❌ API Error {response.status_code}: {response.text}"
+    try:
+        data = response.json()
+        return json.dumps(data, indent=2)
+    except ValueError:  # the response body was not valid JSON
+        return "❌ Failed to parse JSON from API response."
+def create_main_interface():
+    """
+    Creates a Gradio Blocks interface showing project info and an embeddings playground.
+
+    The page consists of three Markdown sections (welcome text, project metadata,
+    usage hints) followed by an accordion holding the playground: a text input,
+    a model dropdown, a button and a read-only output box. Clicking the button
+    calls call_embeddings_api with the text and the selected model and shows the
+    returned string in the output box.
+
+    Returns:
+        The constructed gr.Blocks object (not launched or mounted here).
+    """
+    # Static metadata rendered in the project-info Markdown section
+    root_data = {
+        "project": "Lightweight Embeddings Service",
+        "version": "1.0.0",  # NOTE(review): hard-coded; duplicates the module-level __version__
+        "description": (
+            "Fast and efficient multilingual text and image embeddings service "
+            "powered by sentence-transformers, supporting 100+ languages and multi-modal inputs"
+        ),
+        "docs": "https://lamhieu-lightweight-embeddings.hf.space/docs",
+        "github": "https://github.com/lh0x00/lightweight-embeddings",
+        "spaces": "https://huggingface.co/spaces/lamhieu/lightweight-embeddings",
+    }
+    # Available model options for the dropdown (the first entry is the default selection)
+    model_options = [
+        "multilingual-e5-small",
+        "paraphrase-multilingual-MiniLM-L12-v2",
+        "bge-m3",
+        "google/siglip-base-patch16-256-multilingual",
+    ]
+    with gr.Blocks(title="Lightweight Embeddings", theme="default") as demo:
+        # Project Info
+        gr.Markdown(
+            """
+            # 🎉 **Lightweight Embeddings Service** 🎉
+            Welcome to the **Lightweight Embeddings** API, a blazing-fast and flexible service
+            supporting **text** and **image** embeddings. Below you'll find key project details:
+            """
+        )
+        gr.Markdown(
+            f"""
+            **Project**: {root_data["project"]} 🚀
+            **Version**: {root_data["version"]}
+            **Description**: {root_data["description"]}
+            **Docs**: [Click here]({root_data["docs"]}) 😎
+            **GitHub**: [Check it out]({root_data["github"]}) 🐙
+            **Spaces**: [Explore]({root_data["spaces"]}) 🤗
+            """
+        )
+        gr.Markdown(
+            """
+            ---
+            ### 💡 How to Use
+            - Visit **/docs** or **/redoc** for interactive API documentation.
+            - Check out **/v1/embeddings** and **/v1/rank** endpoints for direct usage.
+            - Or try the simple playground below! Enjoy exploring a multilingual, multi-modal world! 🌏🌐
+            """
+        )
+        # Embeddings Playground (accordion is expanded by default)
+        with gr.Accordion("🔬 Try the Embeddings Playground", open=True):
+            gr.Markdown(
+                "Enter your **text** or an **image URL**, pick a model, "
+                "then click **Generate** to get embeddings from the `/v1/embeddings` API."
+            )
+            input_text = gr.Textbox(
+                label="Input Text or Image URL",
+                placeholder="Type some text or paste an image URL...",
+                lines=3,
+            )
+            model_dropdown = gr.Dropdown(
+                choices=model_options,
+                value=model_options[0],
+                label="Select Model",
+            )
+            generate_btn = gr.Button("Generate Embeddings")
+            output_json = gr.Textbox(
+                label="Embeddings API Response",
+                lines=15,
+                interactive=False,
+            )
+            # Link the button to call_embeddings_api; its returned string fills the output box
+            generate_btn.click(
+                fn=call_embeddings_api,
+                inputs=[input_text, model_dropdown],
+                outputs=output_json,
+            )
+    return demo
+# Build the Gradio Blocks UI once, at import time, and mount it on `app` at the
+# root path ("/").
+main_interface = create_main_interface()
+mount_gradio_app(app, main_interface, path="/")
+# Startup and shutdown events
+@app.on_event("startup")
+async def startup_event():
+    """
+    Initialize resources (like model loading) when the application starts.
+    """
+    pass
+@app.on_event("shutdown")
+async def shutdown_event():
+    """
+    Perform cleanup before the application shuts down.
+    """
+    pass

lightweight_embeddings/router.py ADDED Viewed

	@@ -0,0 +1,296 @@

+# filename: router.py
+"""
+FastAPI Router for Embeddings Service
+This file exposes the EmbeddingsService functionality via a RESTful API
+to generate embeddings and rank candidates.
+Supported Text Model IDs:
+- "multilingual-e5-small"
+- "paraphrase-multilingual-MiniLM-L12-v2"
+- "bge-m3"
+Supported Image Model ID:
+- "google/siglip-base-patch16-256-multilingual"
+"""
+from __future__ import annotations
+import logging
+from typing import List, Union
+from enum import Enum
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+from .service import ModelConfig, TextModelType, EmbeddingsService
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Router carrying the embeddings and rank endpoints. Every route is tagged "v1"
+# and documents a generic 404 response in the OpenAPI schema.
+router = APIRouter(
+    tags=["v1"],
+    responses={404: {"description": "Not found"}},
+)
+class ModelType(str, Enum):
+    """
+    High-level distinction for text vs. image models.
+    Members mix in ``str``, so they compare equal to their plain string values
+    ("text" / "image").
+    """
+    TEXT = "text"
+    IMAGE = "image"
+def detect_model_type(model_id: str) -> ModelType:
+    """
+    Detect whether the provided model ID is for text or image.
+    Supported text model IDs:
+      - "multilingual-e5-small"
+      - "paraphrase-multilingual-MiniLM-L12-v2"
+      - "bge-m3"
+    Supported image model ID:
+      - "google/siglip-base-patch16-256-multilingual"
+      (or any model containing "siglip" in its identifier).
+    Args:
+        model_id: String identifier of the model.
+    Returns:
+        ModelType.TEXT if it matches one of the recognized text model IDs,
+        ModelType.IMAGE if it matches (or contains "siglip").
+    Raises:
+        ValueError: If the model_id is not recognized as either text or image.
+    """
+    # Gather all known text model IDs (from TextModelType enum)
+    text_model_ids = {m.value for m in TextModelType}
+    # Simple check: if it's in text_model_ids, it's text;
+    # if 'siglip' is in the model ID, it's recognized as an image model.
+    if model_id in text_model_ids:
+        return ModelType.TEXT
+    elif "siglip" in model_id.lower():
+        return ModelType.IMAGE
+    error_msg = (
+        f"Unsupported model ID: '{model_id}'.\n"
+        "Valid text model IDs are: "
+        "'multilingual-e5-small', 'paraphrase-multilingual-MiniLM-L12-v2', 'bge-m3'.\n"
+        "Valid image model ID contains 'siglip', for example: 'google/siglip-base-patch16-256-multilingual'."
+    )
+    raise ValueError(error_msg)
+# Pydantic Models for request/response
+class EmbeddingRequest(BaseModel):
+    """
+    Request body for embedding creation.
+    Model IDs (text):
+      - "multilingual-e5-small"
+      - "paraphrase-multilingual-MiniLM-L12-v2"
+      - "bge-m3"
+    Model ID (image):
+      - "google/siglip-base-patch16-256-multilingual"
+    """
+    # Optional; defaults to the "multilingual-e5-small" text model.
+    model: str = Field(
+        default=TextModelType.MULTILINGUAL_E5_SMALL.value,
+        description=(
+            "Model ID to use. Possible text models include: 'multilingual-e5-small', "
+            "'paraphrase-multilingual-MiniLM-L12-v2', 'bge-m3'. "
+            "For images, you can use: 'google/siglip-base-patch16-256-multilingual' "
+            "or any ID containing 'siglip'."
+        ),
+    )
+    # Required (no default): one string or a list of strings.
+    input: Union[str, List[str]] = Field(
+        ...,
+        description=(
+            "Input text(s) or image path(s)/URL(s). "
+            "Accepts a single string or a list of strings."
+        ),
+    )
+class RankRequest(BaseModel):
+    """
+    Request body for ranking candidates against queries.
+    Model IDs (text):
+      - "multilingual-e5-small"
+      - "paraphrase-multilingual-MiniLM-L12-v2"
+      - "bge-m3"
+    Model ID (image):
+      - "google/siglip-base-patch16-256-multilingual"
+    """
+    # Optional; defaults to the "multilingual-e5-small" text model.
+    model: str = Field(
+        default=TextModelType.MULTILINGUAL_E5_SMALL.value,
+        description=(
+            "Model ID to use for the queries. Supported text models: "
+            "'multilingual-e5-small', 'paraphrase-multilingual-MiniLM-L12-v2', 'bge-m3'. "
+            "For image queries, use an ID containing 'siglip' such as 'google/siglip-base-patch16-256-multilingual'."
+        ),
+    )
+    # Required (no default): one query string or a list of query strings.
+    queries: Union[str, List[str]] = Field(
+        ...,
+        description=(
+            "Query input(s): can be text(s) or image path(s)/URL(s). "
+            "If using an image model, ensure your inputs reference valid image paths or URLs."
+        ),
+    )
+    # Required (no default): always a list of strings, never a bare string.
+    candidates: List[str] = Field(
+        ...,
+        description=(
+            "List of candidate texts to rank against the given queries. "
+            "Currently, all candidates must be text."
+        ),
+    )
+class EmbeddingResponse(BaseModel):
+    """
+    Response structure for embedding creation.
+    """
+    # Constant marker for the response container; defaults to "list".
+    object: str = "list"
+    # One dict per generated embedding.
+    data: List[dict]
+    # Model ID the embeddings are reported for.
+    model: str
+    # Token usage information for the request.
+    usage: dict
+class RankResponse(BaseModel):
+    """
+    Response structure for ranking results.
+    Both fields are 2D lists of floats.
+    """
+    # Probability-like scores for the candidates.
+    probabilities: List[List[float]]
+    # Raw cosine similarity scores.
+    cosine_similarities: List[List[float]]
+# Initialize the service with default configuration.
+# Both objects are module-level singletons created at import time; the same
+# `service_config` instance is handed to the service, and the request handlers
+# in this module assign to its fields before calling the service.
+service_config = ModelConfig()
+embeddings_service = EmbeddingsService(config=service_config)
+@router.post("/embeddings", response_model=EmbeddingResponse, tags=["embeddings"])
+async def create_embeddings(request: EmbeddingRequest):
+    """
+    Generate embeddings for the provided input text(s) or image(s).
+    Supported Model IDs for text:
+      - "multilingual-e5-small"
+      - "paraphrase-multilingual-MiniLM-L12-v2"
+      - "bge-m3"
+    Supported Model ID for image:
+      - "google/siglip-base-patch16-256-multilingual"
+    Steps:
+      1. Detects model type (text or image) based on the model ID.
+      2. Adjusts the service configuration accordingly.
+      3. Produces embeddings via the EmbeddingsService.
+      4. Returns embedding vectors along with usage information.
+    Raises:
+      HTTPException: For any errors during model detection or embedding generation.
+    """
+    try:
+        modality = detect_model_type(request.model)
+        # Adjust global config based on the detected modality
+        if modality == ModelType.TEXT:
+            service_config.text_model_type = TextModelType(request.model)
+        else:
+            service_config.image_model_id = request.model
+        # Generate embeddings asynchronously
+        embeddings = await embeddings_service.generate_embeddings(
+            input_data=request.input, modality=modality.value
+        )
+        # Estimate tokens only if it's text
+        total_tokens = 0
+        if modality == ModelType.TEXT:
+            total_tokens = embeddings_service.estimate_tokens(request.input)
+        return {
+            "object": "list",
+            "data": [
+                {
+                    "object": "embedding",
+                    "index": idx,
+                    "embedding": emb.tolist(),
+                }
+                for idx, emb in enumerate(embeddings)
+            ],
+            "model": request.model,
+            "usage": {
+                "prompt_tokens": total_tokens,
+                "total_tokens": total_tokens,
+            },
+        }
+    except Exception as e:
+        error_msg = (
+            "Failed to generate embeddings. Please verify your model ID, input data, and server logs.\n"
+            f"Error Details: {str(e)}"
+        )
+        logger.error(error_msg)
+        raise HTTPException(status_code=500, detail=error_msg)
+@router.post("/rank", response_model=RankResponse, tags=["rank"])
+async def rank_candidates(request: RankRequest):
+    """
+    Rank the given candidate texts against the provided queries.
+    Supported Model IDs for text queries:
+      - "multilingual-e5-small"
+      - "paraphrase-multilingual-MiniLM-L12-v2"
+      - "bge-m3"
+    Supported Model ID for image queries:
+      - "google/siglip-base-patch16-256-multilingual"
+    Steps:
+      1. Detects model type (text or image) based on the query model ID.
+      2. Adjusts the service configuration accordingly.
+      3. Generates embeddings for the queries (text or image).
+      4. Generates embeddings for the candidates (always text).
+      5. Computes cosine similarities and returns softmax-normalized probabilities.
+    Raises:
+      HTTPException: For any errors during model detection or ranking.
+    """
+    try:
+        modality = detect_model_type(request.model)
+        # Adjust global config based on the detected modality
+        if modality == ModelType.TEXT:
+            service_config.text_model_type = TextModelType(request.model)
+        else:
+            service_config.image_model_id = request.model
+        # Perform the ranking
+        results = await embeddings_service.rank(
+            queries=request.queries,
+            candidates=request.candidates,
+            modality=modality.value,
+        )
+        return results
+    except Exception as e:
+        error_msg = (
+            "Failed to rank candidates. Please verify your model ID, input data, and server logs.\n"
+            f"Error Details: {str(e)}"
+        )
+        logger.error(error_msg)
+        raise HTTPException(status_code=500, detail=error_msg)

lightweight_embeddings/service.py ADDED Viewed

	@@ -0,0 +1,477 @@

+# filename: service.py
+"""
+Lightweight Embeddings Service Module
+This module provides a service for generating and comparing embeddings from text and images
+using state-of-the-art transformer models. It supports both CPU and GPU inference.
+Key Features:
+- Text and image embedding generation
+- Cross-modal similarity ranking
+- Batch processing support
+- Asynchronous API support
+Supported Text Model IDs:
+- "multilingual-e5-small"
+- "paraphrase-multilingual-MiniLM-L12-v2"
+- "bge-m3"
+Supported Image Model ID (default):
+- "google/siglip-base-patch16-256-multilingual"
+"""
+from __future__ import annotations
+import logging
+from enum import Enum
+from typing import List, Union, Literal, Dict, Optional, NamedTuple
+from dataclasses import dataclass
+from pathlib import Path
+from io import BytesIO
+import requests
+import numpy as np
+import torch
+from PIL import Image
+from sentence_transformers import SentenceTransformer
+from transformers import AutoProcessor, AutoModel
+# Configure logging.
+# NOTE: basicConfig runs as an import-time side effect of this module and sets
+# up the root logger at INFO level.
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+# Default Model IDs (Hugging Face repository names).
+# NOTE(review): TEXT_MODEL_ID does not appear to be referenced elsewhere in this
+# module; IMAGE_MODEL_ID is the default for ModelConfig.image_model_id.
+TEXT_MODEL_ID = "Xenova/multilingual-e5-small"
+IMAGE_MODEL_ID = "google/siglip-base-patch16-256-multilingual"
+class TextModelType(str, Enum):
+    """
+    Enumeration of supported text models.
+    Each value is the short, public model ID; ModelConfig.text_model_info maps
+    every member to its Hugging Face repository and ONNX file.
+    Please ensure the ONNX files and Hugging Face model IDs are consistent
+    with your local or remote environment.
+    """
+    MULTILINGUAL_E5_SMALL = "multilingual-e5-small"
+    PARAPHRASE_MULTILINGUAL_MINILM_L12_V2 = "paraphrase-multilingual-MiniLM-L12-v2"
+    BGE_M3 = "bge-m3"
+class ModelInfo(NamedTuple):
+    """
+    Simple container pairing a text model's Hugging Face repository
+    with the ONNX file to load from it.
+    """
+    # Hugging Face repository ID, e.g. "BAAI/bge-m3".
+    model_id: str
+    # ONNX file name; handed to SentenceTransformer as the "file_name" model kwarg.
+    onnx_file: str
+@dataclass
+class ModelConfig:
+    """
+    Configuration settings for model providers, backends, and defaults.
+    """
+    provider: str = "CPUExecutionProvider"
+    backend: str = "onnx"
+    logit_scale: float = 4.60517
+    text_model_type: TextModelType = TextModelType.MULTILINGUAL_E5_SMALL
+    image_model_id: str = IMAGE_MODEL_ID
+    @property
+    def text_model_info(self) -> ModelInfo:
+        """
+        Retrieves the ModelInfo for the currently selected text_model_type.
+        """
+        model_configs = {
+            TextModelType.MULTILINGUAL_E5_SMALL: ModelInfo(
+                "Xenova/multilingual-e5-small",
+                "onnx/model_quantized.onnx",
+            ),
+            TextModelType.PARAPHRASE_MULTILINGUAL_MINILM_L12_V2: ModelInfo(
+                "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+                "onnx/model_quint8_avx2.onnx",
+            ),
+            TextModelType.BGE_M3: ModelInfo(
+                "BAAI/bge-m3",
+                "model.onnx",
+            ),
+        }
+        return model_configs[self.text_model_type]
+class EmbeddingsService:
+    """
+    Service for generating and comparing text/image embeddings.
+    This service supports multiple text models and a single image model.
+    It provides methods for:
+        - Generating text embeddings
+        - Generating image embeddings
+        - Ranking candidates by similarity
+    """
+    def __init__(self, config: Optional[ModelConfig] = None) -> None:
+        """
+        Initialize the EmbeddingsService.
+        Args:
+            config: Optional ModelConfig object to override default settings.
+        """
+        # Determine whether GPU (CUDA) is available
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Use the provided config or fall back to defaults
+        self.config = config or ModelConfig()
+        # Dictionary to hold multiple text models
+        self.text_models: Dict[TextModelType, SentenceTransformer] = {}
+        # Load all models (text + image) into memory
+        self._load_models()
+    def _load_models(self) -> None:
+        """
+        Load text and image models into memory.
+        This pre-loads all text models defined in the TextModelType enum
+        and a single image model, enabling quick switching at runtime.
+        """
+        try:
+            # Load all text models
+            for model_type in TextModelType:
+                model_info = ModelConfig(text_model_type=model_type).text_model_info
+                logger.info(f"Loading text model: {model_info.model_id}")
+                self.text_models[model_type] = SentenceTransformer(
+                    model_info.model_id,
+                    device=self.device,
+                    backend=self.config.backend,
+                    model_kwargs={
+                        "provider": self.config.provider,
+                        "file_name": model_info.onnx_file,
+                    },
+                )
+            logger.info(f"Loading image model: {self.config.image_model_id}")
+            self.image_model = AutoModel.from_pretrained(self.config.image_model_id).to(
+                self.device
+            )
+            self.image_processor = AutoProcessor.from_pretrained(
+                self.config.image_model_id
+            )
+            logger.info(f"All models loaded successfully on {self.device}.")
+        except Exception as e:
+            logger.error(
+                "Model loading failed. Please ensure you have valid model IDs and local files.\n"
+                f"Error details: {str(e)}"
+            )
+            raise RuntimeError(f"Failed to load models: {str(e)}") from e
+    @staticmethod
+    def _validate_text_input(input_text: Union[str, List[str]]) -> List[str]:
+        """
+        Validate and standardize the input for text embeddings.
+        Args:
+            input_text: Either a single string or a list of strings.
+        Returns:
+            A list of strings to process.
+        Raises:
+            ValueError: If input_text is empty or not string-based.
+        """
+        if isinstance(input_text, str):
+            return [input_text]
+        if not isinstance(input_text, list) or not all(
+            isinstance(x, str) for x in input_text
+        ):
+            raise ValueError(
+                "Text input must be a single string or a list of strings. "
+                "Found a different data type instead."
+            )
+        if not input_text:
+            raise ValueError("Text input list cannot be empty.")
+        return input_text
+    @staticmethod
+    def _validate_modality(modality: str) -> None:
+        """
+        Validate the input modality.
+        Args:
+            modality: Must be either 'text' or 'image'.
+        Raises:
+            ValueError: If modality is neither 'text' nor 'image'.
+        """
+        if modality not in ["text", "image"]:
+            raise ValueError(
+                "Invalid modality. Please specify 'text' or 'image' for embeddings."
+            )
+    def _process_image(self, image_path: Union[str, Path]) -> torch.Tensor:
+        """
+        Load and preprocess an image from either a local path or a URL.
+        Args:
+            image_path: Path to the local image file or a URL.
+        Returns:
+            Torch Tensor suitable for model input.
+        Raises:
+            ValueError: If the image file or URL cannot be loaded.
+        """
+        try:
+            if str(image_path).startswith("http"):
+                response = requests.get(image_path, timeout=10)
+                response.raise_for_status()
+                image_content = BytesIO(response.content)
+            else:
+                image_content = image_path
+            image = Image.open(image_content).convert("RGB")
+            processed = self.image_processor(images=image, return_tensors="pt").to(
+                self.device
+            )
+            return processed
+        except Exception as e:
+            raise ValueError(
+                f"Failed to process image at '{image_path}'. Check the path/URL and file format.\n"
+                f"Details: {str(e)}"
+            ) from e
+    def _generate_text_embeddings(self, texts: List[str]) -> np.ndarray:
+        """
+        Helper method to generate text embeddings for a list of texts
+        using the currently configured text model.
+        Args:
+            texts: A list of text strings.
+        Returns:
+            Numpy array of shape (num_texts, embedding_dim).
+        Raises:
+            RuntimeError: If the text model fails to generate embeddings.
+        """
+        try:
+            logger.info(
+                f"Generating embeddings for {len(texts)} text items using model: "
+                f"{self.config.text_model_type}"
+            )
+            # Select the preloaded text model based on the current config
+            model = self.text_models[self.config.text_model_type]
+            embeddings = model.encode(texts)
+            return embeddings
+        except Exception as e:
+            error_msg = (
+                f"Error generating text embeddings with model: {self.config.text_model_type}. "
+                f"Details: {str(e)}"
+            )
+            logger.error(error_msg)
+            raise RuntimeError(error_msg) from e
+    def _generate_image_embeddings(
+        self, input_data: Union[str, List[str]], batch_size: Optional[int]
+    ) -> np.ndarray:
+        """
+        Helper method to generate image embeddings.
+        Args:
+            input_data: Either a single image path/URL or a list of them.
+            batch_size: Batch size for processing images in chunks.
+                        If None, process all at once.
+        Returns:
+            Numpy array of shape (num_images, embedding_dim).
+        Raises:
+            RuntimeError: If the image model fails to generate embeddings.
+        """
+        try:
+            if isinstance(input_data, str):
+                # Single image scenario
+                processed = self._process_image(input_data)
+                with torch.no_grad():
+                    embedding = self.image_model.get_image_features(**processed)
+                return embedding.cpu().numpy()
+            # Multiple images scenario
+            logger.info(f"Generating embeddings for {len(input_data)} images.")
+            if batch_size is None:
+                # Process all images at once
+                processed_batches = [
+                    self._process_image(img_path) for img_path in input_data
+                ]
+                with torch.no_grad():
+                    # Concatenate all images along the batch dimension
+                    batch_keys = processed_batches[0].keys()
+                    concatenated = {
+                        k: torch.cat([pb[k] for pb in processed_batches], dim=0)
+                        for k in batch_keys
+                    }
+                    embedding = self.image_model.get_image_features(**concatenated)
+                return embedding.cpu().numpy()
+            # Process images in smaller batches
+            embeddings_list = []
+            for i, img_path in enumerate(input_data):
+                if i % batch_size == 0:
+                    logger.debug(
+                        f"Processing image batch {i // batch_size + 1} with size up to {batch_size}."
+                    )
+                processed = self._process_image(img_path)
+                with torch.no_grad():
+                    embedding = self.image_model.get_image_features(**processed)
+                embeddings_list.append(embedding.cpu().numpy())
+            return np.vstack(embeddings_list)
+        except Exception as e:
+            error_msg = (
+                f"Error generating image embeddings with model: {self.config.image_model_id}. "
+                f"Details: {str(e)}"
+            )
+            logger.error(error_msg)
+            raise RuntimeError(error_msg) from e
+    async def generate_embeddings(
+        self,
+        input_data: Union[str, List[str]],
+        modality: Literal["text", "image"] = "text",
+        batch_size: Optional[int] = None,
+    ) -> np.ndarray:
+        """
+        Asynchronously generate embeddings for text or image inputs.
+        Args:
+            input_data: A string or list of strings (text/image paths/URLs).
+            modality: "text" for text data or "image" for image data.
+            batch_size: Optional batch size for processing images in chunks.
+        Returns:
+            Numpy array of embeddings.
+        Raises:
+            ValueError: If the modality is invalid.
+        """
+        self._validate_modality(modality)
+        if modality == "text":
+            texts = self._validate_text_input(input_data)
+            return self._generate_text_embeddings(texts)
+        else:
+            return self._generate_image_embeddings(input_data, batch_size)
+    async def rank(
+        self,
+        queries: Union[str, List[str]],
+        candidates: List[str],
+        modality: Literal["text", "image"] = "text",
+        batch_size: Optional[int] = None,
+    ) -> Dict[str, List[List[float]]]:
+        """
+        Rank a set of candidate texts against one or more queries using cosine similarity
+        and a softmax to produce probability-like scores.
+        Args:
+            queries: Query text(s) or image path(s)/URL(s).
+            candidates: Candidate texts to be ranked.
+                        (Note: This implementation always treats candidates as text.)
+            modality: "text" for text queries or "image" for image queries.
+            batch_size: Batch size if images are processed in chunks.
+        Returns:
+            Dictionary containing:
+                - "probabilities": 2D list of softmax-normalized scores.
+                - "cosine_similarities": 2D list of raw cosine similarity values.
+        Raises:
+            RuntimeError: If the query or candidate embeddings fail to generate.
+        """
+        logger.info(
+            f"Ranking {len(candidates)} candidates against "
+            f"{len(queries) if isinstance(queries, list) else 1} query item(s)."
+        )
+        # Generate embeddings for queries
+        query_embeds = await self.generate_embeddings(
+            queries, modality=modality, batch_size=batch_size
+        )
+        # Generate embeddings for candidates (always text)
+        candidate_embeds = await self.generate_embeddings(
+            candidates, modality="text", batch_size=batch_size
+        )
+        # Compute cosine similarity and scaled probabilities
+        cosine_sims = self.cosine_similarity(query_embeds, candidate_embeds)
+        logit_scale = np.exp(self.config.logit_scale)
+        probabilities = self.softmax(logit_scale * cosine_sims)
+        return {
+            "probabilities": probabilities.tolist(),
+            "cosine_similarities": cosine_sims.tolist(),
+        }
+    def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
+        """
+        Roughly estimate the total number of tokens in the given text(s).
+        Args:
+            input_data: A string or list of strings representing text input.
+        Returns:
+            Estimated token count (int).
+        Raises:
+            ValueError: If the input is not valid text data.
+        """
+        texts = self._validate_text_input(input_data)
+        # Very rough approximation: assume ~4 characters per token
+        total_chars = sum(len(t) for t in texts)
+        return max(1, round(total_chars / 4))
+    @staticmethod
+    def softmax(scores: np.ndarray) -> np.ndarray:
+        """
+        Apply softmax along the last dimension of the scores array.
+        Args:
+            scores: Numpy array of shape (..., num_candidates).
+        Returns:
+            Numpy array of softmax-normalized values, same shape as scores.
+        """
+        exp_scores = np.exp(scores - np.max(scores, axis=-1, keepdims=True))
+        return exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)
+    @staticmethod
+    def cosine_similarity(
+        query_embeds: np.ndarray, candidate_embeds: np.ndarray
+    ) -> np.ndarray:
+        """
+        Compute the cosine similarity between two sets of vectors.
+        Args:
+            query_embeds: Numpy array of shape (num_queries, embed_dim).
+            candidate_embeds: Numpy array of shape (num_candidates, embed_dim).
+        Returns:
+            2D Numpy array of shape (num_queries, num_candidates)
+            containing cosine similarity scores.
+        """
+        # Normalize embeddings
+        query_norm = query_embeds / np.linalg.norm(query_embeds, axis=1, keepdims=True)
+        candidate_norm = candidate_embeds / np.linalg.norm(
+            candidate_embeds, axis=1, keepdims=True
+        )
+        return np.dot(query_norm, candidate_norm.T)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,14 @@

+[tool.poetry]
+name = "lightweight-embeddings"
+version = "0.1.0"
+description = "Fast, lightweight, multilingual embeddings solution."
+authors = ["Hieu Lam <lamhieu.vk@gmail.com>"]
+readme = "README.md"
+[tool.poetry.dependencies]
+python = "^3.10"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+gradio
+fastapi
+uvicorn
+requests
+pydantic
+sentence-transformers[onnx]==3.3.1
+sentencepiece==0.2.0
+torch==2.4.0
+transformers==4.45.0