Text Embeddings Inference
TEI Usage
Local CUDA
# Install dependencies (OpenSSL headers, gcc, and the Rust toolchain)
sudo apt install libssl-dev gcc -y
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source "$HOME/.cargo/env"

# Clone the repository
git clone https://github.com/huggingface/text-embeddings-inference
cd text-embeddings-inference

# Build and install the router with CUDA support
export PATH=$PATH:/usr/local/cuda/bin
# On Turing GPUs (T4, RTX 2000 series, ...)
cargo install --path router -F candle-cuda-turing --no-default-features
# On Ampere and Hopper
cargo install --path router -F candle-cuda --no-default-features

# Run
model=jinaai/jina-embeddings-v2-base-en
text-embeddings-router --model-id $model --port 8080
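Once the router reports it is listening, it is worth confirming the server is healthy before sending real requests. A minimal check, assuming the default port above and TEI's standard HTTP routes:

# Liveness check: returns 200 once the model is loaded and ready
curl -i 127.0.0.1:8080/health

# Deployment metadata: model id, dtype, batch limits, etc.
curl 127.0.0.1:8080/info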
Docker
model=efederici/multilingual-e5-small-4096
# "data" is a named Docker volume used to cache the downloaded model weights
volume=data

docker run --name tei -d --gpus all \
    -p 8080:80 -v $volume:/data --pull always \
    ghcr.io/huggingface/text-embeddings-inference:0.3.0 \
    --model-id $model
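On the first start the container downloads the model weights into the volume, so it can take a while to become ready. Standard Docker commands (nothing TEI-specific assumed here) let you watch the startup:

# Follow the container logs until the router reports it is listening
docker logs -f tei

# Verify the container is running and container port 80 maps to host port 8080
docker ps --filter name=tei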
Test
curl 127.0.0.1:8080/embed \
    -X POST \
    -d '{"inputs":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
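The inputs field also accepts a list of strings, so several texts can be embedded in one round trip; the response is a JSON array with one embedding vector per input. A sketch against the same endpoint:

# Batched request: one embedding is returned for each input string
curl 127.0.0.1:8080/embed \
    -X POST \
    -d '{"inputs":["What is Deep Learning?","What is Machine Learning?"]}' \
    -H 'Content-Type: application/json'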
TEI Pooling
By default, TEI reads the pooling method from the 1_Pooling folder in the model repository (the sentence-transformers pooling configuration). The common pooling modes are:
- mean
- cls

If the model does not ship a 1_Pooling folder, or you want a different method, the pooling can be set explicitly when launching the router, as shown below.
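A sketch using the router's --pooling flag (available in recent TEI versions; the model id is just the one from the earlier example):

# Override the pooling method at launch instead of relying on 1_Pooling
model=jinaai/jina-embeddings-v2-base-en
text-embeddings-router --model-id $model --pooling mean --port 8080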