diff --git a/examples/rag-retriever/rag_feast_docembedder.ipynb b/examples/rag-retriever/rag_feast_docembedder.ipynb new file mode 100644 index 00000000000..47728fef556 --- /dev/null +++ b/examples/rag-retriever/rag_feast_docembedder.ipynb @@ -0,0 +1,648 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2855fd1a", + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install --quiet feast[milvus] sentence-transformers datasets\n", + "# %pip install bigtree==0.19.2\n", + "# %pip install marshmallow==3.10.0 " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3bb14cf1", + "metadata": {}, + "outputs": [], + "source": [ + "from datasets import load_dataset\n", + "# load wikipedia dataset - 1% of the training split\n", + "dataset = load_dataset(\n", + " \"facebook/wiki_dpr\",\n", + " \"psgs_w100.nq.exact\",\n", + " split=\"train[:1%]\",\n", + " with_index=False,\n", + " trust_remote_code=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "92a5e18c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | id | \n", + "text | \n", + "title | \n", + "embeddings | \n", + "
|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "Aaron Aaron ( or ; \"Ahärôn\") is a prophet, hig... | \n", + "Aaron | \n", + "[0.013342111, 0.58217376, -0.31309745, -0.6991... | \n", + "
| 1 | \n", + "2 | \n", + "God at Sinai granted Aaron the priesthood for ... | \n", + "Aaron | \n", + "[-0.19236332, 0.539003, -0.5652932, -0.5195250... | \n", + "
| 2 | \n", + "3 | \n", + "his rod turn into a snake. Then he stretched o... | \n", + "Aaron | \n", + "[-0.23045847, 0.28877887, -0.3449004, -0.14077... | \n", + "
| 3 | \n", + "4 | \n", + "however, Aaron and Hur remained below to look ... | \n", + "Aaron | \n", + "[0.107315615, 0.5992388, -0.37498242, -0.53419... | \n", + "
| 4 | \n", + "5 | \n", + "Aaron and his sons to the priesthood, and arra... | \n", + "Aaron | \n", + "[0.32623303, 0.51600194, -0.5568064, -0.494033... | \n", + "
| \n", + " | passage_id | \n", + "text | \n", + "embedding | \n", + "event_timestamp | \n", + "source_id | \n", + "
|---|---|---|---|---|---|
| 0 | \n", + "1_0 | \n", + "Aaron Aaron ( or ; \"Ahärôn\") is a prophet, hig... | \n", + "[0.002557202707976103, 0.12003513425588608, -0... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "1 | \n", + "
| 1 | \n", + "1_1 | \n", + "Israelites, Aaron served as his brother's spok... | \n", + "[-0.01853535883128643, 0.13290095329284668, -0... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "1 | \n", + "
| 2 | \n", + "2_0 | \n", + "God at Sinai granted Aaron the priesthood for ... | \n", + "[0.014343681745231152, 0.10290483385324478, -0... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "2 | \n", + "
| 3 | \n", + "2_1 | \n", + "could not speak well, God appointed Aaron as M... | \n", + "[0.0504433810710907, 0.1175316572189331, -0.00... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "2 | \n", + "
| 4 | \n", + "3_0 | \n", + "his rod turn into a snake. Then he stretched o... | \n", + "[-0.06228446215391159, 0.10652626305818558, 0.... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "3 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 195 | \n", + "98_1 | \n", + "State College before entering Columbia Univers... | \n", + "[0.03597380220890045, 0.04296444356441498, 0.0... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "98 | \n", + "
| 196 | \n", + "99_0 | \n", + "joined the Merchant Marine to earn money to co... | \n", + "[0.05798682942986488, -0.007653537206351757, -... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "99 | \n", + "
| 197 | \n", + "99_1 | \n", + "spent several months in a mental institution a... | \n", + "[0.05905637890100479, 0.030195411294698715, -0... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "99 | \n", + "
| 198 | \n", + "100_0 | \n", + "harboring stolen goods in his dorm room. It wa... | \n", + "[-0.005938616115599871, 0.02653227001428604, -... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "100 | \n", + "
| 199 | \n", + "100_1 | \n", + "Eugene to party meetings. Ginsberg later said ... | \n", + "[0.007752032019197941, 0.06832979619503021, 0.... | \n", + "2026-02-11 12:26:29.098091+00:00 | \n", + "100 | \n", + "
200 rows × 5 columns
\n", + "