From b0da945b06fa7d4082efe5608e2a9386c6ee5b9a Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Mon, 2 Jun 2025 07:50:50 -0600 Subject: [PATCH 1/2] feat: Add data labeling tabs to UI (#5410) * Add GenAI documentation page to Introduction section Co-Authored-By: Francisco Javier Arceo * Move GenAI page to getting-started directory and update SUMMARY.md Co-Authored-By: Francisco Javier Arceo * Update SUMMARY.md * Add unstructured data transformation and Spark integration details to GenAI documentation Co-Authored-By: Francisco Javier Arceo * Update genai.md * Rename Document Labeling to Data Labeling with blue icon - Update sidebar navigation to show 'Data Labeling' instead of 'Document Labeling' - Add blue color (#006BB4) to Data Labeling icon to match other navbar icons - Update route from 'document-labeling' to 'data-labeling' - Update page title from 'Document Labeling for RAG' to 'Data Labeling for RAG' - Update custom tab types from DocumentLabeling to DataLabeling - Update test document text to reference 'data labeling functionality' Co-Authored-By: Francisco Javier Arceo Signed-off-by: Devin AI Co-Authored-By: Francisco Javier Arceo * Add tabbed interface to Data Labeling with RAG and Classification tabs - Implement separate RAG and Classification tabs for Data Labeling page - Add RAG Context section with prompt and query text areas - Separate chunk extraction and generation labels into distinct H2 sections - Keep existing 'Label Selected Text' button for chunk extraction - Add long text area for ground truth label in 'Label for Generation' section - Implement Classification tab with CSV data loading and editing functionality - Maintain all existing text selection and highlighting functionality - Follow established UI patterns using EUI components Co-Authored-By: Francisco Javier Arceo Co-Authored-By: Francisco Javier Arceo Signed-off-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> * Fix save functionality and 
improve RagTab layout - Simplify save function with setTimeout to avoid protobuf errors - Improve filename extraction for JSON download - Maintain conditional rendering of RAG Context after document loading - Keep existing layout with Step 1 and Step 2 sections - Preserve 'Label Selected Text' button functionality Signed-off-by: Devin AI Co-Authored-By: Francisco Javier Arceo Co-Authored-By: Francisco Javier Arceo * Fix lint-python and unit-test-ui formatting issues - Fix import sorting in feature_server.py (ruff I001) - Remove trailing comma in RagTab.tsx imports - Resolve CI formatting failures Signed-off-by: Devin AI Co-Authored-By: Francisco Javier Arceo Co-Authored-By: Francisco Javier Arceo --------- Signed-off-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Signed-off-by: Rob Howley --- docs/getting-started/genai.md | 93 --- sdk/python/feast/feature_server.py | 26 + sdk/python/feast/ui_server.py | 26 + ui/src/FeastUISansProviders.tsx | 2 +- ui/src/custom-tabs/types.ts | 12 +- ui/src/pages/Sidebar.tsx | 10 +- .../document-labeling/ClassificationTab.tsx | 310 +++++++++ .../DocumentLabelingPage.tsx | 399 +----------- ui/src/pages/document-labeling/RagTab.tsx | 615 ++++++++++++++++++ ui/src/test-document.txt | 2 +- 10 files changed, 1019 insertions(+), 476 deletions(-) create mode 100644 ui/src/pages/document-labeling/ClassificationTab.tsx create mode 100644 ui/src/pages/document-labeling/RagTab.tsx diff --git a/docs/getting-started/genai.md b/docs/getting-started/genai.md index 0b4e5225142..e6c915f7ff5 100644 --- a/docs/getting-started/genai.md +++ b/docs/getting-started/genai.md @@ -65,99 +65,6 @@ Feast supports transformations that can be used to: * Normalize and preprocess features before serving to LLMs * Apply custom transformations to adapt features for specific LLM requirements -## Getting Started with Feast for GenAI - -### Installation - 
-To use Feast with vector database support, install with the appropriate extras: - -```bash -# For Milvus support -pip install feast[milvus,nlp] - -# For Elasticsearch support -pip install feast[elasticsearch] - -# For Qdrant support -pip install feast[qdrant] - -# For SQLite support (Python 3.10 only) -pip install feast[sqlite_vec] -``` - -### Configuration - -Configure your feature store to use a vector database as the online store: - -```yaml -project: genai-project -provider: local -registry: data/registry.db -online_store: - type: milvus - path: data/online_store.db - vector_enabled: true - embedding_dim: 384 # Adjust based on your embedding model - index_type: "IVF_FLAT" - -offline_store: - type: file -entity_key_serialization_version: 3 -``` - -### Defining Vector Features - -Create feature views with vector index support: - -```python -from feast import FeatureView, Field, Entity -from feast.types import Array, Float32, String - -document = Entity( - name="document_id", - description="Document identifier", - join_keys=["document_id"], -) - -document_embeddings = FeatureView( - name="document_embeddings", - entities=[document], - schema=[ - Field( - name="vector", - dtype=Array(Float32), - vector_index=True, # Enable vector search - vector_search_metric="COSINE", # Similarity metric - ), - Field(name="document_id", dtype=String), - Field(name="content", dtype=String), - ], - source=document_source, - ttl=timedelta(days=30), -) -``` - -### Retrieving Similar Documents - -Use the `retrieve_online_documents_v2` method to find similar documents: - -```python -# Generate query embedding -query = "How does Feast support vector databases?" 
@app.post("/save-document")
async def save_document_endpoint(request: SaveDocumentRequest):
    """Persist labeling data as ``<stem>-labels.json`` beside the given document.

    Args:
        request: Request body carrying ``file_path`` (path of the labeled
            document) and ``data`` (the dictionary of labels to store).

    Returns:
        ``{"success": True, "saved_to": <path>}`` once the file has been
        written, otherwise ``{"error": <message>}``.
    """
    import json
    from pathlib import Path

    try:
        base_dir = Path.cwd().resolve()
        file_path = Path(request.file_path).resolve()
        labels_file = file_path.parent / f"{file_path.stem}-labels.json"

        # Validate the file that is actually written, component by component.
        # A raw string-prefix test would accept sibling directories such as
        # "<cwd>-other/", and a ``file_path`` equal to the working directory
        # would place the labels file in its parent, outside the allowed tree.
        if base_dir not in labels_file.parents:
            return {"error": "Invalid file path"}

        with open(labels_file, "w", encoding="utf-8") as file:
            json.dump(request.data, file, indent=2, ensure_ascii=False)

        return {"success": True, "saved_to": str(labels_file)}
    except Exception as e:
        # Failures are reported in the response body rather than raised, which
        # matches the error shape of the neighbouring read-document endpoint.
        return {"error": str(e)}
@app.post("/save-document")
async def save_document_endpoint(request: SaveDocumentRequest):
    """Persist labeling data as ``<stem>-labels.json`` beside the given document.

    Args:
        request: Request body carrying ``file_path`` (path of the labeled
            document) and ``data`` (the dictionary of labels to store).

    Returns:
        ``{"success": True, "saved_to": <path>}`` once the file has been
        written, otherwise ``{"error": <message>}``.
    """
    from pathlib import Path

    try:
        base_dir = Path.cwd().resolve()
        file_path = Path(request.file_path).resolve()
        labels_file = file_path.parent / f"{file_path.stem}-labels.json"

        # Validate the file that is actually written, component by component.
        # A raw string-prefix test would accept sibling directories such as
        # "<cwd>-other/", and a ``file_path`` equal to the working directory
        # would place the labels file in its parent, outside the allowed tree.
        if base_dir not in labels_file.parents:
            return {"error": "Invalid file path"}

        with open(labels_file, "w", encoding="utf-8") as file:
            json.dump(request.data, file, indent=2, ensure_ascii=False)

        return {"success": True, "saved_to": str(labels_file)}
    except Exception as e:
        # Failures are reported in the response body rather than raised, so the
        # caller always receives a JSON object with either "success" or "error".
        return {"error": str(e)}
DataLabelingCustomTabRegistrationInterface extends CustomTabRegistrationInterface { Component: ({ id, feastObjectQuery, ...args - }: DocumentLabelingCustomTabProps) => JSX.Element; + }: DataLabelingCustomTabProps) => JSX.Element; } export type { @@ -171,6 +171,6 @@ export type { FeatureCustomTabProps, DatasetCustomTabRegistrationInterface, DatasetCustomTabProps, - DocumentLabelingCustomTabRegistrationInterface, - DocumentLabelingCustomTabProps, + DataLabelingCustomTabRegistrationInterface, + DataLabelingCustomTabProps, }; diff --git a/ui/src/pages/Sidebar.tsx b/ui/src/pages/Sidebar.tsx index afc6c43acb4..d7a5a54cda0 100644 --- a/ui/src/pages/Sidebar.tsx +++ b/ui/src/pages/Sidebar.tsx @@ -132,13 +132,13 @@ const SideNav = () => { isSelected: useMatchSubpath(`${baseUrl}/data-set`), }, { - name: "Document Labeling", - id: htmlIdGenerator("documentLabeling")(), - icon: , + name: "Data Labeling", + id: htmlIdGenerator("dataLabeling")(), + icon: , renderItem: (props) => ( - + ), - isSelected: useMatchSubpath(`${baseUrl}/document-labeling`), + isSelected: useMatchSubpath(`${baseUrl}/data-labeling`), }, { name: "Permissions", diff --git a/ui/src/pages/document-labeling/ClassificationTab.tsx b/ui/src/pages/document-labeling/ClassificationTab.tsx new file mode 100644 index 00000000000..302b03cd9fa --- /dev/null +++ b/ui/src/pages/document-labeling/ClassificationTab.tsx @@ -0,0 +1,310 @@ +import React, { useState } from "react"; +import { + EuiPageSection, + EuiCallOut, + EuiSpacer, + EuiFlexGroup, + EuiFlexItem, + EuiFormRow, + EuiFieldText, + EuiButton, + EuiPanel, + EuiTitle, + EuiText, + EuiTable, + EuiTableHeader, + EuiTableHeaderCell, + EuiTableBody, + EuiTableRow, + EuiTableRowCell, + EuiSelect, + EuiLoadingSpinner, +} from "@elastic/eui"; + +interface ClassificationData { + id: number; + text: string; + currentClass: string; + originalClass?: string; +} + +const ClassificationTab = () => { + const [csvPath, setCsvPath] = useState("./src/sample-data.csv"); + const 
[isLoading, setIsLoading] = useState(false); + const [data, setData] = useState([]); + const [error, setError] = useState(null); + const [availableClasses] = useState(["positive", "negative", "neutral"]); + + const loadCsvData = async () => { + if (!csvPath) return; + + setIsLoading(true); + setError(null); + + try { + if (csvPath === "./src/sample-data.csv") { + const sampleData: ClassificationData[] = [ + { + id: 1, + text: "This product is amazing! I love the quality and design.", + currentClass: "positive", + originalClass: "positive", + }, + { + id: 2, + text: "The service was terrible and the food was cold.", + currentClass: "negative", + originalClass: "negative", + }, + { + id: 3, + text: "It's an okay product, nothing special but does the job.", + currentClass: "neutral", + originalClass: "neutral", + }, + { + id: 4, + text: "Excellent customer support and fast delivery!", + currentClass: "positive", + originalClass: "positive", + }, + { + id: 5, + text: "I'm not sure how I feel about this purchase.", + currentClass: "neutral", + originalClass: "positive", + }, + ]; + + setData(sampleData); + } else { + throw new Error( + "CSV file not found. Please use the sample data path: ./src/sample-data.csv", + ); + } + } catch (err) { + setError( + err instanceof Error + ? err.message + : "An error occurred while loading the CSV data", + ); + } finally { + setIsLoading(false); + } + }; + + const handleClassChange = (id: number, newClass: string) => { + setData( + data.map((item) => + item.id === id ? 
{ ...item, currentClass: newClass } : item, + ), + ); + }; + + const getChangedItems = () => { + return data.filter((item) => item.currentClass !== item.originalClass); + }; + + const resetChanges = () => { + setData( + data.map((item) => ({ ...item, currentClass: item.originalClass || "" })), + ); + }; + + const saveChanges = () => { + const changedItems = getChangedItems(); + console.log("Saving classification changes:", changedItems); + alert(`Saved ${changedItems.length} classification changes!`); + }; + + const columns = [ + { + field: "id", + name: "ID", + width: "60px", + }, + { + field: "text", + name: "Text", + width: "60%", + }, + { + field: "originalClass", + name: "Original Class", + width: "15%", + }, + { + field: "currentClass", + name: "Current Class", + width: "20%", + }, + ]; + + return ( + + +

+ Load a CSV file containing text samples and edit their classification + labels. This helps improve your classification models by providing + corrected training data. +

+
+ + + + + + + setCsvPath(e.target.value)} + /> + + + + + + Load CSV Data + + + + + + + + {isLoading && ( + + + + + + Loading CSV data... + + + )} + + {error && ( + +

{error}

+
+ )} + + {data.length > 0 && ( + <> + + + +

Classification Data ({data.length} samples)

+
+
+ + + + + Reset Changes + + + + + Save Changes ({getChangedItems().length}) + + + + +
+ + + + + + + {columns.map((column, index) => ( + + {column.name} + + ))} + + + {data.map((item) => ( + + {item.id} + + {item.text} + + + + {item.originalClass} + + + + ({ + value: cls, + text: cls, + }))} + value={item.currentClass} + onChange={(e) => + handleClassChange(item.id, e.target.value) + } + compressed + /> + + + ))} + + + + + {getChangedItems().length > 0 && ( + <> + + +

+ You have unsaved changes. Click "Save Changes" to persist your + modifications. +

+
+ + )} + + )} +
+ ); +}; + +export default ClassificationTab; diff --git a/ui/src/pages/document-labeling/DocumentLabelingPage.tsx b/ui/src/pages/document-labeling/DocumentLabelingPage.tsx index 9ec4c090a6f..5563d6328c1 100644 --- a/ui/src/pages/document-labeling/DocumentLabelingPage.tsx +++ b/ui/src/pages/document-labeling/DocumentLabelingPage.tsx @@ -6,398 +6,57 @@ import { EuiPageHeader, EuiTitle, EuiSpacer, - EuiFlexGroup, - EuiFlexItem, - EuiButton, - EuiFieldText, - EuiFormRow, - EuiPanel, - EuiText, - EuiCallOut, - EuiLoadingSpinner, - EuiButtonGroup, - EuiCode, + EuiTabs, + EuiTab, } from "@elastic/eui"; - -interface DocumentContent { - content: string; - file_path: string; -} - -interface TextSelection { - text: string; - start: number; - end: number; -} - -interface DocumentLabel { - text: string; - start: number; - end: number; - label: string; - timestamp: number; -} +import RagTab from "./RagTab"; +import ClassificationTab from "./ClassificationTab"; const DocumentLabelingPage = () => { - const [filePath, setFilePath] = useState("./src/test-document.txt"); - const [selectedText, setSelectedText] = useState(null); - const [labelingMode, setLabelingMode] = useState("relevant"); - const [labels, setLabels] = useState([]); - const [isLoading, setIsLoading] = useState(false); - const [documentContent, setDocumentContent] = - useState(null); - const [error, setError] = useState(null); - - const loadDocument = async () => { - if (!filePath) return; - - setIsLoading(true); - setError(null); - - try { - if (filePath === "./src/test-document.txt") { - const testContent = `This is a sample document for testing the document labeling functionality in Feast UI. - -The document contains multiple paragraphs and sections that can be used to test the text highlighting and labeling features. - -This paragraph discusses machine learning and artificial intelligence concepts. It covers topics like neural networks, deep learning, and natural language processing. 
Users should be able to select and label relevant portions of this text for RAG retrieval systems. - -Another section focuses on data engineering and ETL pipelines. This content explains how to process large datasets and build scalable data infrastructure. The labeling system should allow users to mark this as relevant or irrelevant for their specific use cases. - -The final paragraph contains information about feature stores and real-time machine learning systems. This text can be used to test the highlighting functionality and ensure that labels are properly stored and displayed in the user interface.`; - - setDocumentContent({ - content: testContent, - file_path: filePath, - }); - } else { - throw new Error( - "Document not found. Please use the test document path: ./src/test-document.txt", - ); - } - } catch (err) { - setError( - err instanceof Error - ? err.message - : "An error occurred while loading the document", - ); - } finally { - setIsLoading(false); - } - }; - - const handleTextSelection = () => { - const selection = window.getSelection(); - if (selection && selection.toString().trim() && documentContent) { - const selectedTextContent = selection.toString().trim(); - const range = selection.getRangeAt(0); - - const textContent = documentContent.content; - - let startIndex = -1; - let endIndex = -1; - - const rangeText = range.toString(); - if (rangeText) { - startIndex = textContent.indexOf(rangeText); - if (startIndex !== -1) { - endIndex = startIndex + rangeText.length; - } - } - - if (startIndex !== -1 && endIndex !== -1) { - setSelectedText({ - text: selectedTextContent, - start: startIndex, - end: endIndex, - }); - } - } - }; - - const handleLabelSelection = () => { - if (selectedText) { - const newLabel: DocumentLabel = { - text: selectedText.text, - start: selectedText.start, - end: selectedText.end, - label: labelingMode, - timestamp: Date.now(), - }; - - setLabels([...labels, newLabel]); - setSelectedText(null); - - const selection = 
window.getSelection(); - if (selection) { - selection.removeAllRanges(); - } - } - }; - - const handleRemoveLabel = (index: number) => { - setLabels(labels.filter((_: DocumentLabel, i: number) => i !== index)); - }; + const [selectedTab, setSelectedTab] = useState("rag"); - const renderDocumentWithHighlights = ( - content: string, - ): (string | React.ReactElement)[] => { - const allHighlights = [...labels]; - - if (selectedText) { - allHighlights.push({ - text: selectedText.text, - start: selectedText.start, - end: selectedText.end, - label: "temp-selection", - timestamp: 0, - }); - } - - if (allHighlights.length === 0) { - return [content]; - } - - const sortedHighlights = [...allHighlights].sort( - (a, b) => a.start - b.start, - ); - const result: (string | React.ReactElement)[] = []; - let lastIndex = 0; - - sortedHighlights.forEach((highlight, index) => { - result.push(content.slice(lastIndex, highlight.start)); - - let highlightColor = "#d4edda"; - let borderColor = "#c3e6cb"; - - if (highlight.label === "temp-selection") { - highlightColor = "#add8e6"; - borderColor = "#87ceeb"; - } else if (highlight.label === "irrelevant") { - highlightColor = "#f8d7da"; - borderColor = "#f5c6cb"; - } - - result.push( - - {highlight.text} - , - ); - - lastIndex = highlight.end; - }); - - result.push(content.slice(lastIndex)); - return result; - }; - - const labelingOptions = [ + const tabs = [ { - id: "relevant", - label: "Relevant", + id: "rag", + name: "RAG", + content: , }, { - id: "irrelevant", - label: "Irrelevant", + id: "classification", + name: "Classification", + content: , }, ]; + const selectedTabContent = tabs.find( + (tab) => tab.id === selectedTab, + )?.content; + return ( -

Document Labeling for RAG

+

Data Labeling

- - -

- Load a document file and highlight text chunks to label them as - relevant or irrelevant for RAG retrieval. This helps improve the - quality of your retrieval system by providing human feedback. -

-
- - - - - - - setFilePath(e.target.value)} - /> - - - - - - Load Document - - - - - - - - {isLoading && ( - - - - - - Loading document... - - - )} - - {error && ( - + {tabs.map((tab) => ( + setSelectedTab(tab.id)} + isSelected={tab.id === selectedTab} > -

{error}

-
- )} - - {documentContent && ( - <> - - - - Labeling mode: - - - - setLabelingMode(id)} - buttonSize="s" - /> - - - - Label Selected Text - - - - - - - {selectedText && ( - - {selectedText.text} - - )} - - + {tab.name} + + ))} + - - -

Document Content

-
- - -
- {renderDocumentWithHighlights(documentContent.content)} -
-
-
+ - {labels.length > 0 && ( - <> - - - -

Labels ({labels.length})

-
- - {labels.map((label, index) => ( - - - - {label.label} - - - - - "{label.text.substring(0, 100)} - {label.text.length > 100 ? "..." : ""}" - - - - handleRemoveLabel(index)} - > - Remove - - - - ))} -
- - )} - - )} -
+ {selectedTabContent}
diff --git a/ui/src/pages/document-labeling/RagTab.tsx b/ui/src/pages/document-labeling/RagTab.tsx new file mode 100644 index 00000000000..ae5fd22aea7 --- /dev/null +++ b/ui/src/pages/document-labeling/RagTab.tsx @@ -0,0 +1,615 @@ +import React, { useState } from "react"; +import { + EuiPageSection, + EuiCallOut, + EuiSpacer, + EuiFlexGroup, + EuiFlexItem, + EuiFormRow, + EuiFieldText, + EuiButton, + EuiPanel, + EuiTitle, + EuiText, + EuiLoadingSpinner, + EuiButtonGroup, + EuiCode, + EuiTextArea, +} from "@elastic/eui"; +import { useTheme } from "../../contexts/ThemeContext"; + +interface DocumentContent { + content: string; + file_path: string; +} + +interface TextSelection { + text: string; + start: number; + end: number; +} + +interface DocumentLabel { + text: string; + start: number; + end: number; + label: string; + timestamp: number; + groundTruthLabel: string; +} + +const RagTab = () => { + const { colorMode } = useTheme(); + const [filePath, setFilePath] = useState("./src/test-document.txt"); + const [selectedText, setSelectedText] = useState(null); + const [labelingMode, setLabelingMode] = useState("relevant"); + const [labels, setLabels] = useState([]); + const [isLoading, setIsLoading] = useState(false); + const [documentContent, setDocumentContent] = + useState(null); + const [error, setError] = useState(null); + const [prompt, setPrompt] = useState(""); + const [query, setQuery] = useState(""); + const [groundTruthLabel, setGroundTruthLabel] = useState(""); + const [isSaving, setIsSaving] = useState(false); + const [hasUnsavedChanges, setHasUnsavedChanges] = useState(false); + + const loadDocument = async () => { + if (!filePath) return; + + setIsLoading(true); + setError(null); + + try { + if (filePath === "./src/test-document.txt") { + const testContent = `This is a sample document for testing the data labeling functionality in Feast UI. 
+ +The document contains multiple paragraphs and sections that can be used to test the text highlighting and labeling features. + +This paragraph discusses machine learning and artificial intelligence concepts. It covers topics like neural networks, deep learning, and natural language processing. Users should be able to select and label relevant portions of this text for RAG retrieval systems. + +Another section focuses on data engineering and ETL pipelines. This content explains how to process large datasets and build scalable data infrastructure. The labeling system should allow users to mark this as relevant or irrelevant for their specific use cases. + +The final paragraph contains information about feature stores and real-time machine learning systems. This text can be used to test the highlighting functionality and ensure that labels are properly stored and displayed in the user interface.`; + + setDocumentContent({ + content: testContent, + file_path: filePath, + }); + + loadSavedLabels(); + } else { + throw new Error( + "Document not found. Please use the test document path: ./src/test-document.txt", + ); + } + } catch (err) { + setError( + err instanceof Error + ? 
err.message + : "An error occurred while loading the document", + ); + } finally { + setIsLoading(false); + } + }; + + const handleTextSelection = () => { + const selection = window.getSelection(); + if (selection && selection.toString().trim() && documentContent) { + const selectedTextContent = selection.toString().trim(); + const range = selection.getRangeAt(0); + + const textContent = documentContent.content; + + let startIndex = -1; + let endIndex = -1; + + const rangeText = range.toString(); + if (rangeText) { + startIndex = textContent.indexOf(rangeText); + if (startIndex !== -1) { + endIndex = startIndex + rangeText.length; + } + } + + if (startIndex !== -1 && endIndex !== -1) { + setSelectedText({ + text: selectedTextContent, + start: startIndex, + end: endIndex, + }); + } + } + }; + + const handleLabelSelection = () => { + if (selectedText) { + const newLabel: DocumentLabel = { + text: selectedText.text, + start: selectedText.start, + end: selectedText.end, + label: labelingMode, + timestamp: Date.now(), + groundTruthLabel: groundTruthLabel, + }; + + setLabels([...labels, newLabel]); + setSelectedText(null); + setHasUnsavedChanges(true); + + const selection = window.getSelection(); + if (selection) { + selection.removeAllRanges(); + } + } + }; + + const handleRemoveLabel = (index: number) => { + setLabels(labels.filter((_: DocumentLabel, i: number) => i !== index)); + setHasUnsavedChanges(true); + }; + + const saveLabels = () => { + setIsSaving(true); + + setTimeout(() => { + try { + const saveData = { + filePath: filePath, + prompt: prompt, + query: query, + groundTruthLabel: groundTruthLabel, + labels: labels, + timestamp: new Date().toISOString(), + }; + + const pathParts = filePath.split("/"); + const filename = pathParts[pathParts.length - 1]; + const nameWithoutExt = filename.replace(/\.[^/.]+$/, ""); + const downloadFilename = `${nameWithoutExt}-labels.json`; + + const jsonString = JSON.stringify(saveData, null, 2); + const blob = new 
Blob([jsonString], { type: "application/json" }); + const url = URL.createObjectURL(blob); + + const link = document.createElement("a"); + link.href = url; + link.download = downloadFilename; + link.style.display = "none"; + + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + URL.revokeObjectURL(url); + + setHasUnsavedChanges(false); + alert( + `Successfully saved ${labels.length} labels. File downloaded as ${downloadFilename}`, + ); + } catch (error) { + console.error("Error saving labels:", error); + alert("Error saving labels. Please try again."); + } finally { + setIsSaving(false); + } + }, 100); + }; + + const loadSavedLabels = () => { + try { + const savedData = JSON.parse(localStorage.getItem("ragLabels") || "[]"); + const fileData = savedData.find( + (item: any) => item.filePath === filePath, + ); + + if (fileData) { + setPrompt(fileData.prompt || ""); + setQuery(fileData.query || ""); + setGroundTruthLabel(fileData.groundTruthLabel || ""); + setLabels(fileData.labels || []); + setHasUnsavedChanges(false); + } + } catch (error) { + console.error("Error loading saved labels:", error); + } + }; + + const renderDocumentWithHighlights = ( + content: string, + ): (string | React.ReactElement)[] => { + const allHighlights = [...labels]; + + if (selectedText) { + allHighlights.push({ + text: selectedText.text, + start: selectedText.start, + end: selectedText.end, + label: "temp-selection", + timestamp: 0, + groundTruthLabel: "", + }); + } + + if (allHighlights.length === 0) { + return [content]; + } + + const sortedHighlights = [...allHighlights].sort( + (a, b) => a.start - b.start, + ); + const result: (string | React.ReactElement)[] = []; + let lastIndex = 0; + + sortedHighlights.forEach((highlight, index) => { + result.push(content.slice(lastIndex, highlight.start)); + + let highlightColor, borderColor; + + if (highlight.label === "temp-selection") { + if (colorMode === "dark") { + highlightColor = "#1a4d66"; + borderColor = 
"#2d6b8a"; + } else { + highlightColor = "#add8e6"; + borderColor = "#87ceeb"; + } + } else if (highlight.label === "irrelevant") { + if (colorMode === "dark") { + highlightColor = "#4d1a1a"; + borderColor = "#6b2d2d"; + } else { + highlightColor = "#f8d7da"; + borderColor = "#f5c6cb"; + } + } else { + if (colorMode === "dark") { + highlightColor = "#1a4d1a"; + borderColor = "#2d6b2d"; + } else { + highlightColor = "#d4edda"; + borderColor = "#c3e6cb"; + } + } + + result.push( + + {highlight.text} + , + ); + + lastIndex = highlight.end; + }); + + result.push(content.slice(lastIndex)); + return result; + }; + + const labelingOptions = [ + { + id: "relevant", + label: "Relevant", + }, + { + id: "irrelevant", + label: "Irrelevant", + }, + ]; + + return ( + + +

+ Load a document and highlight text chunks to label them for chunk + extraction/retrieval. Add prompt and query context, then provide + ground truth labels for generation evaluation. +

+
+ + + + + + + setFilePath(e.target.value)} + /> + + + + + + Load Document + + + + + + + + {isLoading && ( + + + + + + Loading document... + + + )} + + {error && ( + +

{error}

+
+ )} + + {documentContent && ( + <> + + +

RAG Context

+
+ + + + + { + setPrompt(e.target.value); + setHasUnsavedChanges(true); + }} + rows={3} + /> + + + + + { + setQuery(e.target.value); + setHasUnsavedChanges(true); + }} + rows={3} + /> + + + +
+ + + + +

Step 1: Label for Chunk Extraction

+
+ + + + + + setLabelingMode(id)} + buttonSize="s" + /> + + + + + + Label Selected Text + + + + + + + + {selectedText && ( + + {selectedText.text} + + )} + + + + + +

Document Content

+
+ + +
+ {renderDocumentWithHighlights(documentContent.content)} +
+
+
+ + + + +

Step 2: Label for Generation

+
+ + + + { + setGroundTruthLabel(e.target.value); + setHasUnsavedChanges(true); + }} + rows={3} + /> + + + + + + + + Save Labels + + + + + + + {(labels.length > 0 || groundTruthLabel || prompt || query) && ( + <> + +

+ Click "Save Labels" to download your labeled data as a JSON + file. +

+
+ + + )} + + + + {hasUnsavedChanges && ( + <> + +

+ You have unsaved changes. Click "Save Labels" to persist your + work. +

+
+ + + )} + + {labels.length > 0 && ( + <> + + + +

Extracted Chunk Labels ({labels.length})

+
+ + {labels.map((label, index) => ( + + + + Chunk: {label.label} + + + {label.groundTruthLabel && ( + + + GT: {label.groundTruthLabel} + + + )} + + + "{label.text.substring(0, 80)} + {label.text.length > 80 ? "..." : ""}" + + + + handleRemoveLabel(index)} + > + Remove + + + + ))} +
+ + )} + + )} +
+ ); +}; + +export default RagTab; diff --git a/ui/src/test-document.txt b/ui/src/test-document.txt index 9a25d0c3d95..eae58809242 100644 --- a/ui/src/test-document.txt +++ b/ui/src/test-document.txt @@ -1,4 +1,4 @@ -This is a sample document for testing the document labeling functionality in Feast UI. +This is a sample document for testing the data labeling functionality in Feast UI. The document contains multiple paragraphs and sections that can be used to test the text highlighting and labeling features. From 210da6dcf86cb0e7413c530ff0f5c4870787f3a1 Mon Sep 17 00:00:00 2001 From: Rob Howley Date: Mon, 2 Jun 2025 13:22:11 -0400 Subject: [PATCH 2/2] fix: push_async feature store signature Signed-off-by: Rob Howley --- sdk/python/feast/feature_store.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 8b936e899c5..5cc232d5fca 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1495,6 +1495,7 @@ async def push_async( df: pd.DataFrame, allow_registry_cache: bool = True, to: PushMode = PushMode.ONLINE, + **kwargs, ): fvs = self._fvs_for_push_source_or_raise(push_source_name, allow_registry_cache)