diff --git a/docs/getting-started/genai.md b/docs/getting-started/genai.md index 0b4e5225142..e6c915f7ff5 100644 --- a/docs/getting-started/genai.md +++ b/docs/getting-started/genai.md @@ -65,99 +65,6 @@ Feast supports transformations that can be used to: * Normalize and preprocess features before serving to LLMs * Apply custom transformations to adapt features for specific LLM requirements -## Getting Started with Feast for GenAI - -### Installation - -To use Feast with vector database support, install with the appropriate extras: - -```bash -# For Milvus support -pip install feast[milvus,nlp] - -# For Elasticsearch support -pip install feast[elasticsearch] - -# For Qdrant support -pip install feast[qdrant] - -# For SQLite support (Python 3.10 only) -pip install feast[sqlite_vec] -``` - -### Configuration - -Configure your feature store to use a vector database as the online store: - -```yaml -project: genai-project -provider: local -registry: data/registry.db -online_store: - type: milvus - path: data/online_store.db - vector_enabled: true - embedding_dim: 384 # Adjust based on your embedding model - index_type: "IVF_FLAT" - -offline_store: - type: file -entity_key_serialization_version: 3 -``` - -### Defining Vector Features - -Create feature views with vector index support: - -```python -from feast import FeatureView, Field, Entity -from feast.types import Array, Float32, String - -document = Entity( - name="document_id", - description="Document identifier", - join_keys=["document_id"], -) - -document_embeddings = FeatureView( - name="document_embeddings", - entities=[document], - schema=[ - Field( - name="vector", - dtype=Array(Float32), - vector_index=True, # Enable vector search - vector_search_metric="COSINE", # Similarity metric - ), - Field(name="document_id", dtype=String), - Field(name="content", dtype=String), - ], - source=document_source, - ttl=timedelta(days=30), -) -``` - -### Retrieving Similar Documents - -Use the `retrieve_online_documents_v2` 
method to find similar documents: - -```python -# Generate query embedding -query = "How does Feast support vector databases?" -query_embedding = embed_text(query) # Your embedding function - -# Retrieve similar documents -context_data = store.retrieve_online_documents_v2( - features=[ - "document_embeddings:vector", - "document_embeddings:document_id", - "document_embeddings:content", - ], - query=query_embedding, - top_k=3, - distance_metric='COSINE', -).to_df() -``` ## Use Cases ### Document Question-Answering diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index 990fa4f2fb6..ee98a612c0c 100644 --- a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -105,6 +105,11 @@ class ReadDocumentRequest(BaseModel): file_path: str +class SaveDocumentRequest(BaseModel): + file_path: str + data: dict + + def _get_features(request: GetOnlineFeaturesRequest, store: "feast.FeatureStore"): if request.feature_service: feature_service = store.get_feature_service( @@ -375,6 +380,30 @@ async def read_document_endpoint(request: ReadDocumentRequest): except Exception as e: return {"error": str(e)} + @app.post("/save-document") + async def save_document_endpoint(request: SaveDocumentRequest): + """Save ``request.data`` as JSON to ``<stem>-labels.json`` beside ``request.file_path``.""" + try: + import json + import os + from pathlib import Path + + file_path = Path(request.file_path).resolve() + base_name = file_path.stem + labels_file = file_path.parent / f"{base_name}-labels.json" + + # Validate the file that is actually written, comparing whole path components: + # a string-prefix test also accepts sibling directories such as "<cwd>-other". + if not labels_file.is_relative_to(Path(os.getcwd()).resolve()): + return {"error": "Invalid file path"} + + with open(labels_file, "w", encoding="utf-8") as file: + json.dump(request.data, file, indent=2, ensure_ascii=False) + + return {"success": True, "saved_to": str(labels_file)} + except Exception as e: + return {"error": str(e)} + @app.get("/chat") async def chat_ui(): # Serve the chat UI diff --git a/sdk/python/feast/ui_server.py b/sdk/python/feast/ui_server.py index d852bb279cc..6883dc1105e 100644 --- a/sdk/python/feast/ui_server.py +++ 
b/sdk/python/feast/ui_server.py @@ -7,10 +7,16 @@ from fastapi import FastAPI, Response from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles +from pydantic import BaseModel import feast +class SaveDocumentRequest(BaseModel): + file_path: str + data: dict + + def get_app( store: "feast.FeatureStore", project_id: str, @@ -76,6 +82,30 @@ def read_registry(): media_type="application/octet-stream", ) + @app.post("/save-document") + async def save_document_endpoint(request: SaveDocumentRequest): + """Save ``request.data`` as JSON to ``<stem>-labels.json`` beside ``request.file_path``.""" + try: + import json + import os + from pathlib import Path + + file_path = Path(request.file_path).resolve() + base_name = file_path.stem + labels_file = file_path.parent / f"{base_name}-labels.json" + + # Validate the file that is actually written, comparing whole path components: + # a string-prefix test also accepts sibling directories such as "<cwd>-other". + if not labels_file.is_relative_to(Path(os.getcwd()).resolve()): + return {"error": "Invalid file path"} + + with open(labels_file, "w", encoding="utf-8") as file: + json.dump(request.data, file, indent=2, ensure_ascii=False) + + return {"success": True, "saved_to": str(labels_file)} + except Exception as e: + return {"error": str(e)} + # For all other paths (such as paths that would otherwise be handled by react router), pass to React @app.api_route("/p/{path_name:path}", methods=["GET"]) def catch_all(): diff --git a/ui/src/FeastUISansProviders.tsx b/ui/src/FeastUISansProviders.tsx index 6ce5866e008..2c00a985c09 100644 --- a/ui/src/FeastUISansProviders.tsx +++ b/ui/src/FeastUISansProviders.tsx @@ -147,7 +147,7 @@ const FeastUISansProvidersInner = ({ element={} /> } /> } /> diff --git a/ui/src/custom-tabs/types.ts b/ui/src/custom-tabs/types.ts index be8c19651a0..8ee5f659b1c 100644 --- a/ui/src/custom-tabs/types.ts +++ b/ui/src/custom-tabs/types.ts @@ -136,18 +136,18 @@ interface DatasetCustomTabRegistrationInterface }: DatasetCustomTabProps) => JSX.Element; } -// Type for Document Labeling Custom Tabs -interface DocumentLabelingCustomTabProps { +// Type for Data Labeling Custom Tabs +interface DataLabelingCustomTabProps { id: string | undefined; feastObjectQuery: 
RegularFeatureViewQueryReturnType; } -interface DocumentLabelingCustomTabRegistrationInterface +interface DataLabelingCustomTabRegistrationInterface extends CustomTabRegistrationInterface { Component: ({ id, feastObjectQuery, ...args - }: DocumentLabelingCustomTabProps) => JSX.Element; + }: DataLabelingCustomTabProps) => JSX.Element; } export type { @@ -171,6 +171,6 @@ export type { FeatureCustomTabProps, DatasetCustomTabRegistrationInterface, DatasetCustomTabProps, - DocumentLabelingCustomTabRegistrationInterface, - DocumentLabelingCustomTabProps, + DataLabelingCustomTabRegistrationInterface, + DataLabelingCustomTabProps, }; diff --git a/ui/src/pages/Sidebar.tsx b/ui/src/pages/Sidebar.tsx index afc6c43acb4..d7a5a54cda0 100644 --- a/ui/src/pages/Sidebar.tsx +++ b/ui/src/pages/Sidebar.tsx @@ -132,13 +132,13 @@ const SideNav = () => { isSelected: useMatchSubpath(`${baseUrl}/data-set`), }, { - name: "Document Labeling", - id: htmlIdGenerator("documentLabeling")(), - icon: , + name: "Data Labeling", + id: htmlIdGenerator("dataLabeling")(), + icon: , renderItem: (props) => ( - + ), - isSelected: useMatchSubpath(`${baseUrl}/document-labeling`), + isSelected: useMatchSubpath(`${baseUrl}/data-labeling`), }, { name: "Permissions", diff --git a/ui/src/pages/document-labeling/ClassificationTab.tsx b/ui/src/pages/document-labeling/ClassificationTab.tsx new file mode 100644 index 00000000000..302b03cd9fa --- /dev/null +++ b/ui/src/pages/document-labeling/ClassificationTab.tsx @@ -0,0 +1,310 @@ +import React, { useState } from "react"; +import { + EuiPageSection, + EuiCallOut, + EuiSpacer, + EuiFlexGroup, + EuiFlexItem, + EuiFormRow, + EuiFieldText, + EuiButton, + EuiPanel, + EuiTitle, + EuiText, + EuiTable, + EuiTableHeader, + EuiTableHeaderCell, + EuiTableBody, + EuiTableRow, + EuiTableRowCell, + EuiSelect, + EuiLoadingSpinner, +} from "@elastic/eui"; + +interface ClassificationData { + id: number; + text: string; + currentClass: string; + originalClass?: string; +} + +const 
ClassificationTab = () => { + const [csvPath, setCsvPath] = useState("./src/sample-data.csv"); + const [isLoading, setIsLoading] = useState(false); + const [data, setData] = useState([]); + const [error, setError] = useState(null); + const [availableClasses] = useState(["positive", "negative", "neutral"]); + + const loadCsvData = async () => { + if (!csvPath) return; + + setIsLoading(true); + setError(null); + + try { + if (csvPath === "./src/sample-data.csv") { + const sampleData: ClassificationData[] = [ + { + id: 1, + text: "This product is amazing! I love the quality and design.", + currentClass: "positive", + originalClass: "positive", + }, + { + id: 2, + text: "The service was terrible and the food was cold.", + currentClass: "negative", + originalClass: "negative", + }, + { + id: 3, + text: "It's an okay product, nothing special but does the job.", + currentClass: "neutral", + originalClass: "neutral", + }, + { + id: 4, + text: "Excellent customer support and fast delivery!", + currentClass: "positive", + originalClass: "positive", + }, + { + id: 5, + text: "I'm not sure how I feel about this purchase.", + currentClass: "neutral", + originalClass: "positive", + }, + ]; + + setData(sampleData); + } else { + throw new Error( + "CSV file not found. Please use the sample data path: ./src/sample-data.csv", + ); + } + } catch (err) { + setError( + err instanceof Error + ? err.message + : "An error occurred while loading the CSV data", + ); + } finally { + setIsLoading(false); + } + }; + + const handleClassChange = (id: number, newClass: string) => { + setData( + data.map((item) => + item.id === id ? 
{ ...item, currentClass: newClass } : item, + ), + ); + }; + + const getChangedItems = () => { + return data.filter((item) => item.currentClass !== item.originalClass); + }; + + const resetChanges = () => { + setData( + data.map((item) => ({ ...item, currentClass: item.originalClass || "" })), + ); + }; + + const saveChanges = () => { + const changedItems = getChangedItems(); + console.log("Saving classification changes:", changedItems); + alert(`Saved ${changedItems.length} classification changes!`); + }; + + const columns = [ + { + field: "id", + name: "ID", + width: "60px", + }, + { + field: "text", + name: "Text", + width: "60%", + }, + { + field: "originalClass", + name: "Original Class", + width: "15%", + }, + { + field: "currentClass", + name: "Current Class", + width: "20%", + }, + ]; + + return ( + + +

+ Load a CSV file containing text samples and edit their classification + labels. This helps improve your classification models by providing + corrected training data. +

+
+ + + + + + + setCsvPath(e.target.value)} + /> + + + + + + Load CSV Data + + + + + + + + {isLoading && ( + + + + + + Loading CSV data... + + + )} + + {error && ( + +

{error}

+
+ )} + + {data.length > 0 && ( + <> + + + +

Classification Data ({data.length} samples)

+
+
+ + + + + Reset Changes + + + + + Save Changes ({getChangedItems().length}) + + + + +
+ + + + + + + {columns.map((column, index) => ( + + {column.name} + + ))} + + + {data.map((item) => ( + + {item.id} + + {item.text} + + + + {item.originalClass} + + + + ({ + value: cls, + text: cls, + }))} + value={item.currentClass} + onChange={(e) => + handleClassChange(item.id, e.target.value) + } + compressed + /> + + + ))} + + + + + {getChangedItems().length > 0 && ( + <> + + +

+ You have unsaved changes. Click "Save Changes" to persist your + modifications. +

+
+ + )} + + )} +
+ ); +}; + +export default ClassificationTab; diff --git a/ui/src/pages/document-labeling/DocumentLabelingPage.tsx b/ui/src/pages/document-labeling/DocumentLabelingPage.tsx index 9ec4c090a6f..5563d6328c1 100644 --- a/ui/src/pages/document-labeling/DocumentLabelingPage.tsx +++ b/ui/src/pages/document-labeling/DocumentLabelingPage.tsx @@ -6,398 +6,57 @@ import { EuiPageHeader, EuiTitle, EuiSpacer, - EuiFlexGroup, - EuiFlexItem, - EuiButton, - EuiFieldText, - EuiFormRow, - EuiPanel, - EuiText, - EuiCallOut, - EuiLoadingSpinner, - EuiButtonGroup, - EuiCode, + EuiTabs, + EuiTab, } from "@elastic/eui"; - -interface DocumentContent { - content: string; - file_path: string; -} - -interface TextSelection { - text: string; - start: number; - end: number; -} - -interface DocumentLabel { - text: string; - start: number; - end: number; - label: string; - timestamp: number; -} +import RagTab from "./RagTab"; +import ClassificationTab from "./ClassificationTab"; const DocumentLabelingPage = () => { - const [filePath, setFilePath] = useState("./src/test-document.txt"); - const [selectedText, setSelectedText] = useState(null); - const [labelingMode, setLabelingMode] = useState("relevant"); - const [labels, setLabels] = useState([]); - const [isLoading, setIsLoading] = useState(false); - const [documentContent, setDocumentContent] = - useState(null); - const [error, setError] = useState(null); - - const loadDocument = async () => { - if (!filePath) return; - - setIsLoading(true); - setError(null); - - try { - if (filePath === "./src/test-document.txt") { - const testContent = `This is a sample document for testing the document labeling functionality in Feast UI. - -The document contains multiple paragraphs and sections that can be used to test the text highlighting and labeling features. - -This paragraph discusses machine learning and artificial intelligence concepts. It covers topics like neural networks, deep learning, and natural language processing. 
Users should be able to select and label relevant portions of this text for RAG retrieval systems. - -Another section focuses on data engineering and ETL pipelines. This content explains how to process large datasets and build scalable data infrastructure. The labeling system should allow users to mark this as relevant or irrelevant for their specific use cases. - -The final paragraph contains information about feature stores and real-time machine learning systems. This text can be used to test the highlighting functionality and ensure that labels are properly stored and displayed in the user interface.`; - - setDocumentContent({ - content: testContent, - file_path: filePath, - }); - } else { - throw new Error( - "Document not found. Please use the test document path: ./src/test-document.txt", - ); - } - } catch (err) { - setError( - err instanceof Error - ? err.message - : "An error occurred while loading the document", - ); - } finally { - setIsLoading(false); - } - }; - - const handleTextSelection = () => { - const selection = window.getSelection(); - if (selection && selection.toString().trim() && documentContent) { - const selectedTextContent = selection.toString().trim(); - const range = selection.getRangeAt(0); - - const textContent = documentContent.content; - - let startIndex = -1; - let endIndex = -1; - - const rangeText = range.toString(); - if (rangeText) { - startIndex = textContent.indexOf(rangeText); - if (startIndex !== -1) { - endIndex = startIndex + rangeText.length; - } - } - - if (startIndex !== -1 && endIndex !== -1) { - setSelectedText({ - text: selectedTextContent, - start: startIndex, - end: endIndex, - }); - } - } - }; - - const handleLabelSelection = () => { - if (selectedText) { - const newLabel: DocumentLabel = { - text: selectedText.text, - start: selectedText.start, - end: selectedText.end, - label: labelingMode, - timestamp: Date.now(), - }; - - setLabels([...labels, newLabel]); - setSelectedText(null); - - const selection = 
window.getSelection(); - if (selection) { - selection.removeAllRanges(); - } - } - }; - - const handleRemoveLabel = (index: number) => { - setLabels(labels.filter((_: DocumentLabel, i: number) => i !== index)); - }; + const [selectedTab, setSelectedTab] = useState("rag"); - const renderDocumentWithHighlights = ( - content: string, - ): (string | React.ReactElement)[] => { - const allHighlights = [...labels]; - - if (selectedText) { - allHighlights.push({ - text: selectedText.text, - start: selectedText.start, - end: selectedText.end, - label: "temp-selection", - timestamp: 0, - }); - } - - if (allHighlights.length === 0) { - return [content]; - } - - const sortedHighlights = [...allHighlights].sort( - (a, b) => a.start - b.start, - ); - const result: (string | React.ReactElement)[] = []; - let lastIndex = 0; - - sortedHighlights.forEach((highlight, index) => { - result.push(content.slice(lastIndex, highlight.start)); - - let highlightColor = "#d4edda"; - let borderColor = "#c3e6cb"; - - if (highlight.label === "temp-selection") { - highlightColor = "#add8e6"; - borderColor = "#87ceeb"; - } else if (highlight.label === "irrelevant") { - highlightColor = "#f8d7da"; - borderColor = "#f5c6cb"; - } - - result.push( - - {highlight.text} - , - ); - - lastIndex = highlight.end; - }); - - result.push(content.slice(lastIndex)); - return result; - }; - - const labelingOptions = [ + const tabs = [ { - id: "relevant", - label: "Relevant", + id: "rag", + name: "RAG", + content: , }, { - id: "irrelevant", - label: "Irrelevant", + id: "classification", + name: "Classification", + content: , }, ]; + const selectedTabContent = tabs.find( + (tab) => tab.id === selectedTab, + )?.content; + return ( -

Document Labeling for RAG

+

Data Labeling

- - -

- Load a document file and highlight text chunks to label them as - relevant or irrelevant for RAG retrieval. This helps improve the - quality of your retrieval system by providing human feedback. -

-
- - - - - - - setFilePath(e.target.value)} - /> - - - - - - Load Document - - - - - - - - {isLoading && ( - - - - - - Loading document... - - - )} - - {error && ( - + {tabs.map((tab) => ( + setSelectedTab(tab.id)} + isSelected={tab.id === selectedTab} > -

{error}

-
- )} - - {documentContent && ( - <> - - - - Labeling mode: - - - - setLabelingMode(id)} - buttonSize="s" - /> - - - - Label Selected Text - - - - - - - {selectedText && ( - - {selectedText.text} - - )} - - + {tab.name} + + ))} + - - -

Document Content

-
- - -
- {renderDocumentWithHighlights(documentContent.content)} -
-
-
+ - {labels.length > 0 && ( - <> - - - -

Labels ({labels.length})

-
- - {labels.map((label, index) => ( - - - - {label.label} - - - - - "{label.text.substring(0, 100)} - {label.text.length > 100 ? "..." : ""}" - - - - handleRemoveLabel(index)} - > - Remove - - - - ))} -
- - )} - - )} -
+ {selectedTabContent}
diff --git a/ui/src/pages/document-labeling/RagTab.tsx b/ui/src/pages/document-labeling/RagTab.tsx new file mode 100644 index 00000000000..ae5fd22aea7 --- /dev/null +++ b/ui/src/pages/document-labeling/RagTab.tsx @@ -0,0 +1,615 @@ +import React, { useState } from "react"; +import { + EuiPageSection, + EuiCallOut, + EuiSpacer, + EuiFlexGroup, + EuiFlexItem, + EuiFormRow, + EuiFieldText, + EuiButton, + EuiPanel, + EuiTitle, + EuiText, + EuiLoadingSpinner, + EuiButtonGroup, + EuiCode, + EuiTextArea, +} from "@elastic/eui"; +import { useTheme } from "../../contexts/ThemeContext"; + +interface DocumentContent { + content: string; + file_path: string; +} + +interface TextSelection { + text: string; + start: number; + end: number; +} + +interface DocumentLabel { + text: string; + start: number; + end: number; + label: string; + timestamp: number; + groundTruthLabel: string; +} + +const RagTab = () => { + const { colorMode } = useTheme(); + const [filePath, setFilePath] = useState("./src/test-document.txt"); + const [selectedText, setSelectedText] = useState(null); + const [labelingMode, setLabelingMode] = useState("relevant"); + const [labels, setLabels] = useState([]); + const [isLoading, setIsLoading] = useState(false); + const [documentContent, setDocumentContent] = + useState(null); + const [error, setError] = useState(null); + const [prompt, setPrompt] = useState(""); + const [query, setQuery] = useState(""); + const [groundTruthLabel, setGroundTruthLabel] = useState(""); + const [isSaving, setIsSaving] = useState(false); + const [hasUnsavedChanges, setHasUnsavedChanges] = useState(false); + + const loadDocument = async () => { + if (!filePath) return; + + setIsLoading(true); + setError(null); + + try { + if (filePath === "./src/test-document.txt") { + const testContent = `This is a sample document for testing the data labeling functionality in Feast UI. 
+ +The document contains multiple paragraphs and sections that can be used to test the text highlighting and labeling features. + +This paragraph discusses machine learning and artificial intelligence concepts. It covers topics like neural networks, deep learning, and natural language processing. Users should be able to select and label relevant portions of this text for RAG retrieval systems. + +Another section focuses on data engineering and ETL pipelines. This content explains how to process large datasets and build scalable data infrastructure. The labeling system should allow users to mark this as relevant or irrelevant for their specific use cases. + +The final paragraph contains information about feature stores and real-time machine learning systems. This text can be used to test the highlighting functionality and ensure that labels are properly stored and displayed in the user interface.`; + + setDocumentContent({ + content: testContent, + file_path: filePath, + }); + + loadSavedLabels(); + } else { + throw new Error( + "Document not found. Please use the test document path: ./src/test-document.txt", + ); + } + } catch (err) { + setError( + err instanceof Error + ? 
err.message + : "An error occurred while loading the document", + ); + } finally { + setIsLoading(false); + } + }; + + const handleTextSelection = () => { + const selection = window.getSelection(); + if (selection && selection.toString().trim() && documentContent) { + const selectedTextContent = selection.toString().trim(); + const range = selection.getRangeAt(0); + + const textContent = documentContent.content; + + let startIndex = -1; + let endIndex = -1; + + const rangeText = range.toString(); + if (rangeText) { + startIndex = textContent.indexOf(rangeText); + if (startIndex !== -1) { + endIndex = startIndex + rangeText.length; + } + } + + if (startIndex !== -1 && endIndex !== -1) { + setSelectedText({ + text: selectedTextContent, + start: startIndex, + end: endIndex, + }); + } + } + }; + + const handleLabelSelection = () => { + if (selectedText) { + const newLabel: DocumentLabel = { + text: selectedText.text, + start: selectedText.start, + end: selectedText.end, + label: labelingMode, + timestamp: Date.now(), + groundTruthLabel: groundTruthLabel, + }; + + setLabels([...labels, newLabel]); + setSelectedText(null); + setHasUnsavedChanges(true); + + const selection = window.getSelection(); + if (selection) { + selection.removeAllRanges(); + } + } + }; + + const handleRemoveLabel = (index: number) => { + setLabels(labels.filter((_: DocumentLabel, i: number) => i !== index)); + setHasUnsavedChanges(true); + }; + + const saveLabels = () => { + setIsSaving(true); + + setTimeout(() => { + try { + const saveData = { + filePath: filePath, + prompt: prompt, + query: query, + groundTruthLabel: groundTruthLabel, + labels: labels, + timestamp: new Date().toISOString(), + }; + + const pathParts = filePath.split("/"); + const filename = pathParts[pathParts.length - 1]; + const nameWithoutExt = filename.replace(/\.[^/.]+$/, ""); + const downloadFilename = `${nameWithoutExt}-labels.json`; + + const jsonString = JSON.stringify(saveData, null, 2); + const blob = new 
Blob([jsonString], { type: "application/json" }); + const url = URL.createObjectURL(blob); + + const link = document.createElement("a"); + link.href = url; + link.download = downloadFilename; + link.style.display = "none"; + + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + URL.revokeObjectURL(url); + + setHasUnsavedChanges(false); + alert( + `Successfully saved ${labels.length} labels. File downloaded as ${downloadFilename}`, + ); + } catch (error) { + console.error("Error saving labels:", error); + alert("Error saving labels. Please try again."); + } finally { + setIsSaving(false); + } + }, 100); + }; + + const loadSavedLabels = () => { + try { + const savedData = JSON.parse(localStorage.getItem("ragLabels") || "[]"); + const fileData = savedData.find( + (item: any) => item.filePath === filePath, + ); + + if (fileData) { + setPrompt(fileData.prompt || ""); + setQuery(fileData.query || ""); + setGroundTruthLabel(fileData.groundTruthLabel || ""); + setLabels(fileData.labels || []); + setHasUnsavedChanges(false); + } + } catch (error) { + console.error("Error loading saved labels:", error); + } + }; + + const renderDocumentWithHighlights = ( + content: string, + ): (string | React.ReactElement)[] => { + const allHighlights = [...labels]; + + if (selectedText) { + allHighlights.push({ + text: selectedText.text, + start: selectedText.start, + end: selectedText.end, + label: "temp-selection", + timestamp: 0, + groundTruthLabel: "", + }); + } + + if (allHighlights.length === 0) { + return [content]; + } + + const sortedHighlights = [...allHighlights].sort( + (a, b) => a.start - b.start, + ); + const result: (string | React.ReactElement)[] = []; + let lastIndex = 0; + + sortedHighlights.forEach((highlight, index) => { + result.push(content.slice(lastIndex, highlight.start)); + + let highlightColor, borderColor; + + if (highlight.label === "temp-selection") { + if (colorMode === "dark") { + highlightColor = "#1a4d66"; + borderColor = 
"#2d6b8a"; + } else { + highlightColor = "#add8e6"; + borderColor = "#87ceeb"; + } + } else if (highlight.label === "irrelevant") { + if (colorMode === "dark") { + highlightColor = "#4d1a1a"; + borderColor = "#6b2d2d"; + } else { + highlightColor = "#f8d7da"; + borderColor = "#f5c6cb"; + } + } else { + if (colorMode === "dark") { + highlightColor = "#1a4d1a"; + borderColor = "#2d6b2d"; + } else { + highlightColor = "#d4edda"; + borderColor = "#c3e6cb"; + } + } + + result.push( + + {highlight.text} + , + ); + + lastIndex = highlight.end; + }); + + result.push(content.slice(lastIndex)); + return result; + }; + + const labelingOptions = [ + { + id: "relevant", + label: "Relevant", + }, + { + id: "irrelevant", + label: "Irrelevant", + }, + ]; + + return ( + + +

+ Load a document and highlight text chunks to label them for chunk + extraction/retrieval. Add prompt and query context, then provide + ground truth labels for generation evaluation. +

+
+ + + + + + + setFilePath(e.target.value)} + /> + + + + + + Load Document + + + + + + + + {isLoading && ( + + + + + + Loading document... + + + )} + + {error && ( + +

{error}

+
+ )} + + {documentContent && ( + <> + + +

RAG Context

+
+ + + + + { + setPrompt(e.target.value); + setHasUnsavedChanges(true); + }} + rows={3} + /> + + + + + { + setQuery(e.target.value); + setHasUnsavedChanges(true); + }} + rows={3} + /> + + + +
+ + + + +

Step 1: Label for Chunk Extraction

+
+ + + + + + setLabelingMode(id)} + buttonSize="s" + /> + + + + + + Label Selected Text + + + + + + + + {selectedText && ( + + {selectedText.text} + + )} + + + + + +

Document Content

+
+ + +
+ {renderDocumentWithHighlights(documentContent.content)} +
+
+
+ + + + +

Step 2: Label for Generation

+
+ + + + { + setGroundTruthLabel(e.target.value); + setHasUnsavedChanges(true); + }} + rows={3} + /> + + + + + + + + Save Labels + + + + + + + {(labels.length > 0 || groundTruthLabel || prompt || query) && ( + <> + +

+ Click "Save Labels" to download your labeled data as a JSON + file. +

+
+ + + )} + + + + {hasUnsavedChanges && ( + <> + +

+ You have unsaved changes. Click "Save Labels" to persist your + work. +

+
+ + + )} + + {labels.length > 0 && ( + <> + + + +

Extracted Chunk Labels ({labels.length})

+
+ + {labels.map((label, index) => ( + + + + Chunk: {label.label} + + + {label.groundTruthLabel && ( + + + GT: {label.groundTruthLabel} + + + )} + + + "{label.text.substring(0, 80)} + {label.text.length > 80 ? "..." : ""}" + + + + handleRemoveLabel(index)} + > + Remove + + + + ))} +
+ + )} + + )} +
+ ); +}; + +export default RagTab; diff --git a/ui/src/test-document.txt b/ui/src/test-document.txt index 9a25d0c3d95..eae58809242 100644 --- a/ui/src/test-document.txt +++ b/ui/src/test-document.txt @@ -1,4 +1,4 @@ -This is a sample document for testing the document labeling functionality in Feast UI. +This is a sample document for testing the data labeling functionality in Feast UI. The document contains multiple paragraphs and sections that can be used to test the text highlighting and labeling features.