From 57f2e84c8094f47d88e3d108c2948c447f5f0e7b Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 16 Jul 2024 21:32:24 -0700
Subject: [PATCH] Added postgres module for Postgres.js

---
 CHANGELOG.md              |  4 +++
 package.json              |  7 +++++
 src/postgres/index.js     | 57 +++++++++++++++++++++++++++++++++++++++
 tests/postgres.test.mjs   | 22 ++++++++-------
 types/postgres/index.d.ts | 11 ++++++++
 5 files changed, 92 insertions(+), 9 deletions(-)
 create mode 100644 src/postgres/index.js
 create mode 100644 types/postgres/index.d.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dc6b5bd..ce68600 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.2.1 (unreleased)
+
+- Added `postgres` module
+
 ## 0.2.0 (2024-06-27)
 
 - Added support for `halfvec` and `sparsevec` types to node-postgres
diff --git a/package.json b/package.json
index 3d8fb61..bc96ad6 100644
--- a/package.json
+++ b/package.json
@@ -40,6 +40,10 @@
       "types": "./types/pg-promise/index.d.ts",
       "default": "./src/pg-promise/index.js"
     },
+    "./postgres": {
+      "types": "./types/postgres/index.d.ts",
+      "default": "./src/postgres/index.js"
+    },
     "./sequelize": {
       "types": "./types/sequelize/index.d.ts",
       "default": "./src/sequelize/index.js"
@@ -73,6 +77,9 @@
       "pg-promise": [
         "types/pg-promise/index.d.ts"
       ],
+      "postgres": [
+        "types/postgres/index.d.ts"
+      ],
       "sequelize": [
         "types/sequelize/index.d.ts"
       ],
diff --git a/src/postgres/index.js b/src/postgres/index.js
new file mode 100644
index 0000000..923e1f9
--- /dev/null
+++ b/src/postgres/index.js
@@ -0,0 +1,57 @@
+const { fromSql, toSql } = require('..');
+const utils = require('../utils');
+
+/**
+ * Builds the type definitions to pass as the `types` option of a Postgres.js
+ * client, based on the type OIDs found in the connected database.
+ *
+ * @param sql Postgres.js client used to look up the type OIDs.
+ * @returns An object with a `vector` entry, plus `halfvec` and `sparsevec`
+ *   entries when those types exist in the database.
+ * @throws {Error} If the `vector` type is not found in the database.
+ */
+async function types(sql) {
+  // to_regtype yields NULL for an unknown type name; regtype('...') raises an
+  // error instead, which would make the checks below unreachable
+  const typeInfo = await sql`SELECT to_regtype('vector')::oid AS vector, to_regtype('halfvec')::oid AS halfvec, to_regtype('sparsevec')::oid AS sparsevec`;
+
+  const vectorOid = typeInfo[0].vector;
+  const halfvecOid = typeInfo[0].halfvec;
+  const sparsevecOid = typeInfo[0].sparsevec;
+
+  if (!vectorOid) {
+    throw new Error('vector type not found in the database');
+  }
+
+  const types = {
+    vector: {
+      to: vectorOid,
+      from: [vectorOid],
+      serialize: (v) => utils.vectorToSql(v),
+      parse: (v) => utils.vectorFromSql(v)
+    }
+  };
+
+  // halfvec and sparsevec are registered only when present
+  if (halfvecOid) {
+    types['halfvec'] = {
+      to: halfvecOid,
+      from: [halfvecOid],
+      serialize: (v) => utils.halfvecToSql(v),
+      parse: (v) => utils.halfvecFromSql(v)
+    };
+  }
+
+  if (sparsevecOid) {
+    types['sparsevec'] = {
+      to: sparsevecOid,
+      from: [sparsevecOid],
+      serialize: (v) => utils.sparsevecToSql(v),
+      parse: (v) => utils.sparsevecFromSql(v)
+    };
+  }
+
+  return types;
+}
+
+module.exports = {types, fromSql, toSql};
diff --git a/tests/postgres.test.mjs b/tests/postgres.test.mjs
index 363a8f2..f7d1495 100644
--- a/tests/postgres.test.mjs
+++ b/tests/postgres.test.mjs
@@ -1,30 +1,34 @@
 import assert from 'node:assert';
 import test from 'node:test';
 import postgres from 'postgres';
-import pgvector from 'pgvector';
+import pgvector from 'pgvector/postgres';
 import { SparseVector } from 'pgvector';
 
 test('postgres example', async () => {
-  const sql = postgres({database: 'pgvector_node_test', onnotice: function () { }});
+  let sql = postgres({database: 'pgvector_node_test'});
+  const types = await pgvector.types(sql);
+  await sql.end();
+
+  sql = postgres({database: 'pgvector_node_test', types: types, onnotice: function () { }});
 
   await sql`CREATE EXTENSION IF NOT EXISTS vector`;
   await sql`DROP TABLE IF EXISTS postgres_items`;
   await sql`CREATE TABLE postgres_items (id serial PRIMARY KEY, embedding vector(3), half_embedding halfvec(3), binary_embedding bit(3), sparse_embedding sparsevec(3))`;
 
   const newItems = [
-    {embedding: pgvector.toSql([1, 1, 1]), half_embedding: pgvector.toSql([1, 1, 1]), binary_embedding: '000', sparse_embedding: new SparseVector([1, 1, 1])},
-    {embedding: pgvector.toSql([2, 2, 2]), half_embedding: pgvector.toSql([2, 2, 2]), binary_embedding: '101', sparse_embedding: new SparseVector([2, 2, 2])},
-    {embedding: pgvector.toSql([1, 1, 2]), half_embedding: pgvector.toSql([1, 1, 2]), binary_embedding: '111', sparse_embedding: new SparseVector([1, 1, 2])}
+    {embedding: [1, 1, 1], half_embedding: [1, 1, 1], binary_embedding: '000', sparse_embedding: new SparseVector([1, 1, 1])},
+    {embedding: [2, 2, 2], half_embedding: [2, 2, 2], binary_embedding: '101', sparse_embedding: new SparseVector([2, 2, 2])},
+    {embedding: [1, 1, 2], half_embedding: [1, 1, 2], binary_embedding: '111', sparse_embedding: new SparseVector([1, 1, 2])}
   ];
   await sql`INSERT INTO postgres_items ${ sql(newItems, 'embedding', 'half_embedding', 'binary_embedding', 'sparse_embedding') }`;
 
-  const embedding = pgvector.toSql([1, 1, 1]);
+  const embedding = [1, 1, 1];
   const items = await sql`SELECT * FROM postgres_items ORDER BY embedding <-> ${ embedding } LIMIT 5`;
   assert.deepEqual(items.map(v => v.id), [1, 3, 2]);
-  assert.deepEqual(pgvector.fromSql(items[0].embedding), [1, 1, 1]);
-  assert.deepEqual(pgvector.fromSql(items[0].half_embedding), [1, 1, 1]);
+  assert.deepEqual(items[0].embedding, [1, 1, 1]);
+  assert.deepEqual(items[0].half_embedding, [1, 1, 1]);
   assert.equal(items[0].binary_embedding, '000');
-  assert.deepEqual((new SparseVector(items[0].sparse_embedding)).toArray(), [1, 1, 1]);
+  assert.deepEqual(items[0].sparse_embedding.toArray(), [1, 1, 1]);
 
   await sql`CREATE INDEX ON postgres_items USING hnsw (embedding vector_l2_ops)`;
 
diff --git a/types/postgres/index.d.ts b/types/postgres/index.d.ts
new file mode 100644
index 0000000..f532bc9
--- /dev/null
+++ b/types/postgres/index.d.ts
@@ -0,0 +1,11 @@
+export function types(sql: any): Promise<{
+    vector: {
+        to: any;
+        from: any[];
+        serialize: (v: any) => any;
+        parse: (v: any) => any;
+    };
+}>;
+import { fromSql } from "..";
+import { toSql } from "..";
+export { fromSql, toSql };