8000 use vector type · lohithgn/azure-sql-db-openai@36c1c9a · GitHub
[go: up one dir, main page]

Skip to content

Commit 36c1c9a

Browse files
committed
use vector type
1 parent 509c39c commit 36c1c9a

5 files changed

+58
-53
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
Cleanup if needed
3+
*/
4+
if not exists(select * from sys.symmetric_keys where [name] = '##MS_DatabaseMasterKey##')
5+
begin
6+
create master key encryption by password = 'Pa$$w0rd!'
7+
end
8+
go
9+
if exists(select * from sys.[external_data_sources] where name = 'openai_playground')
10+
begin
11+
drop external data source [openai_playground];
12+
end
13+
go
14+
if exists(select * from sys.[database_scoped_credentials] where name = 'openai_playground')
15+
begin
16+
drop database scoped credential [openai_playground];
17+
end
18+
go
19+
20+
/*
21+
Create database scoped credential and external data source.
22+
File is assumed to be in a path like:
23+
https://<myaccount>.blob.core.windows.net/playground/wikipedia/vector_database_wikipedia_articles_embedded.csv
24+
25+
Please note that it is recommened to avoid using SAS tokens: the best practice is to use Managed Identity as described here:
26+
https://learn.microsoft.com/en-us/sql/relational-databases/import-export/import-bulk-data-by-using-bulk-insert-or-openrowset-bulk-sql-server?view=sql-server-ver16#bulk-importing-from-azure-blob-storage
27+
28+
*/
29+
create database scoped credential [openai_playground]
30+
with identity = 'SHARED ACCESS SIGNATURE',
31+
secret = '<sas-token>'; -- make sure not to include the ? at the beginning
32+
go
33+
create external data source [openai_playground]
34+
with
35+
(
36+
type = blob_storage,
37+
location = 'https://<account>.blob.core.windows.net/playground',
38+
credential = [openai_playground]
39+
);
40+
go
Lines changed: 9 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,3 @@
1-
/*
2-
Cleanup if needed
3-
*/
4-
if not exists(select * from sys.symmetric_keys where [name] = '##MS_DatabaseMasterKey##')
5-
begin
6-
create master key encryption by password = 'Pa$$w0rd!'
7-
end
8-
go
9-
if exists(select * from sys.[external_data_sources] where name = 'openai_playground')
10-
begin
11-
drop external data source [openai_playground];
12-
end
13-
go
14-
if exists(select * from sys.[database_scoped_credentials] where name = 'openai_playground')
15-
begin
16-
drop database scoped credential [openai_playground];
17-
end
18-
go
19-
20-
/*
21-
Create database scoped credential and external data source.
22-
File is assumed to be in a path like:
23-
https://<myaccount>.blob.core.windows.net/playground/wikipedia/vector_database_wikipedia_articles_embedded.csv
24-
*/
25-
create database scoped credential [openai_playground]
26-
with identity = 'SHARED ACCESS SIGNATURE',
27-
secret = '<sas-token>'; -- make sure not to include the ? at the beginning
28-
go
29-
create external data source [openai_playground]
30-
with
31-
(
32-
type = blob_storage,
33-
location = 'https://<account>.blob.core.windows.net/playground',
34-
credential = [openai_playground]
35-
);
36-
go
37-
381
/*
392
Create table
403
*/
@@ -73,17 +36,24 @@ go
7336
Add primary key
7437
*/
7538
alter table [dbo].[wikipedia_articles_embeddings]
76-
add constraint pk__wikipedia_articles_embeddings primary key nonclustered (id)
39+
add constraint pk__wikipedia_articles_embeddings primary key clustered (id)
40+
go
41+
42+
/*
43+
Add index on title
44+
*/
45+
create index [ix_title] on [dbo].[wikipedia_articles_embeddings](title)
7746
go
7847

7948
/*
8049
Verify data
8150
*/
82-
select top (100) * from [dbo].[wikipedia_articles_embeddings]
51+
select top (10) * from [dbo].[wikipedia_articles_embeddings]
8352
go
8453

8554
select * from [dbo].[wikipedia_articles_embeddings] where title = 'Alan Turing'
8655
go
8756

8857

8958

59+

vector-embeddings/02-use-native-vectors.sql

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,20 @@
22
Add columns to store the native vectors
33
*/
44
alter table wikipedia_articles_embeddings
5-
add title_vector_native varbinary(8000);
5+
add title_vector_ada2 vector(1536);
66

77
alter table wikipedia_articles_embeddings
8-
add content_vector_native varbinary(8000);
8+
add content_vector_ada2 vector(1536);
9+
go
910

1011
/*
1112
Update the native vectors
1213
*/
1314
update
1415
wikipedia_articles_embeddings
1516
set
16-
title_vector_native = json_array_to_vector(title_vector),
17-
content_vector_native = json_array_to_vector(content_vector);
17+
title_vector_ada2 = cast(title_vector as vector(1536)),
18+
content_vector_ada2 = cast(content_vector as vector(1536))
1819
go
1920

2021
/*
@@ -28,16 +29,10 @@ alter table wikipedia_articles_embeddings
2829
drop column content_vector;
2930
go
3031

31-
/*
32-
Rename the columns
33-
*/
34-
EXEC sp_rename 'dbo.wikipedia_articles_embeddings.title_vector_native', 'title_vector_ada2', 'COLUMN';
35-
EXEC sp_rename 'dbo.wikipedia_articles_embeddings.content_vector_native', 'content_vector_ada2', 'COLUMN';
36-
3732
/*
3833
Verify data
3934
*/
40-
select top (100) * from [dbo].[wikipedia_articles_embeddings]
35+
select top (10) * from [dbo].[wikipedia_articles_embeddings]
4136
go
4237

4338
select * from [dbo].[wikipedia_articles_embeddings] where title = 'Alan Turing'

vector-embeddings/04-create-get-embeddings-procedure.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
create or alter procedure dbo.get_embedding
55
@deployedModelName nvarchar(1000),
66
@inputText nvarchar(max),
7-
@embedding varbinary(8000) output
7+
@embedding vector(1536) output
88
as
99
declare @retval int, @response nvarchar(max);
1010
declare @payload nvarchar(max) = json_object('input': @inputText);
@@ -23,7 +23,7 @@ end else begin
2323
select @response as 'Error message from OpenAI API';
2424
end
2525

26-
set @embedding = json_array_to_vector(@re);
26+
set @embedding = cast(@re as vector(1536));
2727

2828
return @retval
2929
go

vector-embeddings/05-find-similar-articles.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*/
66

77
declare @inputText nvarchar(max) = 'the foundation series by isaac asimov';
8-
declare @retval int, @embedding varbinary(8000);
8+
declare @retval int, @embedding vector(1536);
99

1010
exec @retval = dbo.get_embedding '<deployment-id>', @inputText, @embedding output;
1111

0 commit comments

Comments
 (0)
0