chore: add create indexes section to gen AI notebook · peterma/gcp-python-docs-samples@5bb78f4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5bb78f4

Browse files
authored
chore: add create indexes section to gen AI notebook
1 parent 15fe883 commit 5bb78f4

File tree

1 file changed

+122
-0
lines changed

1 file changed

+122
-0
lines changed

cloud-sql/postgres/pgvector/notebooks/pgvector_gen_ai_demo.ipynb

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,128 @@
639639
"await main() # type: ignore"
640640
]
641641
},
642+
{
643+
"cell_type": "markdown",
644+
"metadata": {
645+
"id": "Ley1ZGjnG5Fr"
646+
},
647+
"source": [
648+
"### Create indexes for faster similarity search in pgvector\n",
649+
"\n",
650+
"- Vector indexes can significantly speed up similarity search operations and avoid the brute-force exact nearest-neighbor search that is used by default.\n",
651+
"\n",
652+
"- pgvector comes with two types of indexes (as of v0.5.1): `hnsw` and `ivfflat`.\n",
653+
"\n",
654+
"> 💡 Click [here](https://cloud.google.com/blog/products/databases/faster-similarity-search-performance-with-pgvector-indexes) to learn more about pgvector indexes.\n",
655+
"\n",
656+
"Enter or modify the values of index parameters for your index of choice and run the corresponding cell:"
657+
]
658+
},
659+
{
660+
"cell_type": "code",
661+
"source": [
662+
"# @markdown Create an HNSW index on the `product_embeddings` table:\n",
663+
"m = 24 # @param {type:\"integer\"}\n",
664+
"ef_construction = 100 # @param {type:\"integer\"}\n",
665+
"operator = \"vector_cosine_ops\" # @param [\"vector_cosine_ops\", \"vector_l2_ops\", \"vector_ip_ops\"]\n",
666+
"\n",
667+
"# Quick input validations.\n",
668+
"assert m, \"⚠️ Please input a valid value for m.\"\n",
669+
"assert ef_construction, \"⚠️ Please input a valid value for ef_construction.\"\n",
670+
"assert operator, \"⚠️ Please input a valid value for operator.\"\n",
671+
"\n",
672+
"from pgvector.asyncpg import register_vector\n",
673+
"import asyncio\n",
674+
"import asyncpg\n",
675+
"from google.cloud.sql.connector import Connector\n",
676+
"\n",
677+
"\n",
678+
"async def main():\n",
679+
" loop = asyncio.get_running_loop()\n",
680+
" async with Connector(loop=loop) as connector:\n",
681+
" # Create connection to Cloud SQL database.\n",
682+
" conn: asyncpg.Connection = await connector.connect_async(\n",
683+
" f\"{project_id}:{region}:{instance_name}\", # Cloud SQL instance connection name\n",
684+
" \"asyncpg\",\n",
685+
" user=f\"{database_user}\",\n",
686+
" password=f\"{database_password}\",\n",
687+
" db=f\"{database_name}\",\n",
688+
" )\n",
689+
"\n",
690+
" await register_vector(conn)\n",
691+
"\n",
692+
" # Create an HNSW index on the `product_embeddings` table.\n",
693+
" await conn.execute(\n",
694+
" f\"\"\"CREATE INDEX ON product_embeddings\n",
695+
" USING hnsw(embedding {operator})\n",
696+
" WITH (m = {m}, ef_construction = {ef_construction})\n",
697+
" \"\"\"\n",
698+
" )\n",
699+
"\n",
700+
" await conn.close()\n",
701+
"\n",
702+
"\n",
703+
"# Run the SQL commands now.\n",
704+
"await main() # type: ignore"
705+
],
706+
"metadata": {
707+
"id": "EJUDntZ1KTk7",
708+
"cellView": "form"
709+
},
710+
"execution_count": null,
711+
"outputs": []
712+
},
713+
{
714+
"cell_type": "code",
715+
"source": [
716+
"# @markdown Create an IVFFLAT index on the `product_embeddings` table:\n",
717+
"lists = 100 # @param {type:\"integer\"}\n",
718+
"operator = \"vector_cosine_ops\" # @param [\"vector_cosine_ops\", \"vector_l2_ops\", \"vector_ip_ops\"]\n",
719+
"\n",
720+
"# Quick input validations.\n",
721+
"assert lists, \"⚠️ Please input a valid value for lists.\"\n",
722+
"\n",
723+
"from pgvector.asyncpg import register_vector\n",
724+
"import asyncio\n",
725+
"import asyncpg\n",
726+
"from google.cloud.sql.connector import Connector\n",
727+
"\n",
728+
"\n",
729+
"async def main():\n",
730+
" loop = asyncio.get_running_loop()\n",
731+
" async with Connector(loop=loop) as connector:\n",
732+
" # Create connection to Cloud SQL database.\n",
733+
" conn: asyncpg.Connection = await connector.connect_async(\n",
734+
" f\"{project_id}:{region}:{instance_name}\", # Cloud SQL instance connection name\n",
735+
" \"asyncpg\",\n",
736+
" user=f\"{database_user}\",\n",
737+
" password=f\"{database_password}\",\n",
738+
" db=f\"{database_name}\",\n",
739+
" )\n",
740+
"\n",
741+
" await register_vector(conn)\n",
742+
"\n",
743+
" # Create an IVFFLAT index on the `product_embeddings` table.\n",
744+
" await conn.execute(\n",
745+
" f\"\"\"CREATE INDEX ON product_embeddings\n",
746+
" USING ivfflat(embedding {operator})\n",
747+
" WITH (lists = {lists})\n",
748+
" \"\"\"\n",
749+
" )\n",
750+
"\n",
751+
" await conn.close()\n",
752+
"\n",
753+
"\n",
754+
"# Run the SQL commands now.\n",
755+
"await main() # type: ignore"
756+
],
757+
"metadata": {
758+
"id": "7kFKBuysMk2I",
759+
"cellView": "form"
760+
},
761+
"execution_count": null,
762+
"outputs": []
763+
},
642764
{
643765
"cell_type": "markdown",
644766
"metadata": {

0 commit comments

Comments
 (0)
0