|
639 | 639 | "await main() # type: ignore"
|
640 | 640 | ]
|
641 | 641 | },
|
| 642 | + { |
| 643 | + "cell_type": "markdown", |
| 644 | + "metadata": { |
| 645 | + "id": "Ley1ZGjnG5Fr" |
| 646 | + }, |
| 647 | + "source": [ |
| 648 | + "### Create indexes for faster similarity search in pgvector\n", |
| 649 | + "\n", |
| 650 | + "- Vector indexes can significantly speed up similarity search operations by avoiding the brute-force exact nearest neighbor search that is used by default.\n", |
| 651 | + "\n", |
| 652 | + "- pgvector comes with two types of indexes (as of v0.5.1): `hnsw` and `ivfflat`.\n", |
| 653 | + "\n", |
| 654 | + "> 💡 Click [here](https://cloud.google.com/blog/products/databases/faster-similarity-search-performance-with-pgvector-indexes) to learn more about pgvector indexes.\n", |
| 655 | + "\n", |
| 656 | + "Enter or modify the index parameter values for your index of choice, then run the corresponding cell:" |
| 657 | + ] |
| 658 | + }, |
| 659 | + { |
| 660 | + "cell_type": "code", |
| 661 | + "source": [ |
| 662 | + "# @markdown Create an HNSW index on the `product_embeddings` table:\n", |
| 663 | + "m = 24 # @param {type:\"integer\"}\n", |
| 664 | + "ef_construction = 100 # @param {type:\"integer\"}\n", |
| 665 | + "operator = \"vector_cosine_ops\" # @param [\"vector_cosine_ops\", \"vector_l2_ops\", \"vector_ip_ops\"]\n", |
| 666 | + "\n", |
| 667 | + "# Quick input validations.\n", |
| 668 | + "assert m, \"⚠️ Please input a valid value for m.\"\n", |
| 669 | + "assert ef_construction, \"⚠️ Please input a valid value for ef_construction.\"\n", |
| 670 | + "assert operator, \"⚠️ Please input a valid value for operator.\"\n", |
| 671 | + "\n", |
| 672 | + "from pgvector.asyncpg import register_vector\n", |
| 673 | + "import asyncio\n", |
| 674 | + "import asyncpg\n", |
| 675 | + "from google.cloud.sql.connector import Connector\n", |
| 676 | + "\n", |
| 677 | + "\n", |
| 678 | + "async def main():\n", |
| 679 | + " loop = asyncio.get_running_loop()\n", |
| 680 | + " async with Connector(loop=loop) as connector:\n", |
| 681 | + " # Create connection to Cloud SQL database.\n", |
| 682 | + " conn: asyncpg.Connection = await connector.connect_async(\n", |
| 683 | + " f\"{project_id}:{region}:{instance_name}\", # Cloud SQL instance connection name\n", |
| 684 | + " \"asyncpg\",\n", |
| 685 | + " user=f\"{database_user}\",\n", |
| 686 | + " password=f\"{database_password}\",\n", |
| 687 | + " db=f\"{database_name}\",\n", |
| 688 | + " )\n", |
| 689 | + "\n", |
| 690 | + " await register_vector(conn)\n", |
| 691 | + "\n", |
| 692 | + " # Create an HNSW index on the `product_embeddings` table.\n", |
| 693 | + " await conn.execute(\n", |
| 694 | + " f\"\"\"CREATE INDEX ON product_embeddings\n", |
| 695 | + " USING hnsw(embedding {operator})\n", |
| 696 | + " WITH (m = {m}, ef_construction = {ef_construction})\n", |
| 697 | + " \"\"\"\n", |
| 698 | + " )\n", |
| 699 | + "\n", |
| 700 | + " await conn.close()\n", |
| 701 | + "\n", |
| 702 | + "\n", |
| 703 | + "# Run the SQL commands now.\n", |
| 704 | + "await main() # type: ignore" |
| 705 | + ], |
| 706 | + "metadata": { |
| 707 | + "id": "EJUDntZ1KTk7", |
| 708 | + "cellView": "form" |
| 709 | + }, |
| 710 | + "execution_count": null, |
| 711 | + "outputs": [] |
| 712 | + }, |
| 713 | + { |
| 714 | + "cell_type": "code", |
| 715 | + "source": [ |
| 716 | + "# @markdown Create an IVFFLAT index on the `product_embeddings` table:\n", |
| 717 | + "lists = 100 # @param {type:\"integer\"}\n", |
| 718 | + "operator = \"vector_cosine_ops\" # @param [\"vector_cosine_ops\", \"vector_l2_ops\", \"vector_ip_ops\"]\n", |
| 719 | + "\n", |
| 720 | + "# Quick input validations.\n", |
| 721 | + "assert lists, \"⚠️ Please input a valid value for lists.\"\n", |
| 722 | + "\n", |
| 723 | + "from pgvector.asyncpg import register_vector\n", |
| 724 | + "import asyncio\n", |
| 725 | + "import asyncpg\n", |
| 726 | + "from google.cloud.sql.connector import Connector\n", |
| 727 | + "\n", |
| 728 | + "\n", |
| 729 | + "async def main():\n", |
| 730 | + " loop = asyncio.get_running_loop()\n", |
| 731 | + " async with Connector(loop=loop) as connector:\n", |
| 732 | + " # Create connection to Cloud SQL database.\n", |
| 733 | + " conn: asyncpg.Connection = await connector.connect_async(\n", |
| 734 | + " f\"{project_id}:{region}:{instance_name}\", # Cloud SQL instance connection name\n", |
| 735 | + " \"asyncpg\",\n", |
| 736 | + " user=f\"{database_user}\",\n", |
| 737 | + " password=f\"{database_password}\",\n", |
| 738 | + " db=f\"{database_name}\",\n", |
| 739 | + " )\n", |
| 740 | + "\n", |
| 741 | + " await register_vector(conn)\n", |
| 742 | + "\n", |
| 743 | + " # Create an IVFFLAT index on the `product_embeddings` table.\n", |
| 744 | + " await conn.execute(\n", |
| 745 | + " f\"\"\"CREATE INDEX ON product_embeddings\n", |
| 746 | + " USING ivfflat(embedding {operator})\n", |
| 747 | + " WITH (lists = {lists})\n", |
| 748 | + " \"\"\"\n", |
| 749 | + " )\n", |
| 750 | + "\n", |
| 751 | + " await conn.close()\n", |
| 752 | + "\n", |
| 753 | + "\n", |
| 754 | + "# Run the SQL commands now.\n", |
| 755 | + "await main() # type: ignore" |
| 756 | + ], |
| 757 | + "metadata": { |
| 758 | + "id": "7kFKBuysMk2I", |
| 759 | + "cellView": "form" |
| 760 | + }, |
| 761 | + "execution_count": null, |
| 762 | + "outputs": [] |
| 763 | + }, |
642 | 764 | {
|
643 | 765 | "cell_type": "markdown",
|
644 | 766 | "metadata": {
|
|
0 commit comments