added some instructions · explodinggradients/linkedin_ai@a6b3f46 · GitHub

Commit a6b3f46

added some instructions
1 parent e377cbb commit a6b3f46

File tree

2 files changed: +47, -219 lines

0_example.ipynb (+9, -41)

@@ -33,7 +33,7 @@
  "from linkedin_ai import LinkedinAI\n",
  "\n",
  "# Set your OpenAI API key as an environment variable\n",
- "os.environ[\"OPENAI_API_KEY\"] = \"\""
+ "#os.environ[\"OPENAI_API_KEY\"] = \"\""
  ]
  },
  {
@@ -47,18 +47,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loaded 437 LinkedIn posts\n",
- "BM25 index initialized\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "my_ai = await LinkedinAI.from_bm25(posts=\"data/posts.json\")"
  ]
@@ -84,24 +75,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025/04/17 12:56:45 WARNING mlflow.tracing.processor.mlflow: Creating a trace within the default experiment with id '0'. It is strongly recommended to not use the default experiment to log traces due to ambiguous search results and probable performance issues over time due to directory table listing performance degradation with high volumes of directories within a specific path. To avoid performance and disambiguation issues, set the experiment for your environment using `mlflow.set_experiment()` API.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Open source AI models, particularly large language models (LLMs), have shown significant potential and have led to a surge in applications since the release of models like Llama-2. Contrary to the predictions of some AI doomers, these open source models have not resulted in catastrophic scenarios. Instead, they have fostered innovation and development in the AI community. This suggests that the idea of open source AI models being more dangerous than closed ones is not supported by the evidence we've seen so far.\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "response = await my_ai.ask(\"What are your thoughts on OSS LLMs?\")\n",
  "\n",
@@ -117,17 +93,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Closed source LLMs have their place, but it's important to recognize the benefits that open source models bring to the table. Since the release of Llama-2, we've seen an explosion of applications built on open source LLMs without any of the catastrophic scenarios predicted by AI doomers. Open source models foster innovation and collaboration, allowing for a broader range of applications and advancements. While closed source models can offer certain advantages, such as proprietary enhancements and potentially more controlled environments, the open source approach has proven to be a powerful driver of progress in the AI field.\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "response = await my_ai.ask(\"What are your thoughts on closed source LLMs?\")\n",
  "\n",
@@ -151,7 +119,7 @@
  ],
  "metadata": {
  "kernelspec": {
- "display_name": "superme",
+ "display_name": ".venv",
  "language": "python",
  "name": "python3"
  },
@@ -165,7 +133,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.12.8"
  }
  },
  "nbformat": 4,

1_experiment.ipynb (+38, -178)

@@ -13,6 +13,8 @@
  "\n",
  "### Setting Up Observability\n",
  "\n",
+ "We use MLflow for tracing. MLflow has already been installed, but you have to start the server locally with the `mlflow ui` command.\n",
+ "\n",
  "1. Navigate to the linkedin_ai directory:\n",
  " ```bash\n",
  " cd linkedin_ai\n",
@@ -21,7 +23,26 @@
  "2. Host mlflow UI and point to the URI\n",
  " ```bash\n",
  " mlflow ui --port 8080 --backend-store-uri file:///<your-file-path>/linkedin_ai/mlruns \n",
- " ```"
+ " ```\n",
+ "\n",
+ "To find the exact command to run, check 👇🏻"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# if you want to find the mlflow path\n",
+ "import os\n",
+ "\n",
+ "current_dir = os.getcwd()\n",
+ "tracking_uri = os.path.join(current_dir, \"mlruns\")\n",
+ "tracking_uri = f\"file:///{tracking_uri.lstrip('/')}\"\n",
+ "\n",
+ "print(\"Now run this in your terminal:\")\n",
+ "print(f\"$ mlflow ui --port 8080 --backend-store-uri {tracking_uri}\")"
  ]
  },
  {
@@ -45,9 +66,6 @@
  "import os\n",
  "import mlflow\n",
  "\n",
- "current_dir = os.getcwd()\n",
- "tracking_uri = os.path.join(current_dir, \"mlruns\")\n",
- "tracking_uri = f\"file:///{tracking_uri.lstrip('/')}\"\n",
  "\n",
  "os.environ[\"MLFLOW_HOST\"] = \"http://127.0.0.1:8080\"\n",
  "os.environ[\"MLFLOW_TRACKING_URI\"] = tracking_uri\n",
@@ -56,20 +74,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'905517997201736171'"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
  "source": [
  "# mlflow uses this to log your traces\n",
  "mlflow.create_experiment(\n",
@@ -80,20 +87,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<Experiment: artifact_location='file:///Users/jjmachan/workspace/eglabs/clientwork/linkedin_ai/mlruns/905517997201736171', creation_time=1744931730125, experiment_id='905517997201736171', last_update_time=1744931730125, lifecycle_stage='active', name='my_hackathon_experiment', tags={}>"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
  "source": [
  "mlflow.set_experiment(\n",
  " \"my_hackathon_experiment\",\n",
@@ -110,39 +106,19 @@
  },
  {
  "cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loaded 437 LinkedIn posts\n",
- "BM25 index initialized\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "from linkedin_ai import LinkedinAI\n",
  "my_ai = await LinkedinAI.from_bm25('data/posts.json')\n"
  ]
  },
  {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "\"Open-source models are a significant part of Meta's approach, as seen with LLAMA. I believe that open-sourcing models can accelerate innovation and collaboration within the AI community. By making models and code accessible, we enable researchers and developers to build upon existing work, fostering a more inclusive and rapid advancement in AI technologies. This approach aligns with the vision of creating AI systems that can better understand and interact with the physical world.\""
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
  "source": [
  "await my_ai.ask(\"what is your take on opensource models\")"
  ]
@@ -191,20 +167,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "TestDataset(question='Did you found or advise any startups in the AI or biometrics space?', citations='[\"https://www.linkedin.com/in/yann-lecun\"]', grading_notes='- Should mention Element Inc as a co-founded startup.\\n- May also mention Museami.\\n- Describes involvement in biometric authentication or AI-related products.')"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
  "source": [
  "test_dataset[0]"
  ]
@@ -218,20 +183,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'fail'"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
  "source": [
  "# de\n",
  "from ragas_experimental.llm import ragas_llm\n",
@@ -296,69 +250,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[31mSignature:\u001b[39m\n",
- "p.mlflow_experiment(\n",
- " experiment_model,\n",
- " name_prefix: str = \u001b[33m''\u001b[39m,\n",
- " save_to_git: bool = \u001b[38;5;28;01mTrue\u001b[39;00m,\n",
- " stage_all: bool = \u001b[38;5;28;01mTrue\u001b[39;00m,\n",
- ")\n",
- "\u001b[31mDocstring:\u001b[39m\n",
- "Decorator for creating experiment functions with mlflow integration.\n",
- "\n",
- "Args:\n",
- " experiment_model: The NotionModel type to use for experiment results\n",
- " name_prefix: Optional prefix for experiment names\n",
- "\n",
- "Returns:\n",
- " Decorator function that wraps experiment functions with mlflow observation\n",
- "\u001b[31mFile:\u001b[39m ~/workspace/eglabs/clientwork/linkedin_ai/.venv/lib/python3.12/site-packages/ragas_experimental/project/experiments.py\n",
- "\u001b[31mType:\u001b[39m method"
- ]
- }
- ],
- "source": [
- "p.mlflow_experiment?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Running experiment: 100%|██████████| 100/100 [00:46<00:00, 2.13it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "No changes detected, nothing to commit\n",
- "Created branch: ragas/quirky_shamir\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "Experiment(name=quirky_shamir, model=ExperimentModel)"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
  "source": [
  "await experiment_func.run_async(test_dataset)"
  ]
@@ -397,43 +291,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Running experiment: 100%|██████████| 100/100 [00:44<00:00, 2.25it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Staging changes to tracked files\n",
- "Changes committed with hash: a0692b82\n",
- "Created branch: ragas/elegant_rivest\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "Experiment(name=elegant_rivest, model=ExperimentModel)"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
  "source": [
  "await experiment_func.run_async(test_dataset)"
  ]
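
Putting the 1_experiment.ipynb pieces together, the tracing setup amounts to roughly this (a sketch, assuming MLflow is installed and `mlflow ui --port 8080 --backend-store-uri <tracking_uri>` is already running as the instructions above describe; the hunk that drops the tracking_uri computation from the setup cell relies on the earlier helper cell having defined it, so it is recomputed here):

```python
# Sketch of the MLflow tracing setup from 1_experiment.ipynb.
import os

import mlflow

# Point MLflow at the local ./mlruns directory as a file:// URI,
# mirroring the helper cell added in this commit.
current_dir = os.getcwd()
tracking_uri = f"file:///{os.path.join(current_dir, 'mlruns').lstrip('/')}"

os.environ["MLFLOW_HOST"] = "http://127.0.0.1:8080"  # as set in the notebook
os.environ["MLFLOW_TRACKING_URI"] = tracking_uri

# Create the experiment once, then select it for subsequent traces.
# (create_experiment raises if the name already exists, hence the guard.)
if mlflow.get_experiment_by_name("my_hackathon_experiment") is None:
    mlflow.create_experiment("my_hackathon_experiment")
mlflow.set_experiment("my_hackathon_experiment")
```

With the experiment set, the `LinkedinAI` calls and the `experiment_func.run_async(test_dataset)` runs above should log their traces under my_hackathon_experiment rather than MLflow's default experiment (the warning that was cleared from 0_example.ipynb's output).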

0 commit comments