Merge pull request #133 from haesleinhuepf/notebook_generation

add notebook-generation

haesleinhuepf authored May 26, 2024
2 parents 37daa3d + 789a5d5 commit e450bc9

Showing 8 changed files with 723 additions and 19 deletions.
7 changes: 7 additions & 0 deletions README.md
@@ -31,6 +31,13 @@ It will then respond with a python code snippet that you can execute ([see full

![img.png](https://github.com/haesleinhuepf/bia-bob/raw/main/docs/images/load_and_show_blobs.png)

### Notebook generation

When you explicitly ask Bob to generate a notebook, it will write a new notebook file containing the generated code to the current directory. You can then open it in JupyterLab.

![](https://github.com/haesleinhuepf/bia-bob/raw/main/docs/images/generate_notebook.gif)
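
For example, a prompt like the following (a minimal sketch mirroring the demo notebook in this PR; the exact wording is up to you) triggers notebook generation:

```
%%bob
Please write a notebook that opens blobs.tif, segments the bright objects,
measures their area and perimeter and plots the area against the perimeter.
```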


### Bug fixing

Bob can fix simple bugs in code you executed. Just add `%%fix` on top of the cell right after the error happened.
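
One way to read this (a sketch; `%%fix` must be the first line of its cell): run a cell containing only the magic directly after the failing cell, and Bob will suggest corrected code.

```
%%fix
```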
417 changes: 417 additions & 0 deletions demo/Blob_Segmentation_Analysis.ipynb

Large diffs are not rendered by default.

166 changes: 166 additions & 0 deletions demo/generate_notebooks.ipynb
@@ -0,0 +1,166 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "bbd93ac1-f79b-491c-88b4-50807b541b6e",
"metadata": {},
"source": [
"# Generating notebooks\n",
"When working on complex tasks, it may make sense to generate an entire notebook for the task. \n",
"Hence, we can ask bob to generate a notebook for a series of tasks."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "51256f2d-e4cb-4c8b-975d-75afad68c837",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from bia_bob import bob"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8a1e8db6-a144-4773-a2ab-6e0eac17f94b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task type: 3\n"
]
},
{
"data": {
"text/markdown": [
"A notebook has been saved as [Blob_Segmentation_Analysis.ipynb](Blob_Segmentation_Analysis.ipynb)."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%bob \n",
"Please write a notebook that does the following:\n",
"* open blobs.tif, \n",
"* segment the bright objects\n",
"* measure their area and perimeter\n",
"* plot the area against perimeter"
]
},
{
"cell_type": "markdown",
"id": "9a13dd39-7c0a-42b9-94c0-a916e21addd9",
"metadata": {},
"source": [
"Note, if we don't ask for the notebook explicitly, Bob will write a huge code block, which might be less readable."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8b152eb3-326b-4651-9033-0458116be0d7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task type: 1\n"
]
},
{
"data": {
"text/markdown": [
"We will load an image, segment bright objects, measure their area and perimeter, and plot area against perimeter.\n",
"\n"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%bob \n",
"Please do the following:\n",
"* open blobs.tif, \n",
"* segment the bright objects\n",
"* measure their area and perimeter\n",
"* plot the area against perimeter"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f12df40-04df-42ab-88ca-3691418362b6",
"metadata": {},
"outputs": [],
"source": [
"from skimage.io import imread\n",
"from skimage.filters import threshold_otsu\n",
"from skimage.measure import label, regionprops_table\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import stackview\n",
"\n",
"# Load the image\n",
"filename = 'blobs.tif'\n",
"image = imread(filename)\n",
"stackview.insight(image)\n",
"\n",
"# Segment the bright objects using Otsu's thresholding\n",
"threshold_value = threshold_otsu(image)\n",
"binary_image = image > threshold_value\n",
"stackview.insight(binary_image)\n",
"\n",
"# Label the segmented objects\n",
"labeled_image = label(binary_image)\n",
"stackview.insight(labeled_image)\n",
"\n",
"# Measure area and perimeter of each labeled object\n",
"properties = regionprops_table(labeled_image, properties=['area', 'perimeter'])\n",
"df_properties = pd.DataFrame(properties)\n",
"\n",
"# Plot area against perimeter\n",
"plt.scatter(df_properties['area'], df_properties['perimeter'])\n",
"plt.xlabel('Area')\n",
"plt.ylabel('Perimeter')\n",
"plt.title('Area vs Perimeter of Segmented Objects')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
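
Since the model emits raw ipynb JSON, it can be worth validating a generated file before opening it. A quick check, sketched here with the standard `nbformat` package (an assumption; bia-bob itself is not shown to depend on it):

```python
import nbformat

# read the generated file and validate it against the notebook schema
nb = nbformat.read("Blob_Segmentation_Analysis.ipynb", as_version=4)
nbformat.validate(nb)  # raises ValidationError if the JSON is malformed
print(f"{len(nb.cells)} cells, nbformat {nb.nbformat}.{nb.nbformat_minor}")
```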
Binary file added docs/images/generate_notebook.gif
3 changes: 2 additions & 1 deletion src/bia_bob/__init__.py
@@ -1,4 +1,4 @@
__version__ = "0.14.1"
__version__ = "0.15.0"

__all__ = (
)
@@ -8,6 +8,7 @@
from ._bug_fixing import fix
from ._document import doc
from ._utilities import available_models
from ._notebook_generation import generate_notebook

bob.initialize = init_assistant
bob.__version__ = __version__
34 changes: 31 additions & 3 deletions src/bia_bob/_machinery.py
@@ -63,7 +63,8 @@ def bob(line: str = None, cell: str = None):
"""
from IPython.core.getipython import get_ipython
from IPython.display import display
from ._utilities import generate_response_to_user, output_text, is_image
from ._utilities import generate_response_to_user, output_text, is_image, generate_response
from ._notebook_generation import generate_notebook

if Context.model is None:
init_assistant()
@@ -79,8 +80,35 @@
display("Please ask a question!")
return

# generate the response
code, text = generate_response_to_user(Context.model, user_input, image)
TASK_TYPE_CODE_GENERATION = 1
TASK_TYPE_TEXT_RESPONSE = 2
TASK_TYPE_NOTEBOOK_GENERATION = 3

task_selection_prompt = f"""
Given the following prompt, decide which of the following types of tasks we need to perform:
{TASK_TYPE_CODE_GENERATION}. Code generation: The prompt asks for code to be generated.
{TASK_TYPE_TEXT_RESPONSE}. Text response: The prompt asks for a text response.
{TASK_TYPE_NOTEBOOK_GENERATION}. Notebook generation: The prompt asks explicitly for a notebook to be generated. Only choose this if the prompt explicitly asks for a notebook.
This is the prompt:
{user_input}
Now respond with the number of the task type and nothing else.
"""
response = generate_response(chat_history=[],
image=None,
model=Context.model,
system_prompt="",
user_prompt=task_selection_prompt,
vision_system_prompt="")
task_type = int(response.strip().split(".")[0])

if task_type == TASK_TYPE_CODE_GENERATION or task_type == TASK_TYPE_TEXT_RESPONSE:
code, text = generate_response_to_user(Context.model, user_input, image)
else:
code = None
filename = generate_notebook(user_input)
text = f"A notebook has been saved as [{filename}]({filename})."

# print out explanation
if code is None or not Context.auto_execute:
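
The task-type parsing above assumes the model answers with a bare number, optionally followed by a period. A more defensive variant, sketched here as a hypothetical helper that is not part of this PR, could fall back to code generation when the reply is unparseable:

```python
import re

def parse_task_type(response: str, default: int = 1) -> int:
    """Extract the first integer from a model reply; fall back to `default`
    (TASK_TYPE_CODE_GENERATION) if no number is found. Hypothetical helper."""
    match = re.search(r"\d+", response)
    return int(match.group(0)) if match else default
```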
78 changes: 78 additions & 0 deletions src/bia_bob/_notebook_generation.py
@@ -0,0 +1,78 @@
def generate_notebook(prompt, filename=None):
"""
Generates a notebook from a given prompt and saves it to a file.
If the filename is not specified, a filename will be generated by the language model.
If the specified file exists already, it will be overwritten.
Parameters
----------
prompt
filename, optional
Returns
-------
filename
"""
from ._utilities import create_system_prompt, generate_response
from ._machinery import Context, init_assistant
import os
if Context.model is None:
init_assistant()

system_prompt = create_system_prompt().split("## Todos")[0] + f"""
You have been asked to generate a notebook in the ipynb format.
Start the notebook with an introduction of the task.
List the steps you are planning to do in the notebook. Do not use Python yet.
Keep in mind that labeling is an individual processing step.
Before starting to write code cells, add the disclaimer "This code is generated by an AI model using the [bia-bob project](https://github.com/haesleinhuepf/bia-bob). It is good scientific practice to check the code and results it produces carefully.".
Import the libraries you are planning to use.
Write one code cell for each processing step in the analysis.
For each individual processing step, write a markdown cell explaining what you are doing.
By the end of each cell write code for showing the intermediate result:
* Use stackview.insight() for showing images.
* Use display() for showing dataframes.
* Use print() for any other results.
In the ipynb-json format you write, there must be no output in any of the cells.
There must be no images displayed in the notebook you write.
Your answer is in ipynb format.
"""

full_response = generate_response(chat_history=[], image=None, model=Context.model, system_prompt=system_prompt, user_prompt=prompt, vision_system_prompt="")

if filename is None:
filename_prompt = f"""
What would be a good filename for a notebook that answers the following prompt?
Prompt:
{prompt}
Respond with the filename and nothing else.
"""

filename = generate_response(chat_history=[], image=None, model=Context.model, system_prompt="", user_prompt=filename_prompt, vision_system_prompt="")
if not filename.endswith(".ipynb"):
filename += ".ipynb"

# if the file exists already, append a counter suffix to get a unique name
if os.path.exists(filename):
    base_name = filename[:-len(".ipynb")]
    i = 1
    while os.path.exists(filename):
        filename = f"{base_name}_{i}.ipynb"
        i += 1

full_response = full_response.strip()

# remove markdown code fences in case the model wrapped its answer in them
if full_response.startswith("```python"):
    full_response = full_response[len("```python"):]
if full_response.startswith("```json"):
    full_response = full_response[len("```json"):]
if full_response.startswith("```"):
    full_response = full_response[3:]
if full_response.endswith("```"):
    full_response = full_response[:-3]

# write the response to a file in utf-8 encoding
with open(filename, "w", encoding="utf-8") as file:
file.write(full_response)

return filename
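
Because `generate_notebook` is also exported from the package (see the `__init__.py` change above), it can be called directly, without the `%%bob` magic:

```python
from bia_bob import generate_notebook

# generate a notebook; without a filename, the language model proposes one
filename = generate_notebook(
    "Open blobs.tif, segment the bright objects and measure their area."
)
print(f"Notebook saved as {filename}")
```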
37 changes: 22 additions & 15 deletions src/bia_bob/_utilities.py
Expand Up @@ -26,21 +26,7 @@ def generate_response_to_user(model, user_prompt: str, image=None, additional_sy
print("\nSystem prompt:", system_prompt)
print_chat(chat_history)

if Context.endpoint is not None:
full_response = generate_response_from_openai(model, system_prompt, user_prompt, chat_history, image,
base_url=Context.endpoint, api_key=Context.api_key,
vision_model=Context.vision_model,
vision_system_prompt=vision_system_prompt)
elif "gpt-" in model:
full_response = generate_response_from_openai(model, system_prompt, user_prompt, chat_history, image,
vision_model=Context.vision_model,
vision_system_prompt=vision_system_prompt)
elif "gemini-" in model:
full_response = generate_response_from_vertex_ai(model, system_prompt, user_prompt, chat_history, image,
vision_model=Context.vision_model,
vision_system_prompt=vision_system_prompt)
else:
raise RuntimeError(f"Unknown model API for {model}")
full_response = generate_response(chat_history, image, model, system_prompt, user_prompt, vision_system_prompt)

if Context.verbose:
print("\n\nFull response:\n", full_response)
@@ -72,6 +58,27 @@

return code, text


def generate_response(chat_history, image, model, system_prompt, user_prompt, vision_system_prompt):
from ._machinery import Context
if Context.endpoint is not None:
full_response = generate_response_from_openai(model, system_prompt, user_prompt, chat_history, image,
base_url=Context.endpoint, api_key=Context.api_key,
vision_model=Context.vision_model,
vision_system_prompt=vision_system_prompt)
elif "gpt-" in model:
full_response = generate_response_from_openai(model, system_prompt, user_prompt, chat_history, image,
vision_model=Context.vision_model,
vision_system_prompt=vision_system_prompt)
elif "gemini-" in model:
full_response = generate_response_from_vertex_ai(model, system_prompt, user_prompt, chat_history, image,
vision_model=Context.vision_model,
vision_system_prompt=vision_system_prompt)
else:
raise RuntimeError(f"Unknown model API for {model}")
return full_response
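
For reference, a minimal invocation of this helper, matching the call pattern used elsewhere in this PR (an illustration, not additional repository code):

```python
from bia_bob._utilities import generate_response
from bia_bob._machinery import Context, init_assistant

if Context.model is None:
    init_assistant()

answer = generate_response(chat_history=[], image=None, model=Context.model,
                           system_prompt="",
                           user_prompt="Which segmentation algorithms exist?",
                           vision_system_prompt="")
print(answer)
```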


def split_response(text):
# hotfix modifications for not-so-capable models (e.g. ollama/codellama or blablador/Mistral-7B-Instruct-v0.2)
for item in ["Summary", "Plan", "Code"]: